{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999461468038128, "eval_steps": 500, "global_step": 4642, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.3680746555328369, "learning_rate": 7.142857142857144e-08, "loss": 0.6038, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.5339280366897583, "learning_rate": 1.4285714285714287e-07, "loss": 0.6378, "step": 2 }, { "epoch": 0.0, "grad_norm": 0.68046635389328, "learning_rate": 2.142857142857143e-07, "loss": 0.6492, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.4668383002281189, "learning_rate": 2.8571428571428575e-07, "loss": 0.5478, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.7353125810623169, "learning_rate": 3.5714285714285716e-07, "loss": 0.5802, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.4221579134464264, "learning_rate": 4.285714285714286e-07, "loss": 0.5863, "step": 6 }, { "epoch": 0.0, "grad_norm": 0.35981830954551697, "learning_rate": 5.000000000000001e-07, "loss": 0.5881, "step": 7 }, { "epoch": 0.0, "grad_norm": 0.49171018600463867, "learning_rate": 5.714285714285715e-07, "loss": 0.6196, "step": 8 }, { "epoch": 0.0, "grad_norm": 0.26764529943466187, "learning_rate": 6.428571428571428e-07, "loss": 0.6026, "step": 9 }, { "epoch": 0.0, "grad_norm": 0.31956347823143005, "learning_rate": 7.142857142857143e-07, "loss": 0.6077, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.46464401483535767, "learning_rate": 7.857142857142857e-07, "loss": 0.6144, "step": 11 }, { "epoch": 0.0, "grad_norm": 0.41506412625312805, "learning_rate": 8.571428571428572e-07, "loss": 0.5684, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.448373407125473, "learning_rate": 9.285714285714287e-07, "loss": 0.5785, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.3355347514152527, "learning_rate": 1.0000000000000002e-06, "loss": 0.5688, "step": 14 }, { "epoch": 0.0, "grad_norm": 0.4548545181751251, "learning_rate": 1.0714285714285714e-06, "loss": 0.574, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.4471697211265564, "learning_rate": 1.142857142857143e-06, "loss": 0.5526, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.5379983186721802, "learning_rate": 1.2142857142857144e-06, "loss": 0.5503, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.3886550962924957, "learning_rate": 1.2857142857142856e-06, "loss": 0.6074, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.4560534656047821, "learning_rate": 1.3571428571428572e-06, "loss": 0.6262, "step": 19 }, { "epoch": 0.0, "grad_norm": 0.3122997283935547, "learning_rate": 1.4285714285714286e-06, "loss": 0.6103, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.35939767956733704, "learning_rate": 1.5e-06, "loss": 0.6164, "step": 21 }, { "epoch": 0.0, "grad_norm": 0.3751821517944336, "learning_rate": 1.5714285714285714e-06, "loss": 0.5534, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.5395365953445435, "learning_rate": 1.642857142857143e-06, "loss": 0.5911, "step": 23 }, { "epoch": 0.01, "grad_norm": 0.46072208881378174, "learning_rate": 1.7142857142857145e-06, "loss": 0.5986, "step": 24 }, { "epoch": 0.01, "grad_norm": 0.35585564374923706, "learning_rate": 1.7857142857142859e-06, "loss": 0.5726, "step": 25 }, { "epoch": 0.01, "grad_norm": 0.5622196197509766, "learning_rate": 1.8571428571428573e-06, "loss": 0.6092, "step": 26 }, { "epoch": 0.01, "grad_norm": 0.4780106246471405, "learning_rate": 1.928571428571429e-06, "loss": 0.6318, "step": 27 }, { "epoch": 0.01, "grad_norm": 0.4055005609989166, "learning_rate": 2.0000000000000003e-06, "loss": 0.5731, 
"step": 28 }, { "epoch": 0.01, "grad_norm": 0.29330089688301086, "learning_rate": 2.0714285714285717e-06, "loss": 0.577, "step": 29 }, { "epoch": 0.01, "grad_norm": 0.4011281132698059, "learning_rate": 2.1428571428571427e-06, "loss": 0.5771, "step": 30 }, { "epoch": 0.01, "grad_norm": 0.5358087420463562, "learning_rate": 2.2142857142857146e-06, "loss": 0.5324, "step": 31 }, { "epoch": 0.01, "grad_norm": 0.39781442284584045, "learning_rate": 2.285714285714286e-06, "loss": 0.5668, "step": 32 }, { "epoch": 0.01, "grad_norm": 0.44512811303138733, "learning_rate": 2.3571428571428574e-06, "loss": 0.6232, "step": 33 }, { "epoch": 0.01, "grad_norm": 0.3510986566543579, "learning_rate": 2.428571428571429e-06, "loss": 0.5816, "step": 34 }, { "epoch": 0.01, "grad_norm": 0.39098355174064636, "learning_rate": 2.5e-06, "loss": 0.5486, "step": 35 }, { "epoch": 0.01, "grad_norm": 0.4460495412349701, "learning_rate": 2.571428571428571e-06, "loss": 0.6256, "step": 36 }, { "epoch": 0.01, "grad_norm": 0.5601059794425964, "learning_rate": 2.642857142857143e-06, "loss": 0.5408, "step": 37 }, { "epoch": 0.01, "grad_norm": 0.543770432472229, "learning_rate": 2.7142857142857144e-06, "loss": 0.5843, "step": 38 }, { "epoch": 0.01, "grad_norm": 0.45234617590904236, "learning_rate": 2.785714285714286e-06, "loss": 0.6491, "step": 39 }, { "epoch": 0.01, "grad_norm": 0.35524260997772217, "learning_rate": 2.8571428571428573e-06, "loss": 0.5765, "step": 40 }, { "epoch": 0.01, "grad_norm": 0.3411543071269989, "learning_rate": 2.928571428571429e-06, "loss": 0.5184, "step": 41 }, { "epoch": 0.01, "grad_norm": 0.2239224910736084, "learning_rate": 3e-06, "loss": 0.5921, "step": 42 }, { "epoch": 0.01, "grad_norm": 0.4779617190361023, "learning_rate": 3.071428571428572e-06, "loss": 0.5981, "step": 43 }, { "epoch": 0.01, "grad_norm": 0.43023017048835754, "learning_rate": 3.142857142857143e-06, "loss": 0.5822, "step": 44 }, { "epoch": 0.01, "grad_norm": 0.5614896416664124, "learning_rate": 3.2142857142857147e-06, "loss": 0.5585, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.35685351490974426, "learning_rate": 3.285714285714286e-06, "loss": 0.5623, "step": 46 }, { "epoch": 0.01, "grad_norm": 0.2451944649219513, "learning_rate": 3.357142857142857e-06, "loss": 0.4998, "step": 47 }, { "epoch": 0.01, "grad_norm": 0.24035461246967316, "learning_rate": 3.428571428571429e-06, "loss": 0.5864, "step": 48 }, { "epoch": 0.01, "grad_norm": 0.29300373792648315, "learning_rate": 3.5e-06, "loss": 0.5861, "step": 49 }, { "epoch": 0.01, "grad_norm": 0.3160554766654968, "learning_rate": 3.5714285714285718e-06, "loss": 0.5769, "step": 50 }, { "epoch": 0.01, "grad_norm": 0.20753253996372223, "learning_rate": 3.642857142857143e-06, "loss": 0.5592, "step": 51 }, { "epoch": 0.01, "grad_norm": 0.29364365339279175, "learning_rate": 3.7142857142857146e-06, "loss": 0.5855, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.25966310501098633, "learning_rate": 3.785714285714286e-06, "loss": 0.5714, "step": 53 }, { "epoch": 0.01, "grad_norm": 0.5970475673675537, "learning_rate": 3.857142857142858e-06, "loss": 0.5955, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.2742187976837158, "learning_rate": 3.928571428571429e-06, "loss": 0.5982, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.21851637959480286, "learning_rate": 4.000000000000001e-06, "loss": 0.5086, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.31623247265815735, "learning_rate": 4.071428571428572e-06, "loss": 0.5507, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.21017701923847198, "learning_rate": 
4.1428571428571435e-06, "loss": 0.5681, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.23857641220092773, "learning_rate": 4.2142857142857145e-06, "loss": 0.5873, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.330123633146286, "learning_rate": 4.2857142857142855e-06, "loss": 0.5507, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.2590518295764923, "learning_rate": 4.357142857142857e-06, "loss": 0.5563, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.22396299242973328, "learning_rate": 4.428571428571429e-06, "loss": 0.5208, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.20799732208251953, "learning_rate": 4.5e-06, "loss": 0.5636, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.28667014837265015, "learning_rate": 4.571428571428572e-06, "loss": 0.5126, "step": 64 }, { "epoch": 0.01, "grad_norm": 0.3183256983757019, "learning_rate": 4.642857142857144e-06, "loss": 0.4917, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.34055784344673157, "learning_rate": 4.714285714285715e-06, "loss": 0.5869, "step": 66 }, { "epoch": 0.01, "grad_norm": 0.18996137380599976, "learning_rate": 4.785714285714287e-06, "loss": 0.5898, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.19642199575901031, "learning_rate": 4.857142857142858e-06, "loss": 0.5024, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.2719160318374634, "learning_rate": 4.928571428571429e-06, "loss": 0.5239, "step": 69 }, { "epoch": 0.02, "grad_norm": 0.2251090109348297, "learning_rate": 5e-06, "loss": 0.5327, "step": 70 }, { "epoch": 0.02, "grad_norm": 0.15708671510219574, "learning_rate": 5.071428571428571e-06, "loss": 0.5446, "step": 71 }, { "epoch": 0.02, "grad_norm": 0.2416897416114807, "learning_rate": 5.142857142857142e-06, "loss": 0.5972, "step": 72 }, { "epoch": 0.02, "grad_norm": 0.30218374729156494, "learning_rate": 5.214285714285715e-06, "loss": 0.5723, "step": 73 }, { "epoch": 0.02, "grad_norm": 0.23136214911937714, "learning_rate": 5.285714285714286e-06, "loss": 0.5919, "step": 74 }, { "epoch": 0.02, "grad_norm": 0.29520007967948914, "learning_rate": 5.357142857142857e-06, "loss": 0.5443, "step": 75 }, { "epoch": 0.02, "grad_norm": 0.2675969898700714, "learning_rate": 5.428571428571429e-06, "loss": 0.6078, "step": 76 }, { "epoch": 0.02, "grad_norm": 0.21040533483028412, "learning_rate": 5.500000000000001e-06, "loss": 0.5131, "step": 77 }, { "epoch": 0.02, "grad_norm": 0.21507872641086578, "learning_rate": 5.571428571428572e-06, "loss": 0.5537, "step": 78 }, { "epoch": 0.02, "grad_norm": 0.3713940680027008, "learning_rate": 5.6428571428571435e-06, "loss": 0.5677, "step": 79 }, { "epoch": 0.02, "grad_norm": 0.2338705062866211, "learning_rate": 5.7142857142857145e-06, "loss": 0.5583, "step": 80 }, { "epoch": 0.02, "grad_norm": 0.24568618834018707, "learning_rate": 5.785714285714286e-06, "loss": 0.591, "step": 81 }, { "epoch": 0.02, "grad_norm": 0.2607351541519165, "learning_rate": 5.857142857142858e-06, "loss": 0.5854, "step": 82 }, { "epoch": 0.02, "grad_norm": 0.25233450531959534, "learning_rate": 5.928571428571429e-06, "loss": 0.5435, "step": 83 }, { "epoch": 0.02, "grad_norm": 0.1901504397392273, "learning_rate": 6e-06, "loss": 0.5189, "step": 84 }, { "epoch": 0.02, "grad_norm": 0.20455455780029297, "learning_rate": 6.071428571428571e-06, "loss": 0.5372, "step": 85 }, { "epoch": 0.02, "grad_norm": 0.2110891193151474, "learning_rate": 6.142857142857144e-06, "loss": 0.5386, "step": 86 }, { "epoch": 0.02, "grad_norm": 0.18980112671852112, "learning_rate": 6.214285714285715e-06, "loss": 0.5834, "step": 87 }, { "epoch": 0.02, "grad_norm": 
0.24649843573570251, "learning_rate": 6.285714285714286e-06, "loss": 0.5769, "step": 88 }, { "epoch": 0.02, "grad_norm": 0.20015066862106323, "learning_rate": 6.357142857142858e-06, "loss": 0.5213, "step": 89 }, { "epoch": 0.02, "grad_norm": 0.23394432663917542, "learning_rate": 6.4285714285714295e-06, "loss": 0.5351, "step": 90 }, { "epoch": 0.02, "grad_norm": 0.23542846739292145, "learning_rate": 6.5000000000000004e-06, "loss": 0.5527, "step": 91 }, { "epoch": 0.02, "grad_norm": 0.2840578258037567, "learning_rate": 6.571428571428572e-06, "loss": 0.5917, "step": 92 }, { "epoch": 0.02, "grad_norm": 0.5794204473495483, "learning_rate": 6.642857142857143e-06, "loss": 0.5693, "step": 93 }, { "epoch": 0.02, "grad_norm": 0.2579974830150604, "learning_rate": 6.714285714285714e-06, "loss": 0.577, "step": 94 }, { "epoch": 0.02, "grad_norm": 0.23646292090415955, "learning_rate": 6.785714285714287e-06, "loss": 0.5327, "step": 95 }, { "epoch": 0.02, "grad_norm": 0.3467201888561249, "learning_rate": 6.857142857142858e-06, "loss": 0.5163, "step": 96 }, { "epoch": 0.02, "grad_norm": 0.1848195195198059, "learning_rate": 6.928571428571429e-06, "loss": 0.5614, "step": 97 }, { "epoch": 0.02, "grad_norm": 0.20233601331710815, "learning_rate": 7e-06, "loss": 0.5993, "step": 98 }, { "epoch": 0.02, "grad_norm": 0.2592422366142273, "learning_rate": 7.0714285714285726e-06, "loss": 0.5568, "step": 99 }, { "epoch": 0.02, "grad_norm": 0.19915878772735596, "learning_rate": 7.1428571428571436e-06, "loss": 0.5331, "step": 100 }, { "epoch": 0.02, "grad_norm": 0.24805685877799988, "learning_rate": 7.2142857142857145e-06, "loss": 0.5241, "step": 101 }, { "epoch": 0.02, "grad_norm": 0.2214994579553604, "learning_rate": 7.285714285714286e-06, "loss": 0.5219, "step": 102 }, { "epoch": 0.02, "grad_norm": 0.2977463901042938, "learning_rate": 7.357142857142858e-06, "loss": 0.5836, "step": 103 }, { "epoch": 0.02, "grad_norm": 0.23825155198574066, "learning_rate": 7.428571428571429e-06, "loss": 0.5452, "step": 104 }, { "epoch": 0.02, "grad_norm": 0.22910349071025848, "learning_rate": 7.500000000000001e-06, "loss": 0.5525, "step": 105 }, { "epoch": 0.02, "grad_norm": 0.24861909449100494, "learning_rate": 7.571428571428572e-06, "loss": 0.5602, "step": 106 }, { "epoch": 0.02, "grad_norm": 0.220360666513443, "learning_rate": 7.642857142857143e-06, "loss": 0.4968, "step": 107 }, { "epoch": 0.02, "grad_norm": 0.29663270711898804, "learning_rate": 7.714285714285716e-06, "loss": 0.547, "step": 108 }, { "epoch": 0.02, "grad_norm": 0.15902388095855713, "learning_rate": 7.785714285714287e-06, "loss": 0.5593, "step": 109 }, { "epoch": 0.02, "grad_norm": 0.25408726930618286, "learning_rate": 7.857142857142858e-06, "loss": 0.5129, "step": 110 }, { "epoch": 0.02, "grad_norm": 0.25450989603996277, "learning_rate": 7.928571428571429e-06, "loss": 0.5568, "step": 111 }, { "epoch": 0.02, "grad_norm": 0.2113712877035141, "learning_rate": 8.000000000000001e-06, "loss": 0.5311, "step": 112 }, { "epoch": 0.02, "grad_norm": 0.2673487663269043, "learning_rate": 8.071428571428572e-06, "loss": 0.5063, "step": 113 }, { "epoch": 0.02, "grad_norm": 0.17971846461296082, "learning_rate": 8.142857142857143e-06, "loss": 0.4959, "step": 114 }, { "epoch": 0.02, "grad_norm": 0.27486327290534973, "learning_rate": 8.214285714285714e-06, "loss": 0.504, "step": 115 }, { "epoch": 0.02, "grad_norm": 0.3731400966644287, "learning_rate": 8.285714285714287e-06, "loss": 0.5262, "step": 116 }, { "epoch": 0.03, "grad_norm": 0.1998678743839264, "learning_rate": 
8.357142857142858e-06, "loss": 0.6066, "step": 117 }, { "epoch": 0.03, "grad_norm": 0.18095743656158447, "learning_rate": 8.428571428571429e-06, "loss": 0.581, "step": 118 }, { "epoch": 0.03, "grad_norm": 0.20576633512973785, "learning_rate": 8.5e-06, "loss": 0.5646, "step": 119 }, { "epoch": 0.03, "grad_norm": 0.21952424943447113, "learning_rate": 8.571428571428571e-06, "loss": 0.5274, "step": 120 }, { "epoch": 0.03, "grad_norm": 0.22617046535015106, "learning_rate": 8.642857142857144e-06, "loss": 0.4918, "step": 121 }, { "epoch": 0.03, "grad_norm": 0.22353151440620422, "learning_rate": 8.714285714285715e-06, "loss": 0.5383, "step": 122 }, { "epoch": 0.03, "grad_norm": 0.24257732927799225, "learning_rate": 8.785714285714286e-06, "loss": 0.4734, "step": 123 }, { "epoch": 0.03, "grad_norm": 0.16320379078388214, "learning_rate": 8.857142857142858e-06, "loss": 0.5033, "step": 124 }, { "epoch": 0.03, "grad_norm": 0.21186141669750214, "learning_rate": 8.92857142857143e-06, "loss": 0.5116, "step": 125 }, { "epoch": 0.03, "grad_norm": 0.1727321892976761, "learning_rate": 9e-06, "loss": 0.4887, "step": 126 }, { "epoch": 0.03, "grad_norm": 0.17333361506462097, "learning_rate": 9.071428571428573e-06, "loss": 0.5629, "step": 127 }, { "epoch": 0.03, "grad_norm": 0.20159348845481873, "learning_rate": 9.142857142857144e-06, "loss": 0.5855, "step": 128 }, { "epoch": 0.03, "grad_norm": 0.25432631373405457, "learning_rate": 9.214285714285715e-06, "loss": 0.5565, "step": 129 }, { "epoch": 0.03, "grad_norm": 0.18436311185359955, "learning_rate": 9.285714285714288e-06, "loss": 0.4746, "step": 130 }, { "epoch": 0.03, "grad_norm": 0.22167499363422394, "learning_rate": 9.357142857142859e-06, "loss": 0.5437, "step": 131 }, { "epoch": 0.03, "grad_norm": 0.29192057251930237, "learning_rate": 9.42857142857143e-06, "loss": 0.5443, "step": 132 }, { "epoch": 0.03, "grad_norm": 0.1628040224313736, "learning_rate": 9.5e-06, "loss": 0.5563, "step": 133 }, { "epoch": 0.03, "grad_norm": 0.32334551215171814, "learning_rate": 9.571428571428573e-06, "loss": 0.575, "step": 134 }, { "epoch": 0.03, "grad_norm": 0.272955983877182, "learning_rate": 9.642857142857144e-06, "loss": 0.5363, "step": 135 }, { "epoch": 0.03, "grad_norm": 0.2314363420009613, "learning_rate": 9.714285714285715e-06, "loss": 0.5356, "step": 136 }, { "epoch": 0.03, "grad_norm": 0.18768808245658875, "learning_rate": 9.785714285714286e-06, "loss": 0.5053, "step": 137 }, { "epoch": 0.03, "grad_norm": 0.22900734841823578, "learning_rate": 9.857142857142859e-06, "loss": 0.5594, "step": 138 }, { "epoch": 0.03, "grad_norm": 0.1723155379295349, "learning_rate": 9.92857142857143e-06, "loss": 0.5104, "step": 139 }, { "epoch": 0.03, "grad_norm": 0.2596263885498047, "learning_rate": 1e-05, "loss": 0.5271, "step": 140 }, { "epoch": 0.03, "grad_norm": 0.15986420214176178, "learning_rate": 9.999998782612734e-06, "loss": 0.5382, "step": 141 }, { "epoch": 0.03, "grad_norm": 0.33412984013557434, "learning_rate": 9.999995130451526e-06, "loss": 0.4807, "step": 142 }, { "epoch": 0.03, "grad_norm": 0.20340685546398163, "learning_rate": 9.999989043518153e-06, "loss": 0.519, "step": 143 }, { "epoch": 0.03, "grad_norm": 0.18798081576824188, "learning_rate": 9.999980521815582e-06, "loss": 0.5347, "step": 144 }, { "epoch": 0.03, "grad_norm": 0.20350557565689087, "learning_rate": 9.99996956534796e-06, "loss": 0.4913, "step": 145 }, { "epoch": 0.03, "grad_norm": 0.2547079026699066, "learning_rate": 9.999956174120626e-06, "loss": 0.5284, "step": 146 }, { "epoch": 0.03, "grad_norm": 
0.26818037033081055, "learning_rate": 9.999940348140098e-06, "loss": 0.5597, "step": 147 }, { "epoch": 0.03, "grad_norm": 0.1871444284915924, "learning_rate": 9.999922087414084e-06, "loss": 0.4857, "step": 148 }, { "epoch": 0.03, "grad_norm": 0.24267414212226868, "learning_rate": 9.999901391951474e-06, "loss": 0.5243, "step": 149 }, { "epoch": 0.03, "grad_norm": 0.22753533720970154, "learning_rate": 9.99987826176235e-06, "loss": 0.4868, "step": 150 }, { "epoch": 0.03, "grad_norm": 0.17949774861335754, "learning_rate": 9.99985269685797e-06, "loss": 0.5477, "step": 151 }, { "epoch": 0.03, "grad_norm": 0.24117450416088104, "learning_rate": 9.999824697250786e-06, "loss": 0.5583, "step": 152 }, { "epoch": 0.03, "grad_norm": 0.2758869230747223, "learning_rate": 9.999794262954432e-06, "loss": 0.6049, "step": 153 }, { "epoch": 0.03, "grad_norm": 0.22826828062534332, "learning_rate": 9.999761393983728e-06, "loss": 0.5437, "step": 154 }, { "epoch": 0.03, "grad_norm": 0.24656014144420624, "learning_rate": 9.999726090354683e-06, "loss": 0.5417, "step": 155 }, { "epoch": 0.03, "grad_norm": 0.1714806854724884, "learning_rate": 9.999688352084482e-06, "loss": 0.5189, "step": 156 }, { "epoch": 0.03, "grad_norm": 0.17295528948307037, "learning_rate": 9.999648179191505e-06, "loss": 0.5478, "step": 157 }, { "epoch": 0.03, "grad_norm": 0.19324244558811188, "learning_rate": 9.999605571695317e-06, "loss": 0.5664, "step": 158 }, { "epoch": 0.03, "grad_norm": 0.20624053478240967, "learning_rate": 9.999560529616661e-06, "loss": 0.5087, "step": 159 }, { "epoch": 0.03, "grad_norm": 0.26294004917144775, "learning_rate": 9.999513052977473e-06, "loss": 0.6106, "step": 160 }, { "epoch": 0.03, "grad_norm": 0.19021935760974884, "learning_rate": 9.999463141800873e-06, "loss": 0.4975, "step": 161 }, { "epoch": 0.03, "grad_norm": 0.17529787123203278, "learning_rate": 9.999410796111163e-06, "loss": 0.5204, "step": 162 }, { "epoch": 0.04, "grad_norm": 0.19823302328586578, "learning_rate": 9.999356015933834e-06, "loss": 0.5312, "step": 163 }, { "epoch": 0.04, "grad_norm": 0.2933864891529083, "learning_rate": 9.999298801295564e-06, "loss": 0.5123, "step": 164 }, { "epoch": 0.04, "grad_norm": 0.19900120794773102, "learning_rate": 9.99923915222421e-06, "loss": 0.5824, "step": 165 }, { "epoch": 0.04, "grad_norm": 0.18184617161750793, "learning_rate": 9.99917706874882e-06, "loss": 0.5623, "step": 166 }, { "epoch": 0.04, "grad_norm": 0.1705755591392517, "learning_rate": 9.999112550899627e-06, "loss": 0.5458, "step": 167 }, { "epoch": 0.04, "grad_norm": 0.3029863238334656, "learning_rate": 9.999045598708047e-06, "loss": 0.54, "step": 168 }, { "epoch": 0.04, "grad_norm": 0.1956048458814621, "learning_rate": 9.998976212206683e-06, "loss": 0.5387, "step": 169 }, { "epoch": 0.04, "grad_norm": 0.1850360929965973, "learning_rate": 9.998904391429323e-06, "loss": 0.5085, "step": 170 }, { "epoch": 0.04, "grad_norm": 0.21800455451011658, "learning_rate": 9.99883013641094e-06, "loss": 0.5358, "step": 171 }, { "epoch": 0.04, "grad_norm": 0.2191917896270752, "learning_rate": 9.998753447187693e-06, "loss": 0.5668, "step": 172 }, { "epoch": 0.04, "grad_norm": 0.22353680431842804, "learning_rate": 9.998674323796928e-06, "loss": 0.5358, "step": 173 }, { "epoch": 0.04, "grad_norm": 0.2535366117954254, "learning_rate": 9.998592766277173e-06, "loss": 0.5041, "step": 174 }, { "epoch": 0.04, "grad_norm": 0.20851938426494598, "learning_rate": 9.998508774668142e-06, "loss": 0.4944, "step": 175 }, { "epoch": 0.04, "grad_norm": 0.1755622774362564, 
"learning_rate": 9.998422349010736e-06, "loss": 0.5156, "step": 176 }, { "epoch": 0.04, "grad_norm": 0.15284371376037598, "learning_rate": 9.998333489347042e-06, "loss": 0.5233, "step": 177 }, { "epoch": 0.04, "grad_norm": 0.2053551822900772, "learning_rate": 9.998242195720327e-06, "loss": 0.5414, "step": 178 }, { "epoch": 0.04, "grad_norm": 0.18832677602767944, "learning_rate": 9.99814846817505e-06, "loss": 0.5724, "step": 179 }, { "epoch": 0.04, "grad_norm": 0.19767887890338898, "learning_rate": 9.998052306756852e-06, "loss": 0.5258, "step": 180 }, { "epoch": 0.04, "grad_norm": 0.17682293057441711, "learning_rate": 9.997953711512556e-06, "loss": 0.5718, "step": 181 }, { "epoch": 0.04, "grad_norm": 0.23476260900497437, "learning_rate": 9.997852682490179e-06, "loss": 0.5566, "step": 182 }, { "epoch": 0.04, "grad_norm": 0.18441180884838104, "learning_rate": 9.997749219738912e-06, "loss": 0.583, "step": 183 }, { "epoch": 0.04, "grad_norm": 0.2411859780550003, "learning_rate": 9.997643323309139e-06, "loss": 0.4423, "step": 184 }, { "epoch": 0.04, "grad_norm": 0.1665976196527481, "learning_rate": 9.997534993252427e-06, "loss": 0.5353, "step": 185 }, { "epoch": 0.04, "grad_norm": 0.1685672253370285, "learning_rate": 9.997424229621529e-06, "loss": 0.5073, "step": 186 }, { "epoch": 0.04, "grad_norm": 0.277639776468277, "learning_rate": 9.99731103247038e-06, "loss": 0.4813, "step": 187 }, { "epoch": 0.04, "grad_norm": 0.17908422648906708, "learning_rate": 9.997195401854102e-06, "loss": 0.5088, "step": 188 }, { "epoch": 0.04, "grad_norm": 0.1873718649148941, "learning_rate": 9.997077337829003e-06, "loss": 0.5072, "step": 189 }, { "epoch": 0.04, "grad_norm": 0.256670743227005, "learning_rate": 9.996956840452573e-06, "loss": 0.4865, "step": 190 }, { "epoch": 0.04, "grad_norm": 0.27443787455558777, "learning_rate": 9.996833909783492e-06, "loss": 0.5466, "step": 191 }, { "epoch": 0.04, "grad_norm": 0.19919687509536743, "learning_rate": 9.996708545881617e-06, "loss": 0.5387, "step": 192 }, { "epoch": 0.04, "grad_norm": 0.16513916850090027, "learning_rate": 9.996580748808e-06, "loss": 0.5223, "step": 193 }, { "epoch": 0.04, "grad_norm": 0.20502988994121552, "learning_rate": 9.996450518624868e-06, "loss": 0.5194, "step": 194 }, { "epoch": 0.04, "grad_norm": 0.18695437908172607, "learning_rate": 9.99631785539564e-06, "loss": 0.4778, "step": 195 }, { "epoch": 0.04, "grad_norm": 0.16061006486415863, "learning_rate": 9.996182759184916e-06, "loss": 0.5192, "step": 196 }, { "epoch": 0.04, "grad_norm": 0.18725766241550446, "learning_rate": 9.99604523005848e-06, "loss": 0.5199, "step": 197 }, { "epoch": 0.04, "grad_norm": 0.1948050707578659, "learning_rate": 9.995905268083306e-06, "loss": 0.5511, "step": 198 }, { "epoch": 0.04, "grad_norm": 0.1752336025238037, "learning_rate": 9.995762873327548e-06, "loss": 0.5705, "step": 199 }, { "epoch": 0.04, "grad_norm": 0.2874692678451538, "learning_rate": 9.995618045860545e-06, "loss": 0.5504, "step": 200 }, { "epoch": 0.04, "grad_norm": 0.18488091230392456, "learning_rate": 9.99547078575282e-06, "loss": 0.5219, "step": 201 }, { "epoch": 0.04, "grad_norm": 0.21118810772895813, "learning_rate": 9.995321093076085e-06, "loss": 0.6084, "step": 202 }, { "epoch": 0.04, "grad_norm": 0.17937391996383667, "learning_rate": 9.99516896790323e-06, "loss": 0.4974, "step": 203 }, { "epoch": 0.04, "grad_norm": 0.24880222976207733, "learning_rate": 9.995014410308336e-06, "loss": 0.5524, "step": 204 }, { "epoch": 0.04, "grad_norm": 0.2270919531583786, "learning_rate": 9.994857420366669e-06, 
"loss": 0.5298, "step": 205 }, { "epoch": 0.04, "grad_norm": 0.2064422369003296, "learning_rate": 9.994697998154668e-06, "loss": 0.5442, "step": 206 }, { "epoch": 0.04, "grad_norm": 0.18932758271694183, "learning_rate": 9.994536143749969e-06, "loss": 0.4992, "step": 207 }, { "epoch": 0.04, "grad_norm": 0.18627791106700897, "learning_rate": 9.994371857231388e-06, "loss": 0.5652, "step": 208 }, { "epoch": 0.05, "grad_norm": 0.18181046843528748, "learning_rate": 9.994205138678923e-06, "loss": 0.4876, "step": 209 }, { "epoch": 0.05, "grad_norm": 0.19523365795612335, "learning_rate": 9.99403598817376e-06, "loss": 0.5355, "step": 210 }, { "epoch": 0.05, "grad_norm": 0.202137753367424, "learning_rate": 9.993864405798268e-06, "loss": 0.5474, "step": 211 }, { "epoch": 0.05, "grad_norm": 0.1764814555644989, "learning_rate": 9.993690391636e-06, "loss": 0.511, "step": 212 }, { "epoch": 0.05, "grad_norm": 0.36145585775375366, "learning_rate": 9.99351394577169e-06, "loss": 0.5303, "step": 213 }, { "epoch": 0.05, "grad_norm": 0.16018763184547424, "learning_rate": 9.993335068291264e-06, "loss": 0.5363, "step": 214 }, { "epoch": 0.05, "grad_norm": 0.14477033913135529, "learning_rate": 9.993153759281824e-06, "loss": 0.5394, "step": 215 }, { "epoch": 0.05, "grad_norm": 0.20048737525939941, "learning_rate": 9.99297001883166e-06, "loss": 0.5348, "step": 216 }, { "epoch": 0.05, "grad_norm": 0.1712445169687271, "learning_rate": 9.992783847030246e-06, "loss": 0.5438, "step": 217 }, { "epoch": 0.05, "grad_norm": 0.15328474342823029, "learning_rate": 9.992595243968238e-06, "loss": 0.5454, "step": 218 }, { "epoch": 0.05, "grad_norm": 0.22686372697353363, "learning_rate": 9.992404209737476e-06, "loss": 0.5648, "step": 219 }, { "epoch": 0.05, "grad_norm": 0.21831001341342926, "learning_rate": 9.99221074443099e-06, "loss": 0.5224, "step": 220 }, { "epoch": 0.05, "grad_norm": 0.15410234034061432, "learning_rate": 9.992014848142984e-06, "loss": 0.5185, "step": 221 }, { "epoch": 0.05, "grad_norm": 0.20523761212825775, "learning_rate": 9.991816520968853e-06, "loss": 0.5687, "step": 222 }, { "epoch": 0.05, "grad_norm": 0.15560153126716614, "learning_rate": 9.991615763005172e-06, "loss": 0.5229, "step": 223 }, { "epoch": 0.05, "grad_norm": 0.19702470302581787, "learning_rate": 9.991412574349704e-06, "loss": 0.5337, "step": 224 }, { "epoch": 0.05, "grad_norm": 0.24464666843414307, "learning_rate": 9.991206955101388e-06, "loss": 0.5367, "step": 225 }, { "epoch": 0.05, "grad_norm": 0.1894879937171936, "learning_rate": 9.990998905360357e-06, "loss": 0.5228, "step": 226 }, { "epoch": 0.05, "grad_norm": 0.14452479779720306, "learning_rate": 9.990788425227915e-06, "loss": 0.5354, "step": 227 }, { "epoch": 0.05, "grad_norm": 0.23448392748832703, "learning_rate": 9.990575514806563e-06, "loss": 0.545, "step": 228 }, { "epoch": 0.05, "grad_norm": 0.20030318200588226, "learning_rate": 9.990360174199975e-06, "loss": 0.5239, "step": 229 }, { "epoch": 0.05, "grad_norm": 0.1632775366306305, "learning_rate": 9.990142403513012e-06, "loss": 0.5507, "step": 230 }, { "epoch": 0.05, "grad_norm": 0.17613913118839264, "learning_rate": 9.989922202851722e-06, "loss": 0.5077, "step": 231 }, { "epoch": 0.05, "grad_norm": 0.1816764920949936, "learning_rate": 9.989699572323328e-06, "loss": 0.5121, "step": 232 }, { "epoch": 0.05, "grad_norm": 0.18507419526576996, "learning_rate": 9.989474512036245e-06, "loss": 0.5335, "step": 233 }, { "epoch": 0.05, "grad_norm": 0.22486189007759094, "learning_rate": 9.989247022100065e-06, "loss": 0.5223, "step": 234 }, { 
"epoch": 0.05, "grad_norm": 0.19390611350536346, "learning_rate": 9.989017102625565e-06, "loss": 0.564, "step": 235 }, { "epoch": 0.05, "grad_norm": 0.22769489884376526, "learning_rate": 9.988784753724707e-06, "loss": 0.4891, "step": 236 }, { "epoch": 0.05, "grad_norm": 0.18696601688861847, "learning_rate": 9.988549975510635e-06, "loss": 0.5424, "step": 237 }, { "epoch": 0.05, "grad_norm": 0.1786351501941681, "learning_rate": 9.988312768097673e-06, "loss": 0.5279, "step": 238 }, { "epoch": 0.05, "grad_norm": 0.19431112706661224, "learning_rate": 9.988073131601332e-06, "loss": 0.5463, "step": 239 }, { "epoch": 0.05, "grad_norm": 0.171942800283432, "learning_rate": 9.987831066138302e-06, "loss": 0.5208, "step": 240 }, { "epoch": 0.05, "grad_norm": 0.15704870223999023, "learning_rate": 9.987586571826461e-06, "loss": 0.5413, "step": 241 }, { "epoch": 0.05, "grad_norm": 0.16401955485343933, "learning_rate": 9.987339648784866e-06, "loss": 0.562, "step": 242 }, { "epoch": 0.05, "grad_norm": 0.2467910647392273, "learning_rate": 9.987090297133756e-06, "loss": 0.559, "step": 243 }, { "epoch": 0.05, "grad_norm": 0.1753203123807907, "learning_rate": 9.986838516994555e-06, "loss": 0.6251, "step": 244 }, { "epoch": 0.05, "grad_norm": 0.21456435322761536, "learning_rate": 9.986584308489867e-06, "loss": 0.5495, "step": 245 }, { "epoch": 0.05, "grad_norm": 0.2137192189693451, "learning_rate": 9.986327671743484e-06, "loss": 0.5475, "step": 246 }, { "epoch": 0.05, "grad_norm": 0.16317638754844666, "learning_rate": 9.98606860688037e-06, "loss": 0.565, "step": 247 }, { "epoch": 0.05, "grad_norm": 0.15220917761325836, "learning_rate": 9.985807114026684e-06, "loss": 0.5185, "step": 248 }, { "epoch": 0.05, "grad_norm": 0.2033926397562027, "learning_rate": 9.98554319330976e-06, "loss": 0.5255, "step": 249 }, { "epoch": 0.05, "grad_norm": 0.17678335309028625, "learning_rate": 9.985276844858114e-06, "loss": 0.5371, "step": 250 }, { "epoch": 0.05, "grad_norm": 0.1734929084777832, "learning_rate": 9.985008068801446e-06, "loss": 0.5148, "step": 251 }, { "epoch": 0.05, "grad_norm": 0.19290420413017273, "learning_rate": 9.984736865270637e-06, "loss": 0.5077, "step": 252 }, { "epoch": 0.05, "grad_norm": 0.19296742975711823, "learning_rate": 9.984463234397752e-06, "loss": 0.5376, "step": 253 }, { "epoch": 0.05, "grad_norm": 0.21208150684833527, "learning_rate": 9.984187176316038e-06, "loss": 0.5431, "step": 254 }, { "epoch": 0.05, "grad_norm": 0.16331081092357635, "learning_rate": 9.983908691159921e-06, "loss": 0.5494, "step": 255 }, { "epoch": 0.06, "grad_norm": 0.2127610296010971, "learning_rate": 9.983627779065012e-06, "loss": 0.5196, "step": 256 }, { "epoch": 0.06, "grad_norm": 0.27896058559417725, "learning_rate": 9.983344440168101e-06, "loss": 0.5004, "step": 257 }, { "epoch": 0.06, "grad_norm": 0.2308092564344406, "learning_rate": 9.983058674607164e-06, "loss": 0.4996, "step": 258 }, { "epoch": 0.06, "grad_norm": 0.1815384477376938, "learning_rate": 9.982770482521353e-06, "loss": 0.5484, "step": 259 }, { "epoch": 0.06, "grad_norm": 0.19610373675823212, "learning_rate": 9.982479864051005e-06, "loss": 0.5465, "step": 260 }, { "epoch": 0.06, "grad_norm": 0.21339653432369232, "learning_rate": 9.982186819337639e-06, "loss": 0.5318, "step": 261 }, { "epoch": 0.06, "grad_norm": 0.16528427600860596, "learning_rate": 9.981891348523955e-06, "loss": 0.5164, "step": 262 }, { "epoch": 0.06, "grad_norm": 0.19075849652290344, "learning_rate": 9.981593451753833e-06, "loss": 0.482, "step": 263 }, { "epoch": 0.06, "grad_norm": 
0.18467120826244354, "learning_rate": 9.981293129172334e-06, "loss": 0.4893, "step": 264 }, { "epoch": 0.06, "grad_norm": 0.2131132185459137, "learning_rate": 9.980990380925705e-06, "loss": 0.5839, "step": 265 }, { "epoch": 0.06, "grad_norm": 0.214164599776268, "learning_rate": 9.980685207161368e-06, "loss": 0.5351, "step": 266 }, { "epoch": 0.06, "grad_norm": 0.20279422402381897, "learning_rate": 9.98037760802793e-06, "loss": 0.5339, "step": 267 }, { "epoch": 0.06, "grad_norm": 0.16691498458385468, "learning_rate": 9.980067583675177e-06, "loss": 0.5257, "step": 268 }, { "epoch": 0.06, "grad_norm": 0.19010309875011444, "learning_rate": 9.97975513425408e-06, "loss": 0.4422, "step": 269 }, { "epoch": 0.06, "grad_norm": 0.17077746987342834, "learning_rate": 9.979440259916782e-06, "loss": 0.5756, "step": 270 }, { "epoch": 0.06, "grad_norm": 0.15563777089118958, "learning_rate": 9.979122960816617e-06, "loss": 0.5803, "step": 271 }, { "epoch": 0.06, "grad_norm": 0.1896345168352127, "learning_rate": 9.978803237108095e-06, "loss": 0.5307, "step": 272 }, { "epoch": 0.06, "grad_norm": 0.20084036886692047, "learning_rate": 9.978481088946905e-06, "loss": 0.4988, "step": 273 }, { "epoch": 0.06, "grad_norm": 0.18005971610546112, "learning_rate": 9.97815651648992e-06, "loss": 0.494, "step": 274 }, { "epoch": 0.06, "grad_norm": 0.14255790412425995, "learning_rate": 9.977829519895193e-06, "loss": 0.5534, "step": 275 }, { "epoch": 0.06, "grad_norm": 0.1580318808555603, "learning_rate": 9.977500099321956e-06, "loss": 0.5083, "step": 276 }, { "epoch": 0.06, "grad_norm": 0.20587489008903503, "learning_rate": 9.977168254930621e-06, "loss": 0.5438, "step": 277 }, { "epoch": 0.06, "grad_norm": 0.18426474928855896, "learning_rate": 9.97683398688278e-06, "loss": 0.5399, "step": 278 }, { "epoch": 0.06, "grad_norm": 0.17722034454345703, "learning_rate": 9.976497295341212e-06, "loss": 0.4957, "step": 279 }, { "epoch": 0.06, "grad_norm": 0.216731995344162, "learning_rate": 9.976158180469866e-06, "loss": 0.5127, "step": 280 }, { "epoch": 0.06, "grad_norm": 0.20815429091453552, "learning_rate": 9.975816642433876e-06, "loss": 0.5859, "step": 281 }, { "epoch": 0.06, "grad_norm": 0.15470731258392334, "learning_rate": 9.975472681399556e-06, "loss": 0.5417, "step": 282 }, { "epoch": 0.06, "grad_norm": 0.17505955696105957, "learning_rate": 9.975126297534399e-06, "loss": 0.5197, "step": 283 }, { "epoch": 0.06, "grad_norm": 0.15607048571109772, "learning_rate": 9.97477749100708e-06, "loss": 0.5202, "step": 284 }, { "epoch": 0.06, "grad_norm": 0.25984108448028564, "learning_rate": 9.97442626198745e-06, "loss": 0.5113, "step": 285 }, { "epoch": 0.06, "grad_norm": 0.17469698190689087, "learning_rate": 9.974072610646543e-06, "loss": 0.5274, "step": 286 }, { "epoch": 0.06, "grad_norm": 0.1947067826986313, "learning_rate": 9.973716537156573e-06, "loss": 0.5743, "step": 287 }, { "epoch": 0.06, "grad_norm": 0.16918258368968964, "learning_rate": 9.973358041690926e-06, "loss": 0.5623, "step": 288 }, { "epoch": 0.06, "grad_norm": 0.1726803183555603, "learning_rate": 9.972997124424179e-06, "loss": 0.5577, "step": 289 }, { "epoch": 0.06, "grad_norm": 0.22636979818344116, "learning_rate": 9.972633785532082e-06, "loss": 0.4822, "step": 290 }, { "epoch": 0.06, "grad_norm": 0.1924733966588974, "learning_rate": 9.972268025191561e-06, "loss": 0.5294, "step": 291 }, { "epoch": 0.06, "grad_norm": 0.16325077414512634, "learning_rate": 9.971899843580728e-06, "loss": 0.5588, "step": 292 }, { "epoch": 0.06, "grad_norm": 0.17010986804962158, 
"learning_rate": 9.971529240878869e-06, "loss": 0.5254, "step": 293 }, { "epoch": 0.06, "grad_norm": 0.22150567173957825, "learning_rate": 9.971156217266451e-06, "loss": 0.545, "step": 294 }, { "epoch": 0.06, "grad_norm": 0.24462495744228363, "learning_rate": 9.97078077292512e-06, "loss": 0.5738, "step": 295 }, { "epoch": 0.06, "grad_norm": 0.21568679809570312, "learning_rate": 9.970402908037703e-06, "loss": 0.5129, "step": 296 }, { "epoch": 0.06, "grad_norm": 0.22609004378318787, "learning_rate": 9.970022622788198e-06, "loss": 0.535, "step": 297 }, { "epoch": 0.06, "grad_norm": 0.20871202647686005, "learning_rate": 9.96963991736179e-06, "loss": 0.5405, "step": 298 }, { "epoch": 0.06, "grad_norm": 0.20957247912883759, "learning_rate": 9.969254791944839e-06, "loss": 0.4701, "step": 299 }, { "epoch": 0.06, "grad_norm": 0.26258644461631775, "learning_rate": 9.968867246724882e-06, "loss": 0.5575, "step": 300 }, { "epoch": 0.06, "grad_norm": 0.17494796216487885, "learning_rate": 9.96847728189064e-06, "loss": 0.5414, "step": 301 }, { "epoch": 0.07, "grad_norm": 0.21521888673305511, "learning_rate": 9.968084897632004e-06, "loss": 0.5152, "step": 302 }, { "epoch": 0.07, "grad_norm": 0.16436263918876648, "learning_rate": 9.967690094140052e-06, "loss": 0.5144, "step": 303 }, { "epoch": 0.07, "grad_norm": 0.17806245386600494, "learning_rate": 9.96729287160703e-06, "loss": 0.59, "step": 304 }, { "epoch": 0.07, "grad_norm": 0.13292109966278076, "learning_rate": 9.966893230226371e-06, "loss": 0.5804, "step": 305 }, { "epoch": 0.07, "grad_norm": 0.18737316131591797, "learning_rate": 9.966491170192682e-06, "loss": 0.5104, "step": 306 }, { "epoch": 0.07, "grad_norm": 0.25303030014038086, "learning_rate": 9.966086691701748e-06, "loss": 0.5501, "step": 307 }, { "epoch": 0.07, "grad_norm": 0.17302893102169037, "learning_rate": 9.96567979495053e-06, "loss": 0.5236, "step": 308 }, { "epoch": 0.07, "grad_norm": 0.16693797707557678, "learning_rate": 9.96527048013717e-06, "loss": 0.5157, "step": 309 }, { "epoch": 0.07, "grad_norm": 0.22584576904773712, "learning_rate": 9.964858747460989e-06, "loss": 0.5828, "step": 310 }, { "epoch": 0.07, "grad_norm": 0.21684272587299347, "learning_rate": 9.964444597122476e-06, "loss": 0.5082, "step": 311 }, { "epoch": 0.07, "grad_norm": 0.16628780961036682, "learning_rate": 9.964028029323305e-06, "loss": 0.5581, "step": 312 }, { "epoch": 0.07, "grad_norm": 0.15919576585292816, "learning_rate": 9.963609044266328e-06, "loss": 0.5713, "step": 313 }, { "epoch": 0.07, "grad_norm": 0.17761071026325226, "learning_rate": 9.963187642155573e-06, "loss": 0.5417, "step": 314 }, { "epoch": 0.07, "grad_norm": 0.24367289245128632, "learning_rate": 9.962763823196242e-06, "loss": 0.5147, "step": 315 }, { "epoch": 0.07, "grad_norm": 0.15818822383880615, "learning_rate": 9.962337587594713e-06, "loss": 0.4555, "step": 316 }, { "epoch": 0.07, "grad_norm": 0.1815144419670105, "learning_rate": 9.961908935558548e-06, "loss": 0.5394, "step": 317 }, { "epoch": 0.07, "grad_norm": 0.18169505894184113, "learning_rate": 9.961477867296479e-06, "loss": 0.5654, "step": 318 }, { "epoch": 0.07, "grad_norm": 0.15763549506664276, "learning_rate": 9.961044383018416e-06, "loss": 0.5565, "step": 319 }, { "epoch": 0.07, "grad_norm": 0.1651836484670639, "learning_rate": 9.96060848293545e-06, "loss": 0.5522, "step": 320 }, { "epoch": 0.07, "grad_norm": 0.1605907380580902, "learning_rate": 9.96017016725984e-06, "loss": 0.521, "step": 321 }, { "epoch": 0.07, "grad_norm": 0.1753988415002823, "learning_rate": 
9.959729436205027e-06, "loss": 0.5217, "step": 322 }, { "epoch": 0.07, "grad_norm": 0.15843777358531952, "learning_rate": 9.95928628998563e-06, "loss": 0.5384, "step": 323 }, { "epoch": 0.07, "grad_norm": 0.15907292068004608, "learning_rate": 9.95884072881744e-06, "loss": 0.4972, "step": 324 }, { "epoch": 0.07, "grad_norm": 0.14925910532474518, "learning_rate": 9.958392752917425e-06, "loss": 0.5313, "step": 325 }, { "epoch": 0.07, "grad_norm": 0.1515308916568756, "learning_rate": 9.957942362503728e-06, "loss": 0.5329, "step": 326 }, { "epoch": 0.07, "grad_norm": 0.1642216593027115, "learning_rate": 9.957489557795667e-06, "loss": 0.516, "step": 327 }, { "epoch": 0.07, "grad_norm": 0.1622897833585739, "learning_rate": 9.957034339013742e-06, "loss": 0.5641, "step": 328 }, { "epoch": 0.07, "grad_norm": 0.19205595552921295, "learning_rate": 9.956576706379623e-06, "loss": 0.5109, "step": 329 }, { "epoch": 0.07, "grad_norm": 0.16615568101406097, "learning_rate": 9.956116660116155e-06, "loss": 0.5208, "step": 330 }, { "epoch": 0.07, "grad_norm": 0.24837036430835724, "learning_rate": 9.95565420044736e-06, "loss": 0.5572, "step": 331 }, { "epoch": 0.07, "grad_norm": 0.15492476522922516, "learning_rate": 9.955189327598435e-06, "loss": 0.5439, "step": 332 }, { "epoch": 0.07, "grad_norm": 0.21482093632221222, "learning_rate": 9.954722041795753e-06, "loss": 0.5498, "step": 333 }, { "epoch": 0.07, "grad_norm": 0.2388935685157776, "learning_rate": 9.954252343266859e-06, "loss": 0.4783, "step": 334 }, { "epoch": 0.07, "grad_norm": 0.16139458119869232, "learning_rate": 9.953780232240477e-06, "loss": 0.5553, "step": 335 }, { "epoch": 0.07, "grad_norm": 0.20065993070602417, "learning_rate": 9.953305708946504e-06, "loss": 0.5273, "step": 336 }, { "epoch": 0.07, "grad_norm": 0.19159255921840668, "learning_rate": 9.95282877361601e-06, "loss": 0.5237, "step": 337 }, { "epoch": 0.07, "grad_norm": 0.21565450727939606, "learning_rate": 9.952349426481243e-06, "loss": 0.5408, "step": 338 }, { "epoch": 0.07, "grad_norm": 0.16351784765720367, "learning_rate": 9.95186766777562e-06, "loss": 0.516, "step": 339 }, { "epoch": 0.07, "grad_norm": 0.18081237375736237, "learning_rate": 9.95138349773374e-06, "loss": 0.5319, "step": 340 }, { "epoch": 0.07, "grad_norm": 0.17618371546268463, "learning_rate": 9.950896916591368e-06, "loss": 0.4894, "step": 341 }, { "epoch": 0.07, "grad_norm": 0.21193227171897888, "learning_rate": 9.95040792458545e-06, "loss": 0.5017, "step": 342 }, { "epoch": 0.07, "grad_norm": 0.15902818739414215, "learning_rate": 9.949916521954104e-06, "loss": 0.5468, "step": 343 }, { "epoch": 0.07, "grad_norm": 0.15740692615509033, "learning_rate": 9.949422708936616e-06, "loss": 0.5108, "step": 344 }, { "epoch": 0.07, "grad_norm": 0.2678494453430176, "learning_rate": 9.948926485773455e-06, "loss": 0.5588, "step": 345 }, { "epoch": 0.07, "grad_norm": 0.11566779017448425, "learning_rate": 9.948427852706257e-06, "loss": 0.5603, "step": 346 }, { "epoch": 0.07, "grad_norm": 0.19342099130153656, "learning_rate": 9.947926809977835e-06, "loss": 0.5577, "step": 347 }, { "epoch": 0.07, "grad_norm": 0.20827247202396393, "learning_rate": 9.947423357832176e-06, "loss": 0.5401, "step": 348 }, { "epoch": 0.08, "grad_norm": 0.21266911923885345, "learning_rate": 9.946917496514435e-06, "loss": 0.5555, "step": 349 }, { "epoch": 0.08, "grad_norm": 0.2901332378387451, "learning_rate": 9.946409226270945e-06, "loss": 0.5615, "step": 350 }, { "epoch": 0.08, "grad_norm": 0.18509002029895782, "learning_rate": 9.94589854734921e-06, "loss": 
0.5187, "step": 351 }, { "epoch": 0.08, "grad_norm": 0.1265445202589035, "learning_rate": 9.945385459997909e-06, "loss": 0.5356, "step": 352 }, { "epoch": 0.08, "grad_norm": 0.1595221310853958, "learning_rate": 9.944869964466892e-06, "loss": 0.5861, "step": 353 }, { "epoch": 0.08, "grad_norm": 0.24293914437294006, "learning_rate": 9.944352061007182e-06, "loss": 0.5336, "step": 354 }, { "epoch": 0.08, "grad_norm": 0.17409418523311615, "learning_rate": 9.943831749870973e-06, "loss": 0.5194, "step": 355 }, { "epoch": 0.08, "grad_norm": 0.2043304294347763, "learning_rate": 9.943309031311637e-06, "loss": 0.5477, "step": 356 }, { "epoch": 0.08, "grad_norm": 0.16727691888809204, "learning_rate": 9.942783905583711e-06, "loss": 0.5276, "step": 357 }, { "epoch": 0.08, "grad_norm": 0.19069473445415497, "learning_rate": 9.942256372942909e-06, "loss": 0.5096, "step": 358 }, { "epoch": 0.08, "grad_norm": 0.1717585176229477, "learning_rate": 9.941726433646115e-06, "loss": 0.5186, "step": 359 }, { "epoch": 0.08, "grad_norm": 0.15714509785175323, "learning_rate": 9.941194087951384e-06, "loss": 0.5358, "step": 360 }, { "epoch": 0.08, "grad_norm": 0.1681104153394699, "learning_rate": 9.940659336117948e-06, "loss": 0.5832, "step": 361 }, { "epoch": 0.08, "grad_norm": 0.21985803544521332, "learning_rate": 9.940122178406205e-06, "loss": 0.5477, "step": 362 }, { "epoch": 0.08, "grad_norm": 0.19286422431468964, "learning_rate": 9.939582615077724e-06, "loss": 0.5428, "step": 363 }, { "epoch": 0.08, "grad_norm": 0.198882594704628, "learning_rate": 9.939040646395252e-06, "loss": 0.5572, "step": 364 }, { "epoch": 0.08, "grad_norm": 0.18456579744815826, "learning_rate": 9.938496272622703e-06, "loss": 0.5168, "step": 365 }, { "epoch": 0.08, "grad_norm": 0.15817482769489288, "learning_rate": 9.93794949402516e-06, "loss": 0.4895, "step": 366 }, { "epoch": 0.08, "grad_norm": 0.17915575206279755, "learning_rate": 9.937400310868883e-06, "loss": 0.6069, "step": 367 }, { "epoch": 0.08, "grad_norm": 0.24024631083011627, "learning_rate": 9.936848723421295e-06, "loss": 0.5585, "step": 368 }, { "epoch": 0.08, "grad_norm": 0.20291557908058167, "learning_rate": 9.936294731950999e-06, "loss": 0.5197, "step": 369 }, { "epoch": 0.08, "grad_norm": 0.17010553181171417, "learning_rate": 9.93573833672776e-06, "loss": 0.4911, "step": 370 }, { "epoch": 0.08, "grad_norm": 0.21095992624759674, "learning_rate": 9.935179538022518e-06, "loss": 0.5152, "step": 371 }, { "epoch": 0.08, "grad_norm": 0.14240865409374237, "learning_rate": 9.934618336107385e-06, "loss": 0.5663, "step": 372 }, { "epoch": 0.08, "grad_norm": 0.13972750306129456, "learning_rate": 9.934054731255638e-06, "loss": 0.5214, "step": 373 }, { "epoch": 0.08, "grad_norm": 0.2143140733242035, "learning_rate": 9.933488723741731e-06, "loss": 0.5213, "step": 374 }, { "epoch": 0.08, "grad_norm": 0.16753612458705902, "learning_rate": 9.932920313841281e-06, "loss": 0.5654, "step": 375 }, { "epoch": 0.08, "grad_norm": 0.17069406807422638, "learning_rate": 9.932349501831077e-06, "loss": 0.5813, "step": 376 }, { "epoch": 0.08, "grad_norm": 0.16417956352233887, "learning_rate": 9.931776287989084e-06, "loss": 0.5091, "step": 377 }, { "epoch": 0.08, "grad_norm": 0.14209146797657013, "learning_rate": 9.931200672594425e-06, "loss": 0.5498, "step": 378 }, { "epoch": 0.08, "grad_norm": 0.15816187858581543, "learning_rate": 9.930622655927403e-06, "loss": 0.5175, "step": 379 }, { "epoch": 0.08, "grad_norm": 0.1323387622833252, "learning_rate": 9.930042238269485e-06, "loss": 0.5217, "step": 380 }, { 
"epoch": 0.08, "grad_norm": 0.14928382635116577, "learning_rate": 9.929459419903307e-06, "loss": 0.5655, "step": 381 }, { "epoch": 0.08, "grad_norm": 0.1628965139389038, "learning_rate": 9.928874201112677e-06, "loss": 0.5221, "step": 382 }, { "epoch": 0.08, "grad_norm": 0.16259951889514923, "learning_rate": 9.92828658218257e-06, "loss": 0.496, "step": 383 }, { "epoch": 0.08, "grad_norm": 0.15290167927742004, "learning_rate": 9.927696563399127e-06, "loss": 0.5241, "step": 384 }, { "epoch": 0.08, "grad_norm": 0.1801231950521469, "learning_rate": 9.927104145049664e-06, "loss": 0.5671, "step": 385 }, { "epoch": 0.08, "grad_norm": 0.1866559088230133, "learning_rate": 9.926509327422661e-06, "loss": 0.5476, "step": 386 }, { "epoch": 0.08, "grad_norm": 0.1865171194076538, "learning_rate": 9.925912110807766e-06, "loss": 0.5352, "step": 387 }, { "epoch": 0.08, "grad_norm": 0.1893538534641266, "learning_rate": 9.9253124954958e-06, "loss": 0.5537, "step": 388 }, { "epoch": 0.08, "grad_norm": 0.18070872128009796, "learning_rate": 9.924710481778746e-06, "loss": 0.5292, "step": 389 }, { "epoch": 0.08, "grad_norm": 0.17466960847377777, "learning_rate": 9.924106069949756e-06, "loss": 0.5212, "step": 390 }, { "epoch": 0.08, "grad_norm": 0.18615244328975677, "learning_rate": 9.923499260303155e-06, "loss": 0.5116, "step": 391 }, { "epoch": 0.08, "grad_norm": 0.17259790003299713, "learning_rate": 9.922890053134428e-06, "loss": 0.5159, "step": 392 }, { "epoch": 0.08, "grad_norm": 0.16246852278709412, "learning_rate": 9.922278448740235e-06, "loss": 0.5268, "step": 393 }, { "epoch": 0.08, "grad_norm": 0.15586566925048828, "learning_rate": 9.9216644474184e-06, "loss": 0.4965, "step": 394 }, { "epoch": 0.09, "grad_norm": 0.1761687695980072, "learning_rate": 9.92104804946791e-06, "loss": 0.5504, "step": 395 }, { "epoch": 0.09, "grad_norm": 0.19697882235050201, "learning_rate": 9.920429255188926e-06, "loss": 0.5055, "step": 396 }, { "epoch": 0.09, "grad_norm": 0.1910158395767212, "learning_rate": 9.919808064882773e-06, "loss": 0.4947, "step": 397 }, { "epoch": 0.09, "grad_norm": 0.18492764234542847, "learning_rate": 9.91918447885194e-06, "loss": 0.569, "step": 398 }, { "epoch": 0.09, "grad_norm": 0.17928937077522278, "learning_rate": 9.918558497400088e-06, "loss": 0.4933, "step": 399 }, { "epoch": 0.09, "grad_norm": 0.18491177260875702, "learning_rate": 9.91793012083204e-06, "loss": 0.5489, "step": 400 }, { "epoch": 0.09, "grad_norm": 0.1796533763408661, "learning_rate": 9.917299349453791e-06, "loss": 0.5575, "step": 401 }, { "epoch": 0.09, "grad_norm": 0.14460118114948273, "learning_rate": 9.916666183572492e-06, "loss": 0.4632, "step": 402 }, { "epoch": 0.09, "grad_norm": 0.13730689883232117, "learning_rate": 9.916030623496472e-06, "loss": 0.5634, "step": 403 }, { "epoch": 0.09, "grad_norm": 0.18971490859985352, "learning_rate": 9.915392669535214e-06, "loss": 0.5193, "step": 404 }, { "epoch": 0.09, "grad_norm": 0.12481328845024109, "learning_rate": 9.914752321999379e-06, "loss": 0.5389, "step": 405 }, { "epoch": 0.09, "grad_norm": 0.17612747848033905, "learning_rate": 9.914109581200785e-06, "loss": 0.5129, "step": 406 }, { "epoch": 0.09, "grad_norm": 0.1852181851863861, "learning_rate": 9.913464447452414e-06, "loss": 0.5124, "step": 407 }, { "epoch": 0.09, "grad_norm": 0.23606260120868683, "learning_rate": 9.912816921068424e-06, "loss": 0.4736, "step": 408 }, { "epoch": 0.09, "grad_norm": 0.23079735040664673, "learning_rate": 9.912167002364126e-06, "loss": 0.5612, "step": 409 }, { "epoch": 0.09, "grad_norm": 
0.22326047718524933, "learning_rate": 9.911514691656003e-06, "loss": 0.5367, "step": 410 }, { "epoch": 0.09, "grad_norm": 0.1975882351398468, "learning_rate": 9.910859989261702e-06, "loss": 0.5575, "step": 411 }, { "epoch": 0.09, "grad_norm": 0.16411826014518738, "learning_rate": 9.910202895500031e-06, "loss": 0.5506, "step": 412 }, { "epoch": 0.09, "grad_norm": 0.1982284039258957, "learning_rate": 9.909543410690967e-06, "loss": 0.5443, "step": 413 }, { "epoch": 0.09, "grad_norm": 0.1679336577653885, "learning_rate": 9.908881535155647e-06, "loss": 0.4876, "step": 414 }, { "epoch": 0.09, "grad_norm": 0.17549291253089905, "learning_rate": 9.908217269216377e-06, "loss": 0.558, "step": 415 }, { "epoch": 0.09, "grad_norm": 0.13716278970241547, "learning_rate": 9.907550613196624e-06, "loss": 0.5527, "step": 416 }, { "epoch": 0.09, "grad_norm": 0.171469584107399, "learning_rate": 9.90688156742102e-06, "loss": 0.5066, "step": 417 }, { "epoch": 0.09, "grad_norm": 0.19487224519252777, "learning_rate": 9.906210132215357e-06, "loss": 0.5211, "step": 418 }, { "epoch": 0.09, "grad_norm": 0.16895724833011627, "learning_rate": 9.905536307906599e-06, "loss": 0.4936, "step": 419 }, { "epoch": 0.09, "grad_norm": 0.2055499255657196, "learning_rate": 9.904860094822861e-06, "loss": 0.4719, "step": 420 }, { "epoch": 0.09, "grad_norm": 0.30334606766700745, "learning_rate": 9.904181493293434e-06, "loss": 0.5743, "step": 421 }, { "epoch": 0.09, "grad_norm": 0.15841247141361237, "learning_rate": 9.903500503648766e-06, "loss": 0.5722, "step": 422 }, { "epoch": 0.09, "grad_norm": 0.15331457555294037, "learning_rate": 9.902817126220465e-06, "loss": 0.4636, "step": 423 }, { "epoch": 0.09, "grad_norm": 0.22973452508449554, "learning_rate": 9.902131361341307e-06, "loss": 0.5427, "step": 424 }, { "epoch": 0.09, "grad_norm": 0.21478115022182465, "learning_rate": 9.901443209345229e-06, "loss": 0.5324, "step": 425 }, { "epoch": 0.09, "grad_norm": 0.2344510704278946, "learning_rate": 9.900752670567331e-06, "loss": 0.5439, "step": 426 }, { "epoch": 0.09, "grad_norm": 0.17472712695598602, "learning_rate": 9.90005974534387e-06, "loss": 0.4745, "step": 427 }, { "epoch": 0.09, "grad_norm": 0.1476239264011383, "learning_rate": 9.899364434012273e-06, "loss": 0.4726, "step": 428 }, { "epoch": 0.09, "grad_norm": 0.174478217959404, "learning_rate": 9.898666736911125e-06, "loss": 0.5485, "step": 429 }, { "epoch": 0.09, "grad_norm": 0.20660632848739624, "learning_rate": 9.897966654380172e-06, "loss": 0.5274, "step": 430 }, { "epoch": 0.09, "grad_norm": 0.1528811752796173, "learning_rate": 9.89726418676032e-06, "loss": 0.5305, "step": 431 }, { "epoch": 0.09, "grad_norm": 0.23785395920276642, "learning_rate": 9.896559334393644e-06, "loss": 0.5553, "step": 432 }, { "epoch": 0.09, "grad_norm": 0.19750644266605377, "learning_rate": 9.895852097623374e-06, "loss": 0.5441, "step": 433 }, { "epoch": 0.09, "grad_norm": 0.16664327681064606, "learning_rate": 9.895142476793902e-06, "loss": 0.4756, "step": 434 }, { "epoch": 0.09, "grad_norm": 0.18724434077739716, "learning_rate": 9.89443047225078e-06, "loss": 0.5046, "step": 435 }, { "epoch": 0.09, "grad_norm": 0.20234829187393188, "learning_rate": 9.893716084340723e-06, "loss": 0.5276, "step": 436 }, { "epoch": 0.09, "grad_norm": 0.17969612777233124, "learning_rate": 9.892999313411607e-06, "loss": 0.5428, "step": 437 }, { "epoch": 0.09, "grad_norm": 0.19304272532463074, "learning_rate": 9.892280159812465e-06, "loss": 0.5281, "step": 438 }, { "epoch": 0.09, "grad_norm": 0.15909235179424286, 
"learning_rate": 9.891558623893492e-06, "loss": 0.5393, "step": 439 }, { "epoch": 0.09, "grad_norm": 0.24154618382453918, "learning_rate": 9.890834706006048e-06, "loss": 0.5446, "step": 440 }, { "epoch": 0.09, "grad_norm": 0.1484946757555008, "learning_rate": 9.890108406502642e-06, "loss": 0.5034, "step": 441 }, { "epoch": 0.1, "grad_norm": 0.20041412115097046, "learning_rate": 9.889379725736953e-06, "loss": 0.5569, "step": 442 }, { "epoch": 0.1, "grad_norm": 0.1696542501449585, "learning_rate": 9.888648664063815e-06, "loss": 0.5521, "step": 443 }, { "epoch": 0.1, "grad_norm": 0.2253563106060028, "learning_rate": 9.887915221839223e-06, "loss": 0.5881, "step": 444 }, { "epoch": 0.1, "grad_norm": 0.16398414969444275, "learning_rate": 9.88717939942033e-06, "loss": 0.5276, "step": 445 }, { "epoch": 0.1, "grad_norm": 0.19543707370758057, "learning_rate": 9.886441197165446e-06, "loss": 0.5172, "step": 446 }, { "epoch": 0.1, "grad_norm": 0.19510690867900848, "learning_rate": 9.885700615434044e-06, "loss": 0.5489, "step": 447 }, { "epoch": 0.1, "grad_norm": 0.20647871494293213, "learning_rate": 9.884957654586753e-06, "loss": 0.5691, "step": 448 }, { "epoch": 0.1, "grad_norm": 0.1428651362657547, "learning_rate": 9.884212314985363e-06, "loss": 0.5415, "step": 449 }, { "epoch": 0.1, "grad_norm": 0.20169362425804138, "learning_rate": 9.88346459699282e-06, "loss": 0.5035, "step": 450 }, { "epoch": 0.1, "grad_norm": 0.1399114578962326, "learning_rate": 9.88271450097323e-06, "loss": 0.4997, "step": 451 }, { "epoch": 0.1, "grad_norm": 0.13809053599834442, "learning_rate": 9.881962027291855e-06, "loss": 0.5106, "step": 452 }, { "epoch": 0.1, "grad_norm": 0.15126360952854156, "learning_rate": 9.881207176315112e-06, "loss": 0.4804, "step": 453 }, { "epoch": 0.1, "grad_norm": 0.17541149258613586, "learning_rate": 9.880449948410587e-06, "loss": 0.5529, "step": 454 }, { "epoch": 0.1, "grad_norm": 0.21182189881801605, "learning_rate": 9.879690343947009e-06, "loss": 0.5671, "step": 455 }, { "epoch": 0.1, "grad_norm": 0.16285210847854614, "learning_rate": 9.878928363294275e-06, "loss": 0.5288, "step": 456 }, { "epoch": 0.1, "grad_norm": 0.17910024523735046, "learning_rate": 9.878164006823434e-06, "loss": 0.4876, "step": 457 }, { "epoch": 0.1, "grad_norm": 0.18861602246761322, "learning_rate": 9.877397274906694e-06, "loss": 0.5403, "step": 458 }, { "epoch": 0.1, "grad_norm": 0.18446660041809082, "learning_rate": 9.876628167917417e-06, "loss": 0.5558, "step": 459 }, { "epoch": 0.1, "grad_norm": 0.23668085038661957, "learning_rate": 9.875856686230125e-06, "loss": 0.5781, "step": 460 }, { "epoch": 0.1, "grad_norm": 0.1459677368402481, "learning_rate": 9.875082830220496e-06, "loss": 0.5102, "step": 461 }, { "epoch": 0.1, "grad_norm": 0.19470389187335968, "learning_rate": 9.87430660026536e-06, "loss": 0.4579, "step": 462 }, { "epoch": 0.1, "grad_norm": 0.13941837847232819, "learning_rate": 9.873527996742707e-06, "loss": 0.5971, "step": 463 }, { "epoch": 0.1, "grad_norm": 0.1971631497144699, "learning_rate": 9.872747020031682e-06, "loss": 0.5637, "step": 464 }, { "epoch": 0.1, "grad_norm": 0.1430051177740097, "learning_rate": 9.871963670512586e-06, "loss": 0.4621, "step": 465 }, { "epoch": 0.1, "grad_norm": 0.1990920901298523, "learning_rate": 9.871177948566875e-06, "loss": 0.508, "step": 466 }, { "epoch": 0.1, "grad_norm": 0.1744355857372284, "learning_rate": 9.870389854577157e-06, "loss": 0.5115, "step": 467 }, { "epoch": 0.1, "grad_norm": 0.1770336627960205, "learning_rate": 9.869599388927204e-06, "loss": 0.5535, 
"step": 468 }, { "epoch": 0.1, "grad_norm": 0.1405770629644394, "learning_rate": 9.868806552001933e-06, "loss": 0.5188, "step": 469 }, { "epoch": 0.1, "grad_norm": 0.164622500538826, "learning_rate": 9.868011344187421e-06, "loss": 0.543, "step": 470 }, { "epoch": 0.1, "grad_norm": 0.34272515773773193, "learning_rate": 9.867213765870897e-06, "loss": 0.444, "step": 471 }, { "epoch": 0.1, "grad_norm": 0.29466864466667175, "learning_rate": 9.866413817440748e-06, "loss": 0.5177, "step": 472 }, { "epoch": 0.1, "grad_norm": 0.1591256856918335, "learning_rate": 9.865611499286511e-06, "loss": 0.543, "step": 473 }, { "epoch": 0.1, "grad_norm": 0.17290642857551575, "learning_rate": 9.864806811798881e-06, "loss": 0.5571, "step": 474 }, { "epoch": 0.1, "grad_norm": 0.14120014011859894, "learning_rate": 9.863999755369703e-06, "loss": 0.5366, "step": 475 }, { "epoch": 0.1, "grad_norm": 0.1678173840045929, "learning_rate": 9.863190330391974e-06, "loss": 0.5301, "step": 476 }, { "epoch": 0.1, "grad_norm": 0.17042382061481476, "learning_rate": 9.862378537259853e-06, "loss": 0.5669, "step": 477 }, { "epoch": 0.1, "grad_norm": 0.1473226100206375, "learning_rate": 9.861564376368645e-06, "loss": 0.5113, "step": 478 }, { "epoch": 0.1, "grad_norm": 0.1683841496706009, "learning_rate": 9.860747848114805e-06, "loss": 0.542, "step": 479 }, { "epoch": 0.1, "grad_norm": 0.17106866836547852, "learning_rate": 9.859928952895952e-06, "loss": 0.5023, "step": 480 }, { "epoch": 0.1, "grad_norm": 0.16280145943164825, "learning_rate": 9.859107691110847e-06, "loss": 0.5605, "step": 481 }, { "epoch": 0.1, "grad_norm": 0.14175820350646973, "learning_rate": 9.858284063159411e-06, "loss": 0.5716, "step": 482 }, { "epoch": 0.1, "grad_norm": 0.21412678062915802, "learning_rate": 9.857458069442709e-06, "loss": 0.515, "step": 483 }, { "epoch": 0.1, "grad_norm": 0.19349491596221924, "learning_rate": 9.856629710362966e-06, "loss": 0.5198, "step": 484 }, { "epoch": 0.1, "grad_norm": 0.1516617089509964, "learning_rate": 9.855798986323556e-06, "loss": 0.4953, "step": 485 }, { "epoch": 0.1, "grad_norm": 0.2074221968650818, "learning_rate": 9.854965897729001e-06, "loss": 0.5118, "step": 486 }, { "epoch": 0.1, "grad_norm": 0.14066927134990692, "learning_rate": 9.85413044498498e-06, "loss": 0.5228, "step": 487 }, { "epoch": 0.11, "grad_norm": 0.2228998988866806, "learning_rate": 9.853292628498319e-06, "loss": 0.6139, "step": 488 }, { "epoch": 0.11, "grad_norm": 0.31960368156433105, "learning_rate": 9.852452448676999e-06, "loss": 0.5553, "step": 489 }, { "epoch": 0.11, "grad_norm": 0.17156168818473816, "learning_rate": 9.851609905930149e-06, "loss": 0.5373, "step": 490 }, { "epoch": 0.11, "grad_norm": 0.12861701846122742, "learning_rate": 9.850765000668048e-06, "loss": 0.5126, "step": 491 }, { "epoch": 0.11, "grad_norm": 0.17264096438884735, "learning_rate": 9.849917733302128e-06, "loss": 0.5141, "step": 492 }, { "epoch": 0.11, "grad_norm": 0.16210493445396423, "learning_rate": 9.84906810424497e-06, "loss": 0.524, "step": 493 }, { "epoch": 0.11, "grad_norm": 0.12152129411697388, "learning_rate": 9.848216113910306e-06, "loss": 0.5405, "step": 494 }, { "epoch": 0.11, "grad_norm": 0.17325671017169952, "learning_rate": 9.847361762713013e-06, "loss": 0.5062, "step": 495 }, { "epoch": 0.11, "grad_norm": 0.14273308217525482, "learning_rate": 9.846505051069126e-06, "loss": 0.5302, "step": 496 }, { "epoch": 0.11, "grad_norm": 0.2055240273475647, "learning_rate": 9.845645979395824e-06, "loss": 0.5018, "step": 497 }, { "epoch": 0.11, "grad_norm": 
0.1372431516647339, "learning_rate": 9.844784548111433e-06, "loss": 0.5665, "step": 498 }, { "epoch": 0.11, "grad_norm": 0.1912691444158554, "learning_rate": 9.843920757635435e-06, "loss": 0.5267, "step": 499 }, { "epoch": 0.11, "grad_norm": 0.14471903443336487, "learning_rate": 9.843054608388455e-06, "loss": 0.5087, "step": 500 }, { "epoch": 0.11, "grad_norm": 0.17829883098602295, "learning_rate": 9.84218610079227e-06, "loss": 0.5029, "step": 501 }, { "epoch": 0.11, "grad_norm": 0.16071033477783203, "learning_rate": 9.8413152352698e-06, "loss": 0.5259, "step": 502 }, { "epoch": 0.11, "grad_norm": 0.21240952610969543, "learning_rate": 9.840442012245125e-06, "loss": 0.5266, "step": 503 }, { "epoch": 0.11, "grad_norm": 0.1682009994983673, "learning_rate": 9.839566432143459e-06, "loss": 0.5132, "step": 504 }, { "epoch": 0.11, "grad_norm": 0.14732059836387634, "learning_rate": 9.838688495391171e-06, "loss": 0.5745, "step": 505 }, { "epoch": 0.11, "grad_norm": 0.15087178349494934, "learning_rate": 9.837808202415778e-06, "loss": 0.5017, "step": 506 }, { "epoch": 0.11, "grad_norm": 0.16476622223854065, "learning_rate": 9.836925553645941e-06, "loss": 0.5044, "step": 507 }, { "epoch": 0.11, "grad_norm": 0.23170307278633118, "learning_rate": 9.836040549511472e-06, "loss": 0.574, "step": 508 }, { "epoch": 0.11, "grad_norm": 0.18723872303962708, "learning_rate": 9.835153190443327e-06, "loss": 0.4981, "step": 509 }, { "epoch": 0.11, "grad_norm": 0.18692149221897125, "learning_rate": 9.83426347687361e-06, "loss": 0.554, "step": 510 }, { "epoch": 0.11, "grad_norm": 0.16876354813575745, "learning_rate": 9.833371409235575e-06, "loss": 0.5535, "step": 511 }, { "epoch": 0.11, "grad_norm": 0.1443847119808197, "learning_rate": 9.832476987963613e-06, "loss": 0.4957, "step": 512 }, { "epoch": 0.11, "grad_norm": 0.17338885366916656, "learning_rate": 9.83158021349327e-06, "loss": 0.5019, "step": 513 }, { "epoch": 0.11, "grad_norm": 0.19001881778240204, "learning_rate": 9.830681086261234e-06, "loss": 0.5165, "step": 514 }, { "epoch": 0.11, "grad_norm": 0.24521715939044952, "learning_rate": 9.829779606705337e-06, "loss": 0.579, "step": 515 }, { "epoch": 0.11, "grad_norm": 0.16400645673274994, "learning_rate": 9.828875775264564e-06, "loss": 0.5429, "step": 516 }, { "epoch": 0.11, "grad_norm": 0.2782368063926697, "learning_rate": 9.827969592379036e-06, "loss": 0.4832, "step": 517 }, { "epoch": 0.11, "grad_norm": 0.15196365118026733, "learning_rate": 9.827061058490027e-06, "loss": 0.4643, "step": 518 }, { "epoch": 0.11, "grad_norm": 0.17149809002876282, "learning_rate": 9.826150174039949e-06, "loss": 0.5388, "step": 519 }, { "epoch": 0.11, "grad_norm": 0.152251735329628, "learning_rate": 9.82523693947236e-06, "loss": 0.5147, "step": 520 }, { "epoch": 0.11, "grad_norm": 0.1551138162612915, "learning_rate": 9.824321355231968e-06, "loss": 0.4826, "step": 521 }, { "epoch": 0.11, "grad_norm": 0.15354926884174347, "learning_rate": 9.82340342176462e-06, "loss": 0.482, "step": 522 }, { "epoch": 0.11, "grad_norm": 0.15569601953029633, "learning_rate": 9.822483139517307e-06, "loss": 0.4989, "step": 523 }, { "epoch": 0.11, "grad_norm": 0.17023304104804993, "learning_rate": 9.821560508938167e-06, "loss": 0.4974, "step": 524 }, { "epoch": 0.11, "grad_norm": 0.17514115571975708, "learning_rate": 9.820635530476478e-06, "loss": 0.4923, "step": 525 }, { "epoch": 0.11, "grad_norm": 0.2187749445438385, "learning_rate": 9.819708204582664e-06, "loss": 0.5623, "step": 526 }, { "epoch": 0.11, "grad_norm": 0.1382010579109192, "learning_rate": 
9.818778531708288e-06, "loss": 0.4999, "step": 527 }, { "epoch": 0.11, "grad_norm": 0.15643168985843658, "learning_rate": 9.817846512306062e-06, "loss": 0.4885, "step": 528 }, { "epoch": 0.11, "grad_norm": 0.16030187904834747, "learning_rate": 9.816912146829836e-06, "loss": 0.5217, "step": 529 }, { "epoch": 0.11, "grad_norm": 0.2057688981294632, "learning_rate": 9.815975435734604e-06, "loss": 0.5254, "step": 530 }, { "epoch": 0.11, "grad_norm": 0.20325696468353271, "learning_rate": 9.815036379476502e-06, "loss": 0.5831, "step": 531 }, { "epoch": 0.11, "grad_norm": 0.19320160150527954, "learning_rate": 9.814094978512808e-06, "loss": 0.4558, "step": 532 }, { "epoch": 0.11, "grad_norm": 0.20372559130191803, "learning_rate": 9.813151233301943e-06, "loss": 0.5431, "step": 533 }, { "epoch": 0.12, "grad_norm": 0.18915057182312012, "learning_rate": 9.812205144303466e-06, "loss": 0.5243, "step": 534 }, { "epoch": 0.12, "grad_norm": 0.25529226660728455, "learning_rate": 9.811256711978082e-06, "loss": 0.5403, "step": 535 }, { "epoch": 0.12, "grad_norm": 0.2046184092760086, "learning_rate": 9.810305936787633e-06, "loss": 0.5275, "step": 536 }, { "epoch": 0.12, "grad_norm": 0.15673565864562988, "learning_rate": 9.809352819195106e-06, "loss": 0.57, "step": 537 }, { "epoch": 0.12, "grad_norm": 0.13071295619010925, "learning_rate": 9.808397359664624e-06, "loss": 0.5232, "step": 538 }, { "epoch": 0.12, "grad_norm": 0.17526838183403015, "learning_rate": 9.807439558661453e-06, "loss": 0.498, "step": 539 }, { "epoch": 0.12, "grad_norm": 0.1860094964504242, "learning_rate": 9.806479416652e-06, "loss": 0.5327, "step": 540 }, { "epoch": 0.12, "grad_norm": 0.18813055753707886, "learning_rate": 9.80551693410381e-06, "loss": 0.5199, "step": 541 }, { "epoch": 0.12, "grad_norm": 0.1620221734046936, "learning_rate": 9.804552111485568e-06, "loss": 0.4961, "step": 542 }, { "epoch": 0.12, "grad_norm": 0.2016637921333313, "learning_rate": 9.8035849492671e-06, "loss": 0.5042, "step": 543 }, { "epoch": 0.12, "grad_norm": 0.15297263860702515, "learning_rate": 9.80261544791937e-06, "loss": 0.5561, "step": 544 }, { "epoch": 0.12, "grad_norm": 0.16937948763370514, "learning_rate": 9.801643607914485e-06, "loss": 0.5356, "step": 545 }, { "epoch": 0.12, "grad_norm": 0.1809961050748825, "learning_rate": 9.80066942972568e-06, "loss": 0.5516, "step": 546 }, { "epoch": 0.12, "grad_norm": 0.14697958528995514, "learning_rate": 9.799692913827342e-06, "loss": 0.5072, "step": 547 }, { "epoch": 0.12, "grad_norm": 0.18541914224624634, "learning_rate": 9.798714060694988e-06, "loss": 0.4925, "step": 548 }, { "epoch": 0.12, "grad_norm": 0.19727596640586853, "learning_rate": 9.797732870805273e-06, "loss": 0.5206, "step": 549 }, { "epoch": 0.12, "grad_norm": 0.15478399395942688, "learning_rate": 9.796749344635996e-06, "loss": 0.5122, "step": 550 }, { "epoch": 0.12, "grad_norm": 0.1174599900841713, "learning_rate": 9.79576348266609e-06, "loss": 0.5003, "step": 551 }, { "epoch": 0.12, "grad_norm": 0.16585126519203186, "learning_rate": 9.794775285375623e-06, "loss": 0.5029, "step": 552 }, { "epoch": 0.12, "grad_norm": 0.17350535094738007, "learning_rate": 9.793784753245802e-06, "loss": 0.548, "step": 553 }, { "epoch": 0.12, "grad_norm": 0.18735840916633606, "learning_rate": 9.792791886758976e-06, "loss": 0.5455, "step": 554 }, { "epoch": 0.12, "grad_norm": 0.22835886478424072, "learning_rate": 9.79179668639862e-06, "loss": 0.4881, "step": 555 }, { "epoch": 0.12, "grad_norm": 0.16034086048603058, "learning_rate": 9.790799152649356e-06, "loss": 
0.5222, "step": 556 }, { "epoch": 0.12, "grad_norm": 0.20814630389213562, "learning_rate": 9.789799285996937e-06, "loss": 0.5489, "step": 557 }, { "epoch": 0.12, "grad_norm": 0.17920532822608948, "learning_rate": 9.788797086928252e-06, "loss": 0.493, "step": 558 }, { "epoch": 0.12, "grad_norm": 0.21627596020698547, "learning_rate": 9.787792555931328e-06, "loss": 0.5491, "step": 559 }, { "epoch": 0.12, "grad_norm": 0.14485371112823486, "learning_rate": 9.786785693495327e-06, "loss": 0.5144, "step": 560 }, { "epoch": 0.12, "grad_norm": 0.1546938121318817, "learning_rate": 9.785776500110542e-06, "loss": 0.4812, "step": 561 }, { "epoch": 0.12, "grad_norm": 0.1761971116065979, "learning_rate": 9.784764976268408e-06, "loss": 0.5788, "step": 562 }, { "epoch": 0.12, "grad_norm": 0.18302011489868164, "learning_rate": 9.78375112246149e-06, "loss": 0.5186, "step": 563 }, { "epoch": 0.12, "grad_norm": 0.17190533876419067, "learning_rate": 9.78273493918349e-06, "loss": 0.5252, "step": 564 }, { "epoch": 0.12, "grad_norm": 0.1821742206811905, "learning_rate": 9.781716426929243e-06, "loss": 0.5174, "step": 565 }, { "epoch": 0.12, "grad_norm": 0.19061587750911713, "learning_rate": 9.780695586194719e-06, "loss": 0.5662, "step": 566 }, { "epoch": 0.12, "grad_norm": 0.15308646857738495, "learning_rate": 9.77967241747702e-06, "loss": 0.5297, "step": 567 }, { "epoch": 0.12, "grad_norm": 0.16192299127578735, "learning_rate": 9.778646921274385e-06, "loss": 0.5846, "step": 568 }, { "epoch": 0.12, "grad_norm": 0.1472279578447342, "learning_rate": 9.777619098086181e-06, "loss": 0.5596, "step": 569 }, { "epoch": 0.12, "grad_norm": 0.20292969048023224, "learning_rate": 9.776588948412917e-06, "loss": 0.5179, "step": 570 }, { "epoch": 0.12, "grad_norm": 0.16102533042430878, "learning_rate": 9.775556472756226e-06, "loss": 0.4919, "step": 571 }, { "epoch": 0.12, "grad_norm": 0.18485024571418762, "learning_rate": 9.774521671618877e-06, "loss": 0.5455, "step": 572 }, { "epoch": 0.12, "grad_norm": 0.1821717470884323, "learning_rate": 9.773484545504771e-06, "loss": 0.5091, "step": 573 }, { "epoch": 0.12, "grad_norm": 0.16444329917430878, "learning_rate": 9.772445094918944e-06, "loss": 0.5218, "step": 574 }, { "epoch": 0.12, "grad_norm": 0.17274467647075653, "learning_rate": 9.771403320367558e-06, "loss": 0.5823, "step": 575 }, { "epoch": 0.12, "grad_norm": 0.15213851630687714, "learning_rate": 9.770359222357914e-06, "loss": 0.4696, "step": 576 }, { "epoch": 0.12, "grad_norm": 0.13404731452465057, "learning_rate": 9.76931280139844e-06, "loss": 0.5365, "step": 577 }, { "epoch": 0.12, "grad_norm": 0.1744057685136795, "learning_rate": 9.768264057998693e-06, "loss": 0.5559, "step": 578 }, { "epoch": 0.12, "grad_norm": 0.17314410209655762, "learning_rate": 9.767212992669368e-06, "loss": 0.5614, "step": 579 }, { "epoch": 0.12, "grad_norm": 0.1846940666437149, "learning_rate": 9.766159605922282e-06, "loss": 0.5122, "step": 580 }, { "epoch": 0.13, "grad_norm": 0.15393884479999542, "learning_rate": 9.76510389827039e-06, "loss": 0.5734, "step": 581 }, { "epoch": 0.13, "grad_norm": 0.1504923403263092, "learning_rate": 9.764045870227772e-06, "loss": 0.5111, "step": 582 }, { "epoch": 0.13, "grad_norm": 0.16151262819766998, "learning_rate": 9.762985522309642e-06, "loss": 0.4965, "step": 583 }, { "epoch": 0.13, "grad_norm": 0.15211625397205353, "learning_rate": 9.761922855032339e-06, "loss": 0.5263, "step": 584 }, { "epoch": 0.13, "grad_norm": 0.17688104510307312, "learning_rate": 9.760857868913335e-06, "loss": 0.4846, "step": 585 }, { 
"epoch": 0.13, "grad_norm": 0.1481778621673584, "learning_rate": 9.759790564471233e-06, "loss": 0.5189, "step": 586 }, { "epoch": 0.13, "grad_norm": 0.1728227287530899, "learning_rate": 9.758720942225759e-06, "loss": 0.4878, "step": 587 }, { "epoch": 0.13, "grad_norm": 0.15571308135986328, "learning_rate": 9.757649002697771e-06, "loss": 0.5456, "step": 588 }, { "epoch": 0.13, "grad_norm": 0.15774881839752197, "learning_rate": 9.756574746409258e-06, "loss": 0.522, "step": 589 }, { "epoch": 0.13, "grad_norm": 0.18842703104019165, "learning_rate": 9.755498173883331e-06, "loss": 0.442, "step": 590 }, { "epoch": 0.13, "grad_norm": 0.1557362824678421, "learning_rate": 9.754419285644233e-06, "loss": 0.5149, "step": 591 }, { "epoch": 0.13, "grad_norm": 0.15488624572753906, "learning_rate": 9.753338082217334e-06, "loss": 0.5567, "step": 592 }, { "epoch": 0.13, "grad_norm": 0.14816376566886902, "learning_rate": 9.752254564129134e-06, "loss": 0.5244, "step": 593 }, { "epoch": 0.13, "grad_norm": 0.144754558801651, "learning_rate": 9.751168731907253e-06, "loss": 0.4777, "step": 594 }, { "epoch": 0.13, "grad_norm": 0.2727169096469879, "learning_rate": 9.750080586080445e-06, "loss": 0.5165, "step": 595 }, { "epoch": 0.13, "grad_norm": 0.21706987917423248, "learning_rate": 9.748990127178589e-06, "loss": 0.5346, "step": 596 }, { "epoch": 0.13, "grad_norm": 0.20381216704845428, "learning_rate": 9.747897355732684e-06, "loss": 0.5546, "step": 597 }, { "epoch": 0.13, "grad_norm": 0.1514424830675125, "learning_rate": 9.746802272274868e-06, "loss": 0.5593, "step": 598 }, { "epoch": 0.13, "grad_norm": 0.16018468141555786, "learning_rate": 9.745704877338393e-06, "loss": 0.5303, "step": 599 }, { "epoch": 0.13, "grad_norm": 0.1491565853357315, "learning_rate": 9.74460517145764e-06, "loss": 0.5265, "step": 600 }, { "epoch": 0.13, "grad_norm": 0.1546594202518463, "learning_rate": 9.743503155168119e-06, "loss": 0.5193, "step": 601 }, { "epoch": 0.13, "grad_norm": 0.18948593735694885, "learning_rate": 9.74239882900646e-06, "loss": 0.5794, "step": 602 }, { "epoch": 0.13, "grad_norm": 0.16691826283931732, "learning_rate": 9.74129219351042e-06, "loss": 0.5327, "step": 603 }, { "epoch": 0.13, "grad_norm": 0.15864987671375275, "learning_rate": 9.740183249218883e-06, "loss": 0.5189, "step": 604 }, { "epoch": 0.13, "grad_norm": 0.17005395889282227, "learning_rate": 9.739071996671851e-06, "loss": 0.5345, "step": 605 }, { "epoch": 0.13, "grad_norm": 0.16413751244544983, "learning_rate": 9.737958436410459e-06, "loss": 0.5135, "step": 606 }, { "epoch": 0.13, "grad_norm": 0.16234463453292847, "learning_rate": 9.736842568976957e-06, "loss": 0.523, "step": 607 }, { "epoch": 0.13, "grad_norm": 0.24228431284427643, "learning_rate": 9.73572439491472e-06, "loss": 0.5346, "step": 608 }, { "epoch": 0.13, "grad_norm": 0.16661712527275085, "learning_rate": 9.734603914768254e-06, "loss": 0.5846, "step": 609 }, { "epoch": 0.13, "grad_norm": 0.1640872210264206, "learning_rate": 9.73348112908318e-06, "loss": 0.5245, "step": 610 }, { "epoch": 0.13, "grad_norm": 0.15542039275169373, "learning_rate": 9.732356038406242e-06, "loss": 0.5418, "step": 611 }, { "epoch": 0.13, "grad_norm": 0.16618283092975616, "learning_rate": 9.73122864328531e-06, "loss": 0.5491, "step": 612 }, { "epoch": 0.13, "grad_norm": 0.15610603988170624, "learning_rate": 9.730098944269377e-06, "loss": 0.5672, "step": 613 }, { "epoch": 0.13, "grad_norm": 0.16698190569877625, "learning_rate": 9.72896694190855e-06, "loss": 0.5467, "step": 614 }, { "epoch": 0.13, "grad_norm": 
0.16580110788345337, "learning_rate": 9.727832636754066e-06, "loss": 0.5943, "step": 615 }, { "epoch": 0.13, "grad_norm": 0.17117217183113098, "learning_rate": 9.726696029358283e-06, "loss": 0.5022, "step": 616 }, { "epoch": 0.13, "grad_norm": 0.18671591579914093, "learning_rate": 9.725557120274673e-06, "loss": 0.544, "step": 617 }, { "epoch": 0.13, "grad_norm": 0.1708557903766632, "learning_rate": 9.724415910057839e-06, "loss": 0.5172, "step": 618 }, { "epoch": 0.13, "grad_norm": 0.17382651567459106, "learning_rate": 9.723272399263492e-06, "loss": 0.5278, "step": 619 }, { "epoch": 0.13, "grad_norm": 0.20800824463367462, "learning_rate": 9.722126588448473e-06, "loss": 0.5484, "step": 620 }, { "epoch": 0.13, "grad_norm": 0.1626998484134674, "learning_rate": 9.720978478170745e-06, "loss": 0.5248, "step": 621 }, { "epoch": 0.13, "grad_norm": 0.20097678899765015, "learning_rate": 9.719828068989378e-06, "loss": 0.4871, "step": 622 }, { "epoch": 0.13, "grad_norm": 0.17374666035175323, "learning_rate": 9.718675361464574e-06, "loss": 0.5118, "step": 623 }, { "epoch": 0.13, "grad_norm": 0.16872042417526245, "learning_rate": 9.717520356157648e-06, "loss": 0.5554, "step": 624 }, { "epoch": 0.13, "grad_norm": 0.1880810260772705, "learning_rate": 9.716363053631039e-06, "loss": 0.4936, "step": 625 }, { "epoch": 0.13, "grad_norm": 0.20248694717884064, "learning_rate": 9.715203454448297e-06, "loss": 0.5005, "step": 626 }, { "epoch": 0.14, "grad_norm": 0.20392434298992157, "learning_rate": 9.714041559174095e-06, "loss": 0.5389, "step": 627 }, { "epoch": 0.14, "grad_norm": 0.16137023270130157, "learning_rate": 9.712877368374226e-06, "loss": 0.5599, "step": 628 }, { "epoch": 0.14, "grad_norm": 0.17454127967357635, "learning_rate": 9.711710882615595e-06, "loss": 0.5127, "step": 629 }, { "epoch": 0.14, "grad_norm": 0.13746435940265656, "learning_rate": 9.710542102466229e-06, "loss": 0.5617, "step": 630 }, { "epoch": 0.14, "grad_norm": 0.16572695970535278, "learning_rate": 9.709371028495276e-06, "loss": 0.5421, "step": 631 }, { "epoch": 0.14, "grad_norm": 0.21156027913093567, "learning_rate": 9.708197661272989e-06, "loss": 0.5373, "step": 632 }, { "epoch": 0.14, "grad_norm": 0.1574900895357132, "learning_rate": 9.707022001370749e-06, "loss": 0.526, "step": 633 }, { "epoch": 0.14, "grad_norm": 0.14249767363071442, "learning_rate": 9.70584404936105e-06, "loss": 0.5104, "step": 634 }, { "epoch": 0.14, "grad_norm": 0.18953874707221985, "learning_rate": 9.704663805817499e-06, "loss": 0.54, "step": 635 }, { "epoch": 0.14, "grad_norm": 0.17047786712646484, "learning_rate": 9.703481271314823e-06, "loss": 0.5185, "step": 636 }, { "epoch": 0.14, "grad_norm": 0.19651903212070465, "learning_rate": 9.702296446428863e-06, "loss": 0.5147, "step": 637 }, { "epoch": 0.14, "grad_norm": 0.13926255702972412, "learning_rate": 9.701109331736573e-06, "loss": 0.5381, "step": 638 }, { "epoch": 0.14, "grad_norm": 0.16642118990421295, "learning_rate": 9.699919927816027e-06, "loss": 0.5114, "step": 639 }, { "epoch": 0.14, "grad_norm": 0.19338329136371613, "learning_rate": 9.69872823524641e-06, "loss": 0.5317, "step": 640 }, { "epoch": 0.14, "grad_norm": 0.26251357793807983, "learning_rate": 9.697534254608024e-06, "loss": 0.5122, "step": 641 }, { "epoch": 0.14, "grad_norm": 0.1683933585882187, "learning_rate": 9.69633798648228e-06, "loss": 0.5429, "step": 642 }, { "epoch": 0.14, "grad_norm": 0.20809942483901978, "learning_rate": 9.695139431451712e-06, "loss": 0.537, "step": 643 }, { "epoch": 0.14, "grad_norm": 0.1489880532026291, 
"learning_rate": 9.693938590099958e-06, "loss": 0.5049, "step": 644 }, { "epoch": 0.14, "grad_norm": 0.14825065433979034, "learning_rate": 9.692735463011774e-06, "loss": 0.496, "step": 645 }, { "epoch": 0.14, "grad_norm": 0.17281201481819153, "learning_rate": 9.691530050773031e-06, "loss": 0.524, "step": 646 }, { "epoch": 0.14, "grad_norm": 0.227024644613266, "learning_rate": 9.690322353970708e-06, "loss": 0.5191, "step": 647 }, { "epoch": 0.14, "grad_norm": 0.16183902323246002, "learning_rate": 9.689112373192899e-06, "loss": 0.5557, "step": 648 }, { "epoch": 0.14, "grad_norm": 0.19648505747318268, "learning_rate": 9.687900109028813e-06, "loss": 0.4963, "step": 649 }, { "epoch": 0.14, "grad_norm": 0.1439117044210434, "learning_rate": 9.686685562068765e-06, "loss": 0.5512, "step": 650 }, { "epoch": 0.14, "grad_norm": 0.15333153307437897, "learning_rate": 9.685468732904187e-06, "loss": 0.4566, "step": 651 }, { "epoch": 0.14, "grad_norm": 0.16783007979393005, "learning_rate": 9.684249622127616e-06, "loss": 0.5197, "step": 652 }, { "epoch": 0.14, "grad_norm": 0.1759708672761917, "learning_rate": 9.683028230332707e-06, "loss": 0.5086, "step": 653 }, { "epoch": 0.14, "grad_norm": 0.18851730227470398, "learning_rate": 9.681804558114222e-06, "loss": 0.5563, "step": 654 }, { "epoch": 0.14, "grad_norm": 0.18417790532112122, "learning_rate": 9.680578606068037e-06, "loss": 0.5028, "step": 655 }, { "epoch": 0.14, "grad_norm": 0.1564049869775772, "learning_rate": 9.67935037479113e-06, "loss": 0.5071, "step": 656 }, { "epoch": 0.14, "grad_norm": 0.18582746386528015, "learning_rate": 9.678119864881597e-06, "loss": 0.4922, "step": 657 }, { "epoch": 0.14, "grad_norm": 0.16415338218212128, "learning_rate": 9.676887076938642e-06, "loss": 0.5226, "step": 658 }, { "epoch": 0.14, "grad_norm": 0.19364799559116364, "learning_rate": 9.675652011562576e-06, "loss": 0.5294, "step": 659 }, { "epoch": 0.14, "grad_norm": 0.19111143052577972, "learning_rate": 9.674414669354819e-06, "loss": 0.5486, "step": 660 }, { "epoch": 0.14, "grad_norm": 0.1607770472764969, "learning_rate": 9.673175050917902e-06, "loss": 0.5674, "step": 661 }, { "epoch": 0.14, "grad_norm": 0.16375142335891724, "learning_rate": 9.671933156855464e-06, "loss": 0.5305, "step": 662 }, { "epoch": 0.14, "grad_norm": 0.2223857343196869, "learning_rate": 9.67068898777225e-06, "loss": 0.5073, "step": 663 }, { "epoch": 0.14, "grad_norm": 0.15344950556755066, "learning_rate": 9.669442544274115e-06, "loss": 0.5176, "step": 664 }, { "epoch": 0.14, "grad_norm": 0.1769074946641922, "learning_rate": 9.66819382696802e-06, "loss": 0.4888, "step": 665 }, { "epoch": 0.14, "grad_norm": 0.14724504947662354, "learning_rate": 9.666942836462036e-06, "loss": 0.5251, "step": 666 }, { "epoch": 0.14, "grad_norm": 0.14657099545001984, "learning_rate": 9.665689573365336e-06, "loss": 0.5271, "step": 667 }, { "epoch": 0.14, "grad_norm": 0.16149552166461945, "learning_rate": 9.664434038288207e-06, "loss": 0.521, "step": 668 }, { "epoch": 0.14, "grad_norm": 0.17453457415103912, "learning_rate": 9.663176231842034e-06, "loss": 0.5071, "step": 669 }, { "epoch": 0.14, "grad_norm": 0.1480516791343689, "learning_rate": 9.661916154639312e-06, "loss": 0.598, "step": 670 }, { "epoch": 0.14, "grad_norm": 0.17226625978946686, "learning_rate": 9.660653807293643e-06, "loss": 0.534, "step": 671 }, { "epoch": 0.14, "grad_norm": 0.13685333728790283, "learning_rate": 9.659389190419735e-06, "loss": 0.5049, "step": 672 }, { "epoch": 0.14, "grad_norm": 0.1628991812467575, "learning_rate": 
9.658122304633395e-06, "loss": 0.5246, "step": 673 }, { "epoch": 0.15, "grad_norm": 0.1640816479921341, "learning_rate": 9.656853150551543e-06, "loss": 0.5104, "step": 674 }, { "epoch": 0.15, "grad_norm": 0.16424889862537384, "learning_rate": 9.6555817287922e-06, "loss": 0.5173, "step": 675 }, { "epoch": 0.15, "grad_norm": 0.26323530077934265, "learning_rate": 9.654308039974489e-06, "loss": 0.5144, "step": 676 }, { "epoch": 0.15, "grad_norm": 0.13345208764076233, "learning_rate": 9.65303208471864e-06, "loss": 0.5294, "step": 677 }, { "epoch": 0.15, "grad_norm": 0.179282546043396, "learning_rate": 9.651753863645985e-06, "loss": 0.5211, "step": 678 }, { "epoch": 0.15, "grad_norm": 0.1983976811170578, "learning_rate": 9.650473377378961e-06, "loss": 0.5435, "step": 679 }, { "epoch": 0.15, "grad_norm": 0.18049369752407074, "learning_rate": 9.649190626541105e-06, "loss": 0.533, "step": 680 }, { "epoch": 0.15, "grad_norm": 0.16596846282482147, "learning_rate": 9.647905611757062e-06, "loss": 0.5274, "step": 681 }, { "epoch": 0.15, "grad_norm": 0.17268408834934235, "learning_rate": 9.646618333652574e-06, "loss": 0.5481, "step": 682 }, { "epoch": 0.15, "grad_norm": 0.168728306889534, "learning_rate": 9.64532879285449e-06, "loss": 0.5201, "step": 683 }, { "epoch": 0.15, "grad_norm": 0.2116057574748993, "learning_rate": 9.644036989990753e-06, "loss": 0.5107, "step": 684 }, { "epoch": 0.15, "grad_norm": 0.14726531505584717, "learning_rate": 9.642742925690417e-06, "loss": 0.5546, "step": 685 }, { "epoch": 0.15, "grad_norm": 0.17111736536026, "learning_rate": 9.641446600583632e-06, "loss": 0.5123, "step": 686 }, { "epoch": 0.15, "grad_norm": 0.17838339507579803, "learning_rate": 9.640148015301651e-06, "loss": 0.4966, "step": 687 }, { "epoch": 0.15, "grad_norm": 0.17207923531532288, "learning_rate": 9.638847170476824e-06, "loss": 0.5189, "step": 688 }, { "epoch": 0.15, "grad_norm": 0.15716849267482758, "learning_rate": 9.637544066742606e-06, "loss": 0.5553, "step": 689 }, { "epoch": 0.15, "grad_norm": 0.19608205556869507, "learning_rate": 9.636238704733547e-06, "loss": 0.5691, "step": 690 }, { "epoch": 0.15, "grad_norm": 0.15424737334251404, "learning_rate": 9.634931085085301e-06, "loss": 0.5419, "step": 691 }, { "epoch": 0.15, "grad_norm": 0.24781200289726257, "learning_rate": 9.633621208434623e-06, "loss": 0.5374, "step": 692 }, { "epoch": 0.15, "grad_norm": 0.1594979614019394, "learning_rate": 9.63230907541936e-06, "loss": 0.5093, "step": 693 }, { "epoch": 0.15, "grad_norm": 0.1622641682624817, "learning_rate": 9.630994686678462e-06, "loss": 0.5247, "step": 694 }, { "epoch": 0.15, "grad_norm": 0.19124239683151245, "learning_rate": 9.629678042851976e-06, "loss": 0.5241, "step": 695 }, { "epoch": 0.15, "grad_norm": 0.1495082974433899, "learning_rate": 9.628359144581052e-06, "loss": 0.5295, "step": 696 }, { "epoch": 0.15, "grad_norm": 0.1647813469171524, "learning_rate": 9.627037992507931e-06, "loss": 0.494, "step": 697 }, { "epoch": 0.15, "grad_norm": 0.16081197559833527, "learning_rate": 9.625714587275954e-06, "loss": 0.5414, "step": 698 }, { "epoch": 0.15, "grad_norm": 0.14257070422172546, "learning_rate": 9.624388929529563e-06, "loss": 0.5634, "step": 699 }, { "epoch": 0.15, "grad_norm": 0.1383073329925537, "learning_rate": 9.623061019914291e-06, "loss": 0.4961, "step": 700 }, { "epoch": 0.15, "grad_norm": 0.1932617723941803, "learning_rate": 9.621730859076768e-06, "loss": 0.522, "step": 701 }, { "epoch": 0.15, "grad_norm": 0.20005308091640472, "learning_rate": 9.620398447664727e-06, "loss": 0.522, 
"step": 702 }, { "epoch": 0.15, "grad_norm": 0.17601189017295837, "learning_rate": 9.61906378632699e-06, "loss": 0.5707, "step": 703 }, { "epoch": 0.15, "grad_norm": 0.14197023212909698, "learning_rate": 9.617726875713477e-06, "loss": 0.5194, "step": 704 }, { "epoch": 0.15, "grad_norm": 0.17921584844589233, "learning_rate": 9.616387716475203e-06, "loss": 0.5067, "step": 705 }, { "epoch": 0.15, "grad_norm": 0.1330891251564026, "learning_rate": 9.615046309264278e-06, "loss": 0.4925, "step": 706 }, { "epoch": 0.15, "grad_norm": 0.19038861989974976, "learning_rate": 9.613702654733908e-06, "loss": 0.5745, "step": 707 }, { "epoch": 0.15, "grad_norm": 0.2451518177986145, "learning_rate": 9.612356753538392e-06, "loss": 0.5799, "step": 708 }, { "epoch": 0.15, "grad_norm": 0.20882856845855713, "learning_rate": 9.611008606333121e-06, "loss": 0.4886, "step": 709 }, { "epoch": 0.15, "grad_norm": 0.170186385512352, "learning_rate": 9.609658213774584e-06, "loss": 0.5118, "step": 710 }, { "epoch": 0.15, "grad_norm": 0.15260860323905945, "learning_rate": 9.608305576520361e-06, "loss": 0.5166, "step": 711 }, { "epoch": 0.15, "grad_norm": 0.16833122074604034, "learning_rate": 9.606950695229125e-06, "loss": 0.5003, "step": 712 }, { "epoch": 0.15, "grad_norm": 0.17692722380161285, "learning_rate": 9.605593570560642e-06, "loss": 0.5378, "step": 713 }, { "epoch": 0.15, "grad_norm": 0.2011829912662506, "learning_rate": 9.60423420317577e-06, "loss": 0.531, "step": 714 }, { "epoch": 0.15, "grad_norm": 0.1459263414144516, "learning_rate": 9.602872593736461e-06, "loss": 0.5278, "step": 715 }, { "epoch": 0.15, "grad_norm": 0.15884311497211456, "learning_rate": 9.601508742905757e-06, "loss": 0.5615, "step": 716 }, { "epoch": 0.15, "grad_norm": 0.2560180127620697, "learning_rate": 9.600142651347792e-06, "loss": 0.5295, "step": 717 }, { "epoch": 0.15, "grad_norm": 0.15647375583648682, "learning_rate": 9.59877431972779e-06, "loss": 0.5028, "step": 718 }, { "epoch": 0.15, "grad_norm": 0.21782688796520233, "learning_rate": 9.597403748712067e-06, "loss": 0.4902, "step": 719 }, { "epoch": 0.16, "grad_norm": 0.16878049075603485, "learning_rate": 9.596030938968028e-06, "loss": 0.5524, "step": 720 }, { "epoch": 0.16, "grad_norm": 0.1529654562473297, "learning_rate": 9.594655891164174e-06, "loss": 0.4946, "step": 721 }, { "epoch": 0.16, "grad_norm": 0.2102820873260498, "learning_rate": 9.593278605970086e-06, "loss": 0.5093, "step": 722 }, { "epoch": 0.16, "grad_norm": 0.13754625618457794, "learning_rate": 9.591899084056444e-06, "loss": 0.55, "step": 723 }, { "epoch": 0.16, "grad_norm": 0.20235078036785126, "learning_rate": 9.590517326095012e-06, "loss": 0.5277, "step": 724 }, { "epoch": 0.16, "grad_norm": 0.20487360656261444, "learning_rate": 9.58913333275864e-06, "loss": 0.5274, "step": 725 }, { "epoch": 0.16, "grad_norm": 0.15242727100849152, "learning_rate": 9.587747104721275e-06, "loss": 0.5361, "step": 726 }, { "epoch": 0.16, "grad_norm": 0.16651783883571625, "learning_rate": 9.586358642657946e-06, "loss": 0.5422, "step": 727 }, { "epoch": 0.16, "grad_norm": 0.20768210291862488, "learning_rate": 9.58496794724477e-06, "loss": 0.5204, "step": 728 }, { "epoch": 0.16, "grad_norm": 0.13769538700580597, "learning_rate": 9.583575019158954e-06, "loss": 0.5485, "step": 729 }, { "epoch": 0.16, "grad_norm": 0.2392173558473587, "learning_rate": 9.582179859078793e-06, "loss": 0.5178, "step": 730 }, { "epoch": 0.16, "grad_norm": 0.17117203772068024, "learning_rate": 9.580782467683666e-06, "loss": 0.4959, "step": 731 }, { "epoch": 0.16, 
"grad_norm": 0.14463159441947937, "learning_rate": 9.579382845654038e-06, "loss": 0.5405, "step": 732 }, { "epoch": 0.16, "grad_norm": 0.15378107130527496, "learning_rate": 9.577980993671461e-06, "loss": 0.5239, "step": 733 }, { "epoch": 0.16, "grad_norm": 0.18154248595237732, "learning_rate": 9.576576912418577e-06, "loss": 0.5138, "step": 734 }, { "epoch": 0.16, "grad_norm": 0.17718815803527832, "learning_rate": 9.575170602579109e-06, "loss": 0.5281, "step": 735 }, { "epoch": 0.16, "grad_norm": 0.18913020193576813, "learning_rate": 9.573762064837866e-06, "loss": 0.4653, "step": 736 }, { "epoch": 0.16, "grad_norm": 0.16615386307239532, "learning_rate": 9.572351299880742e-06, "loss": 0.4993, "step": 737 }, { "epoch": 0.16, "grad_norm": 0.1711035966873169, "learning_rate": 9.570938308394717e-06, "loss": 0.5527, "step": 738 }, { "epoch": 0.16, "grad_norm": 0.1759718656539917, "learning_rate": 9.569523091067855e-06, "loss": 0.4892, "step": 739 }, { "epoch": 0.16, "grad_norm": 0.16556698083877563, "learning_rate": 9.568105648589299e-06, "loss": 0.512, "step": 740 }, { "epoch": 0.16, "grad_norm": 0.16739937663078308, "learning_rate": 9.566685981649283e-06, "loss": 0.5167, "step": 741 }, { "epoch": 0.16, "grad_norm": 0.16000035405158997, "learning_rate": 9.565264090939122e-06, "loss": 0.5528, "step": 742 }, { "epoch": 0.16, "grad_norm": 0.2087719887495041, "learning_rate": 9.563839977151208e-06, "loss": 0.5447, "step": 743 }, { "epoch": 0.16, "grad_norm": 0.17800335586071014, "learning_rate": 9.562413640979024e-06, "loss": 0.5615, "step": 744 }, { "epoch": 0.16, "grad_norm": 0.13852566480636597, "learning_rate": 9.56098508311713e-06, "loss": 0.5196, "step": 745 }, { "epoch": 0.16, "grad_norm": 0.15705984830856323, "learning_rate": 9.55955430426117e-06, "loss": 0.5286, "step": 746 }, { "epoch": 0.16, "grad_norm": 0.13705521821975708, "learning_rate": 9.558121305107868e-06, "loss": 0.4874, "step": 747 }, { "epoch": 0.16, "grad_norm": 0.1593395620584488, "learning_rate": 9.556686086355032e-06, "loss": 0.508, "step": 748 }, { "epoch": 0.16, "grad_norm": 0.1956239640712738, "learning_rate": 9.555248648701546e-06, "loss": 0.5165, "step": 749 }, { "epoch": 0.16, "grad_norm": 0.15301111340522766, "learning_rate": 9.553808992847377e-06, "loss": 0.5279, "step": 750 }, { "epoch": 0.16, "grad_norm": 0.1944187432527542, "learning_rate": 9.552367119493575e-06, "loss": 0.5328, "step": 751 }, { "epoch": 0.16, "grad_norm": 0.16133981943130493, "learning_rate": 9.550923029342266e-06, "loss": 0.5258, "step": 752 }, { "epoch": 0.16, "grad_norm": 0.1575002670288086, "learning_rate": 9.549476723096658e-06, "loss": 0.4785, "step": 753 }, { "epoch": 0.16, "grad_norm": 0.2158762514591217, "learning_rate": 9.548028201461034e-06, "loss": 0.5069, "step": 754 }, { "epoch": 0.16, "grad_norm": 0.1875433325767517, "learning_rate": 9.546577465140763e-06, "loss": 0.5165, "step": 755 }, { "epoch": 0.16, "grad_norm": 0.15603913366794586, "learning_rate": 9.545124514842284e-06, "loss": 0.523, "step": 756 }, { "epoch": 0.16, "grad_norm": 0.15902650356292725, "learning_rate": 9.543669351273122e-06, "loss": 0.5527, "step": 757 }, { "epoch": 0.16, "grad_norm": 0.14115546643733978, "learning_rate": 9.542211975141871e-06, "loss": 0.515, "step": 758 }, { "epoch": 0.16, "grad_norm": 0.12460020929574966, "learning_rate": 9.540752387158213e-06, "loss": 0.5186, "step": 759 }, { "epoch": 0.16, "grad_norm": 0.2988269627094269, "learning_rate": 9.5392905880329e-06, "loss": 0.5062, "step": 760 }, { "epoch": 0.16, "grad_norm": 0.1358107179403305, 
"learning_rate": 9.537826578477758e-06, "loss": 0.5129, "step": 761 }, { "epoch": 0.16, "grad_norm": 0.1808885931968689, "learning_rate": 9.5363603592057e-06, "loss": 0.5442, "step": 762 }, { "epoch": 0.16, "grad_norm": 0.16095423698425293, "learning_rate": 9.534891930930705e-06, "loss": 0.5632, "step": 763 }, { "epoch": 0.16, "grad_norm": 0.14927184581756592, "learning_rate": 9.53342129436783e-06, "loss": 0.5345, "step": 764 }, { "epoch": 0.16, "grad_norm": 0.17672008275985718, "learning_rate": 9.531948450233213e-06, "loss": 0.5667, "step": 765 }, { "epoch": 0.17, "grad_norm": 0.17709845304489136, "learning_rate": 9.530473399244061e-06, "loss": 0.5354, "step": 766 }, { "epoch": 0.17, "grad_norm": 0.16679351031780243, "learning_rate": 9.528996142118654e-06, "loss": 0.5584, "step": 767 }, { "epoch": 0.17, "grad_norm": 0.21075226366519928, "learning_rate": 9.527516679576353e-06, "loss": 0.4759, "step": 768 }, { "epoch": 0.17, "grad_norm": 0.15864352881908417, "learning_rate": 9.526035012337591e-06, "loss": 0.5861, "step": 769 }, { "epoch": 0.17, "grad_norm": 0.18424198031425476, "learning_rate": 9.52455114112387e-06, "loss": 0.5176, "step": 770 }, { "epoch": 0.17, "grad_norm": 0.14614816009998322, "learning_rate": 9.523065066657769e-06, "loss": 0.5267, "step": 771 }, { "epoch": 0.17, "grad_norm": 0.18655577301979065, "learning_rate": 9.52157678966294e-06, "loss": 0.5034, "step": 772 }, { "epoch": 0.17, "grad_norm": 0.1492408663034439, "learning_rate": 9.520086310864104e-06, "loss": 0.5242, "step": 773 }, { "epoch": 0.17, "grad_norm": 0.18119966983795166, "learning_rate": 9.518593630987063e-06, "loss": 0.503, "step": 774 }, { "epoch": 0.17, "grad_norm": 0.2733058035373688, "learning_rate": 9.51709875075868e-06, "loss": 0.5293, "step": 775 }, { "epoch": 0.17, "grad_norm": 0.15847504138946533, "learning_rate": 9.515601670906895e-06, "loss": 0.5012, "step": 776 }, { "epoch": 0.17, "grad_norm": 0.17875181138515472, "learning_rate": 9.51410239216072e-06, "loss": 0.4895, "step": 777 }, { "epoch": 0.17, "grad_norm": 0.19667181372642517, "learning_rate": 9.512600915250232e-06, "loss": 0.5493, "step": 778 }, { "epoch": 0.17, "grad_norm": 0.1711205095052719, "learning_rate": 9.511097240906588e-06, "loss": 0.4674, "step": 779 }, { "epoch": 0.17, "grad_norm": 0.18481481075286865, "learning_rate": 9.509591369862007e-06, "loss": 0.5166, "step": 780 }, { "epoch": 0.17, "grad_norm": 0.15598368644714355, "learning_rate": 9.50808330284978e-06, "loss": 0.5697, "step": 781 }, { "epoch": 0.17, "grad_norm": 0.19259214401245117, "learning_rate": 9.506573040604268e-06, "loss": 0.5114, "step": 782 }, { "epoch": 0.17, "grad_norm": 0.14538073539733887, "learning_rate": 9.5050605838609e-06, "loss": 0.5485, "step": 783 }, { "epoch": 0.17, "grad_norm": 0.18423911929130554, "learning_rate": 9.503545933356175e-06, "loss": 0.5254, "step": 784 }, { "epoch": 0.17, "grad_norm": 0.1563284546136856, "learning_rate": 9.50202908982766e-06, "loss": 0.5266, "step": 785 }, { "epoch": 0.17, "grad_norm": 0.16368651390075684, "learning_rate": 9.500510054013989e-06, "loss": 0.5289, "step": 786 }, { "epoch": 0.17, "grad_norm": 0.16315564513206482, "learning_rate": 9.498988826654863e-06, "loss": 0.4904, "step": 787 }, { "epoch": 0.17, "grad_norm": 0.15771108865737915, "learning_rate": 9.49746540849105e-06, "loss": 0.5132, "step": 788 }, { "epoch": 0.17, "grad_norm": 0.19994409382343292, "learning_rate": 9.49593980026439e-06, "loss": 0.5498, "step": 789 }, { "epoch": 0.17, "grad_norm": 0.13863793015480042, "learning_rate": 
9.494412002717784e-06, "loss": 0.5206, "step": 790 }, { "epoch": 0.17, "grad_norm": 0.17389997839927673, "learning_rate": 9.4928820165952e-06, "loss": 0.4742, "step": 791 }, { "epoch": 0.17, "grad_norm": 0.15407484769821167, "learning_rate": 9.49134984264167e-06, "loss": 0.4783, "step": 792 }, { "epoch": 0.17, "grad_norm": 0.15034940838813782, "learning_rate": 9.489815481603297e-06, "loss": 0.5066, "step": 793 }, { "epoch": 0.17, "grad_norm": 0.14711235463619232, "learning_rate": 9.488278934227242e-06, "loss": 0.5068, "step": 794 }, { "epoch": 0.17, "grad_norm": 0.17346839606761932, "learning_rate": 9.48674020126174e-06, "loss": 0.536, "step": 795 }, { "epoch": 0.17, "grad_norm": 0.14369408786296844, "learning_rate": 9.485199283456078e-06, "loss": 0.4971, "step": 796 }, { "epoch": 0.17, "grad_norm": 0.1965474933385849, "learning_rate": 9.483656181560618e-06, "loss": 0.5791, "step": 797 }, { "epoch": 0.17, "grad_norm": 0.17605896294116974, "learning_rate": 9.48211089632678e-06, "loss": 0.5551, "step": 798 }, { "epoch": 0.17, "grad_norm": 0.1731802225112915, "learning_rate": 9.480563428507045e-06, "loss": 0.4776, "step": 799 }, { "epoch": 0.17, "grad_norm": 0.17883409559726715, "learning_rate": 9.479013778854966e-06, "loss": 0.5357, "step": 800 }, { "epoch": 0.17, "grad_norm": 0.1549665927886963, "learning_rate": 9.477461948125149e-06, "loss": 0.4987, "step": 801 }, { "epoch": 0.17, "grad_norm": 0.23310746252536774, "learning_rate": 9.475907937073265e-06, "loss": 0.5242, "step": 802 }, { "epoch": 0.17, "grad_norm": 0.21235214173793793, "learning_rate": 9.474351746456048e-06, "loss": 0.4909, "step": 803 }, { "epoch": 0.17, "grad_norm": 0.16170482337474823, "learning_rate": 9.472793377031293e-06, "loss": 0.4607, "step": 804 }, { "epoch": 0.17, "grad_norm": 0.21534408628940582, "learning_rate": 9.471232829557857e-06, "loss": 0.5182, "step": 805 }, { "epoch": 0.17, "grad_norm": 0.155525341629982, "learning_rate": 9.469670104795655e-06, "loss": 0.5337, "step": 806 }, { "epoch": 0.17, "grad_norm": 0.1875993311405182, "learning_rate": 9.468105203505661e-06, "loss": 0.4955, "step": 807 }, { "epoch": 0.17, "grad_norm": 0.1549602895975113, "learning_rate": 9.466538126449915e-06, "loss": 0.5879, "step": 808 }, { "epoch": 0.17, "grad_norm": 0.22798140347003937, "learning_rate": 9.464968874391511e-06, "loss": 0.539, "step": 809 }, { "epoch": 0.17, "grad_norm": 0.1601991057395935, "learning_rate": 9.463397448094605e-06, "loss": 0.4695, "step": 810 }, { "epoch": 0.17, "grad_norm": 0.16516649723052979, "learning_rate": 9.46182384832441e-06, "loss": 0.5621, "step": 811 }, { "epoch": 0.17, "grad_norm": 0.14943736791610718, "learning_rate": 9.460248075847199e-06, "loss": 0.5337, "step": 812 }, { "epoch": 0.18, "grad_norm": 0.1822364181280136, "learning_rate": 9.4586701314303e-06, "loss": 0.5071, "step": 813 }, { "epoch": 0.18, "grad_norm": 0.16500526666641235, "learning_rate": 9.457090015842104e-06, "loss": 0.483, "step": 814 }, { "epoch": 0.18, "grad_norm": 0.1568198800086975, "learning_rate": 9.455507729852053e-06, "loss": 0.496, "step": 815 }, { "epoch": 0.18, "grad_norm": 0.17206601798534393, "learning_rate": 9.453923274230653e-06, "loss": 0.5544, "step": 816 }, { "epoch": 0.18, "grad_norm": 0.15982304513454437, "learning_rate": 9.452336649749458e-06, "loss": 0.5124, "step": 817 }, { "epoch": 0.18, "grad_norm": 0.19488324224948883, "learning_rate": 9.450747857181084e-06, "loss": 0.4981, "step": 818 }, { "epoch": 0.18, "grad_norm": 0.23650221526622772, "learning_rate": 9.449156897299202e-06, "loss": 0.5373, 
"step": 819 }, { "epoch": 0.18, "grad_norm": 0.15237529575824738, "learning_rate": 9.447563770878535e-06, "loss": 0.5248, "step": 820 }, { "epoch": 0.18, "grad_norm": 0.15700353682041168, "learning_rate": 9.44596847869487e-06, "loss": 0.5289, "step": 821 }, { "epoch": 0.18, "grad_norm": 0.17049898207187653, "learning_rate": 9.444371021525036e-06, "loss": 0.5195, "step": 822 }, { "epoch": 0.18, "grad_norm": 0.18980465829372406, "learning_rate": 9.442771400146926e-06, "loss": 0.5191, "step": 823 }, { "epoch": 0.18, "grad_norm": 0.14770746231079102, "learning_rate": 9.441169615339482e-06, "loss": 0.4799, "step": 824 }, { "epoch": 0.18, "grad_norm": 0.1894197016954422, "learning_rate": 9.439565667882702e-06, "loss": 0.5771, "step": 825 }, { "epoch": 0.18, "grad_norm": 0.17405198514461517, "learning_rate": 9.437959558557635e-06, "loss": 0.5276, "step": 826 }, { "epoch": 0.18, "grad_norm": 0.2038612961769104, "learning_rate": 9.436351288146383e-06, "loss": 0.4888, "step": 827 }, { "epoch": 0.18, "grad_norm": 0.18169601261615753, "learning_rate": 9.434740857432105e-06, "loss": 0.5273, "step": 828 }, { "epoch": 0.18, "grad_norm": 0.19223563373088837, "learning_rate": 9.433128267199006e-06, "loss": 0.534, "step": 829 }, { "epoch": 0.18, "grad_norm": 0.20077872276306152, "learning_rate": 9.431513518232343e-06, "loss": 0.5153, "step": 830 }, { "epoch": 0.18, "grad_norm": 0.1688869744539261, "learning_rate": 9.429896611318428e-06, "loss": 0.5408, "step": 831 }, { "epoch": 0.18, "grad_norm": 0.24384887516498566, "learning_rate": 9.42827754724462e-06, "loss": 0.5771, "step": 832 }, { "epoch": 0.18, "grad_norm": 0.15766644477844238, "learning_rate": 9.426656326799333e-06, "loss": 0.4948, "step": 833 }, { "epoch": 0.18, "grad_norm": 0.1572624146938324, "learning_rate": 9.425032950772025e-06, "loss": 0.5612, "step": 834 }, { "epoch": 0.18, "grad_norm": 0.15511459112167358, "learning_rate": 9.42340741995321e-06, "loss": 0.544, "step": 835 }, { "epoch": 0.18, "grad_norm": 0.1777951866388321, "learning_rate": 9.421779735134446e-06, "loss": 0.5394, "step": 836 }, { "epoch": 0.18, "grad_norm": 0.2677023410797119, "learning_rate": 9.420149897108341e-06, "loss": 0.484, "step": 837 }, { "epoch": 0.18, "grad_norm": 0.1472686529159546, "learning_rate": 9.418517906668556e-06, "loss": 0.4913, "step": 838 }, { "epoch": 0.18, "grad_norm": 0.15383826196193695, "learning_rate": 9.416883764609797e-06, "loss": 0.4718, "step": 839 }, { "epoch": 0.18, "grad_norm": 0.19486670196056366, "learning_rate": 9.415247471727813e-06, "loss": 0.527, "step": 840 }, { "epoch": 0.18, "grad_norm": 0.16585233807563782, "learning_rate": 9.413609028819409e-06, "loss": 0.5039, "step": 841 }, { "epoch": 0.18, "grad_norm": 0.18775971233844757, "learning_rate": 9.41196843668243e-06, "loss": 0.4744, "step": 842 }, { "epoch": 0.18, "grad_norm": 0.16499534249305725, "learning_rate": 9.410325696115775e-06, "loss": 0.5376, "step": 843 }, { "epoch": 0.18, "grad_norm": 0.1950598657131195, "learning_rate": 9.408680807919377e-06, "loss": 0.5213, "step": 844 }, { "epoch": 0.18, "grad_norm": 0.14264388382434845, "learning_rate": 9.407033772894229e-06, "loss": 0.566, "step": 845 }, { "epoch": 0.18, "grad_norm": 0.16956187784671783, "learning_rate": 9.405384591842358e-06, "loss": 0.5058, "step": 846 }, { "epoch": 0.18, "grad_norm": 0.13649915158748627, "learning_rate": 9.403733265566848e-06, "loss": 0.4948, "step": 847 }, { "epoch": 0.18, "grad_norm": 0.1546815037727356, "learning_rate": 9.402079794871812e-06, "loss": 0.5087, "step": 848 }, { "epoch": 0.18, 
"grad_norm": 0.17630915343761444, "learning_rate": 9.400424180562421e-06, "loss": 0.5477, "step": 849 }, { "epoch": 0.18, "grad_norm": 0.19923992455005646, "learning_rate": 9.398766423444883e-06, "loss": 0.5332, "step": 850 }, { "epoch": 0.18, "grad_norm": 0.1514226794242859, "learning_rate": 9.397106524326449e-06, "loss": 0.5278, "step": 851 }, { "epoch": 0.18, "grad_norm": 0.17602422833442688, "learning_rate": 9.39544448401542e-06, "loss": 0.4708, "step": 852 }, { "epoch": 0.18, "grad_norm": 0.17394909262657166, "learning_rate": 9.393780303321128e-06, "loss": 0.5128, "step": 853 }, { "epoch": 0.18, "grad_norm": 0.14890971779823303, "learning_rate": 9.392113983053958e-06, "loss": 0.4967, "step": 854 }, { "epoch": 0.18, "grad_norm": 0.18306109309196472, "learning_rate": 9.390445524025336e-06, "loss": 0.4917, "step": 855 }, { "epoch": 0.18, "grad_norm": 0.16756963729858398, "learning_rate": 9.38877492704772e-06, "loss": 0.5143, "step": 856 }, { "epoch": 0.18, "grad_norm": 0.15101511776447296, "learning_rate": 9.387102192934618e-06, "loss": 0.5214, "step": 857 }, { "epoch": 0.18, "grad_norm": 0.21072083711624146, "learning_rate": 9.385427322500575e-06, "loss": 0.5188, "step": 858 }, { "epoch": 0.19, "grad_norm": 0.3193773627281189, "learning_rate": 9.38375031656118e-06, "loss": 0.5248, "step": 859 }, { "epoch": 0.19, "grad_norm": 0.17284849286079407, "learning_rate": 9.382071175933058e-06, "loss": 0.5331, "step": 860 }, { "epoch": 0.19, "grad_norm": 0.16421107947826385, "learning_rate": 9.380389901433875e-06, "loss": 0.5512, "step": 861 }, { "epoch": 0.19, "grad_norm": 0.19052369892597198, "learning_rate": 9.378706493882335e-06, "loss": 0.5485, "step": 862 }, { "epoch": 0.19, "grad_norm": 0.20467452704906464, "learning_rate": 9.377020954098181e-06, "loss": 0.5334, "step": 863 }, { "epoch": 0.19, "grad_norm": 0.14375852048397064, "learning_rate": 9.375333282902198e-06, "loss": 0.5574, "step": 864 }, { "epoch": 0.19, "grad_norm": 0.16476349532604218, "learning_rate": 9.3736434811162e-06, "loss": 0.542, "step": 865 }, { "epoch": 0.19, "grad_norm": 0.18003122508525848, "learning_rate": 9.37195154956305e-06, "loss": 0.5179, "step": 866 }, { "epoch": 0.19, "grad_norm": 0.17590996623039246, "learning_rate": 9.37025748906664e-06, "loss": 0.5528, "step": 867 }, { "epoch": 0.19, "grad_norm": 0.20183418691158295, "learning_rate": 9.368561300451902e-06, "loss": 0.544, "step": 868 }, { "epoch": 0.19, "grad_norm": 0.1688835769891739, "learning_rate": 9.366862984544802e-06, "loss": 0.4812, "step": 869 }, { "epoch": 0.19, "grad_norm": 0.21423234045505524, "learning_rate": 9.365162542172346e-06, "loss": 0.5428, "step": 870 }, { "epoch": 0.19, "grad_norm": 0.15897509455680847, "learning_rate": 9.363459974162568e-06, "loss": 0.5227, "step": 871 }, { "epoch": 0.19, "grad_norm": 0.16136378049850464, "learning_rate": 9.361755281344547e-06, "loss": 0.555, "step": 872 }, { "epoch": 0.19, "grad_norm": 0.1796402931213379, "learning_rate": 9.360048464548386e-06, "loss": 0.4782, "step": 873 }, { "epoch": 0.19, "grad_norm": 0.531697690486908, "learning_rate": 9.358339524605233e-06, "loss": 0.5207, "step": 874 }, { "epoch": 0.19, "grad_norm": 0.15121889114379883, "learning_rate": 9.356628462347264e-06, "loss": 0.4837, "step": 875 }, { "epoch": 0.19, "grad_norm": 0.17013150453567505, "learning_rate": 9.354915278607685e-06, "loss": 0.4911, "step": 876 }, { "epoch": 0.19, "grad_norm": 0.18947632610797882, "learning_rate": 9.353199974220744e-06, "loss": 0.5029, "step": 877 }, { "epoch": 0.19, "grad_norm": 0.13245789706707, 
"learning_rate": 9.351482550021713e-06, "loss": 0.4782, "step": 878 }, { "epoch": 0.19, "grad_norm": 0.21231511235237122, "learning_rate": 9.349763006846903e-06, "loss": 0.5535, "step": 879 }, { "epoch": 0.19, "grad_norm": 0.18766769766807556, "learning_rate": 9.348041345533653e-06, "loss": 0.5222, "step": 880 }, { "epoch": 0.19, "grad_norm": 0.16258256137371063, "learning_rate": 9.346317566920335e-06, "loss": 0.4873, "step": 881 }, { "epoch": 0.19, "grad_norm": 0.14111053943634033, "learning_rate": 9.34459167184635e-06, "loss": 0.4795, "step": 882 }, { "epoch": 0.19, "grad_norm": 0.20663069188594818, "learning_rate": 9.342863661152133e-06, "loss": 0.5221, "step": 883 }, { "epoch": 0.19, "grad_norm": 0.1376432627439499, "learning_rate": 9.341133535679145e-06, "loss": 0.464, "step": 884 }, { "epoch": 0.19, "grad_norm": 0.15190206468105316, "learning_rate": 9.33940129626988e-06, "loss": 0.5118, "step": 885 }, { "epoch": 0.19, "grad_norm": 0.16546842455863953, "learning_rate": 9.337666943767863e-06, "loss": 0.5256, "step": 886 }, { "epoch": 0.19, "grad_norm": 0.1859419345855713, "learning_rate": 9.335930479017642e-06, "loss": 0.562, "step": 887 }, { "epoch": 0.19, "grad_norm": 0.2912534475326538, "learning_rate": 9.334191902864799e-06, "loss": 0.5298, "step": 888 }, { "epoch": 0.19, "grad_norm": 0.16982656717300415, "learning_rate": 9.33245121615594e-06, "loss": 0.4953, "step": 889 }, { "epoch": 0.19, "grad_norm": 0.21246029436588287, "learning_rate": 9.330708419738704e-06, "loss": 0.5222, "step": 890 }, { "epoch": 0.19, "grad_norm": 0.3462158739566803, "learning_rate": 9.328963514461753e-06, "loss": 0.5451, "step": 891 }, { "epoch": 0.19, "grad_norm": 0.14150933921337128, "learning_rate": 9.327216501174775e-06, "loss": 0.5529, "step": 892 }, { "epoch": 0.19, "grad_norm": 0.15398851037025452, "learning_rate": 9.32546738072849e-06, "loss": 0.5258, "step": 893 }, { "epoch": 0.19, "grad_norm": 0.14066843688488007, "learning_rate": 9.323716153974639e-06, "loss": 0.5097, "step": 894 }, { "epoch": 0.19, "grad_norm": 0.1923949271440506, "learning_rate": 9.321962821765991e-06, "loss": 0.5511, "step": 895 }, { "epoch": 0.19, "grad_norm": 0.2550576627254486, "learning_rate": 9.320207384956339e-06, "loss": 0.5541, "step": 896 }, { "epoch": 0.19, "grad_norm": 0.178908571600914, "learning_rate": 9.318449844400504e-06, "loss": 0.5135, "step": 897 }, { "epoch": 0.19, "grad_norm": 0.19086679816246033, "learning_rate": 9.316690200954324e-06, "loss": 0.5143, "step": 898 }, { "epoch": 0.19, "grad_norm": 0.1290796995162964, "learning_rate": 9.31492845547467e-06, "loss": 0.502, "step": 899 }, { "epoch": 0.19, "grad_norm": 0.14759333431720734, "learning_rate": 9.313164608819434e-06, "loss": 0.5287, "step": 900 }, { "epoch": 0.19, "grad_norm": 0.158295676112175, "learning_rate": 9.311398661847526e-06, "loss": 0.56, "step": 901 }, { "epoch": 0.19, "grad_norm": 0.15422774851322174, "learning_rate": 9.309630615418884e-06, "loss": 0.5334, "step": 902 }, { "epoch": 0.19, "grad_norm": 0.16427233815193176, "learning_rate": 9.307860470394467e-06, "loss": 0.5364, "step": 903 }, { "epoch": 0.19, "grad_norm": 0.21173103153705597, "learning_rate": 9.306088227636257e-06, "loss": 0.5094, "step": 904 }, { "epoch": 0.19, "grad_norm": 0.21965515613555908, "learning_rate": 9.304313888007254e-06, "loss": 0.5219, "step": 905 }, { "epoch": 0.2, "grad_norm": 0.15674197673797607, "learning_rate": 9.302537452371482e-06, "loss": 0.5188, "step": 906 }, { "epoch": 0.2, "grad_norm": 0.19992782175540924, "learning_rate": 9.300758921593986e-06, 
"loss": 0.499, "step": 907 }, { "epoch": 0.2, "grad_norm": 0.17217914760112762, "learning_rate": 9.298978296540829e-06, "loss": 0.5364, "step": 908 }, { "epoch": 0.2, "grad_norm": 0.17133580148220062, "learning_rate": 9.297195578079096e-06, "loss": 0.4968, "step": 909 }, { "epoch": 0.2, "grad_norm": 0.23099388182163239, "learning_rate": 9.295410767076891e-06, "loss": 0.5252, "step": 910 }, { "epoch": 0.2, "grad_norm": 0.16104039549827576, "learning_rate": 9.293623864403336e-06, "loss": 0.4742, "step": 911 }, { "epoch": 0.2, "grad_norm": 0.12712013721466064, "learning_rate": 9.291834870928573e-06, "loss": 0.559, "step": 912 }, { "epoch": 0.2, "grad_norm": 0.17714501917362213, "learning_rate": 9.29004378752376e-06, "loss": 0.6085, "step": 913 }, { "epoch": 0.2, "grad_norm": 0.16740843653678894, "learning_rate": 9.288250615061073e-06, "loss": 0.5035, "step": 914 }, { "epoch": 0.2, "grad_norm": 0.19126859307289124, "learning_rate": 9.286455354413707e-06, "loss": 0.5777, "step": 915 }, { "epoch": 0.2, "grad_norm": 0.14088517427444458, "learning_rate": 9.284658006455871e-06, "loss": 0.5092, "step": 916 }, { "epoch": 0.2, "grad_norm": 0.14722870290279388, "learning_rate": 9.282858572062795e-06, "loss": 0.5206, "step": 917 }, { "epoch": 0.2, "grad_norm": 0.1408064216375351, "learning_rate": 9.281057052110725e-06, "loss": 0.5287, "step": 918 }, { "epoch": 0.2, "grad_norm": 0.1396157294511795, "learning_rate": 9.279253447476914e-06, "loss": 0.5116, "step": 919 }, { "epoch": 0.2, "grad_norm": 0.14657460153102875, "learning_rate": 9.27744775903964e-06, "loss": 0.5108, "step": 920 }, { "epoch": 0.2, "grad_norm": 0.17514435946941376, "learning_rate": 9.27563998767819e-06, "loss": 0.5112, "step": 921 }, { "epoch": 0.2, "grad_norm": 0.17996759712696075, "learning_rate": 9.27383013427287e-06, "loss": 0.495, "step": 922 }, { "epoch": 0.2, "grad_norm": 0.18228891491889954, "learning_rate": 9.272018199704993e-06, "loss": 0.4843, "step": 923 }, { "epoch": 0.2, "grad_norm": 0.18271513283252716, "learning_rate": 9.270204184856893e-06, "loss": 0.5625, "step": 924 }, { "epoch": 0.2, "grad_norm": 0.18662293255329132, "learning_rate": 9.26838809061191e-06, "loss": 0.5065, "step": 925 }, { "epoch": 0.2, "grad_norm": 0.15625204145908356, "learning_rate": 9.266569917854403e-06, "loss": 0.5557, "step": 926 }, { "epoch": 0.2, "grad_norm": 0.16261446475982666, "learning_rate": 9.264749667469737e-06, "loss": 0.5583, "step": 927 }, { "epoch": 0.2, "grad_norm": 0.14734329283237457, "learning_rate": 9.262927340344296e-06, "loss": 0.567, "step": 928 }, { "epoch": 0.2, "grad_norm": 0.18826404213905334, "learning_rate": 9.261102937365468e-06, "loss": 0.5309, "step": 929 }, { "epoch": 0.2, "grad_norm": 0.18732258677482605, "learning_rate": 9.259276459421655e-06, "loss": 0.525, "step": 930 }, { "epoch": 0.2, "grad_norm": 0.176020547747612, "learning_rate": 9.257447907402272e-06, "loss": 0.5187, "step": 931 }, { "epoch": 0.2, "grad_norm": 0.15038305521011353, "learning_rate": 9.255617282197739e-06, "loss": 0.5049, "step": 932 }, { "epoch": 0.2, "grad_norm": 0.15459555387496948, "learning_rate": 9.253784584699488e-06, "loss": 0.5021, "step": 933 }, { "epoch": 0.2, "grad_norm": 0.16818863153457642, "learning_rate": 9.25194981579996e-06, "loss": 0.5109, "step": 934 }, { "epoch": 0.2, "grad_norm": 0.158711776137352, "learning_rate": 9.250112976392608e-06, "loss": 0.5235, "step": 935 }, { "epoch": 0.2, "grad_norm": 0.13350459933280945, "learning_rate": 9.248274067371886e-06, "loss": 0.5624, "step": 936 }, { "epoch": 0.2, "grad_norm": 
0.16148029267787933, "learning_rate": 9.24643308963326e-06, "loss": 0.5562, "step": 937 }, { "epoch": 0.2, "grad_norm": 0.17886267602443695, "learning_rate": 9.244590044073205e-06, "loss": 0.5252, "step": 938 }, { "epoch": 0.2, "grad_norm": 0.18493221700191498, "learning_rate": 9.2427449315892e-06, "loss": 0.5195, "step": 939 }, { "epoch": 0.2, "grad_norm": 0.1529918760061264, "learning_rate": 9.240897753079734e-06, "loss": 0.517, "step": 940 }, { "epoch": 0.2, "grad_norm": 0.18862253427505493, "learning_rate": 9.239048509444296e-06, "loss": 0.5214, "step": 941 }, { "epoch": 0.2, "grad_norm": 0.1629784107208252, "learning_rate": 9.237197201583386e-06, "loss": 0.5421, "step": 942 }, { "epoch": 0.2, "grad_norm": 0.2280578315258026, "learning_rate": 9.235343830398506e-06, "loss": 0.5033, "step": 943 }, { "epoch": 0.2, "grad_norm": 0.15682753920555115, "learning_rate": 9.233488396792167e-06, "loss": 0.562, "step": 944 }, { "epoch": 0.2, "grad_norm": 0.16542381048202515, "learning_rate": 9.231630901667879e-06, "loss": 0.5448, "step": 945 }, { "epoch": 0.2, "grad_norm": 0.1738227754831314, "learning_rate": 9.22977134593016e-06, "loss": 0.5662, "step": 946 }, { "epoch": 0.2, "grad_norm": 0.13837432861328125, "learning_rate": 9.227909730484527e-06, "loss": 0.5259, "step": 947 }, { "epoch": 0.2, "grad_norm": 0.1606243997812271, "learning_rate": 9.226046056237508e-06, "loss": 0.5666, "step": 948 }, { "epoch": 0.2, "grad_norm": 0.14131122827529907, "learning_rate": 9.224180324096623e-06, "loss": 0.5486, "step": 949 }, { "epoch": 0.2, "grad_norm": 0.13392627239227295, "learning_rate": 9.222312534970403e-06, "loss": 0.4792, "step": 950 }, { "epoch": 0.2, "grad_norm": 0.1538127064704895, "learning_rate": 9.220442689768376e-06, "loss": 0.484, "step": 951 }, { "epoch": 0.21, "grad_norm": 0.13414621353149414, "learning_rate": 9.218570789401071e-06, "loss": 0.5123, "step": 952 }, { "epoch": 0.21, "grad_norm": 0.17911511659622192, "learning_rate": 9.21669683478002e-06, "loss": 0.5549, "step": 953 }, { "epoch": 0.21, "grad_norm": 0.19138379395008087, "learning_rate": 9.214820826817754e-06, "loss": 0.4892, "step": 954 }, { "epoch": 0.21, "grad_norm": 0.20988555252552032, "learning_rate": 9.212942766427806e-06, "loss": 0.498, "step": 955 }, { "epoch": 0.21, "grad_norm": 0.13097749650478363, "learning_rate": 9.211062654524705e-06, "loss": 0.4603, "step": 956 }, { "epoch": 0.21, "grad_norm": 0.1466490477323532, "learning_rate": 9.20918049202398e-06, "loss": 0.4924, "step": 957 }, { "epoch": 0.21, "grad_norm": 0.23887225985527039, "learning_rate": 9.207296279842162e-06, "loss": 0.5725, "step": 958 }, { "epoch": 0.21, "grad_norm": 0.13960181176662445, "learning_rate": 9.205410018896775e-06, "loss": 0.5444, "step": 959 }, { "epoch": 0.21, "grad_norm": 0.21269811689853668, "learning_rate": 9.203521710106344e-06, "loss": 0.5672, "step": 960 }, { "epoch": 0.21, "grad_norm": 0.1967892199754715, "learning_rate": 9.201631354390391e-06, "loss": 0.5674, "step": 961 }, { "epoch": 0.21, "grad_norm": 0.20892930030822754, "learning_rate": 9.199738952669431e-06, "loss": 0.4915, "step": 962 }, { "epoch": 0.21, "grad_norm": 0.15803126990795135, "learning_rate": 9.197844505864982e-06, "loss": 0.4839, "step": 963 }, { "epoch": 0.21, "grad_norm": 0.17779715359210968, "learning_rate": 9.195948014899551e-06, "loss": 0.5204, "step": 964 }, { "epoch": 0.21, "grad_norm": 0.1472802758216858, "learning_rate": 9.194049480696647e-06, "loss": 0.5691, "step": 965 }, { "epoch": 0.21, "grad_norm": 0.19076858460903168, "learning_rate": 
9.192148904180769e-06, "loss": 0.555, "step": 966 }, { "epoch": 0.21, "grad_norm": 0.15820792317390442, "learning_rate": 9.19024628627741e-06, "loss": 0.5462, "step": 967 }, { "epoch": 0.21, "grad_norm": 0.1319994479417801, "learning_rate": 9.188341627913061e-06, "loss": 0.5487, "step": 968 }, { "epoch": 0.21, "grad_norm": 0.24205906689167023, "learning_rate": 9.186434930015205e-06, "loss": 0.518, "step": 969 }, { "epoch": 0.21, "grad_norm": 0.15955299139022827, "learning_rate": 9.184526193512318e-06, "loss": 0.5596, "step": 970 }, { "epoch": 0.21, "grad_norm": 0.16520148515701294, "learning_rate": 9.182615419333867e-06, "loss": 0.5647, "step": 971 }, { "epoch": 0.21, "grad_norm": 0.2001345306634903, "learning_rate": 9.180702608410314e-06, "loss": 0.544, "step": 972 }, { "epoch": 0.21, "grad_norm": 0.17887279391288757, "learning_rate": 9.178787761673111e-06, "loss": 0.5225, "step": 973 }, { "epoch": 0.21, "grad_norm": 0.15997150540351868, "learning_rate": 9.176870880054704e-06, "loss": 0.5674, "step": 974 }, { "epoch": 0.21, "grad_norm": 0.14125515520572662, "learning_rate": 9.174951964488528e-06, "loss": 0.5542, "step": 975 }, { "epoch": 0.21, "grad_norm": 0.1298058182001114, "learning_rate": 9.173031015909005e-06, "loss": 0.5015, "step": 976 }, { "epoch": 0.21, "grad_norm": 0.17486491799354553, "learning_rate": 9.17110803525155e-06, "loss": 0.569, "step": 977 }, { "epoch": 0.21, "grad_norm": 0.18652723729610443, "learning_rate": 9.169183023452574e-06, "loss": 0.5062, "step": 978 }, { "epoch": 0.21, "grad_norm": 0.1338779628276825, "learning_rate": 9.167255981449466e-06, "loss": 0.5122, "step": 979 }, { "epoch": 0.21, "grad_norm": 0.13061174750328064, "learning_rate": 9.165326910180608e-06, "loss": 0.4903, "step": 980 }, { "epoch": 0.21, "grad_norm": 0.13457538187503815, "learning_rate": 9.163395810585374e-06, "loss": 0.5316, "step": 981 }, { "epoch": 0.21, "grad_norm": 0.14567075669765472, "learning_rate": 9.161462683604118e-06, "loss": 0.5241, "step": 982 }, { "epoch": 0.21, "grad_norm": 0.2161962240934372, "learning_rate": 9.159527530178191e-06, "loss": 0.513, "step": 983 }, { "epoch": 0.21, "grad_norm": 0.14327682554721832, "learning_rate": 9.157590351249923e-06, "loss": 0.5493, "step": 984 }, { "epoch": 0.21, "grad_norm": 0.14309169352054596, "learning_rate": 9.155651147762631e-06, "loss": 0.514, "step": 985 }, { "epoch": 0.21, "grad_norm": 0.16566166281700134, "learning_rate": 9.153709920660624e-06, "loss": 0.4916, "step": 986 }, { "epoch": 0.21, "grad_norm": 0.18244121968746185, "learning_rate": 9.151766670889186e-06, "loss": 0.5397, "step": 987 }, { "epoch": 0.21, "grad_norm": 0.1684887707233429, "learning_rate": 9.149821399394597e-06, "loss": 0.5094, "step": 988 }, { "epoch": 0.21, "grad_norm": 0.15885643661022186, "learning_rate": 9.147874107124114e-06, "loss": 0.5258, "step": 989 }, { "epoch": 0.21, "grad_norm": 0.2527085542678833, "learning_rate": 9.145924795025984e-06, "loss": 0.5456, "step": 990 }, { "epoch": 0.21, "grad_norm": 0.20791400969028473, "learning_rate": 9.14397346404943e-06, "loss": 0.5137, "step": 991 }, { "epoch": 0.21, "grad_norm": 0.18550600111484528, "learning_rate": 9.142020115144662e-06, "loss": 0.4834, "step": 992 }, { "epoch": 0.21, "grad_norm": 0.15677522122859955, "learning_rate": 9.140064749262876e-06, "loss": 0.5201, "step": 993 }, { "epoch": 0.21, "grad_norm": 0.15685126185417175, "learning_rate": 9.138107367356247e-06, "loss": 0.4838, "step": 994 }, { "epoch": 0.21, "grad_norm": 0.13539238274097443, "learning_rate": 9.136147970377926e-06, "loss": 
0.5323, "step": 995 }, { "epoch": 0.21, "grad_norm": 0.18492737412452698, "learning_rate": 9.134186559282058e-06, "loss": 0.5457, "step": 996 }, { "epoch": 0.21, "grad_norm": 0.14817145466804504, "learning_rate": 9.132223135023759e-06, "loss": 0.5151, "step": 997 }, { "epoch": 0.21, "grad_norm": 0.17167848348617554, "learning_rate": 9.130257698559129e-06, "loss": 0.5397, "step": 998 }, { "epoch": 0.22, "grad_norm": 0.15762774646282196, "learning_rate": 9.128290250845244e-06, "loss": 0.527, "step": 999 }, { "epoch": 0.22, "grad_norm": 0.20650818943977356, "learning_rate": 9.126320792840165e-06, "loss": 0.5657, "step": 1000 }, { "epoch": 0.22, "grad_norm": 0.192567840218544, "learning_rate": 9.124349325502928e-06, "loss": 0.5291, "step": 1001 }, { "epoch": 0.22, "grad_norm": 0.13800346851348877, "learning_rate": 9.12237584979355e-06, "loss": 0.526, "step": 1002 }, { "epoch": 0.22, "grad_norm": 0.12781374156475067, "learning_rate": 9.120400366673024e-06, "loss": 0.5068, "step": 1003 }, { "epoch": 0.22, "grad_norm": 0.1455235779285431, "learning_rate": 9.11842287710332e-06, "loss": 0.4949, "step": 1004 }, { "epoch": 0.22, "grad_norm": 0.16621056199073792, "learning_rate": 9.116443382047391e-06, "loss": 0.5166, "step": 1005 }, { "epoch": 0.22, "grad_norm": 0.19221191108226776, "learning_rate": 9.114461882469154e-06, "loss": 0.5088, "step": 1006 }, { "epoch": 0.22, "grad_norm": 0.15902382135391235, "learning_rate": 9.112478379333517e-06, "loss": 0.5388, "step": 1007 }, { "epoch": 0.22, "grad_norm": 0.13084392249584198, "learning_rate": 9.110492873606351e-06, "loss": 0.4672, "step": 1008 }, { "epoch": 0.22, "grad_norm": 0.15393121540546417, "learning_rate": 9.108505366254512e-06, "loss": 0.5063, "step": 1009 }, { "epoch": 0.22, "grad_norm": 0.16303934156894684, "learning_rate": 9.106515858245825e-06, "loss": 0.545, "step": 1010 }, { "epoch": 0.22, "grad_norm": 0.16543173789978027, "learning_rate": 9.10452435054909e-06, "loss": 0.5345, "step": 1011 }, { "epoch": 0.22, "grad_norm": 0.16311848163604736, "learning_rate": 9.102530844134084e-06, "loss": 0.4611, "step": 1012 }, { "epoch": 0.22, "grad_norm": 0.16494883596897125, "learning_rate": 9.10053533997155e-06, "loss": 0.4955, "step": 1013 }, { "epoch": 0.22, "grad_norm": 0.14451864361763, "learning_rate": 9.098537839033213e-06, "loss": 0.4997, "step": 1014 }, { "epoch": 0.22, "grad_norm": 0.20046649873256683, "learning_rate": 9.096538342291763e-06, "loss": 0.5718, "step": 1015 }, { "epoch": 0.22, "grad_norm": 0.1361169070005417, "learning_rate": 9.094536850720867e-06, "loss": 0.4561, "step": 1016 }, { "epoch": 0.22, "grad_norm": 0.1675615757703781, "learning_rate": 9.09253336529516e-06, "loss": 0.5372, "step": 1017 }, { "epoch": 0.22, "grad_norm": 0.22339864075183868, "learning_rate": 9.090527886990249e-06, "loss": 0.5611, "step": 1018 }, { "epoch": 0.22, "grad_norm": 0.17522381246089935, "learning_rate": 9.088520416782712e-06, "loss": 0.5352, "step": 1019 }, { "epoch": 0.22, "grad_norm": 0.13996882736682892, "learning_rate": 9.086510955650095e-06, "loss": 0.4947, "step": 1020 }, { "epoch": 0.22, "grad_norm": 0.15913517773151398, "learning_rate": 9.084499504570918e-06, "loss": 0.4947, "step": 1021 }, { "epoch": 0.22, "grad_norm": 0.17235067486763, "learning_rate": 9.082486064524663e-06, "loss": 0.53, "step": 1022 }, { "epoch": 0.22, "grad_norm": 0.2162034660577774, "learning_rate": 9.080470636491787e-06, "loss": 0.4904, "step": 1023 }, { "epoch": 0.22, "grad_norm": 0.21353678405284882, "learning_rate": 9.078453221453714e-06, "loss": 0.5088, 
"step": 1024 }, { "epoch": 0.22, "grad_norm": 0.1277047097682953, "learning_rate": 9.076433820392831e-06, "loss": 0.5207, "step": 1025 }, { "epoch": 0.22, "grad_norm": 0.15845198929309845, "learning_rate": 9.074412434292496e-06, "loss": 0.5951, "step": 1026 }, { "epoch": 0.22, "grad_norm": 0.17977949976921082, "learning_rate": 9.072389064137035e-06, "loss": 0.5098, "step": 1027 }, { "epoch": 0.22, "grad_norm": 0.15521718561649323, "learning_rate": 9.070363710911736e-06, "loss": 0.5513, "step": 1028 }, { "epoch": 0.22, "grad_norm": 0.14528630673885345, "learning_rate": 9.068336375602853e-06, "loss": 0.4895, "step": 1029 }, { "epoch": 0.22, "grad_norm": 0.16791880130767822, "learning_rate": 9.066307059197612e-06, "loss": 0.528, "step": 1030 }, { "epoch": 0.22, "grad_norm": 0.1570877581834793, "learning_rate": 9.064275762684194e-06, "loss": 0.4957, "step": 1031 }, { "epoch": 0.22, "grad_norm": 0.130596324801445, "learning_rate": 9.062242487051752e-06, "loss": 0.5338, "step": 1032 }, { "epoch": 0.22, "grad_norm": 0.14908380806446075, "learning_rate": 9.060207233290396e-06, "loss": 0.5295, "step": 1033 }, { "epoch": 0.22, "grad_norm": 0.18400724232196808, "learning_rate": 9.058170002391205e-06, "loss": 0.5265, "step": 1034 }, { "epoch": 0.22, "grad_norm": 0.1491273045539856, "learning_rate": 9.05613079534622e-06, "loss": 0.4974, "step": 1035 }, { "epoch": 0.22, "grad_norm": 0.1835760623216629, "learning_rate": 9.05408961314844e-06, "loss": 0.5317, "step": 1036 }, { "epoch": 0.22, "grad_norm": 0.14263573288917542, "learning_rate": 9.052046456791829e-06, "loss": 0.4928, "step": 1037 }, { "epoch": 0.22, "grad_norm": 0.3876129686832428, "learning_rate": 9.050001327271314e-06, "loss": 0.5149, "step": 1038 }, { "epoch": 0.22, "grad_norm": 0.16249504685401917, "learning_rate": 9.04795422558278e-06, "loss": 0.5251, "step": 1039 }, { "epoch": 0.22, "grad_norm": 0.16931766271591187, "learning_rate": 9.045905152723074e-06, "loss": 0.5532, "step": 1040 }, { "epoch": 0.22, "grad_norm": 0.1582767814397812, "learning_rate": 9.043854109689998e-06, "loss": 0.4976, "step": 1041 }, { "epoch": 0.22, "grad_norm": 0.15859778225421906, "learning_rate": 9.041801097482323e-06, "loss": 0.4995, "step": 1042 }, { "epoch": 0.22, "grad_norm": 0.18055035173892975, "learning_rate": 9.03974611709977e-06, "loss": 0.493, "step": 1043 }, { "epoch": 0.22, "grad_norm": 0.16349811851978302, "learning_rate": 9.037689169543024e-06, "loss": 0.5102, "step": 1044 }, { "epoch": 0.23, "grad_norm": 0.19477395713329315, "learning_rate": 9.035630255813724e-06, "loss": 0.5361, "step": 1045 }, { "epoch": 0.23, "grad_norm": 0.2538851499557495, "learning_rate": 9.033569376914467e-06, "loss": 0.5118, "step": 1046 }, { "epoch": 0.23, "grad_norm": 0.16743601858615875, "learning_rate": 9.031506533848811e-06, "loss": 0.5127, "step": 1047 }, { "epoch": 0.23, "grad_norm": 0.1517488956451416, "learning_rate": 9.029441727621267e-06, "loss": 0.4791, "step": 1048 }, { "epoch": 0.23, "grad_norm": 0.17050126194953918, "learning_rate": 9.0273749592373e-06, "loss": 0.5652, "step": 1049 }, { "epoch": 0.23, "grad_norm": 0.20682963728904724, "learning_rate": 9.025306229703334e-06, "loss": 0.5183, "step": 1050 }, { "epoch": 0.23, "grad_norm": 0.16146351397037506, "learning_rate": 9.02323554002675e-06, "loss": 0.5112, "step": 1051 }, { "epoch": 0.23, "grad_norm": 0.23130019009113312, "learning_rate": 9.021162891215879e-06, "loss": 0.5573, "step": 1052 }, { "epoch": 0.23, "grad_norm": 0.15757335722446442, "learning_rate": 9.019088284280004e-06, "loss": 0.5232, 
"step": 1053 }, { "epoch": 0.23, "grad_norm": 0.14029166102409363, "learning_rate": 9.017011720229368e-06, "loss": 0.5329, "step": 1054 }, { "epoch": 0.23, "grad_norm": 0.14857496321201324, "learning_rate": 9.014933200075165e-06, "loss": 0.514, "step": 1055 }, { "epoch": 0.23, "grad_norm": 0.17802828550338745, "learning_rate": 9.012852724829539e-06, "loss": 0.5324, "step": 1056 }, { "epoch": 0.23, "grad_norm": 0.18392032384872437, "learning_rate": 9.010770295505587e-06, "loss": 0.603, "step": 1057 }, { "epoch": 0.23, "grad_norm": 0.13357198238372803, "learning_rate": 9.008685913117361e-06, "loss": 0.4848, "step": 1058 }, { "epoch": 0.23, "grad_norm": 0.2151726484298706, "learning_rate": 9.006599578679859e-06, "loss": 0.4963, "step": 1059 }, { "epoch": 0.23, "grad_norm": 0.1715989112854004, "learning_rate": 9.00451129320903e-06, "loss": 0.5639, "step": 1060 }, { "epoch": 0.23, "grad_norm": 0.19878040254116058, "learning_rate": 9.002421057721781e-06, "loss": 0.5452, "step": 1061 }, { "epoch": 0.23, "grad_norm": 0.16640903055667877, "learning_rate": 9.000328873235955e-06, "loss": 0.5471, "step": 1062 }, { "epoch": 0.23, "grad_norm": 0.15267455577850342, "learning_rate": 8.998234740770358e-06, "loss": 0.545, "step": 1063 }, { "epoch": 0.23, "grad_norm": 0.1756962686777115, "learning_rate": 8.996138661344734e-06, "loss": 0.5793, "step": 1064 }, { "epoch": 0.23, "grad_norm": 0.1579316258430481, "learning_rate": 8.994040635979779e-06, "loss": 0.466, "step": 1065 }, { "epoch": 0.23, "grad_norm": 0.14408744871616364, "learning_rate": 8.99194066569714e-06, "loss": 0.5637, "step": 1066 }, { "epoch": 0.23, "grad_norm": 0.20260116457939148, "learning_rate": 8.989838751519404e-06, "loss": 0.5361, "step": 1067 }, { "epoch": 0.23, "grad_norm": 0.17308081686496735, "learning_rate": 8.987734894470111e-06, "loss": 0.5083, "step": 1068 }, { "epoch": 0.23, "grad_norm": 0.21290896832942963, "learning_rate": 8.985629095573743e-06, "loss": 0.5312, "step": 1069 }, { "epoch": 0.23, "grad_norm": 0.15569837391376495, "learning_rate": 8.983521355855731e-06, "loss": 0.5513, "step": 1070 }, { "epoch": 0.23, "grad_norm": 0.169041246175766, "learning_rate": 8.98141167634245e-06, "loss": 0.5262, "step": 1071 }, { "epoch": 0.23, "grad_norm": 0.15449997782707214, "learning_rate": 8.979300058061214e-06, "loss": 0.5301, "step": 1072 }, { "epoch": 0.23, "grad_norm": 0.15848426520824432, "learning_rate": 8.977186502040288e-06, "loss": 0.556, "step": 1073 }, { "epoch": 0.23, "grad_norm": 0.1425653100013733, "learning_rate": 8.97507100930888e-06, "loss": 0.489, "step": 1074 }, { "epoch": 0.23, "grad_norm": 0.1488298773765564, "learning_rate": 8.97295358089714e-06, "loss": 0.5091, "step": 1075 }, { "epoch": 0.23, "grad_norm": 0.2116803079843521, "learning_rate": 8.97083421783616e-06, "loss": 0.5654, "step": 1076 }, { "epoch": 0.23, "grad_norm": 0.17678038775920868, "learning_rate": 8.96871292115797e-06, "loss": 0.5485, "step": 1077 }, { "epoch": 0.23, "grad_norm": 0.2219185084104538, "learning_rate": 8.96658969189555e-06, "loss": 0.5414, "step": 1078 }, { "epoch": 0.23, "grad_norm": 0.18654341995716095, "learning_rate": 8.964464531082817e-06, "loss": 0.4603, "step": 1079 }, { "epoch": 0.23, "grad_norm": 0.29177331924438477, "learning_rate": 8.962337439754627e-06, "loss": 0.5267, "step": 1080 }, { "epoch": 0.23, "grad_norm": 0.15607115626335144, "learning_rate": 8.960208418946778e-06, "loss": 0.5295, "step": 1081 }, { "epoch": 0.23, "grad_norm": 0.161067396402359, "learning_rate": 8.958077469696007e-06, "loss": 0.5795, "step": 1082 
}, { "epoch": 0.23, "grad_norm": 0.1314525008201599, "learning_rate": 8.955944593039991e-06, "loss": 0.5274, "step": 1083 }, { "epoch": 0.23, "grad_norm": 0.1945776492357254, "learning_rate": 8.953809790017342e-06, "loss": 0.4744, "step": 1084 }, { "epoch": 0.23, "grad_norm": 0.1876978725194931, "learning_rate": 8.951673061667616e-06, "loss": 0.5036, "step": 1085 }, { "epoch": 0.23, "grad_norm": 0.1536783128976822, "learning_rate": 8.949534409031305e-06, "loss": 0.5387, "step": 1086 }, { "epoch": 0.23, "grad_norm": 0.15228869020938873, "learning_rate": 8.94739383314983e-06, "loss": 0.4566, "step": 1087 }, { "epoch": 0.23, "grad_norm": 0.17565909028053284, "learning_rate": 8.94525133506556e-06, "loss": 0.4965, "step": 1088 }, { "epoch": 0.23, "grad_norm": 0.17287708818912506, "learning_rate": 8.943106915821793e-06, "loss": 0.505, "step": 1089 }, { "epoch": 0.23, "grad_norm": 0.13172249495983124, "learning_rate": 8.940960576462763e-06, "loss": 0.522, "step": 1090 }, { "epoch": 0.24, "grad_norm": 0.14747697114944458, "learning_rate": 8.938812318033646e-06, "loss": 0.5058, "step": 1091 }, { "epoch": 0.24, "grad_norm": 0.16435351967811584, "learning_rate": 8.93666214158054e-06, "loss": 0.5572, "step": 1092 }, { "epoch": 0.24, "grad_norm": 0.12998394668102264, "learning_rate": 8.93451004815049e-06, "loss": 0.4825, "step": 1093 }, { "epoch": 0.24, "grad_norm": 0.2101740837097168, "learning_rate": 8.932356038791465e-06, "loss": 0.5399, "step": 1094 }, { "epoch": 0.24, "grad_norm": 0.14743265509605408, "learning_rate": 8.930200114552371e-06, "loss": 0.4891, "step": 1095 }, { "epoch": 0.24, "grad_norm": 0.19330647587776184, "learning_rate": 8.928042276483048e-06, "loss": 0.5756, "step": 1096 }, { "epoch": 0.24, "grad_norm": 0.14885154366493225, "learning_rate": 8.925882525634262e-06, "loss": 0.4704, "step": 1097 }, { "epoch": 0.24, "grad_norm": 0.17634066939353943, "learning_rate": 8.923720863057718e-06, "loss": 0.4969, "step": 1098 }, { "epoch": 0.24, "grad_norm": 0.16363896429538727, "learning_rate": 8.921557289806045e-06, "loss": 0.5074, "step": 1099 }, { "epoch": 0.24, "grad_norm": 0.20823244750499725, "learning_rate": 8.919391806932807e-06, "loss": 0.5217, "step": 1100 }, { "epoch": 0.24, "grad_norm": 0.16124127805233002, "learning_rate": 8.917224415492497e-06, "loss": 0.4827, "step": 1101 }, { "epoch": 0.24, "grad_norm": 0.16462095081806183, "learning_rate": 8.915055116540538e-06, "loss": 0.5878, "step": 1102 }, { "epoch": 0.24, "grad_norm": 0.1553676277399063, "learning_rate": 8.912883911133276e-06, "loss": 0.4883, "step": 1103 }, { "epoch": 0.24, "grad_norm": 0.17461282014846802, "learning_rate": 8.910710800327996e-06, "loss": 0.4893, "step": 1104 }, { "epoch": 0.24, "grad_norm": 0.179164856672287, "learning_rate": 8.908535785182902e-06, "loss": 0.4993, "step": 1105 }, { "epoch": 0.24, "grad_norm": 0.16661059856414795, "learning_rate": 8.906358866757128e-06, "loss": 0.4797, "step": 1106 }, { "epoch": 0.24, "grad_norm": 0.15980976819992065, "learning_rate": 8.904180046110736e-06, "loss": 0.5167, "step": 1107 }, { "epoch": 0.24, "grad_norm": 0.15015141665935516, "learning_rate": 8.901999324304713e-06, "loss": 0.4971, "step": 1108 }, { "epoch": 0.24, "grad_norm": 0.15872696042060852, "learning_rate": 8.899816702400973e-06, "loss": 0.5469, "step": 1109 }, { "epoch": 0.24, "grad_norm": 0.13243776559829712, "learning_rate": 8.897632181462354e-06, "loss": 0.5135, "step": 1110 }, { "epoch": 0.24, "grad_norm": 0.1544090360403061, "learning_rate": 8.895445762552618e-06, "loss": 0.4792, "step": 1111 }, 
{ "epoch": 0.24, "grad_norm": 0.15280136466026306, "learning_rate": 8.893257446736455e-06, "loss": 0.4888, "step": 1112 }, { "epoch": 0.24, "grad_norm": 0.14897377789020538, "learning_rate": 8.891067235079473e-06, "loss": 0.4846, "step": 1113 }, { "epoch": 0.24, "grad_norm": 0.21805572509765625, "learning_rate": 8.888875128648208e-06, "loss": 0.5184, "step": 1114 }, { "epoch": 0.24, "grad_norm": 0.15725421905517578, "learning_rate": 8.886681128510118e-06, "loss": 0.5857, "step": 1115 }, { "epoch": 0.24, "grad_norm": 0.1463284194469452, "learning_rate": 8.884485235733579e-06, "loss": 0.4969, "step": 1116 }, { "epoch": 0.24, "grad_norm": 0.1490708589553833, "learning_rate": 8.882287451387894e-06, "loss": 0.5814, "step": 1117 }, { "epoch": 0.24, "grad_norm": 0.20178869366645813, "learning_rate": 8.880087776543287e-06, "loss": 0.5091, "step": 1118 }, { "epoch": 0.24, "grad_norm": 0.1965067982673645, "learning_rate": 8.877886212270897e-06, "loss": 0.4933, "step": 1119 }, { "epoch": 0.24, "grad_norm": 0.16523069143295288, "learning_rate": 8.875682759642786e-06, "loss": 0.5445, "step": 1120 }, { "epoch": 0.24, "grad_norm": 0.1690714955329895, "learning_rate": 8.873477419731938e-06, "loss": 0.5567, "step": 1121 }, { "epoch": 0.24, "grad_norm": 0.18909381330013275, "learning_rate": 8.871270193612254e-06, "loss": 0.5133, "step": 1122 }, { "epoch": 0.24, "grad_norm": 0.1338469237089157, "learning_rate": 8.869061082358555e-06, "loss": 0.4958, "step": 1123 }, { "epoch": 0.24, "grad_norm": 0.1580471694469452, "learning_rate": 8.866850087046574e-06, "loss": 0.5595, "step": 1124 }, { "epoch": 0.24, "grad_norm": 0.1788654625415802, "learning_rate": 8.864637208752972e-06, "loss": 0.5481, "step": 1125 }, { "epoch": 0.24, "grad_norm": 0.20803380012512207, "learning_rate": 8.862422448555317e-06, "loss": 0.5478, "step": 1126 }, { "epoch": 0.24, "grad_norm": 0.19867488741874695, "learning_rate": 8.860205807532097e-06, "loss": 0.4927, "step": 1127 }, { "epoch": 0.24, "grad_norm": 0.13807149231433868, "learning_rate": 8.857987286762718e-06, "loss": 0.5021, "step": 1128 }, { "epoch": 0.24, "grad_norm": 0.15068547427654266, "learning_rate": 8.8557668873275e-06, "loss": 0.4993, "step": 1129 }, { "epoch": 0.24, "grad_norm": 0.14488062262535095, "learning_rate": 8.853544610307675e-06, "loss": 0.4815, "step": 1130 }, { "epoch": 0.24, "grad_norm": 0.15107618272304535, "learning_rate": 8.851320456785394e-06, "loss": 0.5086, "step": 1131 }, { "epoch": 0.24, "grad_norm": 0.16421128809452057, "learning_rate": 8.84909442784372e-06, "loss": 0.4844, "step": 1132 }, { "epoch": 0.24, "grad_norm": 0.17027032375335693, "learning_rate": 8.846866524566624e-06, "loss": 0.4721, "step": 1133 }, { "epoch": 0.24, "grad_norm": 0.2614370584487915, "learning_rate": 8.844636748038999e-06, "loss": 0.5745, "step": 1134 }, { "epoch": 0.24, "grad_norm": 0.15496228635311127, "learning_rate": 8.842405099346645e-06, "loss": 0.5499, "step": 1135 }, { "epoch": 0.24, "grad_norm": 0.1893419474363327, "learning_rate": 8.840171579576273e-06, "loss": 0.4691, "step": 1136 }, { "epoch": 0.24, "grad_norm": 0.13554450869560242, "learning_rate": 8.837936189815507e-06, "loss": 0.54, "step": 1137 }, { "epoch": 0.25, "grad_norm": 0.12900054454803467, "learning_rate": 8.83569893115288e-06, "loss": 0.479, "step": 1138 }, { "epoch": 0.25, "grad_norm": 0.1491711586713791, "learning_rate": 8.83345980467784e-06, "loss": 0.5322, "step": 1139 }, { "epoch": 0.25, "grad_norm": 0.16243106126785278, "learning_rate": 8.831218811480735e-06, "loss": 0.4434, "step": 1140 }, { 
"epoch": 0.25, "grad_norm": 0.16812367737293243, "learning_rate": 8.828975952652833e-06, "loss": 0.5024, "step": 1141 }, { "epoch": 0.25, "grad_norm": 0.1857740879058838, "learning_rate": 8.8267312292863e-06, "loss": 0.5696, "step": 1142 }, { "epoch": 0.25, "grad_norm": 0.13055641949176788, "learning_rate": 8.824484642474217e-06, "loss": 0.4787, "step": 1143 }, { "epoch": 0.25, "grad_norm": 0.17672252655029297, "learning_rate": 8.822236193310574e-06, "loss": 0.5788, "step": 1144 }, { "epoch": 0.25, "grad_norm": 0.15305279195308685, "learning_rate": 8.81998588289026e-06, "loss": 0.503, "step": 1145 }, { "epoch": 0.25, "grad_norm": 0.15624657273292542, "learning_rate": 8.817733712309078e-06, "loss": 0.5346, "step": 1146 }, { "epoch": 0.25, "grad_norm": 0.14786425232887268, "learning_rate": 8.815479682663729e-06, "loss": 0.5083, "step": 1147 }, { "epoch": 0.25, "grad_norm": 0.19573761522769928, "learning_rate": 8.813223795051828e-06, "loss": 0.5298, "step": 1148 }, { "epoch": 0.25, "grad_norm": 0.1662847250699997, "learning_rate": 8.810966050571888e-06, "loss": 0.533, "step": 1149 }, { "epoch": 0.25, "grad_norm": 0.1873636543750763, "learning_rate": 8.80870645032333e-06, "loss": 0.4825, "step": 1150 }, { "epoch": 0.25, "grad_norm": 0.1731029748916626, "learning_rate": 8.806444995406475e-06, "loss": 0.488, "step": 1151 }, { "epoch": 0.25, "grad_norm": 0.18040412664413452, "learning_rate": 8.804181686922555e-06, "loss": 0.5282, "step": 1152 }, { "epoch": 0.25, "grad_norm": 0.15593977272510529, "learning_rate": 8.801916525973696e-06, "loss": 0.5124, "step": 1153 }, { "epoch": 0.25, "grad_norm": 0.15248659253120422, "learning_rate": 8.799649513662926e-06, "loss": 0.513, "step": 1154 }, { "epoch": 0.25, "grad_norm": 0.14471983909606934, "learning_rate": 8.797380651094182e-06, "loss": 0.504, "step": 1155 }, { "epoch": 0.25, "grad_norm": 0.1660238355398178, "learning_rate": 8.795109939372298e-06, "loss": 0.5266, "step": 1156 }, { "epoch": 0.25, "grad_norm": 0.15838298201560974, "learning_rate": 8.792837379603005e-06, "loss": 0.5438, "step": 1157 }, { "epoch": 0.25, "grad_norm": 0.17816348373889923, "learning_rate": 8.79056297289294e-06, "loss": 0.5428, "step": 1158 }, { "epoch": 0.25, "grad_norm": 0.1319669485092163, "learning_rate": 8.788286720349638e-06, "loss": 0.5487, "step": 1159 }, { "epoch": 0.25, "grad_norm": 0.14675050973892212, "learning_rate": 8.786008623081526e-06, "loss": 0.5409, "step": 1160 }, { "epoch": 0.25, "grad_norm": 0.16564631462097168, "learning_rate": 8.783728682197935e-06, "loss": 0.5405, "step": 1161 }, { "epoch": 0.25, "grad_norm": 0.1422412395477295, "learning_rate": 8.781446898809101e-06, "loss": 0.5069, "step": 1162 }, { "epoch": 0.25, "grad_norm": 0.16499634087085724, "learning_rate": 8.77916327402614e-06, "loss": 0.5038, "step": 1163 }, { "epoch": 0.25, "grad_norm": 0.19437891244888306, "learning_rate": 8.776877808961082e-06, "loss": 0.5249, "step": 1164 }, { "epoch": 0.25, "grad_norm": 0.16480234265327454, "learning_rate": 8.774590504726842e-06, "loss": 0.5104, "step": 1165 }, { "epoch": 0.25, "grad_norm": 0.12336334586143494, "learning_rate": 8.772301362437233e-06, "loss": 0.497, "step": 1166 }, { "epoch": 0.25, "grad_norm": 0.19107873737812042, "learning_rate": 8.770010383206967e-06, "loss": 0.5441, "step": 1167 }, { "epoch": 0.25, "grad_norm": 0.16102471947669983, "learning_rate": 8.767717568151643e-06, "loss": 0.4736, "step": 1168 }, { "epoch": 0.25, "grad_norm": 0.14254657924175262, "learning_rate": 8.765422918387764e-06, "loss": 0.5339, "step": 1169 }, { 
"epoch": 0.25, "grad_norm": 0.1567242443561554, "learning_rate": 8.763126435032717e-06, "loss": 0.5516, "step": 1170 }, { "epoch": 0.25, "grad_norm": 0.16098615527153015, "learning_rate": 8.760828119204787e-06, "loss": 0.5642, "step": 1171 }, { "epoch": 0.25, "grad_norm": 0.16631126403808594, "learning_rate": 8.758527972023151e-06, "loss": 0.4856, "step": 1172 }, { "epoch": 0.25, "grad_norm": 0.15367335081100464, "learning_rate": 8.756225994607877e-06, "loss": 0.5066, "step": 1173 }, { "epoch": 0.25, "grad_norm": 0.14037656784057617, "learning_rate": 8.753922188079923e-06, "loss": 0.5029, "step": 1174 }, { "epoch": 0.25, "grad_norm": 0.15949761867523193, "learning_rate": 8.75161655356114e-06, "loss": 0.4636, "step": 1175 }, { "epoch": 0.25, "grad_norm": 0.1654081493616104, "learning_rate": 8.749309092174267e-06, "loss": 0.5005, "step": 1176 }, { "epoch": 0.25, "grad_norm": 0.2345263659954071, "learning_rate": 8.746999805042932e-06, "loss": 0.5147, "step": 1177 }, { "epoch": 0.25, "grad_norm": 0.13465999066829681, "learning_rate": 8.744688693291658e-06, "loss": 0.4982, "step": 1178 }, { "epoch": 0.25, "grad_norm": 0.1473112851381302, "learning_rate": 8.74237575804585e-06, "loss": 0.4857, "step": 1179 }, { "epoch": 0.25, "grad_norm": 0.18562085926532745, "learning_rate": 8.740061000431805e-06, "loss": 0.505, "step": 1180 }, { "epoch": 0.25, "grad_norm": 0.15015870332717896, "learning_rate": 8.737744421576702e-06, "loss": 0.5246, "step": 1181 }, { "epoch": 0.25, "grad_norm": 0.16794438660144806, "learning_rate": 8.735426022608611e-06, "loss": 0.5393, "step": 1182 }, { "epoch": 0.25, "grad_norm": 0.15591543912887573, "learning_rate": 8.73310580465649e-06, "loss": 0.4964, "step": 1183 }, { "epoch": 0.26, "grad_norm": 0.2005312144756317, "learning_rate": 8.73078376885018e-06, "loss": 0.5, "step": 1184 }, { "epoch": 0.26, "grad_norm": 0.15269523859024048, "learning_rate": 8.728459916320406e-06, "loss": 0.509, "step": 1185 }, { "epoch": 0.26, "grad_norm": 0.14824025332927704, "learning_rate": 8.726134248198782e-06, "loss": 0.5186, "step": 1186 }, { "epoch": 0.26, "grad_norm": 0.15085245668888092, "learning_rate": 8.723806765617801e-06, "loss": 0.4852, "step": 1187 }, { "epoch": 0.26, "grad_norm": 0.1564967930316925, "learning_rate": 8.721477469710845e-06, "loss": 0.5095, "step": 1188 }, { "epoch": 0.26, "grad_norm": 0.1731698215007782, "learning_rate": 8.719146361612172e-06, "loss": 0.5231, "step": 1189 }, { "epoch": 0.26, "grad_norm": 0.18087385594844818, "learning_rate": 8.71681344245693e-06, "loss": 0.5556, "step": 1190 }, { "epoch": 0.26, "grad_norm": 0.1844499558210373, "learning_rate": 8.714478713381144e-06, "loss": 0.5893, "step": 1191 }, { "epoch": 0.26, "grad_norm": 0.12835489213466644, "learning_rate": 8.712142175521723e-06, "loss": 0.4653, "step": 1192 }, { "epoch": 0.26, "grad_norm": 0.1417992115020752, "learning_rate": 8.709803830016454e-06, "loss": 0.5421, "step": 1193 }, { "epoch": 0.26, "grad_norm": 0.13503408432006836, "learning_rate": 8.707463678004004e-06, "loss": 0.5036, "step": 1194 }, { "epoch": 0.26, "grad_norm": 0.1597423255443573, "learning_rate": 8.705121720623927e-06, "loss": 0.5046, "step": 1195 }, { "epoch": 0.26, "grad_norm": 0.1646643579006195, "learning_rate": 8.702777959016647e-06, "loss": 0.5126, "step": 1196 }, { "epoch": 0.26, "grad_norm": 0.18008291721343994, "learning_rate": 8.700432394323471e-06, "loss": 0.5419, "step": 1197 }, { "epoch": 0.26, "grad_norm": 0.14976496994495392, "learning_rate": 8.698085027686581e-06, "loss": 0.5095, "step": 1198 }, { "epoch": 
0.26, "grad_norm": 0.16157154738903046, "learning_rate": 8.695735860249041e-06, "loss": 0.5152, "step": 1199 }, { "epoch": 0.26, "grad_norm": 0.16819888353347778, "learning_rate": 8.69338489315479e-06, "loss": 0.5401, "step": 1200 }, { "epoch": 0.26, "grad_norm": 0.16953587532043457, "learning_rate": 8.691032127548643e-06, "loss": 0.5177, "step": 1201 }, { "epoch": 0.26, "grad_norm": 0.15358132123947144, "learning_rate": 8.68867756457629e-06, "loss": 0.547, "step": 1202 }, { "epoch": 0.26, "grad_norm": 0.13902026414871216, "learning_rate": 8.686321205384296e-06, "loss": 0.5487, "step": 1203 }, { "epoch": 0.26, "grad_norm": 0.1606639176607132, "learning_rate": 8.683963051120103e-06, "loss": 0.4611, "step": 1204 }, { "epoch": 0.26, "grad_norm": 0.14703510701656342, "learning_rate": 8.681603102932026e-06, "loss": 0.4999, "step": 1205 }, { "epoch": 0.26, "grad_norm": 0.19730383157730103, "learning_rate": 8.679241361969252e-06, "loss": 0.4937, "step": 1206 }, { "epoch": 0.26, "grad_norm": 0.1710227131843567, "learning_rate": 8.676877829381843e-06, "loss": 0.5255, "step": 1207 }, { "epoch": 0.26, "grad_norm": 0.18335406482219696, "learning_rate": 8.674512506320733e-06, "loss": 0.603, "step": 1208 }, { "epoch": 0.26, "grad_norm": 0.14981816709041595, "learning_rate": 8.67214539393773e-06, "loss": 0.4541, "step": 1209 }, { "epoch": 0.26, "grad_norm": 0.2136390507221222, "learning_rate": 8.669776493385506e-06, "loss": 0.5327, "step": 1210 }, { "epoch": 0.26, "grad_norm": 0.1298462301492691, "learning_rate": 8.667405805817613e-06, "loss": 0.5373, "step": 1211 }, { "epoch": 0.26, "grad_norm": 0.1850888431072235, "learning_rate": 8.665033332388466e-06, "loss": 0.5459, "step": 1212 }, { "epoch": 0.26, "grad_norm": 0.19591952860355377, "learning_rate": 8.662659074253355e-06, "loss": 0.5137, "step": 1213 }, { "epoch": 0.26, "grad_norm": 0.13489966094493866, "learning_rate": 8.660283032568435e-06, "loss": 0.5468, "step": 1214 }, { "epoch": 0.26, "grad_norm": 0.15992878377437592, "learning_rate": 8.657905208490732e-06, "loss": 0.5045, "step": 1215 }, { "epoch": 0.26, "grad_norm": 0.16097012162208557, "learning_rate": 8.655525603178137e-06, "loss": 0.5239, "step": 1216 }, { "epoch": 0.26, "grad_norm": 0.17989301681518555, "learning_rate": 8.653144217789414e-06, "loss": 0.5239, "step": 1217 }, { "epoch": 0.26, "grad_norm": 0.1628495454788208, "learning_rate": 8.650761053484188e-06, "loss": 0.5315, "step": 1218 }, { "epoch": 0.26, "grad_norm": 0.13146936893463135, "learning_rate": 8.648376111422954e-06, "loss": 0.5351, "step": 1219 }, { "epoch": 0.26, "grad_norm": 0.212355837225914, "learning_rate": 8.645989392767068e-06, "loss": 0.5092, "step": 1220 }, { "epoch": 0.26, "grad_norm": 0.11173799633979797, "learning_rate": 8.643600898678758e-06, "loss": 0.5176, "step": 1221 }, { "epoch": 0.26, "grad_norm": 0.1470513939857483, "learning_rate": 8.641210630321115e-06, "loss": 0.5159, "step": 1222 }, { "epoch": 0.26, "grad_norm": 0.1474858522415161, "learning_rate": 8.638818588858084e-06, "loss": 0.5103, "step": 1223 }, { "epoch": 0.26, "grad_norm": 0.13153494894504547, "learning_rate": 8.636424775454489e-06, "loss": 0.5596, "step": 1224 }, { "epoch": 0.26, "grad_norm": 0.1469038426876068, "learning_rate": 8.634029191276003e-06, "loss": 0.5363, "step": 1225 }, { "epoch": 0.26, "grad_norm": 0.15724244713783264, "learning_rate": 8.631631837489173e-06, "loss": 0.5318, "step": 1226 }, { "epoch": 0.26, "grad_norm": 0.16701483726501465, "learning_rate": 8.6292327152614e-06, "loss": 0.5219, "step": 1227 }, { "epoch": 0.26, 
"grad_norm": 0.1822412610054016, "learning_rate": 8.626831825760946e-06, "loss": 0.5067, "step": 1228 }, { "epoch": 0.26, "grad_norm": 0.14838603138923645, "learning_rate": 8.62442917015694e-06, "loss": 0.5298, "step": 1229 }, { "epoch": 0.26, "grad_norm": 0.13148529827594757, "learning_rate": 8.622024749619363e-06, "loss": 0.4947, "step": 1230 }, { "epoch": 0.27, "grad_norm": 0.1674978882074356, "learning_rate": 8.619618565319063e-06, "loss": 0.5674, "step": 1231 }, { "epoch": 0.27, "grad_norm": 0.2056237906217575, "learning_rate": 8.61721061842774e-06, "loss": 0.4931, "step": 1232 }, { "epoch": 0.27, "grad_norm": 0.1400204300880432, "learning_rate": 8.614800910117958e-06, "loss": 0.543, "step": 1233 }, { "epoch": 0.27, "grad_norm": 0.1407189816236496, "learning_rate": 8.612389441563136e-06, "loss": 0.5108, "step": 1234 }, { "epoch": 0.27, "grad_norm": 0.1611378788948059, "learning_rate": 8.60997621393755e-06, "loss": 0.4961, "step": 1235 }, { "epoch": 0.27, "grad_norm": 0.1521531641483307, "learning_rate": 8.60756122841633e-06, "loss": 0.4755, "step": 1236 }, { "epoch": 0.27, "grad_norm": 0.14714032411575317, "learning_rate": 8.60514448617547e-06, "loss": 0.5365, "step": 1237 }, { "epoch": 0.27, "grad_norm": 0.17980900406837463, "learning_rate": 8.602725988391814e-06, "loss": 0.5424, "step": 1238 }, { "epoch": 0.27, "grad_norm": 0.16438312828540802, "learning_rate": 8.600305736243057e-06, "loss": 0.5523, "step": 1239 }, { "epoch": 0.27, "grad_norm": 0.1427246630191803, "learning_rate": 8.597883730907757e-06, "loss": 0.5091, "step": 1240 }, { "epoch": 0.27, "grad_norm": 0.1325269341468811, "learning_rate": 8.59545997356532e-06, "loss": 0.481, "step": 1241 }, { "epoch": 0.27, "grad_norm": 0.17241443693637848, "learning_rate": 8.593034465396007e-06, "loss": 0.5071, "step": 1242 }, { "epoch": 0.27, "grad_norm": 0.14038234949111938, "learning_rate": 8.590607207580927e-06, "loss": 0.5394, "step": 1243 }, { "epoch": 0.27, "grad_norm": 0.20857305824756622, "learning_rate": 8.588178201302052e-06, "loss": 0.4944, "step": 1244 }, { "epoch": 0.27, "grad_norm": 0.1448458433151245, "learning_rate": 8.585747447742194e-06, "loss": 0.52, "step": 1245 }, { "epoch": 0.27, "grad_norm": 0.17979028820991516, "learning_rate": 8.583314948085023e-06, "loss": 0.5241, "step": 1246 }, { "epoch": 0.27, "grad_norm": 0.16653123497962952, "learning_rate": 8.580880703515052e-06, "loss": 0.5061, "step": 1247 }, { "epoch": 0.27, "grad_norm": 0.2052346169948578, "learning_rate": 8.578444715217652e-06, "loss": 0.471, "step": 1248 }, { "epoch": 0.27, "grad_norm": 0.1382577270269394, "learning_rate": 8.576006984379042e-06, "loss": 0.4621, "step": 1249 }, { "epoch": 0.27, "grad_norm": 0.17501065135002136, "learning_rate": 8.57356751218628e-06, "loss": 0.5761, "step": 1250 }, { "epoch": 0.27, "grad_norm": 0.14629067480564117, "learning_rate": 8.571126299827284e-06, "loss": 0.511, "step": 1251 }, { "epoch": 0.27, "grad_norm": 0.16205544769763947, "learning_rate": 8.568683348490817e-06, "loss": 0.5259, "step": 1252 }, { "epoch": 0.27, "grad_norm": 0.14176106452941895, "learning_rate": 8.566238659366477e-06, "loss": 0.5333, "step": 1253 }, { "epoch": 0.27, "grad_norm": 0.27345001697540283, "learning_rate": 8.563792233644725e-06, "loss": 0.5117, "step": 1254 }, { "epoch": 0.27, "grad_norm": 0.16053150594234467, "learning_rate": 8.561344072516858e-06, "loss": 0.5015, "step": 1255 }, { "epoch": 0.27, "grad_norm": 0.19150519371032715, "learning_rate": 8.558894177175019e-06, "loss": 0.5326, "step": 1256 }, { "epoch": 0.27, "grad_norm": 
0.14895778894424438, "learning_rate": 8.556442548812198e-06, "loss": 0.5247, "step": 1257 }, { "epoch": 0.27, "grad_norm": 0.16230621933937073, "learning_rate": 8.553989188622228e-06, "loss": 0.5634, "step": 1258 }, { "epoch": 0.27, "grad_norm": 0.15796539187431335, "learning_rate": 8.55153409779978e-06, "loss": 0.5686, "step": 1259 }, { "epoch": 0.27, "grad_norm": 0.15374596416950226, "learning_rate": 8.549077277540379e-06, "loss": 0.5287, "step": 1260 }, { "epoch": 0.27, "grad_norm": 0.14890524744987488, "learning_rate": 8.546618729040382e-06, "loss": 0.5112, "step": 1261 }, { "epoch": 0.27, "grad_norm": 0.1993798166513443, "learning_rate": 8.544158453496992e-06, "loss": 0.5229, "step": 1262 }, { "epoch": 0.27, "grad_norm": 0.16211991012096405, "learning_rate": 8.541696452108253e-06, "loss": 0.5332, "step": 1263 }, { "epoch": 0.27, "grad_norm": 0.2108837217092514, "learning_rate": 8.539232726073046e-06, "loss": 0.5223, "step": 1264 }, { "epoch": 0.27, "grad_norm": 0.14320197701454163, "learning_rate": 8.536767276591098e-06, "loss": 0.4906, "step": 1265 }, { "epoch": 0.27, "grad_norm": 0.14289528131484985, "learning_rate": 8.53430010486297e-06, "loss": 0.5253, "step": 1266 }, { "epoch": 0.27, "grad_norm": 0.1269850730895996, "learning_rate": 8.531831212090062e-06, "loss": 0.5145, "step": 1267 }, { "epoch": 0.27, "grad_norm": 0.18504297733306885, "learning_rate": 8.529360599474616e-06, "loss": 0.4976, "step": 1268 }, { "epoch": 0.27, "grad_norm": 0.13720788061618805, "learning_rate": 8.52688826821971e-06, "loss": 0.4952, "step": 1269 }, { "epoch": 0.27, "grad_norm": 0.2334408462047577, "learning_rate": 8.524414219529253e-06, "loss": 0.5416, "step": 1270 }, { "epoch": 0.27, "grad_norm": 0.21838751435279846, "learning_rate": 8.521938454608e-06, "loss": 0.5012, "step": 1271 }, { "epoch": 0.27, "grad_norm": 0.143874391913414, "learning_rate": 8.519460974661533e-06, "loss": 0.5323, "step": 1272 }, { "epoch": 0.27, "grad_norm": 0.14506854116916656, "learning_rate": 8.516981780896276e-06, "loss": 0.5148, "step": 1273 }, { "epoch": 0.27, "grad_norm": 0.1657627373933792, "learning_rate": 8.514500874519483e-06, "loss": 0.5507, "step": 1274 }, { "epoch": 0.27, "grad_norm": 0.15067879855632782, "learning_rate": 8.512018256739242e-06, "loss": 0.4994, "step": 1275 }, { "epoch": 0.27, "grad_norm": 0.1645599901676178, "learning_rate": 8.509533928764482e-06, "loss": 0.5025, "step": 1276 }, { "epoch": 0.28, "grad_norm": 0.14725331962108612, "learning_rate": 8.507047891804951e-06, "loss": 0.5635, "step": 1277 }, { "epoch": 0.28, "grad_norm": 0.16245393455028534, "learning_rate": 8.50456014707124e-06, "loss": 0.4446, "step": 1278 }, { "epoch": 0.28, "grad_norm": 0.14229734241962433, "learning_rate": 8.502070695774771e-06, "loss": 0.5043, "step": 1279 }, { "epoch": 0.28, "grad_norm": 0.20700879395008087, "learning_rate": 8.499579539127794e-06, "loss": 0.487, "step": 1280 }, { "epoch": 0.28, "grad_norm": 0.1793096512556076, "learning_rate": 8.497086678343385e-06, "loss": 0.5082, "step": 1281 }, { "epoch": 0.28, "grad_norm": 0.14241085946559906, "learning_rate": 8.494592114635458e-06, "loss": 0.5334, "step": 1282 }, { "epoch": 0.28, "grad_norm": 0.1370537132024765, "learning_rate": 8.492095849218756e-06, "loss": 0.5242, "step": 1283 }, { "epoch": 0.28, "grad_norm": 0.1460958868265152, "learning_rate": 8.489597883308844e-06, "loss": 0.5325, "step": 1284 }, { "epoch": 0.28, "grad_norm": 0.18947859108448029, "learning_rate": 8.487098218122119e-06, "loss": 0.5344, "step": 1285 }, { "epoch": 0.28, "grad_norm": 
0.2026044875383377, "learning_rate": 8.484596854875806e-06, "loss": 0.5627, "step": 1286 }, { "epoch": 0.28, "grad_norm": 0.13377788662910461, "learning_rate": 8.482093794787956e-06, "loss": 0.5525, "step": 1287 }, { "epoch": 0.28, "grad_norm": 0.22986631095409393, "learning_rate": 8.479589039077446e-06, "loss": 0.5288, "step": 1288 }, { "epoch": 0.28, "grad_norm": 0.17068606615066528, "learning_rate": 8.47708258896398e-06, "loss": 0.5352, "step": 1289 }, { "epoch": 0.28, "grad_norm": 0.15582841634750366, "learning_rate": 8.474574445668085e-06, "loss": 0.5475, "step": 1290 }, { "epoch": 0.28, "grad_norm": 0.19104814529418945, "learning_rate": 8.472064610411115e-06, "loss": 0.5225, "step": 1291 }, { "epoch": 0.28, "grad_norm": 0.12952920794487, "learning_rate": 8.469553084415247e-06, "loss": 0.4927, "step": 1292 }, { "epoch": 0.28, "grad_norm": 0.32774683833122253, "learning_rate": 8.467039868903477e-06, "loss": 0.5286, "step": 1293 }, { "epoch": 0.28, "grad_norm": 0.16002535820007324, "learning_rate": 8.464524965099632e-06, "loss": 0.5124, "step": 1294 }, { "epoch": 0.28, "grad_norm": 0.15826278924942017, "learning_rate": 8.462008374228356e-06, "loss": 0.5502, "step": 1295 }, { "epoch": 0.28, "grad_norm": 0.1503647416830063, "learning_rate": 8.459490097515114e-06, "loss": 0.5833, "step": 1296 }, { "epoch": 0.28, "grad_norm": 0.18131448328495026, "learning_rate": 8.456970136186193e-06, "loss": 0.4606, "step": 1297 }, { "epoch": 0.28, "grad_norm": 0.16622257232666016, "learning_rate": 8.454448491468702e-06, "loss": 0.5207, "step": 1298 }, { "epoch": 0.28, "grad_norm": 0.16979950666427612, "learning_rate": 8.451925164590568e-06, "loss": 0.5655, "step": 1299 }, { "epoch": 0.28, "grad_norm": 0.19531172513961792, "learning_rate": 8.449400156780536e-06, "loss": 0.4779, "step": 1300 }, { "epoch": 0.28, "grad_norm": 0.17314670979976654, "learning_rate": 8.44687346926817e-06, "loss": 0.5046, "step": 1301 }, { "epoch": 0.28, "grad_norm": 0.1429021954536438, "learning_rate": 8.444345103283858e-06, "loss": 0.527, "step": 1302 }, { "epoch": 0.28, "grad_norm": 0.19530290365219116, "learning_rate": 8.441815060058795e-06, "loss": 0.518, "step": 1303 }, { "epoch": 0.28, "grad_norm": 0.1742294281721115, "learning_rate": 8.439283340825002e-06, "loss": 0.5443, "step": 1304 }, { "epoch": 0.28, "grad_norm": 0.18429934978485107, "learning_rate": 8.436749946815308e-06, "loss": 0.5474, "step": 1305 }, { "epoch": 0.28, "grad_norm": 0.1543246954679489, "learning_rate": 8.434214879263365e-06, "loss": 0.5142, "step": 1306 }, { "epoch": 0.28, "grad_norm": 0.16444545984268188, "learning_rate": 8.431678139403635e-06, "loss": 0.5534, "step": 1307 }, { "epoch": 0.28, "grad_norm": 0.19701968133449554, "learning_rate": 8.429139728471395e-06, "loss": 0.5156, "step": 1308 }, { "epoch": 0.28, "grad_norm": 0.14688943326473236, "learning_rate": 8.426599647702738e-06, "loss": 0.5208, "step": 1309 }, { "epoch": 0.28, "grad_norm": 0.19136419892311096, "learning_rate": 8.424057898334569e-06, "loss": 0.6148, "step": 1310 }, { "epoch": 0.28, "grad_norm": 0.17055533826351166, "learning_rate": 8.421514481604605e-06, "loss": 0.5107, "step": 1311 }, { "epoch": 0.28, "grad_norm": 0.16385668516159058, "learning_rate": 8.418969398751375e-06, "loss": 0.502, "step": 1312 }, { "epoch": 0.28, "grad_norm": 0.17869453132152557, "learning_rate": 8.41642265101422e-06, "loss": 0.5464, "step": 1313 }, { "epoch": 0.28, "grad_norm": 0.14309388399124146, "learning_rate": 8.413874239633291e-06, "loss": 0.5585, "step": 1314 }, { "epoch": 0.28, "grad_norm": 
0.16163702309131622, "learning_rate": 8.41132416584955e-06, "loss": 0.553, "step": 1315 }, { "epoch": 0.28, "grad_norm": 0.15878815948963165, "learning_rate": 8.408772430904768e-06, "loss": 0.5359, "step": 1316 }, { "epoch": 0.28, "grad_norm": 0.14803734421730042, "learning_rate": 8.406219036041523e-06, "loss": 0.5177, "step": 1317 }, { "epoch": 0.28, "grad_norm": 0.16167186200618744, "learning_rate": 8.403663982503205e-06, "loss": 0.5106, "step": 1318 }, { "epoch": 0.28, "grad_norm": 0.14223089814186096, "learning_rate": 8.40110727153401e-06, "loss": 0.4768, "step": 1319 }, { "epoch": 0.28, "grad_norm": 0.1392257660627365, "learning_rate": 8.398548904378938e-06, "loss": 0.4928, "step": 1320 }, { "epoch": 0.28, "grad_norm": 0.1703733652830124, "learning_rate": 8.395988882283803e-06, "loss": 0.462, "step": 1321 }, { "epoch": 0.28, "grad_norm": 0.14999133348464966, "learning_rate": 8.393427206495217e-06, "loss": 0.5035, "step": 1322 }, { "epoch": 0.28, "grad_norm": 0.18849503993988037, "learning_rate": 8.390863878260602e-06, "loss": 0.5025, "step": 1323 }, { "epoch": 0.29, "grad_norm": 0.2667754888534546, "learning_rate": 8.388298898828182e-06, "loss": 0.517, "step": 1324 }, { "epoch": 0.29, "grad_norm": 0.1366441398859024, "learning_rate": 8.385732269446987e-06, "loss": 0.4938, "step": 1325 }, { "epoch": 0.29, "grad_norm": 0.16878017783164978, "learning_rate": 8.383163991366852e-06, "loss": 0.5057, "step": 1326 }, { "epoch": 0.29, "grad_norm": 0.14408189058303833, "learning_rate": 8.38059406583841e-06, "loss": 0.5197, "step": 1327 }, { "epoch": 0.29, "grad_norm": 0.14448203146457672, "learning_rate": 8.378022494113099e-06, "loss": 0.5289, "step": 1328 }, { "epoch": 0.29, "grad_norm": 0.1776053011417389, "learning_rate": 8.37544927744316e-06, "loss": 0.529, "step": 1329 }, { "epoch": 0.29, "grad_norm": 0.1904003769159317, "learning_rate": 8.372874417081632e-06, "loss": 0.5253, "step": 1330 }, { "epoch": 0.29, "grad_norm": 0.15336477756500244, "learning_rate": 8.370297914282354e-06, "loss": 0.5307, "step": 1331 }, { "epoch": 0.29, "grad_norm": 0.1891254037618637, "learning_rate": 8.367719770299972e-06, "loss": 0.5089, "step": 1332 }, { "epoch": 0.29, "grad_norm": 0.22274090349674225, "learning_rate": 8.36513998638992e-06, "loss": 0.5328, "step": 1333 }, { "epoch": 0.29, "grad_norm": 0.1466333568096161, "learning_rate": 8.36255856380844e-06, "loss": 0.5408, "step": 1334 }, { "epoch": 0.29, "grad_norm": 0.15075673162937164, "learning_rate": 8.359975503812569e-06, "loss": 0.5402, "step": 1335 }, { "epoch": 0.29, "grad_norm": 0.1457648128271103, "learning_rate": 8.35739080766014e-06, "loss": 0.5256, "step": 1336 }, { "epoch": 0.29, "grad_norm": 0.1647207885980606, "learning_rate": 8.35480447660978e-06, "loss": 0.5204, "step": 1337 }, { "epoch": 0.29, "grad_norm": 0.16034474968910217, "learning_rate": 8.352216511920921e-06, "loss": 0.5282, "step": 1338 }, { "epoch": 0.29, "grad_norm": 0.1303335428237915, "learning_rate": 8.349626914853781e-06, "loss": 0.4993, "step": 1339 }, { "epoch": 0.29, "grad_norm": 0.17350099980831146, "learning_rate": 8.34703568666938e-06, "loss": 0.6363, "step": 1340 }, { "epoch": 0.29, "grad_norm": 0.16359736025333405, "learning_rate": 8.344442828629526e-06, "loss": 0.5418, "step": 1341 }, { "epoch": 0.29, "grad_norm": 0.1771382838487625, "learning_rate": 8.341848341996828e-06, "loss": 0.5243, "step": 1342 }, { "epoch": 0.29, "grad_norm": 0.14461980760097504, "learning_rate": 8.33925222803468e-06, "loss": 0.5308, "step": 1343 }, { "epoch": 0.29, "grad_norm": 
0.19642101228237152, "learning_rate": 8.336654488007277e-06, "loss": 0.5189, "step": 1344 }, { "epoch": 0.29, "grad_norm": 0.18800689280033112, "learning_rate": 8.334055123179596e-06, "loss": 0.5177, "step": 1345 }, { "epoch": 0.29, "grad_norm": 0.20820565521717072, "learning_rate": 8.331454134817414e-06, "loss": 0.5033, "step": 1346 }, { "epoch": 0.29, "grad_norm": 0.15935355424880981, "learning_rate": 8.328851524187292e-06, "loss": 0.4901, "step": 1347 }, { "epoch": 0.29, "grad_norm": 0.15410637855529785, "learning_rate": 8.326247292556588e-06, "loss": 0.5402, "step": 1348 }, { "epoch": 0.29, "grad_norm": 0.21510785818099976, "learning_rate": 8.323641441193441e-06, "loss": 0.5414, "step": 1349 }, { "epoch": 0.29, "grad_norm": 0.20484770834445953, "learning_rate": 8.321033971366788e-06, "loss": 0.4995, "step": 1350 }, { "epoch": 0.29, "grad_norm": 0.15138699114322662, "learning_rate": 8.318424884346347e-06, "loss": 0.5191, "step": 1351 }, { "epoch": 0.29, "grad_norm": 0.1576775163412094, "learning_rate": 8.315814181402623e-06, "loss": 0.5358, "step": 1352 }, { "epoch": 0.29, "grad_norm": 0.15024110674858093, "learning_rate": 8.313201863806915e-06, "loss": 0.4613, "step": 1353 }, { "epoch": 0.29, "grad_norm": 0.15514235198497772, "learning_rate": 8.310587932831302e-06, "loss": 0.4951, "step": 1354 }, { "epoch": 0.29, "grad_norm": 0.20852284133434296, "learning_rate": 8.30797238974865e-06, "loss": 0.5085, "step": 1355 }, { "epoch": 0.29, "grad_norm": 0.15601487457752228, "learning_rate": 8.305355235832611e-06, "loss": 0.5467, "step": 1356 }, { "epoch": 0.29, "grad_norm": 0.22823049128055573, "learning_rate": 8.30273647235762e-06, "loss": 0.5444, "step": 1357 }, { "epoch": 0.29, "grad_norm": 0.17297740280628204, "learning_rate": 8.300116100598899e-06, "loss": 0.4745, "step": 1358 }, { "epoch": 0.29, "grad_norm": 0.16721418499946594, "learning_rate": 8.297494121832449e-06, "loss": 0.5331, "step": 1359 }, { "epoch": 0.29, "grad_norm": 0.20764422416687012, "learning_rate": 8.294870537335054e-06, "loss": 0.5123, "step": 1360 }, { "epoch": 0.29, "grad_norm": 0.12124624103307724, "learning_rate": 8.292245348384285e-06, "loss": 0.4942, "step": 1361 }, { "epoch": 0.29, "grad_norm": 0.18373292684555054, "learning_rate": 8.28961855625849e-06, "loss": 0.6003, "step": 1362 }, { "epoch": 0.29, "grad_norm": 0.15665894746780396, "learning_rate": 8.286990162236796e-06, "loss": 0.5199, "step": 1363 }, { "epoch": 0.29, "grad_norm": 0.18932463228702545, "learning_rate": 8.284360167599113e-06, "loss": 0.5577, "step": 1364 }, { "epoch": 0.29, "grad_norm": 0.14339394867420197, "learning_rate": 8.28172857362613e-06, "loss": 0.5319, "step": 1365 }, { "epoch": 0.29, "grad_norm": 0.16630741953849792, "learning_rate": 8.279095381599318e-06, "loss": 0.506, "step": 1366 }, { "epoch": 0.29, "grad_norm": 0.15607817471027374, "learning_rate": 8.27646059280092e-06, "loss": 0.5348, "step": 1367 }, { "epoch": 0.29, "grad_norm": 0.1827673465013504, "learning_rate": 8.273824208513956e-06, "loss": 0.5234, "step": 1368 }, { "epoch": 0.29, "grad_norm": 0.18514670431613922, "learning_rate": 8.27118623002223e-06, "loss": 0.4667, "step": 1369 }, { "epoch": 0.3, "grad_norm": 0.14588609337806702, "learning_rate": 8.268546658610319e-06, "loss": 0.4641, "step": 1370 }, { "epoch": 0.3, "grad_norm": 0.14752966165542603, "learning_rate": 8.265905495563573e-06, "loss": 0.4737, "step": 1371 }, { "epoch": 0.3, "grad_norm": 0.18035411834716797, "learning_rate": 8.26326274216812e-06, "loss": 0.5087, "step": 1372 }, { "epoch": 0.3, "grad_norm": 
0.14755289256572723, "learning_rate": 8.260618399710864e-06, "loss": 0.5454, "step": 1373 }, { "epoch": 0.3, "grad_norm": 0.18107686936855316, "learning_rate": 8.257972469479478e-06, "loss": 0.469, "step": 1374 }, { "epoch": 0.3, "grad_norm": 0.13992854952812195, "learning_rate": 8.255324952762413e-06, "loss": 0.4561, "step": 1375 }, { "epoch": 0.3, "grad_norm": 0.18599078059196472, "learning_rate": 8.252675850848886e-06, "loss": 0.4449, "step": 1376 }, { "epoch": 0.3, "grad_norm": 0.14460837841033936, "learning_rate": 8.250025165028897e-06, "loss": 0.5144, "step": 1377 }, { "epoch": 0.3, "grad_norm": 0.15791229903697968, "learning_rate": 8.247372896593203e-06, "loss": 0.5268, "step": 1378 }, { "epoch": 0.3, "grad_norm": 0.15533843636512756, "learning_rate": 8.244719046833342e-06, "loss": 0.5176, "step": 1379 }, { "epoch": 0.3, "grad_norm": 0.16106192767620087, "learning_rate": 8.24206361704162e-06, "loss": 0.5609, "step": 1380 }, { "epoch": 0.3, "grad_norm": 0.1757259964942932, "learning_rate": 8.239406608511113e-06, "loss": 0.5459, "step": 1381 }, { "epoch": 0.3, "grad_norm": 0.14974632859230042, "learning_rate": 8.236748022535662e-06, "loss": 0.5193, "step": 1382 }, { "epoch": 0.3, "grad_norm": 0.16588665544986725, "learning_rate": 8.23408786040988e-06, "loss": 0.5399, "step": 1383 }, { "epoch": 0.3, "grad_norm": 0.18392562866210938, "learning_rate": 8.231426123429143e-06, "loss": 0.5266, "step": 1384 }, { "epoch": 0.3, "grad_norm": 0.15321050584316254, "learning_rate": 8.2287628128896e-06, "loss": 0.5206, "step": 1385 }, { "epoch": 0.3, "grad_norm": 0.25465235114097595, "learning_rate": 8.226097930088162e-06, "loss": 0.5679, "step": 1386 }, { "epoch": 0.3, "grad_norm": 0.16098381578922272, "learning_rate": 8.223431476322508e-06, "loss": 0.501, "step": 1387 }, { "epoch": 0.3, "grad_norm": 0.18890248239040375, "learning_rate": 8.220763452891078e-06, "loss": 0.5524, "step": 1388 }, { "epoch": 0.3, "grad_norm": 0.19365254044532776, "learning_rate": 8.218093861093082e-06, "loss": 0.4858, "step": 1389 }, { "epoch": 0.3, "grad_norm": 0.13747772574424744, "learning_rate": 8.215422702228487e-06, "loss": 0.5109, "step": 1390 }, { "epoch": 0.3, "grad_norm": 0.1644936501979828, "learning_rate": 8.212749977598032e-06, "loss": 0.4996, "step": 1391 }, { "epoch": 0.3, "grad_norm": 0.17819000780582428, "learning_rate": 8.210075688503209e-06, "loss": 0.5312, "step": 1392 }, { "epoch": 0.3, "grad_norm": 0.15765920281410217, "learning_rate": 8.207399836246278e-06, "loss": 0.5171, "step": 1393 }, { "epoch": 0.3, "grad_norm": 0.20357385277748108, "learning_rate": 8.20472242213026e-06, "loss": 0.5364, "step": 1394 }, { "epoch": 0.3, "grad_norm": 0.15080830454826355, "learning_rate": 8.202043447458934e-06, "loss": 0.5169, "step": 1395 }, { "epoch": 0.3, "grad_norm": 0.15993140637874603, "learning_rate": 8.199362913536837e-06, "loss": 0.6155, "step": 1396 }, { "epoch": 0.3, "grad_norm": 0.18161435425281525, "learning_rate": 8.19668082166927e-06, "loss": 0.5493, "step": 1397 }, { "epoch": 0.3, "grad_norm": 0.1412186175584793, "learning_rate": 8.193997173162293e-06, "loss": 0.5242, "step": 1398 }, { "epoch": 0.3, "grad_norm": 0.15259157121181488, "learning_rate": 8.19131196932272e-06, "loss": 0.5644, "step": 1399 }, { "epoch": 0.3, "grad_norm": 0.2190113365650177, "learning_rate": 8.188625211458123e-06, "loss": 0.541, "step": 1400 }, { "epoch": 0.3, "grad_norm": 0.17318737506866455, "learning_rate": 8.185936900876834e-06, "loss": 0.5085, "step": 1401 }, { "epoch": 0.3, "grad_norm": 0.16196967661380768, 
"learning_rate": 8.183247038887937e-06, "loss": 0.485, "step": 1402 }, { "epoch": 0.3, "grad_norm": 0.19770100712776184, "learning_rate": 8.180555626801274e-06, "loss": 0.5142, "step": 1403 }, { "epoch": 0.3, "grad_norm": 0.1743081659078598, "learning_rate": 8.177862665927445e-06, "loss": 0.565, "step": 1404 }, { "epoch": 0.3, "grad_norm": 0.18734456598758698, "learning_rate": 8.175168157577795e-06, "loss": 0.5631, "step": 1405 }, { "epoch": 0.3, "grad_norm": 0.15591241419315338, "learning_rate": 8.17247210306443e-06, "loss": 0.4886, "step": 1406 }, { "epoch": 0.3, "grad_norm": 0.20416924357414246, "learning_rate": 8.169774503700209e-06, "loss": 0.5232, "step": 1407 }, { "epoch": 0.3, "grad_norm": 0.1668728142976761, "learning_rate": 8.167075360798739e-06, "loss": 0.5058, "step": 1408 }, { "epoch": 0.3, "grad_norm": 0.1554676592350006, "learning_rate": 8.164374675674382e-06, "loss": 0.5154, "step": 1409 }, { "epoch": 0.3, "grad_norm": 0.2015198916196823, "learning_rate": 8.161672449642248e-06, "loss": 0.482, "step": 1410 }, { "epoch": 0.3, "grad_norm": 0.13508014380931854, "learning_rate": 8.158968684018202e-06, "loss": 0.5501, "step": 1411 }, { "epoch": 0.3, "grad_norm": 0.18742331862449646, "learning_rate": 8.156263380118855e-06, "loss": 0.5439, "step": 1412 }, { "epoch": 0.3, "grad_norm": 0.13899442553520203, "learning_rate": 8.153556539261566e-06, "loss": 0.4965, "step": 1413 }, { "epoch": 0.3, "grad_norm": 0.15461724996566772, "learning_rate": 8.150848162764448e-06, "loss": 0.5158, "step": 1414 }, { "epoch": 0.3, "grad_norm": 0.1699683964252472, "learning_rate": 8.148138251946355e-06, "loss": 0.5345, "step": 1415 }, { "epoch": 0.31, "grad_norm": 0.1647995263338089, "learning_rate": 8.145426808126894e-06, "loss": 0.5417, "step": 1416 }, { "epoch": 0.31, "grad_norm": 0.15304109454154968, "learning_rate": 8.142713832626412e-06, "loss": 0.5546, "step": 1417 }, { "epoch": 0.31, "grad_norm": 0.12711341679096222, "learning_rate": 8.139999326766011e-06, "loss": 0.5176, "step": 1418 }, { "epoch": 0.31, "grad_norm": 0.15692314505577087, "learning_rate": 8.137283291867527e-06, "loss": 0.4648, "step": 1419 }, { "epoch": 0.31, "grad_norm": 0.16730400919914246, "learning_rate": 8.134565729253554e-06, "loss": 0.5099, "step": 1420 }, { "epoch": 0.31, "grad_norm": 0.15150144696235657, "learning_rate": 8.131846640247415e-06, "loss": 0.5261, "step": 1421 }, { "epoch": 0.31, "grad_norm": 0.25064417719841003, "learning_rate": 8.129126026173189e-06, "loss": 0.5097, "step": 1422 }, { "epoch": 0.31, "grad_norm": 0.1557064801454544, "learning_rate": 8.126403888355689e-06, "loss": 0.4951, "step": 1423 }, { "epoch": 0.31, "grad_norm": 0.17393703758716583, "learning_rate": 8.123680228120474e-06, "loss": 0.5257, "step": 1424 }, { "epoch": 0.31, "grad_norm": 0.1844862401485443, "learning_rate": 8.120955046793847e-06, "loss": 0.5361, "step": 1425 }, { "epoch": 0.31, "grad_norm": 0.17331448197364807, "learning_rate": 8.118228345702843e-06, "loss": 0.5718, "step": 1426 }, { "epoch": 0.31, "grad_norm": 0.19549396634101868, "learning_rate": 8.115500126175246e-06, "loss": 0.5322, "step": 1427 }, { "epoch": 0.31, "grad_norm": 0.16723619401454926, "learning_rate": 8.112770389539574e-06, "loss": 0.5048, "step": 1428 }, { "epoch": 0.31, "grad_norm": 0.15985050797462463, "learning_rate": 8.11003913712509e-06, "loss": 0.4759, "step": 1429 }, { "epoch": 0.31, "grad_norm": 0.16711269319057465, "learning_rate": 8.107306370261785e-06, "loss": 0.5433, "step": 1430 }, { "epoch": 0.31, "grad_norm": 0.15856465697288513, 
"learning_rate": 8.104572090280397e-06, "loss": 0.5132, "step": 1431 }, { "epoch": 0.31, "grad_norm": 0.14167572557926178, "learning_rate": 8.101836298512396e-06, "loss": 0.4879, "step": 1432 }, { "epoch": 0.31, "grad_norm": 0.17282311618328094, "learning_rate": 8.099098996289986e-06, "loss": 0.5943, "step": 1433 }, { "epoch": 0.31, "grad_norm": 0.1634991616010666, "learning_rate": 8.096360184946117e-06, "loss": 0.5256, "step": 1434 }, { "epoch": 0.31, "grad_norm": 0.17868229746818542, "learning_rate": 8.093619865814461e-06, "loss": 0.5314, "step": 1435 }, { "epoch": 0.31, "grad_norm": 0.17916221916675568, "learning_rate": 8.09087804022943e-06, "loss": 0.5192, "step": 1436 }, { "epoch": 0.31, "grad_norm": 0.15131542086601257, "learning_rate": 8.088134709526174e-06, "loss": 0.4965, "step": 1437 }, { "epoch": 0.31, "grad_norm": 0.15476344525814056, "learning_rate": 8.085389875040566e-06, "loss": 0.547, "step": 1438 }, { "epoch": 0.31, "grad_norm": 0.18421463668346405, "learning_rate": 8.082643538109217e-06, "loss": 0.5478, "step": 1439 }, { "epoch": 0.31, "grad_norm": 0.1662701666355133, "learning_rate": 8.079895700069473e-06, "loss": 0.5092, "step": 1440 }, { "epoch": 0.31, "grad_norm": 0.18112128973007202, "learning_rate": 8.077146362259405e-06, "loss": 0.5242, "step": 1441 }, { "epoch": 0.31, "grad_norm": 0.13690048456192017, "learning_rate": 8.074395526017816e-06, "loss": 0.5172, "step": 1442 }, { "epoch": 0.31, "grad_norm": 0.16095203161239624, "learning_rate": 8.07164319268424e-06, "loss": 0.5465, "step": 1443 }, { "epoch": 0.31, "grad_norm": 0.13967949151992798, "learning_rate": 8.06888936359894e-06, "loss": 0.5786, "step": 1444 }, { "epoch": 0.31, "grad_norm": 0.23251961171627045, "learning_rate": 8.066134040102904e-06, "loss": 0.5086, "step": 1445 }, { "epoch": 0.31, "grad_norm": 0.20811443030834198, "learning_rate": 8.063377223537853e-06, "loss": 0.5101, "step": 1446 }, { "epoch": 0.31, "grad_norm": 0.1625215709209442, "learning_rate": 8.060618915246233e-06, "loss": 0.5268, "step": 1447 }, { "epoch": 0.31, "grad_norm": 0.1501462310552597, "learning_rate": 8.057859116571213e-06, "loss": 0.547, "step": 1448 }, { "epoch": 0.31, "grad_norm": 0.16021014750003815, "learning_rate": 8.055097828856691e-06, "loss": 0.5311, "step": 1449 }, { "epoch": 0.31, "grad_norm": 0.20781485736370087, "learning_rate": 8.05233505344729e-06, "loss": 0.5188, "step": 1450 }, { "epoch": 0.31, "grad_norm": 0.3020351231098175, "learning_rate": 8.049570791688356e-06, "loss": 0.5023, "step": 1451 }, { "epoch": 0.31, "grad_norm": 0.1566857397556305, "learning_rate": 8.046805044925964e-06, "loss": 0.48, "step": 1452 }, { "epoch": 0.31, "grad_norm": 0.1672096997499466, "learning_rate": 8.044037814506905e-06, "loss": 0.5301, "step": 1453 }, { "epoch": 0.31, "grad_norm": 0.19419468939304352, "learning_rate": 8.041269101778694e-06, "loss": 0.5226, "step": 1454 }, { "epoch": 0.31, "grad_norm": 0.16195285320281982, "learning_rate": 8.03849890808957e-06, "loss": 0.5223, "step": 1455 }, { "epoch": 0.31, "grad_norm": 0.14367403090000153, "learning_rate": 8.035727234788496e-06, "loss": 0.5274, "step": 1456 }, { "epoch": 0.31, "grad_norm": 0.1967507302761078, "learning_rate": 8.032954083225146e-06, "loss": 0.4899, "step": 1457 }, { "epoch": 0.31, "grad_norm": 0.23297229409217834, "learning_rate": 8.030179454749925e-06, "loss": 0.5186, "step": 1458 }, { "epoch": 0.31, "grad_norm": 0.16745884716510773, "learning_rate": 8.027403350713948e-06, "loss": 0.492, "step": 1459 }, { "epoch": 0.31, "grad_norm": 0.13999496400356293, 
"learning_rate": 8.024625772469055e-06, "loss": 0.5221, "step": 1460 }, { "epoch": 0.31, "grad_norm": 0.140817791223526, "learning_rate": 8.0218467213678e-06, "loss": 0.5128, "step": 1461 }, { "epoch": 0.31, "grad_norm": 0.15968118607997894, "learning_rate": 8.019066198763458e-06, "loss": 0.525, "step": 1462 }, { "epoch": 0.32, "grad_norm": 0.13812531530857086, "learning_rate": 8.016284206010015e-06, "loss": 0.4477, "step": 1463 }, { "epoch": 0.32, "grad_norm": 0.16426512598991394, "learning_rate": 8.013500744462177e-06, "loss": 0.4974, "step": 1464 }, { "epoch": 0.32, "grad_norm": 0.15231406688690186, "learning_rate": 8.010715815475365e-06, "loss": 0.5289, "step": 1465 }, { "epoch": 0.32, "grad_norm": 0.1844695508480072, "learning_rate": 8.007929420405714e-06, "loss": 0.5201, "step": 1466 }, { "epoch": 0.32, "grad_norm": 0.17498986423015594, "learning_rate": 8.005141560610072e-06, "loss": 0.5619, "step": 1467 }, { "epoch": 0.32, "grad_norm": 0.16564463078975677, "learning_rate": 8.002352237446e-06, "loss": 0.5398, "step": 1468 }, { "epoch": 0.32, "grad_norm": 0.15143102407455444, "learning_rate": 7.999561452271776e-06, "loss": 0.5038, "step": 1469 }, { "epoch": 0.32, "grad_norm": 0.17521046102046967, "learning_rate": 7.996769206446383e-06, "loss": 0.4634, "step": 1470 }, { "epoch": 0.32, "grad_norm": 0.16226552426815033, "learning_rate": 7.993975501329518e-06, "loss": 0.5735, "step": 1471 }, { "epoch": 0.32, "grad_norm": 0.2068720906972885, "learning_rate": 7.991180338281594e-06, "loss": 0.5329, "step": 1472 }, { "epoch": 0.32, "grad_norm": 0.2290961742401123, "learning_rate": 7.988383718663727e-06, "loss": 0.5203, "step": 1473 }, { "epoch": 0.32, "grad_norm": 0.14001663029193878, "learning_rate": 7.985585643837743e-06, "loss": 0.4844, "step": 1474 }, { "epoch": 0.32, "grad_norm": 0.15565429627895355, "learning_rate": 7.982786115166182e-06, "loss": 0.5158, "step": 1475 }, { "epoch": 0.32, "grad_norm": 0.12718220055103302, "learning_rate": 7.979985134012285e-06, "loss": 0.5256, "step": 1476 }, { "epoch": 0.32, "grad_norm": 0.1732247918844223, "learning_rate": 7.977182701740003e-06, "loss": 0.5447, "step": 1477 }, { "epoch": 0.32, "grad_norm": 0.16792930662631989, "learning_rate": 7.974378819713998e-06, "loss": 0.5415, "step": 1478 }, { "epoch": 0.32, "grad_norm": 0.1823003590106964, "learning_rate": 7.97157348929963e-06, "loss": 0.5089, "step": 1479 }, { "epoch": 0.32, "grad_norm": 0.1478123515844345, "learning_rate": 7.968766711862971e-06, "loss": 0.5763, "step": 1480 }, { "epoch": 0.32, "grad_norm": 0.16354763507843018, "learning_rate": 7.965958488770796e-06, "loss": 0.5476, "step": 1481 }, { "epoch": 0.32, "grad_norm": 0.13449835777282715, "learning_rate": 7.963148821390578e-06, "loss": 0.5205, "step": 1482 }, { "epoch": 0.32, "grad_norm": 0.17802083492279053, "learning_rate": 7.960337711090504e-06, "loss": 0.5239, "step": 1483 }, { "epoch": 0.32, "grad_norm": 0.20004011690616608, "learning_rate": 7.957525159239454e-06, "loss": 0.5291, "step": 1484 }, { "epoch": 0.32, "grad_norm": 0.17748400568962097, "learning_rate": 7.954711167207016e-06, "loss": 0.4913, "step": 1485 }, { "epoch": 0.32, "grad_norm": 0.22476144134998322, "learning_rate": 7.951895736363477e-06, "loss": 0.4939, "step": 1486 }, { "epoch": 0.32, "grad_norm": 0.16127091646194458, "learning_rate": 7.949078868079825e-06, "loss": 0.5272, "step": 1487 }, { "epoch": 0.32, "grad_norm": 0.18299731612205505, "learning_rate": 7.946260563727746e-06, "loss": 0.5951, "step": 1488 }, { "epoch": 0.32, "grad_norm": 0.13896289467811584, 
"learning_rate": 7.94344082467963e-06, "loss": 0.5591, "step": 1489 }, { "epoch": 0.32, "grad_norm": 0.1735697239637375, "learning_rate": 7.940619652308562e-06, "loss": 0.5432, "step": 1490 }, { "epoch": 0.32, "grad_norm": 0.16972100734710693, "learning_rate": 7.937797047988322e-06, "loss": 0.4821, "step": 1491 }, { "epoch": 0.32, "grad_norm": 0.1734873354434967, "learning_rate": 7.934973013093397e-06, "loss": 0.4922, "step": 1492 }, { "epoch": 0.32, "grad_norm": 0.16801413893699646, "learning_rate": 7.932147548998958e-06, "loss": 0.5599, "step": 1493 }, { "epoch": 0.32, "grad_norm": 0.12655183672904968, "learning_rate": 7.929320657080886e-06, "loss": 0.5432, "step": 1494 }, { "epoch": 0.32, "grad_norm": 0.2155943512916565, "learning_rate": 7.926492338715746e-06, "loss": 0.5351, "step": 1495 }, { "epoch": 0.32, "grad_norm": 0.1321111023426056, "learning_rate": 7.923662595280799e-06, "loss": 0.5267, "step": 1496 }, { "epoch": 0.32, "grad_norm": 0.19633205235004425, "learning_rate": 7.920831428154008e-06, "loss": 0.5296, "step": 1497 }, { "epoch": 0.32, "grad_norm": 0.19406452775001526, "learning_rate": 7.917998838714019e-06, "loss": 0.569, "step": 1498 }, { "epoch": 0.32, "grad_norm": 0.17301122844219208, "learning_rate": 7.915164828340179e-06, "loss": 0.5303, "step": 1499 }, { "epoch": 0.32, "grad_norm": 0.14050279557704926, "learning_rate": 7.91232939841252e-06, "loss": 0.5045, "step": 1500 }, { "epoch": 0.32, "grad_norm": 0.13988257944583893, "learning_rate": 7.909492550311769e-06, "loss": 0.4965, "step": 1501 }, { "epoch": 0.32, "grad_norm": 0.13999608159065247, "learning_rate": 7.906654285419347e-06, "loss": 0.5337, "step": 1502 }, { "epoch": 0.32, "grad_norm": 0.18495085835456848, "learning_rate": 7.903814605117355e-06, "loss": 0.5266, "step": 1503 }, { "epoch": 0.32, "grad_norm": 0.131727397441864, "learning_rate": 7.900973510788595e-06, "loss": 0.5131, "step": 1504 }, { "epoch": 0.32, "grad_norm": 0.13659153878688812, "learning_rate": 7.898131003816547e-06, "loss": 0.4934, "step": 1505 }, { "epoch": 0.32, "grad_norm": 0.22903259098529816, "learning_rate": 7.895287085585386e-06, "loss": 0.5258, "step": 1506 }, { "epoch": 0.32, "grad_norm": 0.23151510953903198, "learning_rate": 7.892441757479974e-06, "loss": 0.5321, "step": 1507 }, { "epoch": 0.32, "grad_norm": 0.18955311179161072, "learning_rate": 7.889595020885853e-06, "loss": 0.4939, "step": 1508 }, { "epoch": 0.33, "grad_norm": 0.14848068356513977, "learning_rate": 7.88674687718926e-06, "loss": 0.4916, "step": 1509 }, { "epoch": 0.33, "grad_norm": 0.13812664151191711, "learning_rate": 7.883897327777108e-06, "loss": 0.51, "step": 1510 }, { "epoch": 0.33, "grad_norm": 0.14594610035419464, "learning_rate": 7.881046374037002e-06, "loss": 0.497, "step": 1511 }, { "epoch": 0.33, "grad_norm": 0.18314702808856964, "learning_rate": 7.878194017357229e-06, "loss": 0.4968, "step": 1512 }, { "epoch": 0.33, "grad_norm": 0.15771466493606567, "learning_rate": 7.875340259126754e-06, "loss": 0.5373, "step": 1513 }, { "epoch": 0.33, "grad_norm": 0.15456095337867737, "learning_rate": 7.87248510073523e-06, "loss": 0.4797, "step": 1514 }, { "epoch": 0.33, "grad_norm": 0.14819829165935516, "learning_rate": 7.869628543572994e-06, "loss": 0.4645, "step": 1515 }, { "epoch": 0.33, "grad_norm": 0.16360363364219666, "learning_rate": 7.866770589031057e-06, "loss": 0.4941, "step": 1516 }, { "epoch": 0.33, "grad_norm": 0.1475502848625183, "learning_rate": 7.863911238501113e-06, "loss": 0.5693, "step": 1517 }, { "epoch": 0.33, "grad_norm": 0.17970135807991028, 
"learning_rate": 7.86105049337554e-06, "loss": 0.6145, "step": 1518 }, { "epoch": 0.33, "grad_norm": 0.16100694239139557, "learning_rate": 7.85818835504739e-06, "loss": 0.4806, "step": 1519 }, { "epoch": 0.33, "grad_norm": 0.18620309233665466, "learning_rate": 7.855324824910395e-06, "loss": 0.5659, "step": 1520 }, { "epoch": 0.33, "grad_norm": 0.1660996675491333, "learning_rate": 7.852459904358968e-06, "loss": 0.5211, "step": 1521 }, { "epoch": 0.33, "grad_norm": 0.18867598474025726, "learning_rate": 7.849593594788192e-06, "loss": 0.4975, "step": 1522 }, { "epoch": 0.33, "grad_norm": 0.17060688138008118, "learning_rate": 7.846725897593834e-06, "loss": 0.527, "step": 1523 }, { "epoch": 0.33, "grad_norm": 0.14144161343574524, "learning_rate": 7.843856814172329e-06, "loss": 0.478, "step": 1524 }, { "epoch": 0.33, "grad_norm": 0.15240880846977234, "learning_rate": 7.840986345920795e-06, "loss": 0.4896, "step": 1525 }, { "epoch": 0.33, "grad_norm": 0.1528806835412979, "learning_rate": 7.83811449423702e-06, "loss": 0.4968, "step": 1526 }, { "epoch": 0.33, "grad_norm": 0.1606244146823883, "learning_rate": 7.835241260519467e-06, "loss": 0.4879, "step": 1527 }, { "epoch": 0.33, "grad_norm": 0.14756283164024353, "learning_rate": 7.832366646167268e-06, "loss": 0.5135, "step": 1528 }, { "epoch": 0.33, "grad_norm": 0.16397136449813843, "learning_rate": 7.829490652580233e-06, "loss": 0.5549, "step": 1529 }, { "epoch": 0.33, "grad_norm": 0.1577044427394867, "learning_rate": 7.82661328115884e-06, "loss": 0.5037, "step": 1530 }, { "epoch": 0.33, "grad_norm": 0.16425062716007233, "learning_rate": 7.823734533304241e-06, "loss": 0.5245, "step": 1531 }, { "epoch": 0.33, "grad_norm": 0.18981023132801056, "learning_rate": 7.820854410418255e-06, "loss": 0.5009, "step": 1532 }, { "epoch": 0.33, "grad_norm": 0.14500872790813446, "learning_rate": 7.817972913903373e-06, "loss": 0.4711, "step": 1533 }, { "epoch": 0.33, "grad_norm": 0.2270984947681427, "learning_rate": 7.815090045162752e-06, "loss": 0.5454, "step": 1534 }, { "epoch": 0.33, "grad_norm": 0.1595790833234787, "learning_rate": 7.81220580560022e-06, "loss": 0.5159, "step": 1535 }, { "epoch": 0.33, "grad_norm": 0.18246832489967346, "learning_rate": 7.809320196620272e-06, "loss": 0.5324, "step": 1536 }, { "epoch": 0.33, "grad_norm": 0.15763631463050842, "learning_rate": 7.80643321962807e-06, "loss": 0.5348, "step": 1537 }, { "epoch": 0.33, "grad_norm": 0.1331566572189331, "learning_rate": 7.80354487602944e-06, "loss": 0.4746, "step": 1538 }, { "epoch": 0.33, "grad_norm": 0.17700472474098206, "learning_rate": 7.800655167230877e-06, "loss": 0.5652, "step": 1539 }, { "epoch": 0.33, "grad_norm": 0.15402348339557648, "learning_rate": 7.797764094639537e-06, "loss": 0.557, "step": 1540 }, { "epoch": 0.33, "grad_norm": 0.17362762987613678, "learning_rate": 7.794871659663242e-06, "loss": 0.491, "step": 1541 }, { "epoch": 0.33, "grad_norm": 0.14665651321411133, "learning_rate": 7.79197786371048e-06, "loss": 0.5373, "step": 1542 }, { "epoch": 0.33, "grad_norm": 0.17219582200050354, "learning_rate": 7.789082708190397e-06, "loss": 0.4852, "step": 1543 }, { "epoch": 0.33, "grad_norm": 0.15352313220500946, "learning_rate": 7.786186194512802e-06, "loss": 0.4926, "step": 1544 }, { "epoch": 0.33, "grad_norm": 0.17823894321918488, "learning_rate": 7.78328832408817e-06, "loss": 0.5275, "step": 1545 }, { "epoch": 0.33, "grad_norm": 0.20020678639411926, "learning_rate": 7.780389098327629e-06, "loss": 0.4786, "step": 1546 }, { "epoch": 0.33, "grad_norm": 0.13879740238189697, 
"learning_rate": 7.777488518642975e-06, "loss": 0.5054, "step": 1547 }, { "epoch": 0.33, "grad_norm": 0.1314191222190857, "learning_rate": 7.774586586446658e-06, "loss": 0.4901, "step": 1548 }, { "epoch": 0.33, "grad_norm": 0.26172900199890137, "learning_rate": 7.77168330315179e-06, "loss": 0.5073, "step": 1549 }, { "epoch": 0.33, "grad_norm": 0.15131932497024536, "learning_rate": 7.768778670172135e-06, "loss": 0.532, "step": 1550 }, { "epoch": 0.33, "grad_norm": 0.14957192540168762, "learning_rate": 7.76587268892212e-06, "loss": 0.489, "step": 1551 }, { "epoch": 0.33, "grad_norm": 0.15338850021362305, "learning_rate": 7.762965360816828e-06, "loss": 0.5161, "step": 1552 }, { "epoch": 0.33, "grad_norm": 0.14951498806476593, "learning_rate": 7.760056687271996e-06, "loss": 0.545, "step": 1553 }, { "epoch": 0.33, "grad_norm": 0.32918447256088257, "learning_rate": 7.757146669704016e-06, "loss": 0.5144, "step": 1554 }, { "epoch": 0.33, "grad_norm": 0.1633896380662918, "learning_rate": 7.754235309529939e-06, "loss": 0.5305, "step": 1555 }, { "epoch": 0.34, "grad_norm": 0.15538008511066437, "learning_rate": 7.75132260816746e-06, "loss": 0.5787, "step": 1556 }, { "epoch": 0.34, "grad_norm": 0.16210249066352844, "learning_rate": 7.748408567034938e-06, "loss": 0.516, "step": 1557 }, { "epoch": 0.34, "grad_norm": 0.140504851937294, "learning_rate": 7.745493187551378e-06, "loss": 0.5344, "step": 1558 }, { "epoch": 0.34, "grad_norm": 0.1350797414779663, "learning_rate": 7.74257647113644e-06, "loss": 0.5773, "step": 1559 }, { "epoch": 0.34, "grad_norm": 0.16812683641910553, "learning_rate": 7.739658419210429e-06, "loss": 0.4808, "step": 1560 }, { "epoch": 0.34, "grad_norm": 0.15915554761886597, "learning_rate": 7.73673903319431e-06, "loss": 0.51, "step": 1561 }, { "epoch": 0.34, "grad_norm": 0.14357538521289825, "learning_rate": 7.733818314509689e-06, "loss": 0.4821, "step": 1562 }, { "epoch": 0.34, "grad_norm": 0.1362561285495758, "learning_rate": 7.730896264578825e-06, "loss": 0.5051, "step": 1563 }, { "epoch": 0.34, "grad_norm": 0.29245832562446594, "learning_rate": 7.727972884824625e-06, "loss": 0.5387, "step": 1564 }, { "epoch": 0.34, "grad_norm": 0.1896662563085556, "learning_rate": 7.725048176670643e-06, "loss": 0.5269, "step": 1565 }, { "epoch": 0.34, "grad_norm": 0.16521599888801575, "learning_rate": 7.72212214154108e-06, "loss": 0.5207, "step": 1566 }, { "epoch": 0.34, "grad_norm": 0.1532319337129593, "learning_rate": 7.719194780860783e-06, "loss": 0.4951, "step": 1567 }, { "epoch": 0.34, "grad_norm": 0.15770648419857025, "learning_rate": 7.716266096055243e-06, "loss": 0.5328, "step": 1568 }, { "epoch": 0.34, "grad_norm": 0.13383062183856964, "learning_rate": 7.713336088550601e-06, "loss": 0.5463, "step": 1569 }, { "epoch": 0.34, "grad_norm": 0.2122948169708252, "learning_rate": 7.710404759773637e-06, "loss": 0.5193, "step": 1570 }, { "epoch": 0.34, "grad_norm": 0.1524578481912613, "learning_rate": 7.707472111151775e-06, "loss": 0.5058, "step": 1571 }, { "epoch": 0.34, "grad_norm": 0.1887030303478241, "learning_rate": 7.704538144113082e-06, "loss": 0.515, "step": 1572 }, { "epoch": 0.34, "grad_norm": 0.18387439846992493, "learning_rate": 7.70160286008627e-06, "loss": 0.523, "step": 1573 }, { "epoch": 0.34, "grad_norm": 0.1244322806596756, "learning_rate": 7.698666260500688e-06, "loss": 0.4878, "step": 1574 }, { "epoch": 0.34, "grad_norm": 0.13694074749946594, "learning_rate": 7.69572834678633e-06, "loss": 0.4722, "step": 1575 }, { "epoch": 0.34, "grad_norm": 0.17935697734355927, 
"learning_rate": 7.692789120373824e-06, "loss": 0.4532, "step": 1576 }, { "epoch": 0.34, "grad_norm": 0.1903911679983139, "learning_rate": 7.689848582694444e-06, "loss": 0.5128, "step": 1577 }, { "epoch": 0.34, "grad_norm": 0.15431609749794006, "learning_rate": 7.686906735180099e-06, "loss": 0.4882, "step": 1578 }, { "epoch": 0.34, "grad_norm": 0.17097975313663483, "learning_rate": 7.683963579263332e-06, "loss": 0.5729, "step": 1579 }, { "epoch": 0.34, "grad_norm": 0.14723485708236694, "learning_rate": 7.681019116377331e-06, "loss": 0.494, "step": 1580 }, { "epoch": 0.34, "grad_norm": 0.17691069841384888, "learning_rate": 7.678073347955918e-06, "loss": 0.5062, "step": 1581 }, { "epoch": 0.34, "grad_norm": 0.161320298910141, "learning_rate": 7.675126275433545e-06, "loss": 0.5685, "step": 1582 }, { "epoch": 0.34, "grad_norm": 0.18011566996574402, "learning_rate": 7.672177900245307e-06, "loss": 0.5103, "step": 1583 }, { "epoch": 0.34, "grad_norm": 0.16380946338176727, "learning_rate": 7.669228223826926e-06, "loss": 0.4897, "step": 1584 }, { "epoch": 0.34, "grad_norm": 0.15541784465312958, "learning_rate": 7.666277247614766e-06, "loss": 0.4562, "step": 1585 }, { "epoch": 0.34, "grad_norm": 0.21574871242046356, "learning_rate": 7.663324973045818e-06, "loss": 0.5683, "step": 1586 }, { "epoch": 0.34, "grad_norm": 0.18054868280887604, "learning_rate": 7.660371401557703e-06, "loss": 0.5149, "step": 1587 }, { "epoch": 0.34, "grad_norm": 0.1341419368982315, "learning_rate": 7.657416534588683e-06, "loss": 0.4946, "step": 1588 }, { "epoch": 0.34, "grad_norm": 0.1958109736442566, "learning_rate": 7.654460373577639e-06, "loss": 0.5204, "step": 1589 }, { "epoch": 0.34, "grad_norm": 0.13961777091026306, "learning_rate": 7.651502919964092e-06, "loss": 0.4753, "step": 1590 }, { "epoch": 0.34, "grad_norm": 0.16249793767929077, "learning_rate": 7.648544175188189e-06, "loss": 0.5392, "step": 1591 }, { "epoch": 0.34, "grad_norm": 0.17830121517181396, "learning_rate": 7.645584140690702e-06, "loss": 0.5414, "step": 1592 }, { "epoch": 0.34, "grad_norm": 0.164913147687912, "learning_rate": 7.642622817913036e-06, "loss": 0.5127, "step": 1593 }, { "epoch": 0.34, "grad_norm": 0.13776592910289764, "learning_rate": 7.639660208297221e-06, "loss": 0.4568, "step": 1594 }, { "epoch": 0.34, "grad_norm": 0.4830784499645233, "learning_rate": 7.636696313285917e-06, "loss": 0.5153, "step": 1595 }, { "epoch": 0.34, "grad_norm": 0.14156107604503632, "learning_rate": 7.633731134322404e-06, "loss": 0.5142, "step": 1596 }, { "epoch": 0.34, "grad_norm": 0.1518123894929886, "learning_rate": 7.630764672850593e-06, "loss": 0.51, "step": 1597 }, { "epoch": 0.34, "grad_norm": 0.17625145614147186, "learning_rate": 7.6277969303150155e-06, "loss": 0.495, "step": 1598 }, { "epoch": 0.34, "grad_norm": 0.17110183835029602, "learning_rate": 7.624827908160828e-06, "loss": 0.5465, "step": 1599 }, { "epoch": 0.34, "grad_norm": 0.18074309825897217, "learning_rate": 7.6218576078338115e-06, "loss": 0.519, "step": 1600 }, { "epoch": 0.34, "grad_norm": 0.176472008228302, "learning_rate": 7.618886030780366e-06, "loss": 0.5301, "step": 1601 }, { "epoch": 0.35, "grad_norm": 0.23984403908252716, "learning_rate": 7.615913178447518e-06, "loss": 0.5679, "step": 1602 }, { "epoch": 0.35, "grad_norm": 0.16570177674293518, "learning_rate": 7.612939052282913e-06, "loss": 0.5353, "step": 1603 }, { "epoch": 0.35, "grad_norm": 0.15504352748394012, "learning_rate": 7.609963653734814e-06, "loss": 0.4889, "step": 1604 }, { "epoch": 0.35, "grad_norm": 0.12483610212802887, 
"learning_rate": 7.606986984252107e-06, "loss": 0.4901, "step": 1605 }, { "epoch": 0.35, "grad_norm": 0.1474786102771759, "learning_rate": 7.604009045284295e-06, "loss": 0.5106, "step": 1606 }, { "epoch": 0.35, "grad_norm": 0.1935417652130127, "learning_rate": 7.601029838281503e-06, "loss": 0.54, "step": 1607 }, { "epoch": 0.35, "grad_norm": 0.15936410427093506, "learning_rate": 7.598049364694466e-06, "loss": 0.5259, "step": 1608 }, { "epoch": 0.35, "grad_norm": 0.23374778032302856, "learning_rate": 7.595067625974544e-06, "loss": 0.4745, "step": 1609 }, { "epoch": 0.35, "grad_norm": 0.1541801393032074, "learning_rate": 7.592084623573708e-06, "loss": 0.5009, "step": 1610 }, { "epoch": 0.35, "grad_norm": 0.1573501080274582, "learning_rate": 7.589100358944546e-06, "loss": 0.5054, "step": 1611 }, { "epoch": 0.35, "grad_norm": 0.14179089665412903, "learning_rate": 7.586114833540257e-06, "loss": 0.4971, "step": 1612 }, { "epoch": 0.35, "grad_norm": 0.12740643322467804, "learning_rate": 7.583128048814663e-06, "loss": 0.5311, "step": 1613 }, { "epoch": 0.35, "grad_norm": 0.18302515149116516, "learning_rate": 7.58014000622219e-06, "loss": 0.5443, "step": 1614 }, { "epoch": 0.35, "grad_norm": 0.22869239747524261, "learning_rate": 7.577150707217878e-06, "loss": 0.5488, "step": 1615 }, { "epoch": 0.35, "grad_norm": 0.11746443063020706, "learning_rate": 7.574160153257386e-06, "loss": 0.5052, "step": 1616 }, { "epoch": 0.35, "grad_norm": 0.15382401645183563, "learning_rate": 7.571168345796975e-06, "loss": 0.5468, "step": 1617 }, { "epoch": 0.35, "grad_norm": 0.18465621769428253, "learning_rate": 7.568175286293522e-06, "loss": 0.557, "step": 1618 }, { "epoch": 0.35, "grad_norm": 0.14507010579109192, "learning_rate": 7.5651809762045115e-06, "loss": 0.4686, "step": 1619 }, { "epoch": 0.35, "grad_norm": 0.17526701092720032, "learning_rate": 7.562185416988039e-06, "loss": 0.5065, "step": 1620 }, { "epoch": 0.35, "grad_norm": 0.16445392370224, "learning_rate": 7.559188610102803e-06, "loss": 0.4226, "step": 1621 }, { "epoch": 0.35, "grad_norm": 0.13059720396995544, "learning_rate": 7.556190557008116e-06, "loss": 0.4899, "step": 1622 }, { "epoch": 0.35, "grad_norm": 0.19847136735916138, "learning_rate": 7.553191259163896e-06, "loss": 0.5169, "step": 1623 }, { "epoch": 0.35, "grad_norm": 0.1679173707962036, "learning_rate": 7.550190718030663e-06, "loss": 0.5012, "step": 1624 }, { "epoch": 0.35, "grad_norm": 0.15986262261867523, "learning_rate": 7.547188935069547e-06, "loss": 0.5436, "step": 1625 }, { "epoch": 0.35, "grad_norm": 0.13230155408382416, "learning_rate": 7.54418591174228e-06, "loss": 0.5307, "step": 1626 }, { "epoch": 0.35, "grad_norm": 0.13571912050247192, "learning_rate": 7.5411816495111985e-06, "loss": 0.5169, "step": 1627 }, { "epoch": 0.35, "grad_norm": 0.17367611825466156, "learning_rate": 7.5381761498392435e-06, "loss": 0.5677, "step": 1628 }, { "epoch": 0.35, "grad_norm": 0.1747978776693344, "learning_rate": 7.535169414189959e-06, "loss": 0.5706, "step": 1629 }, { "epoch": 0.35, "grad_norm": 0.11080675572156906, "learning_rate": 7.532161444027488e-06, "loss": 0.4933, "step": 1630 }, { "epoch": 0.35, "grad_norm": 0.1479070633649826, "learning_rate": 7.529152240816577e-06, "loss": 0.4794, "step": 1631 }, { "epoch": 0.35, "grad_norm": 0.12181144952774048, "learning_rate": 7.526141806022571e-06, "loss": 0.5346, "step": 1632 }, { "epoch": 0.35, "grad_norm": 0.18355728685855865, "learning_rate": 7.523130141111419e-06, "loss": 0.5696, "step": 1633 }, { "epoch": 0.35, "grad_norm": 0.12792839109897614, 
"learning_rate": 7.520117247549661e-06, "loss": 0.5148, "step": 1634 }, { "epoch": 0.35, "grad_norm": 0.17084498703479767, "learning_rate": 7.517103126804446e-06, "loss": 0.5362, "step": 1635 }, { "epoch": 0.35, "grad_norm": 0.1391141563653946, "learning_rate": 7.514087780343511e-06, "loss": 0.4839, "step": 1636 }, { "epoch": 0.35, "grad_norm": 0.13675713539123535, "learning_rate": 7.511071209635197e-06, "loss": 0.5153, "step": 1637 }, { "epoch": 0.35, "grad_norm": 0.13880731165409088, "learning_rate": 7.508053416148433e-06, "loss": 0.5117, "step": 1638 }, { "epoch": 0.35, "grad_norm": 0.11620379984378815, "learning_rate": 7.5050344013527535e-06, "loss": 0.5146, "step": 1639 }, { "epoch": 0.35, "grad_norm": 0.1520024538040161, "learning_rate": 7.502014166718279e-06, "loss": 0.5332, "step": 1640 }, { "epoch": 0.35, "grad_norm": 0.16113972663879395, "learning_rate": 7.49899271371573e-06, "loss": 0.4881, "step": 1641 }, { "epoch": 0.35, "grad_norm": 0.177647203207016, "learning_rate": 7.495970043816416e-06, "loss": 0.506, "step": 1642 }, { "epoch": 0.35, "grad_norm": 0.20048052072525024, "learning_rate": 7.492946158492243e-06, "loss": 0.5128, "step": 1643 }, { "epoch": 0.35, "grad_norm": 0.18544965982437134, "learning_rate": 7.489921059215703e-06, "loss": 0.4755, "step": 1644 }, { "epoch": 0.35, "grad_norm": 0.15983660519123077, "learning_rate": 7.486894747459887e-06, "loss": 0.5021, "step": 1645 }, { "epoch": 0.35, "grad_norm": 0.13609494268894196, "learning_rate": 7.483867224698471e-06, "loss": 0.5392, "step": 1646 }, { "epoch": 0.35, "grad_norm": 0.15707872807979584, "learning_rate": 7.480838492405722e-06, "loss": 0.5503, "step": 1647 }, { "epoch": 0.36, "grad_norm": 0.14846757054328918, "learning_rate": 7.477808552056496e-06, "loss": 0.5162, "step": 1648 }, { "epoch": 0.36, "grad_norm": 0.20370322465896606, "learning_rate": 7.474777405126236e-06, "loss": 0.5291, "step": 1649 }, { "epoch": 0.36, "grad_norm": 0.19087088108062744, "learning_rate": 7.471745053090976e-06, "loss": 0.5647, "step": 1650 }, { "epoch": 0.36, "grad_norm": 0.1674560159444809, "learning_rate": 7.468711497427335e-06, "loss": 0.502, "step": 1651 }, { "epoch": 0.36, "grad_norm": 0.1854984611272812, "learning_rate": 7.465676739612514e-06, "loss": 0.5304, "step": 1652 }, { "epoch": 0.36, "grad_norm": 0.17334036529064178, "learning_rate": 7.462640781124309e-06, "loss": 0.5476, "step": 1653 }, { "epoch": 0.36, "grad_norm": 0.1636764258146286, "learning_rate": 7.45960362344109e-06, "loss": 0.5359, "step": 1654 }, { "epoch": 0.36, "grad_norm": 0.16120000183582306, "learning_rate": 7.456565268041815e-06, "loss": 0.5591, "step": 1655 }, { "epoch": 0.36, "grad_norm": 0.16681008040905, "learning_rate": 7.4535257164060324e-06, "loss": 0.4933, "step": 1656 }, { "epoch": 0.36, "grad_norm": 0.15936830639839172, "learning_rate": 7.450484970013863e-06, "loss": 0.4903, "step": 1657 }, { "epoch": 0.36, "grad_norm": 0.1579248011112213, "learning_rate": 7.447443030346011e-06, "loss": 0.5368, "step": 1658 }, { "epoch": 0.36, "grad_norm": 0.17494046688079834, "learning_rate": 7.444399898883768e-06, "loss": 0.4972, "step": 1659 }, { "epoch": 0.36, "grad_norm": 0.15343308448791504, "learning_rate": 7.441355577108998e-06, "loss": 0.485, "step": 1660 }, { "epoch": 0.36, "grad_norm": 0.24387070536613464, "learning_rate": 7.438310066504152e-06, "loss": 0.5527, "step": 1661 }, { "epoch": 0.36, "grad_norm": 0.27083417773246765, "learning_rate": 7.4352633685522535e-06, "loss": 0.4657, "step": 1662 }, { "epoch": 0.36, "grad_norm": 0.20291651785373688, 
"learning_rate": 7.432215484736909e-06, "loss": 0.4805, "step": 1663 }, { "epoch": 0.36, "grad_norm": 0.17441540956497192, "learning_rate": 7.4291664165422985e-06, "loss": 0.5157, "step": 1664 }, { "epoch": 0.36, "grad_norm": 0.21364037692546844, "learning_rate": 7.426116165453181e-06, "loss": 0.5072, "step": 1665 }, { "epoch": 0.36, "grad_norm": 0.16811180114746094, "learning_rate": 7.423064732954895e-06, "loss": 0.4577, "step": 1666 }, { "epoch": 0.36, "grad_norm": 0.2634996473789215, "learning_rate": 7.420012120533346e-06, "loss": 0.5387, "step": 1667 }, { "epoch": 0.36, "grad_norm": 0.15785469114780426, "learning_rate": 7.4169583296750194e-06, "loss": 0.5052, "step": 1668 }, { "epoch": 0.36, "grad_norm": 0.18810074031352997, "learning_rate": 7.4139033618669764e-06, "loss": 0.5234, "step": 1669 }, { "epoch": 0.36, "grad_norm": 0.14630138874053955, "learning_rate": 7.410847218596846e-06, "loss": 0.5155, "step": 1670 }, { "epoch": 0.36, "grad_norm": 0.18249250948429108, "learning_rate": 7.407789901352831e-06, "loss": 0.5351, "step": 1671 }, { "epoch": 0.36, "grad_norm": 0.13652457296848297, "learning_rate": 7.40473141162371e-06, "loss": 0.4474, "step": 1672 }, { "epoch": 0.36, "grad_norm": 0.18352244794368744, "learning_rate": 7.401671750898829e-06, "loss": 0.4628, "step": 1673 }, { "epoch": 0.36, "grad_norm": 0.16410337388515472, "learning_rate": 7.398610920668102e-06, "loss": 0.5673, "step": 1674 }, { "epoch": 0.36, "grad_norm": 0.14850519597530365, "learning_rate": 7.39554892242202e-06, "loss": 0.48, "step": 1675 }, { "epoch": 0.36, "grad_norm": 0.1457439661026001, "learning_rate": 7.392485757651634e-06, "loss": 0.5061, "step": 1676 }, { "epoch": 0.36, "grad_norm": 0.15839837491512299, "learning_rate": 7.3894214278485685e-06, "loss": 0.5482, "step": 1677 }, { "epoch": 0.36, "grad_norm": 0.1379930078983307, "learning_rate": 7.386355934505015e-06, "loss": 0.5207, "step": 1678 }, { "epoch": 0.36, "grad_norm": 0.2140752226114273, "learning_rate": 7.38328927911373e-06, "loss": 0.5709, "step": 1679 }, { "epoch": 0.36, "grad_norm": 0.16319052875041962, "learning_rate": 7.380221463168036e-06, "loss": 0.5182, "step": 1680 }, { "epoch": 0.36, "grad_norm": 0.12774449586868286, "learning_rate": 7.3771524881618204e-06, "loss": 0.5274, "step": 1681 }, { "epoch": 0.36, "grad_norm": 0.13371047377586365, "learning_rate": 7.374082355589536e-06, "loss": 0.4983, "step": 1682 }, { "epoch": 0.36, "grad_norm": 0.13684460520744324, "learning_rate": 7.371011066946199e-06, "loss": 0.5395, "step": 1683 }, { "epoch": 0.36, "grad_norm": 0.16260729730129242, "learning_rate": 7.367938623727389e-06, "loss": 0.4927, "step": 1684 }, { "epoch": 0.36, "grad_norm": 0.1580437868833542, "learning_rate": 7.364865027429247e-06, "loss": 0.5391, "step": 1685 }, { "epoch": 0.36, "grad_norm": 0.41100969910621643, "learning_rate": 7.361790279548476e-06, "loss": 0.4922, "step": 1686 }, { "epoch": 0.36, "grad_norm": 0.16328592598438263, "learning_rate": 7.358714381582339e-06, "loss": 0.5809, "step": 1687 }, { "epoch": 0.36, "grad_norm": 0.16407454013824463, "learning_rate": 7.35563733502866e-06, "loss": 0.5317, "step": 1688 }, { "epoch": 0.36, "grad_norm": 0.16385860741138458, "learning_rate": 7.352559141385823e-06, "loss": 0.5182, "step": 1689 }, { "epoch": 0.36, "grad_norm": 0.1773800253868103, "learning_rate": 7.3494798021527665e-06, "loss": 0.4972, "step": 1690 }, { "epoch": 0.36, "grad_norm": 0.14111146330833435, "learning_rate": 7.346399318828994e-06, "loss": 0.485, "step": 1691 }, { "epoch": 0.36, "grad_norm": 
0.18736319243907928, "learning_rate": 7.3433176929145574e-06, "loss": 0.532, "step": 1692 }, { "epoch": 0.36, "grad_norm": 0.1659240871667862, "learning_rate": 7.3402349259100725e-06, "loss": 0.4878, "step": 1693 }, { "epoch": 0.36, "grad_norm": 0.13603948056697845, "learning_rate": 7.337151019316708e-06, "loss": 0.5024, "step": 1694 }, { "epoch": 0.37, "grad_norm": 0.14938659965991974, "learning_rate": 7.334065974636186e-06, "loss": 0.4882, "step": 1695 }, { "epoch": 0.37, "grad_norm": 0.15664424002170563, "learning_rate": 7.330979793370784e-06, "loss": 0.4855, "step": 1696 }, { "epoch": 0.37, "grad_norm": 0.15226437151432037, "learning_rate": 7.327892477023335e-06, "loss": 0.5258, "step": 1697 }, { "epoch": 0.37, "grad_norm": 0.20304326713085175, "learning_rate": 7.324804027097221e-06, "loss": 0.5325, "step": 1698 }, { "epoch": 0.37, "grad_norm": 0.14442868530750275, "learning_rate": 7.3217144450963774e-06, "loss": 0.4676, "step": 1699 }, { "epoch": 0.37, "grad_norm": 0.14504297077655792, "learning_rate": 7.318623732525294e-06, "loss": 0.523, "step": 1700 }, { "epoch": 0.37, "grad_norm": 0.13879434764385223, "learning_rate": 7.315531890889007e-06, "loss": 0.5121, "step": 1701 }, { "epoch": 0.37, "grad_norm": 0.16492860019207, "learning_rate": 7.312438921693101e-06, "loss": 0.508, "step": 1702 }, { "epoch": 0.37, "grad_norm": 0.13094115257263184, "learning_rate": 7.309344826443718e-06, "loss": 0.5123, "step": 1703 }, { "epoch": 0.37, "grad_norm": 0.16071003675460815, "learning_rate": 7.30624960664754e-06, "loss": 0.5077, "step": 1704 }, { "epoch": 0.37, "grad_norm": 0.1596524864435196, "learning_rate": 7.3031532638117974e-06, "loss": 0.5193, "step": 1705 }, { "epoch": 0.37, "grad_norm": 0.15532274544239044, "learning_rate": 7.300055799444273e-06, "loss": 0.5651, "step": 1706 }, { "epoch": 0.37, "grad_norm": 0.1956198513507843, "learning_rate": 7.296957215053292e-06, "loss": 0.5238, "step": 1707 }, { "epoch": 0.37, "grad_norm": 0.17350712418556213, "learning_rate": 7.293857512147723e-06, "loss": 0.5064, "step": 1708 }, { "epoch": 0.37, "grad_norm": 0.1837831437587738, "learning_rate": 7.290756692236982e-06, "loss": 0.5456, "step": 1709 }, { "epoch": 0.37, "grad_norm": 0.20104587078094482, "learning_rate": 7.287654756831031e-06, "loss": 0.5701, "step": 1710 }, { "epoch": 0.37, "grad_norm": 0.22067013382911682, "learning_rate": 7.284551707440369e-06, "loss": 0.4858, "step": 1711 }, { "epoch": 0.37, "grad_norm": 0.17873504757881165, "learning_rate": 7.2814475455760445e-06, "loss": 0.5027, "step": 1712 }, { "epoch": 0.37, "grad_norm": 0.16447962820529938, "learning_rate": 7.278342272749643e-06, "loss": 0.4854, "step": 1713 }, { "epoch": 0.37, "grad_norm": 0.18496006727218628, "learning_rate": 7.275235890473291e-06, "loss": 0.5098, "step": 1714 }, { "epoch": 0.37, "grad_norm": 0.20452427864074707, "learning_rate": 7.272128400259658e-06, "loss": 0.4419, "step": 1715 }, { "epoch": 0.37, "grad_norm": 0.16275016963481903, "learning_rate": 7.269019803621953e-06, "loss": 0.535, "step": 1716 }, { "epoch": 0.37, "grad_norm": 0.15786287188529968, "learning_rate": 7.2659101020739195e-06, "loss": 0.4883, "step": 1717 }, { "epoch": 0.37, "grad_norm": 0.1765165776014328, "learning_rate": 7.262799297129843e-06, "loss": 0.5827, "step": 1718 }, { "epoch": 0.37, "grad_norm": 0.12849071621894836, "learning_rate": 7.259687390304546e-06, "loss": 0.4739, "step": 1719 }, { "epoch": 0.37, "grad_norm": 0.18336515128612518, "learning_rate": 7.256574383113386e-06, "loss": 0.5344, "step": 1720 }, { "epoch": 0.37, 
"grad_norm": 0.14962013065814972, "learning_rate": 7.253460277072258e-06, "loss": 0.4984, "step": 1721 }, { "epoch": 0.37, "grad_norm": 0.14270378649234772, "learning_rate": 7.25034507369759e-06, "loss": 0.491, "step": 1722 }, { "epoch": 0.37, "grad_norm": 0.18622830510139465, "learning_rate": 7.247228774506347e-06, "loss": 0.5553, "step": 1723 }, { "epoch": 0.37, "grad_norm": 0.16195961833000183, "learning_rate": 7.244111381016024e-06, "loss": 0.5497, "step": 1724 }, { "epoch": 0.37, "grad_norm": 0.1802990436553955, "learning_rate": 7.2409928947446526e-06, "loss": 0.5371, "step": 1725 }, { "epoch": 0.37, "grad_norm": 0.1768779754638672, "learning_rate": 7.237873317210796e-06, "loss": 0.5328, "step": 1726 }, { "epoch": 0.37, "grad_norm": 0.15915416181087494, "learning_rate": 7.234752649933545e-06, "loss": 0.5206, "step": 1727 }, { "epoch": 0.37, "grad_norm": 0.22865630686283112, "learning_rate": 7.231630894432527e-06, "loss": 0.5433, "step": 1728 }, { "epoch": 0.37, "grad_norm": 0.13628236949443817, "learning_rate": 7.228508052227895e-06, "loss": 0.4809, "step": 1729 }, { "epoch": 0.37, "grad_norm": 0.1925947070121765, "learning_rate": 7.22538412484033e-06, "loss": 0.5716, "step": 1730 }, { "epoch": 0.37, "grad_norm": 0.14507855474948883, "learning_rate": 7.2222591137910454e-06, "loss": 0.5409, "step": 1731 }, { "epoch": 0.37, "grad_norm": 0.1448884755373001, "learning_rate": 7.219133020601783e-06, "loss": 0.5184, "step": 1732 }, { "epoch": 0.37, "grad_norm": 0.24185587465763092, "learning_rate": 7.216005846794807e-06, "loss": 0.5093, "step": 1733 }, { "epoch": 0.37, "grad_norm": 0.14733339846134186, "learning_rate": 7.2128775938929095e-06, "loss": 0.5361, "step": 1734 }, { "epoch": 0.37, "grad_norm": 0.1741349697113037, "learning_rate": 7.209748263419409e-06, "loss": 0.5405, "step": 1735 }, { "epoch": 0.37, "grad_norm": 0.16004079580307007, "learning_rate": 7.206617856898149e-06, "loss": 0.5217, "step": 1736 }, { "epoch": 0.37, "grad_norm": 0.16466408967971802, "learning_rate": 7.203486375853496e-06, "loss": 0.4928, "step": 1737 }, { "epoch": 0.37, "grad_norm": 0.17737893760204315, "learning_rate": 7.20035382181034e-06, "loss": 0.5084, "step": 1738 }, { "epoch": 0.37, "grad_norm": 0.33183491230010986, "learning_rate": 7.197220196294094e-06, "loss": 0.5574, "step": 1739 }, { "epoch": 0.37, "grad_norm": 0.14042764902114868, "learning_rate": 7.194085500830691e-06, "loss": 0.5856, "step": 1740 }, { "epoch": 0.38, "grad_norm": 0.17238366603851318, "learning_rate": 7.190949736946587e-06, "loss": 0.5456, "step": 1741 }, { "epoch": 0.38, "grad_norm": 0.17922283709049225, "learning_rate": 7.1878129061687595e-06, "loss": 0.5223, "step": 1742 }, { "epoch": 0.38, "grad_norm": 0.14631612598896027, "learning_rate": 7.184675010024701e-06, "loss": 0.5193, "step": 1743 }, { "epoch": 0.38, "grad_norm": 0.1614404171705246, "learning_rate": 7.181536050042427e-06, "loss": 0.5372, "step": 1744 }, { "epoch": 0.38, "grad_norm": 0.14466199278831482, "learning_rate": 7.1783960277504685e-06, "loss": 0.4811, "step": 1745 }, { "epoch": 0.38, "grad_norm": 0.14429622888565063, "learning_rate": 7.175254944677874e-06, "loss": 0.4989, "step": 1746 }, { "epoch": 0.38, "grad_norm": 0.1409209966659546, "learning_rate": 7.172112802354212e-06, "loss": 0.5104, "step": 1747 }, { "epoch": 0.38, "grad_norm": 0.19490914046764374, "learning_rate": 7.1689696023095625e-06, "loss": 0.5189, "step": 1748 }, { "epoch": 0.38, "grad_norm": 0.20314301550388336, "learning_rate": 7.165825346074521e-06, "loss": 0.5169, "step": 1749 }, { 
"epoch": 0.38, "grad_norm": 0.1676884889602661, "learning_rate": 7.162680035180201e-06, "loss": 0.5543, "step": 1750 }, { "epoch": 0.38, "grad_norm": 0.17340156435966492, "learning_rate": 7.159533671158225e-06, "loss": 0.5374, "step": 1751 }, { "epoch": 0.38, "grad_norm": 0.1684662252664566, "learning_rate": 7.156386255540732e-06, "loss": 0.5167, "step": 1752 }, { "epoch": 0.38, "grad_norm": 0.1722518354654312, "learning_rate": 7.15323778986037e-06, "loss": 0.5236, "step": 1753 }, { "epoch": 0.38, "grad_norm": 0.1535075604915619, "learning_rate": 7.150088275650302e-06, "loss": 0.5676, "step": 1754 }, { "epoch": 0.38, "grad_norm": 0.2000323235988617, "learning_rate": 7.1469377144441954e-06, "loss": 0.5039, "step": 1755 }, { "epoch": 0.38, "grad_norm": 0.1701248437166214, "learning_rate": 7.143786107776236e-06, "loss": 0.5528, "step": 1756 }, { "epoch": 0.38, "grad_norm": 0.15805946290493011, "learning_rate": 7.140633457181112e-06, "loss": 0.4744, "step": 1757 }, { "epoch": 0.38, "grad_norm": 0.1715155392885208, "learning_rate": 7.137479764194022e-06, "loss": 0.5385, "step": 1758 }, { "epoch": 0.38, "grad_norm": 0.20759384334087372, "learning_rate": 7.134325030350672e-06, "loss": 0.4994, "step": 1759 }, { "epoch": 0.38, "grad_norm": 0.1527446210384369, "learning_rate": 7.131169257187276e-06, "loss": 0.5411, "step": 1760 }, { "epoch": 0.38, "grad_norm": 0.15912318229675293, "learning_rate": 7.128012446240552e-06, "loss": 0.5674, "step": 1761 }, { "epoch": 0.38, "grad_norm": 0.1656845211982727, "learning_rate": 7.1248545990477256e-06, "loss": 0.4999, "step": 1762 }, { "epoch": 0.38, "grad_norm": 0.14019495248794556, "learning_rate": 7.121695717146526e-06, "loss": 0.5353, "step": 1763 }, { "epoch": 0.38, "grad_norm": 0.17298150062561035, "learning_rate": 7.1185358020751875e-06, "loss": 0.5064, "step": 1764 }, { "epoch": 0.38, "grad_norm": 0.14910168945789337, "learning_rate": 7.1153748553724425e-06, "loss": 0.5262, "step": 1765 }, { "epoch": 0.38, "grad_norm": 0.20957139134407043, "learning_rate": 7.112212878577533e-06, "loss": 0.5084, "step": 1766 }, { "epoch": 0.38, "grad_norm": 0.17487388849258423, "learning_rate": 7.109049873230198e-06, "loss": 0.5578, "step": 1767 }, { "epoch": 0.38, "grad_norm": 0.20940136909484863, "learning_rate": 7.1058858408706765e-06, "loss": 0.5895, "step": 1768 }, { "epoch": 0.38, "grad_norm": 0.23022903501987457, "learning_rate": 7.1027207830397134e-06, "loss": 0.5334, "step": 1769 }, { "epoch": 0.38, "grad_norm": 0.15674887597560883, "learning_rate": 7.099554701278547e-06, "loss": 0.5144, "step": 1770 }, { "epoch": 0.38, "grad_norm": 0.15679983794689178, "learning_rate": 7.096387597128916e-06, "loss": 0.5139, "step": 1771 }, { "epoch": 0.38, "grad_norm": 0.19758965075016022, "learning_rate": 7.093219472133059e-06, "loss": 0.5184, "step": 1772 }, { "epoch": 0.38, "grad_norm": 0.17212289571762085, "learning_rate": 7.0900503278337074e-06, "loss": 0.5164, "step": 1773 }, { "epoch": 0.38, "grad_norm": 0.18704959750175476, "learning_rate": 7.086880165774093e-06, "loss": 0.5332, "step": 1774 }, { "epoch": 0.38, "grad_norm": 0.1653163731098175, "learning_rate": 7.083708987497943e-06, "loss": 0.536, "step": 1775 }, { "epoch": 0.38, "grad_norm": 0.1986512988805771, "learning_rate": 7.080536794549477e-06, "loss": 0.5382, "step": 1776 }, { "epoch": 0.38, "grad_norm": 0.15724928677082062, "learning_rate": 7.077363588473408e-06, "loss": 0.5549, "step": 1777 }, { "epoch": 0.38, "grad_norm": 0.14671437442302704, "learning_rate": 7.0741893708149475e-06, "loss": 0.5662, "step": 
1778 }, { "epoch": 0.38, "grad_norm": 0.15560339391231537, "learning_rate": 7.071014143119796e-06, "loss": 0.5198, "step": 1779 }, { "epoch": 0.38, "grad_norm": 0.14752082526683807, "learning_rate": 7.067837906934143e-06, "loss": 0.5337, "step": 1780 }, { "epoch": 0.38, "grad_norm": 0.13522642850875854, "learning_rate": 7.064660663804677e-06, "loss": 0.5066, "step": 1781 }, { "epoch": 0.38, "grad_norm": 0.1374634951353073, "learning_rate": 7.061482415278569e-06, "loss": 0.4911, "step": 1782 }, { "epoch": 0.38, "grad_norm": 0.18049356341362, "learning_rate": 7.058303162903483e-06, "loss": 0.5261, "step": 1783 }, { "epoch": 0.38, "grad_norm": 0.17125682532787323, "learning_rate": 7.055122908227571e-06, "loss": 0.5311, "step": 1784 }, { "epoch": 0.38, "grad_norm": 0.16370706260204315, "learning_rate": 7.051941652799476e-06, "loss": 0.4968, "step": 1785 }, { "epoch": 0.38, "grad_norm": 0.1682046800851822, "learning_rate": 7.0487593981683246e-06, "loss": 0.4958, "step": 1786 }, { "epoch": 0.38, "grad_norm": 0.1765281856060028, "learning_rate": 7.04557614588373e-06, "loss": 0.5139, "step": 1787 }, { "epoch": 0.39, "grad_norm": 0.33266332745552063, "learning_rate": 7.042391897495795e-06, "loss": 0.5654, "step": 1788 }, { "epoch": 0.39, "grad_norm": 0.1499028503894806, "learning_rate": 7.039206654555103e-06, "loss": 0.4745, "step": 1789 }, { "epoch": 0.39, "grad_norm": 0.1392756998538971, "learning_rate": 7.036020418612724e-06, "loss": 0.5564, "step": 1790 }, { "epoch": 0.39, "grad_norm": 0.1803901195526123, "learning_rate": 7.032833191220213e-06, "loss": 0.4915, "step": 1791 }, { "epoch": 0.39, "grad_norm": 0.17533114552497864, "learning_rate": 7.029644973929604e-06, "loss": 0.4861, "step": 1792 }, { "epoch": 0.39, "grad_norm": 0.1752566695213318, "learning_rate": 7.026455768293416e-06, "loss": 0.508, "step": 1793 }, { "epoch": 0.39, "grad_norm": 0.14547456800937653, "learning_rate": 7.023265575864648e-06, "loss": 0.5137, "step": 1794 }, { "epoch": 0.39, "grad_norm": 0.19993162155151367, "learning_rate": 7.020074398196779e-06, "loss": 0.5089, "step": 1795 }, { "epoch": 0.39, "grad_norm": 0.28430238366127014, "learning_rate": 7.016882236843769e-06, "loss": 0.536, "step": 1796 }, { "epoch": 0.39, "grad_norm": 0.16877298057079315, "learning_rate": 7.013689093360059e-06, "loss": 0.5131, "step": 1797 }, { "epoch": 0.39, "grad_norm": 0.12015072256326675, "learning_rate": 7.0104949693005645e-06, "loss": 0.4872, "step": 1798 }, { "epoch": 0.39, "grad_norm": 0.154635950922966, "learning_rate": 7.0072998662206775e-06, "loss": 0.5255, "step": 1799 }, { "epoch": 0.39, "grad_norm": 0.1528724581003189, "learning_rate": 7.00410378567627e-06, "loss": 0.5689, "step": 1800 }, { "epoch": 0.39, "grad_norm": 0.1700393408536911, "learning_rate": 7.000906729223693e-06, "loss": 0.4934, "step": 1801 }, { "epoch": 0.39, "grad_norm": 0.1635403037071228, "learning_rate": 6.997708698419765e-06, "loss": 0.4775, "step": 1802 }, { "epoch": 0.39, "grad_norm": 0.14558027684688568, "learning_rate": 6.994509694821784e-06, "loss": 0.5529, "step": 1803 }, { "epoch": 0.39, "grad_norm": 0.1189364641904831, "learning_rate": 6.99130971998752e-06, "loss": 0.5022, "step": 1804 }, { "epoch": 0.39, "grad_norm": 0.17554467916488647, "learning_rate": 6.988108775475218e-06, "loss": 0.5326, "step": 1805 }, { "epoch": 0.39, "grad_norm": 0.15480519831180573, "learning_rate": 6.98490686284359e-06, "loss": 0.4882, "step": 1806 }, { "epoch": 0.39, "grad_norm": 0.1570086032152176, "learning_rate": 6.981703983651827e-06, "loss": 0.4771, "step": 1807 }, 
{ "epoch": 0.39, "grad_norm": 0.14414653182029724, "learning_rate": 6.978500139459583e-06, "loss": 0.4844, "step": 1808 }, { "epoch": 0.39, "grad_norm": 0.181270033121109, "learning_rate": 6.97529533182699e-06, "loss": 0.6205, "step": 1809 }, { "epoch": 0.39, "grad_norm": 0.13571658730506897, "learning_rate": 6.972089562314644e-06, "loss": 0.5364, "step": 1810 }, { "epoch": 0.39, "grad_norm": 0.12950097024440765, "learning_rate": 6.968882832483606e-06, "loss": 0.5254, "step": 1811 }, { "epoch": 0.39, "grad_norm": 0.15108050405979156, "learning_rate": 6.9656751438954115e-06, "loss": 0.5432, "step": 1812 }, { "epoch": 0.39, "grad_norm": 0.1494326889514923, "learning_rate": 6.962466498112062e-06, "loss": 0.5615, "step": 1813 }, { "epoch": 0.39, "grad_norm": 0.17007635533809662, "learning_rate": 6.959256896696021e-06, "loss": 0.5191, "step": 1814 }, { "epoch": 0.39, "grad_norm": 0.16112545132637024, "learning_rate": 6.956046341210221e-06, "loss": 0.5374, "step": 1815 }, { "epoch": 0.39, "grad_norm": 0.1815643608570099, "learning_rate": 6.952834833218056e-06, "loss": 0.5312, "step": 1816 }, { "epoch": 0.39, "grad_norm": 0.14015376567840576, "learning_rate": 6.949622374283387e-06, "loss": 0.5012, "step": 1817 }, { "epoch": 0.39, "grad_norm": 0.14989694952964783, "learning_rate": 6.946408965970536e-06, "loss": 0.5075, "step": 1818 }, { "epoch": 0.39, "grad_norm": 0.1673702746629715, "learning_rate": 6.943194609844288e-06, "loss": 0.5485, "step": 1819 }, { "epoch": 0.39, "grad_norm": 0.1309339702129364, "learning_rate": 6.939979307469892e-06, "loss": 0.5218, "step": 1820 }, { "epoch": 0.39, "grad_norm": 0.1157936230301857, "learning_rate": 6.93676306041305e-06, "loss": 0.502, "step": 1821 }, { "epoch": 0.39, "grad_norm": 0.1451912224292755, "learning_rate": 6.933545870239933e-06, "loss": 0.5339, "step": 1822 }, { "epoch": 0.39, "grad_norm": 0.18552608788013458, "learning_rate": 6.930327738517168e-06, "loss": 0.4766, "step": 1823 }, { "epoch": 0.39, "grad_norm": 0.1459437906742096, "learning_rate": 6.927108666811837e-06, "loss": 0.5381, "step": 1824 }, { "epoch": 0.39, "grad_norm": 0.14324288070201874, "learning_rate": 6.923888656691487e-06, "loss": 0.4846, "step": 1825 }, { "epoch": 0.39, "grad_norm": 0.14252141118049622, "learning_rate": 6.920667709724113e-06, "loss": 0.4756, "step": 1826 }, { "epoch": 0.39, "grad_norm": 0.1347956657409668, "learning_rate": 6.917445827478175e-06, "loss": 0.5006, "step": 1827 }, { "epoch": 0.39, "grad_norm": 0.17314203083515167, "learning_rate": 6.914223011522581e-06, "loss": 0.5711, "step": 1828 }, { "epoch": 0.39, "grad_norm": 0.13734053075313568, "learning_rate": 6.9109992634267e-06, "loss": 0.4959, "step": 1829 }, { "epoch": 0.39, "grad_norm": 0.15517868101596832, "learning_rate": 6.90777458476035e-06, "loss": 0.5151, "step": 1830 }, { "epoch": 0.39, "grad_norm": 0.17450636625289917, "learning_rate": 6.9045489770938045e-06, "loss": 0.4883, "step": 1831 }, { "epoch": 0.39, "grad_norm": 0.202430859208107, "learning_rate": 6.901322441997791e-06, "loss": 0.4894, "step": 1832 }, { "epoch": 0.39, "grad_norm": 0.27107375860214233, "learning_rate": 6.898094981043482e-06, "loss": 0.5584, "step": 1833 }, { "epoch": 0.4, "grad_norm": 0.15221843123435974, "learning_rate": 6.894866595802509e-06, "loss": 0.5003, "step": 1834 }, { "epoch": 0.4, "grad_norm": 0.17178794741630554, "learning_rate": 6.89163728784695e-06, "loss": 0.548, "step": 1835 }, { "epoch": 0.4, "grad_norm": 0.16640210151672363, "learning_rate": 6.888407058749331e-06, "loss": 0.5008, "step": 1836 }, { 
"epoch": 0.4, "grad_norm": 0.19455331563949585, "learning_rate": 6.885175910082631e-06, "loss": 0.5069, "step": 1837 }, { "epoch": 0.4, "grad_norm": 0.1528869867324829, "learning_rate": 6.881943843420268e-06, "loss": 0.5051, "step": 1838 }, { "epoch": 0.4, "grad_norm": 0.16115941107273102, "learning_rate": 6.878710860336118e-06, "loss": 0.4924, "step": 1839 }, { "epoch": 0.4, "grad_norm": 0.12841373682022095, "learning_rate": 6.875476962404495e-06, "loss": 0.4966, "step": 1840 }, { "epoch": 0.4, "grad_norm": 0.1625949740409851, "learning_rate": 6.8722421512001625e-06, "loss": 0.5575, "step": 1841 }, { "epoch": 0.4, "grad_norm": 0.18129919469356537, "learning_rate": 6.869006428298328e-06, "loss": 0.5509, "step": 1842 }, { "epoch": 0.4, "grad_norm": 0.14833548665046692, "learning_rate": 6.865769795274641e-06, "loss": 0.5444, "step": 1843 }, { "epoch": 0.4, "grad_norm": 0.14769743382930756, "learning_rate": 6.862532253705199e-06, "loss": 0.4723, "step": 1844 }, { "epoch": 0.4, "grad_norm": 0.13029511272907257, "learning_rate": 6.859293805166536e-06, "loss": 0.4908, "step": 1845 }, { "epoch": 0.4, "grad_norm": 0.19006066024303436, "learning_rate": 6.85605445123563e-06, "loss": 0.4983, "step": 1846 }, { "epoch": 0.4, "grad_norm": 0.13327574729919434, "learning_rate": 6.852814193489903e-06, "loss": 0.5046, "step": 1847 }, { "epoch": 0.4, "grad_norm": 0.16421039402484894, "learning_rate": 6.849573033507213e-06, "loss": 0.4845, "step": 1848 }, { "epoch": 0.4, "grad_norm": 0.14652986824512482, "learning_rate": 6.846330972865857e-06, "loss": 0.5351, "step": 1849 }, { "epoch": 0.4, "grad_norm": 0.1581708788871765, "learning_rate": 6.843088013144575e-06, "loss": 0.5125, "step": 1850 }, { "epoch": 0.4, "grad_norm": 0.13055890798568726, "learning_rate": 6.839844155922543e-06, "loss": 0.4872, "step": 1851 }, { "epoch": 0.4, "grad_norm": 0.17920167744159698, "learning_rate": 6.8365994027793695e-06, "loss": 0.5181, "step": 1852 }, { "epoch": 0.4, "grad_norm": 0.16211476922035217, "learning_rate": 6.833353755295104e-06, "loss": 0.4617, "step": 1853 }, { "epoch": 0.4, "grad_norm": 0.15161064267158508, "learning_rate": 6.830107215050232e-06, "loss": 0.4736, "step": 1854 }, { "epoch": 0.4, "grad_norm": 0.15771758556365967, "learning_rate": 6.826859783625674e-06, "loss": 0.5481, "step": 1855 }, { "epoch": 0.4, "grad_norm": 0.17663753032684326, "learning_rate": 6.823611462602777e-06, "loss": 0.562, "step": 1856 }, { "epoch": 0.4, "grad_norm": 0.16153866052627563, "learning_rate": 6.82036225356333e-06, "loss": 0.4947, "step": 1857 }, { "epoch": 0.4, "grad_norm": 0.20720727741718292, "learning_rate": 6.817112158089554e-06, "loss": 0.5606, "step": 1858 }, { "epoch": 0.4, "grad_norm": 0.21727946400642395, "learning_rate": 6.813861177764094e-06, "loss": 0.5017, "step": 1859 }, { "epoch": 0.4, "grad_norm": 0.2113008350133896, "learning_rate": 6.8106093141700336e-06, "loss": 0.5526, "step": 1860 }, { "epoch": 0.4, "grad_norm": 0.16218236088752747, "learning_rate": 6.807356568890884e-06, "loss": 0.4807, "step": 1861 }, { "epoch": 0.4, "grad_norm": 0.18519651889801025, "learning_rate": 6.804102943510583e-06, "loss": 0.5168, "step": 1862 }, { "epoch": 0.4, "grad_norm": 0.18724150955677032, "learning_rate": 6.800848439613504e-06, "loss": 0.4815, "step": 1863 }, { "epoch": 0.4, "grad_norm": 0.14294007420539856, "learning_rate": 6.797593058784437e-06, "loss": 0.5586, "step": 1864 }, { "epoch": 0.4, "grad_norm": 0.159059077501297, "learning_rate": 6.7943368026086124e-06, "loss": 0.5098, "step": 1865 }, { "epoch": 0.4, 
"grad_norm": 0.16052033007144928, "learning_rate": 6.791079672671677e-06, "loss": 0.5117, "step": 1866 }, { "epoch": 0.4, "grad_norm": 0.1647024303674698, "learning_rate": 6.787821670559705e-06, "loss": 0.5381, "step": 1867 }, { "epoch": 0.4, "grad_norm": 0.18817616999149323, "learning_rate": 6.784562797859198e-06, "loss": 0.4719, "step": 1868 }, { "epoch": 0.4, "grad_norm": 0.18448995053768158, "learning_rate": 6.78130305615708e-06, "loss": 0.5259, "step": 1869 }, { "epoch": 0.4, "grad_norm": 0.1643984615802765, "learning_rate": 6.7780424470407004e-06, "loss": 0.5437, "step": 1870 }, { "epoch": 0.4, "grad_norm": 0.14963030815124512, "learning_rate": 6.774780972097823e-06, "loss": 0.4785, "step": 1871 }, { "epoch": 0.4, "grad_norm": 0.18385331332683563, "learning_rate": 6.771518632916645e-06, "loss": 0.4909, "step": 1872 }, { "epoch": 0.4, "grad_norm": 0.1393522322177887, "learning_rate": 6.7682554310857755e-06, "loss": 0.4809, "step": 1873 }, { "epoch": 0.4, "grad_norm": 0.16635462641716003, "learning_rate": 6.7649913681942455e-06, "loss": 0.5425, "step": 1874 }, { "epoch": 0.4, "grad_norm": 0.15144184231758118, "learning_rate": 6.761726445831511e-06, "loss": 0.5033, "step": 1875 }, { "epoch": 0.4, "grad_norm": 0.17777347564697266, "learning_rate": 6.758460665587437e-06, "loss": 0.5561, "step": 1876 }, { "epoch": 0.4, "grad_norm": 0.2699100375175476, "learning_rate": 6.755194029052313e-06, "loss": 0.5314, "step": 1877 }, { "epoch": 0.4, "grad_norm": 0.17995339632034302, "learning_rate": 6.751926537816846e-06, "loss": 0.5097, "step": 1878 }, { "epoch": 0.4, "grad_norm": 0.14517782628536224, "learning_rate": 6.748658193472155e-06, "loss": 0.524, "step": 1879 }, { "epoch": 0.4, "grad_norm": 0.14715701341629028, "learning_rate": 6.745388997609774e-06, "loss": 0.5633, "step": 1880 }, { "epoch": 0.41, "grad_norm": 0.16807079315185547, "learning_rate": 6.7421189518216576e-06, "loss": 0.5106, "step": 1881 }, { "epoch": 0.41, "grad_norm": 0.1584351658821106, "learning_rate": 6.738848057700169e-06, "loss": 0.5602, "step": 1882 }, { "epoch": 0.41, "grad_norm": 0.16300451755523682, "learning_rate": 6.735576316838087e-06, "loss": 0.5455, "step": 1883 }, { "epoch": 0.41, "grad_norm": 0.16324667632579803, "learning_rate": 6.732303730828601e-06, "loss": 0.5247, "step": 1884 }, { "epoch": 0.41, "grad_norm": 0.16887761652469635, "learning_rate": 6.7290303012653136e-06, "loss": 0.4953, "step": 1885 }, { "epoch": 0.41, "grad_norm": 0.1934385746717453, "learning_rate": 6.725756029742234e-06, "loss": 0.4727, "step": 1886 }, { "epoch": 0.41, "grad_norm": 0.17485982179641724, "learning_rate": 6.7224809178537894e-06, "loss": 0.5003, "step": 1887 }, { "epoch": 0.41, "grad_norm": 0.14065895974636078, "learning_rate": 6.7192049671948115e-06, "loss": 0.4841, "step": 1888 }, { "epoch": 0.41, "grad_norm": 0.12996014952659607, "learning_rate": 6.715928179360538e-06, "loss": 0.4906, "step": 1889 }, { "epoch": 0.41, "grad_norm": 0.14599494636058807, "learning_rate": 6.712650555946616e-06, "loss": 0.5114, "step": 1890 }, { "epoch": 0.41, "grad_norm": 0.1689714789390564, "learning_rate": 6.709372098549104e-06, "loss": 0.5318, "step": 1891 }, { "epoch": 0.41, "grad_norm": 0.14123961329460144, "learning_rate": 6.706092808764459e-06, "loss": 0.5013, "step": 1892 }, { "epoch": 0.41, "grad_norm": 0.14629031717777252, "learning_rate": 6.702812688189551e-06, "loss": 0.5524, "step": 1893 }, { "epoch": 0.41, "grad_norm": 0.1583494246006012, "learning_rate": 6.699531738421648e-06, "loss": 0.5285, "step": 1894 }, { "epoch": 0.41, 
"grad_norm": 0.17046624422073364, "learning_rate": 6.696249961058426e-06, "loss": 0.5125, "step": 1895 }, { "epoch": 0.41, "grad_norm": 0.1436389535665512, "learning_rate": 6.692967357697961e-06, "loss": 0.5045, "step": 1896 }, { "epoch": 0.41, "grad_norm": 0.18508578836917877, "learning_rate": 6.689683929938736e-06, "loss": 0.5401, "step": 1897 }, { "epoch": 0.41, "grad_norm": 0.1609339416027069, "learning_rate": 6.6863996793796286e-06, "loss": 0.5026, "step": 1898 }, { "epoch": 0.41, "grad_norm": 0.17639221251010895, "learning_rate": 6.683114607619923e-06, "loss": 0.5563, "step": 1899 }, { "epoch": 0.41, "grad_norm": 0.15782758593559265, "learning_rate": 6.6798287162593e-06, "loss": 0.5344, "step": 1900 }, { "epoch": 0.41, "grad_norm": 0.14880798757076263, "learning_rate": 6.676542006897842e-06, "loss": 0.4987, "step": 1901 }, { "epoch": 0.41, "grad_norm": 0.18628853559494019, "learning_rate": 6.6732544811360255e-06, "loss": 0.4961, "step": 1902 }, { "epoch": 0.41, "grad_norm": 0.18380938470363617, "learning_rate": 6.669966140574729e-06, "loss": 0.5529, "step": 1903 }, { "epoch": 0.41, "grad_norm": 0.18866044282913208, "learning_rate": 6.666676986815227e-06, "loss": 0.5462, "step": 1904 }, { "epoch": 0.41, "grad_norm": 0.16578936576843262, "learning_rate": 6.663387021459187e-06, "loss": 0.513, "step": 1905 }, { "epoch": 0.41, "grad_norm": 0.18033047020435333, "learning_rate": 6.660096246108677e-06, "loss": 0.4892, "step": 1906 }, { "epoch": 0.41, "grad_norm": 0.16443459689617157, "learning_rate": 6.656804662366153e-06, "loss": 0.5372, "step": 1907 }, { "epoch": 0.41, "grad_norm": 0.14939545094966888, "learning_rate": 6.653512271834468e-06, "loss": 0.5273, "step": 1908 }, { "epoch": 0.41, "grad_norm": 0.17759068310260773, "learning_rate": 6.650219076116868e-06, "loss": 0.4714, "step": 1909 }, { "epoch": 0.41, "grad_norm": 0.1866803765296936, "learning_rate": 6.646925076816994e-06, "loss": 0.5261, "step": 1910 }, { "epoch": 0.41, "grad_norm": 0.15621764957904816, "learning_rate": 6.643630275538871e-06, "loss": 0.521, "step": 1911 }, { "epoch": 0.41, "grad_norm": 0.20561483502388, "learning_rate": 6.640334673886921e-06, "loss": 0.531, "step": 1912 }, { "epoch": 0.41, "grad_norm": 0.1349986344575882, "learning_rate": 6.637038273465952e-06, "loss": 0.5328, "step": 1913 }, { "epoch": 0.41, "grad_norm": 0.1595732718706131, "learning_rate": 6.633741075881163e-06, "loss": 0.5151, "step": 1914 }, { "epoch": 0.41, "grad_norm": 0.15593409538269043, "learning_rate": 6.63044308273814e-06, "loss": 0.5507, "step": 1915 }, { "epoch": 0.41, "grad_norm": 0.1654960662126541, "learning_rate": 6.627144295642859e-06, "loss": 0.5172, "step": 1916 }, { "epoch": 0.41, "grad_norm": 0.13034138083457947, "learning_rate": 6.6238447162016786e-06, "loss": 0.561, "step": 1917 }, { "epoch": 0.41, "grad_norm": 0.14604593813419342, "learning_rate": 6.6205443460213445e-06, "loss": 0.5173, "step": 1918 }, { "epoch": 0.41, "grad_norm": 0.18159790337085724, "learning_rate": 6.617243186708989e-06, "loss": 0.5295, "step": 1919 }, { "epoch": 0.41, "grad_norm": 0.1321515291929245, "learning_rate": 6.613941239872129e-06, "loss": 0.4762, "step": 1920 }, { "epoch": 0.41, "grad_norm": 0.13790853321552277, "learning_rate": 6.610638507118663e-06, "loss": 0.5172, "step": 1921 }, { "epoch": 0.41, "grad_norm": 0.15198110044002533, "learning_rate": 6.607334990056873e-06, "loss": 0.5019, "step": 1922 }, { "epoch": 0.41, "grad_norm": 0.1440410614013672, "learning_rate": 6.604030690295422e-06, "loss": 0.481, "step": 1923 }, { "epoch": 0.41, 
"grad_norm": 0.23520071804523468, "learning_rate": 6.600725609443356e-06, "loss": 0.4935, "step": 1924 }, { "epoch": 0.41, "grad_norm": 0.12442398816347122, "learning_rate": 6.597419749110099e-06, "loss": 0.5067, "step": 1925 }, { "epoch": 0.41, "grad_norm": 0.19941824674606323, "learning_rate": 6.594113110905458e-06, "loss": 0.5489, "step": 1926 }, { "epoch": 0.42, "grad_norm": 0.16936185956001282, "learning_rate": 6.5908056964396135e-06, "loss": 0.5173, "step": 1927 }, { "epoch": 0.42, "grad_norm": 0.1414109170436859, "learning_rate": 6.587497507323132e-06, "loss": 0.4946, "step": 1928 }, { "epoch": 0.42, "grad_norm": 0.1461210548877716, "learning_rate": 6.584188545166948e-06, "loss": 0.5585, "step": 1929 }, { "epoch": 0.42, "grad_norm": 0.14086653292179108, "learning_rate": 6.580878811582379e-06, "loss": 0.5138, "step": 1930 }, { "epoch": 0.42, "grad_norm": 0.13712497055530548, "learning_rate": 6.5775683081811144e-06, "loss": 0.5223, "step": 1931 }, { "epoch": 0.42, "grad_norm": 0.18051303923130035, "learning_rate": 6.574257036575224e-06, "loss": 0.5229, "step": 1932 }, { "epoch": 0.42, "grad_norm": 0.18365350365638733, "learning_rate": 6.5709449983771414e-06, "loss": 0.5357, "step": 1933 }, { "epoch": 0.42, "grad_norm": 0.1633131057024002, "learning_rate": 6.567632195199686e-06, "loss": 0.5919, "step": 1934 }, { "epoch": 0.42, "grad_norm": 0.18704870343208313, "learning_rate": 6.564318628656039e-06, "loss": 0.5212, "step": 1935 }, { "epoch": 0.42, "grad_norm": 0.15724125504493713, "learning_rate": 6.5610043003597615e-06, "loss": 0.5219, "step": 1936 }, { "epoch": 0.42, "grad_norm": 0.14116469025611877, "learning_rate": 6.557689211924779e-06, "loss": 0.5133, "step": 1937 }, { "epoch": 0.42, "grad_norm": 0.20150695741176605, "learning_rate": 6.554373364965392e-06, "loss": 0.5256, "step": 1938 }, { "epoch": 0.42, "grad_norm": 0.18280090391635895, "learning_rate": 6.551056761096269e-06, "loss": 0.5481, "step": 1939 }, { "epoch": 0.42, "grad_norm": 0.18789951503276825, "learning_rate": 6.547739401932443e-06, "loss": 0.4974, "step": 1940 }, { "epoch": 0.42, "grad_norm": 0.15406067669391632, "learning_rate": 6.544421289089321e-06, "loss": 0.543, "step": 1941 }, { "epoch": 0.42, "grad_norm": 0.16543880105018616, "learning_rate": 6.541102424182676e-06, "loss": 0.5503, "step": 1942 }, { "epoch": 0.42, "grad_norm": 0.17979435622692108, "learning_rate": 6.537782808828641e-06, "loss": 0.5514, "step": 1943 }, { "epoch": 0.42, "grad_norm": 0.19799616932868958, "learning_rate": 6.5344624446437234e-06, "loss": 0.499, "step": 1944 }, { "epoch": 0.42, "grad_norm": 0.16152727603912354, "learning_rate": 6.531141333244789e-06, "loss": 0.5483, "step": 1945 }, { "epoch": 0.42, "grad_norm": 0.16674454510211945, "learning_rate": 6.527819476249066e-06, "loss": 0.5127, "step": 1946 }, { "epoch": 0.42, "grad_norm": 0.16409684717655182, "learning_rate": 6.5244968752741555e-06, "loss": 0.5407, "step": 1947 }, { "epoch": 0.42, "grad_norm": 0.1826597899198532, "learning_rate": 6.521173531938011e-06, "loss": 0.446, "step": 1948 }, { "epoch": 0.42, "grad_norm": 0.17517463862895966, "learning_rate": 6.517849447858951e-06, "loss": 0.5539, "step": 1949 }, { "epoch": 0.42, "grad_norm": 0.14857599139213562, "learning_rate": 6.514524624655654e-06, "loss": 0.5278, "step": 1950 }, { "epoch": 0.42, "grad_norm": 0.13251933455467224, "learning_rate": 6.511199063947159e-06, "loss": 0.4874, "step": 1951 }, { "epoch": 0.42, "grad_norm": 0.138553187251091, "learning_rate": 6.507872767352863e-06, "loss": 0.5654, "step": 1952 }, { 
"epoch": 0.42, "grad_norm": 0.13305741548538208, "learning_rate": 6.504545736492526e-06, "loss": 0.5318, "step": 1953 }, { "epoch": 0.42, "grad_norm": 0.14779391884803772, "learning_rate": 6.50121797298626e-06, "loss": 0.5017, "step": 1954 }, { "epoch": 0.42, "grad_norm": 0.1407061219215393, "learning_rate": 6.497889478454534e-06, "loss": 0.4967, "step": 1955 }, { "epoch": 0.42, "grad_norm": 0.14632262289524078, "learning_rate": 6.494560254518179e-06, "loss": 0.4989, "step": 1956 }, { "epoch": 0.42, "grad_norm": 0.2105487734079361, "learning_rate": 6.491230302798372e-06, "loss": 0.5095, "step": 1957 }, { "epoch": 0.42, "grad_norm": 0.15186044573783875, "learning_rate": 6.487899624916654e-06, "loss": 0.5069, "step": 1958 }, { "epoch": 0.42, "grad_norm": 0.15018121898174286, "learning_rate": 6.484568222494911e-06, "loss": 0.5031, "step": 1959 }, { "epoch": 0.42, "grad_norm": 0.15453185141086578, "learning_rate": 6.481236097155389e-06, "loss": 0.513, "step": 1960 }, { "epoch": 0.42, "grad_norm": 0.14290063083171844, "learning_rate": 6.47790325052068e-06, "loss": 0.4524, "step": 1961 }, { "epoch": 0.42, "grad_norm": 0.17694444954395294, "learning_rate": 6.4745696842137305e-06, "loss": 0.5628, "step": 1962 }, { "epoch": 0.42, "grad_norm": 0.15745702385902405, "learning_rate": 6.4712353998578396e-06, "loss": 0.5302, "step": 1963 }, { "epoch": 0.42, "grad_norm": 0.1511646956205368, "learning_rate": 6.467900399076651e-06, "loss": 0.5041, "step": 1964 }, { "epoch": 0.42, "grad_norm": 0.15910549461841583, "learning_rate": 6.46456468349416e-06, "loss": 0.5193, "step": 1965 }, { "epoch": 0.42, "grad_norm": 0.15061886608600616, "learning_rate": 6.461228254734711e-06, "loss": 0.48, "step": 1966 }, { "epoch": 0.42, "grad_norm": 0.1490405946969986, "learning_rate": 6.4578911144229915e-06, "loss": 0.4894, "step": 1967 }, { "epoch": 0.42, "grad_norm": 0.13372862339019775, "learning_rate": 6.454553264184041e-06, "loss": 0.5259, "step": 1968 }, { "epoch": 0.42, "grad_norm": 0.15754102170467377, "learning_rate": 6.451214705643241e-06, "loss": 0.5001, "step": 1969 }, { "epoch": 0.42, "grad_norm": 0.17153845727443695, "learning_rate": 6.447875440426319e-06, "loss": 0.5492, "step": 1970 }, { "epoch": 0.42, "grad_norm": 0.1674170047044754, "learning_rate": 6.444535470159346e-06, "loss": 0.5032, "step": 1971 }, { "epoch": 0.42, "grad_norm": 0.12836651504039764, "learning_rate": 6.441194796468739e-06, "loss": 0.4732, "step": 1972 }, { "epoch": 0.43, "grad_norm": 0.1573239266872406, "learning_rate": 6.437853420981254e-06, "loss": 0.4972, "step": 1973 }, { "epoch": 0.43, "grad_norm": 0.18357399106025696, "learning_rate": 6.434511345323988e-06, "loss": 0.5351, "step": 1974 }, { "epoch": 0.43, "grad_norm": 0.14846058189868927, "learning_rate": 6.431168571124387e-06, "loss": 0.4689, "step": 1975 }, { "epoch": 0.43, "grad_norm": 0.14961528778076172, "learning_rate": 6.427825100010225e-06, "loss": 0.5394, "step": 1976 }, { "epoch": 0.43, "grad_norm": 0.2099412977695465, "learning_rate": 6.424480933609626e-06, "loss": 0.5802, "step": 1977 }, { "epoch": 0.43, "grad_norm": 0.1339603066444397, "learning_rate": 6.421136073551047e-06, "loss": 0.499, "step": 1978 }, { "epoch": 0.43, "grad_norm": 0.1474086493253708, "learning_rate": 6.417790521463282e-06, "loss": 0.511, "step": 1979 }, { "epoch": 0.43, "grad_norm": 0.13013140857219696, "learning_rate": 6.414444278975465e-06, "loss": 0.5215, "step": 1980 }, { "epoch": 0.43, "grad_norm": 0.14738723635673523, "learning_rate": 6.411097347717068e-06, "loss": 0.5079, "step": 1981 }, { 
"epoch": 0.43, "grad_norm": 0.18411760032176971, "learning_rate": 6.407749729317892e-06, "loss": 0.51, "step": 1982 }, { "epoch": 0.43, "grad_norm": 0.15733294188976288, "learning_rate": 6.404401425408079e-06, "loss": 0.5247, "step": 1983 }, { "epoch": 0.43, "grad_norm": 0.1328936368227005, "learning_rate": 6.401052437618098e-06, "loss": 0.5223, "step": 1984 }, { "epoch": 0.43, "grad_norm": 0.15146395564079285, "learning_rate": 6.397702767578761e-06, "loss": 0.5676, "step": 1985 }, { "epoch": 0.43, "grad_norm": 0.1269007921218872, "learning_rate": 6.394352416921201e-06, "loss": 0.47, "step": 1986 }, { "epoch": 0.43, "grad_norm": 0.15181781351566315, "learning_rate": 6.39100138727689e-06, "loss": 0.56, "step": 1987 }, { "epoch": 0.43, "grad_norm": 0.1406852751970291, "learning_rate": 6.387649680277629e-06, "loss": 0.5753, "step": 1988 }, { "epoch": 0.43, "grad_norm": 0.2074470818042755, "learning_rate": 6.384297297555546e-06, "loss": 0.528, "step": 1989 }, { "epoch": 0.43, "grad_norm": 0.15589666366577148, "learning_rate": 6.380944240743101e-06, "loss": 0.5103, "step": 1990 }, { "epoch": 0.43, "grad_norm": 0.156142920255661, "learning_rate": 6.377590511473083e-06, "loss": 0.5082, "step": 1991 }, { "epoch": 0.43, "grad_norm": 0.18364138901233673, "learning_rate": 6.374236111378605e-06, "loss": 0.5319, "step": 1992 }, { "epoch": 0.43, "grad_norm": 0.13717058300971985, "learning_rate": 6.37088104209311e-06, "loss": 0.5207, "step": 1993 }, { "epoch": 0.43, "grad_norm": 0.1605088859796524, "learning_rate": 6.3675253052503645e-06, "loss": 0.4823, "step": 1994 }, { "epoch": 0.43, "grad_norm": 0.13547933101654053, "learning_rate": 6.364168902484461e-06, "loss": 0.5081, "step": 1995 }, { "epoch": 0.43, "grad_norm": 0.1631360799074173, "learning_rate": 6.360811835429817e-06, "loss": 0.5494, "step": 1996 }, { "epoch": 0.43, "grad_norm": 0.15566737949848175, "learning_rate": 6.357454105721171e-06, "loss": 0.5708, "step": 1997 }, { "epoch": 0.43, "grad_norm": 0.18726012110710144, "learning_rate": 6.35409571499359e-06, "loss": 0.524, "step": 1998 }, { "epoch": 0.43, "grad_norm": 0.18683874607086182, "learning_rate": 6.350736664882454e-06, "loss": 0.477, "step": 1999 }, { "epoch": 0.43, "grad_norm": 0.15933635830879211, "learning_rate": 6.347376957023471e-06, "loss": 0.5524, "step": 2000 }, { "epoch": 0.43, "grad_norm": 0.16675737500190735, "learning_rate": 6.344016593052669e-06, "loss": 0.5126, "step": 2001 }, { "epoch": 0.43, "grad_norm": 0.22275328636169434, "learning_rate": 6.340655574606391e-06, "loss": 0.5203, "step": 2002 }, { "epoch": 0.43, "grad_norm": 0.1311800628900528, "learning_rate": 6.337293903321303e-06, "loss": 0.5132, "step": 2003 }, { "epoch": 0.43, "grad_norm": 0.12225490063428879, "learning_rate": 6.333931580834387e-06, "loss": 0.5529, "step": 2004 }, { "epoch": 0.43, "grad_norm": 0.14834477007389069, "learning_rate": 6.330568608782941e-06, "loss": 0.5045, "step": 2005 }, { "epoch": 0.43, "grad_norm": 0.13984233140945435, "learning_rate": 6.327204988804583e-06, "loss": 0.5398, "step": 2006 }, { "epoch": 0.43, "grad_norm": 0.13225583732128143, "learning_rate": 6.323840722537243e-06, "loss": 0.5065, "step": 2007 }, { "epoch": 0.43, "grad_norm": 0.16569088399410248, "learning_rate": 6.320475811619167e-06, "loss": 0.529, "step": 2008 }, { "epoch": 0.43, "grad_norm": 0.20376458764076233, "learning_rate": 6.317110257688917e-06, "loss": 0.47, "step": 2009 }, { "epoch": 0.43, "grad_norm": 0.20211917161941528, "learning_rate": 6.313744062385363e-06, "loss": 0.5044, "step": 2010 }, { "epoch": 
0.43, "grad_norm": 0.1894192099571228, "learning_rate": 6.31037722734769e-06, "loss": 0.574, "step": 2011 }, { "epoch": 0.43, "grad_norm": 0.14667464792728424, "learning_rate": 6.307009754215397e-06, "loss": 0.5502, "step": 2012 }, { "epoch": 0.43, "grad_norm": 0.17428962886333466, "learning_rate": 6.303641644628291e-06, "loss": 0.5423, "step": 2013 }, { "epoch": 0.43, "grad_norm": 0.1584947109222412, "learning_rate": 6.300272900226491e-06, "loss": 0.4784, "step": 2014 }, { "epoch": 0.43, "grad_norm": 0.14651672542095184, "learning_rate": 6.296903522650419e-06, "loss": 0.4896, "step": 2015 }, { "epoch": 0.43, "grad_norm": 0.13722088932991028, "learning_rate": 6.2935335135408135e-06, "loss": 0.4324, "step": 2016 }, { "epoch": 0.43, "grad_norm": 0.16364432871341705, "learning_rate": 6.290162874538718e-06, "loss": 0.5051, "step": 2017 }, { "epoch": 0.43, "grad_norm": 0.17197778820991516, "learning_rate": 6.286791607285478e-06, "loss": 0.4707, "step": 2018 }, { "epoch": 0.43, "grad_norm": 0.19821661710739136, "learning_rate": 6.283419713422754e-06, "loss": 0.5365, "step": 2019 }, { "epoch": 0.44, "grad_norm": 0.18750454485416412, "learning_rate": 6.2800471945925e-06, "loss": 0.5813, "step": 2020 }, { "epoch": 0.44, "grad_norm": 0.15011686086654663, "learning_rate": 6.276674052436984e-06, "loss": 0.4686, "step": 2021 }, { "epoch": 0.44, "grad_norm": 0.14810575544834137, "learning_rate": 6.2733002885987734e-06, "loss": 0.5666, "step": 2022 }, { "epoch": 0.44, "grad_norm": 0.15707622468471527, "learning_rate": 6.26992590472074e-06, "loss": 0.4939, "step": 2023 }, { "epoch": 0.44, "grad_norm": 0.16260173916816711, "learning_rate": 6.2665509024460554e-06, "loss": 0.5063, "step": 2024 }, { "epoch": 0.44, "grad_norm": 0.14994855225086212, "learning_rate": 6.263175283418196e-06, "loss": 0.4813, "step": 2025 }, { "epoch": 0.44, "grad_norm": 0.16885532438755035, "learning_rate": 6.259799049280932e-06, "loss": 0.5239, "step": 2026 }, { "epoch": 0.44, "grad_norm": 0.1944415271282196, "learning_rate": 6.256422201678341e-06, "loss": 0.4999, "step": 2027 }, { "epoch": 0.44, "grad_norm": 0.16358405351638794, "learning_rate": 6.253044742254791e-06, "loss": 0.532, "step": 2028 }, { "epoch": 0.44, "grad_norm": 0.184137225151062, "learning_rate": 6.249666672654958e-06, "loss": 0.4797, "step": 2029 }, { "epoch": 0.44, "grad_norm": 0.18166375160217285, "learning_rate": 6.246287994523805e-06, "loss": 0.5129, "step": 2030 }, { "epoch": 0.44, "grad_norm": 0.13478122651576996, "learning_rate": 6.242908709506599e-06, "loss": 0.4996, "step": 2031 }, { "epoch": 0.44, "grad_norm": 0.1508375108242035, "learning_rate": 6.239528819248898e-06, "loss": 0.4822, "step": 2032 }, { "epoch": 0.44, "grad_norm": 0.14239796996116638, "learning_rate": 6.236148325396555e-06, "loss": 0.5381, "step": 2033 }, { "epoch": 0.44, "grad_norm": 0.13590578734874725, "learning_rate": 6.232767229595719e-06, "loss": 0.5076, "step": 2034 }, { "epoch": 0.44, "grad_norm": 0.1495681256055832, "learning_rate": 6.229385533492833e-06, "loss": 0.5012, "step": 2035 }, { "epoch": 0.44, "grad_norm": 0.11667856574058533, "learning_rate": 6.226003238734628e-06, "loss": 0.5408, "step": 2036 }, { "epoch": 0.44, "grad_norm": 0.12598071992397308, "learning_rate": 6.222620346968131e-06, "loss": 0.4822, "step": 2037 }, { "epoch": 0.44, "grad_norm": 0.18622703850269318, "learning_rate": 6.219236859840656e-06, "loss": 0.5583, "step": 2038 }, { "epoch": 0.44, "grad_norm": 0.15623895823955536, "learning_rate": 6.21585277899981e-06, "loss": 0.5272, "step": 2039 }, { 
"epoch": 0.44, "grad_norm": 0.15245303511619568, "learning_rate": 6.2124681060934866e-06, "loss": 0.5504, "step": 2040 }, { "epoch": 0.44, "grad_norm": 0.2059142142534256, "learning_rate": 6.2090828427698706e-06, "loss": 0.5196, "step": 2041 }, { "epoch": 0.44, "grad_norm": 0.14754840731620789, "learning_rate": 6.205696990677431e-06, "loss": 0.5198, "step": 2042 }, { "epoch": 0.44, "grad_norm": 0.14195892214775085, "learning_rate": 6.202310551464924e-06, "loss": 0.523, "step": 2043 }, { "epoch": 0.44, "grad_norm": 0.17063148319721222, "learning_rate": 6.1989235267813964e-06, "loss": 0.5115, "step": 2044 }, { "epoch": 0.44, "grad_norm": 0.1315128356218338, "learning_rate": 6.1955359182761745e-06, "loss": 0.5535, "step": 2045 }, { "epoch": 0.44, "grad_norm": 0.26819273829460144, "learning_rate": 6.192147727598869e-06, "loss": 0.4942, "step": 2046 }, { "epoch": 0.44, "grad_norm": 0.15203434228897095, "learning_rate": 6.188758956399379e-06, "loss": 0.5349, "step": 2047 }, { "epoch": 0.44, "grad_norm": 0.17396771907806396, "learning_rate": 6.185369606327882e-06, "loss": 0.5134, "step": 2048 }, { "epoch": 0.44, "grad_norm": 0.14054559171199799, "learning_rate": 6.1819796790348376e-06, "loss": 0.5346, "step": 2049 }, { "epoch": 0.44, "grad_norm": 0.13480958342552185, "learning_rate": 6.178589176170991e-06, "loss": 0.4995, "step": 2050 }, { "epoch": 0.44, "grad_norm": 0.15606021881103516, "learning_rate": 6.175198099387361e-06, "loss": 0.5519, "step": 2051 }, { "epoch": 0.44, "grad_norm": 0.14711807668209076, "learning_rate": 6.171806450335248e-06, "loss": 0.5303, "step": 2052 }, { "epoch": 0.44, "grad_norm": 0.18359160423278809, "learning_rate": 6.1684142306662366e-06, "loss": 0.5784, "step": 2053 }, { "epoch": 0.44, "grad_norm": 0.15108604729175568, "learning_rate": 6.16502144203218e-06, "loss": 0.5499, "step": 2054 }, { "epoch": 0.44, "grad_norm": 0.12765131890773773, "learning_rate": 6.161628086085218e-06, "loss": 0.5531, "step": 2055 }, { "epoch": 0.44, "grad_norm": 0.18855132162570953, "learning_rate": 6.1582341644777575e-06, "loss": 0.5236, "step": 2056 }, { "epoch": 0.44, "grad_norm": 0.14612235128879547, "learning_rate": 6.15483967886249e-06, "loss": 0.5035, "step": 2057 }, { "epoch": 0.44, "grad_norm": 0.1928872913122177, "learning_rate": 6.151444630892372e-06, "loss": 0.541, "step": 2058 }, { "epoch": 0.44, "grad_norm": 0.16574794054031372, "learning_rate": 6.1480490222206415e-06, "loss": 0.5139, "step": 2059 }, { "epoch": 0.44, "grad_norm": 0.17566706240177155, "learning_rate": 6.144652854500806e-06, "loss": 0.4495, "step": 2060 }, { "epoch": 0.44, "grad_norm": 0.17141076922416687, "learning_rate": 6.1412561293866455e-06, "loss": 0.5434, "step": 2061 }, { "epoch": 0.44, "grad_norm": 0.16970355808734894, "learning_rate": 6.1378588485322145e-06, "loss": 0.5635, "step": 2062 }, { "epoch": 0.44, "grad_norm": 0.20742008090019226, "learning_rate": 6.134461013591832e-06, "loss": 0.5435, "step": 2063 }, { "epoch": 0.44, "grad_norm": 0.1773451417684555, "learning_rate": 6.131062626220094e-06, "loss": 0.5539, "step": 2064 }, { "epoch": 0.44, "grad_norm": 0.18251217901706696, "learning_rate": 6.127663688071859e-06, "loss": 0.5046, "step": 2065 }, { "epoch": 0.45, "grad_norm": 0.19838100671768188, "learning_rate": 6.124264200802259e-06, "loss": 0.4714, "step": 2066 }, { "epoch": 0.45, "grad_norm": 0.154763326048851, "learning_rate": 6.120864166066689e-06, "loss": 0.528, "step": 2067 }, { "epoch": 0.45, "grad_norm": 0.19701255857944489, "learning_rate": 6.117463585520813e-06, "loss": 0.5295, 
"step": 2068 }, { "epoch": 0.45, "grad_norm": 0.17150332033634186, "learning_rate": 6.1140624608205626e-06, "loss": 0.4896, "step": 2069 }, { "epoch": 0.45, "grad_norm": 0.1474120020866394, "learning_rate": 6.110660793622127e-06, "loss": 0.5046, "step": 2070 }, { "epoch": 0.45, "grad_norm": 0.18776945769786835, "learning_rate": 6.10725858558197e-06, "loss": 0.5263, "step": 2071 }, { "epoch": 0.45, "grad_norm": 0.14684580266475677, "learning_rate": 6.103855838356813e-06, "loss": 0.5539, "step": 2072 }, { "epoch": 0.45, "grad_norm": 0.12644240260124207, "learning_rate": 6.100452553603638e-06, "loss": 0.5047, "step": 2073 }, { "epoch": 0.45, "grad_norm": 0.18356040120124817, "learning_rate": 6.097048732979691e-06, "loss": 0.5408, "step": 2074 }, { "epoch": 0.45, "grad_norm": 0.13573047518730164, "learning_rate": 6.093644378142481e-06, "loss": 0.5369, "step": 2075 }, { "epoch": 0.45, "grad_norm": 0.1704436093568802, "learning_rate": 6.090239490749775e-06, "loss": 0.4905, "step": 2076 }, { "epoch": 0.45, "grad_norm": 0.1508268564939499, "learning_rate": 6.086834072459599e-06, "loss": 0.5288, "step": 2077 }, { "epoch": 0.45, "grad_norm": 0.17939120531082153, "learning_rate": 6.083428124930239e-06, "loss": 0.5089, "step": 2078 }, { "epoch": 0.45, "grad_norm": 0.1567559689283371, "learning_rate": 6.080021649820238e-06, "loss": 0.4933, "step": 2079 }, { "epoch": 0.45, "grad_norm": 0.1430431753396988, "learning_rate": 6.076614648788392e-06, "loss": 0.5396, "step": 2080 }, { "epoch": 0.45, "grad_norm": 0.15456099808216095, "learning_rate": 6.073207123493763e-06, "loss": 0.4786, "step": 2081 }, { "epoch": 0.45, "grad_norm": 0.17080536484718323, "learning_rate": 6.069799075595658e-06, "loss": 0.5233, "step": 2082 }, { "epoch": 0.45, "grad_norm": 0.13564909994602203, "learning_rate": 6.066390506753644e-06, "loss": 0.5682, "step": 2083 }, { "epoch": 0.45, "grad_norm": 0.15913358330726624, "learning_rate": 6.062981418627539e-06, "loss": 0.5222, "step": 2084 }, { "epoch": 0.45, "grad_norm": 0.16424204409122467, "learning_rate": 6.059571812877419e-06, "loss": 0.5062, "step": 2085 }, { "epoch": 0.45, "grad_norm": 0.16678033769130707, "learning_rate": 6.0561616911636025e-06, "loss": 0.5138, "step": 2086 }, { "epoch": 0.45, "grad_norm": 0.15992575883865356, "learning_rate": 6.052751055146669e-06, "loss": 0.5199, "step": 2087 }, { "epoch": 0.45, "grad_norm": 0.18692535161972046, "learning_rate": 6.049339906487443e-06, "loss": 0.5434, "step": 2088 }, { "epoch": 0.45, "grad_norm": 0.13587631285190582, "learning_rate": 6.045928246847003e-06, "loss": 0.5013, "step": 2089 }, { "epoch": 0.45, "grad_norm": 0.20116516947746277, "learning_rate": 6.042516077886669e-06, "loss": 0.5329, "step": 2090 }, { "epoch": 0.45, "grad_norm": 0.13471555709838867, "learning_rate": 6.039103401268016e-06, "loss": 0.4862, "step": 2091 }, { "epoch": 0.45, "grad_norm": 0.15407685935497284, "learning_rate": 6.035690218652861e-06, "loss": 0.6036, "step": 2092 }, { "epoch": 0.45, "grad_norm": 0.14876054227352142, "learning_rate": 6.032276531703274e-06, "loss": 0.4963, "step": 2093 }, { "epoch": 0.45, "grad_norm": 0.16624298691749573, "learning_rate": 6.028862342081564e-06, "loss": 0.5164, "step": 2094 }, { "epoch": 0.45, "grad_norm": 0.15883252024650574, "learning_rate": 6.025447651450289e-06, "loss": 0.5082, "step": 2095 }, { "epoch": 0.45, "grad_norm": 0.1502091884613037, "learning_rate": 6.022032461472247e-06, "loss": 0.5722, "step": 2096 }, { "epoch": 0.45, "grad_norm": 0.1553240269422531, "learning_rate": 6.018616773810483e-06, "loss": 
0.5173, "step": 2097 }, { "epoch": 0.45, "grad_norm": 0.15653330087661743, "learning_rate": 6.015200590128284e-06, "loss": 0.5355, "step": 2098 }, { "epoch": 0.45, "grad_norm": 0.1457417756319046, "learning_rate": 6.011783912089174e-06, "loss": 0.5205, "step": 2099 }, { "epoch": 0.45, "grad_norm": 0.13138940930366516, "learning_rate": 6.008366741356926e-06, "loss": 0.5424, "step": 2100 }, { "epoch": 0.45, "grad_norm": 0.15823757648468018, "learning_rate": 6.004949079595544e-06, "loss": 0.5272, "step": 2101 }, { "epoch": 0.45, "grad_norm": 0.17084883153438568, "learning_rate": 6.001530928469277e-06, "loss": 0.5291, "step": 2102 }, { "epoch": 0.45, "grad_norm": 0.14622004330158234, "learning_rate": 5.998112289642608e-06, "loss": 0.498, "step": 2103 }, { "epoch": 0.45, "grad_norm": 0.1439567655324936, "learning_rate": 5.9946931647802645e-06, "loss": 0.5381, "step": 2104 }, { "epoch": 0.45, "grad_norm": 0.23978291451931, "learning_rate": 5.9912735555472015e-06, "loss": 0.5141, "step": 2105 }, { "epoch": 0.45, "grad_norm": 0.14025025069713593, "learning_rate": 5.987853463608618e-06, "loss": 0.4712, "step": 2106 }, { "epoch": 0.45, "grad_norm": 0.16210734844207764, "learning_rate": 5.984432890629943e-06, "loss": 0.5103, "step": 2107 }, { "epoch": 0.45, "grad_norm": 0.17586356401443481, "learning_rate": 5.981011838276841e-06, "loss": 0.5507, "step": 2108 }, { "epoch": 0.45, "grad_norm": 0.1554114818572998, "learning_rate": 5.977590308215211e-06, "loss": 0.5375, "step": 2109 }, { "epoch": 0.45, "grad_norm": 0.14625568687915802, "learning_rate": 5.974168302111183e-06, "loss": 0.5195, "step": 2110 }, { "epoch": 0.45, "grad_norm": 0.1564107984304428, "learning_rate": 5.970745821631121e-06, "loss": 0.5006, "step": 2111 }, { "epoch": 0.45, "grad_norm": 0.1529904454946518, "learning_rate": 5.967322868441616e-06, "loss": 0.5455, "step": 2112 }, { "epoch": 0.46, "grad_norm": 0.16919173300266266, "learning_rate": 5.963899444209496e-06, "loss": 0.5323, "step": 2113 }, { "epoch": 0.46, "grad_norm": 0.2237899899482727, "learning_rate": 5.9604755506018105e-06, "loss": 0.5153, "step": 2114 }, { "epoch": 0.46, "grad_norm": 0.17237022519111633, "learning_rate": 5.957051189285843e-06, "loss": 0.5237, "step": 2115 }, { "epoch": 0.46, "grad_norm": 0.18111760914325714, "learning_rate": 5.953626361929102e-06, "loss": 0.492, "step": 2116 }, { "epoch": 0.46, "grad_norm": 0.13480786979198456, "learning_rate": 5.950201070199326e-06, "loss": 0.4827, "step": 2117 }, { "epoch": 0.46, "grad_norm": 0.17693190276622772, "learning_rate": 5.946775315764476e-06, "loss": 0.5592, "step": 2118 }, { "epoch": 0.46, "grad_norm": 0.13885067403316498, "learning_rate": 5.943349100292739e-06, "loss": 0.497, "step": 2119 }, { "epoch": 0.46, "grad_norm": 0.1679374873638153, "learning_rate": 5.939922425452531e-06, "loss": 0.5045, "step": 2120 }, { "epoch": 0.46, "grad_norm": 0.1675940304994583, "learning_rate": 5.936495292912483e-06, "loss": 0.5518, "step": 2121 }, { "epoch": 0.46, "grad_norm": 0.16924212872982025, "learning_rate": 5.93306770434146e-06, "loss": 0.5481, "step": 2122 }, { "epoch": 0.46, "grad_norm": 0.21032755076885223, "learning_rate": 5.929639661408538e-06, "loss": 0.4816, "step": 2123 }, { "epoch": 0.46, "grad_norm": 0.11854084581136703, "learning_rate": 5.926211165783021e-06, "loss": 0.5009, "step": 2124 }, { "epoch": 0.46, "grad_norm": 0.13082769513130188, "learning_rate": 5.922782219134433e-06, "loss": 0.4822, "step": 2125 }, { "epoch": 0.46, "grad_norm": 0.1662750393152237, "learning_rate": 5.919352823132515e-06, "loss": 
0.5262, "step": 2126 }, { "epoch": 0.46, "grad_norm": 0.1488747000694275, "learning_rate": 5.915922979447228e-06, "loss": 0.5553, "step": 2127 }, { "epoch": 0.46, "grad_norm": 0.1871393918991089, "learning_rate": 5.912492689748753e-06, "loss": 0.4965, "step": 2128 }, { "epoch": 0.46, "grad_norm": 0.18025460839271545, "learning_rate": 5.909061955707486e-06, "loss": 0.531, "step": 2129 }, { "epoch": 0.46, "grad_norm": 0.1580578088760376, "learning_rate": 5.905630778994036e-06, "loss": 0.5089, "step": 2130 }, { "epoch": 0.46, "grad_norm": 0.16995598375797272, "learning_rate": 5.902199161279236e-06, "loss": 0.5137, "step": 2131 }, { "epoch": 0.46, "grad_norm": 0.14344586431980133, "learning_rate": 5.898767104234128e-06, "loss": 0.5051, "step": 2132 }, { "epoch": 0.46, "grad_norm": 0.1728695183992386, "learning_rate": 5.895334609529967e-06, "loss": 0.509, "step": 2133 }, { "epoch": 0.46, "grad_norm": 0.13887768983840942, "learning_rate": 5.891901678838227e-06, "loss": 0.4838, "step": 2134 }, { "epoch": 0.46, "grad_norm": 0.18018049001693726, "learning_rate": 5.8884683138305854e-06, "loss": 0.5273, "step": 2135 }, { "epoch": 0.46, "grad_norm": 0.15605993568897247, "learning_rate": 5.88503451617894e-06, "loss": 0.4847, "step": 2136 }, { "epoch": 0.46, "grad_norm": 0.14139895141124725, "learning_rate": 5.881600287555393e-06, "loss": 0.4769, "step": 2137 }, { "epoch": 0.46, "grad_norm": 0.15375615656375885, "learning_rate": 5.878165629632262e-06, "loss": 0.5479, "step": 2138 }, { "epoch": 0.46, "grad_norm": 0.16424569487571716, "learning_rate": 5.874730544082069e-06, "loss": 0.5337, "step": 2139 }, { "epoch": 0.46, "grad_norm": 0.20334842801094055, "learning_rate": 5.8712950325775416e-06, "loss": 0.5627, "step": 2140 }, { "epoch": 0.46, "grad_norm": 0.13510531187057495, "learning_rate": 5.867859096791626e-06, "loss": 0.4906, "step": 2141 }, { "epoch": 0.46, "grad_norm": 0.158345028758049, "learning_rate": 5.864422738397465e-06, "loss": 0.5136, "step": 2142 }, { "epoch": 0.46, "grad_norm": 0.1618645340204239, "learning_rate": 5.860985959068408e-06, "loss": 0.4867, "step": 2143 }, { "epoch": 0.46, "grad_norm": 0.1342993676662445, "learning_rate": 5.857548760478015e-06, "loss": 0.5011, "step": 2144 }, { "epoch": 0.46, "grad_norm": 0.14608271420001984, "learning_rate": 5.8541111443000455e-06, "loss": 0.4916, "step": 2145 }, { "epoch": 0.46, "grad_norm": 0.1352057158946991, "learning_rate": 5.85067311220846e-06, "loss": 0.5195, "step": 2146 }, { "epoch": 0.46, "grad_norm": 0.1447547972202301, "learning_rate": 5.847234665877432e-06, "loss": 0.4918, "step": 2147 }, { "epoch": 0.46, "grad_norm": 0.17679902911186218, "learning_rate": 5.843795806981325e-06, "loss": 0.5345, "step": 2148 }, { "epoch": 0.46, "grad_norm": 0.1902516484260559, "learning_rate": 5.840356537194708e-06, "loss": 0.5343, "step": 2149 }, { "epoch": 0.46, "grad_norm": 0.19225680828094482, "learning_rate": 5.836916858192353e-06, "loss": 0.4972, "step": 2150 }, { "epoch": 0.46, "grad_norm": 0.17341876029968262, "learning_rate": 5.833476771649227e-06, "loss": 0.5002, "step": 2151 }, { "epoch": 0.46, "grad_norm": 0.149870827794075, "learning_rate": 5.830036279240497e-06, "loss": 0.5484, "step": 2152 }, { "epoch": 0.46, "grad_norm": 0.1548566222190857, "learning_rate": 5.826595382641529e-06, "loss": 0.5553, "step": 2153 }, { "epoch": 0.46, "grad_norm": 0.16744022071361542, "learning_rate": 5.823154083527884e-06, "loss": 0.5416, "step": 2154 }, { "epoch": 0.46, "grad_norm": 0.18139050900936127, "learning_rate": 5.819712383575316e-06, "loss": 
0.5225, "step": 2155 }, { "epoch": 0.46, "grad_norm": 0.16486258804798126, "learning_rate": 5.816270284459783e-06, "loss": 0.4938, "step": 2156 }, { "epoch": 0.46, "grad_norm": 0.15385212004184723, "learning_rate": 5.812827787857428e-06, "loss": 0.562, "step": 2157 }, { "epoch": 0.46, "grad_norm": 0.17840281128883362, "learning_rate": 5.809384895444594e-06, "loss": 0.487, "step": 2158 }, { "epoch": 0.47, "grad_norm": 0.16368557512760162, "learning_rate": 5.805941608897814e-06, "loss": 0.4991, "step": 2159 }, { "epoch": 0.47, "grad_norm": 0.22969526052474976, "learning_rate": 5.802497929893813e-06, "loss": 0.4751, "step": 2160 }, { "epoch": 0.47, "grad_norm": 0.21182815730571747, "learning_rate": 5.799053860109506e-06, "loss": 0.5603, "step": 2161 }, { "epoch": 0.47, "grad_norm": 0.16508375108242035, "learning_rate": 5.795609401222001e-06, "loss": 0.5308, "step": 2162 }, { "epoch": 0.47, "grad_norm": 0.3659750521183014, "learning_rate": 5.7921645549085955e-06, "loss": 0.5229, "step": 2163 }, { "epoch": 0.47, "grad_norm": 0.15634752810001373, "learning_rate": 5.7887193228467735e-06, "loss": 0.5594, "step": 2164 }, { "epoch": 0.47, "grad_norm": 0.15100319683551788, "learning_rate": 5.785273706714205e-06, "loss": 0.5619, "step": 2165 }, { "epoch": 0.47, "grad_norm": 0.13537266850471497, "learning_rate": 5.781827708188753e-06, "loss": 0.5224, "step": 2166 }, { "epoch": 0.47, "grad_norm": 0.16945107281208038, "learning_rate": 5.778381328948461e-06, "loss": 0.513, "step": 2167 }, { "epoch": 0.47, "grad_norm": 0.1476183384656906, "learning_rate": 5.774934570671562e-06, "loss": 0.5124, "step": 2168 }, { "epoch": 0.47, "grad_norm": 0.156847283244133, "learning_rate": 5.771487435036472e-06, "loss": 0.5185, "step": 2169 }, { "epoch": 0.47, "grad_norm": 0.24519124627113342, "learning_rate": 5.768039923721791e-06, "loss": 0.5001, "step": 2170 }, { "epoch": 0.47, "grad_norm": 0.19340813159942627, "learning_rate": 5.764592038406298e-06, "loss": 0.528, "step": 2171 }, { "epoch": 0.47, "grad_norm": 0.16022874414920807, "learning_rate": 5.761143780768962e-06, "loss": 0.4961, "step": 2172 }, { "epoch": 0.47, "grad_norm": 0.18600255250930786, "learning_rate": 5.7576951524889245e-06, "loss": 0.4908, "step": 2173 }, { "epoch": 0.47, "grad_norm": 0.11501923948526382, "learning_rate": 5.7542461552455165e-06, "loss": 0.5403, "step": 2174 }, { "epoch": 0.47, "grad_norm": 0.14986415207386017, "learning_rate": 5.750796790718243e-06, "loss": 0.5027, "step": 2175 }, { "epoch": 0.47, "grad_norm": 0.13095037639141083, "learning_rate": 5.747347060586787e-06, "loss": 0.5339, "step": 2176 }, { "epoch": 0.47, "grad_norm": 0.12488746643066406, "learning_rate": 5.743896966531012e-06, "loss": 0.5256, "step": 2177 }, { "epoch": 0.47, "grad_norm": 0.1328728049993515, "learning_rate": 5.740446510230959e-06, "loss": 0.429, "step": 2178 }, { "epoch": 0.47, "grad_norm": 0.13304339349269867, "learning_rate": 5.736995693366847e-06, "loss": 0.4621, "step": 2179 }, { "epoch": 0.47, "grad_norm": 0.22455641627311707, "learning_rate": 5.733544517619064e-06, "loss": 0.5157, "step": 2180 }, { "epoch": 0.47, "grad_norm": 0.13997776806354523, "learning_rate": 5.730092984668179e-06, "loss": 0.4909, "step": 2181 }, { "epoch": 0.47, "grad_norm": 0.1835583746433258, "learning_rate": 5.726641096194932e-06, "loss": 0.4697, "step": 2182 }, { "epoch": 0.47, "grad_norm": 0.1669677495956421, "learning_rate": 5.723188853880238e-06, "loss": 0.5484, "step": 2183 }, { "epoch": 0.47, "grad_norm": 0.1625543087720871, "learning_rate": 5.719736259405182e-06, 
"loss": 0.4743, "step": 2184 }, { "epoch": 0.47, "grad_norm": 0.15123441815376282, "learning_rate": 5.716283314451026e-06, "loss": 0.482, "step": 2185 }, { "epoch": 0.47, "grad_norm": 0.16270317137241364, "learning_rate": 5.7128300206991925e-06, "loss": 0.4675, "step": 2186 }, { "epoch": 0.47, "grad_norm": 0.1661555916070938, "learning_rate": 5.709376379831283e-06, "loss": 0.5076, "step": 2187 }, { "epoch": 0.47, "grad_norm": 0.16409648954868317, "learning_rate": 5.705922393529065e-06, "loss": 0.5271, "step": 2188 }, { "epoch": 0.47, "grad_norm": 0.14545123279094696, "learning_rate": 5.702468063474473e-06, "loss": 0.4966, "step": 2189 }, { "epoch": 0.47, "grad_norm": 0.22827212512493134, "learning_rate": 5.69901339134961e-06, "loss": 0.4808, "step": 2190 }, { "epoch": 0.47, "grad_norm": 0.1843656748533249, "learning_rate": 5.695558378836749e-06, "loss": 0.505, "step": 2191 }, { "epoch": 0.47, "grad_norm": 0.19031104445457458, "learning_rate": 5.692103027618321e-06, "loss": 0.5571, "step": 2192 }, { "epoch": 0.47, "grad_norm": 0.16894584894180298, "learning_rate": 5.688647339376926e-06, "loss": 0.5266, "step": 2193 }, { "epoch": 0.47, "grad_norm": 0.14823244512081146, "learning_rate": 5.685191315795331e-06, "loss": 0.5572, "step": 2194 }, { "epoch": 0.47, "grad_norm": 0.13419359922409058, "learning_rate": 5.681734958556463e-06, "loss": 0.5163, "step": 2195 }, { "epoch": 0.47, "grad_norm": 0.18760497868061066, "learning_rate": 5.678278269343411e-06, "loss": 0.5218, "step": 2196 }, { "epoch": 0.47, "grad_norm": 0.1401587277650833, "learning_rate": 5.674821249839428e-06, "loss": 0.4821, "step": 2197 }, { "epoch": 0.47, "grad_norm": 0.15496966242790222, "learning_rate": 5.671363901727927e-06, "loss": 0.504, "step": 2198 }, { "epoch": 0.47, "grad_norm": 0.17478565871715546, "learning_rate": 5.667906226692479e-06, "loss": 0.5252, "step": 2199 }, { "epoch": 0.47, "grad_norm": 0.14033323526382446, "learning_rate": 5.664448226416815e-06, "loss": 0.5534, "step": 2200 }, { "epoch": 0.47, "grad_norm": 0.23815791308879852, "learning_rate": 5.660989902584829e-06, "loss": 0.5357, "step": 2201 }, { "epoch": 0.47, "grad_norm": 0.16176384687423706, "learning_rate": 5.657531256880565e-06, "loss": 0.5378, "step": 2202 }, { "epoch": 0.47, "grad_norm": 0.20444779098033905, "learning_rate": 5.654072290988231e-06, "loss": 0.5905, "step": 2203 }, { "epoch": 0.47, "grad_norm": 0.14830709993839264, "learning_rate": 5.650613006592185e-06, "loss": 0.5192, "step": 2204 }, { "epoch": 0.47, "grad_norm": 0.2211901992559433, "learning_rate": 5.647153405376942e-06, "loss": 0.564, "step": 2205 }, { "epoch": 0.48, "grad_norm": 0.15610624849796295, "learning_rate": 5.643693489027172e-06, "loss": 0.49, "step": 2206 }, { "epoch": 0.48, "grad_norm": 0.13824397325515747, "learning_rate": 5.6402332592277e-06, "loss": 0.519, "step": 2207 }, { "epoch": 0.48, "grad_norm": 0.18318380415439606, "learning_rate": 5.636772717663501e-06, "loss": 0.5294, "step": 2208 }, { "epoch": 0.48, "grad_norm": 0.20423349738121033, "learning_rate": 5.633311866019703e-06, "loss": 0.5128, "step": 2209 }, { "epoch": 0.48, "grad_norm": 0.14289386570453644, "learning_rate": 5.629850705981584e-06, "loss": 0.5008, "step": 2210 }, { "epoch": 0.48, "grad_norm": 0.17370502650737762, "learning_rate": 5.626389239234572e-06, "loss": 0.5657, "step": 2211 }, { "epoch": 0.48, "grad_norm": 0.1700432002544403, "learning_rate": 5.622927467464247e-06, "loss": 0.5137, "step": 2212 }, { "epoch": 0.48, "grad_norm": 0.1566981077194214, "learning_rate": 5.619465392356335e-06, 
"loss": 0.5698, "step": 2213 }, { "epoch": 0.48, "grad_norm": 0.166670560836792, "learning_rate": 5.6160030155967116e-06, "loss": 0.5272, "step": 2214 }, { "epoch": 0.48, "grad_norm": 0.14587420225143433, "learning_rate": 5.612540338871395e-06, "loss": 0.5049, "step": 2215 }, { "epoch": 0.48, "grad_norm": 0.14537444710731506, "learning_rate": 5.609077363866555e-06, "loss": 0.523, "step": 2216 }, { "epoch": 0.48, "grad_norm": 0.15122370421886444, "learning_rate": 5.605614092268506e-06, "loss": 0.5304, "step": 2217 }, { "epoch": 0.48, "grad_norm": 0.11322161555290222, "learning_rate": 5.602150525763701e-06, "loss": 0.5269, "step": 2218 }, { "epoch": 0.48, "grad_norm": 0.1510639786720276, "learning_rate": 5.598686666038745e-06, "loss": 0.5668, "step": 2219 }, { "epoch": 0.48, "grad_norm": 0.16219152510166168, "learning_rate": 5.595222514780379e-06, "loss": 0.5016, "step": 2220 }, { "epoch": 0.48, "grad_norm": 0.14243803918361664, "learning_rate": 5.591758073675485e-06, "loss": 0.5398, "step": 2221 }, { "epoch": 0.48, "grad_norm": 0.16937606036663055, "learning_rate": 5.588293344411097e-06, "loss": 0.5621, "step": 2222 }, { "epoch": 0.48, "grad_norm": 0.15524210035800934, "learning_rate": 5.5848283286743786e-06, "loss": 0.5695, "step": 2223 }, { "epoch": 0.48, "grad_norm": 0.1765149086713791, "learning_rate": 5.581363028152633e-06, "loss": 0.5126, "step": 2224 }, { "epoch": 0.48, "grad_norm": 0.2328573763370514, "learning_rate": 5.5778974445333115e-06, "loss": 0.5701, "step": 2225 }, { "epoch": 0.48, "grad_norm": 0.1344151794910431, "learning_rate": 5.574431579503991e-06, "loss": 0.5512, "step": 2226 }, { "epoch": 0.48, "grad_norm": 0.14871002733707428, "learning_rate": 5.570965434752396e-06, "loss": 0.5196, "step": 2227 }, { "epoch": 0.48, "grad_norm": 0.19491346180438995, "learning_rate": 5.5674990119663794e-06, "loss": 0.5809, "step": 2228 }, { "epoch": 0.48, "grad_norm": 0.15575414896011353, "learning_rate": 5.564032312833936e-06, "loss": 0.5395, "step": 2229 }, { "epoch": 0.48, "grad_norm": 0.25920212268829346, "learning_rate": 5.560565339043188e-06, "loss": 0.4677, "step": 2230 }, { "epoch": 0.48, "grad_norm": 0.1457945555448532, "learning_rate": 5.557098092282399e-06, "loss": 0.5326, "step": 2231 }, { "epoch": 0.48, "grad_norm": 0.13234636187553406, "learning_rate": 5.55363057423996e-06, "loss": 0.4859, "step": 2232 }, { "epoch": 0.48, "grad_norm": 0.146928608417511, "learning_rate": 5.550162786604397e-06, "loss": 0.5834, "step": 2233 }, { "epoch": 0.48, "grad_norm": 0.13184037804603577, "learning_rate": 5.546694731064364e-06, "loss": 0.5236, "step": 2234 }, { "epoch": 0.48, "grad_norm": 0.2852530777454376, "learning_rate": 5.5432264093086505e-06, "loss": 0.5034, "step": 2235 }, { "epoch": 0.48, "grad_norm": 0.15083038806915283, "learning_rate": 5.5397578230261715e-06, "loss": 0.5118, "step": 2236 }, { "epoch": 0.48, "grad_norm": 0.1430756151676178, "learning_rate": 5.536288973905971e-06, "loss": 0.5202, "step": 2237 }, { "epoch": 0.48, "grad_norm": 0.16797691583633423, "learning_rate": 5.532819863637223e-06, "loss": 0.5105, "step": 2238 }, { "epoch": 0.48, "grad_norm": 0.15367530286312103, "learning_rate": 5.529350493909229e-06, "loss": 0.5178, "step": 2239 }, { "epoch": 0.48, "grad_norm": 0.13238172233104706, "learning_rate": 5.525880866411414e-06, "loss": 0.5376, "step": 2240 }, { "epoch": 0.48, "grad_norm": 0.17009180784225464, "learning_rate": 5.522410982833331e-06, "loss": 0.5508, "step": 2241 }, { "epoch": 0.48, "grad_norm": 0.1846666783094406, "learning_rate": 
5.5189408448646565e-06, "loss": 0.5625, "step": 2242 }, { "epoch": 0.48, "grad_norm": 0.18193793296813965, "learning_rate": 5.515470454195188e-06, "loss": 0.4663, "step": 2243 }, { "epoch": 0.48, "grad_norm": 0.15874691307544708, "learning_rate": 5.511999812514857e-06, "loss": 0.5035, "step": 2244 }, { "epoch": 0.48, "grad_norm": 0.17099712789058685, "learning_rate": 5.5085289215137035e-06, "loss": 0.5301, "step": 2245 }, { "epoch": 0.48, "grad_norm": 0.14446376264095306, "learning_rate": 5.505057782881896e-06, "loss": 0.4915, "step": 2246 }, { "epoch": 0.48, "grad_norm": 0.3006593883037567, "learning_rate": 5.501586398309724e-06, "loss": 0.5032, "step": 2247 }, { "epoch": 0.48, "grad_norm": 0.175115704536438, "learning_rate": 5.4981147694875924e-06, "loss": 0.5242, "step": 2248 }, { "epoch": 0.48, "grad_norm": 0.14558811485767365, "learning_rate": 5.494642898106029e-06, "loss": 0.4991, "step": 2249 }, { "epoch": 0.48, "grad_norm": 0.1611151546239853, "learning_rate": 5.491170785855681e-06, "loss": 0.5272, "step": 2250 }, { "epoch": 0.48, "grad_norm": 0.15863467752933502, "learning_rate": 5.4876984344273095e-06, "loss": 0.5034, "step": 2251 }, { "epoch": 0.49, "grad_norm": 0.1683708131313324, "learning_rate": 5.484225845511791e-06, "loss": 0.4884, "step": 2252 }, { "epoch": 0.49, "grad_norm": 0.1344245821237564, "learning_rate": 5.480753020800121e-06, "loss": 0.5165, "step": 2253 }, { "epoch": 0.49, "grad_norm": 0.1735605001449585, "learning_rate": 5.477279961983408e-06, "loss": 0.5519, "step": 2254 }, { "epoch": 0.49, "grad_norm": 0.14727462828159332, "learning_rate": 5.473806670752877e-06, "loss": 0.4778, "step": 2255 }, { "epoch": 0.49, "grad_norm": 0.1414579451084137, "learning_rate": 5.470333148799862e-06, "loss": 0.4707, "step": 2256 }, { "epoch": 0.49, "grad_norm": 0.1338963657617569, "learning_rate": 5.466859397815812e-06, "loss": 0.5236, "step": 2257 }, { "epoch": 0.49, "grad_norm": 0.1523580551147461, "learning_rate": 5.463385419492288e-06, "loss": 0.516, "step": 2258 }, { "epoch": 0.49, "grad_norm": 0.17260035872459412, "learning_rate": 5.459911215520959e-06, "loss": 0.5188, "step": 2259 }, { "epoch": 0.49, "grad_norm": 0.19136221706867218, "learning_rate": 5.456436787593609e-06, "loss": 0.4909, "step": 2260 }, { "epoch": 0.49, "grad_norm": 0.17576466500759125, "learning_rate": 5.452962137402125e-06, "loss": 0.5374, "step": 2261 }, { "epoch": 0.49, "grad_norm": 0.18410582840442657, "learning_rate": 5.449487266638504e-06, "loss": 0.5541, "step": 2262 }, { "epoch": 0.49, "grad_norm": 0.15502192080020905, "learning_rate": 5.446012176994854e-06, "loss": 0.5411, "step": 2263 }, { "epoch": 0.49, "grad_norm": 0.21357733011245728, "learning_rate": 5.442536870163386e-06, "loss": 0.5284, "step": 2264 }, { "epoch": 0.49, "grad_norm": 0.15364959836006165, "learning_rate": 5.439061347836416e-06, "loss": 0.4631, "step": 2265 }, { "epoch": 0.49, "grad_norm": 0.14856620132923126, "learning_rate": 5.43558561170637e-06, "loss": 0.5164, "step": 2266 }, { "epoch": 0.49, "grad_norm": 0.13780789077281952, "learning_rate": 5.432109663465773e-06, "loss": 0.5108, "step": 2267 }, { "epoch": 0.49, "grad_norm": 0.13712283968925476, "learning_rate": 5.428633504807253e-06, "loss": 0.4914, "step": 2268 }, { "epoch": 0.49, "grad_norm": 0.1509259045124054, "learning_rate": 5.425157137423548e-06, "loss": 0.5178, "step": 2269 }, { "epoch": 0.49, "grad_norm": 0.16157595813274384, "learning_rate": 5.421680563007486e-06, "loss": 0.5337, "step": 2270 }, { "epoch": 0.49, "grad_norm": 0.17313942313194275, 
"learning_rate": 5.418203783252005e-06, "loss": 0.512, "step": 2271 }, { "epoch": 0.49, "grad_norm": 0.1417136937379837, "learning_rate": 5.414726799850141e-06, "loss": 0.5123, "step": 2272 }, { "epoch": 0.49, "grad_norm": 0.15452702343463898, "learning_rate": 5.411249614495027e-06, "loss": 0.5249, "step": 2273 }, { "epoch": 0.49, "grad_norm": 0.17498227953910828, "learning_rate": 5.407772228879894e-06, "loss": 0.5008, "step": 2274 }, { "epoch": 0.49, "grad_norm": 0.2232121229171753, "learning_rate": 5.404294644698073e-06, "loss": 0.5113, "step": 2275 }, { "epoch": 0.49, "grad_norm": 0.11952576041221619, "learning_rate": 5.400816863642991e-06, "loss": 0.5147, "step": 2276 }, { "epoch": 0.49, "grad_norm": 0.15340656042099, "learning_rate": 5.397338887408171e-06, "loss": 0.478, "step": 2277 }, { "epoch": 0.49, "grad_norm": 0.1494847536087036, "learning_rate": 5.393860717687231e-06, "loss": 0.5173, "step": 2278 }, { "epoch": 0.49, "grad_norm": 0.16914784908294678, "learning_rate": 5.390382356173881e-06, "loss": 0.4979, "step": 2279 }, { "epoch": 0.49, "grad_norm": 0.10972032696008682, "learning_rate": 5.3869038045619275e-06, "loss": 0.5214, "step": 2280 }, { "epoch": 0.49, "grad_norm": 0.1643581986427307, "learning_rate": 5.383425064545267e-06, "loss": 0.535, "step": 2281 }, { "epoch": 0.49, "grad_norm": 0.1384391486644745, "learning_rate": 5.379946137817891e-06, "loss": 0.5034, "step": 2282 }, { "epoch": 0.49, "grad_norm": 0.1642947793006897, "learning_rate": 5.376467026073878e-06, "loss": 0.5549, "step": 2283 }, { "epoch": 0.49, "grad_norm": 0.15689925849437714, "learning_rate": 5.3729877310073985e-06, "loss": 0.5086, "step": 2284 }, { "epoch": 0.49, "grad_norm": 0.17627274990081787, "learning_rate": 5.369508254312715e-06, "loss": 0.5223, "step": 2285 }, { "epoch": 0.49, "grad_norm": 0.12727420032024384, "learning_rate": 5.366028597684173e-06, "loss": 0.5149, "step": 2286 }, { "epoch": 0.49, "grad_norm": 0.15203452110290527, "learning_rate": 5.362548762816209e-06, "loss": 0.5713, "step": 2287 }, { "epoch": 0.49, "grad_norm": 0.13790200650691986, "learning_rate": 5.359068751403347e-06, "loss": 0.545, "step": 2288 }, { "epoch": 0.49, "grad_norm": 0.13259437680244446, "learning_rate": 5.355588565140195e-06, "loss": 0.4586, "step": 2289 }, { "epoch": 0.49, "grad_norm": 0.1421840488910675, "learning_rate": 5.352108205721445e-06, "loss": 0.4915, "step": 2290 }, { "epoch": 0.49, "grad_norm": 0.14462217688560486, "learning_rate": 5.348627674841876e-06, "loss": 0.4412, "step": 2291 }, { "epoch": 0.49, "grad_norm": 0.15902197360992432, "learning_rate": 5.345146974196351e-06, "loss": 0.5418, "step": 2292 }, { "epoch": 0.49, "grad_norm": 0.1560838520526886, "learning_rate": 5.341666105479812e-06, "loss": 0.4639, "step": 2293 }, { "epoch": 0.49, "grad_norm": 0.15082865953445435, "learning_rate": 5.338185070387289e-06, "loss": 0.501, "step": 2294 }, { "epoch": 0.49, "grad_norm": 0.1447245180606842, "learning_rate": 5.334703870613887e-06, "loss": 0.4603, "step": 2295 }, { "epoch": 0.49, "grad_norm": 0.23148810863494873, "learning_rate": 5.3312225078547895e-06, "loss": 0.5145, "step": 2296 }, { "epoch": 0.49, "grad_norm": 0.1934991329908371, "learning_rate": 5.327740983805267e-06, "loss": 0.5137, "step": 2297 }, { "epoch": 0.5, "grad_norm": 0.18782839179039001, "learning_rate": 5.324259300160667e-06, "loss": 0.5348, "step": 2298 }, { "epoch": 0.5, "grad_norm": 0.17964793741703033, "learning_rate": 5.320777458616407e-06, "loss": 0.4938, "step": 2299 }, { "epoch": 0.5, "grad_norm": 0.1606227159500122, 
"learning_rate": 5.31729546086799e-06, "loss": 0.5483, "step": 2300 }, { "epoch": 0.5, "grad_norm": 0.16519147157669067, "learning_rate": 5.313813308610993e-06, "loss": 0.5018, "step": 2301 }, { "epoch": 0.5, "grad_norm": 0.1705171763896942, "learning_rate": 5.310331003541065e-06, "loss": 0.4838, "step": 2302 }, { "epoch": 0.5, "grad_norm": 0.22581948339939117, "learning_rate": 5.30684854735393e-06, "loss": 0.5207, "step": 2303 }, { "epoch": 0.5, "grad_norm": 0.16089698672294617, "learning_rate": 5.303365941745392e-06, "loss": 0.5237, "step": 2304 }, { "epoch": 0.5, "grad_norm": 0.15881328284740448, "learning_rate": 5.299883188411318e-06, "loss": 0.477, "step": 2305 }, { "epoch": 0.5, "grad_norm": 0.21279747784137726, "learning_rate": 5.296400289047655e-06, "loss": 0.5183, "step": 2306 }, { "epoch": 0.5, "grad_norm": 0.16910669207572937, "learning_rate": 5.292917245350417e-06, "loss": 0.4759, "step": 2307 }, { "epoch": 0.5, "grad_norm": 0.16905193030834198, "learning_rate": 5.289434059015689e-06, "loss": 0.5334, "step": 2308 }, { "epoch": 0.5, "grad_norm": 0.11337817460298538, "learning_rate": 5.285950731739624e-06, "loss": 0.4597, "step": 2309 }, { "epoch": 0.5, "grad_norm": 0.20089925825595856, "learning_rate": 5.28246726521845e-06, "loss": 0.5143, "step": 2310 }, { "epoch": 0.5, "grad_norm": 0.152847558259964, "learning_rate": 5.278983661148453e-06, "loss": 0.5067, "step": 2311 }, { "epoch": 0.5, "grad_norm": 0.16231143474578857, "learning_rate": 5.275499921225994e-06, "loss": 0.4883, "step": 2312 }, { "epoch": 0.5, "grad_norm": 0.13849905133247375, "learning_rate": 5.2720160471474955e-06, "loss": 0.5279, "step": 2313 }, { "epoch": 0.5, "grad_norm": 0.2002251148223877, "learning_rate": 5.26853204060945e-06, "loss": 0.5652, "step": 2314 }, { "epoch": 0.5, "grad_norm": 0.14642587304115295, "learning_rate": 5.2650479033084075e-06, "loss": 0.4926, "step": 2315 }, { "epoch": 0.5, "grad_norm": 0.19536569714546204, "learning_rate": 5.26156363694099e-06, "loss": 0.5673, "step": 2316 }, { "epoch": 0.5, "grad_norm": 0.16617797315120697, "learning_rate": 5.258079243203875e-06, "loss": 0.5427, "step": 2317 }, { "epoch": 0.5, "grad_norm": 0.11626624315977097, "learning_rate": 5.2545947237938055e-06, "loss": 0.5398, "step": 2318 }, { "epoch": 0.5, "grad_norm": 0.17686258256435394, "learning_rate": 5.251110080407587e-06, "loss": 0.5253, "step": 2319 }, { "epoch": 0.5, "grad_norm": 0.1972484439611435, "learning_rate": 5.247625314742083e-06, "loss": 0.4815, "step": 2320 }, { "epoch": 0.5, "grad_norm": 0.14836078882217407, "learning_rate": 5.244140428494216e-06, "loss": 0.5806, "step": 2321 }, { "epoch": 0.5, "grad_norm": 0.22560933232307434, "learning_rate": 5.240655423360969e-06, "loss": 0.5267, "step": 2322 }, { "epoch": 0.5, "grad_norm": 0.19489476084709167, "learning_rate": 5.237170301039385e-06, "loss": 0.5376, "step": 2323 }, { "epoch": 0.5, "grad_norm": 0.1505575180053711, "learning_rate": 5.233685063226557e-06, "loss": 0.5049, "step": 2324 }, { "epoch": 0.5, "grad_norm": 0.1474577635526657, "learning_rate": 5.23019971161964e-06, "loss": 0.5244, "step": 2325 }, { "epoch": 0.5, "grad_norm": 0.15484069287776947, "learning_rate": 5.226714247915846e-06, "loss": 0.5052, "step": 2326 }, { "epoch": 0.5, "grad_norm": 0.1554277092218399, "learning_rate": 5.2232286738124346e-06, "loss": 0.557, "step": 2327 }, { "epoch": 0.5, "grad_norm": 0.16746380925178528, "learning_rate": 5.219742991006728e-06, "loss": 0.5164, "step": 2328 }, { "epoch": 0.5, "grad_norm": 0.19356447458267212, "learning_rate": 
5.216257201196091e-06, "loss": 0.5051, "step": 2329 }, { "epoch": 0.5, "grad_norm": 0.19989141821861267, "learning_rate": 5.212771306077951e-06, "loss": 0.545, "step": 2330 }, { "epoch": 0.5, "grad_norm": 0.14954493939876556, "learning_rate": 5.209285307349776e-06, "loss": 0.4857, "step": 2331 }, { "epoch": 0.5, "grad_norm": 0.1772209256887436, "learning_rate": 5.205799206709097e-06, "loss": 0.4962, "step": 2332 }, { "epoch": 0.5, "grad_norm": 0.18169115483760834, "learning_rate": 5.202313005853483e-06, "loss": 0.5147, "step": 2333 }, { "epoch": 0.5, "grad_norm": 0.1574869155883789, "learning_rate": 5.198826706480558e-06, "loss": 0.5343, "step": 2334 }, { "epoch": 0.5, "grad_norm": 0.1543438583612442, "learning_rate": 5.195340310287993e-06, "loss": 0.4861, "step": 2335 }, { "epoch": 0.5, "grad_norm": 0.16991272568702698, "learning_rate": 5.191853818973505e-06, "loss": 0.5657, "step": 2336 }, { "epoch": 0.5, "grad_norm": 0.198355033993721, "learning_rate": 5.188367234234859e-06, "loss": 0.551, "step": 2337 }, { "epoch": 0.5, "grad_norm": 0.1566164791584015, "learning_rate": 5.184880557769865e-06, "loss": 0.5248, "step": 2338 }, { "epoch": 0.5, "grad_norm": 0.1619618833065033, "learning_rate": 5.181393791276374e-06, "loss": 0.4884, "step": 2339 }, { "epoch": 0.5, "grad_norm": 0.1328553855419159, "learning_rate": 5.177906936452287e-06, "loss": 0.5129, "step": 2340 }, { "epoch": 0.5, "grad_norm": 0.1531621217727661, "learning_rate": 5.174419994995545e-06, "loss": 0.4932, "step": 2341 }, { "epoch": 0.5, "grad_norm": 0.20409497618675232, "learning_rate": 5.170932968604131e-06, "loss": 0.5065, "step": 2342 }, { "epoch": 0.5, "grad_norm": 0.14799822866916656, "learning_rate": 5.167445858976068e-06, "loss": 0.5578, "step": 2343 }, { "epoch": 0.5, "grad_norm": 0.1554175615310669, "learning_rate": 5.163958667809422e-06, "loss": 0.514, "step": 2344 }, { "epoch": 0.51, "grad_norm": 0.19117942452430725, "learning_rate": 5.1604713968023e-06, "loss": 0.5341, "step": 2345 }, { "epoch": 0.51, "grad_norm": 0.15868812799453735, "learning_rate": 5.156984047652841e-06, "loss": 0.5528, "step": 2346 }, { "epoch": 0.51, "grad_norm": 0.13103894889354706, "learning_rate": 5.153496622059232e-06, "loss": 0.4764, "step": 2347 }, { "epoch": 0.51, "grad_norm": 0.1614736169576645, "learning_rate": 5.15000912171969e-06, "loss": 0.5218, "step": 2348 }, { "epoch": 0.51, "grad_norm": 0.1403590589761734, "learning_rate": 5.1465215483324685e-06, "loss": 0.493, "step": 2349 }, { "epoch": 0.51, "grad_norm": 0.13807451725006104, "learning_rate": 5.143033903595862e-06, "loss": 0.502, "step": 2350 }, { "epoch": 0.51, "grad_norm": 0.1550104022026062, "learning_rate": 5.1395461892081925e-06, "loss": 0.541, "step": 2351 }, { "epoch": 0.51, "grad_norm": 0.18088415265083313, "learning_rate": 5.1360584068678225e-06, "loss": 0.4898, "step": 2352 }, { "epoch": 0.51, "grad_norm": 0.1560092568397522, "learning_rate": 5.132570558273143e-06, "loss": 0.4938, "step": 2353 }, { "epoch": 0.51, "grad_norm": 0.21202325820922852, "learning_rate": 5.129082645122579e-06, "loss": 0.5163, "step": 2354 }, { "epoch": 0.51, "grad_norm": 0.1860700100660324, "learning_rate": 5.125594669114589e-06, "loss": 0.528, "step": 2355 }, { "epoch": 0.51, "grad_norm": 0.17803077399730682, "learning_rate": 5.1221066319476576e-06, "loss": 0.5005, "step": 2356 }, { "epoch": 0.51, "grad_norm": 0.13310760259628296, "learning_rate": 5.118618535320303e-06, "loss": 0.5061, "step": 2357 }, { "epoch": 0.51, "grad_norm": 0.14596043527126312, "learning_rate": 5.115130380931071e-06, 
"loss": 0.5381, "step": 2358 }, { "epoch": 0.51, "grad_norm": 0.1787167489528656, "learning_rate": 5.111642170478534e-06, "loss": 0.4973, "step": 2359 }, { "epoch": 0.51, "grad_norm": 0.1591702401638031, "learning_rate": 5.108153905661296e-06, "loss": 0.5501, "step": 2360 }, { "epoch": 0.51, "grad_norm": 0.15234871208667755, "learning_rate": 5.1046655881779825e-06, "loss": 0.5135, "step": 2361 }, { "epoch": 0.51, "grad_norm": 0.19040155410766602, "learning_rate": 5.101177219727245e-06, "loss": 0.5693, "step": 2362 }, { "epoch": 0.51, "grad_norm": 0.15070025622844696, "learning_rate": 5.097688802007767e-06, "loss": 0.5232, "step": 2363 }, { "epoch": 0.51, "grad_norm": 0.15969093143939972, "learning_rate": 5.094200336718246e-06, "loss": 0.5405, "step": 2364 }, { "epoch": 0.51, "grad_norm": 0.12944184243679047, "learning_rate": 5.090711825557408e-06, "loss": 0.491, "step": 2365 }, { "epoch": 0.51, "grad_norm": 0.1388048529624939, "learning_rate": 5.087223270224003e-06, "loss": 0.5004, "step": 2366 }, { "epoch": 0.51, "grad_norm": 0.18210247159004211, "learning_rate": 5.083734672416797e-06, "loss": 0.4767, "step": 2367 }, { "epoch": 0.51, "grad_norm": 0.1709405779838562, "learning_rate": 5.080246033834581e-06, "loss": 0.5355, "step": 2368 }, { "epoch": 0.51, "grad_norm": 0.16608983278274536, "learning_rate": 5.076757356176168e-06, "loss": 0.5589, "step": 2369 }, { "epoch": 0.51, "grad_norm": 0.18925471603870392, "learning_rate": 5.0732686411403816e-06, "loss": 0.5443, "step": 2370 }, { "epoch": 0.51, "grad_norm": 0.17456351220607758, "learning_rate": 5.069779890426072e-06, "loss": 0.4903, "step": 2371 }, { "epoch": 0.51, "grad_norm": 0.14656615257263184, "learning_rate": 5.066291105732102e-06, "loss": 0.4646, "step": 2372 }, { "epoch": 0.51, "grad_norm": 0.14051038026809692, "learning_rate": 5.0628022887573515e-06, "loss": 0.5032, "step": 2373 }, { "epoch": 0.51, "grad_norm": 0.15590442717075348, "learning_rate": 5.05931344120072e-06, "loss": 0.5255, "step": 2374 }, { "epoch": 0.51, "grad_norm": 0.15599004924297333, "learning_rate": 5.0558245647611155e-06, "loss": 0.5418, "step": 2375 }, { "epoch": 0.51, "grad_norm": 0.1530722975730896, "learning_rate": 5.052335661137467e-06, "loss": 0.469, "step": 2376 }, { "epoch": 0.51, "grad_norm": 0.16184838116168976, "learning_rate": 5.0488467320287106e-06, "loss": 0.4887, "step": 2377 }, { "epoch": 0.51, "grad_norm": 0.12114948034286499, "learning_rate": 5.0453577791337984e-06, "loss": 0.4982, "step": 2378 }, { "epoch": 0.51, "grad_norm": 0.14550864696502686, "learning_rate": 5.041868804151694e-06, "loss": 0.4555, "step": 2379 }, { "epoch": 0.51, "grad_norm": 0.1462576687335968, "learning_rate": 5.03837980878137e-06, "loss": 0.5135, "step": 2380 }, { "epoch": 0.51, "grad_norm": 0.1352759301662445, "learning_rate": 5.0348907947218086e-06, "loss": 0.5367, "step": 2381 }, { "epoch": 0.51, "grad_norm": 0.18618960678577423, "learning_rate": 5.031401763672003e-06, "loss": 0.4918, "step": 2382 }, { "epoch": 0.51, "grad_norm": 0.1655811071395874, "learning_rate": 5.027912717330956e-06, "loss": 0.5077, "step": 2383 }, { "epoch": 0.51, "grad_norm": 0.14371387660503387, "learning_rate": 5.024423657397674e-06, "loss": 0.5463, "step": 2384 }, { "epoch": 0.51, "grad_norm": 0.1331823766231537, "learning_rate": 5.020934585571171e-06, "loss": 0.5586, "step": 2385 }, { "epoch": 0.51, "grad_norm": 0.16544833779335022, "learning_rate": 5.017445503550471e-06, "loss": 0.5493, "step": 2386 }, { "epoch": 0.51, "grad_norm": 0.16902866959571838, "learning_rate": 
5.013956413034595e-06, "loss": 0.5215, "step": 2387 }, { "epoch": 0.51, "grad_norm": 0.19423706829547882, "learning_rate": 5.010467315722578e-06, "loss": 0.5343, "step": 2388 }, { "epoch": 0.51, "grad_norm": 0.1521768569946289, "learning_rate": 5.006978213313448e-06, "loss": 0.5021, "step": 2389 }, { "epoch": 0.51, "grad_norm": 0.12153864651918411, "learning_rate": 5.003489107506243e-06, "loss": 0.4893, "step": 2390 }, { "epoch": 0.52, "grad_norm": 0.1757657527923584, "learning_rate": 5e-06, "loss": 0.535, "step": 2391 }, { "epoch": 0.52, "grad_norm": 0.17673848569393158, "learning_rate": 4.996510892493758e-06, "loss": 0.5201, "step": 2392 }, { "epoch": 0.52, "grad_norm": 0.17887622117996216, "learning_rate": 4.993021786686554e-06, "loss": 0.5413, "step": 2393 }, { "epoch": 0.52, "grad_norm": 0.1362655609846115, "learning_rate": 4.989532684277424e-06, "loss": 0.4757, "step": 2394 }, { "epoch": 0.52, "grad_norm": 0.21385332942008972, "learning_rate": 4.986043586965406e-06, "loss": 0.5233, "step": 2395 }, { "epoch": 0.52, "grad_norm": 0.16764004528522491, "learning_rate": 4.98255449644953e-06, "loss": 0.5193, "step": 2396 }, { "epoch": 0.52, "grad_norm": 0.12933380901813507, "learning_rate": 4.979065414428829e-06, "loss": 0.4681, "step": 2397 }, { "epoch": 0.52, "grad_norm": 0.17438261210918427, "learning_rate": 4.975576342602329e-06, "loss": 0.5437, "step": 2398 }, { "epoch": 0.52, "grad_norm": 0.1581277847290039, "learning_rate": 4.9720872826690455e-06, "loss": 0.5147, "step": 2399 }, { "epoch": 0.52, "grad_norm": 0.15126928687095642, "learning_rate": 4.968598236327998e-06, "loss": 0.5033, "step": 2400 }, { "epoch": 0.52, "grad_norm": 0.144017294049263, "learning_rate": 4.965109205278193e-06, "loss": 0.4557, "step": 2401 }, { "epoch": 0.52, "grad_norm": 0.158042773604393, "learning_rate": 4.961620191218632e-06, "loss": 0.5118, "step": 2402 }, { "epoch": 0.52, "grad_norm": 0.21210241317749023, "learning_rate": 4.9581311958483075e-06, "loss": 0.556, "step": 2403 }, { "epoch": 0.52, "grad_norm": 0.25010186433792114, "learning_rate": 4.954642220866202e-06, "loss": 0.542, "step": 2404 }, { "epoch": 0.52, "grad_norm": 0.21155287325382233, "learning_rate": 4.95115326797129e-06, "loss": 0.4882, "step": 2405 }, { "epoch": 0.52, "grad_norm": 0.17543160915374756, "learning_rate": 4.947664338862534e-06, "loss": 0.5883, "step": 2406 }, { "epoch": 0.52, "grad_norm": 0.1969243884086609, "learning_rate": 4.944175435238886e-06, "loss": 0.5051, "step": 2407 }, { "epoch": 0.52, "grad_norm": 0.1602378487586975, "learning_rate": 4.940686558799283e-06, "loss": 0.5139, "step": 2408 }, { "epoch": 0.52, "grad_norm": 0.1955273449420929, "learning_rate": 4.9371977112426485e-06, "loss": 0.5096, "step": 2409 }, { "epoch": 0.52, "grad_norm": 0.15544743835926056, "learning_rate": 4.933708894267901e-06, "loss": 0.5081, "step": 2410 }, { "epoch": 0.52, "grad_norm": 0.177435964345932, "learning_rate": 4.93022010957393e-06, "loss": 0.5476, "step": 2411 }, { "epoch": 0.52, "grad_norm": 0.14814496040344238, "learning_rate": 4.92673135885962e-06, "loss": 0.5653, "step": 2412 }, { "epoch": 0.52, "grad_norm": 0.144011989235878, "learning_rate": 4.923242643823834e-06, "loss": 0.6024, "step": 2413 }, { "epoch": 0.52, "grad_norm": 0.16023319959640503, "learning_rate": 4.919753966165419e-06, "loss": 0.5927, "step": 2414 }, { "epoch": 0.52, "grad_norm": 0.13633988797664642, "learning_rate": 4.916265327583204e-06, "loss": 0.5548, "step": 2415 }, { "epoch": 0.52, "grad_norm": 0.20109489560127258, "learning_rate": 4.912776729775999e-06, 
"loss": 0.5668, "step": 2416 }, { "epoch": 0.52, "grad_norm": 0.3419434428215027, "learning_rate": 4.9092881744425944e-06, "loss": 0.4842, "step": 2417 }, { "epoch": 0.52, "grad_norm": 0.1448926031589508, "learning_rate": 4.905799663281756e-06, "loss": 0.4886, "step": 2418 }, { "epoch": 0.52, "grad_norm": 0.2309703230857849, "learning_rate": 4.902311197992234e-06, "loss": 0.5237, "step": 2419 }, { "epoch": 0.52, "grad_norm": 0.14190784096717834, "learning_rate": 4.898822780272757e-06, "loss": 0.5218, "step": 2420 }, { "epoch": 0.52, "grad_norm": 0.18205609917640686, "learning_rate": 4.895334411822019e-06, "loss": 0.5251, "step": 2421 }, { "epoch": 0.52, "grad_norm": 0.13188670575618744, "learning_rate": 4.8918460943387065e-06, "loss": 0.4971, "step": 2422 }, { "epoch": 0.52, "grad_norm": 0.16982056200504303, "learning_rate": 4.888357829521466e-06, "loss": 0.4846, "step": 2423 }, { "epoch": 0.52, "grad_norm": 0.12666776776313782, "learning_rate": 4.8848696190689295e-06, "loss": 0.4853, "step": 2424 }, { "epoch": 0.52, "grad_norm": 0.1458110213279724, "learning_rate": 4.881381464679698e-06, "loss": 0.4871, "step": 2425 }, { "epoch": 0.52, "grad_norm": 0.18324485421180725, "learning_rate": 4.877893368052343e-06, "loss": 0.545, "step": 2426 }, { "epoch": 0.52, "grad_norm": 0.18099121749401093, "learning_rate": 4.874405330885413e-06, "loss": 0.5002, "step": 2427 }, { "epoch": 0.52, "grad_norm": 0.13774168491363525, "learning_rate": 4.870917354877421e-06, "loss": 0.4789, "step": 2428 }, { "epoch": 0.52, "grad_norm": 0.16247624158859253, "learning_rate": 4.867429441726858e-06, "loss": 0.5491, "step": 2429 }, { "epoch": 0.52, "grad_norm": 0.2958735525608063, "learning_rate": 4.863941593132179e-06, "loss": 0.5158, "step": 2430 }, { "epoch": 0.52, "grad_norm": 0.1791061908006668, "learning_rate": 4.860453810791808e-06, "loss": 0.5083, "step": 2431 }, { "epoch": 0.52, "grad_norm": 0.15824836492538452, "learning_rate": 4.856966096404141e-06, "loss": 0.5177, "step": 2432 }, { "epoch": 0.52, "grad_norm": 0.15134254097938538, "learning_rate": 4.853478451667532e-06, "loss": 0.4666, "step": 2433 }, { "epoch": 0.52, "grad_norm": 0.14412038028240204, "learning_rate": 4.849990878280313e-06, "loss": 0.5838, "step": 2434 }, { "epoch": 0.52, "grad_norm": 0.1476101279258728, "learning_rate": 4.84650337794077e-06, "loss": 0.5155, "step": 2435 }, { "epoch": 0.52, "grad_norm": 0.13752271234989166, "learning_rate": 4.843015952347159e-06, "loss": 0.5225, "step": 2436 }, { "epoch": 0.52, "grad_norm": 0.1495019495487213, "learning_rate": 4.839528603197702e-06, "loss": 0.5148, "step": 2437 }, { "epoch": 0.53, "grad_norm": 0.18052443861961365, "learning_rate": 4.8360413321905786e-06, "loss": 0.5321, "step": 2438 }, { "epoch": 0.53, "grad_norm": 0.16000008583068848, "learning_rate": 4.832554141023934e-06, "loss": 0.5374, "step": 2439 }, { "epoch": 0.53, "grad_norm": 0.14435116946697235, "learning_rate": 4.829067031395871e-06, "loss": 0.4763, "step": 2440 }, { "epoch": 0.53, "grad_norm": 0.1648446023464203, "learning_rate": 4.825580005004456e-06, "loss": 0.5029, "step": 2441 }, { "epoch": 0.53, "grad_norm": 0.1948603093624115, "learning_rate": 4.822093063547715e-06, "loss": 0.517, "step": 2442 }, { "epoch": 0.53, "grad_norm": 0.1540631800889969, "learning_rate": 4.818606208723627e-06, "loss": 0.5113, "step": 2443 }, { "epoch": 0.53, "grad_norm": 0.14734607934951782, "learning_rate": 4.815119442230138e-06, "loss": 0.5323, "step": 2444 }, { "epoch": 0.53, "grad_norm": 0.13016067445278168, "learning_rate": 
4.811632765765143e-06, "loss": 0.4841, "step": 2445 }, { "epoch": 0.53, "grad_norm": 0.1437351554632187, "learning_rate": 4.8081461810264955e-06, "loss": 0.4775, "step": 2446 }, { "epoch": 0.53, "grad_norm": 0.16473154723644257, "learning_rate": 4.804659689712009e-06, "loss": 0.5019, "step": 2447 }, { "epoch": 0.53, "grad_norm": 0.13416069746017456, "learning_rate": 4.801173293519442e-06, "loss": 0.5193, "step": 2448 }, { "epoch": 0.53, "grad_norm": 0.12704534828662872, "learning_rate": 4.797686994146519e-06, "loss": 0.499, "step": 2449 }, { "epoch": 0.53, "grad_norm": 0.15111473202705383, "learning_rate": 4.7942007932909046e-06, "loss": 0.5168, "step": 2450 }, { "epoch": 0.53, "grad_norm": 0.13630732893943787, "learning_rate": 4.790714692650223e-06, "loss": 0.4938, "step": 2451 }, { "epoch": 0.53, "grad_norm": 0.13137710094451904, "learning_rate": 4.7872286939220516e-06, "loss": 0.4544, "step": 2452 }, { "epoch": 0.53, "grad_norm": 0.15518240630626678, "learning_rate": 4.783742798803909e-06, "loss": 0.5013, "step": 2453 }, { "epoch": 0.53, "grad_norm": 0.13857389986515045, "learning_rate": 4.7802570089932746e-06, "loss": 0.5551, "step": 2454 }, { "epoch": 0.53, "grad_norm": 0.1502048522233963, "learning_rate": 4.776771326187566e-06, "loss": 0.4341, "step": 2455 }, { "epoch": 0.53, "grad_norm": 0.16226448118686676, "learning_rate": 4.773285752084154e-06, "loss": 0.5555, "step": 2456 }, { "epoch": 0.53, "grad_norm": 0.15207113325595856, "learning_rate": 4.769800288380361e-06, "loss": 0.4934, "step": 2457 }, { "epoch": 0.53, "grad_norm": 0.16286228597164154, "learning_rate": 4.766314936773445e-06, "loss": 0.5066, "step": 2458 }, { "epoch": 0.53, "grad_norm": 0.15286804735660553, "learning_rate": 4.762829698960618e-06, "loss": 0.5425, "step": 2459 }, { "epoch": 0.53, "grad_norm": 0.149344801902771, "learning_rate": 4.7593445766390315e-06, "loss": 0.5626, "step": 2460 }, { "epoch": 0.53, "grad_norm": 0.1389455944299698, "learning_rate": 4.755859571505786e-06, "loss": 0.4964, "step": 2461 }, { "epoch": 0.53, "grad_norm": 0.14913085103034973, "learning_rate": 4.752374685257919e-06, "loss": 0.524, "step": 2462 }, { "epoch": 0.53, "grad_norm": 0.14657723903656006, "learning_rate": 4.748889919592414e-06, "loss": 0.5059, "step": 2463 }, { "epoch": 0.53, "grad_norm": 0.12738269567489624, "learning_rate": 4.745405276206196e-06, "loss": 0.5039, "step": 2464 }, { "epoch": 0.53, "grad_norm": 0.2088775783777237, "learning_rate": 4.741920756796126e-06, "loss": 0.5238, "step": 2465 }, { "epoch": 0.53, "grad_norm": 0.1429111510515213, "learning_rate": 4.738436363059013e-06, "loss": 0.4606, "step": 2466 }, { "epoch": 0.53, "grad_norm": 0.1563674658536911, "learning_rate": 4.734952096691594e-06, "loss": 0.5982, "step": 2467 }, { "epoch": 0.53, "grad_norm": 0.15420180559158325, "learning_rate": 4.731467959390552e-06, "loss": 0.515, "step": 2468 }, { "epoch": 0.53, "grad_norm": 0.21299295127391815, "learning_rate": 4.727983952852505e-06, "loss": 0.5306, "step": 2469 }, { "epoch": 0.53, "grad_norm": 0.15745538473129272, "learning_rate": 4.724500078774008e-06, "loss": 0.5118, "step": 2470 }, { "epoch": 0.53, "grad_norm": 0.1578780859708786, "learning_rate": 4.721016338851549e-06, "loss": 0.5061, "step": 2471 }, { "epoch": 0.53, "grad_norm": 0.1522160917520523, "learning_rate": 4.717532734781552e-06, "loss": 0.5417, "step": 2472 }, { "epoch": 0.53, "grad_norm": 0.12511098384857178, "learning_rate": 4.714049268260376e-06, "loss": 0.4981, "step": 2473 }, { "epoch": 0.53, "grad_norm": 0.1434258371591568, 
"learning_rate": 4.710565940984313e-06, "loss": 0.5178, "step": 2474 }, { "epoch": 0.53, "grad_norm": 0.13308405876159668, "learning_rate": 4.707082754649584e-06, "loss": 0.4986, "step": 2475 }, { "epoch": 0.53, "grad_norm": 0.15585026144981384, "learning_rate": 4.703599710952347e-06, "loss": 0.5179, "step": 2476 }, { "epoch": 0.53, "grad_norm": 0.1660911738872528, "learning_rate": 4.700116811588684e-06, "loss": 0.4997, "step": 2477 }, { "epoch": 0.53, "grad_norm": 0.1638256311416626, "learning_rate": 4.6966340582546085e-06, "loss": 0.4711, "step": 2478 }, { "epoch": 0.53, "grad_norm": 0.14776884019374847, "learning_rate": 4.693151452646071e-06, "loss": 0.47, "step": 2479 }, { "epoch": 0.53, "grad_norm": 0.15483321249485016, "learning_rate": 4.689668996458937e-06, "loss": 0.5476, "step": 2480 }, { "epoch": 0.53, "grad_norm": 0.18256203830242157, "learning_rate": 4.6861866913890094e-06, "loss": 0.5002, "step": 2481 }, { "epoch": 0.53, "grad_norm": 0.1372958868741989, "learning_rate": 4.682704539132011e-06, "loss": 0.5201, "step": 2482 }, { "epoch": 0.53, "grad_norm": 0.17966903746128082, "learning_rate": 4.679222541383594e-06, "loss": 0.4896, "step": 2483 }, { "epoch": 0.54, "grad_norm": 0.16434355080127716, "learning_rate": 4.6757406998393354e-06, "loss": 0.5577, "step": 2484 }, { "epoch": 0.54, "grad_norm": 0.12742279469966888, "learning_rate": 4.672259016194733e-06, "loss": 0.5662, "step": 2485 }, { "epoch": 0.54, "grad_norm": 0.14353856444358826, "learning_rate": 4.668777492145212e-06, "loss": 0.5476, "step": 2486 }, { "epoch": 0.54, "grad_norm": 0.17390471696853638, "learning_rate": 4.665296129386116e-06, "loss": 0.5625, "step": 2487 }, { "epoch": 0.54, "grad_norm": 0.16890183091163635, "learning_rate": 4.661814929612713e-06, "loss": 0.5211, "step": 2488 }, { "epoch": 0.54, "grad_norm": 0.16704991459846497, "learning_rate": 4.658333894520189e-06, "loss": 0.4941, "step": 2489 }, { "epoch": 0.54, "grad_norm": 0.15086905658245087, "learning_rate": 4.654853025803649e-06, "loss": 0.5065, "step": 2490 }, { "epoch": 0.54, "grad_norm": 0.13586142659187317, "learning_rate": 4.651372325158125e-06, "loss": 0.5415, "step": 2491 }, { "epoch": 0.54, "grad_norm": 0.17813622951507568, "learning_rate": 4.6478917942785575e-06, "loss": 0.5101, "step": 2492 }, { "epoch": 0.54, "grad_norm": 0.16348902881145477, "learning_rate": 4.644411434859808e-06, "loss": 0.4916, "step": 2493 }, { "epoch": 0.54, "grad_norm": 0.17885281145572662, "learning_rate": 4.640931248596655e-06, "loss": 0.4749, "step": 2494 }, { "epoch": 0.54, "grad_norm": 0.15020768344402313, "learning_rate": 4.637451237183792e-06, "loss": 0.5273, "step": 2495 }, { "epoch": 0.54, "grad_norm": 0.15204519033432007, "learning_rate": 4.633971402315828e-06, "loss": 0.5244, "step": 2496 }, { "epoch": 0.54, "grad_norm": 0.15182174742221832, "learning_rate": 4.630491745687286e-06, "loss": 0.4928, "step": 2497 }, { "epoch": 0.54, "grad_norm": 0.163527712225914, "learning_rate": 4.627012268992603e-06, "loss": 0.5102, "step": 2498 }, { "epoch": 0.54, "grad_norm": 0.1811029613018036, "learning_rate": 4.623532973926124e-06, "loss": 0.5091, "step": 2499 }, { "epoch": 0.54, "grad_norm": 0.17676551640033722, "learning_rate": 4.62005386218211e-06, "loss": 0.5543, "step": 2500 }, { "epoch": 0.54, "grad_norm": 0.14058449864387512, "learning_rate": 4.616574935454735e-06, "loss": 0.4906, "step": 2501 }, { "epoch": 0.54, "grad_norm": 0.14341934025287628, "learning_rate": 4.613096195438074e-06, "loss": 0.5007, "step": 2502 }, { "epoch": 0.54, "grad_norm": 
0.17558392882347107, "learning_rate": 4.609617643826121e-06, "loss": 0.4882, "step": 2503 }, { "epoch": 0.54, "grad_norm": 0.13475576043128967, "learning_rate": 4.60613928231277e-06, "loss": 0.5144, "step": 2504 }, { "epoch": 0.54, "grad_norm": 0.158226877450943, "learning_rate": 4.602661112591829e-06, "loss": 0.5136, "step": 2505 }, { "epoch": 0.54, "grad_norm": 0.1458200067281723, "learning_rate": 4.59918313635701e-06, "loss": 0.4688, "step": 2506 }, { "epoch": 0.54, "grad_norm": 0.19686149060726166, "learning_rate": 4.595705355301927e-06, "loss": 0.5185, "step": 2507 }, { "epoch": 0.54, "grad_norm": 0.1282099336385727, "learning_rate": 4.592227771120108e-06, "loss": 0.5569, "step": 2508 }, { "epoch": 0.54, "grad_norm": 0.18009676039218903, "learning_rate": 4.588750385504975e-06, "loss": 0.4903, "step": 2509 }, { "epoch": 0.54, "grad_norm": 0.21005766093730927, "learning_rate": 4.585273200149859e-06, "loss": 0.5475, "step": 2510 }, { "epoch": 0.54, "grad_norm": 0.12568634748458862, "learning_rate": 4.581796216747996e-06, "loss": 0.5061, "step": 2511 }, { "epoch": 0.54, "grad_norm": 0.14265067875385284, "learning_rate": 4.578319436992515e-06, "loss": 0.4862, "step": 2512 }, { "epoch": 0.54, "grad_norm": 0.14382900297641754, "learning_rate": 4.574842862576455e-06, "loss": 0.5384, "step": 2513 }, { "epoch": 0.54, "grad_norm": 0.20735333859920502, "learning_rate": 4.5713664951927475e-06, "loss": 0.4868, "step": 2514 }, { "epoch": 0.54, "grad_norm": 0.14771424233913422, "learning_rate": 4.56789033653423e-06, "loss": 0.4837, "step": 2515 }, { "epoch": 0.54, "grad_norm": 0.1805439293384552, "learning_rate": 4.5644143882936316e-06, "loss": 0.5152, "step": 2516 }, { "epoch": 0.54, "grad_norm": 0.16125157475471497, "learning_rate": 4.560938652163585e-06, "loss": 0.514, "step": 2517 }, { "epoch": 0.54, "grad_norm": 0.16599629819393158, "learning_rate": 4.5574631298366165e-06, "loss": 0.4994, "step": 2518 }, { "epoch": 0.54, "grad_norm": 0.15320484340190887, "learning_rate": 4.553987823005148e-06, "loss": 0.4958, "step": 2519 }, { "epoch": 0.54, "grad_norm": 0.1553090214729309, "learning_rate": 4.550512733361499e-06, "loss": 0.5354, "step": 2520 }, { "epoch": 0.54, "grad_norm": 0.14517144858837128, "learning_rate": 4.5470378625978775e-06, "loss": 0.5354, "step": 2521 }, { "epoch": 0.54, "grad_norm": 0.2318032830953598, "learning_rate": 4.543563212406392e-06, "loss": 0.5206, "step": 2522 }, { "epoch": 0.54, "grad_norm": 0.15330053865909576, "learning_rate": 4.540088784479043e-06, "loss": 0.4928, "step": 2523 }, { "epoch": 0.54, "grad_norm": 0.16507619619369507, "learning_rate": 4.536614580507714e-06, "loss": 0.5271, "step": 2524 }, { "epoch": 0.54, "grad_norm": 0.2185535430908203, "learning_rate": 4.53314060218419e-06, "loss": 0.4909, "step": 2525 }, { "epoch": 0.54, "grad_norm": 0.1298246681690216, "learning_rate": 4.52966685120014e-06, "loss": 0.4953, "step": 2526 }, { "epoch": 0.54, "grad_norm": 0.16641001403331757, "learning_rate": 4.526193329247124e-06, "loss": 0.5287, "step": 2527 }, { "epoch": 0.54, "grad_norm": 0.1725553274154663, "learning_rate": 4.5227200380165925e-06, "loss": 0.5028, "step": 2528 }, { "epoch": 0.54, "grad_norm": 0.14473970234394073, "learning_rate": 4.51924697919988e-06, "loss": 0.5306, "step": 2529 }, { "epoch": 0.54, "grad_norm": 0.15652526915073395, "learning_rate": 4.51577415448821e-06, "loss": 0.5716, "step": 2530 }, { "epoch": 0.55, "grad_norm": 0.127789705991745, "learning_rate": 4.512301565572691e-06, "loss": 0.501, "step": 2531 }, { "epoch": 0.55, "grad_norm": 
0.1573222577571869, "learning_rate": 4.508829214144318e-06, "loss": 0.5025, "step": 2532 }, { "epoch": 0.55, "grad_norm": 0.14756949245929718, "learning_rate": 4.5053571018939715e-06, "loss": 0.5278, "step": 2533 }, { "epoch": 0.55, "grad_norm": 0.16021455824375153, "learning_rate": 4.5018852305124075e-06, "loss": 0.4744, "step": 2534 }, { "epoch": 0.55, "grad_norm": 0.16380415856838226, "learning_rate": 4.498413601690278e-06, "loss": 0.5568, "step": 2535 }, { "epoch": 0.55, "grad_norm": 0.18932676315307617, "learning_rate": 4.494942217118105e-06, "loss": 0.4957, "step": 2536 }, { "epoch": 0.55, "grad_norm": 0.18895933032035828, "learning_rate": 4.491471078486297e-06, "loss": 0.5109, "step": 2537 }, { "epoch": 0.55, "grad_norm": 0.14104638993740082, "learning_rate": 4.488000187485144e-06, "loss": 0.5168, "step": 2538 }, { "epoch": 0.55, "grad_norm": 0.13941968977451324, "learning_rate": 4.484529545804811e-06, "loss": 0.6087, "step": 2539 }, { "epoch": 0.55, "grad_norm": 0.1779303252696991, "learning_rate": 4.481059155135346e-06, "loss": 0.5274, "step": 2540 }, { "epoch": 0.55, "grad_norm": 0.16781829297542572, "learning_rate": 4.477589017166671e-06, "loss": 0.5129, "step": 2541 }, { "epoch": 0.55, "grad_norm": 0.149240642786026, "learning_rate": 4.474119133588588e-06, "loss": 0.5388, "step": 2542 }, { "epoch": 0.55, "grad_norm": 0.22637441754341125, "learning_rate": 4.470649506090772e-06, "loss": 0.5118, "step": 2543 }, { "epoch": 0.55, "grad_norm": 0.1592012643814087, "learning_rate": 4.4671801363627776e-06, "loss": 0.5295, "step": 2544 }, { "epoch": 0.55, "grad_norm": 0.18590115010738373, "learning_rate": 4.463711026094032e-06, "loss": 0.4976, "step": 2545 }, { "epoch": 0.55, "grad_norm": 0.2293752282857895, "learning_rate": 4.460242176973829e-06, "loss": 0.54, "step": 2546 }, { "epoch": 0.55, "grad_norm": 0.13310395181179047, "learning_rate": 4.456773590691352e-06, "loss": 0.5073, "step": 2547 }, { "epoch": 0.55, "grad_norm": 0.1677958369255066, "learning_rate": 4.453305268935637e-06, "loss": 0.5132, "step": 2548 }, { "epoch": 0.55, "grad_norm": 0.1587441861629486, "learning_rate": 4.4498372133956046e-06, "loss": 0.4854, "step": 2549 }, { "epoch": 0.55, "grad_norm": 0.16079290211200714, "learning_rate": 4.446369425760042e-06, "loss": 0.4615, "step": 2550 }, { "epoch": 0.55, "grad_norm": 0.1533065140247345, "learning_rate": 4.442901907717603e-06, "loss": 0.487, "step": 2551 }, { "epoch": 0.55, "grad_norm": 0.17068631947040558, "learning_rate": 4.439434660956814e-06, "loss": 0.5596, "step": 2552 }, { "epoch": 0.55, "grad_norm": 0.15625819563865662, "learning_rate": 4.4359676871660665e-06, "loss": 0.5136, "step": 2553 }, { "epoch": 0.55, "grad_norm": 0.16105841100215912, "learning_rate": 4.432500988033621e-06, "loss": 0.5351, "step": 2554 }, { "epoch": 0.55, "grad_norm": 0.16557753086090088, "learning_rate": 4.429034565247606e-06, "loss": 0.491, "step": 2555 }, { "epoch": 0.55, "grad_norm": 0.151271790266037, "learning_rate": 4.42556842049601e-06, "loss": 0.5274, "step": 2556 }, { "epoch": 0.55, "grad_norm": 0.1744944155216217, "learning_rate": 4.422102555466691e-06, "loss": 0.5155, "step": 2557 }, { "epoch": 0.55, "grad_norm": 0.14737199246883392, "learning_rate": 4.418636971847367e-06, "loss": 0.5281, "step": 2558 }, { "epoch": 0.55, "grad_norm": 0.16449463367462158, "learning_rate": 4.415171671325622e-06, "loss": 0.5183, "step": 2559 }, { "epoch": 0.55, "grad_norm": 0.12860900163650513, "learning_rate": 4.4117066555889045e-06, "loss": 0.4566, "step": 2560 }, { "epoch": 0.55, 
"grad_norm": 0.1798088699579239, "learning_rate": 4.408241926324515e-06, "loss": 0.5072, "step": 2561 }, { "epoch": 0.55, "grad_norm": 0.1573198288679123, "learning_rate": 4.404777485219624e-06, "loss": 0.5375, "step": 2562 }, { "epoch": 0.55, "grad_norm": 0.12489344924688339, "learning_rate": 4.401313333961257e-06, "loss": 0.4767, "step": 2563 }, { "epoch": 0.55, "grad_norm": 0.1495581567287445, "learning_rate": 4.397849474236299e-06, "loss": 0.5327, "step": 2564 }, { "epoch": 0.55, "grad_norm": 0.21391817927360535, "learning_rate": 4.3943859077314956e-06, "loss": 0.536, "step": 2565 }, { "epoch": 0.55, "grad_norm": 0.1484507918357849, "learning_rate": 4.390922636133444e-06, "loss": 0.4943, "step": 2566 }, { "epoch": 0.55, "grad_norm": 0.16837376356124878, "learning_rate": 4.3874596611286076e-06, "loss": 0.544, "step": 2567 }, { "epoch": 0.55, "grad_norm": 0.1593373417854309, "learning_rate": 4.38399698440329e-06, "loss": 0.5119, "step": 2568 }, { "epoch": 0.55, "grad_norm": 0.18334493041038513, "learning_rate": 4.380534607643668e-06, "loss": 0.5283, "step": 2569 }, { "epoch": 0.55, "grad_norm": 0.1494050920009613, "learning_rate": 4.377072532535756e-06, "loss": 0.5343, "step": 2570 }, { "epoch": 0.55, "grad_norm": 0.1535796821117401, "learning_rate": 4.37361076076543e-06, "loss": 0.5707, "step": 2571 }, { "epoch": 0.55, "grad_norm": 0.22966866195201874, "learning_rate": 4.370149294018419e-06, "loss": 0.5478, "step": 2572 }, { "epoch": 0.55, "grad_norm": 0.166322723031044, "learning_rate": 4.366688133980299e-06, "loss": 0.5321, "step": 2573 }, { "epoch": 0.55, "grad_norm": 0.1559196412563324, "learning_rate": 4.3632272823365004e-06, "loss": 0.4929, "step": 2574 }, { "epoch": 0.55, "grad_norm": 0.13609760999679565, "learning_rate": 4.359766740772301e-06, "loss": 0.5255, "step": 2575 }, { "epoch": 0.55, "grad_norm": 0.17465785145759583, "learning_rate": 4.356306510972829e-06, "loss": 0.4871, "step": 2576 }, { "epoch": 0.56, "grad_norm": 0.16919800639152527, "learning_rate": 4.35284659462306e-06, "loss": 0.5335, "step": 2577 }, { "epoch": 0.56, "grad_norm": 0.20923703908920288, "learning_rate": 4.349386993407817e-06, "loss": 0.5549, "step": 2578 }, { "epoch": 0.56, "grad_norm": 0.14986181259155273, "learning_rate": 4.345927709011771e-06, "loss": 0.5111, "step": 2579 }, { "epoch": 0.56, "grad_norm": 0.151445209980011, "learning_rate": 4.342468743119436e-06, "loss": 0.5129, "step": 2580 }, { "epoch": 0.56, "grad_norm": 0.14397896826267242, "learning_rate": 4.3390100974151715e-06, "loss": 0.4842, "step": 2581 }, { "epoch": 0.56, "grad_norm": 0.16390201449394226, "learning_rate": 4.335551773583186e-06, "loss": 0.4678, "step": 2582 }, { "epoch": 0.56, "grad_norm": 0.16361331939697266, "learning_rate": 4.332093773307523e-06, "loss": 0.5084, "step": 2583 }, { "epoch": 0.56, "grad_norm": 0.17257274687290192, "learning_rate": 4.328636098272075e-06, "loss": 0.5223, "step": 2584 }, { "epoch": 0.56, "grad_norm": 0.14300677180290222, "learning_rate": 4.325178750160573e-06, "loss": 0.4712, "step": 2585 }, { "epoch": 0.56, "grad_norm": 0.18836241960525513, "learning_rate": 4.32172173065659e-06, "loss": 0.5017, "step": 2586 }, { "epoch": 0.56, "grad_norm": 0.1701335459947586, "learning_rate": 4.318265041443538e-06, "loss": 0.4977, "step": 2587 }, { "epoch": 0.56, "grad_norm": 0.18600672483444214, "learning_rate": 4.31480868420467e-06, "loss": 0.5121, "step": 2588 }, { "epoch": 0.56, "grad_norm": 0.13420304656028748, "learning_rate": 4.311352660623076e-06, "loss": 0.4936, "step": 2589 }, { "epoch": 0.56, 
"grad_norm": 0.19042494893074036, "learning_rate": 4.307896972381681e-06, "loss": 0.5553, "step": 2590 }, { "epoch": 0.56, "grad_norm": 0.14162546396255493, "learning_rate": 4.304441621163252e-06, "loss": 0.547, "step": 2591 }, { "epoch": 0.56, "grad_norm": 0.17262719571590424, "learning_rate": 4.3009866086503905e-06, "loss": 0.5414, "step": 2592 }, { "epoch": 0.56, "grad_norm": 0.1511780321598053, "learning_rate": 4.297531936525528e-06, "loss": 0.4973, "step": 2593 }, { "epoch": 0.56, "grad_norm": 0.15445083379745483, "learning_rate": 4.294077606470937e-06, "loss": 0.5506, "step": 2594 }, { "epoch": 0.56, "grad_norm": 0.22606535255908966, "learning_rate": 4.2906236201687186e-06, "loss": 0.5627, "step": 2595 }, { "epoch": 0.56, "grad_norm": 0.2494857758283615, "learning_rate": 4.28716997930081e-06, "loss": 0.5328, "step": 2596 }, { "epoch": 0.56, "grad_norm": 0.1547478884458542, "learning_rate": 4.283716685548976e-06, "loss": 0.5037, "step": 2597 }, { "epoch": 0.56, "grad_norm": 0.17305047810077667, "learning_rate": 4.2802637405948175e-06, "loss": 0.4701, "step": 2598 }, { "epoch": 0.56, "grad_norm": 0.20879824459552765, "learning_rate": 4.2768111461197635e-06, "loss": 0.5721, "step": 2599 }, { "epoch": 0.56, "grad_norm": 0.20338691771030426, "learning_rate": 4.273358903805069e-06, "loss": 0.4916, "step": 2600 }, { "epoch": 0.56, "grad_norm": 0.1474212110042572, "learning_rate": 4.2699070153318244e-06, "loss": 0.5426, "step": 2601 }, { "epoch": 0.56, "grad_norm": 0.1909620314836502, "learning_rate": 4.266455482380938e-06, "loss": 0.4591, "step": 2602 }, { "epoch": 0.56, "grad_norm": 0.16102322936058044, "learning_rate": 4.2630043066331536e-06, "loss": 0.4825, "step": 2603 }, { "epoch": 0.56, "grad_norm": 0.174557164311409, "learning_rate": 4.2595534897690415e-06, "loss": 0.5141, "step": 2604 }, { "epoch": 0.56, "grad_norm": 0.17708678543567657, "learning_rate": 4.256103033468989e-06, "loss": 0.5301, "step": 2605 }, { "epoch": 0.56, "grad_norm": 0.13558730483055115, "learning_rate": 4.252652939413215e-06, "loss": 0.4784, "step": 2606 }, { "epoch": 0.56, "grad_norm": 0.188698410987854, "learning_rate": 4.24920320928176e-06, "loss": 0.5073, "step": 2607 }, { "epoch": 0.56, "grad_norm": 0.181773841381073, "learning_rate": 4.245753844754484e-06, "loss": 0.5205, "step": 2608 }, { "epoch": 0.56, "grad_norm": 0.15207915008068085, "learning_rate": 4.242304847511076e-06, "loss": 0.5098, "step": 2609 }, { "epoch": 0.56, "grad_norm": 0.17972496151924133, "learning_rate": 4.23885621923104e-06, "loss": 0.5511, "step": 2610 }, { "epoch": 0.56, "grad_norm": 0.14959251880645752, "learning_rate": 4.235407961593704e-06, "loss": 0.49, "step": 2611 }, { "epoch": 0.56, "grad_norm": 0.1577451229095459, "learning_rate": 4.231960076278211e-06, "loss": 0.4616, "step": 2612 }, { "epoch": 0.56, "grad_norm": 0.1617031991481781, "learning_rate": 4.228512564963528e-06, "loss": 0.5371, "step": 2613 }, { "epoch": 0.56, "grad_norm": 0.15706071257591248, "learning_rate": 4.225065429328439e-06, "loss": 0.4847, "step": 2614 }, { "epoch": 0.56, "grad_norm": 0.14980901777744293, "learning_rate": 4.221618671051539e-06, "loss": 0.5232, "step": 2615 }, { "epoch": 0.56, "grad_norm": 0.15324559807777405, "learning_rate": 4.218172291811249e-06, "loss": 0.5333, "step": 2616 }, { "epoch": 0.56, "grad_norm": 0.13998126983642578, "learning_rate": 4.214726293285797e-06, "loss": 0.5366, "step": 2617 }, { "epoch": 0.56, "grad_norm": 0.16418395936489105, "learning_rate": 4.211280677153228e-06, "loss": 0.5233, "step": 2618 }, { "epoch": 0.56, 
"grad_norm": 0.16183216869831085, "learning_rate": 4.207835445091405e-06, "loss": 0.4953, "step": 2619 }, { "epoch": 0.56, "grad_norm": 0.15545772016048431, "learning_rate": 4.204390598777999e-06, "loss": 0.5336, "step": 2620 }, { "epoch": 0.56, "grad_norm": 0.1599649339914322, "learning_rate": 4.2009461398904955e-06, "loss": 0.5047, "step": 2621 }, { "epoch": 0.56, "grad_norm": 0.178667351603508, "learning_rate": 4.1975020701061884e-06, "loss": 0.5114, "step": 2622 }, { "epoch": 0.57, "grad_norm": 0.16403385996818542, "learning_rate": 4.194058391102188e-06, "loss": 0.5077, "step": 2623 }, { "epoch": 0.57, "grad_norm": 0.16363531351089478, "learning_rate": 4.190615104555407e-06, "loss": 0.5107, "step": 2624 }, { "epoch": 0.57, "grad_norm": 0.1554226130247116, "learning_rate": 4.1871722121425725e-06, "loss": 0.489, "step": 2625 }, { "epoch": 0.57, "grad_norm": 0.14770759642124176, "learning_rate": 4.1837297155402204e-06, "loss": 0.5776, "step": 2626 }, { "epoch": 0.57, "grad_norm": 0.17107781767845154, "learning_rate": 4.180287616424685e-06, "loss": 0.4841, "step": 2627 }, { "epoch": 0.57, "grad_norm": 0.17729692161083221, "learning_rate": 4.17684591647212e-06, "loss": 0.5217, "step": 2628 }, { "epoch": 0.57, "grad_norm": 0.12309854477643967, "learning_rate": 4.173404617358473e-06, "loss": 0.5291, "step": 2629 }, { "epoch": 0.57, "grad_norm": 0.1765958070755005, "learning_rate": 4.1699637207595035e-06, "loss": 0.5339, "step": 2630 }, { "epoch": 0.57, "grad_norm": 0.13170979917049408, "learning_rate": 4.166523228350775e-06, "loss": 0.4824, "step": 2631 }, { "epoch": 0.57, "grad_norm": 0.16714021563529968, "learning_rate": 4.163083141807648e-06, "loss": 0.5273, "step": 2632 }, { "epoch": 0.57, "grad_norm": 0.14370590448379517, "learning_rate": 4.159643462805293e-06, "loss": 0.5099, "step": 2633 }, { "epoch": 0.57, "grad_norm": 0.16657981276512146, "learning_rate": 4.156204193018677e-06, "loss": 0.5525, "step": 2634 }, { "epoch": 0.57, "grad_norm": 0.16202954947948456, "learning_rate": 4.152765334122569e-06, "loss": 0.514, "step": 2635 }, { "epoch": 0.57, "grad_norm": 0.16040794551372528, "learning_rate": 4.149326887791541e-06, "loss": 0.506, "step": 2636 }, { "epoch": 0.57, "grad_norm": 0.20684373378753662, "learning_rate": 4.145888855699957e-06, "loss": 0.4962, "step": 2637 }, { "epoch": 0.57, "grad_norm": 0.1377829760313034, "learning_rate": 4.142451239521988e-06, "loss": 0.5331, "step": 2638 }, { "epoch": 0.57, "grad_norm": 0.1686798632144928, "learning_rate": 4.139014040931594e-06, "loss": 0.4454, "step": 2639 }, { "epoch": 0.57, "grad_norm": 0.14603053033351898, "learning_rate": 4.135577261602537e-06, "loss": 0.4832, "step": 2640 }, { "epoch": 0.57, "grad_norm": 0.14471474289894104, "learning_rate": 4.132140903208376e-06, "loss": 0.5147, "step": 2641 }, { "epoch": 0.57, "grad_norm": 0.17779991030693054, "learning_rate": 4.128704967422458e-06, "loss": 0.5427, "step": 2642 }, { "epoch": 0.57, "grad_norm": 0.15965478122234344, "learning_rate": 4.125269455917934e-06, "loss": 0.5276, "step": 2643 }, { "epoch": 0.57, "grad_norm": 0.13071952760219574, "learning_rate": 4.1218343703677385e-06, "loss": 0.5247, "step": 2644 }, { "epoch": 0.57, "grad_norm": 0.1529112011194229, "learning_rate": 4.118399712444607e-06, "loss": 0.4814, "step": 2645 }, { "epoch": 0.57, "grad_norm": 0.1327049285173416, "learning_rate": 4.114965483821061e-06, "loss": 0.5298, "step": 2646 }, { "epoch": 0.57, "grad_norm": 0.1503492295742035, "learning_rate": 4.111531686169415e-06, "loss": 0.4757, "step": 2647 }, { "epoch": 
0.57, "grad_norm": 0.15045446157455444, "learning_rate": 4.108098321161776e-06, "loss": 0.5147, "step": 2648 }, { "epoch": 0.57, "grad_norm": 0.14457084238529205, "learning_rate": 4.104665390470034e-06, "loss": 0.4722, "step": 2649 }, { "epoch": 0.57, "grad_norm": 0.16404461860656738, "learning_rate": 4.101232895765875e-06, "loss": 0.5217, "step": 2650 }, { "epoch": 0.57, "grad_norm": 0.15563920140266418, "learning_rate": 4.0978008387207656e-06, "loss": 0.4825, "step": 2651 }, { "epoch": 0.57, "grad_norm": 0.1561812460422516, "learning_rate": 4.094369221005965e-06, "loss": 0.461, "step": 2652 }, { "epoch": 0.57, "grad_norm": 0.1469080001115799, "learning_rate": 4.090938044292517e-06, "loss": 0.5018, "step": 2653 }, { "epoch": 0.57, "grad_norm": 0.14523988962173462, "learning_rate": 4.0875073102512485e-06, "loss": 0.5539, "step": 2654 }, { "epoch": 0.57, "grad_norm": 0.13977643847465515, "learning_rate": 4.084077020552773e-06, "loss": 0.5137, "step": 2655 }, { "epoch": 0.57, "grad_norm": 0.1671827733516693, "learning_rate": 4.080647176867486e-06, "loss": 0.4837, "step": 2656 }, { "epoch": 0.57, "grad_norm": 0.17201554775238037, "learning_rate": 4.077217780865568e-06, "loss": 0.5317, "step": 2657 }, { "epoch": 0.57, "grad_norm": 0.16702772676944733, "learning_rate": 4.07378883421698e-06, "loss": 0.5107, "step": 2658 }, { "epoch": 0.57, "grad_norm": 0.15763095021247864, "learning_rate": 4.070360338591463e-06, "loss": 0.4985, "step": 2659 }, { "epoch": 0.57, "grad_norm": 0.19694013893604279, "learning_rate": 4.066932295658543e-06, "loss": 0.5392, "step": 2660 }, { "epoch": 0.57, "grad_norm": 0.16363875567913055, "learning_rate": 4.0635047070875175e-06, "loss": 0.5371, "step": 2661 }, { "epoch": 0.57, "grad_norm": 0.14083370566368103, "learning_rate": 4.06007757454747e-06, "loss": 0.5127, "step": 2662 }, { "epoch": 0.57, "grad_norm": 0.20537738502025604, "learning_rate": 4.056650899707262e-06, "loss": 0.5337, "step": 2663 }, { "epoch": 0.57, "grad_norm": 0.18272843956947327, "learning_rate": 4.053224684235526e-06, "loss": 0.4706, "step": 2664 }, { "epoch": 0.57, "grad_norm": 0.1590733379125595, "learning_rate": 4.049798929800676e-06, "loss": 0.5598, "step": 2665 }, { "epoch": 0.57, "grad_norm": 0.1680363267660141, "learning_rate": 4.0463736380708986e-06, "loss": 0.5321, "step": 2666 }, { "epoch": 0.57, "grad_norm": 0.14845013618469238, "learning_rate": 4.042948810714158e-06, "loss": 0.508, "step": 2667 }, { "epoch": 0.57, "grad_norm": 0.18387869000434875, "learning_rate": 4.039524449398191e-06, "loss": 0.5134, "step": 2668 }, { "epoch": 0.57, "grad_norm": 0.14832563698291779, "learning_rate": 4.036100555790505e-06, "loss": 0.5149, "step": 2669 }, { "epoch": 0.58, "grad_norm": 0.16505834460258484, "learning_rate": 4.032677131558386e-06, "loss": 0.5326, "step": 2670 }, { "epoch": 0.58, "grad_norm": 0.16617228090763092, "learning_rate": 4.0292541783688804e-06, "loss": 0.5246, "step": 2671 }, { "epoch": 0.58, "grad_norm": 0.14918413758277893, "learning_rate": 4.025831697888817e-06, "loss": 0.4876, "step": 2672 }, { "epoch": 0.58, "grad_norm": 0.14089904725551605, "learning_rate": 4.022409691784791e-06, "loss": 0.4799, "step": 2673 }, { "epoch": 0.58, "grad_norm": 0.20409740507602692, "learning_rate": 4.01898816172316e-06, "loss": 0.4963, "step": 2674 }, { "epoch": 0.58, "grad_norm": 0.180314838886261, "learning_rate": 4.015567109370059e-06, "loss": 0.4895, "step": 2675 }, { "epoch": 0.58, "grad_norm": 0.1631566435098648, "learning_rate": 4.012146536391383e-06, "loss": 0.4868, "step": 2676 }, { 
"epoch": 0.58, "grad_norm": 0.1476244032382965, "learning_rate": 4.008726444452799e-06, "loss": 0.4909, "step": 2677 }, { "epoch": 0.58, "grad_norm": 0.14692912995815277, "learning_rate": 4.005306835219737e-06, "loss": 0.48, "step": 2678 }, { "epoch": 0.58, "grad_norm": 0.15567363798618317, "learning_rate": 4.001887710357392e-06, "loss": 0.5127, "step": 2679 }, { "epoch": 0.58, "grad_norm": 0.14364320039749146, "learning_rate": 3.998469071530725e-06, "loss": 0.5628, "step": 2680 }, { "epoch": 0.58, "grad_norm": 0.132903054356575, "learning_rate": 3.995050920404457e-06, "loss": 0.5542, "step": 2681 }, { "epoch": 0.58, "grad_norm": 0.18202251195907593, "learning_rate": 3.991633258643077e-06, "loss": 0.5721, "step": 2682 }, { "epoch": 0.58, "grad_norm": 0.16773462295532227, "learning_rate": 3.988216087910827e-06, "loss": 0.5039, "step": 2683 }, { "epoch": 0.58, "grad_norm": 0.14804890751838684, "learning_rate": 3.9847994098717166e-06, "loss": 0.5011, "step": 2684 }, { "epoch": 0.58, "grad_norm": 0.14443790912628174, "learning_rate": 3.981383226189518e-06, "loss": 0.5187, "step": 2685 }, { "epoch": 0.58, "grad_norm": 0.1854676902294159, "learning_rate": 3.9779675385277545e-06, "loss": 0.564, "step": 2686 }, { "epoch": 0.58, "grad_norm": 0.14072245359420776, "learning_rate": 3.974552348549714e-06, "loss": 0.4614, "step": 2687 }, { "epoch": 0.58, "grad_norm": 0.1532825082540512, "learning_rate": 3.971137657918437e-06, "loss": 0.5517, "step": 2688 }, { "epoch": 0.58, "grad_norm": 0.16841329634189606, "learning_rate": 3.967723468296727e-06, "loss": 0.4833, "step": 2689 }, { "epoch": 0.58, "grad_norm": 0.16667723655700684, "learning_rate": 3.96430978134714e-06, "loss": 0.4646, "step": 2690 }, { "epoch": 0.58, "grad_norm": 0.15399962663650513, "learning_rate": 3.960896598731986e-06, "loss": 0.5664, "step": 2691 }, { "epoch": 0.58, "grad_norm": 0.13954004645347595, "learning_rate": 3.957483922113334e-06, "loss": 0.4877, "step": 2692 }, { "epoch": 0.58, "grad_norm": 0.1900685578584671, "learning_rate": 3.954071753152999e-06, "loss": 0.5557, "step": 2693 }, { "epoch": 0.58, "grad_norm": 0.16399481892585754, "learning_rate": 3.950660093512556e-06, "loss": 0.5266, "step": 2694 }, { "epoch": 0.58, "grad_norm": 0.1279776692390442, "learning_rate": 3.947248944853332e-06, "loss": 0.4697, "step": 2695 }, { "epoch": 0.58, "grad_norm": 0.14985980093479156, "learning_rate": 3.943838308836398e-06, "loss": 0.5437, "step": 2696 }, { "epoch": 0.58, "grad_norm": 0.2673838138580322, "learning_rate": 3.940428187122584e-06, "loss": 0.5087, "step": 2697 }, { "epoch": 0.58, "grad_norm": 0.14051130414009094, "learning_rate": 3.937018581372462e-06, "loss": 0.5061, "step": 2698 }, { "epoch": 0.58, "grad_norm": 0.16947080194950104, "learning_rate": 3.933609493246357e-06, "loss": 0.5193, "step": 2699 }, { "epoch": 0.58, "grad_norm": 0.18120256066322327, "learning_rate": 3.9302009244043435e-06, "loss": 0.576, "step": 2700 }, { "epoch": 0.58, "grad_norm": 0.1373000591993332, "learning_rate": 3.926792876506238e-06, "loss": 0.5132, "step": 2701 }, { "epoch": 0.58, "grad_norm": 0.13234984874725342, "learning_rate": 3.923385351211609e-06, "loss": 0.5311, "step": 2702 }, { "epoch": 0.58, "grad_norm": 0.1670055389404297, "learning_rate": 3.919978350179764e-06, "loss": 0.5461, "step": 2703 }, { "epoch": 0.58, "grad_norm": 0.13284355401992798, "learning_rate": 3.916571875069764e-06, "loss": 0.4916, "step": 2704 }, { "epoch": 0.58, "grad_norm": 0.16038647294044495, "learning_rate": 3.913165927540403e-06, "loss": 0.5024, "step": 2705 }, { 
"epoch": 0.58, "grad_norm": 0.15044786036014557, "learning_rate": 3.909760509250225e-06, "loss": 0.5306, "step": 2706 }, { "epoch": 0.58, "grad_norm": 0.1481400430202484, "learning_rate": 3.90635562185752e-06, "loss": 0.4572, "step": 2707 }, { "epoch": 0.58, "grad_norm": 0.11982067674398422, "learning_rate": 3.902951267020311e-06, "loss": 0.4793, "step": 2708 }, { "epoch": 0.58, "grad_norm": 0.1427111029624939, "learning_rate": 3.899547446396365e-06, "loss": 0.5488, "step": 2709 }, { "epoch": 0.58, "grad_norm": 0.18075178563594818, "learning_rate": 3.896144161643189e-06, "loss": 0.5251, "step": 2710 }, { "epoch": 0.58, "grad_norm": 0.17146192491054535, "learning_rate": 3.89274141441803e-06, "loss": 0.5115, "step": 2711 }, { "epoch": 0.58, "grad_norm": 0.19387201964855194, "learning_rate": 3.8893392063778736e-06, "loss": 0.5017, "step": 2712 }, { "epoch": 0.58, "grad_norm": 0.12743881344795227, "learning_rate": 3.88593753917944e-06, "loss": 0.4378, "step": 2713 }, { "epoch": 0.58, "grad_norm": 0.20177234709262848, "learning_rate": 3.882536414479189e-06, "loss": 0.5104, "step": 2714 }, { "epoch": 0.58, "grad_norm": 0.15477432310581207, "learning_rate": 3.879135833933311e-06, "loss": 0.4847, "step": 2715 }, { "epoch": 0.59, "grad_norm": 0.18448805809020996, "learning_rate": 3.8757357991977415e-06, "loss": 0.4854, "step": 2716 }, { "epoch": 0.59, "grad_norm": 0.1681356281042099, "learning_rate": 3.8723363119281426e-06, "loss": 0.5493, "step": 2717 }, { "epoch": 0.59, "grad_norm": 0.1758078634738922, "learning_rate": 3.868937373779907e-06, "loss": 0.5012, "step": 2718 }, { "epoch": 0.59, "grad_norm": 0.18355970084667206, "learning_rate": 3.865538986408169e-06, "loss": 0.5385, "step": 2719 }, { "epoch": 0.59, "grad_norm": 0.15605668723583221, "learning_rate": 3.862141151467787e-06, "loss": 0.547, "step": 2720 }, { "epoch": 0.59, "grad_norm": 0.17370112240314484, "learning_rate": 3.858743870613355e-06, "loss": 0.5308, "step": 2721 }, { "epoch": 0.59, "grad_norm": 0.1348809152841568, "learning_rate": 3.855347145499197e-06, "loss": 0.5194, "step": 2722 }, { "epoch": 0.59, "grad_norm": 0.17120207846164703, "learning_rate": 3.851950977779361e-06, "loss": 0.5159, "step": 2723 }, { "epoch": 0.59, "grad_norm": 0.15506203472614288, "learning_rate": 3.848555369107631e-06, "loss": 0.5213, "step": 2724 }, { "epoch": 0.59, "grad_norm": 0.13186971843242645, "learning_rate": 3.845160321137512e-06, "loss": 0.4798, "step": 2725 }, { "epoch": 0.59, "grad_norm": 0.15033838152885437, "learning_rate": 3.841765835522242e-06, "loss": 0.5573, "step": 2726 }, { "epoch": 0.59, "grad_norm": 0.18248233199119568, "learning_rate": 3.838371913914783e-06, "loss": 0.4529, "step": 2727 }, { "epoch": 0.59, "grad_norm": 0.1604524403810501, "learning_rate": 3.83497855796782e-06, "loss": 0.506, "step": 2728 }, { "epoch": 0.59, "grad_norm": 0.1608039289712906, "learning_rate": 3.831585769333766e-06, "loss": 0.5207, "step": 2729 }, { "epoch": 0.59, "grad_norm": 0.14408713579177856, "learning_rate": 3.8281935496647526e-06, "loss": 0.5487, "step": 2730 }, { "epoch": 0.59, "grad_norm": 0.15173058211803436, "learning_rate": 3.824801900612642e-06, "loss": 0.5054, "step": 2731 }, { "epoch": 0.59, "grad_norm": 0.20556017756462097, "learning_rate": 3.821410823829011e-06, "loss": 0.5244, "step": 2732 }, { "epoch": 0.59, "grad_norm": 0.1307820975780487, "learning_rate": 3.818020320965162e-06, "loss": 0.5035, "step": 2733 }, { "epoch": 0.59, "grad_norm": 0.18517783284187317, "learning_rate": 3.8146303936721197e-06, "loss": 0.4838, "step": 2734 
}, { "epoch": 0.59, "grad_norm": 0.13619892299175262, "learning_rate": 3.811241043600622e-06, "loss": 0.5416, "step": 2735 }, { "epoch": 0.59, "grad_norm": 0.19370396435260773, "learning_rate": 3.8078522724011324e-06, "loss": 0.5622, "step": 2736 }, { "epoch": 0.59, "grad_norm": 0.1536007523536682, "learning_rate": 3.8044640817238276e-06, "loss": 0.5121, "step": 2737 }, { "epoch": 0.59, "grad_norm": 0.15463578701019287, "learning_rate": 3.8010764732186044e-06, "loss": 0.5102, "step": 2738 }, { "epoch": 0.59, "grad_norm": 0.17096665501594543, "learning_rate": 3.797689448535078e-06, "loss": 0.4799, "step": 2739 }, { "epoch": 0.59, "grad_norm": 0.3133319616317749, "learning_rate": 3.79430300932257e-06, "loss": 0.5698, "step": 2740 }, { "epoch": 0.59, "grad_norm": 0.23030497133731842, "learning_rate": 3.790917157230132e-06, "loss": 0.5706, "step": 2741 }, { "epoch": 0.59, "grad_norm": 0.15342505276203156, "learning_rate": 3.7875318939065147e-06, "loss": 0.4826, "step": 2742 }, { "epoch": 0.59, "grad_norm": 0.2005234658718109, "learning_rate": 3.784147221000191e-06, "loss": 0.5415, "step": 2743 }, { "epoch": 0.59, "grad_norm": 0.13762331008911133, "learning_rate": 3.7807631401593455e-06, "loss": 0.5106, "step": 2744 }, { "epoch": 0.59, "grad_norm": 0.2076551467180252, "learning_rate": 3.7773796530318703e-06, "loss": 0.503, "step": 2745 }, { "epoch": 0.59, "grad_norm": 0.1570519208908081, "learning_rate": 3.773996761265373e-06, "loss": 0.5074, "step": 2746 }, { "epoch": 0.59, "grad_norm": 0.1342182457447052, "learning_rate": 3.7706144665071683e-06, "loss": 0.4931, "step": 2747 }, { "epoch": 0.59, "grad_norm": 0.17213162779808044, "learning_rate": 3.767232770404281e-06, "loss": 0.4552, "step": 2748 }, { "epoch": 0.59, "grad_norm": 0.13107101619243622, "learning_rate": 3.7638516746034465e-06, "loss": 0.4909, "step": 2749 }, { "epoch": 0.59, "grad_norm": 0.16508126258850098, "learning_rate": 3.7604711807511034e-06, "loss": 0.523, "step": 2750 }, { "epoch": 0.59, "grad_norm": 0.15281084179878235, "learning_rate": 3.757091290493404e-06, "loss": 0.5309, "step": 2751 }, { "epoch": 0.59, "grad_norm": 0.20402151346206665, "learning_rate": 3.753712005476197e-06, "loss": 0.5493, "step": 2752 }, { "epoch": 0.59, "grad_norm": 0.15612109005451202, "learning_rate": 3.7503333273450425e-06, "loss": 0.5259, "step": 2753 }, { "epoch": 0.59, "grad_norm": 0.1936381310224533, "learning_rate": 3.74695525774521e-06, "loss": 0.5087, "step": 2754 }, { "epoch": 0.59, "grad_norm": 0.1426432728767395, "learning_rate": 3.7435777983216614e-06, "loss": 0.5044, "step": 2755 }, { "epoch": 0.59, "grad_norm": 0.14533087611198425, "learning_rate": 3.7402009507190696e-06, "loss": 0.5529, "step": 2756 }, { "epoch": 0.59, "grad_norm": 0.15488633513450623, "learning_rate": 3.7368247165818056e-06, "loss": 0.4872, "step": 2757 }, { "epoch": 0.59, "grad_norm": 0.14580923318862915, "learning_rate": 3.733449097553945e-06, "loss": 0.551, "step": 2758 }, { "epoch": 0.59, "grad_norm": 0.17380273342132568, "learning_rate": 3.7300740952792602e-06, "loss": 0.5494, "step": 2759 }, { "epoch": 0.59, "grad_norm": 0.171724334359169, "learning_rate": 3.7266997114012265e-06, "loss": 0.5556, "step": 2760 }, { "epoch": 0.59, "grad_norm": 0.15848620235919952, "learning_rate": 3.723325947563018e-06, "loss": 0.5165, "step": 2761 }, { "epoch": 0.59, "grad_norm": 0.15606124699115753, "learning_rate": 3.7199528054075005e-06, "loss": 0.5302, "step": 2762 }, { "epoch": 0.6, "grad_norm": 0.16441625356674194, "learning_rate": 3.7165802865772495e-06, "loss": 0.5862, 
"step": 2763 }, { "epoch": 0.6, "grad_norm": 0.13233539462089539, "learning_rate": 3.713208392714523e-06, "loss": 0.5144, "step": 2764 }, { "epoch": 0.6, "grad_norm": 0.16361810266971588, "learning_rate": 3.709837125461283e-06, "loss": 0.4873, "step": 2765 }, { "epoch": 0.6, "grad_norm": 0.9805002808570862, "learning_rate": 3.7064664864591878e-06, "loss": 0.5081, "step": 2766 }, { "epoch": 0.6, "grad_norm": 0.15291385352611542, "learning_rate": 3.7030964773495823e-06, "loss": 0.4899, "step": 2767 }, { "epoch": 0.6, "grad_norm": 0.19646501541137695, "learning_rate": 3.6997270997735122e-06, "loss": 0.5642, "step": 2768 }, { "epoch": 0.6, "grad_norm": 0.15896441042423248, "learning_rate": 3.6963583553717104e-06, "loss": 0.5153, "step": 2769 }, { "epoch": 0.6, "grad_norm": 0.15843161940574646, "learning_rate": 3.6929902457846034e-06, "loss": 0.497, "step": 2770 }, { "epoch": 0.6, "grad_norm": 0.19402094185352325, "learning_rate": 3.6896227726523113e-06, "loss": 0.5438, "step": 2771 }, { "epoch": 0.6, "grad_norm": 0.1643831878900528, "learning_rate": 3.6862559376146388e-06, "loss": 0.5383, "step": 2772 }, { "epoch": 0.6, "grad_norm": 0.15504218637943268, "learning_rate": 3.6828897423110866e-06, "loss": 0.505, "step": 2773 }, { "epoch": 0.6, "grad_norm": 0.1874060332775116, "learning_rate": 3.6795241883808342e-06, "loss": 0.5366, "step": 2774 }, { "epoch": 0.6, "grad_norm": 0.16982296109199524, "learning_rate": 3.676159277462757e-06, "loss": 0.5237, "step": 2775 }, { "epoch": 0.6, "grad_norm": 0.16953998804092407, "learning_rate": 3.6727950111954186e-06, "loss": 0.498, "step": 2776 }, { "epoch": 0.6, "grad_norm": 0.1400230973958969, "learning_rate": 3.66943139121706e-06, "loss": 0.4611, "step": 2777 }, { "epoch": 0.6, "grad_norm": 0.15184669196605682, "learning_rate": 3.6660684191656155e-06, "loss": 0.5214, "step": 2778 }, { "epoch": 0.6, "grad_norm": 0.14015498757362366, "learning_rate": 3.662706096678699e-06, "loss": 0.4915, "step": 2779 }, { "epoch": 0.6, "grad_norm": 0.17873437702655792, "learning_rate": 3.6593444253936094e-06, "loss": 0.4492, "step": 2780 }, { "epoch": 0.6, "grad_norm": 0.1276986002922058, "learning_rate": 3.655983406947332e-06, "loss": 0.4904, "step": 2781 }, { "epoch": 0.6, "grad_norm": 0.1345810890197754, "learning_rate": 3.652623042976529e-06, "loss": 0.5068, "step": 2782 }, { "epoch": 0.6, "grad_norm": 0.17123238742351532, "learning_rate": 3.649263335117548e-06, "loss": 0.5292, "step": 2783 }, { "epoch": 0.6, "grad_norm": 0.22209994494915009, "learning_rate": 3.645904285006412e-06, "loss": 0.5488, "step": 2784 }, { "epoch": 0.6, "grad_norm": 0.29981812834739685, "learning_rate": 3.6425458942788306e-06, "loss": 0.4935, "step": 2785 }, { "epoch": 0.6, "grad_norm": 0.17638364434242249, "learning_rate": 3.6391881645701854e-06, "loss": 0.5535, "step": 2786 }, { "epoch": 0.6, "grad_norm": 0.1817181259393692, "learning_rate": 3.63583109751554e-06, "loss": 0.5224, "step": 2787 }, { "epoch": 0.6, "grad_norm": 0.16286495327949524, "learning_rate": 3.632474694749638e-06, "loss": 0.5397, "step": 2788 }, { "epoch": 0.6, "grad_norm": 0.13048282265663147, "learning_rate": 3.629118957906892e-06, "loss": 0.5172, "step": 2789 }, { "epoch": 0.6, "grad_norm": 0.1269851177930832, "learning_rate": 3.625763888621397e-06, "loss": 0.4823, "step": 2790 }, { "epoch": 0.6, "grad_norm": 0.15424852073192596, "learning_rate": 3.6224094885269184e-06, "loss": 0.5374, "step": 2791 }, { "epoch": 0.6, "grad_norm": 0.1900346428155899, "learning_rate": 3.6190557592569e-06, "loss": 0.4719, "step": 2792 }, { 
"epoch": 0.6, "grad_norm": 0.1395425945520401, "learning_rate": 3.6157027024444558e-06, "loss": 0.5218, "step": 2793 }, { "epoch": 0.6, "grad_norm": 0.1748196929693222, "learning_rate": 3.612350319722372e-06, "loss": 0.5003, "step": 2794 }, { "epoch": 0.6, "grad_norm": 0.15849445760250092, "learning_rate": 3.6089986127231117e-06, "loss": 0.5239, "step": 2795 }, { "epoch": 0.6, "grad_norm": 0.16830691695213318, "learning_rate": 3.6056475830787997e-06, "loss": 0.5213, "step": 2796 }, { "epoch": 0.6, "grad_norm": 0.13852837681770325, "learning_rate": 3.6022972324212396e-06, "loss": 0.4697, "step": 2797 }, { "epoch": 0.6, "grad_norm": 0.12535005807876587, "learning_rate": 3.5989475623819025e-06, "loss": 0.5444, "step": 2798 }, { "epoch": 0.6, "grad_norm": 0.1402188241481781, "learning_rate": 3.595598574591923e-06, "loss": 0.5238, "step": 2799 }, { "epoch": 0.6, "grad_norm": 0.14916275441646576, "learning_rate": 3.5922502706821094e-06, "loss": 0.4976, "step": 2800 }, { "epoch": 0.6, "grad_norm": 0.1618949919939041, "learning_rate": 3.588902652282934e-06, "loss": 0.5345, "step": 2801 }, { "epoch": 0.6, "grad_norm": 0.14491844177246094, "learning_rate": 3.585555721024535e-06, "loss": 0.515, "step": 2802 }, { "epoch": 0.6, "grad_norm": 0.15220017731189728, "learning_rate": 3.58220947853672e-06, "loss": 0.5332, "step": 2803 }, { "epoch": 0.6, "grad_norm": 0.152902290225029, "learning_rate": 3.578863926448955e-06, "loss": 0.5592, "step": 2804 }, { "epoch": 0.6, "grad_norm": 0.11480627208948135, "learning_rate": 3.5755190663903753e-06, "loss": 0.4952, "step": 2805 }, { "epoch": 0.6, "grad_norm": 0.14540837705135345, "learning_rate": 3.5721748999897753e-06, "loss": 0.5294, "step": 2806 }, { "epoch": 0.6, "grad_norm": 0.1490909457206726, "learning_rate": 3.5688314288756136e-06, "loss": 0.5052, "step": 2807 }, { "epoch": 0.6, "grad_norm": 0.18195994198322296, "learning_rate": 3.5654886546760125e-06, "loss": 0.5326, "step": 2808 }, { "epoch": 0.61, "grad_norm": 0.2022872418165207, "learning_rate": 3.562146579018747e-06, "loss": 0.5723, "step": 2809 }, { "epoch": 0.61, "grad_norm": 0.15741689503192902, "learning_rate": 3.558805203531263e-06, "loss": 0.5499, "step": 2810 }, { "epoch": 0.61, "grad_norm": 0.1889500916004181, "learning_rate": 3.5554645298406553e-06, "loss": 0.5991, "step": 2811 }, { "epoch": 0.61, "grad_norm": 0.1986282765865326, "learning_rate": 3.5521245595736837e-06, "loss": 0.4946, "step": 2812 }, { "epoch": 0.61, "grad_norm": 0.17025060951709747, "learning_rate": 3.5487852943567614e-06, "loss": 0.567, "step": 2813 }, { "epoch": 0.61, "grad_norm": 0.14447635412216187, "learning_rate": 3.5454467358159606e-06, "loss": 0.4781, "step": 2814 }, { "epoch": 0.61, "grad_norm": 0.12846069037914276, "learning_rate": 3.54210888557701e-06, "loss": 0.511, "step": 2815 }, { "epoch": 0.61, "grad_norm": 0.180193692445755, "learning_rate": 3.5387717452652914e-06, "loss": 0.4993, "step": 2816 }, { "epoch": 0.61, "grad_norm": 0.13410285115242004, "learning_rate": 3.535435316505843e-06, "loss": 0.4746, "step": 2817 }, { "epoch": 0.61, "grad_norm": 0.16177906095981598, "learning_rate": 3.53209960092335e-06, "loss": 0.5347, "step": 2818 }, { "epoch": 0.61, "grad_norm": 0.15283246338367462, "learning_rate": 3.5287646001421604e-06, "loss": 0.5191, "step": 2819 }, { "epoch": 0.61, "grad_norm": 0.15224431455135345, "learning_rate": 3.5254303157862707e-06, "loss": 0.5055, "step": 2820 }, { "epoch": 0.61, "grad_norm": 0.18944452702999115, "learning_rate": 3.5220967494793216e-06, "loss": 0.463, "step": 2821 }, { 
"epoch": 0.61, "grad_norm": 0.15556566417217255, "learning_rate": 3.5187639028446136e-06, "loss": 0.5134, "step": 2822 }, { "epoch": 0.61, "grad_norm": 0.155210942029953, "learning_rate": 3.5154317775050906e-06, "loss": 0.4888, "step": 2823 }, { "epoch": 0.61, "grad_norm": 0.16802415251731873, "learning_rate": 3.512100375083347e-06, "loss": 0.5124, "step": 2824 }, { "epoch": 0.61, "grad_norm": 0.23786631226539612, "learning_rate": 3.508769697201629e-06, "loss": 0.5722, "step": 2825 }, { "epoch": 0.61, "grad_norm": 0.15338438749313354, "learning_rate": 3.5054397454818224e-06, "loss": 0.5459, "step": 2826 }, { "epoch": 0.61, "grad_norm": 0.1475946456193924, "learning_rate": 3.5021105215454666e-06, "loss": 0.5012, "step": 2827 }, { "epoch": 0.61, "grad_norm": 0.15379135310649872, "learning_rate": 3.498782027013742e-06, "loss": 0.5131, "step": 2828 }, { "epoch": 0.61, "grad_norm": 0.20665378868579865, "learning_rate": 3.4954542635074744e-06, "loss": 0.5291, "step": 2829 }, { "epoch": 0.61, "grad_norm": 0.1389567106962204, "learning_rate": 3.4921272326471388e-06, "loss": 0.5211, "step": 2830 }, { "epoch": 0.61, "grad_norm": 0.1549108624458313, "learning_rate": 3.488800936052843e-06, "loss": 0.4565, "step": 2831 }, { "epoch": 0.61, "grad_norm": 0.16236495971679688, "learning_rate": 3.4854753753443494e-06, "loss": 0.4741, "step": 2832 }, { "epoch": 0.61, "grad_norm": 0.13626092672348022, "learning_rate": 3.4821505521410514e-06, "loss": 0.4822, "step": 2833 }, { "epoch": 0.61, "grad_norm": 0.13619300723075867, "learning_rate": 3.47882646806199e-06, "loss": 0.4672, "step": 2834 }, { "epoch": 0.61, "grad_norm": 0.17099611461162567, "learning_rate": 3.4755031247258453e-06, "loss": 0.5018, "step": 2835 }, { "epoch": 0.61, "grad_norm": 0.2704041600227356, "learning_rate": 3.472180523750933e-06, "loss": 0.4887, "step": 2836 }, { "epoch": 0.61, "grad_norm": 0.1702050119638443, "learning_rate": 3.468858666755214e-06, "loss": 0.4735, "step": 2837 }, { "epoch": 0.61, "grad_norm": 0.13018356263637543, "learning_rate": 3.4655375553562774e-06, "loss": 0.5054, "step": 2838 }, { "epoch": 0.61, "grad_norm": 0.15961863100528717, "learning_rate": 3.4622171911713597e-06, "loss": 0.4903, "step": 2839 }, { "epoch": 0.61, "grad_norm": 0.20230530202388763, "learning_rate": 3.458897575817326e-06, "loss": 0.4923, "step": 2840 }, { "epoch": 0.61, "grad_norm": 0.1560392677783966, "learning_rate": 3.4555787109106786e-06, "loss": 0.4996, "step": 2841 }, { "epoch": 0.61, "grad_norm": 0.17162789404392242, "learning_rate": 3.4522605980675593e-06, "loss": 0.5324, "step": 2842 }, { "epoch": 0.61, "grad_norm": 0.14241425693035126, "learning_rate": 3.4489432389037326e-06, "loss": 0.5093, "step": 2843 }, { "epoch": 0.61, "grad_norm": 0.17781661450862885, "learning_rate": 3.44562663503461e-06, "loss": 0.545, "step": 2844 }, { "epoch": 0.61, "grad_norm": 0.26344063878059387, "learning_rate": 3.4423107880752227e-06, "loss": 0.5451, "step": 2845 }, { "epoch": 0.61, "grad_norm": 0.1670253723859787, "learning_rate": 3.43899569964024e-06, "loss": 0.4649, "step": 2846 }, { "epoch": 0.61, "grad_norm": 0.17507214844226837, "learning_rate": 3.4356813713439626e-06, "loss": 0.5291, "step": 2847 }, { "epoch": 0.61, "grad_norm": 0.1973615288734436, "learning_rate": 3.432367804800316e-06, "loss": 0.5424, "step": 2848 }, { "epoch": 0.61, "grad_norm": 0.13851170241832733, "learning_rate": 3.42905500162286e-06, "loss": 0.4921, "step": 2849 }, { "epoch": 0.61, "grad_norm": 0.15649986267089844, "learning_rate": 3.4257429634247783e-06, "loss": 0.5102, 
"step": 2850 }, { "epoch": 0.61, "grad_norm": 0.1704344004392624, "learning_rate": 3.4224316918188855e-06, "loss": 0.5317, "step": 2851 }, { "epoch": 0.61, "grad_norm": 0.19456495344638824, "learning_rate": 3.419121188417622e-06, "loss": 0.4987, "step": 2852 }, { "epoch": 0.61, "grad_norm": 0.14243166148662567, "learning_rate": 3.4158114548330525e-06, "loss": 0.5126, "step": 2853 }, { "epoch": 0.61, "grad_norm": 0.1448044627904892, "learning_rate": 3.41250249267687e-06, "loss": 0.5183, "step": 2854 }, { "epoch": 0.62, "grad_norm": 0.17978918552398682, "learning_rate": 3.409194303560387e-06, "loss": 0.5421, "step": 2855 }, { "epoch": 0.62, "grad_norm": 0.14264936745166779, "learning_rate": 3.4058868890945425e-06, "loss": 0.4958, "step": 2856 }, { "epoch": 0.62, "grad_norm": 0.15832003951072693, "learning_rate": 3.4025802508899025e-06, "loss": 0.4939, "step": 2857 }, { "epoch": 0.62, "grad_norm": 0.1486930102109909, "learning_rate": 3.3992743905566453e-06, "loss": 0.5264, "step": 2858 }, { "epoch": 0.62, "grad_norm": 0.19173184037208557, "learning_rate": 3.39596930970458e-06, "loss": 0.5165, "step": 2859 }, { "epoch": 0.62, "grad_norm": 0.17818816006183624, "learning_rate": 3.3926650099431286e-06, "loss": 0.5617, "step": 2860 }, { "epoch": 0.62, "grad_norm": 0.15651050209999084, "learning_rate": 3.389361492881337e-06, "loss": 0.4856, "step": 2861 }, { "epoch": 0.62, "grad_norm": 0.1457422971725464, "learning_rate": 3.3860587601278715e-06, "loss": 0.5187, "step": 2862 }, { "epoch": 0.62, "grad_norm": 0.13978311419487, "learning_rate": 3.3827568132910117e-06, "loss": 0.493, "step": 2863 }, { "epoch": 0.62, "grad_norm": 0.14989745616912842, "learning_rate": 3.3794556539786584e-06, "loss": 0.5355, "step": 2864 }, { "epoch": 0.62, "grad_norm": 0.16385847330093384, "learning_rate": 3.376155283798323e-06, "loss": 0.5402, "step": 2865 }, { "epoch": 0.62, "grad_norm": 0.1365756392478943, "learning_rate": 3.372855704357144e-06, "loss": 0.5018, "step": 2866 }, { "epoch": 0.62, "grad_norm": 0.14765289425849915, "learning_rate": 3.3695569172618613e-06, "loss": 0.5786, "step": 2867 }, { "epoch": 0.62, "grad_norm": 0.14326290786266327, "learning_rate": 3.3662589241188382e-06, "loss": 0.4799, "step": 2868 }, { "epoch": 0.62, "grad_norm": 0.1515820473432541, "learning_rate": 3.3629617265340497e-06, "loss": 0.4875, "step": 2869 }, { "epoch": 0.62, "grad_norm": 0.14540225267410278, "learning_rate": 3.3596653261130806e-06, "loss": 0.5127, "step": 2870 }, { "epoch": 0.62, "grad_norm": 0.162192702293396, "learning_rate": 3.3563697244611303e-06, "loss": 0.4825, "step": 2871 }, { "epoch": 0.62, "grad_norm": 0.1744917333126068, "learning_rate": 3.3530749231830073e-06, "loss": 0.4677, "step": 2872 }, { "epoch": 0.62, "grad_norm": 0.15274450182914734, "learning_rate": 3.3497809238831314e-06, "loss": 0.498, "step": 2873 }, { "epoch": 0.62, "grad_norm": 0.15344925224781036, "learning_rate": 3.3464877281655335e-06, "loss": 0.461, "step": 2874 }, { "epoch": 0.62, "grad_norm": 0.14903058111667633, "learning_rate": 3.3431953376338487e-06, "loss": 0.5207, "step": 2875 }, { "epoch": 0.62, "grad_norm": 0.15112550556659698, "learning_rate": 3.339903753891326e-06, "loss": 0.5271, "step": 2876 }, { "epoch": 0.62, "grad_norm": 0.13480481505393982, "learning_rate": 3.3366129785408143e-06, "loss": 0.4761, "step": 2877 }, { "epoch": 0.62, "grad_norm": 0.17278815805912018, "learning_rate": 3.333323013184773e-06, "loss": 0.494, "step": 2878 }, { "epoch": 0.62, "grad_norm": 0.16020460426807404, "learning_rate": 3.3300338594252724e-06, 
"loss": 0.5306, "step": 2879 }, { "epoch": 0.62, "grad_norm": 0.19360634684562683, "learning_rate": 3.326745518863976e-06, "loss": 0.5292, "step": 2880 }, { "epoch": 0.62, "grad_norm": 0.15092292428016663, "learning_rate": 3.323457993102161e-06, "loss": 0.5234, "step": 2881 }, { "epoch": 0.62, "grad_norm": 0.13326002657413483, "learning_rate": 3.320171283740702e-06, "loss": 0.4962, "step": 2882 }, { "epoch": 0.62, "grad_norm": 0.13950808346271515, "learning_rate": 3.316885392380078e-06, "loss": 0.5058, "step": 2883 }, { "epoch": 0.62, "grad_norm": 0.15002663433551788, "learning_rate": 3.3136003206203727e-06, "loss": 0.5212, "step": 2884 }, { "epoch": 0.62, "grad_norm": 0.14820055663585663, "learning_rate": 3.310316070061266e-06, "loss": 0.5309, "step": 2885 }, { "epoch": 0.62, "grad_norm": 0.15101811289787292, "learning_rate": 3.307032642302041e-06, "loss": 0.5228, "step": 2886 }, { "epoch": 0.62, "grad_norm": 0.15565958619117737, "learning_rate": 3.3037500389415756e-06, "loss": 0.4449, "step": 2887 }, { "epoch": 0.62, "grad_norm": 0.12206115573644638, "learning_rate": 3.3004682615783524e-06, "loss": 0.469, "step": 2888 }, { "epoch": 0.62, "grad_norm": 0.15403220057487488, "learning_rate": 3.2971873118104515e-06, "loss": 0.4853, "step": 2889 }, { "epoch": 0.62, "grad_norm": 0.15070055425167084, "learning_rate": 3.2939071912355424e-06, "loss": 0.5003, "step": 2890 }, { "epoch": 0.62, "grad_norm": 0.14524191617965698, "learning_rate": 3.290627901450899e-06, "loss": 0.5121, "step": 2891 }, { "epoch": 0.62, "grad_norm": 0.13863269984722137, "learning_rate": 3.2873494440533856e-06, "loss": 0.483, "step": 2892 }, { "epoch": 0.62, "grad_norm": 0.162959486246109, "learning_rate": 3.284071820639465e-06, "loss": 0.4901, "step": 2893 }, { "epoch": 0.62, "grad_norm": 0.1397026926279068, "learning_rate": 3.2807950328051906e-06, "loss": 0.4907, "step": 2894 }, { "epoch": 0.62, "grad_norm": 0.17842566967010498, "learning_rate": 3.2775190821462105e-06, "loss": 0.5001, "step": 2895 }, { "epoch": 0.62, "grad_norm": 0.25389254093170166, "learning_rate": 3.2742439702577665e-06, "loss": 0.5028, "step": 2896 }, { "epoch": 0.62, "grad_norm": 0.13854780793190002, "learning_rate": 3.2709696987346885e-06, "loss": 0.5351, "step": 2897 }, { "epoch": 0.62, "grad_norm": 0.14294210076332092, "learning_rate": 3.267696269171402e-06, "loss": 0.4752, "step": 2898 }, { "epoch": 0.62, "grad_norm": 0.12487441301345825, "learning_rate": 3.264423683161914e-06, "loss": 0.4884, "step": 2899 }, { "epoch": 0.62, "grad_norm": 0.1544751673936844, "learning_rate": 3.2611519422998308e-06, "loss": 0.5406, "step": 2900 }, { "epoch": 0.62, "grad_norm": 0.16319073736667633, "learning_rate": 3.257881048178344e-06, "loss": 0.4985, "step": 2901 }, { "epoch": 0.63, "grad_norm": 0.19490410387516022, "learning_rate": 3.254611002390227e-06, "loss": 0.5006, "step": 2902 }, { "epoch": 0.63, "grad_norm": 0.14253075420856476, "learning_rate": 3.251341806527848e-06, "loss": 0.4988, "step": 2903 }, { "epoch": 0.63, "grad_norm": 0.14755187928676605, "learning_rate": 3.248073462183155e-06, "loss": 0.5083, "step": 2904 }, { "epoch": 0.63, "grad_norm": 0.1382237672805786, "learning_rate": 3.2448059709476864e-06, "loss": 0.4941, "step": 2905 }, { "epoch": 0.63, "grad_norm": 0.13519005477428436, "learning_rate": 3.2415393344125647e-06, "loss": 0.4855, "step": 2906 }, { "epoch": 0.63, "grad_norm": 0.2366933822631836, "learning_rate": 3.2382735541684905e-06, "loss": 0.4875, "step": 2907 }, { "epoch": 0.63, "grad_norm": 0.15798290073871613, "learning_rate": 
3.235008631805755e-06, "loss": 0.5288, "step": 2908 }, { "epoch": 0.63, "grad_norm": 0.16785183548927307, "learning_rate": 3.231744568914226e-06, "loss": 0.5308, "step": 2909 }, { "epoch": 0.63, "grad_norm": 0.19100995361804962, "learning_rate": 3.228481367083356e-06, "loss": 0.4923, "step": 2910 }, { "epoch": 0.63, "grad_norm": 0.131486177444458, "learning_rate": 3.2252190279021788e-06, "loss": 0.4967, "step": 2911 }, { "epoch": 0.63, "grad_norm": 0.15485283732414246, "learning_rate": 3.2219575529593017e-06, "loss": 0.465, "step": 2912 }, { "epoch": 0.63, "grad_norm": 0.1736060082912445, "learning_rate": 3.2186969438429217e-06, "loss": 0.5094, "step": 2913 }, { "epoch": 0.63, "grad_norm": 0.17122332751750946, "learning_rate": 3.215437202140803e-06, "loss": 0.4891, "step": 2914 }, { "epoch": 0.63, "grad_norm": 0.15651971101760864, "learning_rate": 3.2121783294402966e-06, "loss": 0.4704, "step": 2915 }, { "epoch": 0.63, "grad_norm": 0.16835874319076538, "learning_rate": 3.2089203273283253e-06, "loss": 0.4694, "step": 2916 }, { "epoch": 0.63, "grad_norm": 0.15919756889343262, "learning_rate": 3.205663197391389e-06, "loss": 0.5043, "step": 2917 }, { "epoch": 0.63, "grad_norm": 0.17332980036735535, "learning_rate": 3.2024069412155632e-06, "loss": 0.5494, "step": 2918 }, { "epoch": 0.63, "grad_norm": 0.15382111072540283, "learning_rate": 3.199151560386498e-06, "loss": 0.4838, "step": 2919 }, { "epoch": 0.63, "grad_norm": 0.19345510005950928, "learning_rate": 3.1958970564894187e-06, "loss": 0.4929, "step": 2920 }, { "epoch": 0.63, "grad_norm": 0.18597455322742462, "learning_rate": 3.192643431109117e-06, "loss": 0.5576, "step": 2921 }, { "epoch": 0.63, "grad_norm": 0.16669237613677979, "learning_rate": 3.189390685829967e-06, "loss": 0.4878, "step": 2922 }, { "epoch": 0.63, "grad_norm": 0.13570186495780945, "learning_rate": 3.186138822235908e-06, "loss": 0.4852, "step": 2923 }, { "epoch": 0.63, "grad_norm": 0.1756938099861145, "learning_rate": 3.182887841910448e-06, "loss": 0.5295, "step": 2924 }, { "epoch": 0.63, "grad_norm": 0.1592927873134613, "learning_rate": 3.1796377464366713e-06, "loss": 0.5879, "step": 2925 }, { "epoch": 0.63, "grad_norm": 0.13915982842445374, "learning_rate": 3.1763885373972246e-06, "loss": 0.498, "step": 2926 }, { "epoch": 0.63, "grad_norm": 0.18962885439395905, "learning_rate": 3.1731402163743284e-06, "loss": 0.4949, "step": 2927 }, { "epoch": 0.63, "grad_norm": 0.17103898525238037, "learning_rate": 3.1698927849497683e-06, "loss": 0.5678, "step": 2928 }, { "epoch": 0.63, "grad_norm": 0.19355489313602448, "learning_rate": 3.166646244704896e-06, "loss": 0.4849, "step": 2929 }, { "epoch": 0.63, "grad_norm": 0.14212578535079956, "learning_rate": 3.1634005972206326e-06, "loss": 0.4616, "step": 2930 }, { "epoch": 0.63, "grad_norm": 0.13874362409114838, "learning_rate": 3.160155844077459e-06, "loss": 0.5322, "step": 2931 }, { "epoch": 0.63, "grad_norm": 0.1573115438222885, "learning_rate": 3.156911986855425e-06, "loss": 0.555, "step": 2932 }, { "epoch": 0.63, "grad_norm": 0.1475786417722702, "learning_rate": 3.153669027134144e-06, "loss": 0.5179, "step": 2933 }, { "epoch": 0.63, "grad_norm": 0.13680386543273926, "learning_rate": 3.150426966492788e-06, "loss": 0.521, "step": 2934 }, { "epoch": 0.63, "grad_norm": 0.1602596789598465, "learning_rate": 3.147185806510099e-06, "loss": 0.5499, "step": 2935 }, { "epoch": 0.63, "grad_norm": 0.14966510236263275, "learning_rate": 3.143945548764371e-06, "loss": 0.4922, "step": 2936 }, { "epoch": 0.63, "grad_norm": 0.14178875088691711, 
"learning_rate": 3.140706194833466e-06, "loss": 0.4547, "step": 2937 }, { "epoch": 0.63, "grad_norm": 0.16615799069404602, "learning_rate": 3.137467746294803e-06, "loss": 0.5192, "step": 2938 }, { "epoch": 0.63, "grad_norm": 0.19471901655197144, "learning_rate": 3.13423020472536e-06, "loss": 0.5068, "step": 2939 }, { "epoch": 0.63, "grad_norm": 0.1289563924074173, "learning_rate": 3.130993571701674e-06, "loss": 0.483, "step": 2940 }, { "epoch": 0.63, "grad_norm": 0.1688213050365448, "learning_rate": 3.1277578487998387e-06, "loss": 0.5033, "step": 2941 }, { "epoch": 0.63, "grad_norm": 0.14173230528831482, "learning_rate": 3.124523037595506e-06, "loss": 0.4745, "step": 2942 }, { "epoch": 0.63, "grad_norm": 0.1439976543188095, "learning_rate": 3.1212891396638834e-06, "loss": 0.4909, "step": 2943 }, { "epoch": 0.63, "grad_norm": 0.13524580001831055, "learning_rate": 3.1180561565797323e-06, "loss": 0.5079, "step": 2944 }, { "epoch": 0.63, "grad_norm": 0.1610611528158188, "learning_rate": 3.114824089917372e-06, "loss": 0.5046, "step": 2945 }, { "epoch": 0.63, "grad_norm": 0.1482682079076767, "learning_rate": 3.1115929412506698e-06, "loss": 0.4762, "step": 2946 }, { "epoch": 0.63, "grad_norm": 0.1553899049758911, "learning_rate": 3.1083627121530512e-06, "loss": 0.5337, "step": 2947 }, { "epoch": 0.64, "grad_norm": 0.14075995981693268, "learning_rate": 3.1051334041974923e-06, "loss": 0.5239, "step": 2948 }, { "epoch": 0.64, "grad_norm": 0.14739052951335907, "learning_rate": 3.1019050189565193e-06, "loss": 0.5304, "step": 2949 }, { "epoch": 0.64, "grad_norm": 0.16444166004657745, "learning_rate": 3.0986775580022122e-06, "loss": 0.5106, "step": 2950 }, { "epoch": 0.64, "grad_norm": 0.2006131410598755, "learning_rate": 3.0954510229061963e-06, "loss": 0.5723, "step": 2951 }, { "epoch": 0.64, "grad_norm": 0.16884103417396545, "learning_rate": 3.092225415239652e-06, "loss": 0.5637, "step": 2952 }, { "epoch": 0.64, "grad_norm": 0.13112773001194, "learning_rate": 3.089000736573301e-06, "loss": 0.5007, "step": 2953 }, { "epoch": 0.64, "grad_norm": 0.14087074995040894, "learning_rate": 3.0857769884774192e-06, "loss": 0.5106, "step": 2954 }, { "epoch": 0.64, "grad_norm": 0.17167288064956665, "learning_rate": 3.0825541725218266e-06, "loss": 0.5006, "step": 2955 }, { "epoch": 0.64, "grad_norm": 0.16773132979869843, "learning_rate": 3.079332290275887e-06, "loss": 0.4808, "step": 2956 }, { "epoch": 0.64, "grad_norm": 0.15428221225738525, "learning_rate": 3.076111343308516e-06, "loss": 0.531, "step": 2957 }, { "epoch": 0.64, "grad_norm": 0.2029823362827301, "learning_rate": 3.0728913331881638e-06, "loss": 0.5106, "step": 2958 }, { "epoch": 0.64, "grad_norm": 0.13769736886024475, "learning_rate": 3.069672261482832e-06, "loss": 0.5005, "step": 2959 }, { "epoch": 0.64, "grad_norm": 0.17260031402111053, "learning_rate": 3.0664541297600682e-06, "loss": 0.5118, "step": 2960 }, { "epoch": 0.64, "grad_norm": 0.1693435162305832, "learning_rate": 3.063236939586951e-06, "loss": 0.5139, "step": 2961 }, { "epoch": 0.64, "grad_norm": 0.12653128802776337, "learning_rate": 3.0600206925301114e-06, "loss": 0.5241, "step": 2962 }, { "epoch": 0.64, "grad_norm": 0.1622675359249115, "learning_rate": 3.0568053901557126e-06, "loss": 0.5418, "step": 2963 }, { "epoch": 0.64, "grad_norm": 0.12737122178077698, "learning_rate": 3.053591034029465e-06, "loss": 0.4476, "step": 2964 }, { "epoch": 0.64, "grad_norm": 0.17606867849826813, "learning_rate": 3.0503776257166145e-06, "loss": 0.5201, "step": 2965 }, { "epoch": 0.64, "grad_norm": 
0.21557646989822388, "learning_rate": 3.0471651667819447e-06, "loss": 0.4985, "step": 2966 }, { "epoch": 0.64, "grad_norm": 0.20406164228916168, "learning_rate": 3.0439536587897822e-06, "loss": 0.4886, "step": 2967 }, { "epoch": 0.64, "grad_norm": 0.147229865193367, "learning_rate": 3.0407431033039795e-06, "loss": 0.5053, "step": 2968 }, { "epoch": 0.64, "grad_norm": 0.20733335614204407, "learning_rate": 3.0375335018879383e-06, "loss": 0.4798, "step": 2969 }, { "epoch": 0.64, "grad_norm": 0.17706511914730072, "learning_rate": 3.03432485610459e-06, "loss": 0.4957, "step": 2970 }, { "epoch": 0.64, "grad_norm": 0.18925561010837555, "learning_rate": 3.031117167516395e-06, "loss": 0.4832, "step": 2971 }, { "epoch": 0.64, "grad_norm": 0.14262109994888306, "learning_rate": 3.0279104376853592e-06, "loss": 0.5004, "step": 2972 }, { "epoch": 0.64, "grad_norm": 0.21173708140850067, "learning_rate": 3.0247046681730107e-06, "loss": 0.534, "step": 2973 }, { "epoch": 0.64, "grad_norm": 0.1742897927761078, "learning_rate": 3.0214998605404165e-06, "loss": 0.539, "step": 2974 }, { "epoch": 0.64, "grad_norm": 0.13318294286727905, "learning_rate": 3.0182960163481745e-06, "loss": 0.4896, "step": 2975 }, { "epoch": 0.64, "grad_norm": 0.14285793900489807, "learning_rate": 3.0150931371564107e-06, "loss": 0.5225, "step": 2976 }, { "epoch": 0.64, "grad_norm": 0.14382816851139069, "learning_rate": 3.0118912245247846e-06, "loss": 0.5033, "step": 2977 }, { "epoch": 0.64, "grad_norm": 0.1816745102405548, "learning_rate": 3.0086902800124806e-06, "loss": 0.5737, "step": 2978 }, { "epoch": 0.64, "grad_norm": 0.1659248024225235, "learning_rate": 3.005490305178218e-06, "loss": 0.513, "step": 2979 }, { "epoch": 0.64, "grad_norm": 0.16415072977542877, "learning_rate": 3.0022913015802363e-06, "loss": 0.5032, "step": 2980 }, { "epoch": 0.64, "grad_norm": 0.12613564729690552, "learning_rate": 2.9990932707763067e-06, "loss": 0.5208, "step": 2981 }, { "epoch": 0.64, "grad_norm": 0.15900714695453644, "learning_rate": 2.99589621432373e-06, "loss": 0.517, "step": 2982 }, { "epoch": 0.64, "grad_norm": 0.15835516154766083, "learning_rate": 2.992700133779324e-06, "loss": 0.5217, "step": 2983 }, { "epoch": 0.64, "grad_norm": 0.15380804240703583, "learning_rate": 2.9895050306994385e-06, "loss": 0.5457, "step": 2984 }, { "epoch": 0.64, "grad_norm": 0.138858824968338, "learning_rate": 2.986310906639942e-06, "loss": 0.5249, "step": 2985 }, { "epoch": 0.64, "grad_norm": 0.13095752894878387, "learning_rate": 2.9831177631562306e-06, "loss": 0.4808, "step": 2986 }, { "epoch": 0.64, "grad_norm": 0.12830592691898346, "learning_rate": 2.9799256018032223e-06, "loss": 0.54, "step": 2987 }, { "epoch": 0.64, "grad_norm": 0.1949312835931778, "learning_rate": 2.9767344241353535e-06, "loss": 0.5108, "step": 2988 }, { "epoch": 0.64, "grad_norm": 0.1589624434709549, "learning_rate": 2.9735442317065864e-06, "loss": 0.5641, "step": 2989 }, { "epoch": 0.64, "grad_norm": 0.14621149003505707, "learning_rate": 2.9703550260703974e-06, "loss": 0.5448, "step": 2990 }, { "epoch": 0.64, "grad_norm": 0.16770517826080322, "learning_rate": 2.967166808779788e-06, "loss": 0.5617, "step": 2991 }, { "epoch": 0.64, "grad_norm": 0.1380135864019394, "learning_rate": 2.9639795813872773e-06, "loss": 0.5228, "step": 2992 }, { "epoch": 0.64, "grad_norm": 0.13159281015396118, "learning_rate": 2.9607933454448985e-06, "loss": 0.5122, "step": 2993 }, { "epoch": 0.64, "grad_norm": 0.15131685137748718, "learning_rate": 2.9576081025042068e-06, "loss": 0.481, "step": 2994 }, { "epoch": 
0.65, "grad_norm": 0.13696128129959106, "learning_rate": 2.9544238541162713e-06, "loss": 0.4559, "step": 2995 }, { "epoch": 0.65, "grad_norm": 0.17516811192035675, "learning_rate": 2.9512406018316763e-06, "loss": 0.5363, "step": 2996 }, { "epoch": 0.65, "grad_norm": 0.17963650822639465, "learning_rate": 2.9480583472005253e-06, "loss": 0.4986, "step": 2997 }, { "epoch": 0.65, "grad_norm": 0.1492321640253067, "learning_rate": 2.9448770917724296e-06, "loss": 0.5725, "step": 2998 }, { "epoch": 0.65, "grad_norm": 0.15479613840579987, "learning_rate": 2.9416968370965194e-06, "loss": 0.4926, "step": 2999 }, { "epoch": 0.65, "grad_norm": 0.1259550005197525, "learning_rate": 2.9385175847214325e-06, "loss": 0.5108, "step": 3000 }, { "epoch": 0.65, "grad_norm": 0.1810281127691269, "learning_rate": 2.9353393361953237e-06, "loss": 0.5176, "step": 3001 }, { "epoch": 0.65, "grad_norm": 0.20367856323719025, "learning_rate": 2.9321620930658578e-06, "loss": 0.5562, "step": 3002 }, { "epoch": 0.65, "grad_norm": 0.1935432255268097, "learning_rate": 2.928985856880205e-06, "loss": 0.4959, "step": 3003 }, { "epoch": 0.65, "grad_norm": 0.17958539724349976, "learning_rate": 2.925810629185054e-06, "loss": 0.5234, "step": 3004 }, { "epoch": 0.65, "grad_norm": 0.15984192490577698, "learning_rate": 2.922636411526593e-06, "loss": 0.5221, "step": 3005 }, { "epoch": 0.65, "grad_norm": 0.13086757063865662, "learning_rate": 2.919463205450526e-06, "loss": 0.5034, "step": 3006 }, { "epoch": 0.65, "grad_norm": 0.16409295797348022, "learning_rate": 2.9162910125020575e-06, "loss": 0.499, "step": 3007 }, { "epoch": 0.65, "grad_norm": 0.1658695936203003, "learning_rate": 2.9131198342259065e-06, "loss": 0.5489, "step": 3008 }, { "epoch": 0.65, "grad_norm": 0.2198559045791626, "learning_rate": 2.9099496721662947e-06, "loss": 0.5026, "step": 3009 }, { "epoch": 0.65, "grad_norm": 0.1836353838443756, "learning_rate": 2.9067805278669425e-06, "loss": 0.5644, "step": 3010 }, { "epoch": 0.65, "grad_norm": 0.20136743783950806, "learning_rate": 2.9036124028710865e-06, "loss": 0.5142, "step": 3011 }, { "epoch": 0.65, "grad_norm": 0.2073100060224533, "learning_rate": 2.900445298721455e-06, "loss": 0.5486, "step": 3012 }, { "epoch": 0.65, "grad_norm": 0.19056002795696259, "learning_rate": 2.8972792169602882e-06, "loss": 0.5525, "step": 3013 }, { "epoch": 0.65, "grad_norm": 0.16226232051849365, "learning_rate": 2.894114159129324e-06, "loss": 0.5438, "step": 3014 }, { "epoch": 0.65, "grad_norm": 0.15393410623073578, "learning_rate": 2.890950126769803e-06, "loss": 0.519, "step": 3015 }, { "epoch": 0.65, "grad_norm": 0.13310056924819946, "learning_rate": 2.8877871214224694e-06, "loss": 0.5414, "step": 3016 }, { "epoch": 0.65, "grad_norm": 0.15130481123924255, "learning_rate": 2.8846251446275587e-06, "loss": 0.5139, "step": 3017 }, { "epoch": 0.65, "grad_norm": 0.14056378602981567, "learning_rate": 2.881464197924814e-06, "loss": 0.5016, "step": 3018 }, { "epoch": 0.65, "grad_norm": 0.16934460401535034, "learning_rate": 2.8783042828534756e-06, "loss": 0.5251, "step": 3019 }, { "epoch": 0.65, "grad_norm": 0.172510027885437, "learning_rate": 2.875145400952274e-06, "loss": 0.4938, "step": 3020 }, { "epoch": 0.65, "grad_norm": 0.18168850243091583, "learning_rate": 2.87198755375945e-06, "loss": 0.557, "step": 3021 }, { "epoch": 0.65, "grad_norm": 0.18108013272285461, "learning_rate": 2.868830742812726e-06, "loss": 0.5058, "step": 3022 }, { "epoch": 0.65, "grad_norm": 0.20254182815551758, "learning_rate": 2.865674969649329e-06, "loss": 0.5228, "step": 
3023 }, { "epoch": 0.65, "grad_norm": 0.1535319983959198, "learning_rate": 2.8625202358059806e-06, "loss": 0.5533, "step": 3024 }, { "epoch": 0.65, "grad_norm": 0.17317281663417816, "learning_rate": 2.85936654281889e-06, "loss": 0.5433, "step": 3025 }, { "epoch": 0.65, "grad_norm": 0.12184549868106842, "learning_rate": 2.8562138922237648e-06, "loss": 0.5126, "step": 3026 }, { "epoch": 0.65, "grad_norm": 0.15135183930397034, "learning_rate": 2.8530622855558045e-06, "loss": 0.4813, "step": 3027 }, { "epoch": 0.65, "grad_norm": 0.23094992339611053, "learning_rate": 2.8499117243496986e-06, "loss": 0.4868, "step": 3028 }, { "epoch": 0.65, "grad_norm": 0.13720989227294922, "learning_rate": 2.846762210139631e-06, "loss": 0.4968, "step": 3029 }, { "epoch": 0.65, "grad_norm": 0.1362716108560562, "learning_rate": 2.8436137444592694e-06, "loss": 0.5245, "step": 3030 }, { "epoch": 0.65, "grad_norm": 0.14415206015110016, "learning_rate": 2.840466328841778e-06, "loss": 0.5186, "step": 3031 }, { "epoch": 0.65, "grad_norm": 0.18695032596588135, "learning_rate": 2.837319964819801e-06, "loss": 0.5611, "step": 3032 }, { "epoch": 0.65, "grad_norm": 0.1513887345790863, "learning_rate": 2.8341746539254807e-06, "loss": 0.5893, "step": 3033 }, { "epoch": 0.65, "grad_norm": 0.17001493275165558, "learning_rate": 2.8310303976904396e-06, "loss": 0.4993, "step": 3034 }, { "epoch": 0.65, "grad_norm": 0.19183696806430817, "learning_rate": 2.827887197645789e-06, "loss": 0.5087, "step": 3035 }, { "epoch": 0.65, "grad_norm": 0.151499405503273, "learning_rate": 2.824745055322128e-06, "loss": 0.557, "step": 3036 }, { "epoch": 0.65, "grad_norm": 0.15552127361297607, "learning_rate": 2.8216039722495336e-06, "loss": 0.5215, "step": 3037 }, { "epoch": 0.65, "grad_norm": 0.12379120290279388, "learning_rate": 2.818463949957575e-06, "loss": 0.5217, "step": 3038 }, { "epoch": 0.65, "grad_norm": 0.13502056896686554, "learning_rate": 2.8153249899753e-06, "loss": 0.5244, "step": 3039 }, { "epoch": 0.65, "grad_norm": 0.15221551060676575, "learning_rate": 2.8121870938312413e-06, "loss": 0.5248, "step": 3040 }, { "epoch": 0.66, "grad_norm": 0.16277168691158295, "learning_rate": 2.809050263053414e-06, "loss": 0.4598, "step": 3041 }, { "epoch": 0.66, "grad_norm": 0.1595809906721115, "learning_rate": 2.80591449916931e-06, "loss": 0.5505, "step": 3042 }, { "epoch": 0.66, "grad_norm": 0.1773127317428589, "learning_rate": 2.8027798037059094e-06, "loss": 0.5169, "step": 3043 }, { "epoch": 0.66, "grad_norm": 0.1667371243238449, "learning_rate": 2.7996461781896624e-06, "loss": 0.4966, "step": 3044 }, { "epoch": 0.66, "grad_norm": 0.13818593323230743, "learning_rate": 2.796513624146504e-06, "loss": 0.5132, "step": 3045 }, { "epoch": 0.66, "grad_norm": 0.13870275020599365, "learning_rate": 2.7933821431018523e-06, "loss": 0.528, "step": 3046 }, { "epoch": 0.66, "grad_norm": 0.1374882310628891, "learning_rate": 2.7902517365805916e-06, "loss": 0.5159, "step": 3047 }, { "epoch": 0.66, "grad_norm": 0.1938783973455429, "learning_rate": 2.7871224061070935e-06, "loss": 0.5242, "step": 3048 }, { "epoch": 0.66, "grad_norm": 0.13137510418891907, "learning_rate": 2.7839941532051952e-06, "loss": 0.5338, "step": 3049 }, { "epoch": 0.66, "grad_norm": 0.1456771343946457, "learning_rate": 2.780866979398218e-06, "loss": 0.5029, "step": 3050 }, { "epoch": 0.66, "grad_norm": 0.16268415749073029, "learning_rate": 2.7777408862089537e-06, "loss": 0.5301, "step": 3051 }, { "epoch": 0.66, "grad_norm": 0.21177208423614502, "learning_rate": 2.77461587515967e-06, "loss": 
0.5032, "step": 3052 }, { "epoch": 0.66, "grad_norm": 0.19144344329833984, "learning_rate": 2.771491947772108e-06, "loss": 0.5062, "step": 3053 }, { "epoch": 0.66, "grad_norm": 0.13552603125572205, "learning_rate": 2.7683691055674745e-06, "loss": 0.5184, "step": 3054 }, { "epoch": 0.66, "grad_norm": 0.2080407440662384, "learning_rate": 2.765247350066455e-06, "loss": 0.5691, "step": 3055 }, { "epoch": 0.66, "grad_norm": 0.1384773850440979, "learning_rate": 2.7621266827892062e-06, "loss": 0.4668, "step": 3056 }, { "epoch": 0.66, "grad_norm": 0.1618855744600296, "learning_rate": 2.7590071052553487e-06, "loss": 0.5399, "step": 3057 }, { "epoch": 0.66, "grad_norm": 0.14525936543941498, "learning_rate": 2.755888618983977e-06, "loss": 0.5207, "step": 3058 }, { "epoch": 0.66, "grad_norm": 0.15105114877223969, "learning_rate": 2.7527712254936545e-06, "loss": 0.5042, "step": 3059 }, { "epoch": 0.66, "grad_norm": 0.1427949219942093, "learning_rate": 2.749654926302412e-06, "loss": 0.5236, "step": 3060 }, { "epoch": 0.66, "grad_norm": 0.16231150925159454, "learning_rate": 2.7465397229277435e-06, "loss": 0.5481, "step": 3061 }, { "epoch": 0.66, "grad_norm": 0.2165137529373169, "learning_rate": 2.743425616886615e-06, "loss": 0.5748, "step": 3062 }, { "epoch": 0.66, "grad_norm": 0.2217060923576355, "learning_rate": 2.740312609695455e-06, "loss": 0.537, "step": 3063 }, { "epoch": 0.66, "grad_norm": 0.1639140248298645, "learning_rate": 2.737200702870157e-06, "loss": 0.5766, "step": 3064 }, { "epoch": 0.66, "grad_norm": 0.16004133224487305, "learning_rate": 2.734089897926082e-06, "loss": 0.5546, "step": 3065 }, { "epoch": 0.66, "grad_norm": 0.1548355221748352, "learning_rate": 2.7309801963780485e-06, "loss": 0.5479, "step": 3066 }, { "epoch": 0.66, "grad_norm": 0.13668109476566315, "learning_rate": 2.727871599740342e-06, "loss": 0.4974, "step": 3067 }, { "epoch": 0.66, "grad_norm": 0.24507245421409607, "learning_rate": 2.724764109526711e-06, "loss": 0.5418, "step": 3068 }, { "epoch": 0.66, "grad_norm": 0.1891452968120575, "learning_rate": 2.721657727250359e-06, "loss": 0.4869, "step": 3069 }, { "epoch": 0.66, "grad_norm": 0.16605839133262634, "learning_rate": 2.7185524544239567e-06, "loss": 0.5408, "step": 3070 }, { "epoch": 0.66, "grad_norm": 0.1509867161512375, "learning_rate": 2.7154482925596314e-06, "loss": 0.4962, "step": 3071 }, { "epoch": 0.66, "grad_norm": 0.13401636481285095, "learning_rate": 2.71234524316897e-06, "loss": 0.4739, "step": 3072 }, { "epoch": 0.66, "grad_norm": 0.15112657845020294, "learning_rate": 2.709243307763019e-06, "loss": 0.5719, "step": 3073 }, { "epoch": 0.66, "grad_norm": 0.1450798213481903, "learning_rate": 2.706142487852279e-06, "loss": 0.5104, "step": 3074 }, { "epoch": 0.66, "grad_norm": 0.17470777034759521, "learning_rate": 2.7030427849467113e-06, "loss": 0.5122, "step": 3075 }, { "epoch": 0.66, "grad_norm": 0.173739492893219, "learning_rate": 2.699944200555727e-06, "loss": 0.4591, "step": 3076 }, { "epoch": 0.66, "grad_norm": 0.1204950362443924, "learning_rate": 2.696846736188202e-06, "loss": 0.536, "step": 3077 }, { "epoch": 0.66, "grad_norm": 0.1670408993959427, "learning_rate": 2.693750393352462e-06, "loss": 0.5477, "step": 3078 }, { "epoch": 0.66, "grad_norm": 0.1568535566329956, "learning_rate": 2.6906551735562824e-06, "loss": 0.5682, "step": 3079 }, { "epoch": 0.66, "grad_norm": 0.18247413635253906, "learning_rate": 2.6875610783069007e-06, "loss": 0.4769, "step": 3080 }, { "epoch": 0.66, "grad_norm": 0.14836347103118896, "learning_rate": 2.6844681091109958e-06, 
"loss": 0.479, "step": 3081 }, { "epoch": 0.66, "grad_norm": 0.15542642772197723, "learning_rate": 2.681376267474707e-06, "loss": 0.5113, "step": 3082 }, { "epoch": 0.66, "grad_norm": 0.15311211347579956, "learning_rate": 2.678285554903623e-06, "loss": 0.5267, "step": 3083 }, { "epoch": 0.66, "grad_norm": 0.23527516424655914, "learning_rate": 2.67519597290278e-06, "loss": 0.5006, "step": 3084 }, { "epoch": 0.66, "grad_norm": 0.13628728687763214, "learning_rate": 2.6721075229766673e-06, "loss": 0.5323, "step": 3085 }, { "epoch": 0.66, "grad_norm": 0.16927917301654816, "learning_rate": 2.669020206629217e-06, "loss": 0.5134, "step": 3086 }, { "epoch": 0.66, "grad_norm": 0.17380353808403015, "learning_rate": 2.665934025363817e-06, "loss": 0.4888, "step": 3087 }, { "epoch": 0.67, "grad_norm": 0.1672961264848709, "learning_rate": 2.6628489806832947e-06, "loss": 0.4992, "step": 3088 }, { "epoch": 0.67, "grad_norm": 0.15757709741592407, "learning_rate": 2.659765074089927e-06, "loss": 0.5237, "step": 3089 }, { "epoch": 0.67, "grad_norm": 0.18813352286815643, "learning_rate": 2.6566823070854442e-06, "loss": 0.5696, "step": 3090 }, { "epoch": 0.67, "grad_norm": 0.17737697064876556, "learning_rate": 2.653600681171008e-06, "loss": 0.5657, "step": 3091 }, { "epoch": 0.67, "grad_norm": 0.1634911447763443, "learning_rate": 2.650520197847235e-06, "loss": 0.4947, "step": 3092 }, { "epoch": 0.67, "grad_norm": 0.17239625751972198, "learning_rate": 2.6474408586141794e-06, "loss": 0.4936, "step": 3093 }, { "epoch": 0.67, "grad_norm": 0.15810348093509674, "learning_rate": 2.6443626649713407e-06, "loss": 0.5008, "step": 3094 }, { "epoch": 0.67, "grad_norm": 0.13702960312366486, "learning_rate": 2.6412856184176615e-06, "loss": 0.5653, "step": 3095 }, { "epoch": 0.67, "grad_norm": 0.16318099200725555, "learning_rate": 2.6382097204515246e-06, "loss": 0.4573, "step": 3096 }, { "epoch": 0.67, "grad_norm": 0.14889857172966003, "learning_rate": 2.6351349725707543e-06, "loss": 0.5022, "step": 3097 }, { "epoch": 0.67, "grad_norm": 0.18676966428756714, "learning_rate": 2.6320613762726123e-06, "loss": 0.5089, "step": 3098 }, { "epoch": 0.67, "grad_norm": 0.20256297290325165, "learning_rate": 2.628988933053802e-06, "loss": 0.4871, "step": 3099 }, { "epoch": 0.67, "grad_norm": 0.18140171468257904, "learning_rate": 2.625917644410467e-06, "loss": 0.5102, "step": 3100 }, { "epoch": 0.67, "grad_norm": 0.12691918015480042, "learning_rate": 2.6228475118381825e-06, "loss": 0.4831, "step": 3101 }, { "epoch": 0.67, "grad_norm": 0.16980133950710297, "learning_rate": 2.6197785368319663e-06, "loss": 0.4974, "step": 3102 }, { "epoch": 0.67, "grad_norm": 0.13890565931797028, "learning_rate": 2.6167107208862707e-06, "loss": 0.5288, "step": 3103 }, { "epoch": 0.67, "grad_norm": 0.17979633808135986, "learning_rate": 2.613644065494985e-06, "loss": 0.5096, "step": 3104 }, { "epoch": 0.67, "grad_norm": 0.17529235780239105, "learning_rate": 2.610578572151433e-06, "loss": 0.5083, "step": 3105 }, { "epoch": 0.67, "grad_norm": 0.13134679198265076, "learning_rate": 2.6075142423483675e-06, "loss": 0.5128, "step": 3106 }, { "epoch": 0.67, "grad_norm": 0.12923552095890045, "learning_rate": 2.6044510775779815e-06, "loss": 0.4858, "step": 3107 }, { "epoch": 0.67, "grad_norm": 0.17722611129283905, "learning_rate": 2.6013890793318972e-06, "loss": 0.5177, "step": 3108 }, { "epoch": 0.67, "grad_norm": 0.17613767087459564, "learning_rate": 2.5983282491011718e-06, "loss": 0.5113, "step": 3109 }, { "epoch": 0.67, "grad_norm": 0.15595421195030212, 
"learning_rate": 2.5952685883762918e-06, "loss": 0.4972, "step": 3110 }, { "epoch": 0.67, "grad_norm": 0.23644490540027618, "learning_rate": 2.59221009864717e-06, "loss": 0.533, "step": 3111 }, { "epoch": 0.67, "grad_norm": 0.14558325707912445, "learning_rate": 2.589152781403158e-06, "loss": 0.4991, "step": 3112 }, { "epoch": 0.67, "grad_norm": 0.15675747394561768, "learning_rate": 2.5860966381330265e-06, "loss": 0.4931, "step": 3113 }, { "epoch": 0.67, "grad_norm": 0.14677970111370087, "learning_rate": 2.583041670324982e-06, "loss": 0.4964, "step": 3114 }, { "epoch": 0.67, "grad_norm": 0.1544618010520935, "learning_rate": 2.5799878794666555e-06, "loss": 0.5627, "step": 3115 }, { "epoch": 0.67, "grad_norm": 0.15437090396881104, "learning_rate": 2.5769352670451058e-06, "loss": 0.5382, "step": 3116 }, { "epoch": 0.67, "grad_norm": 0.16935445368289948, "learning_rate": 2.57388383454682e-06, "loss": 0.4881, "step": 3117 }, { "epoch": 0.67, "grad_norm": 0.145218625664711, "learning_rate": 2.5708335834577035e-06, "loss": 0.492, "step": 3118 }, { "epoch": 0.67, "grad_norm": 0.15230430662631989, "learning_rate": 2.567784515263093e-06, "loss": 0.5286, "step": 3119 }, { "epoch": 0.67, "grad_norm": 0.1474408656358719, "learning_rate": 2.5647366314477473e-06, "loss": 0.5342, "step": 3120 }, { "epoch": 0.67, "grad_norm": 0.14141744375228882, "learning_rate": 2.561689933495849e-06, "loss": 0.4877, "step": 3121 }, { "epoch": 0.67, "grad_norm": 0.14578036963939667, "learning_rate": 2.5586444228910036e-06, "loss": 0.5148, "step": 3122 }, { "epoch": 0.67, "grad_norm": 0.15471605956554413, "learning_rate": 2.5556001011162337e-06, "loss": 0.5346, "step": 3123 }, { "epoch": 0.67, "grad_norm": 0.15913046896457672, "learning_rate": 2.5525569696539916e-06, "loss": 0.5056, "step": 3124 }, { "epoch": 0.67, "grad_norm": 0.17138166725635529, "learning_rate": 2.54951502998614e-06, "loss": 0.5096, "step": 3125 }, { "epoch": 0.67, "grad_norm": 0.18976645171642303, "learning_rate": 2.546474283593969e-06, "loss": 0.484, "step": 3126 }, { "epoch": 0.67, "grad_norm": 0.14352168142795563, "learning_rate": 2.5434347319581844e-06, "loss": 0.4984, "step": 3127 }, { "epoch": 0.67, "grad_norm": 0.16046349704265594, "learning_rate": 2.540396376558912e-06, "loss": 0.5463, "step": 3128 }, { "epoch": 0.67, "grad_norm": 0.1726856231689453, "learning_rate": 2.5373592188756946e-06, "loss": 0.5361, "step": 3129 }, { "epoch": 0.67, "grad_norm": 0.16727623343467712, "learning_rate": 2.5343232603874868e-06, "loss": 0.4938, "step": 3130 }, { "epoch": 0.67, "grad_norm": 0.14829504489898682, "learning_rate": 2.531288502572667e-06, "loss": 0.5588, "step": 3131 }, { "epoch": 0.67, "grad_norm": 0.1359606683254242, "learning_rate": 2.5282549469090246e-06, "loss": 0.5151, "step": 3132 }, { "epoch": 0.67, "grad_norm": 0.17951302230358124, "learning_rate": 2.525222594873764e-06, "loss": 0.552, "step": 3133 }, { "epoch": 0.68, "grad_norm": 0.1504855751991272, "learning_rate": 2.522191447943506e-06, "loss": 0.5304, "step": 3134 }, { "epoch": 0.68, "grad_norm": 0.17266714572906494, "learning_rate": 2.519161507594279e-06, "loss": 0.513, "step": 3135 }, { "epoch": 0.68, "grad_norm": 0.164722740650177, "learning_rate": 2.5161327753015297e-06, "loss": 0.5392, "step": 3136 }, { "epoch": 0.68, "grad_norm": 0.22426824271678925, "learning_rate": 2.5131052525401145e-06, "loss": 0.5337, "step": 3137 }, { "epoch": 0.68, "grad_norm": 0.15474985539913177, "learning_rate": 2.5100789407842985e-06, "loss": 0.5149, "step": 3138 }, { "epoch": 0.68, "grad_norm": 
0.155501589179039, "learning_rate": 2.5070538415077593e-06, "loss": 0.5177, "step": 3139 }, { "epoch": 0.68, "grad_norm": 0.15586499869823456, "learning_rate": 2.5040299561835846e-06, "loss": 0.4912, "step": 3140 }, { "epoch": 0.68, "grad_norm": 0.15372590720653534, "learning_rate": 2.5010072862842725e-06, "loss": 0.4981, "step": 3141 }, { "epoch": 0.68, "grad_norm": 0.1472439020872116, "learning_rate": 2.4979858332817225e-06, "loss": 0.5167, "step": 3142 }, { "epoch": 0.68, "grad_norm": 0.3344082534313202, "learning_rate": 2.494965598647248e-06, "loss": 0.5456, "step": 3143 }, { "epoch": 0.68, "grad_norm": 0.13474471867084503, "learning_rate": 2.4919465838515687e-06, "loss": 0.5113, "step": 3144 }, { "epoch": 0.68, "grad_norm": 0.15115109086036682, "learning_rate": 2.488928790364804e-06, "loss": 0.4906, "step": 3145 }, { "epoch": 0.68, "grad_norm": 0.13708892464637756, "learning_rate": 2.48591221965649e-06, "loss": 0.484, "step": 3146 }, { "epoch": 0.68, "grad_norm": 0.14575795829296112, "learning_rate": 2.482896873195555e-06, "loss": 0.5477, "step": 3147 }, { "epoch": 0.68, "grad_norm": 0.14525777101516724, "learning_rate": 2.479882752450339e-06, "loss": 0.5041, "step": 3148 }, { "epoch": 0.68, "grad_norm": 0.12615111470222473, "learning_rate": 2.4768698588885842e-06, "loss": 0.4841, "step": 3149 }, { "epoch": 0.68, "grad_norm": 0.1459239274263382, "learning_rate": 2.4738581939774303e-06, "loss": 0.5168, "step": 3150 }, { "epoch": 0.68, "grad_norm": 0.16422203183174133, "learning_rate": 2.4708477591834244e-06, "loss": 0.5476, "step": 3151 }, { "epoch": 0.68, "grad_norm": 0.12386015802621841, "learning_rate": 2.4678385559725125e-06, "loss": 0.4401, "step": 3152 }, { "epoch": 0.68, "grad_norm": 0.16602352261543274, "learning_rate": 2.4648305858100413e-06, "loss": 0.5279, "step": 3153 }, { "epoch": 0.68, "grad_norm": 0.195119708776474, "learning_rate": 2.4618238501607577e-06, "loss": 0.4794, "step": 3154 }, { "epoch": 0.68, "grad_norm": 0.14807678759098053, "learning_rate": 2.4588183504888023e-06, "loss": 0.4964, "step": 3155 }, { "epoch": 0.68, "grad_norm": 0.1313076764345169, "learning_rate": 2.455814088257723e-06, "loss": 0.5141, "step": 3156 }, { "epoch": 0.68, "grad_norm": 0.17979438602924347, "learning_rate": 2.4528110649304555e-06, "loss": 0.5335, "step": 3157 }, { "epoch": 0.68, "grad_norm": 0.17119114100933075, "learning_rate": 2.4498092819693364e-06, "loss": 0.4784, "step": 3158 }, { "epoch": 0.68, "grad_norm": 0.1639591008424759, "learning_rate": 2.4468087408361053e-06, "loss": 0.5275, "step": 3159 }, { "epoch": 0.68, "grad_norm": 0.16429801285266876, "learning_rate": 2.443809442991884e-06, "loss": 0.4829, "step": 3160 }, { "epoch": 0.68, "grad_norm": 0.1692316085100174, "learning_rate": 2.440811389897199e-06, "loss": 0.5242, "step": 3161 }, { "epoch": 0.68, "grad_norm": 0.16549012064933777, "learning_rate": 2.4378145830119637e-06, "loss": 0.5217, "step": 3162 }, { "epoch": 0.68, "grad_norm": 0.16364479064941406, "learning_rate": 2.4348190237954893e-06, "loss": 0.556, "step": 3163 }, { "epoch": 0.68, "grad_norm": 0.14696238934993744, "learning_rate": 2.4318247137064788e-06, "loss": 0.5393, "step": 3164 }, { "epoch": 0.68, "grad_norm": 0.16105543076992035, "learning_rate": 2.428831654203025e-06, "loss": 0.5169, "step": 3165 }, { "epoch": 0.68, "grad_norm": 0.14257711172103882, "learning_rate": 2.425839846742616e-06, "loss": 0.5376, "step": 3166 }, { "epoch": 0.68, "grad_norm": 0.1712980419397354, "learning_rate": 2.4228492927821227e-06, "loss": 0.4776, "step": 3167 }, { "epoch": 
0.68, "grad_norm": 0.15367096662521362, "learning_rate": 2.4198599937778138e-06, "loss": 0.4887, "step": 3168 }, { "epoch": 0.68, "grad_norm": 0.17562294006347656, "learning_rate": 2.41687195118534e-06, "loss": 0.4691, "step": 3169 }, { "epoch": 0.68, "grad_norm": 0.1532362997531891, "learning_rate": 2.4138851664597424e-06, "loss": 0.5247, "step": 3170 }, { "epoch": 0.68, "grad_norm": 0.14801405370235443, "learning_rate": 2.4108996410554565e-06, "loss": 0.499, "step": 3171 }, { "epoch": 0.68, "grad_norm": 0.24842116236686707, "learning_rate": 2.407915376426293e-06, "loss": 0.5365, "step": 3172 }, { "epoch": 0.68, "grad_norm": 0.15684857964515686, "learning_rate": 2.4049323740254575e-06, "loss": 0.5435, "step": 3173 }, { "epoch": 0.68, "grad_norm": 0.15908139944076538, "learning_rate": 2.401950635305535e-06, "loss": 0.5011, "step": 3174 }, { "epoch": 0.68, "grad_norm": 0.1335798054933548, "learning_rate": 2.3989701617184986e-06, "loss": 0.5187, "step": 3175 }, { "epoch": 0.68, "grad_norm": 0.14431187510490417, "learning_rate": 2.395990954715705e-06, "loss": 0.5294, "step": 3176 }, { "epoch": 0.68, "grad_norm": 0.22326305508613586, "learning_rate": 2.3930130157478938e-06, "loss": 0.5639, "step": 3177 }, { "epoch": 0.68, "grad_norm": 0.11609046161174774, "learning_rate": 2.390036346265188e-06, "loss": 0.5045, "step": 3178 }, { "epoch": 0.68, "grad_norm": 0.1769070327281952, "learning_rate": 2.387060947717089e-06, "loss": 0.4945, "step": 3179 }, { "epoch": 0.69, "grad_norm": 0.20527726411819458, "learning_rate": 2.3840868215524824e-06, "loss": 0.5375, "step": 3180 }, { "epoch": 0.69, "grad_norm": 0.1666276752948761, "learning_rate": 2.381113969219636e-06, "loss": 0.5197, "step": 3181 }, { "epoch": 0.69, "grad_norm": 0.18321175873279572, "learning_rate": 2.378142392166191e-06, "loss": 0.5291, "step": 3182 }, { "epoch": 0.69, "grad_norm": 0.14883701503276825, "learning_rate": 2.375172091839174e-06, "loss": 0.5554, "step": 3183 }, { "epoch": 0.69, "grad_norm": 0.13777758181095123, "learning_rate": 2.3722030696849857e-06, "loss": 0.5396, "step": 3184 }, { "epoch": 0.69, "grad_norm": 0.14664918184280396, "learning_rate": 2.3692353271494073e-06, "loss": 0.4809, "step": 3185 }, { "epoch": 0.69, "grad_norm": 0.2592916190624237, "learning_rate": 2.3662688656775973e-06, "loss": 0.4879, "step": 3186 }, { "epoch": 0.69, "grad_norm": 0.22084404528141022, "learning_rate": 2.3633036867140843e-06, "loss": 0.5349, "step": 3187 }, { "epoch": 0.69, "grad_norm": 0.17190620303153992, "learning_rate": 2.3603397917027787e-06, "loss": 0.518, "step": 3188 }, { "epoch": 0.69, "grad_norm": 0.16831335425376892, "learning_rate": 2.3573771820869646e-06, "loss": 0.4805, "step": 3189 }, { "epoch": 0.69, "grad_norm": 0.1860908567905426, "learning_rate": 2.3544158593092986e-06, "loss": 0.5356, "step": 3190 }, { "epoch": 0.69, "grad_norm": 0.1522263139486313, "learning_rate": 2.3514558248118134e-06, "loss": 0.532, "step": 3191 }, { "epoch": 0.69, "grad_norm": 0.1509491503238678, "learning_rate": 2.3484970800359087e-06, "loss": 0.5388, "step": 3192 }, { "epoch": 0.69, "grad_norm": 0.1586175262928009, "learning_rate": 2.345539626422363e-06, "loss": 0.5192, "step": 3193 }, { "epoch": 0.69, "grad_norm": 0.13638122379779816, "learning_rate": 2.34258346541132e-06, "loss": 0.4945, "step": 3194 }, { "epoch": 0.69, "grad_norm": 0.1359930783510208, "learning_rate": 2.339628598442298e-06, "loss": 0.5176, "step": 3195 }, { "epoch": 0.69, "grad_norm": 0.12893450260162354, "learning_rate": 2.3366750269541833e-06, "loss": 0.5562, "step": 3196 
}, { "epoch": 0.69, "grad_norm": 0.13871856033802032, "learning_rate": 2.3337227523852337e-06, "loss": 0.5027, "step": 3197 }, { "epoch": 0.69, "grad_norm": 0.3347227871417999, "learning_rate": 2.3307717761730745e-06, "loss": 0.4677, "step": 3198 }, { "epoch": 0.69, "grad_norm": 0.14517731964588165, "learning_rate": 2.3278220997546947e-06, "loss": 0.477, "step": 3199 }, { "epoch": 0.69, "grad_norm": 0.15198582410812378, "learning_rate": 2.3248737245664575e-06, "loss": 0.4762, "step": 3200 }, { "epoch": 0.69, "grad_norm": 0.14821645617485046, "learning_rate": 2.3219266520440833e-06, "loss": 0.5042, "step": 3201 }, { "epoch": 0.69, "grad_norm": 0.14736728370189667, "learning_rate": 2.318980883622668e-06, "loss": 0.542, "step": 3202 }, { "epoch": 0.69, "grad_norm": 0.1347406953573227, "learning_rate": 2.3160364207366687e-06, "loss": 0.5291, "step": 3203 }, { "epoch": 0.69, "grad_norm": 0.1243584007024765, "learning_rate": 2.313093264819903e-06, "loss": 0.5376, "step": 3204 }, { "epoch": 0.69, "grad_norm": 0.1613331139087677, "learning_rate": 2.310151417305558e-06, "loss": 0.517, "step": 3205 }, { "epoch": 0.69, "grad_norm": 0.16766297817230225, "learning_rate": 2.3072108796261766e-06, "loss": 0.4946, "step": 3206 }, { "epoch": 0.69, "grad_norm": 0.161861851811409, "learning_rate": 2.3042716532136718e-06, "loss": 0.4984, "step": 3207 }, { "epoch": 0.69, "grad_norm": 0.13287605345249176, "learning_rate": 2.301333739499312e-06, "loss": 0.4903, "step": 3208 }, { "epoch": 0.69, "grad_norm": 0.1508372724056244, "learning_rate": 2.2983971399137302e-06, "loss": 0.5094, "step": 3209 }, { "epoch": 0.69, "grad_norm": 0.11505939811468124, "learning_rate": 2.2954618558869194e-06, "loss": 0.4829, "step": 3210 }, { "epoch": 0.69, "grad_norm": 0.19699527323246002, "learning_rate": 2.2925278888482273e-06, "loss": 0.508, "step": 3211 }, { "epoch": 0.69, "grad_norm": 0.14467853307724, "learning_rate": 2.2895952402263642e-06, "loss": 0.508, "step": 3212 }, { "epoch": 0.69, "grad_norm": 0.13654360175132751, "learning_rate": 2.286663911449401e-06, "loss": 0.4768, "step": 3213 }, { "epoch": 0.69, "grad_norm": 0.18661533296108246, "learning_rate": 2.283733903944756e-06, "loss": 0.5364, "step": 3214 }, { "epoch": 0.69, "grad_norm": 0.1598883867263794, "learning_rate": 2.280805219139219e-06, "loss": 0.5225, "step": 3215 }, { "epoch": 0.69, "grad_norm": 0.174927219748497, "learning_rate": 2.2778778584589214e-06, "loss": 0.468, "step": 3216 }, { "epoch": 0.69, "grad_norm": 0.13632676005363464, "learning_rate": 2.274951823329358e-06, "loss": 0.5381, "step": 3217 }, { "epoch": 0.69, "grad_norm": 0.1499364972114563, "learning_rate": 2.272027115175377e-06, "loss": 0.5137, "step": 3218 }, { "epoch": 0.69, "grad_norm": 0.1480303555727005, "learning_rate": 2.2691037354211767e-06, "loss": 0.4381, "step": 3219 }, { "epoch": 0.69, "grad_norm": 0.13622407615184784, "learning_rate": 2.2661816854903117e-06, "loss": 0.556, "step": 3220 }, { "epoch": 0.69, "grad_norm": 0.15215899050235748, "learning_rate": 2.2632609668056906e-06, "loss": 0.5208, "step": 3221 }, { "epoch": 0.69, "grad_norm": 0.15400218963623047, "learning_rate": 2.2603415807895718e-06, "loss": 0.5073, "step": 3222 }, { "epoch": 0.69, "grad_norm": 0.1677476465702057, "learning_rate": 2.257423528863562e-06, "loss": 0.4861, "step": 3223 }, { "epoch": 0.69, "grad_norm": 0.1435810774564743, "learning_rate": 2.254506812448622e-06, "loss": 0.5095, "step": 3224 }, { "epoch": 0.69, "grad_norm": 0.18489789962768555, "learning_rate": 2.2515914329650636e-06, "loss": 0.5538, 
"step": 3225 }, { "epoch": 0.69, "grad_norm": 0.17401094734668732, "learning_rate": 2.2486773918325394e-06, "loss": 0.503, "step": 3226 }, { "epoch": 0.7, "grad_norm": 0.13293685019016266, "learning_rate": 2.2457646904700632e-06, "loss": 0.524, "step": 3227 }, { "epoch": 0.7, "grad_norm": 0.1453235000371933, "learning_rate": 2.242853330295984e-06, "loss": 0.5121, "step": 3228 }, { "epoch": 0.7, "grad_norm": 0.1937217265367508, "learning_rate": 2.239943312728004e-06, "loss": 0.5165, "step": 3229 }, { "epoch": 0.7, "grad_norm": 0.1602119654417038, "learning_rate": 2.2370346391831737e-06, "loss": 0.5076, "step": 3230 }, { "epoch": 0.7, "grad_norm": 0.12913572788238525, "learning_rate": 2.2341273110778817e-06, "loss": 0.5258, "step": 3231 }, { "epoch": 0.7, "grad_norm": 0.16789868474006653, "learning_rate": 2.231221329827867e-06, "loss": 0.5211, "step": 3232 }, { "epoch": 0.7, "grad_norm": 0.2248123735189438, "learning_rate": 2.228316696848212e-06, "loss": 0.5348, "step": 3233 }, { "epoch": 0.7, "grad_norm": 0.15316419303417206, "learning_rate": 2.225413413553341e-06, "loss": 0.5302, "step": 3234 }, { "epoch": 0.7, "grad_norm": 0.13006356358528137, "learning_rate": 2.222511481357026e-06, "loss": 0.5373, "step": 3235 }, { "epoch": 0.7, "grad_norm": 0.14037051796913147, "learning_rate": 2.219610901672371e-06, "loss": 0.5641, "step": 3236 }, { "epoch": 0.7, "grad_norm": 0.17289666831493378, "learning_rate": 2.216711675911833e-06, "loss": 0.5221, "step": 3237 }, { "epoch": 0.7, "grad_norm": 0.1685284972190857, "learning_rate": 2.2138138054871993e-06, "loss": 0.5341, "step": 3238 }, { "epoch": 0.7, "grad_norm": 0.14729037880897522, "learning_rate": 2.2109172918096034e-06, "loss": 0.5447, "step": 3239 }, { "epoch": 0.7, "grad_norm": 0.17818932235240936, "learning_rate": 2.208022136289521e-06, "loss": 0.5437, "step": 3240 }, { "epoch": 0.7, "grad_norm": 0.14237408339977264, "learning_rate": 2.205128340336758e-06, "loss": 0.5103, "step": 3241 }, { "epoch": 0.7, "grad_norm": 0.17500755190849304, "learning_rate": 2.2022359053604654e-06, "loss": 0.5444, "step": 3242 }, { "epoch": 0.7, "grad_norm": 0.16034772992134094, "learning_rate": 2.199344832769125e-06, "loss": 0.4989, "step": 3243 }, { "epoch": 0.7, "grad_norm": 0.18941017985343933, "learning_rate": 2.1964551239705604e-06, "loss": 0.5812, "step": 3244 }, { "epoch": 0.7, "grad_norm": 0.19451190531253815, "learning_rate": 2.1935667803719307e-06, "loss": 0.4824, "step": 3245 }, { "epoch": 0.7, "grad_norm": 0.15695609152317047, "learning_rate": 2.1906798033797276e-06, "loss": 0.6157, "step": 3246 }, { "epoch": 0.7, "grad_norm": 0.1591734141111374, "learning_rate": 2.1877941943997817e-06, "loss": 0.5586, "step": 3247 }, { "epoch": 0.7, "grad_norm": 0.15630222856998444, "learning_rate": 2.1849099548372492e-06, "loss": 0.5194, "step": 3248 }, { "epoch": 0.7, "grad_norm": 0.16930875182151794, "learning_rate": 2.18202708609663e-06, "loss": 0.4618, "step": 3249 }, { "epoch": 0.7, "grad_norm": 0.12013290822505951, "learning_rate": 2.179145589581747e-06, "loss": 0.5097, "step": 3250 }, { "epoch": 0.7, "grad_norm": 0.125696063041687, "learning_rate": 2.1762654666957606e-06, "loss": 0.5076, "step": 3251 }, { "epoch": 0.7, "grad_norm": 0.1357826292514801, "learning_rate": 2.1733867188411606e-06, "loss": 0.5084, "step": 3252 }, { "epoch": 0.7, "grad_norm": 0.14690843224525452, "learning_rate": 2.170509347419768e-06, "loss": 0.5231, "step": 3253 }, { "epoch": 0.7, "grad_norm": 0.14461292326450348, "learning_rate": 2.167633353832734e-06, "loss": 0.5063, "step": 3254 
}, { "epoch": 0.7, "grad_norm": 0.12568648159503937, "learning_rate": 2.1647587394805353e-06, "loss": 0.4546, "step": 3255 }, { "epoch": 0.7, "grad_norm": 0.15997205674648285, "learning_rate": 2.1618855057629804e-06, "loss": 0.5882, "step": 3256 }, { "epoch": 0.7, "grad_norm": 0.18054303526878357, "learning_rate": 2.159013654079205e-06, "loss": 0.4966, "step": 3257 }, { "epoch": 0.7, "grad_norm": 0.15831291675567627, "learning_rate": 2.156143185827671e-06, "loss": 0.5021, "step": 3258 }, { "epoch": 0.7, "grad_norm": 0.2396220564842224, "learning_rate": 2.153274102406169e-06, "loss": 0.5147, "step": 3259 }, { "epoch": 0.7, "grad_norm": 0.13623584806919098, "learning_rate": 2.1504064052118095e-06, "loss": 0.4903, "step": 3260 }, { "epoch": 0.7, "grad_norm": 0.18768242001533508, "learning_rate": 2.1475400956410337e-06, "loss": 0.5416, "step": 3261 }, { "epoch": 0.7, "grad_norm": 0.15971659123897552, "learning_rate": 2.144675175089606e-06, "loss": 0.5072, "step": 3262 }, { "epoch": 0.7, "grad_norm": 0.20064754784107208, "learning_rate": 2.1418116449526117e-06, "loss": 0.4895, "step": 3263 }, { "epoch": 0.7, "grad_norm": 0.1600414365530014, "learning_rate": 2.1389495066244613e-06, "loss": 0.4434, "step": 3264 }, { "epoch": 0.7, "grad_norm": 0.17641644179821014, "learning_rate": 2.136088761498888e-06, "loss": 0.5195, "step": 3265 }, { "epoch": 0.7, "grad_norm": 0.16606129705905914, "learning_rate": 2.1332294109689446e-06, "loss": 0.4901, "step": 3266 }, { "epoch": 0.7, "grad_norm": 0.16686317324638367, "learning_rate": 2.1303714564270086e-06, "loss": 0.5201, "step": 3267 }, { "epoch": 0.7, "grad_norm": 0.14948517084121704, "learning_rate": 2.127514899264771e-06, "loss": 0.5022, "step": 3268 }, { "epoch": 0.7, "grad_norm": 0.21814821660518646, "learning_rate": 2.1246597408732493e-06, "loss": 0.5412, "step": 3269 }, { "epoch": 0.7, "grad_norm": 0.16125313937664032, "learning_rate": 2.1218059826427727e-06, "loss": 0.4777, "step": 3270 }, { "epoch": 0.7, "grad_norm": 0.15708723664283752, "learning_rate": 2.118953625962998e-06, "loss": 0.4976, "step": 3271 }, { "epoch": 0.7, "grad_norm": 0.14696088433265686, "learning_rate": 2.1161026722228932e-06, "loss": 0.4869, "step": 3272 }, { "epoch": 0.71, "grad_norm": 0.173833429813385, "learning_rate": 2.1132531228107416e-06, "loss": 0.5291, "step": 3273 }, { "epoch": 0.71, "grad_norm": 0.17601168155670166, "learning_rate": 2.110404979114149e-06, "loss": 0.53, "step": 3274 }, { "epoch": 0.71, "grad_norm": 0.1788867861032486, "learning_rate": 2.1075582425200286e-06, "loss": 0.5061, "step": 3275 }, { "epoch": 0.71, "grad_norm": 0.15546058118343353, "learning_rate": 2.104712914414615e-06, "loss": 0.5317, "step": 3276 }, { "epoch": 0.71, "grad_norm": 0.1524634212255478, "learning_rate": 2.101868996183454e-06, "loss": 0.4956, "step": 3277 }, { "epoch": 0.71, "grad_norm": 0.123813197016716, "learning_rate": 2.0990264892114067e-06, "loss": 0.5138, "step": 3278 }, { "epoch": 0.71, "grad_norm": 0.16139627993106842, "learning_rate": 2.0961853948826466e-06, "loss": 0.5739, "step": 3279 }, { "epoch": 0.71, "grad_norm": 0.17389854788780212, "learning_rate": 2.093345714580656e-06, "loss": 0.5025, "step": 3280 }, { "epoch": 0.71, "grad_norm": 0.12925244867801666, "learning_rate": 2.0905074496882333e-06, "loss": 0.495, "step": 3281 }, { "epoch": 0.71, "grad_norm": 0.16095152497291565, "learning_rate": 2.0876706015874816e-06, "loss": 0.5108, "step": 3282 }, { "epoch": 0.71, "grad_norm": 0.1847510039806366, "learning_rate": 2.0848351716598227e-06, "loss": 0.5201, "step": 3283 
}, { "epoch": 0.71, "grad_norm": 0.20535904169082642, "learning_rate": 2.0820011612859825e-06, "loss": 0.4862, "step": 3284 }, { "epoch": 0.71, "grad_norm": 0.14182107150554657, "learning_rate": 2.0791685718459936e-06, "loss": 0.5464, "step": 3285 }, { "epoch": 0.71, "grad_norm": 0.12854298949241638, "learning_rate": 2.076337404719203e-06, "loss": 0.47, "step": 3286 }, { "epoch": 0.71, "grad_norm": 0.1679810881614685, "learning_rate": 2.073507661284257e-06, "loss": 0.5748, "step": 3287 }, { "epoch": 0.71, "grad_norm": 0.13592375814914703, "learning_rate": 2.0706793429191156e-06, "loss": 0.5212, "step": 3288 }, { "epoch": 0.71, "grad_norm": 0.18078750371932983, "learning_rate": 2.0678524510010416e-06, "loss": 0.5825, "step": 3289 }, { "epoch": 0.71, "grad_norm": 0.15121810138225555, "learning_rate": 2.0650269869066048e-06, "loss": 0.5514, "step": 3290 }, { "epoch": 0.71, "grad_norm": 0.1346854269504547, "learning_rate": 2.0622029520116798e-06, "loss": 0.5185, "step": 3291 }, { "epoch": 0.71, "grad_norm": 0.17876240611076355, "learning_rate": 2.0593803476914407e-06, "loss": 0.5203, "step": 3292 }, { "epoch": 0.71, "grad_norm": 0.13062328100204468, "learning_rate": 2.0565591753203713e-06, "loss": 0.4912, "step": 3293 }, { "epoch": 0.71, "grad_norm": 0.14382170140743256, "learning_rate": 2.053739436272256e-06, "loss": 0.5206, "step": 3294 }, { "epoch": 0.71, "grad_norm": 0.1761350929737091, "learning_rate": 2.0509211319201753e-06, "loss": 0.5529, "step": 3295 }, { "epoch": 0.71, "grad_norm": 0.14372633397579193, "learning_rate": 2.0481042636365243e-06, "loss": 0.5006, "step": 3296 }, { "epoch": 0.71, "grad_norm": 0.17832966148853302, "learning_rate": 2.045288832792985e-06, "loss": 0.5626, "step": 3297 }, { "epoch": 0.71, "grad_norm": 0.1652042418718338, "learning_rate": 2.0424748407605468e-06, "loss": 0.4809, "step": 3298 }, { "epoch": 0.71, "grad_norm": 0.16764576733112335, "learning_rate": 2.0396622889094984e-06, "loss": 0.5232, "step": 3299 }, { "epoch": 0.71, "grad_norm": 0.16923469305038452, "learning_rate": 2.036851178609423e-06, "loss": 0.5091, "step": 3300 }, { "epoch": 0.71, "grad_norm": 0.18054278194904327, "learning_rate": 2.0340415112292065e-06, "loss": 0.5236, "step": 3301 }, { "epoch": 0.71, "grad_norm": 0.18616452813148499, "learning_rate": 2.0312332881370294e-06, "loss": 0.486, "step": 3302 }, { "epoch": 0.71, "grad_norm": 0.13819892704486847, "learning_rate": 2.0284265107003715e-06, "loss": 0.5546, "step": 3303 }, { "epoch": 0.71, "grad_norm": 0.17926816642284393, "learning_rate": 2.0256211802860044e-06, "loss": 0.5188, "step": 3304 }, { "epoch": 0.71, "grad_norm": 0.17235776782035828, "learning_rate": 2.0228172982599974e-06, "loss": 0.5168, "step": 3305 }, { "epoch": 0.71, "grad_norm": 0.1339789479970932, "learning_rate": 2.0200148659877185e-06, "loss": 0.5189, "step": 3306 }, { "epoch": 0.71, "grad_norm": 0.1831931173801422, "learning_rate": 2.017213884833821e-06, "loss": 0.5357, "step": 3307 }, { "epoch": 0.71, "grad_norm": 0.1411074548959732, "learning_rate": 2.014414356162258e-06, "loss": 0.508, "step": 3308 }, { "epoch": 0.71, "grad_norm": 0.17658570408821106, "learning_rate": 2.0116162813362742e-06, "loss": 0.4947, "step": 3309 }, { "epoch": 0.71, "grad_norm": 0.15399529039859772, "learning_rate": 2.0088196617184065e-06, "loss": 0.4912, "step": 3310 }, { "epoch": 0.71, "grad_norm": 0.14932404458522797, "learning_rate": 2.0060244986704834e-06, "loss": 0.5249, "step": 3311 }, { "epoch": 0.71, "grad_norm": 0.1162419468164444, "learning_rate": 2.00323079355362e-06, "loss": 
0.4978, "step": 3312 }, { "epoch": 0.71, "grad_norm": 0.16213734447956085, "learning_rate": 2.000438547728226e-06, "loss": 0.5094, "step": 3313 }, { "epoch": 0.71, "grad_norm": 0.1504596322774887, "learning_rate": 1.997647762554e-06, "loss": 0.4843, "step": 3314 }, { "epoch": 0.71, "grad_norm": 0.14448657631874084, "learning_rate": 1.994858439389929e-06, "loss": 0.5749, "step": 3315 }, { "epoch": 0.71, "grad_norm": 0.16685092449188232, "learning_rate": 1.992070579594288e-06, "loss": 0.4815, "step": 3316 }, { "epoch": 0.71, "grad_norm": 0.17471322417259216, "learning_rate": 1.9892841845246357e-06, "loss": 0.5098, "step": 3317 }, { "epoch": 0.71, "grad_norm": 0.16297274827957153, "learning_rate": 1.9864992555378256e-06, "loss": 0.5032, "step": 3318 }, { "epoch": 0.71, "grad_norm": 0.15113520622253418, "learning_rate": 1.983715793989987e-06, "loss": 0.5265, "step": 3319 }, { "epoch": 0.72, "grad_norm": 0.12620225548744202, "learning_rate": 1.9809338012365438e-06, "loss": 0.4534, "step": 3320 }, { "epoch": 0.72, "grad_norm": 0.16422003507614136, "learning_rate": 1.9781532786322005e-06, "loss": 0.5404, "step": 3321 }, { "epoch": 0.72, "grad_norm": 0.19491100311279297, "learning_rate": 1.9753742275309456e-06, "loss": 0.5811, "step": 3322 }, { "epoch": 0.72, "grad_norm": 0.21426242589950562, "learning_rate": 1.9725966492860536e-06, "loss": 0.5046, "step": 3323 }, { "epoch": 0.72, "grad_norm": 0.17637012898921967, "learning_rate": 1.9698205452500772e-06, "loss": 0.5513, "step": 3324 }, { "epoch": 0.72, "grad_norm": 0.14277128875255585, "learning_rate": 1.9670459167748552e-06, "loss": 0.5306, "step": 3325 }, { "epoch": 0.72, "grad_norm": 0.1377527266740799, "learning_rate": 1.9642727652115056e-06, "loss": 0.4497, "step": 3326 }, { "epoch": 0.72, "grad_norm": 0.1276976764202118, "learning_rate": 1.9615010919104296e-06, "loss": 0.5035, "step": 3327 }, { "epoch": 0.72, "grad_norm": 0.13921838998794556, "learning_rate": 1.9587308982213077e-06, "loss": 0.5545, "step": 3328 }, { "epoch": 0.72, "grad_norm": 0.16659273207187653, "learning_rate": 1.9559621854930968e-06, "loss": 0.541, "step": 3329 }, { "epoch": 0.72, "grad_norm": 0.17996352910995483, "learning_rate": 1.953194955074038e-06, "loss": 0.5388, "step": 3330 }, { "epoch": 0.72, "grad_norm": 0.15658895671367645, "learning_rate": 1.9504292083116442e-06, "loss": 0.4891, "step": 3331 }, { "epoch": 0.72, "grad_norm": 0.25563114881515503, "learning_rate": 1.9476649465527116e-06, "loss": 0.4923, "step": 3332 }, { "epoch": 0.72, "grad_norm": 0.12000484019517899, "learning_rate": 1.94490217114331e-06, "loss": 0.464, "step": 3333 }, { "epoch": 0.72, "grad_norm": 0.1276124268770218, "learning_rate": 1.942140883428788e-06, "loss": 0.4568, "step": 3334 }, { "epoch": 0.72, "grad_norm": 0.1600504219532013, "learning_rate": 1.939381084753769e-06, "loss": 0.5439, "step": 3335 }, { "epoch": 0.72, "grad_norm": 0.135470911860466, "learning_rate": 1.936622776462147e-06, "loss": 0.518, "step": 3336 }, { "epoch": 0.72, "grad_norm": 0.15857979655265808, "learning_rate": 1.933865959897096e-06, "loss": 0.5453, "step": 3337 }, { "epoch": 0.72, "grad_norm": 0.17830796539783478, "learning_rate": 1.931110636401062e-06, "loss": 0.5632, "step": 3338 }, { "epoch": 0.72, "grad_norm": 0.1584271937608719, "learning_rate": 1.9283568073157592e-06, "loss": 0.5207, "step": 3339 }, { "epoch": 0.72, "grad_norm": 0.1628987342119217, "learning_rate": 1.925604473982185e-06, "loss": 0.5068, "step": 3340 }, { "epoch": 0.72, "grad_norm": 0.18597134947776794, "learning_rate": 
1.922853637740596e-06, "loss": 0.532, "step": 3341 }, { "epoch": 0.72, "grad_norm": 0.18993283808231354, "learning_rate": 1.9201042999305276e-06, "loss": 0.5386, "step": 3342 }, { "epoch": 0.72, "grad_norm": 0.13225135207176208, "learning_rate": 1.9173564618907843e-06, "loss": 0.5157, "step": 3343 }, { "epoch": 0.72, "grad_norm": 0.1662367582321167, "learning_rate": 1.914610124959437e-06, "loss": 0.5148, "step": 3344 }, { "epoch": 0.72, "grad_norm": 0.1778230369091034, "learning_rate": 1.9118652904738276e-06, "loss": 0.4604, "step": 3345 }, { "epoch": 0.72, "grad_norm": 0.17423133552074432, "learning_rate": 1.9091219597705694e-06, "loss": 0.5607, "step": 3346 }, { "epoch": 0.72, "grad_norm": 0.15368233621120453, "learning_rate": 1.9063801341855392e-06, "loss": 0.511, "step": 3347 }, { "epoch": 0.72, "grad_norm": 0.18018236756324768, "learning_rate": 1.9036398150538842e-06, "loss": 0.5437, "step": 3348 }, { "epoch": 0.72, "grad_norm": 0.15523254871368408, "learning_rate": 1.9009010037100133e-06, "loss": 0.5262, "step": 3349 }, { "epoch": 0.72, "grad_norm": 0.1918708235025406, "learning_rate": 1.898163701487607e-06, "loss": 0.5454, "step": 3350 }, { "epoch": 0.72, "grad_norm": 0.16382472217082977, "learning_rate": 1.8954279097196032e-06, "loss": 0.5145, "step": 3351 }, { "epoch": 0.72, "grad_norm": 0.1837274134159088, "learning_rate": 1.8926936297382148e-06, "loss": 0.5496, "step": 3352 }, { "epoch": 0.72, "grad_norm": 0.15243403613567352, "learning_rate": 1.8899608628749116e-06, "loss": 0.499, "step": 3353 }, { "epoch": 0.72, "grad_norm": 0.22877193987369537, "learning_rate": 1.8872296104604255e-06, "loss": 0.5414, "step": 3354 }, { "epoch": 0.72, "grad_norm": 0.14050383865833282, "learning_rate": 1.8844998738247562e-06, "loss": 0.5615, "step": 3355 }, { "epoch": 0.72, "grad_norm": 0.18556596338748932, "learning_rate": 1.8817716542971593e-06, "loss": 0.5232, "step": 3356 }, { "epoch": 0.72, "grad_norm": 0.16367103159427643, "learning_rate": 1.8790449532061556e-06, "loss": 0.5193, "step": 3357 }, { "epoch": 0.72, "grad_norm": 0.18238645792007446, "learning_rate": 1.8763197718795262e-06, "loss": 0.5631, "step": 3358 }, { "epoch": 0.72, "grad_norm": 0.16355136036872864, "learning_rate": 1.8735961116443118e-06, "loss": 0.5209, "step": 3359 }, { "epoch": 0.72, "grad_norm": 0.13203002512454987, "learning_rate": 1.8708739738268133e-06, "loss": 0.5164, "step": 3360 }, { "epoch": 0.72, "grad_norm": 0.14900876581668854, "learning_rate": 1.8681533597525859e-06, "loss": 0.5436, "step": 3361 }, { "epoch": 0.72, "grad_norm": 0.16481418907642365, "learning_rate": 1.865434270746449e-06, "loss": 0.4923, "step": 3362 }, { "epoch": 0.72, "grad_norm": 0.1458396166563034, "learning_rate": 1.8627167081324732e-06, "loss": 0.4808, "step": 3363 }, { "epoch": 0.72, "grad_norm": 0.15053486824035645, "learning_rate": 1.8600006732339892e-06, "loss": 0.4947, "step": 3364 }, { "epoch": 0.72, "grad_norm": 0.1663227528333664, "learning_rate": 1.8572861673735886e-06, "loss": 0.5284, "step": 3365 }, { "epoch": 0.73, "grad_norm": 0.17765146493911743, "learning_rate": 1.8545731918731074e-06, "loss": 0.5479, "step": 3366 }, { "epoch": 0.73, "grad_norm": 0.17067447304725647, "learning_rate": 1.8518617480536472e-06, "loss": 0.5336, "step": 3367 }, { "epoch": 0.73, "grad_norm": 0.15725915133953094, "learning_rate": 1.8491518372355538e-06, "loss": 0.5474, "step": 3368 }, { "epoch": 0.73, "grad_norm": 0.17181500792503357, "learning_rate": 1.8464434607384345e-06, "loss": 0.541, "step": 3369 }, { "epoch": 0.73, "grad_norm": 
0.15348359942436218, "learning_rate": 1.8437366198811463e-06, "loss": 0.4621, "step": 3370 }, { "epoch": 0.73, "grad_norm": 0.1679931879043579, "learning_rate": 1.8410313159817982e-06, "loss": 0.4791, "step": 3371 }, { "epoch": 0.73, "grad_norm": 0.14566868543624878, "learning_rate": 1.838327550357753e-06, "loss": 0.4671, "step": 3372 }, { "epoch": 0.73, "grad_norm": 0.12078259885311127, "learning_rate": 1.83562532432562e-06, "loss": 0.5072, "step": 3373 }, { "epoch": 0.73, "grad_norm": 0.1410682201385498, "learning_rate": 1.8329246392012622e-06, "loss": 0.5099, "step": 3374 }, { "epoch": 0.73, "grad_norm": 0.1579367220401764, "learning_rate": 1.8302254962997934e-06, "loss": 0.5076, "step": 3375 }, { "epoch": 0.73, "grad_norm": 0.13609954714775085, "learning_rate": 1.8275278969355714e-06, "loss": 0.4894, "step": 3376 }, { "epoch": 0.73, "grad_norm": 0.14277243614196777, "learning_rate": 1.8248318424222071e-06, "loss": 0.5272, "step": 3377 }, { "epoch": 0.73, "grad_norm": 0.20254714787006378, "learning_rate": 1.8221373340725568e-06, "loss": 0.5519, "step": 3378 }, { "epoch": 0.73, "grad_norm": 0.14397798478603363, "learning_rate": 1.8194443731987254e-06, "loss": 0.56, "step": 3379 }, { "epoch": 0.73, "grad_norm": 0.1646426022052765, "learning_rate": 1.8167529611120648e-06, "loss": 0.5418, "step": 3380 }, { "epoch": 0.73, "grad_norm": 0.15328004956245422, "learning_rate": 1.8140630991231683e-06, "loss": 0.515, "step": 3381 }, { "epoch": 0.73, "grad_norm": 0.14614619314670563, "learning_rate": 1.811374788541878e-06, "loss": 0.461, "step": 3382 }, { "epoch": 0.73, "grad_norm": 0.14326471090316772, "learning_rate": 1.808688030677281e-06, "loss": 0.5391, "step": 3383 }, { "epoch": 0.73, "grad_norm": 0.16172616183757782, "learning_rate": 1.8060028268377088e-06, "loss": 0.4593, "step": 3384 }, { "epoch": 0.73, "grad_norm": 0.16830769181251526, "learning_rate": 1.8033191783307309e-06, "loss": 0.5275, "step": 3385 }, { "epoch": 0.73, "grad_norm": 0.16044098138809204, "learning_rate": 1.8006370864631644e-06, "loss": 0.4947, "step": 3386 }, { "epoch": 0.73, "grad_norm": 0.16456788778305054, "learning_rate": 1.7979565525410691e-06, "loss": 0.5459, "step": 3387 }, { "epoch": 0.73, "grad_norm": 0.18591398000717163, "learning_rate": 1.7952775778697418e-06, "loss": 0.5412, "step": 3388 }, { "epoch": 0.73, "grad_norm": 0.16051456332206726, "learning_rate": 1.7926001637537222e-06, "loss": 0.5081, "step": 3389 }, { "epoch": 0.73, "grad_norm": 0.1629684865474701, "learning_rate": 1.7899243114967918e-06, "loss": 0.5292, "step": 3390 }, { "epoch": 0.73, "grad_norm": 0.171781525015831, "learning_rate": 1.7872500224019696e-06, "loss": 0.5031, "step": 3391 }, { "epoch": 0.73, "grad_norm": 0.14067816734313965, "learning_rate": 1.7845772977715148e-06, "loss": 0.5218, "step": 3392 }, { "epoch": 0.73, "grad_norm": 0.1485803723335266, "learning_rate": 1.7819061389069208e-06, "loss": 0.5542, "step": 3393 }, { "epoch": 0.73, "grad_norm": 0.13566631078720093, "learning_rate": 1.7792365471089252e-06, "loss": 0.4824, "step": 3394 }, { "epoch": 0.73, "grad_norm": 0.17010356485843658, "learning_rate": 1.7765685236774937e-06, "loss": 0.4645, "step": 3395 }, { "epoch": 0.73, "grad_norm": 0.16269443929195404, "learning_rate": 1.773902069911838e-06, "loss": 0.4747, "step": 3396 }, { "epoch": 0.73, "grad_norm": 0.20955312252044678, "learning_rate": 1.7712371871104012e-06, "loss": 0.4571, "step": 3397 }, { "epoch": 0.73, "grad_norm": 0.13663814961910248, "learning_rate": 1.7685738765708576e-06, "loss": 0.4574, "step": 3398 }, { 
"epoch": 0.73, "grad_norm": 0.15181677043437958, "learning_rate": 1.765912139590123e-06, "loss": 0.5594, "step": 3399 }, { "epoch": 0.73, "grad_norm": 0.15845806896686554, "learning_rate": 1.7632519774643391e-06, "loss": 0.4867, "step": 3400 }, { "epoch": 0.73, "grad_norm": 0.18000967800617218, "learning_rate": 1.760593391488888e-06, "loss": 0.486, "step": 3401 }, { "epoch": 0.73, "grad_norm": 0.2162676602602005, "learning_rate": 1.7579363829583794e-06, "loss": 0.49, "step": 3402 }, { "epoch": 0.73, "grad_norm": 0.1404118835926056, "learning_rate": 1.7552809531666582e-06, "loss": 0.4929, "step": 3403 }, { "epoch": 0.73, "grad_norm": 0.1907334327697754, "learning_rate": 1.7526271034067993e-06, "loss": 0.5793, "step": 3404 }, { "epoch": 0.73, "grad_norm": 0.16548538208007812, "learning_rate": 1.749974834971106e-06, "loss": 0.5448, "step": 3405 }, { "epoch": 0.73, "grad_norm": 0.12368584424257278, "learning_rate": 1.7473241491511139e-06, "loss": 0.4833, "step": 3406 }, { "epoch": 0.73, "grad_norm": 0.1966637670993805, "learning_rate": 1.7446750472375879e-06, "loss": 0.5532, "step": 3407 }, { "epoch": 0.73, "grad_norm": 0.13835598528385162, "learning_rate": 1.7420275305205214e-06, "loss": 0.5279, "step": 3408 }, { "epoch": 0.73, "grad_norm": 0.15932750701904297, "learning_rate": 1.7393816002891368e-06, "loss": 0.5535, "step": 3409 }, { "epoch": 0.73, "grad_norm": 0.18594208359718323, "learning_rate": 1.7367372578318797e-06, "loss": 0.4495, "step": 3410 }, { "epoch": 0.73, "grad_norm": 0.17896361649036407, "learning_rate": 1.7340945044364293e-06, "loss": 0.5242, "step": 3411 }, { "epoch": 0.73, "grad_norm": 0.13597969710826874, "learning_rate": 1.7314533413896833e-06, "loss": 0.443, "step": 3412 }, { "epoch": 0.74, "grad_norm": 0.15737301111221313, "learning_rate": 1.7288137699777714e-06, "loss": 0.5596, "step": 3413 }, { "epoch": 0.74, "grad_norm": 0.14145611226558685, "learning_rate": 1.7261757914860456e-06, "loss": 0.5109, "step": 3414 }, { "epoch": 0.74, "grad_norm": 0.17835848033428192, "learning_rate": 1.7235394071990824e-06, "loss": 0.5036, "step": 3415 }, { "epoch": 0.74, "grad_norm": 0.21317198872566223, "learning_rate": 1.720904618400684e-06, "loss": 0.473, "step": 3416 }, { "epoch": 0.74, "grad_norm": 0.13540047407150269, "learning_rate": 1.7182714263738692e-06, "loss": 0.538, "step": 3417 }, { "epoch": 0.74, "grad_norm": 0.21162466704845428, "learning_rate": 1.7156398324008871e-06, "loss": 0.5771, "step": 3418 }, { "epoch": 0.74, "grad_norm": 0.16375568509101868, "learning_rate": 1.7130098377632065e-06, "loss": 0.5353, "step": 3419 }, { "epoch": 0.74, "grad_norm": 0.15535147488117218, "learning_rate": 1.7103814437415105e-06, "loss": 0.4993, "step": 3420 }, { "epoch": 0.74, "grad_norm": 0.17422151565551758, "learning_rate": 1.7077546516157156e-06, "loss": 0.5527, "step": 3421 }, { "epoch": 0.74, "grad_norm": 0.15542204678058624, "learning_rate": 1.7051294626649462e-06, "loss": 0.5521, "step": 3422 }, { "epoch": 0.74, "grad_norm": 0.16863486170768738, "learning_rate": 1.702505878167553e-06, "loss": 0.5245, "step": 3423 }, { "epoch": 0.74, "grad_norm": 0.17205984890460968, "learning_rate": 1.6998838994011041e-06, "loss": 0.5189, "step": 3424 }, { "epoch": 0.74, "grad_norm": 0.15427836775779724, "learning_rate": 1.6972635276423815e-06, "loss": 0.5309, "step": 3425 }, { "epoch": 0.74, "grad_norm": 0.12143069505691528, "learning_rate": 1.6946447641673907e-06, "loss": 0.4993, "step": 3426 }, { "epoch": 0.74, "grad_norm": 0.15743811428546906, "learning_rate": 1.6920276102513512e-06, "loss": 
0.4693, "step": 3427 }, { "epoch": 0.74, "grad_norm": 0.15306471288204193, "learning_rate": 1.6894120671686986e-06, "loss": 0.5164, "step": 3428 }, { "epoch": 0.74, "grad_norm": 0.21616849303245544, "learning_rate": 1.6867981361930864e-06, "loss": 0.5525, "step": 3429 }, { "epoch": 0.74, "grad_norm": 0.1986081451177597, "learning_rate": 1.6841858185973775e-06, "loss": 0.5335, "step": 3430 }, { "epoch": 0.74, "grad_norm": 0.1559258997440338, "learning_rate": 1.681575115653656e-06, "loss": 0.4944, "step": 3431 }, { "epoch": 0.74, "grad_norm": 0.12250448018312454, "learning_rate": 1.6789660286332132e-06, "loss": 0.5096, "step": 3432 }, { "epoch": 0.74, "grad_norm": 0.3296593129634857, "learning_rate": 1.6763585588065579e-06, "loss": 0.5291, "step": 3433 }, { "epoch": 0.74, "grad_norm": 0.15359769761562347, "learning_rate": 1.6737527074434135e-06, "loss": 0.4591, "step": 3434 }, { "epoch": 0.74, "grad_norm": 0.15591566264629364, "learning_rate": 1.6711484758127088e-06, "loss": 0.524, "step": 3435 }, { "epoch": 0.74, "grad_norm": 0.16285593807697296, "learning_rate": 1.6685458651825892e-06, "loss": 0.4952, "step": 3436 }, { "epoch": 0.74, "grad_norm": 0.1407860666513443, "learning_rate": 1.6659448768204062e-06, "loss": 0.436, "step": 3437 }, { "epoch": 0.74, "grad_norm": 0.15365861356258392, "learning_rate": 1.6633455119927256e-06, "loss": 0.5039, "step": 3438 }, { "epoch": 0.74, "grad_norm": 0.18231172859668732, "learning_rate": 1.6607477719653198e-06, "loss": 0.5312, "step": 3439 }, { "epoch": 0.74, "grad_norm": 0.1712518185377121, "learning_rate": 1.658151658003172e-06, "loss": 0.5649, "step": 3440 }, { "epoch": 0.74, "grad_norm": 0.19198983907699585, "learning_rate": 1.6555571713704743e-06, "loss": 0.5381, "step": 3441 }, { "epoch": 0.74, "grad_norm": 0.14253763854503632, "learning_rate": 1.6529643133306212e-06, "loss": 0.545, "step": 3442 }, { "epoch": 0.74, "grad_norm": 0.16237348318099976, "learning_rate": 1.6503730851462208e-06, "loss": 0.5184, "step": 3443 }, { "epoch": 0.74, "grad_norm": 0.1761130690574646, "learning_rate": 1.647783488079081e-06, "loss": 0.547, "step": 3444 }, { "epoch": 0.74, "grad_norm": 0.17866788804531097, "learning_rate": 1.6451955233902206e-06, "loss": 0.5351, "step": 3445 }, { "epoch": 0.74, "grad_norm": 0.176165372133255, "learning_rate": 1.6426091923398619e-06, "loss": 0.4789, "step": 3446 }, { "epoch": 0.74, "grad_norm": 0.21256986260414124, "learning_rate": 1.6400244961874311e-06, "loss": 0.5431, "step": 3447 }, { "epoch": 0.74, "grad_norm": 0.17698679864406586, "learning_rate": 1.6374414361915613e-06, "loss": 0.515, "step": 3448 }, { "epoch": 0.74, "grad_norm": 0.13463236391544342, "learning_rate": 1.6348600136100817e-06, "loss": 0.5694, "step": 3449 }, { "epoch": 0.74, "grad_norm": 0.15754647552967072, "learning_rate": 1.6322802297000306e-06, "loss": 0.5126, "step": 3450 }, { "epoch": 0.74, "grad_norm": 0.19439859688282013, "learning_rate": 1.6297020857176466e-06, "loss": 0.5368, "step": 3451 }, { "epoch": 0.74, "grad_norm": 0.1266242265701294, "learning_rate": 1.6271255829183702e-06, "loss": 0.502, "step": 3452 }, { "epoch": 0.74, "grad_norm": 0.18297268450260162, "learning_rate": 1.6245507225568425e-06, "loss": 0.4904, "step": 3453 }, { "epoch": 0.74, "grad_norm": 0.1342281848192215, "learning_rate": 1.6219775058869019e-06, "loss": 0.4823, "step": 3454 }, { "epoch": 0.74, "grad_norm": 0.17030228674411774, "learning_rate": 1.6194059341615908e-06, "loss": 0.5196, "step": 3455 }, { "epoch": 0.74, "grad_norm": 0.1725773811340332, "learning_rate": 
1.6168360086331498e-06, "loss": 0.4785, "step": 3456 }, { "epoch": 0.74, "grad_norm": 0.16261161863803864, "learning_rate": 1.614267730553013e-06, "loss": 0.481, "step": 3457 }, { "epoch": 0.74, "grad_norm": 0.16986827552318573, "learning_rate": 1.6117011011718188e-06, "loss": 0.4874, "step": 3458 }, { "epoch": 0.75, "grad_norm": 0.148764505982399, "learning_rate": 1.6091361217393992e-06, "loss": 0.5044, "step": 3459 }, { "epoch": 0.75, "grad_norm": 0.1262829303741455, "learning_rate": 1.6065727935047837e-06, "loss": 0.5185, "step": 3460 }, { "epoch": 0.75, "grad_norm": 0.1936028003692627, "learning_rate": 1.6040111177161994e-06, "loss": 0.5645, "step": 3461 }, { "epoch": 0.75, "grad_norm": 0.1676550656557083, "learning_rate": 1.6014510956210632e-06, "loss": 0.5394, "step": 3462 }, { "epoch": 0.75, "grad_norm": 0.19447743892669678, "learning_rate": 1.5988927284659921e-06, "loss": 0.5471, "step": 3463 }, { "epoch": 0.75, "grad_norm": 0.14620442688465118, "learning_rate": 1.5963360174967956e-06, "loss": 0.493, "step": 3464 }, { "epoch": 0.75, "grad_norm": 0.1667238175868988, "learning_rate": 1.593780963958479e-06, "loss": 0.5172, "step": 3465 }, { "epoch": 0.75, "grad_norm": 0.14564719796180725, "learning_rate": 1.5912275690952339e-06, "loss": 0.5031, "step": 3466 }, { "epoch": 0.75, "grad_norm": 0.17484711110591888, "learning_rate": 1.5886758341504506e-06, "loss": 0.4841, "step": 3467 }, { "epoch": 0.75, "grad_norm": 0.17965911328792572, "learning_rate": 1.5861257603667106e-06, "loss": 0.5354, "step": 3468 }, { "epoch": 0.75, "grad_norm": 0.16746492683887482, "learning_rate": 1.5835773489857813e-06, "loss": 0.5087, "step": 3469 }, { "epoch": 0.75, "grad_norm": 0.15599916875362396, "learning_rate": 1.581030601248626e-06, "loss": 0.5562, "step": 3470 }, { "epoch": 0.75, "grad_norm": 0.13977332413196564, "learning_rate": 1.5784855183953956e-06, "loss": 0.5003, "step": 3471 }, { "epoch": 0.75, "grad_norm": 0.19692641496658325, "learning_rate": 1.5759421016654314e-06, "loss": 0.4618, "step": 3472 }, { "epoch": 0.75, "grad_norm": 0.19811460375785828, "learning_rate": 1.5734003522972635e-06, "loss": 0.4771, "step": 3473 }, { "epoch": 0.75, "grad_norm": 0.15085352957248688, "learning_rate": 1.570860271528607e-06, "loss": 0.5023, "step": 3474 }, { "epoch": 0.75, "grad_norm": 0.16862472891807556, "learning_rate": 1.5683218605963686e-06, "loss": 0.5323, "step": 3475 }, { "epoch": 0.75, "grad_norm": 0.14913085103034973, "learning_rate": 1.5657851207366359e-06, "loss": 0.5062, "step": 3476 }, { "epoch": 0.75, "grad_norm": 0.17679521441459656, "learning_rate": 1.5632500531846916e-06, "loss": 0.4542, "step": 3477 }, { "epoch": 0.75, "grad_norm": 0.14363497495651245, "learning_rate": 1.5607166591749995e-06, "loss": 0.5322, "step": 3478 }, { "epoch": 0.75, "grad_norm": 0.1939202845096588, "learning_rate": 1.5581849399412047e-06, "loss": 0.5045, "step": 3479 }, { "epoch": 0.75, "grad_norm": 0.15432246029376984, "learning_rate": 1.555654896716144e-06, "loss": 0.5493, "step": 3480 }, { "epoch": 0.75, "grad_norm": 0.14659973978996277, "learning_rate": 1.55312653073183e-06, "loss": 0.5053, "step": 3481 }, { "epoch": 0.75, "grad_norm": 0.14200441539287567, "learning_rate": 1.5505998432194658e-06, "loss": 0.4921, "step": 3482 }, { "epoch": 0.75, "grad_norm": 0.16654643416404724, "learning_rate": 1.5480748354094332e-06, "loss": 0.4844, "step": 3483 }, { "epoch": 0.75, "grad_norm": 0.21012644469738007, "learning_rate": 1.5455515085312984e-06, "loss": 0.5075, "step": 3484 }, { "epoch": 0.75, "grad_norm": 
0.20673668384552002, "learning_rate": 1.543029863813808e-06, "loss": 0.5155, "step": 3485 }, { "epoch": 0.75, "grad_norm": 0.16482393443584442, "learning_rate": 1.5405099024848874e-06, "loss": 0.4767, "step": 3486 }, { "epoch": 0.75, "grad_norm": 0.14894434809684753, "learning_rate": 1.5379916257716448e-06, "loss": 0.5139, "step": 3487 }, { "epoch": 0.75, "grad_norm": 0.14717522263526917, "learning_rate": 1.5354750349003694e-06, "loss": 0.5422, "step": 3488 }, { "epoch": 0.75, "grad_norm": 0.1304522007703781, "learning_rate": 1.5329601310965225e-06, "loss": 0.5312, "step": 3489 }, { "epoch": 0.75, "grad_norm": 0.20385202765464783, "learning_rate": 1.5304469155847556e-06, "loss": 0.5567, "step": 3490 }, { "epoch": 0.75, "grad_norm": 0.13958947360515594, "learning_rate": 1.527935389588886e-06, "loss": 0.5514, "step": 3491 }, { "epoch": 0.75, "grad_norm": 0.17612220346927643, "learning_rate": 1.5254255543319168e-06, "loss": 0.4965, "step": 3492 }, { "epoch": 0.75, "grad_norm": 0.1380666047334671, "learning_rate": 1.5229174110360222e-06, "loss": 0.5664, "step": 3493 }, { "epoch": 0.75, "grad_norm": 0.14796659350395203, "learning_rate": 1.5204109609225553e-06, "loss": 0.4855, "step": 3494 }, { "epoch": 0.75, "grad_norm": 0.17148888111114502, "learning_rate": 1.5179062052120459e-06, "loss": 0.4734, "step": 3495 }, { "epoch": 0.75, "grad_norm": 0.15438951551914215, "learning_rate": 1.5154031451241952e-06, "loss": 0.5619, "step": 3496 }, { "epoch": 0.75, "grad_norm": 0.18706923723220825, "learning_rate": 1.5129017818778835e-06, "loss": 0.5614, "step": 3497 }, { "epoch": 0.75, "grad_norm": 0.15529923141002655, "learning_rate": 1.5104021166911582e-06, "loss": 0.5682, "step": 3498 }, { "epoch": 0.75, "grad_norm": 0.21242637932300568, "learning_rate": 1.5079041507812454e-06, "loss": 0.5401, "step": 3499 }, { "epoch": 0.75, "grad_norm": 0.18680711090564728, "learning_rate": 1.5054078853645432e-06, "loss": 0.5004, "step": 3500 }, { "epoch": 0.75, "grad_norm": 0.1526576429605484, "learning_rate": 1.5029133216566172e-06, "loss": 0.4787, "step": 3501 }, { "epoch": 0.75, "grad_norm": 0.16828754544258118, "learning_rate": 1.5004204608722088e-06, "loss": 0.5431, "step": 3502 }, { "epoch": 0.75, "grad_norm": 0.12067432701587677, "learning_rate": 1.4979293042252291e-06, "loss": 0.475, "step": 3503 }, { "epoch": 0.75, "grad_norm": 0.2365456521511078, "learning_rate": 1.495439852928759e-06, "loss": 0.5043, "step": 3504 }, { "epoch": 0.76, "grad_norm": 0.15215608477592468, "learning_rate": 1.492952108195051e-06, "loss": 0.5572, "step": 3505 }, { "epoch": 0.76, "grad_norm": 0.12552374601364136, "learning_rate": 1.4904660712355207e-06, "loss": 0.4765, "step": 3506 }, { "epoch": 0.76, "grad_norm": 0.14761190116405487, "learning_rate": 1.4879817432607573e-06, "loss": 0.5246, "step": 3507 }, { "epoch": 0.76, "grad_norm": 0.18243740499019623, "learning_rate": 1.4854991254805179e-06, "loss": 0.5586, "step": 3508 }, { "epoch": 0.76, "grad_norm": 0.14932124316692352, "learning_rate": 1.4830182191037246e-06, "loss": 0.5113, "step": 3509 }, { "epoch": 0.76, "grad_norm": 0.14290283620357513, "learning_rate": 1.4805390253384683e-06, "loss": 0.5141, "step": 3510 }, { "epoch": 0.76, "grad_norm": 0.1591915637254715, "learning_rate": 1.4780615453920016e-06, "loss": 0.5043, "step": 3511 }, { "epoch": 0.76, "grad_norm": 0.15741455554962158, "learning_rate": 1.4755857804707485e-06, "loss": 0.5195, "step": 3512 }, { "epoch": 0.76, "grad_norm": 0.16539999842643738, "learning_rate": 1.4731117317802923e-06, "loss": 0.5353, "step": 3513 
}, { "epoch": 0.76, "grad_norm": 0.13506761193275452, "learning_rate": 1.4706394005253838e-06, "loss": 0.5446, "step": 3514 }, { "epoch": 0.76, "grad_norm": 0.1319400519132614, "learning_rate": 1.4681687879099376e-06, "loss": 0.5075, "step": 3515 }, { "epoch": 0.76, "grad_norm": 0.1385585367679596, "learning_rate": 1.465699895137031e-06, "loss": 0.5429, "step": 3516 }, { "epoch": 0.76, "grad_norm": 0.12562334537506104, "learning_rate": 1.463232723408904e-06, "loss": 0.5114, "step": 3517 }, { "epoch": 0.76, "grad_norm": 0.1627231389284134, "learning_rate": 1.4607672739269552e-06, "loss": 0.4937, "step": 3518 }, { "epoch": 0.76, "grad_norm": 0.18348899483680725, "learning_rate": 1.458303547891749e-06, "loss": 0.5292, "step": 3519 }, { "epoch": 0.76, "grad_norm": 0.17182128131389618, "learning_rate": 1.455841546503009e-06, "loss": 0.5041, "step": 3520 }, { "epoch": 0.76, "grad_norm": 0.1899326741695404, "learning_rate": 1.4533812709596184e-06, "loss": 0.5299, "step": 3521 }, { "epoch": 0.76, "grad_norm": 0.2223159819841385, "learning_rate": 1.450922722459623e-06, "loss": 0.535, "step": 3522 }, { "epoch": 0.76, "grad_norm": 0.16189001500606537, "learning_rate": 1.4484659022002208e-06, "loss": 0.5021, "step": 3523 }, { "epoch": 0.76, "grad_norm": 0.19098880887031555, "learning_rate": 1.446010811377776e-06, "loss": 0.5376, "step": 3524 }, { "epoch": 0.76, "grad_norm": 0.19344013929367065, "learning_rate": 1.4435574511878037e-06, "loss": 0.5651, "step": 3525 }, { "epoch": 0.76, "grad_norm": 0.13735520839691162, "learning_rate": 1.4411058228249824e-06, "loss": 0.5185, "step": 3526 }, { "epoch": 0.76, "grad_norm": 0.15287934243679047, "learning_rate": 1.438655927483143e-06, "loss": 0.538, "step": 3527 }, { "epoch": 0.76, "grad_norm": 0.14649856090545654, "learning_rate": 1.4362077663552754e-06, "loss": 0.4895, "step": 3528 }, { "epoch": 0.76, "grad_norm": 0.171720489859581, "learning_rate": 1.4337613406335244e-06, "loss": 0.5108, "step": 3529 }, { "epoch": 0.76, "grad_norm": 0.15773873031139374, "learning_rate": 1.4313166515091863e-06, "loss": 0.5321, "step": 3530 }, { "epoch": 0.76, "grad_norm": 0.1555633246898651, "learning_rate": 1.428873700172716e-06, "loss": 0.533, "step": 3531 }, { "epoch": 0.76, "grad_norm": 0.15259774029254913, "learning_rate": 1.4264324878137204e-06, "loss": 0.5034, "step": 3532 }, { "epoch": 0.76, "grad_norm": 0.13174332678318024, "learning_rate": 1.4239930156209597e-06, "loss": 0.5052, "step": 3533 }, { "epoch": 0.76, "grad_norm": 0.15307480096817017, "learning_rate": 1.421555284782349e-06, "loss": 0.5494, "step": 3534 }, { "epoch": 0.76, "grad_norm": 0.14553479850292206, "learning_rate": 1.4191192964849492e-06, "loss": 0.5103, "step": 3535 }, { "epoch": 0.76, "grad_norm": 0.1426243633031845, "learning_rate": 1.4166850519149794e-06, "loss": 0.4749, "step": 3536 }, { "epoch": 0.76, "grad_norm": 0.1951218992471695, "learning_rate": 1.4142525522578082e-06, "loss": 0.4723, "step": 3537 }, { "epoch": 0.76, "grad_norm": 0.15164697170257568, "learning_rate": 1.41182179869795e-06, "loss": 0.5262, "step": 3538 }, { "epoch": 0.76, "grad_norm": 0.15860851109027863, "learning_rate": 1.409392792419073e-06, "loss": 0.5282, "step": 3539 }, { "epoch": 0.76, "grad_norm": 0.1473582684993744, "learning_rate": 1.406965534603995e-06, "loss": 0.5385, "step": 3540 }, { "epoch": 0.76, "grad_norm": 0.15201956033706665, "learning_rate": 1.404540026434681e-06, "loss": 0.5183, "step": 3541 }, { "epoch": 0.76, "grad_norm": 0.18102842569351196, "learning_rate": 1.4021162690922441e-06, "loss": 0.5474, 
"step": 3542 }, { "epoch": 0.76, "grad_norm": 0.13703973591327667, "learning_rate": 1.3996942637569438e-06, "loss": 0.5333, "step": 3543 }, { "epoch": 0.76, "grad_norm": 0.14135409891605377, "learning_rate": 1.397274011608189e-06, "loss": 0.5164, "step": 3544 }, { "epoch": 0.76, "grad_norm": 0.14163129031658173, "learning_rate": 1.3948555138245295e-06, "loss": 0.5044, "step": 3545 }, { "epoch": 0.76, "grad_norm": 0.17711517214775085, "learning_rate": 1.3924387715836706e-06, "loss": 0.5235, "step": 3546 }, { "epoch": 0.76, "grad_norm": 0.14216595888137817, "learning_rate": 1.390023786062452e-06, "loss": 0.4795, "step": 3547 }, { "epoch": 0.76, "grad_norm": 0.16309070587158203, "learning_rate": 1.3876105584368653e-06, "loss": 0.5377, "step": 3548 }, { "epoch": 0.76, "grad_norm": 0.1565747708082199, "learning_rate": 1.3851990898820439e-06, "loss": 0.5447, "step": 3549 }, { "epoch": 0.76, "grad_norm": 0.16009236872196198, "learning_rate": 1.3827893815722614e-06, "loss": 0.5253, "step": 3550 }, { "epoch": 0.76, "grad_norm": 0.14546410739421844, "learning_rate": 1.3803814346809386e-06, "loss": 0.5234, "step": 3551 }, { "epoch": 0.77, "grad_norm": 0.16906693577766418, "learning_rate": 1.3779752503806375e-06, "loss": 0.5151, "step": 3552 }, { "epoch": 0.77, "grad_norm": 0.16307243704795837, "learning_rate": 1.3755708298430614e-06, "loss": 0.4965, "step": 3553 }, { "epoch": 0.77, "grad_norm": 0.17134273052215576, "learning_rate": 1.3731681742390558e-06, "loss": 0.4913, "step": 3554 }, { "epoch": 0.77, "grad_norm": 0.1275198608636856, "learning_rate": 1.3707672847386021e-06, "loss": 0.4962, "step": 3555 }, { "epoch": 0.77, "grad_norm": 0.14432884752750397, "learning_rate": 1.368368162510829e-06, "loss": 0.4881, "step": 3556 }, { "epoch": 0.77, "grad_norm": 0.17812266945838928, "learning_rate": 1.3659708087239981e-06, "loss": 0.5165, "step": 3557 }, { "epoch": 0.77, "grad_norm": 0.13962845504283905, "learning_rate": 1.363575224545512e-06, "loss": 0.5171, "step": 3558 }, { "epoch": 0.77, "grad_norm": 0.15105730295181274, "learning_rate": 1.3611814111419163e-06, "loss": 0.5498, "step": 3559 }, { "epoch": 0.77, "grad_norm": 0.12979727983474731, "learning_rate": 1.3587893696788868e-06, "loss": 0.4992, "step": 3560 }, { "epoch": 0.77, "grad_norm": 0.13221748173236847, "learning_rate": 1.3563991013212424e-06, "loss": 0.5035, "step": 3561 }, { "epoch": 0.77, "grad_norm": 0.19586306810379028, "learning_rate": 1.3540106072329323e-06, "loss": 0.5264, "step": 3562 }, { "epoch": 0.77, "grad_norm": 0.17884129285812378, "learning_rate": 1.3516238885770477e-06, "loss": 0.5184, "step": 3563 }, { "epoch": 0.77, "grad_norm": 0.16461104154586792, "learning_rate": 1.349238946515813e-06, "loss": 0.5141, "step": 3564 }, { "epoch": 0.77, "grad_norm": 0.14698609709739685, "learning_rate": 1.3468557822105864e-06, "loss": 0.5084, "step": 3565 }, { "epoch": 0.77, "grad_norm": 0.1535801738500595, "learning_rate": 1.344474396821865e-06, "loss": 0.5352, "step": 3566 }, { "epoch": 0.77, "grad_norm": 0.20536081492900848, "learning_rate": 1.3420947915092708e-06, "loss": 0.5344, "step": 3567 }, { "epoch": 0.77, "grad_norm": 0.1282234936952591, "learning_rate": 1.3397169674315668e-06, "loss": 0.5156, "step": 3568 }, { "epoch": 0.77, "grad_norm": 0.16148659586906433, "learning_rate": 1.337340925746648e-06, "loss": 0.5496, "step": 3569 }, { "epoch": 0.77, "grad_norm": 0.13853482902050018, "learning_rate": 1.3349666676115358e-06, "loss": 0.5359, "step": 3570 }, { "epoch": 0.77, "grad_norm": 0.1656581163406372, "learning_rate": 
1.332594194182389e-06, "loss": 0.4921, "step": 3571 }, { "epoch": 0.77, "grad_norm": 0.20992939174175262, "learning_rate": 1.3302235066144948e-06, "loss": 0.5248, "step": 3572 }, { "epoch": 0.77, "grad_norm": 0.15347984433174133, "learning_rate": 1.3278546060622727e-06, "loss": 0.5024, "step": 3573 }, { "epoch": 0.77, "grad_norm": 0.15400084853172302, "learning_rate": 1.3254874936792672e-06, "loss": 0.5103, "step": 3574 }, { "epoch": 0.77, "grad_norm": 0.23529481887817383, "learning_rate": 1.3231221706181575e-06, "loss": 0.4866, "step": 3575 }, { "epoch": 0.77, "grad_norm": 0.18368123471736908, "learning_rate": 1.3207586380307486e-06, "loss": 0.4999, "step": 3576 }, { "epoch": 0.77, "grad_norm": 0.1438288390636444, "learning_rate": 1.318396897067975e-06, "loss": 0.6058, "step": 3577 }, { "epoch": 0.77, "grad_norm": 0.13895906507968903, "learning_rate": 1.3160369488798984e-06, "loss": 0.4721, "step": 3578 }, { "epoch": 0.77, "grad_norm": 0.1586867719888687, "learning_rate": 1.3136787946157055e-06, "loss": 0.5271, "step": 3579 }, { "epoch": 0.77, "grad_norm": 0.15690717101097107, "learning_rate": 1.3113224354237113e-06, "loss": 0.5475, "step": 3580 }, { "epoch": 0.77, "grad_norm": 0.15662527084350586, "learning_rate": 1.3089678724513589e-06, "loss": 0.5388, "step": 3581 }, { "epoch": 0.77, "grad_norm": 0.12812086939811707, "learning_rate": 1.306615106845211e-06, "loss": 0.4969, "step": 3582 }, { "epoch": 0.77, "grad_norm": 0.14390747249126434, "learning_rate": 1.3042641397509597e-06, "loss": 0.4779, "step": 3583 }, { "epoch": 0.77, "grad_norm": 0.1376083791255951, "learning_rate": 1.30191497231342e-06, "loss": 0.5654, "step": 3584 }, { "epoch": 0.77, "grad_norm": 0.20095455646514893, "learning_rate": 1.299567605676531e-06, "loss": 0.518, "step": 3585 }, { "epoch": 0.77, "grad_norm": 0.16395071148872375, "learning_rate": 1.2972220409833552e-06, "loss": 0.5361, "step": 3586 }, { "epoch": 0.77, "grad_norm": 0.12365875393152237, "learning_rate": 1.2948782793760745e-06, "loss": 0.5278, "step": 3587 }, { "epoch": 0.77, "grad_norm": 0.1766958236694336, "learning_rate": 1.2925363219959958e-06, "loss": 0.569, "step": 3588 }, { "epoch": 0.77, "grad_norm": 0.15711119771003723, "learning_rate": 1.2901961699835475e-06, "loss": 0.542, "step": 3589 }, { "epoch": 0.77, "grad_norm": 0.16060031950473785, "learning_rate": 1.2878578244782775e-06, "loss": 0.5658, "step": 3590 }, { "epoch": 0.77, "grad_norm": 0.15911975502967834, "learning_rate": 1.2855212866188566e-06, "loss": 0.5181, "step": 3591 }, { "epoch": 0.77, "grad_norm": 0.16181927919387817, "learning_rate": 1.2831865575430702e-06, "loss": 0.5686, "step": 3592 }, { "epoch": 0.77, "grad_norm": 0.1953277289867401, "learning_rate": 1.2808536383878295e-06, "loss": 0.5062, "step": 3593 }, { "epoch": 0.77, "grad_norm": 0.17257684469223022, "learning_rate": 1.2785225302891568e-06, "loss": 0.4755, "step": 3594 }, { "epoch": 0.77, "grad_norm": 0.17315979301929474, "learning_rate": 1.2761932343821992e-06, "loss": 0.5166, "step": 3595 }, { "epoch": 0.77, "grad_norm": 0.19010502099990845, "learning_rate": 1.2738657518012188e-06, "loss": 0.5653, "step": 3596 }, { "epoch": 0.77, "grad_norm": 0.1522989571094513, "learning_rate": 1.2715400836795939e-06, "loss": 0.5601, "step": 3597 }, { "epoch": 0.78, "grad_norm": 0.15732337534427643, "learning_rate": 1.2692162311498219e-06, "loss": 0.5467, "step": 3598 }, { "epoch": 0.78, "grad_norm": 0.16109420359134674, "learning_rate": 1.266894195343511e-06, "loss": 0.5328, "step": 3599 }, { "epoch": 0.78, "grad_norm": 
0.16374173760414124, "learning_rate": 1.2645739773913911e-06, "loss": 0.5324, "step": 3600 }, { "epoch": 0.78, "grad_norm": 0.18841396272182465, "learning_rate": 1.2622555784232992e-06, "loss": 0.4905, "step": 3601 }, { "epoch": 0.78, "grad_norm": 0.17438913881778717, "learning_rate": 1.259938999568196e-06, "loss": 0.4836, "step": 3602 }, { "epoch": 0.78, "grad_norm": 0.1530585139989853, "learning_rate": 1.2576242419541502e-06, "loss": 0.4937, "step": 3603 }, { "epoch": 0.78, "grad_norm": 0.16211232542991638, "learning_rate": 1.2553113067083417e-06, "loss": 0.5307, "step": 3604 }, { "epoch": 0.78, "grad_norm": 0.18766599893569946, "learning_rate": 1.2530001949570686e-06, "loss": 0.4523, "step": 3605 }, { "epoch": 0.78, "grad_norm": 0.14365822076797485, "learning_rate": 1.2506909078257357e-06, "loss": 0.5097, "step": 3606 }, { "epoch": 0.78, "grad_norm": 0.13116493821144104, "learning_rate": 1.2483834464388622e-06, "loss": 0.5036, "step": 3607 }, { "epoch": 0.78, "grad_norm": 0.15349233150482178, "learning_rate": 1.2460778119200778e-06, "loss": 0.4983, "step": 3608 }, { "epoch": 0.78, "grad_norm": 0.15856203436851501, "learning_rate": 1.2437740053921238e-06, "loss": 0.4921, "step": 3609 }, { "epoch": 0.78, "grad_norm": 0.13519662618637085, "learning_rate": 1.24147202797685e-06, "loss": 0.5291, "step": 3610 }, { "epoch": 0.78, "grad_norm": 0.14394241571426392, "learning_rate": 1.2391718807952142e-06, "loss": 0.5235, "step": 3611 }, { "epoch": 0.78, "grad_norm": 0.12606112658977509, "learning_rate": 1.236873564967284e-06, "loss": 0.4571, "step": 3612 }, { "epoch": 0.78, "grad_norm": 0.1665707379579544, "learning_rate": 1.2345770816122388e-06, "loss": 0.5432, "step": 3613 }, { "epoch": 0.78, "grad_norm": 0.14036637544631958, "learning_rate": 1.2322824318483568e-06, "loss": 0.4873, "step": 3614 }, { "epoch": 0.78, "grad_norm": 0.1713072657585144, "learning_rate": 1.2299896167930358e-06, "loss": 0.5134, "step": 3615 }, { "epoch": 0.78, "grad_norm": 0.15142671763896942, "learning_rate": 1.227698637562768e-06, "loss": 0.5193, "step": 3616 }, { "epoch": 0.78, "grad_norm": 0.148328959941864, "learning_rate": 1.2254094952731594e-06, "loss": 0.5107, "step": 3617 }, { "epoch": 0.78, "grad_norm": 0.15323348343372345, "learning_rate": 1.2231221910389196e-06, "loss": 0.5187, "step": 3618 }, { "epoch": 0.78, "grad_norm": 0.13149654865264893, "learning_rate": 1.2208367259738602e-06, "loss": 0.5422, "step": 3619 }, { "epoch": 0.78, "grad_norm": 0.15822039544582367, "learning_rate": 1.2185531011909008e-06, "loss": 0.493, "step": 3620 }, { "epoch": 0.78, "grad_norm": 0.1450645625591278, "learning_rate": 1.2162713178020641e-06, "loss": 0.4954, "step": 3621 }, { "epoch": 0.78, "grad_norm": 0.14330001175403595, "learning_rate": 1.2139913769184757e-06, "loss": 0.4457, "step": 3622 }, { "epoch": 0.78, "grad_norm": 0.1793079823255539, "learning_rate": 1.211713279650365e-06, "loss": 0.5186, "step": 3623 }, { "epoch": 0.78, "grad_norm": 0.1488538533449173, "learning_rate": 1.2094370271070599e-06, "loss": 0.5479, "step": 3624 }, { "epoch": 0.78, "grad_norm": 0.18175008893013, "learning_rate": 1.207162620396996e-06, "loss": 0.5202, "step": 3625 }, { "epoch": 0.78, "grad_norm": 0.15374873578548431, "learning_rate": 1.2048900606277036e-06, "loss": 0.5404, "step": 3626 }, { "epoch": 0.78, "grad_norm": 0.1646018624305725, "learning_rate": 1.2026193489058185e-06, "loss": 0.5023, "step": 3627 }, { "epoch": 0.78, "grad_norm": 0.13878245651721954, "learning_rate": 1.2003504863370746e-06, "loss": 0.4892, "step": 3628 }, { 
"epoch": 0.78, "grad_norm": 0.1426292210817337, "learning_rate": 1.1980834740263065e-06, "loss": 0.5052, "step": 3629 }, { "epoch": 0.78, "grad_norm": 0.15884622931480408, "learning_rate": 1.195818313077447e-06, "loss": 0.5517, "step": 3630 }, { "epoch": 0.78, "grad_norm": 0.15096734464168549, "learning_rate": 1.1935550045935252e-06, "loss": 0.4624, "step": 3631 }, { "epoch": 0.78, "grad_norm": 0.1437617689371109, "learning_rate": 1.1912935496766719e-06, "loss": 0.4659, "step": 3632 }, { "epoch": 0.78, "grad_norm": 0.22621825337409973, "learning_rate": 1.1890339494281133e-06, "loss": 0.4706, "step": 3633 }, { "epoch": 0.78, "grad_norm": 0.17736200988292694, "learning_rate": 1.186776204948173e-06, "loss": 0.5366, "step": 3634 }, { "epoch": 0.78, "grad_norm": 0.1408076137304306, "learning_rate": 1.1845203173362725e-06, "loss": 0.5448, "step": 3635 }, { "epoch": 0.78, "grad_norm": 0.15870949625968933, "learning_rate": 1.182266287690924e-06, "loss": 0.4851, "step": 3636 }, { "epoch": 0.78, "grad_norm": 0.12970934808254242, "learning_rate": 1.1800141171097412e-06, "loss": 0.5177, "step": 3637 }, { "epoch": 0.78, "grad_norm": 0.15456534922122955, "learning_rate": 1.177763806689427e-06, "loss": 0.4997, "step": 3638 }, { "epoch": 0.78, "grad_norm": 0.14675313234329224, "learning_rate": 1.175515357525781e-06, "loss": 0.5198, "step": 3639 }, { "epoch": 0.78, "grad_norm": 0.1615784913301468, "learning_rate": 1.173268770713701e-06, "loss": 0.5251, "step": 3640 }, { "epoch": 0.78, "grad_norm": 0.19272805750370026, "learning_rate": 1.1710240473471685e-06, "loss": 0.5331, "step": 3641 }, { "epoch": 0.78, "grad_norm": 0.15185219049453735, "learning_rate": 1.1687811885192662e-06, "loss": 0.5372, "step": 3642 }, { "epoch": 0.78, "grad_norm": 0.1498749703168869, "learning_rate": 1.1665401953221622e-06, "loss": 0.4953, "step": 3643 }, { "epoch": 0.78, "grad_norm": 0.12573790550231934, "learning_rate": 1.16430106884712e-06, "loss": 0.556, "step": 3644 }, { "epoch": 0.79, "grad_norm": 0.15969648957252502, "learning_rate": 1.1620638101844938e-06, "loss": 0.4978, "step": 3645 }, { "epoch": 0.79, "grad_norm": 0.14067567884922028, "learning_rate": 1.159828420423728e-06, "loss": 0.5057, "step": 3646 }, { "epoch": 0.79, "grad_norm": 0.15133407711982727, "learning_rate": 1.157594900653357e-06, "loss": 0.5406, "step": 3647 }, { "epoch": 0.79, "grad_norm": 0.1627969741821289, "learning_rate": 1.1553632519610025e-06, "loss": 0.5282, "step": 3648 }, { "epoch": 0.79, "grad_norm": 0.163666769862175, "learning_rate": 1.1531334754333772e-06, "loss": 0.5655, "step": 3649 }, { "epoch": 0.79, "grad_norm": 0.1481485515832901, "learning_rate": 1.1509055721562839e-06, "loss": 0.5012, "step": 3650 }, { "epoch": 0.79, "grad_norm": 0.1437186896800995, "learning_rate": 1.148679543214608e-06, "loss": 0.4814, "step": 3651 }, { "epoch": 0.79, "grad_norm": 0.1677170991897583, "learning_rate": 1.1464553896923264e-06, "loss": 0.5308, "step": 3652 }, { "epoch": 0.79, "grad_norm": 0.15452422201633453, "learning_rate": 1.1442331126725014e-06, "loss": 0.4929, "step": 3653 }, { "epoch": 0.79, "grad_norm": 0.1445283144712448, "learning_rate": 1.1420127132372839e-06, "loss": 0.5104, "step": 3654 }, { "epoch": 0.79, "grad_norm": 0.13753926753997803, "learning_rate": 1.1397941924679046e-06, "loss": 0.4942, "step": 3655 }, { "epoch": 0.79, "grad_norm": 0.1484784483909607, "learning_rate": 1.1375775514446846e-06, "loss": 0.5266, "step": 3656 }, { "epoch": 0.79, "grad_norm": 0.21680472791194916, "learning_rate": 1.1353627912470289e-06, "loss": 0.5809, 
"step": 3657 }, { "epoch": 0.79, "grad_norm": 0.14361554384231567, "learning_rate": 1.1331499129534252e-06, "loss": 0.5438, "step": 3658 }, { "epoch": 0.79, "grad_norm": 0.14105179905891418, "learning_rate": 1.1309389176414471e-06, "loss": 0.5111, "step": 3659 }, { "epoch": 0.79, "grad_norm": 0.1748628318309784, "learning_rate": 1.128729806387746e-06, "loss": 0.537, "step": 3660 }, { "epoch": 0.79, "grad_norm": 0.162385493516922, "learning_rate": 1.1265225802680623e-06, "loss": 0.5325, "step": 3661 }, { "epoch": 0.79, "grad_norm": 0.20901146531105042, "learning_rate": 1.124317240357216e-06, "loss": 0.5093, "step": 3662 }, { "epoch": 0.79, "grad_norm": 0.14778484404087067, "learning_rate": 1.122113787729106e-06, "loss": 0.5484, "step": 3663 }, { "epoch": 0.79, "grad_norm": 0.1553722620010376, "learning_rate": 1.119912223456715e-06, "loss": 0.5044, "step": 3664 }, { "epoch": 0.79, "grad_norm": 0.15933193266391754, "learning_rate": 1.117712548612106e-06, "loss": 0.5267, "step": 3665 }, { "epoch": 0.79, "grad_norm": 0.14769431948661804, "learning_rate": 1.1155147642664217e-06, "loss": 0.5027, "step": 3666 }, { "epoch": 0.79, "grad_norm": 0.15224431455135345, "learning_rate": 1.1133188714898846e-06, "loss": 0.5068, "step": 3667 }, { "epoch": 0.79, "grad_norm": 0.25812146067619324, "learning_rate": 1.1111248713517935e-06, "loss": 0.516, "step": 3668 }, { "epoch": 0.79, "grad_norm": 0.1287028193473816, "learning_rate": 1.1089327649205301e-06, "loss": 0.497, "step": 3669 }, { "epoch": 0.79, "grad_norm": 0.17315033078193665, "learning_rate": 1.1067425532635463e-06, "loss": 0.5764, "step": 3670 }, { "epoch": 0.79, "grad_norm": 0.17579080164432526, "learning_rate": 1.1045542374473821e-06, "loss": 0.508, "step": 3671 }, { "epoch": 0.79, "grad_norm": 0.1450372189283371, "learning_rate": 1.1023678185376474e-06, "loss": 0.5104, "step": 3672 }, { "epoch": 0.79, "grad_norm": 0.2039722353219986, "learning_rate": 1.1001832975990274e-06, "loss": 0.5159, "step": 3673 }, { "epoch": 0.79, "grad_norm": 0.15762609243392944, "learning_rate": 1.0980006756952882e-06, "loss": 0.5387, "step": 3674 }, { "epoch": 0.79, "grad_norm": 0.14661352336406708, "learning_rate": 1.095819953889265e-06, "loss": 0.4672, "step": 3675 }, { "epoch": 0.79, "grad_norm": 0.18748416006565094, "learning_rate": 1.0936411332428732e-06, "loss": 0.4949, "step": 3676 }, { "epoch": 0.79, "grad_norm": 0.18095709383487701, "learning_rate": 1.091464214817099e-06, "loss": 0.5316, "step": 3677 }, { "epoch": 0.79, "grad_norm": 0.13158752024173737, "learning_rate": 1.089289199672004e-06, "loss": 0.4978, "step": 3678 }, { "epoch": 0.79, "grad_norm": 0.15241560339927673, "learning_rate": 1.0871160888667242e-06, "loss": 0.5195, "step": 3679 }, { "epoch": 0.79, "grad_norm": 0.13921800255775452, "learning_rate": 1.084944883459464e-06, "loss": 0.5269, "step": 3680 }, { "epoch": 0.79, "grad_norm": 0.16038571298122406, "learning_rate": 1.0827755845075044e-06, "loss": 0.5714, "step": 3681 }, { "epoch": 0.79, "grad_norm": 0.1517871618270874, "learning_rate": 1.0806081930671947e-06, "loss": 0.4976, "step": 3682 }, { "epoch": 0.79, "grad_norm": 0.1371169090270996, "learning_rate": 1.0784427101939553e-06, "loss": 0.5421, "step": 3683 }, { "epoch": 0.79, "grad_norm": 0.13309676945209503, "learning_rate": 1.0762791369422838e-06, "loss": 0.4903, "step": 3684 }, { "epoch": 0.79, "grad_norm": 0.15115486085414886, "learning_rate": 1.0741174743657385e-06, "loss": 0.5011, "step": 3685 }, { "epoch": 0.79, "grad_norm": 0.1870022863149643, "learning_rate": 
1.0719577235169537e-06, "loss": 0.5292, "step": 3686 }, { "epoch": 0.79, "grad_norm": 0.37398022413253784, "learning_rate": 1.0697998854476294e-06, "loss": 0.5336, "step": 3687 }, { "epoch": 0.79, "grad_norm": 0.1611040085554123, "learning_rate": 1.0676439612085353e-06, "loss": 0.5077, "step": 3688 }, { "epoch": 0.79, "grad_norm": 0.14922092854976654, "learning_rate": 1.0654899518495104e-06, "loss": 0.5461, "step": 3689 }, { "epoch": 0.79, "grad_norm": 0.1831667125225067, "learning_rate": 1.0633378584194593e-06, "loss": 0.4868, "step": 3690 }, { "epoch": 0.8, "grad_norm": 0.15320684015750885, "learning_rate": 1.0611876819663557e-06, "loss": 0.5232, "step": 3691 }, { "epoch": 0.8, "grad_norm": 0.14661262929439545, "learning_rate": 1.059039423537237e-06, "loss": 0.485, "step": 3692 }, { "epoch": 0.8, "grad_norm": 0.13266430795192719, "learning_rate": 1.0568930841782088e-06, "loss": 0.5187, "step": 3693 }, { "epoch": 0.8, "grad_norm": 0.22684414684772491, "learning_rate": 1.054748664934443e-06, "loss": 0.5477, "step": 3694 }, { "epoch": 0.8, "grad_norm": 0.1602558046579361, "learning_rate": 1.0526061668501708e-06, "loss": 0.4672, "step": 3695 }, { "epoch": 0.8, "grad_norm": 0.1579863578081131, "learning_rate": 1.0504655909686978e-06, "loss": 0.5119, "step": 3696 }, { "epoch": 0.8, "grad_norm": 0.1422806680202484, "learning_rate": 1.048326938332384e-06, "loss": 0.549, "step": 3697 }, { "epoch": 0.8, "grad_norm": 0.13555404543876648, "learning_rate": 1.0461902099826577e-06, "loss": 0.5839, "step": 3698 }, { "epoch": 0.8, "grad_norm": 0.16685040295124054, "learning_rate": 1.0440554069600112e-06, "loss": 0.523, "step": 3699 }, { "epoch": 0.8, "grad_norm": 0.15756739675998688, "learning_rate": 1.0419225303039943e-06, "loss": 0.4513, "step": 3700 }, { "epoch": 0.8, "grad_norm": 0.15640777349472046, "learning_rate": 1.0397915810532227e-06, "loss": 0.525, "step": 3701 }, { "epoch": 0.8, "grad_norm": 0.15122468769550323, "learning_rate": 1.0376625602453733e-06, "loss": 0.5116, "step": 3702 }, { "epoch": 0.8, "grad_norm": 0.15925420820713043, "learning_rate": 1.0355354689171831e-06, "loss": 0.5259, "step": 3703 }, { "epoch": 0.8, "grad_norm": 0.14913156628608704, "learning_rate": 1.0334103081044504e-06, "loss": 0.5148, "step": 3704 }, { "epoch": 0.8, "grad_norm": 0.15156058967113495, "learning_rate": 1.031287078842031e-06, "loss": 0.5239, "step": 3705 }, { "epoch": 0.8, "grad_norm": 0.1923210173845291, "learning_rate": 1.0291657821638435e-06, "loss": 0.5351, "step": 3706 }, { "epoch": 0.8, "grad_norm": 0.16889818012714386, "learning_rate": 1.0270464191028618e-06, "loss": 0.5231, "step": 3707 }, { "epoch": 0.8, "grad_norm": 0.13356614112854004, "learning_rate": 1.024928990691121e-06, "loss": 0.506, "step": 3708 }, { "epoch": 0.8, "grad_norm": 0.1991608887910843, "learning_rate": 1.0228134979597126e-06, "loss": 0.5501, "step": 3709 }, { "epoch": 0.8, "grad_norm": 0.15821781754493713, "learning_rate": 1.0206999419387881e-06, "loss": 0.5371, "step": 3710 }, { "epoch": 0.8, "grad_norm": 0.1407308578491211, "learning_rate": 1.0185883236575533e-06, "loss": 0.5072, "step": 3711 }, { "epoch": 0.8, "grad_norm": 0.14752401411533356, "learning_rate": 1.0164786441442698e-06, "loss": 0.5163, "step": 3712 }, { "epoch": 0.8, "grad_norm": 0.14390012621879578, "learning_rate": 1.0143709044262574e-06, "loss": 0.4969, "step": 3713 }, { "epoch": 0.8, "grad_norm": 0.1694592982530594, "learning_rate": 1.0122651055298898e-06, "loss": 0.4924, "step": 3714 }, { "epoch": 0.8, "grad_norm": 0.15931564569473267, "learning_rate": 
1.0101612484805967e-06, "loss": 0.4842, "step": 3715 }, { "epoch": 0.8, "grad_norm": 0.16370849311351776, "learning_rate": 1.0080593343028621e-06, "loss": 0.497, "step": 3716 }, { "epoch": 0.8, "grad_norm": 0.16331344842910767, "learning_rate": 1.005959364020222e-06, "loss": 0.4919, "step": 3717 }, { "epoch": 0.8, "grad_norm": 0.1296970099210739, "learning_rate": 1.0038613386552687e-06, "loss": 0.5674, "step": 3718 }, { "epoch": 0.8, "grad_norm": 0.15003569424152374, "learning_rate": 1.001765259229644e-06, "loss": 0.5164, "step": 3719 }, { "epoch": 0.8, "grad_norm": 0.14973247051239014, "learning_rate": 9.996711267640451e-07, "loss": 0.4997, "step": 3720 }, { "epoch": 0.8, "grad_norm": 0.14696918427944183, "learning_rate": 9.975789422782205e-07, "loss": 0.4806, "step": 3721 }, { "epoch": 0.8, "grad_norm": 0.14564906060695648, "learning_rate": 9.95488706790969e-07, "loss": 0.5491, "step": 3722 }, { "epoch": 0.8, "grad_norm": 0.18390415608882904, "learning_rate": 9.934004213201431e-07, "loss": 0.5264, "step": 3723 }, { "epoch": 0.8, "grad_norm": 0.1590055525302887, "learning_rate": 9.913140868826405e-07, "loss": 0.497, "step": 3724 }, { "epoch": 0.8, "grad_norm": 0.1445043832063675, "learning_rate": 9.892297044944133e-07, "loss": 0.5089, "step": 3725 }, { "epoch": 0.8, "grad_norm": 0.15211768448352814, "learning_rate": 9.871472751704625e-07, "loss": 0.5093, "step": 3726 }, { "epoch": 0.8, "grad_norm": 0.1753348559141159, "learning_rate": 9.85066799924836e-07, "loss": 0.5038, "step": 3727 }, { "epoch": 0.8, "grad_norm": 0.14900852739810944, "learning_rate": 9.829882797706336e-07, "loss": 0.4721, "step": 3728 }, { "epoch": 0.8, "grad_norm": 0.1514863520860672, "learning_rate": 9.809117157199982e-07, "loss": 0.5869, "step": 3729 }, { "epoch": 0.8, "grad_norm": 0.15811176598072052, "learning_rate": 9.788371087841236e-07, "loss": 0.5396, "step": 3730 }, { "epoch": 0.8, "grad_norm": 0.1752791553735733, "learning_rate": 9.767644599732517e-07, "loss": 0.4918, "step": 3731 }, { "epoch": 0.8, "grad_norm": 0.17730747163295746, "learning_rate": 9.74693770296667e-07, "loss": 0.5003, "step": 3732 }, { "epoch": 0.8, "grad_norm": 0.1746446192264557, "learning_rate": 9.72625040762702e-07, "loss": 0.4698, "step": 3733 }, { "epoch": 0.8, "grad_norm": 0.14243842661380768, "learning_rate": 9.705582723787348e-07, "loss": 0.5296, "step": 3734 }, { "epoch": 0.8, "grad_norm": 0.17734676599502563, "learning_rate": 9.684934661511909e-07, "loss": 0.5386, "step": 3735 }, { "epoch": 0.8, "grad_norm": 0.12994273006916046, "learning_rate": 9.664306230855342e-07, "loss": 0.5133, "step": 3736 }, { "epoch": 0.8, "grad_norm": 0.14365623891353607, "learning_rate": 9.643697441862782e-07, "loss": 0.4759, "step": 3737 }, { "epoch": 0.81, "grad_norm": 0.12920841574668884, "learning_rate": 9.623108304569783e-07, "loss": 0.4998, "step": 3738 }, { "epoch": 0.81, "grad_norm": 0.14855559170246124, "learning_rate": 9.6025388290023e-07, "loss": 0.5141, "step": 3739 }, { "epoch": 0.81, "grad_norm": 0.16959311068058014, "learning_rate": 9.58198902517678e-07, "loss": 0.5376, "step": 3740 }, { "epoch": 0.81, "grad_norm": 0.14248433709144592, "learning_rate": 9.561458903100025e-07, "loss": 0.5684, "step": 3741 }, { "epoch": 0.81, "grad_norm": 0.1944878101348877, "learning_rate": 9.540948472769278e-07, "loss": 0.4685, "step": 3742 }, { "epoch": 0.81, "grad_norm": 0.16937778890132904, "learning_rate": 9.520457744172218e-07, "loss": 0.5127, "step": 3743 }, { "epoch": 0.81, "grad_norm": 0.15535053610801697, "learning_rate": 9.499986727286869e-07, 
"loss": 0.509, "step": 3744 }, { "epoch": 0.81, "grad_norm": 0.13496124744415283, "learning_rate": 9.479535432081716e-07, "loss": 0.4883, "step": 3745 }, { "epoch": 0.81, "grad_norm": 0.15980157256126404, "learning_rate": 9.459103868515618e-07, "loss": 0.5115, "step": 3746 }, { "epoch": 0.81, "grad_norm": 0.13277289271354675, "learning_rate": 9.438692046537812e-07, "loss": 0.5383, "step": 3747 }, { "epoch": 0.81, "grad_norm": 0.15120829641819, "learning_rate": 9.418299976087964e-07, "loss": 0.4822, "step": 3748 }, { "epoch": 0.81, "grad_norm": 0.18064884841442108, "learning_rate": 9.397927667096058e-07, "loss": 0.4813, "step": 3749 }, { "epoch": 0.81, "grad_norm": 0.15214307606220245, "learning_rate": 9.377575129482513e-07, "loss": 0.538, "step": 3750 }, { "epoch": 0.81, "grad_norm": 0.13899169862270355, "learning_rate": 9.357242373158076e-07, "loss": 0.5259, "step": 3751 }, { "epoch": 0.81, "grad_norm": 0.17859694361686707, "learning_rate": 9.336929408023887e-07, "loss": 0.5298, "step": 3752 }, { "epoch": 0.81, "grad_norm": 0.16139504313468933, "learning_rate": 9.316636243971472e-07, "loss": 0.47, "step": 3753 }, { "epoch": 0.81, "grad_norm": 0.16516685485839844, "learning_rate": 9.29636289088266e-07, "loss": 0.4834, "step": 3754 }, { "epoch": 0.81, "grad_norm": 0.1459476500749588, "learning_rate": 9.27610935862967e-07, "loss": 0.505, "step": 3755 }, { "epoch": 0.81, "grad_norm": 0.14627854526042938, "learning_rate": 9.255875657075053e-07, "loss": 0.5443, "step": 3756 }, { "epoch": 0.81, "grad_norm": 0.17981037497520447, "learning_rate": 9.235661796071704e-07, "loss": 0.5165, "step": 3757 }, { "epoch": 0.81, "grad_norm": 0.14158649742603302, "learning_rate": 9.215467785462873e-07, "loss": 0.5373, "step": 3758 }, { "epoch": 0.81, "grad_norm": 0.1404084414243698, "learning_rate": 9.195293635082125e-07, "loss": 0.5071, "step": 3759 }, { "epoch": 0.81, "grad_norm": 0.18543866276741028, "learning_rate": 9.175139354753382e-07, "loss": 0.4776, "step": 3760 }, { "epoch": 0.81, "grad_norm": 0.1633271872997284, "learning_rate": 9.155004954290842e-07, "loss": 0.5757, "step": 3761 }, { "epoch": 0.81, "grad_norm": 0.13727520406246185, "learning_rate": 9.134890443499068e-07, "loss": 0.489, "step": 3762 }, { "epoch": 0.81, "grad_norm": 0.14105379581451416, "learning_rate": 9.114795832172907e-07, "loss": 0.4545, "step": 3763 }, { "epoch": 0.81, "grad_norm": 0.15787868201732635, "learning_rate": 9.094721130097517e-07, "loss": 0.5232, "step": 3764 }, { "epoch": 0.81, "grad_norm": 0.12892010807991028, "learning_rate": 9.074666347048416e-07, "loss": 0.5527, "step": 3765 }, { "epoch": 0.81, "grad_norm": 0.1309516578912735, "learning_rate": 9.054631492791344e-07, "loss": 0.5209, "step": 3766 }, { "epoch": 0.81, "grad_norm": 0.14891640841960907, "learning_rate": 9.034616577082389e-07, "loss": 0.4782, "step": 3767 }, { "epoch": 0.81, "grad_norm": 0.17316175997257233, "learning_rate": 9.014621609667896e-07, "loss": 0.5075, "step": 3768 }, { "epoch": 0.81, "grad_norm": 0.17712554335594177, "learning_rate": 8.994646600284518e-07, "loss": 0.5551, "step": 3769 }, { "epoch": 0.81, "grad_norm": 0.17951372265815735, "learning_rate": 8.974691558659187e-07, "loss": 0.4612, "step": 3770 }, { "epoch": 0.81, "grad_norm": 0.18492546677589417, "learning_rate": 8.954756494509104e-07, "loss": 0.498, "step": 3771 }, { "epoch": 0.81, "grad_norm": 0.15967923402786255, "learning_rate": 8.934841417541767e-07, "loss": 0.5152, "step": 3772 }, { "epoch": 0.81, "grad_norm": 0.1444973647594452, "learning_rate": 8.914946337454894e-07, 
"loss": 0.4852, "step": 3773 }, { "epoch": 0.81, "grad_norm": 0.13344036042690277, "learning_rate": 8.8950712639365e-07, "loss": 0.5396, "step": 3774 }, { "epoch": 0.81, "grad_norm": 0.14624960720539093, "learning_rate": 8.87521620666486e-07, "loss": 0.4983, "step": 3775 }, { "epoch": 0.81, "grad_norm": 0.15818633139133453, "learning_rate": 8.855381175308475e-07, "loss": 0.4791, "step": 3776 }, { "epoch": 0.81, "grad_norm": 0.17238670587539673, "learning_rate": 8.835566179526118e-07, "loss": 0.475, "step": 3777 }, { "epoch": 0.81, "grad_norm": 0.16176079213619232, "learning_rate": 8.815771228966796e-07, "loss": 0.5353, "step": 3778 }, { "epoch": 0.81, "grad_norm": 0.17096221446990967, "learning_rate": 8.795996333269763e-07, "loss": 0.483, "step": 3779 }, { "epoch": 0.81, "grad_norm": 0.1631225347518921, "learning_rate": 8.776241502064508e-07, "loss": 0.5166, "step": 3780 }, { "epoch": 0.81, "grad_norm": 0.19986139237880707, "learning_rate": 8.756506744970722e-07, "loss": 0.529, "step": 3781 }, { "epoch": 0.81, "grad_norm": 0.1557171493768692, "learning_rate": 8.736792071598355e-07, "loss": 0.5267, "step": 3782 }, { "epoch": 0.81, "grad_norm": 0.15632902085781097, "learning_rate": 8.717097491547566e-07, "loss": 0.5189, "step": 3783 }, { "epoch": 0.82, "grad_norm": 0.16741085052490234, "learning_rate": 8.697423014408718e-07, "loss": 0.5474, "step": 3784 }, { "epoch": 0.82, "grad_norm": 0.15666568279266357, "learning_rate": 8.677768649762419e-07, "loss": 0.5306, "step": 3785 }, { "epoch": 0.82, "grad_norm": 0.20284314453601837, "learning_rate": 8.658134407179419e-07, "loss": 0.5003, "step": 3786 }, { "epoch": 0.82, "grad_norm": 0.1518256664276123, "learning_rate": 8.638520296220748e-07, "loss": 0.5322, "step": 3787 }, { "epoch": 0.82, "grad_norm": 0.18800678849220276, "learning_rate": 8.61892632643756e-07, "loss": 0.4763, "step": 3788 }, { "epoch": 0.82, "grad_norm": 0.1399422585964203, "learning_rate": 8.59935250737125e-07, "loss": 0.5293, "step": 3789 }, { "epoch": 0.82, "grad_norm": 0.14804938435554504, "learning_rate": 8.579798848553389e-07, "loss": 0.4703, "step": 3790 }, { "epoch": 0.82, "grad_norm": 0.15333673357963562, "learning_rate": 8.560265359505716e-07, "loss": 0.4947, "step": 3791 }, { "epoch": 0.82, "grad_norm": 0.1545214056968689, "learning_rate": 8.540752049740181e-07, "loss": 0.5079, "step": 3792 }, { "epoch": 0.82, "grad_norm": 0.137412428855896, "learning_rate": 8.521258928758864e-07, "loss": 0.4973, "step": 3793 }, { "epoch": 0.82, "grad_norm": 0.1467263251543045, "learning_rate": 8.501786006054047e-07, "loss": 0.5318, "step": 3794 }, { "epoch": 0.82, "grad_norm": 0.16581839323043823, "learning_rate": 8.482333291108141e-07, "loss": 0.5226, "step": 3795 }, { "epoch": 0.82, "grad_norm": 0.1452476680278778, "learning_rate": 8.462900793393775e-07, "loss": 0.5012, "step": 3796 }, { "epoch": 0.82, "grad_norm": 0.1618988811969757, "learning_rate": 8.443488522373694e-07, "loss": 0.501, "step": 3797 }, { "epoch": 0.82, "grad_norm": 0.1634100079536438, "learning_rate": 8.424096487500777e-07, "loss": 0.5288, "step": 3798 }, { "epoch": 0.82, "grad_norm": 0.17481021583080292, "learning_rate": 8.404724698218103e-07, "loss": 0.5575, "step": 3799 }, { "epoch": 0.82, "grad_norm": 0.13058480620384216, "learning_rate": 8.385373163958821e-07, "loss": 0.4976, "step": 3800 }, { "epoch": 0.82, "grad_norm": 0.1389196217060089, "learning_rate": 8.366041894146276e-07, "loss": 0.4854, "step": 3801 }, { "epoch": 0.82, "grad_norm": 0.15564516186714172, "learning_rate": 8.346730898193928e-07, "loss": 
0.4984, "step": 3802 }, { "epoch": 0.82, "grad_norm": 0.1349528729915619, "learning_rate": 8.327440185505353e-07, "loss": 0.5138, "step": 3803 }, { "epoch": 0.82, "grad_norm": 0.1407652646303177, "learning_rate": 8.308169765474278e-07, "loss": 0.4912, "step": 3804 }, { "epoch": 0.82, "grad_norm": 0.14387796819210052, "learning_rate": 8.2889196474845e-07, "loss": 0.5048, "step": 3805 }, { "epoch": 0.82, "grad_norm": 0.15386423468589783, "learning_rate": 8.269689840909967e-07, "loss": 0.5339, "step": 3806 }, { "epoch": 0.82, "grad_norm": 0.16335895657539368, "learning_rate": 8.250480355114748e-07, "loss": 0.5343, "step": 3807 }, { "epoch": 0.82, "grad_norm": 0.16175401210784912, "learning_rate": 8.231291199452956e-07, "loss": 0.52, "step": 3808 }, { "epoch": 0.82, "grad_norm": 0.15114691853523254, "learning_rate": 8.212122383268889e-07, "loss": 0.5034, "step": 3809 }, { "epoch": 0.82, "grad_norm": 0.13014435768127441, "learning_rate": 8.192973915896868e-07, "loss": 0.5266, "step": 3810 }, { "epoch": 0.82, "grad_norm": 0.1377837210893631, "learning_rate": 8.17384580666134e-07, "loss": 0.5326, "step": 3811 }, { "epoch": 0.82, "grad_norm": 0.17275045812129974, "learning_rate": 8.154738064876843e-07, "loss": 0.5156, "step": 3812 }, { "epoch": 0.82, "grad_norm": 0.1639435589313507, "learning_rate": 8.135650699847963e-07, "loss": 0.504, "step": 3813 }, { "epoch": 0.82, "grad_norm": 0.18835903704166412, "learning_rate": 8.116583720869398e-07, "loss": 0.5377, "step": 3814 }, { "epoch": 0.82, "grad_norm": 0.1467577964067459, "learning_rate": 8.097537137225909e-07, "loss": 0.5437, "step": 3815 }, { "epoch": 0.82, "grad_norm": 0.1413908451795578, "learning_rate": 8.078510958192337e-07, "loss": 0.5246, "step": 3816 }, { "epoch": 0.82, "grad_norm": 0.21813301742076874, "learning_rate": 8.05950519303354e-07, "loss": 0.48, "step": 3817 }, { "epoch": 0.82, "grad_norm": 0.14480313658714294, "learning_rate": 8.040519851004492e-07, "loss": 0.5298, "step": 3818 }, { "epoch": 0.82, "grad_norm": 0.16793721914291382, "learning_rate": 8.021554941350202e-07, "loss": 0.4885, "step": 3819 }, { "epoch": 0.82, "grad_norm": 0.15354284644126892, "learning_rate": 8.002610473305688e-07, "loss": 0.4743, "step": 3820 }, { "epoch": 0.82, "grad_norm": 0.15883216261863708, "learning_rate": 7.983686456096112e-07, "loss": 0.5344, "step": 3821 }, { "epoch": 0.82, "grad_norm": 0.16302940249443054, "learning_rate": 7.964782898936569e-07, "loss": 0.5251, "step": 3822 }, { "epoch": 0.82, "grad_norm": 0.1534924954175949, "learning_rate": 7.945899811032254e-07, "loss": 0.5438, "step": 3823 }, { "epoch": 0.82, "grad_norm": 0.1581207513809204, "learning_rate": 7.927037201578397e-07, "loss": 0.4707, "step": 3824 }, { "epoch": 0.82, "grad_norm": 0.16421711444854736, "learning_rate": 7.908195079760205e-07, "loss": 0.485, "step": 3825 }, { "epoch": 0.82, "grad_norm": 0.16686981916427612, "learning_rate": 7.889373454752964e-07, "loss": 0.5225, "step": 3826 }, { "epoch": 0.82, "grad_norm": 0.1350572556257248, "learning_rate": 7.870572335721949e-07, "loss": 0.5018, "step": 3827 }, { "epoch": 0.82, "grad_norm": 0.1447533518075943, "learning_rate": 7.851791731822461e-07, "loss": 0.5149, "step": 3828 }, { "epoch": 0.82, "grad_norm": 0.13988631963729858, "learning_rate": 7.833031652199819e-07, "loss": 0.5441, "step": 3829 }, { "epoch": 0.83, "grad_norm": 0.17618080973625183, "learning_rate": 7.814292105989308e-07, "loss": 0.5189, "step": 3830 }, { "epoch": 0.83, "grad_norm": 0.1565089076757431, "learning_rate": 7.795573102316267e-07, "loss": 0.5091, 
"step": 3831 }, { "epoch": 0.83, "grad_norm": 0.15142589807510376, "learning_rate": 7.776874650295984e-07, "loss": 0.4814, "step": 3832 }, { "epoch": 0.83, "grad_norm": 0.1675831824541092, "learning_rate": 7.758196759033765e-07, "loss": 0.4961, "step": 3833 }, { "epoch": 0.83, "grad_norm": 0.15488111972808838, "learning_rate": 7.739539437624933e-07, "loss": 0.552, "step": 3834 }, { "epoch": 0.83, "grad_norm": 0.133047953248024, "learning_rate": 7.720902695154725e-07, "loss": 0.506, "step": 3835 }, { "epoch": 0.83, "grad_norm": 0.13741527497768402, "learning_rate": 7.702286540698417e-07, "loss": 0.4968, "step": 3836 }, { "epoch": 0.83, "grad_norm": 0.13112328946590424, "learning_rate": 7.683690983321224e-07, "loss": 0.4906, "step": 3837 }, { "epoch": 0.83, "grad_norm": 0.17950129508972168, "learning_rate": 7.665116032078346e-07, "loss": 0.5324, "step": 3838 }, { "epoch": 0.83, "grad_norm": 0.21670496463775635, "learning_rate": 7.646561696014948e-07, "loss": 0.5378, "step": 3839 }, { "epoch": 0.83, "grad_norm": 0.14193449914455414, "learning_rate": 7.628027984166153e-07, "loss": 0.5395, "step": 3840 }, { "epoch": 0.83, "grad_norm": 0.14640313386917114, "learning_rate": 7.609514905557058e-07, "loss": 0.4765, "step": 3841 }, { "epoch": 0.83, "grad_norm": 0.15662898123264313, "learning_rate": 7.591022469202675e-07, "loss": 0.5274, "step": 3842 }, { "epoch": 0.83, "grad_norm": 0.15614978969097137, "learning_rate": 7.57255068410801e-07, "loss": 0.4858, "step": 3843 }, { "epoch": 0.83, "grad_norm": 0.13435639441013336, "learning_rate": 7.554099559267964e-07, "loss": 0.4774, "step": 3844 }, { "epoch": 0.83, "grad_norm": 0.1398366242647171, "learning_rate": 7.535669103667409e-07, "loss": 0.5893, "step": 3845 }, { "epoch": 0.83, "grad_norm": 0.14986996352672577, "learning_rate": 7.517259326281157e-07, "loss": 0.5105, "step": 3846 }, { "epoch": 0.83, "grad_norm": 0.15778091549873352, "learning_rate": 7.49887023607393e-07, "loss": 0.4488, "step": 3847 }, { "epoch": 0.83, "grad_norm": 0.16323697566986084, "learning_rate": 7.480501842000404e-07, "loss": 0.5533, "step": 3848 }, { "epoch": 0.83, "grad_norm": 0.14002352952957153, "learning_rate": 7.462154153005136e-07, "loss": 0.5196, "step": 3849 }, { "epoch": 0.83, "grad_norm": 0.1188010647892952, "learning_rate": 7.443827178022628e-07, "loss": 0.4912, "step": 3850 }, { "epoch": 0.83, "grad_norm": 0.14760838449001312, "learning_rate": 7.425520925977292e-07, "loss": 0.5157, "step": 3851 }, { "epoch": 0.83, "grad_norm": 0.19391202926635742, "learning_rate": 7.407235405783453e-07, "loss": 0.4939, "step": 3852 }, { "epoch": 0.83, "grad_norm": 0.1490384191274643, "learning_rate": 7.388970626345343e-07, "loss": 0.494, "step": 3853 }, { "epoch": 0.83, "grad_norm": 0.16639220714569092, "learning_rate": 7.370726596557059e-07, "loss": 0.488, "step": 3854 }, { "epoch": 0.83, "grad_norm": 0.16223375499248505, "learning_rate": 7.352503325302635e-07, "loss": 0.4825, "step": 3855 }, { "epoch": 0.83, "grad_norm": 0.16969801485538483, "learning_rate": 7.334300821455998e-07, "loss": 0.5288, "step": 3856 }, { "epoch": 0.83, "grad_norm": 0.1843784898519516, "learning_rate": 7.316119093880919e-07, "loss": 0.4818, "step": 3857 }, { "epoch": 0.83, "grad_norm": 0.139174684882164, "learning_rate": 7.297958151431094e-07, "loss": 0.5019, "step": 3858 }, { "epoch": 0.83, "grad_norm": 0.18277384340763092, "learning_rate": 7.279818002950079e-07, "loss": 0.5432, "step": 3859 }, { "epoch": 0.83, "grad_norm": 0.1524992436170578, "learning_rate": 7.26169865727131e-07, "loss": 0.5223, 
"step": 3860 }, { "epoch": 0.83, "grad_norm": 0.16654187440872192, "learning_rate": 7.243600123218109e-07, "loss": 0.4757, "step": 3861 }, { "epoch": 0.83, "grad_norm": 0.24710118770599365, "learning_rate": 7.225522409603608e-07, "loss": 0.5699, "step": 3862 }, { "epoch": 0.83, "grad_norm": 0.15701556205749512, "learning_rate": 7.207465525230878e-07, "loss": 0.5001, "step": 3863 }, { "epoch": 0.83, "grad_norm": 0.17674629390239716, "learning_rate": 7.189429478892762e-07, "loss": 0.4661, "step": 3864 }, { "epoch": 0.83, "grad_norm": 0.15791045129299164, "learning_rate": 7.171414279372041e-07, "loss": 0.4895, "step": 3865 }, { "epoch": 0.83, "grad_norm": 0.10984218865633011, "learning_rate": 7.153419935441303e-07, "loss": 0.4908, "step": 3866 }, { "epoch": 0.83, "grad_norm": 0.1386931836605072, "learning_rate": 7.135446455862954e-07, "loss": 0.452, "step": 3867 }, { "epoch": 0.83, "grad_norm": 0.17676003277301788, "learning_rate": 7.117493849389306e-07, "loss": 0.5278, "step": 3868 }, { "epoch": 0.83, "grad_norm": 0.1930963546037674, "learning_rate": 7.099562124762426e-07, "loss": 0.4919, "step": 3869 }, { "epoch": 0.83, "grad_norm": 0.13967633247375488, "learning_rate": 7.081651290714287e-07, "loss": 0.5333, "step": 3870 }, { "epoch": 0.83, "grad_norm": 0.18139459192752838, "learning_rate": 7.063761355966642e-07, "loss": 0.4855, "step": 3871 }, { "epoch": 0.83, "grad_norm": 0.13663552701473236, "learning_rate": 7.045892329231086e-07, "loss": 0.5479, "step": 3872 }, { "epoch": 0.83, "grad_norm": 0.1746217906475067, "learning_rate": 7.028044219209046e-07, "loss": 0.4923, "step": 3873 }, { "epoch": 0.83, "grad_norm": 0.14870743453502655, "learning_rate": 7.010217034591721e-07, "loss": 0.5018, "step": 3874 }, { "epoch": 0.83, "grad_norm": 0.1460588276386261, "learning_rate": 6.992410784060166e-07, "loss": 0.46, "step": 3875 }, { "epoch": 0.83, "grad_norm": 0.1792103797197342, "learning_rate": 6.974625476285191e-07, "loss": 0.524, "step": 3876 }, { "epoch": 0.84, "grad_norm": 0.18173110485076904, "learning_rate": 6.956861119927472e-07, "loss": 0.4626, "step": 3877 }, { "epoch": 0.84, "grad_norm": 0.1377502679824829, "learning_rate": 6.93911772363745e-07, "loss": 0.5192, "step": 3878 }, { "epoch": 0.84, "grad_norm": 0.17006491124629974, "learning_rate": 6.921395296055333e-07, "loss": 0.5051, "step": 3879 }, { "epoch": 0.84, "grad_norm": 0.13877364993095398, "learning_rate": 6.903693845811176e-07, "loss": 0.5102, "step": 3880 }, { "epoch": 0.84, "grad_norm": 0.17840033769607544, "learning_rate": 6.886013381524753e-07, "loss": 0.4961, "step": 3881 }, { "epoch": 0.84, "grad_norm": 0.1865067183971405, "learning_rate": 6.86835391180567e-07, "loss": 0.5206, "step": 3882 }, { "epoch": 0.84, "grad_norm": 0.20453877747058868, "learning_rate": 6.850715445253297e-07, "loss": 0.5632, "step": 3883 }, { "epoch": 0.84, "grad_norm": 0.15611490607261658, "learning_rate": 6.833097990456761e-07, "loss": 0.5682, "step": 3884 }, { "epoch": 0.84, "grad_norm": 0.15531837940216064, "learning_rate": 6.815501555994986e-07, "loss": 0.5113, "step": 3885 }, { "epoch": 0.84, "grad_norm": 0.13591976463794708, "learning_rate": 6.797926150436618e-07, "loss": 0.5462, "step": 3886 }, { "epoch": 0.84, "grad_norm": 0.1687079221010208, "learning_rate": 6.780371782340101e-07, "loss": 0.5001, "step": 3887 }, { "epoch": 0.84, "grad_norm": 0.16073539853096008, "learning_rate": 6.762838460253629e-07, "loss": 0.4732, "step": 3888 }, { "epoch": 0.84, "grad_norm": 0.14205871522426605, "learning_rate": 6.745326192715107e-07, "loss": 0.5361, 
"step": 3889 }, { "epoch": 0.84, "grad_norm": 0.152149498462677, "learning_rate": 6.727834988252258e-07, "loss": 0.4968, "step": 3890 }, { "epoch": 0.84, "grad_norm": 0.1546226292848587, "learning_rate": 6.71036485538249e-07, "loss": 0.5439, "step": 3891 }, { "epoch": 0.84, "grad_norm": 0.1516941487789154, "learning_rate": 6.692915802612965e-07, "loss": 0.5259, "step": 3892 }, { "epoch": 0.84, "grad_norm": 0.15468570590019226, "learning_rate": 6.675487838440608e-07, "loss": 0.4867, "step": 3893 }, { "epoch": 0.84, "grad_norm": 0.15356989204883575, "learning_rate": 6.658080971352026e-07, "loss": 0.4858, "step": 3894 }, { "epoch": 0.84, "grad_norm": 0.16798479855060577, "learning_rate": 6.640695209823588e-07, "loss": 0.5147, "step": 3895 }, { "epoch": 0.84, "grad_norm": 0.15462863445281982, "learning_rate": 6.623330562321378e-07, "loss": 0.517, "step": 3896 }, { "epoch": 0.84, "grad_norm": 0.13679371774196625, "learning_rate": 6.605987037301204e-07, "loss": 0.535, "step": 3897 }, { "epoch": 0.84, "grad_norm": 0.12895052134990692, "learning_rate": 6.588664643208559e-07, "loss": 0.5082, "step": 3898 }, { "epoch": 0.84, "grad_norm": 0.16611763834953308, "learning_rate": 6.571363388478686e-07, "loss": 0.495, "step": 3899 }, { "epoch": 0.84, "grad_norm": 0.12473352998495102, "learning_rate": 6.554083281536516e-07, "loss": 0.5251, "step": 3900 }, { "epoch": 0.84, "grad_norm": 0.15122053027153015, "learning_rate": 6.53682433079667e-07, "loss": 0.4709, "step": 3901 }, { "epoch": 0.84, "grad_norm": 0.15693899989128113, "learning_rate": 6.519586544663481e-07, "loss": 0.4572, "step": 3902 }, { "epoch": 0.84, "grad_norm": 0.18545718491077423, "learning_rate": 6.502369931530977e-07, "loss": 0.5047, "step": 3903 }, { "epoch": 0.84, "grad_norm": 0.20683102309703827, "learning_rate": 6.485174499782876e-07, "loss": 0.476, "step": 3904 }, { "epoch": 0.84, "grad_norm": 0.12381558865308762, "learning_rate": 6.468000257792583e-07, "loss": 0.5589, "step": 3905 }, { "epoch": 0.84, "grad_norm": 0.22837506234645844, "learning_rate": 6.450847213923162e-07, "loss": 0.512, "step": 3906 }, { "epoch": 0.84, "grad_norm": 0.15001285076141357, "learning_rate": 6.433715376527383e-07, "loss": 0.4689, "step": 3907 }, { "epoch": 0.84, "grad_norm": 0.1989048719406128, "learning_rate": 6.416604753947675e-07, "loss": 0.4834, "step": 3908 }, { "epoch": 0.84, "grad_norm": 0.23922041058540344, "learning_rate": 6.399515354516139e-07, "loss": 0.5496, "step": 3909 }, { "epoch": 0.84, "grad_norm": 0.15358422696590424, "learning_rate": 6.382447186554553e-07, "loss": 0.5441, "step": 3910 }, { "epoch": 0.84, "grad_norm": 0.19341875612735748, "learning_rate": 6.365400258374327e-07, "loss": 0.5052, "step": 3911 }, { "epoch": 0.84, "grad_norm": 0.1362185925245285, "learning_rate": 6.348374578276567e-07, "loss": 0.5318, "step": 3912 }, { "epoch": 0.84, "grad_norm": 0.172585591673851, "learning_rate": 6.331370154551986e-07, "loss": 0.5385, "step": 3913 }, { "epoch": 0.84, "grad_norm": 0.16115382313728333, "learning_rate": 6.314386995480987e-07, "loss": 0.5018, "step": 3914 }, { "epoch": 0.84, "grad_norm": 0.14296384155750275, "learning_rate": 6.297425109333605e-07, "loss": 0.5275, "step": 3915 }, { "epoch": 0.84, "grad_norm": 0.16052164137363434, "learning_rate": 6.280484504369505e-07, "loss": 0.5066, "step": 3916 }, { "epoch": 0.84, "grad_norm": 0.1424168050289154, "learning_rate": 6.263565188838011e-07, "loss": 0.4944, "step": 3917 }, { "epoch": 0.84, "grad_norm": 0.1381656974554062, "learning_rate": 6.246667170978049e-07, "loss": 0.5041, 
"step": 3918 }, { "epoch": 0.84, "grad_norm": 0.1506141573190689, "learning_rate": 6.229790459018203e-07, "loss": 0.5599, "step": 3919 }, { "epoch": 0.84, "grad_norm": 0.142376109957695, "learning_rate": 6.212935061176667e-07, "loss": 0.5435, "step": 3920 }, { "epoch": 0.84, "grad_norm": 0.1417161524295807, "learning_rate": 6.196100985661258e-07, "loss": 0.5334, "step": 3921 }, { "epoch": 0.84, "grad_norm": 0.16186197102069855, "learning_rate": 6.179288240669429e-07, "loss": 0.5081, "step": 3922 }, { "epoch": 0.85, "grad_norm": 0.18143245577812195, "learning_rate": 6.162496834388204e-07, "loss": 0.5346, "step": 3923 }, { "epoch": 0.85, "grad_norm": 0.16008998453617096, "learning_rate": 6.14572677499426e-07, "loss": 0.5284, "step": 3924 }, { "epoch": 0.85, "grad_norm": 0.1312318742275238, "learning_rate": 6.12897807065384e-07, "loss": 0.5083, "step": 3925 }, { "epoch": 0.85, "grad_norm": 0.15271489322185516, "learning_rate": 6.112250729522823e-07, "loss": 0.5676, "step": 3926 }, { "epoch": 0.85, "grad_norm": 0.1383863240480423, "learning_rate": 6.095544759746663e-07, "loss": 0.472, "step": 3927 }, { "epoch": 0.85, "grad_norm": 0.14017315208911896, "learning_rate": 6.078860169460416e-07, "loss": 0.4941, "step": 3928 }, { "epoch": 0.85, "grad_norm": 0.1404963880777359, "learning_rate": 6.062196966788736e-07, "loss": 0.5128, "step": 3929 }, { "epoch": 0.85, "grad_norm": 0.1775158941745758, "learning_rate": 6.045555159845828e-07, "loss": 0.5326, "step": 3930 }, { "epoch": 0.85, "grad_norm": 0.13232095539569855, "learning_rate": 6.028934756735516e-07, "loss": 0.4828, "step": 3931 }, { "epoch": 0.85, "grad_norm": 0.14679361879825592, "learning_rate": 6.012335765551186e-07, "loss": 0.5059, "step": 3932 }, { "epoch": 0.85, "grad_norm": 0.16096676886081696, "learning_rate": 5.995758194375794e-07, "loss": 0.4844, "step": 3933 }, { "epoch": 0.85, "grad_norm": 0.17318318784236908, "learning_rate": 5.979202051281891e-07, "loss": 0.506, "step": 3934 }, { "epoch": 0.85, "grad_norm": 0.1557616889476776, "learning_rate": 5.962667344331535e-07, "loss": 0.545, "step": 3935 }, { "epoch": 0.85, "grad_norm": 0.1732773780822754, "learning_rate": 5.946154081576411e-07, "loss": 0.5198, "step": 3936 }, { "epoch": 0.85, "grad_norm": 0.14775021374225616, "learning_rate": 5.929662271057729e-07, "loss": 0.5117, "step": 3937 }, { "epoch": 0.85, "grad_norm": 0.2609883248806, "learning_rate": 5.913191920806244e-07, "loss": 0.495, "step": 3938 }, { "epoch": 0.85, "grad_norm": 0.20081481337547302, "learning_rate": 5.896743038842279e-07, "loss": 0.51, "step": 3939 }, { "epoch": 0.85, "grad_norm": 0.17543698847293854, "learning_rate": 5.880315633175704e-07, "loss": 0.5292, "step": 3940 }, { "epoch": 0.85, "grad_norm": 0.15874987840652466, "learning_rate": 5.863909711805915e-07, "loss": 0.4689, "step": 3941 }, { "epoch": 0.85, "grad_norm": 0.12618225812911987, "learning_rate": 5.847525282721883e-07, "loss": 0.4914, "step": 3942 }, { "epoch": 0.85, "grad_norm": 0.12914496660232544, "learning_rate": 5.831162353902048e-07, "loss": 0.5027, "step": 3943 }, { "epoch": 0.85, "grad_norm": 0.13037589192390442, "learning_rate": 5.814820933314446e-07, "loss": 0.5111, "step": 3944 }, { "epoch": 0.85, "grad_norm": 0.14492201805114746, "learning_rate": 5.798501028916587e-07, "loss": 0.5404, "step": 3945 }, { "epoch": 0.85, "grad_norm": 0.17597924172878265, "learning_rate": 5.78220264865555e-07, "loss": 0.4962, "step": 3946 }, { "epoch": 0.85, "grad_norm": 0.1367659866809845, "learning_rate": 5.76592580046792e-07, "loss": 0.4624, "step": 3947 
}, { "epoch": 0.85, "grad_norm": 0.16057761013507843, "learning_rate": 5.749670492279757e-07, "loss": 0.4985, "step": 3948 }, { "epoch": 0.85, "grad_norm": 0.1518482267856598, "learning_rate": 5.733436732006692e-07, "loss": 0.512, "step": 3949 }, { "epoch": 0.85, "grad_norm": 0.14091022312641144, "learning_rate": 5.717224527553811e-07, "loss": 0.5218, "step": 3950 }, { "epoch": 0.85, "grad_norm": 0.13686485588550568, "learning_rate": 5.701033886815738e-07, "loss": 0.4916, "step": 3951 }, { "epoch": 0.85, "grad_norm": 0.13204288482666016, "learning_rate": 5.684864817676583e-07, "loss": 0.495, "step": 3952 }, { "epoch": 0.85, "grad_norm": 0.2482197880744934, "learning_rate": 5.668717328009954e-07, "loss": 0.5075, "step": 3953 }, { "epoch": 0.85, "grad_norm": 0.14586390554904938, "learning_rate": 5.65259142567896e-07, "loss": 0.504, "step": 3954 }, { "epoch": 0.85, "grad_norm": 0.15525588393211365, "learning_rate": 5.636487118536171e-07, "loss": 0.5429, "step": 3955 }, { "epoch": 0.85, "grad_norm": 0.1539800614118576, "learning_rate": 5.620404414423674e-07, "loss": 0.5228, "step": 3956 }, { "epoch": 0.85, "grad_norm": 0.16243867576122284, "learning_rate": 5.604343321173006e-07, "loss": 0.5141, "step": 3957 }, { "epoch": 0.85, "grad_norm": 0.17166343331336975, "learning_rate": 5.588303846605187e-07, "loss": 0.5474, "step": 3958 }, { "epoch": 0.85, "grad_norm": 0.1559562087059021, "learning_rate": 5.572285998530758e-07, "loss": 0.4877, "step": 3959 }, { "epoch": 0.85, "grad_norm": 0.12732228636741638, "learning_rate": 5.556289784749653e-07, "loss": 0.4967, "step": 3960 }, { "epoch": 0.85, "grad_norm": 0.15208043158054352, "learning_rate": 5.540315213051323e-07, "loss": 0.5032, "step": 3961 }, { "epoch": 0.85, "grad_norm": 0.14678843319416046, "learning_rate": 5.524362291214652e-07, "loss": 0.5706, "step": 3962 }, { "epoch": 0.85, "grad_norm": 0.1634489744901657, "learning_rate": 5.508431027008004e-07, "loss": 0.4835, "step": 3963 }, { "epoch": 0.85, "grad_norm": 0.13023607432842255, "learning_rate": 5.492521428189179e-07, "loss": 0.491, "step": 3964 }, { "epoch": 0.85, "grad_norm": 0.13974343240261078, "learning_rate": 5.476633502505436e-07, "loss": 0.5619, "step": 3965 }, { "epoch": 0.85, "grad_norm": 0.17008721828460693, "learning_rate": 5.460767257693489e-07, "loss": 0.4755, "step": 3966 }, { "epoch": 0.85, "grad_norm": 0.20171983540058136, "learning_rate": 5.444922701479465e-07, "loss": 0.5274, "step": 3967 }, { "epoch": 0.85, "grad_norm": 0.24001158773899078, "learning_rate": 5.429099841578966e-07, "loss": 0.5145, "step": 3968 }, { "epoch": 0.85, "grad_norm": 0.15838083624839783, "learning_rate": 5.413298685697005e-07, "loss": 0.4835, "step": 3969 }, { "epoch": 0.86, "grad_norm": 0.13740849494934082, "learning_rate": 5.397519241528026e-07, "loss": 0.4933, "step": 3970 }, { "epoch": 0.86, "grad_norm": 0.15910400450229645, "learning_rate": 5.381761516755907e-07, "loss": 0.5559, "step": 3971 }, { "epoch": 0.86, "grad_norm": 0.1526496410369873, "learning_rate": 5.366025519053958e-07, "loss": 0.5526, "step": 3972 }, { "epoch": 0.86, "grad_norm": 0.14740879833698273, "learning_rate": 5.350311256084895e-07, "loss": 0.5, "step": 3973 }, { "epoch": 0.86, "grad_norm": 0.12307767570018768, "learning_rate": 5.334618735500868e-07, "loss": 0.544, "step": 3974 }, { "epoch": 0.86, "grad_norm": 0.1413116753101349, "learning_rate": 5.3189479649434e-07, "loss": 0.5074, "step": 3975 }, { "epoch": 0.86, "grad_norm": 0.1412649154663086, "learning_rate": 5.303298952043473e-07, "loss": 0.5446, "step": 3976 }, { 
"epoch": 0.86, "grad_norm": 0.17468446493148804, "learning_rate": 5.287671704421437e-07, "loss": 0.5217, "step": 3977 }, { "epoch": 0.86, "grad_norm": 0.15514497458934784, "learning_rate": 5.272066229687078e-07, "loss": 0.542, "step": 3978 }, { "epoch": 0.86, "grad_norm": 0.16648997366428375, "learning_rate": 5.256482535439528e-07, "loss": 0.4755, "step": 3979 }, { "epoch": 0.86, "grad_norm": 0.14344756305217743, "learning_rate": 5.24092062926736e-07, "loss": 0.5393, "step": 3980 }, { "epoch": 0.86, "grad_norm": 0.14399173855781555, "learning_rate": 5.225380518748529e-07, "loss": 0.4944, "step": 3981 }, { "epoch": 0.86, "grad_norm": 0.16422103345394135, "learning_rate": 5.209862211450351e-07, "loss": 0.5151, "step": 3982 }, { "epoch": 0.86, "grad_norm": 0.20136775076389313, "learning_rate": 5.19436571492955e-07, "loss": 0.4696, "step": 3983 }, { "epoch": 0.86, "grad_norm": 0.16965395212173462, "learning_rate": 5.17889103673222e-07, "loss": 0.5225, "step": 3984 }, { "epoch": 0.86, "grad_norm": 0.13450326025485992, "learning_rate": 5.163438184393826e-07, "loss": 0.5, "step": 3985 }, { "epoch": 0.86, "grad_norm": 0.16451282799243927, "learning_rate": 5.148007165439234e-07, "loss": 0.4973, "step": 3986 }, { "epoch": 0.86, "grad_norm": 0.13875506818294525, "learning_rate": 5.13259798738262e-07, "loss": 0.4976, "step": 3987 }, { "epoch": 0.86, "grad_norm": 0.13691715896129608, "learning_rate": 5.117210657727589e-07, "loss": 0.5844, "step": 3988 }, { "epoch": 0.86, "grad_norm": 0.16079024970531464, "learning_rate": 5.101845183967041e-07, "loss": 0.5084, "step": 3989 }, { "epoch": 0.86, "grad_norm": 0.1576671302318573, "learning_rate": 5.086501573583302e-07, "loss": 0.5307, "step": 3990 }, { "epoch": 0.86, "grad_norm": 0.14902909100055695, "learning_rate": 5.071179834048018e-07, "loss": 0.5562, "step": 3991 }, { "epoch": 0.86, "grad_norm": 0.17067904770374298, "learning_rate": 5.055879972822164e-07, "loss": 0.5427, "step": 3992 }, { "epoch": 0.86, "grad_norm": 0.23107197880744934, "learning_rate": 5.040601997356098e-07, "loss": 0.5028, "step": 3993 }, { "epoch": 0.86, "grad_norm": 0.15796354413032532, "learning_rate": 5.025345915089497e-07, "loss": 0.5006, "step": 3994 }, { "epoch": 0.86, "grad_norm": 0.15521222352981567, "learning_rate": 5.010111733451384e-07, "loss": 0.5438, "step": 3995 }, { "epoch": 0.86, "grad_norm": 0.1400623768568039, "learning_rate": 4.994899459860125e-07, "loss": 0.5441, "step": 3996 }, { "epoch": 0.86, "grad_norm": 0.15729603171348572, "learning_rate": 4.979709101723407e-07, "loss": 0.5244, "step": 3997 }, { "epoch": 0.86, "grad_norm": 0.17316539585590363, "learning_rate": 4.964540666438261e-07, "loss": 0.5038, "step": 3998 }, { "epoch": 0.86, "grad_norm": 0.16760565340518951, "learning_rate": 4.949394161391013e-07, "loss": 0.5128, "step": 3999 }, { "epoch": 0.86, "grad_norm": 0.13866716623306274, "learning_rate": 4.934269593957336e-07, "loss": 0.5033, "step": 4000 }, { "epoch": 0.86, "grad_norm": 0.17374561727046967, "learning_rate": 4.919166971502215e-07, "loss": 0.4985, "step": 4001 }, { "epoch": 0.86, "grad_norm": 0.16311132907867432, "learning_rate": 4.90408630137994e-07, "loss": 0.5016, "step": 4002 }, { "epoch": 0.86, "grad_norm": 0.15572021901607513, "learning_rate": 4.889027590934131e-07, "loss": 0.5121, "step": 4003 }, { "epoch": 0.86, "grad_norm": 0.20856572687625885, "learning_rate": 4.873990847497684e-07, "loss": 0.5021, "step": 4004 }, { "epoch": 0.86, "grad_norm": 0.15273533761501312, "learning_rate": 4.85897607839283e-07, "loss": 0.5781, "step": 4005 }, { 
"epoch": 0.86, "grad_norm": 0.14332985877990723, "learning_rate": 4.843983290931064e-07, "loss": 0.4704, "step": 4006 }, { "epoch": 0.86, "grad_norm": 0.17221957445144653, "learning_rate": 4.829012492413215e-07, "loss": 0.4858, "step": 4007 }, { "epoch": 0.86, "grad_norm": 0.14145652949810028, "learning_rate": 4.814063690129378e-07, "loss": 0.5182, "step": 4008 }, { "epoch": 0.86, "grad_norm": 0.15986113250255585, "learning_rate": 4.799136891358952e-07, "loss": 0.5424, "step": 4009 }, { "epoch": 0.86, "grad_norm": 0.1356787085533142, "learning_rate": 4.784232103370617e-07, "loss": 0.494, "step": 4010 }, { "epoch": 0.86, "grad_norm": 0.19140973687171936, "learning_rate": 4.769349333422324e-07, "loss": 0.4956, "step": 4011 }, { "epoch": 0.86, "grad_norm": 0.14601151645183563, "learning_rate": 4.7544885887613136e-07, "loss": 0.5142, "step": 4012 }, { "epoch": 0.86, "grad_norm": 0.16945038735866547, "learning_rate": 4.739649876624108e-07, "loss": 0.5068, "step": 4013 }, { "epoch": 0.86, "grad_norm": 0.1639741212129593, "learning_rate": 4.724833204236462e-07, "loss": 0.4829, "step": 4014 }, { "epoch": 0.86, "grad_norm": 0.21183674037456512, "learning_rate": 4.710038578813469e-07, "loss": 0.4902, "step": 4015 }, { "epoch": 0.87, "grad_norm": 0.167417973279953, "learning_rate": 4.695266007559407e-07, "loss": 0.504, "step": 4016 }, { "epoch": 0.87, "grad_norm": 0.18118150532245636, "learning_rate": 4.6805154976678755e-07, "loss": 0.5233, "step": 4017 }, { "epoch": 0.87, "grad_norm": 0.16984857618808746, "learning_rate": 4.6657870563217076e-07, "loss": 0.5051, "step": 4018 }, { "epoch": 0.87, "grad_norm": 0.17123106122016907, "learning_rate": 4.651080690692972e-07, "loss": 0.5429, "step": 4019 }, { "epoch": 0.87, "grad_norm": 0.15946775674819946, "learning_rate": 4.6363964079430166e-07, "loss": 0.5523, "step": 4020 }, { "epoch": 0.87, "grad_norm": 0.13110215961933136, "learning_rate": 4.6217342152224233e-07, "loss": 0.5525, "step": 4021 }, { "epoch": 0.87, "grad_norm": 0.1535872519016266, "learning_rate": 4.6070941196710186e-07, "loss": 0.5344, "step": 4022 }, { "epoch": 0.87, "grad_norm": 0.15114997327327728, "learning_rate": 4.5924761284178834e-07, "loss": 0.4776, "step": 4023 }, { "epoch": 0.87, "grad_norm": 0.15840767323970795, "learning_rate": 4.5778802485812956e-07, "loss": 0.506, "step": 4024 }, { "epoch": 0.87, "grad_norm": 0.1840353012084961, "learning_rate": 4.5633064872688093e-07, "loss": 0.5216, "step": 4025 }, { "epoch": 0.87, "grad_norm": 0.18357300758361816, "learning_rate": 4.548754851577175e-07, "loss": 0.5406, "step": 4026 }, { "epoch": 0.87, "grad_norm": 0.16347016394138336, "learning_rate": 4.5342253485923803e-07, "loss": 0.5085, "step": 4027 }, { "epoch": 0.87, "grad_norm": 0.2056354433298111, "learning_rate": 4.5197179853896654e-07, "loss": 0.518, "step": 4028 }, { "epoch": 0.87, "grad_norm": 0.1330898255109787, "learning_rate": 4.505232769033435e-07, "loss": 0.5138, "step": 4029 }, { "epoch": 0.87, "grad_norm": 0.16567635536193848, "learning_rate": 4.4907697065773523e-07, "loss": 0.5258, "step": 4030 }, { "epoch": 0.87, "grad_norm": 0.1845930814743042, "learning_rate": 4.476328805064262e-07, "loss": 0.5277, "step": 4031 }, { "epoch": 0.87, "grad_norm": 0.1463019847869873, "learning_rate": 4.4619100715262374e-07, "loss": 0.4919, "step": 4032 }, { "epoch": 0.87, "grad_norm": 0.12273728102445602, "learning_rate": 4.447513512984558e-07, "loss": 0.4665, "step": 4033 }, { "epoch": 0.87, "grad_norm": 0.1603401154279709, "learning_rate": 4.4331391364496934e-07, "loss": 0.517, "step": 
4034 }, { "epoch": 0.87, "grad_norm": 0.15330933034420013, "learning_rate": 4.4187869489213275e-07, "loss": 0.5976, "step": 4035 }, { "epoch": 0.87, "grad_norm": 0.21303099393844604, "learning_rate": 4.404456957388309e-07, "loss": 0.5608, "step": 4036 }, { "epoch": 0.87, "grad_norm": 0.15875820815563202, "learning_rate": 4.3901491688287113e-07, "loss": 0.5394, "step": 4037 }, { "epoch": 0.87, "grad_norm": 0.18736515939235687, "learning_rate": 4.375863590209778e-07, "loss": 0.4804, "step": 4038 }, { "epoch": 0.87, "grad_norm": 0.21394529938697815, "learning_rate": 4.3616002284879333e-07, "loss": 0.5041, "step": 4039 }, { "epoch": 0.87, "grad_norm": 0.18619798123836517, "learning_rate": 4.3473590906088046e-07, "loss": 0.5027, "step": 4040 }, { "epoch": 0.87, "grad_norm": 0.16709107160568237, "learning_rate": 4.3331401835071783e-07, "loss": 0.4971, "step": 4041 }, { "epoch": 0.87, "grad_norm": 0.1601034700870514, "learning_rate": 4.3189435141070324e-07, "loss": 0.5241, "step": 4042 }, { "epoch": 0.87, "grad_norm": 0.15669238567352295, "learning_rate": 4.304769089321481e-07, "loss": 0.5291, "step": 4043 }, { "epoch": 0.87, "grad_norm": 0.14634265005588531, "learning_rate": 4.2906169160528424e-07, "loss": 0.5253, "step": 4044 }, { "epoch": 0.87, "grad_norm": 0.1932663768529892, "learning_rate": 4.276487001192592e-07, "loss": 0.5096, "step": 4045 }, { "epoch": 0.87, "grad_norm": 0.12650729715824127, "learning_rate": 4.262379351621354e-07, "loss": 0.5037, "step": 4046 }, { "epoch": 0.87, "grad_norm": 0.1783479005098343, "learning_rate": 4.248293974208928e-07, "loss": 0.5197, "step": 4047 }, { "epoch": 0.87, "grad_norm": 0.17106756567955017, "learning_rate": 4.2342308758142437e-07, "loss": 0.4908, "step": 4048 }, { "epoch": 0.87, "grad_norm": 0.1578291952610016, "learning_rate": 4.220190063285401e-07, "loss": 0.5028, "step": 4049 }, { "epoch": 0.87, "grad_norm": 0.17856548726558685, "learning_rate": 4.2061715434596475e-07, "loss": 0.4998, "step": 4050 }, { "epoch": 0.87, "grad_norm": 0.19411097466945648, "learning_rate": 4.192175323163361e-07, "loss": 0.5383, "step": 4051 }, { "epoch": 0.87, "grad_norm": 0.1397572010755539, "learning_rate": 4.1782014092120735e-07, "loss": 0.4779, "step": 4052 }, { "epoch": 0.87, "grad_norm": 0.12479076534509659, "learning_rate": 4.164249808410459e-07, "loss": 0.498, "step": 4053 }, { "epoch": 0.87, "grad_norm": 0.13633649051189423, "learning_rate": 4.150320527552304e-07, "loss": 0.5257, "step": 4054 }, { "epoch": 0.87, "grad_norm": 0.16726909577846527, "learning_rate": 4.1364135734205556e-07, "loss": 0.4955, "step": 4055 }, { "epoch": 0.87, "grad_norm": 0.1693604290485382, "learning_rate": 4.122528952787258e-07, "loss": 0.5903, "step": 4056 }, { "epoch": 0.87, "grad_norm": 0.13616541028022766, "learning_rate": 4.1086666724136024e-07, "loss": 0.4837, "step": 4057 }, { "epoch": 0.87, "grad_norm": 0.14842045307159424, "learning_rate": 4.0948267390498953e-07, "loss": 0.4777, "step": 4058 }, { "epoch": 0.87, "grad_norm": 0.15691286325454712, "learning_rate": 4.0810091594355674e-07, "loss": 0.4684, "step": 4059 }, { "epoch": 0.87, "grad_norm": 0.20302332937717438, "learning_rate": 4.067213940299136e-07, "loss": 0.5461, "step": 4060 }, { "epoch": 0.87, "grad_norm": 0.1701618880033493, "learning_rate": 4.0534410883582673e-07, "loss": 0.5253, "step": 4061 }, { "epoch": 0.88, "grad_norm": 0.16087806224822998, "learning_rate": 4.0396906103197244e-07, "loss": 0.5728, "step": 4062 }, { "epoch": 0.88, "grad_norm": 0.1731209009885788, "learning_rate": 4.02596251287935e-07, 
"loss": 0.4793, "step": 4063 }, { "epoch": 0.88, "grad_norm": 0.15619364380836487, "learning_rate": 4.01225680272212e-07, "loss": 0.5675, "step": 4064 }, { "epoch": 0.88, "grad_norm": 0.14686357975006104, "learning_rate": 3.998573486522095e-07, "loss": 0.5241, "step": 4065 }, { "epoch": 0.88, "grad_norm": 0.14786110818386078, "learning_rate": 3.984912570942434e-07, "loss": 0.5098, "step": 4066 }, { "epoch": 0.88, "grad_norm": 0.1765190064907074, "learning_rate": 3.9712740626354e-07, "loss": 0.6106, "step": 4067 }, { "epoch": 0.88, "grad_norm": 0.14524182677268982, "learning_rate": 3.9576579682423066e-07, "loss": 0.5239, "step": 4068 }, { "epoch": 0.88, "grad_norm": 0.18796804547309875, "learning_rate": 3.9440642943936013e-07, "loss": 0.4934, "step": 4069 }, { "epoch": 0.88, "grad_norm": 0.13147200644016266, "learning_rate": 3.930493047708761e-07, "loss": 0.5417, "step": 4070 }, { "epoch": 0.88, "grad_norm": 0.137882798910141, "learning_rate": 3.916944234796399e-07, "loss": 0.4724, "step": 4071 }, { "epoch": 0.88, "grad_norm": 0.1864192932844162, "learning_rate": 3.903417862254172e-07, "loss": 0.4951, "step": 4072 }, { "epoch": 0.88, "grad_norm": 0.14649604260921478, "learning_rate": 3.8899139366687985e-07, "loss": 0.5297, "step": 4073 }, { "epoch": 0.88, "grad_norm": 0.19774487614631653, "learning_rate": 3.876432464616103e-07, "loss": 0.5174, "step": 4074 }, { "epoch": 0.88, "grad_norm": 0.12834720313549042, "learning_rate": 3.862973452660929e-07, "loss": 0.523, "step": 4075 }, { "epoch": 0.88, "grad_norm": 0.1609206348657608, "learning_rate": 3.8495369073572266e-07, "loss": 0.5635, "step": 4076 }, { "epoch": 0.88, "grad_norm": 0.1672678291797638, "learning_rate": 3.8361228352479795e-07, "loss": 0.478, "step": 4077 }, { "epoch": 0.88, "grad_norm": 0.13725100457668304, "learning_rate": 3.822731242865235e-07, "loss": 0.5276, "step": 4078 }, { "epoch": 0.88, "grad_norm": 0.1619109809398651, "learning_rate": 3.8093621367301103e-07, "loss": 0.5497, "step": 4079 }, { "epoch": 0.88, "grad_norm": 0.18122999370098114, "learning_rate": 3.7960155233527364e-07, "loss": 0.5882, "step": 4080 }, { "epoch": 0.88, "grad_norm": 0.15119299292564392, "learning_rate": 3.782691409232325e-07, "loss": 0.4459, "step": 4081 }, { "epoch": 0.88, "grad_norm": 0.164114847779274, "learning_rate": 3.7693898008571205e-07, "loss": 0.525, "step": 4082 }, { "epoch": 0.88, "grad_norm": 0.1447734236717224, "learning_rate": 3.75611070470438e-07, "loss": 0.542, "step": 4083 }, { "epoch": 0.88, "grad_norm": 0.16693483293056488, "learning_rate": 3.742854127240464e-07, "loss": 0.5254, "step": 4084 }, { "epoch": 0.88, "grad_norm": 0.1440124213695526, "learning_rate": 3.7296200749207034e-07, "loss": 0.4841, "step": 4085 }, { "epoch": 0.88, "grad_norm": 0.14543622732162476, "learning_rate": 3.7164085541894937e-07, "loss": 0.5613, "step": 4086 }, { "epoch": 0.88, "grad_norm": 0.16933149099349976, "learning_rate": 3.703219571480249e-07, "loss": 0.5304, "step": 4087 }, { "epoch": 0.88, "grad_norm": 0.14789710938930511, "learning_rate": 3.690053133215399e-07, "loss": 0.5256, "step": 4088 }, { "epoch": 0.88, "grad_norm": 0.16581717133522034, "learning_rate": 3.676909245806415e-07, "loss": 0.5014, "step": 4089 }, { "epoch": 0.88, "grad_norm": 0.13003475964069366, "learning_rate": 3.663787915653777e-07, "loss": 0.5366, "step": 4090 }, { "epoch": 0.88, "grad_norm": 0.14335590600967407, "learning_rate": 3.650689149146991e-07, "loss": 0.5642, "step": 4091 }, { "epoch": 0.88, "grad_norm": 0.20606780052185059, "learning_rate": 
3.6376129526645376e-07, "loss": 0.5484, "step": 4092 }, { "epoch": 0.88, "grad_norm": 0.1340981125831604, "learning_rate": 3.624559332573957e-07, "loss": 0.4645, "step": 4093 }, { "epoch": 0.88, "grad_norm": 0.14133349061012268, "learning_rate": 3.6115282952317807e-07, "loss": 0.4575, "step": 4094 }, { "epoch": 0.88, "grad_norm": 0.16861362755298615, "learning_rate": 3.598519846983511e-07, "loss": 0.4783, "step": 4095 }, { "epoch": 0.88, "grad_norm": 0.126511812210083, "learning_rate": 3.5855339941636867e-07, "loss": 0.4925, "step": 4096 }, { "epoch": 0.88, "grad_norm": 0.17741841077804565, "learning_rate": 3.572570743095838e-07, "loss": 0.4844, "step": 4097 }, { "epoch": 0.88, "grad_norm": 0.13794460892677307, "learning_rate": 3.5596301000924815e-07, "loss": 0.5503, "step": 4098 }, { "epoch": 0.88, "grad_norm": 0.14488175511360168, "learning_rate": 3.546712071455127e-07, "loss": 0.4982, "step": 4099 }, { "epoch": 0.88, "grad_norm": 0.2083345204591751, "learning_rate": 3.533816663474271e-07, "loss": 0.4913, "step": 4100 }, { "epoch": 0.88, "grad_norm": 0.15425090491771698, "learning_rate": 3.5209438824293896e-07, "loss": 0.5406, "step": 4101 }, { "epoch": 0.88, "grad_norm": 0.19911810755729675, "learning_rate": 3.508093734588952e-07, "loss": 0.4975, "step": 4102 }, { "epoch": 0.88, "grad_norm": 0.14716565608978271, "learning_rate": 3.4952662262104033e-07, "loss": 0.4834, "step": 4103 }, { "epoch": 0.88, "grad_norm": 0.18182729184627533, "learning_rate": 3.482461363540163e-07, "loss": 0.5785, "step": 4104 }, { "epoch": 0.88, "grad_norm": 0.16187834739685059, "learning_rate": 3.46967915281361e-07, "loss": 0.5109, "step": 4105 }, { "epoch": 0.88, "grad_norm": 0.13375143706798553, "learning_rate": 3.456919600255126e-07, "loss": 0.4686, "step": 4106 }, { "epoch": 0.88, "grad_norm": 0.16278614103794098, "learning_rate": 3.4441827120780147e-07, "loss": 0.5005, "step": 4107 }, { "epoch": 0.88, "grad_norm": 0.2082134485244751, "learning_rate": 3.4314684944845747e-07, "loss": 0.4992, "step": 4108 }, { "epoch": 0.89, "grad_norm": 0.134648859500885, "learning_rate": 3.4187769536660533e-07, "loss": 0.4795, "step": 4109 }, { "epoch": 0.89, "grad_norm": 0.1557423323392868, "learning_rate": 3.406108095802668e-07, "loss": 0.5266, "step": 4110 }, { "epoch": 0.89, "grad_norm": 0.15993043780326843, "learning_rate": 3.393461927063585e-07, "loss": 0.4967, "step": 4111 }, { "epoch": 0.89, "grad_norm": 0.12335589528083801, "learning_rate": 3.3808384536068997e-07, "loss": 0.5044, "step": 4112 }, { "epoch": 0.89, "grad_norm": 0.1581617146730423, "learning_rate": 3.3682376815796834e-07, "loss": 0.5087, "step": 4113 }, { "epoch": 0.89, "grad_norm": 0.1484224498271942, "learning_rate": 3.3556596171179455e-07, "loss": 0.4655, "step": 4114 }, { "epoch": 0.89, "grad_norm": 0.16247932612895966, "learning_rate": 3.343104266346636e-07, "loss": 0.4964, "step": 4115 }, { "epoch": 0.89, "grad_norm": 0.18105369806289673, "learning_rate": 3.3305716353796537e-07, "loss": 0.5231, "step": 4116 }, { "epoch": 0.89, "grad_norm": 0.16214075684547424, "learning_rate": 3.3180617303198046e-07, "loss": 0.546, "step": 4117 }, { "epoch": 0.89, "grad_norm": 0.16236190497875214, "learning_rate": 3.305574557258867e-07, "loss": 0.4751, "step": 4118 }, { "epoch": 0.89, "grad_norm": 0.1777781993150711, "learning_rate": 3.2931101222775154e-07, "loss": 0.5563, "step": 4119 }, { "epoch": 0.89, "grad_norm": 0.17919768393039703, "learning_rate": 3.2806684314453774e-07, "loss": 0.4581, "step": 4120 }, { "epoch": 0.89, "grad_norm": 0.16179294884204865, 
"learning_rate": 3.2682494908209906e-07, "loss": 0.5197, "step": 4121 }, { "epoch": 0.89, "grad_norm": 0.14991389214992523, "learning_rate": 3.255853306451823e-07, "loss": 0.5574, "step": 4122 }, { "epoch": 0.89, "grad_norm": 0.22062784433364868, "learning_rate": 3.243479884374262e-07, "loss": 0.5563, "step": 4123 }, { "epoch": 0.89, "grad_norm": 0.15176159143447876, "learning_rate": 3.2311292306135944e-07, "loss": 0.4785, "step": 4124 }, { "epoch": 0.89, "grad_norm": 0.1553657054901123, "learning_rate": 3.2188013511840365e-07, "loss": 0.5524, "step": 4125 }, { "epoch": 0.89, "grad_norm": 0.2217596471309662, "learning_rate": 3.2064962520887146e-07, "loss": 0.4976, "step": 4126 }, { "epoch": 0.89, "grad_norm": 0.1558333784341812, "learning_rate": 3.194213939319646e-07, "loss": 0.5259, "step": 4127 }, { "epoch": 0.89, "grad_norm": 0.1418876200914383, "learning_rate": 3.18195441885778e-07, "loss": 0.5342, "step": 4128 }, { "epoch": 0.89, "grad_norm": 0.12174227088689804, "learning_rate": 3.169717696672936e-07, "loss": 0.5027, "step": 4129 }, { "epoch": 0.89, "grad_norm": 0.12317800521850586, "learning_rate": 3.157503778723847e-07, "loss": 0.5245, "step": 4130 }, { "epoch": 0.89, "grad_norm": 0.14522142708301544, "learning_rate": 3.145312670958156e-07, "loss": 0.4962, "step": 4131 }, { "epoch": 0.89, "grad_norm": 0.1550437957048416, "learning_rate": 3.1331443793123585e-07, "loss": 0.4934, "step": 4132 }, { "epoch": 0.89, "grad_norm": 0.15259462594985962, "learning_rate": 3.120998909711881e-07, "loss": 0.4907, "step": 4133 }, { "epoch": 0.89, "grad_norm": 0.12828658521175385, "learning_rate": 3.108876268071009e-07, "loss": 0.4977, "step": 4134 }, { "epoch": 0.89, "grad_norm": 0.18181227147579193, "learning_rate": 3.096776460292927e-07, "loss": 0.4883, "step": 4135 }, { "epoch": 0.89, "grad_norm": 0.16961906850337982, "learning_rate": 3.0846994922697104e-07, "loss": 0.5045, "step": 4136 }, { "epoch": 0.89, "grad_norm": 0.14249150454998016, "learning_rate": 3.072645369882271e-07, "loss": 0.5097, "step": 4137 }, { "epoch": 0.89, "grad_norm": 0.1759713590145111, "learning_rate": 3.060614099000442e-07, "loss": 0.4703, "step": 4138 }, { "epoch": 0.89, "grad_norm": 0.1403285712003708, "learning_rate": 3.048605685482892e-07, "loss": 0.4779, "step": 4139 }, { "epoch": 0.89, "grad_norm": 0.15652833878993988, "learning_rate": 3.0366201351771983e-07, "loss": 0.4843, "step": 4140 }, { "epoch": 0.89, "grad_norm": 0.1837598830461502, "learning_rate": 3.024657453919777e-07, "loss": 0.5272, "step": 4141 }, { "epoch": 0.89, "grad_norm": 0.1516779363155365, "learning_rate": 3.0127176475359065e-07, "loss": 0.5174, "step": 4142 }, { "epoch": 0.89, "grad_norm": 0.15141400694847107, "learning_rate": 3.0008007218397415e-07, "loss": 0.5148, "step": 4143 }, { "epoch": 0.89, "grad_norm": 0.1702878326177597, "learning_rate": 2.988906682634285e-07, "loss": 0.5279, "step": 4144 }, { "epoch": 0.89, "grad_norm": 0.17491145431995392, "learning_rate": 2.977035535711392e-07, "loss": 0.5319, "step": 4145 }, { "epoch": 0.89, "grad_norm": 0.16550621390342712, "learning_rate": 2.965187286851784e-07, "loss": 0.5071, "step": 4146 }, { "epoch": 0.89, "grad_norm": 0.19162628054618835, "learning_rate": 2.953361941825017e-07, "loss": 0.5123, "step": 4147 }, { "epoch": 0.89, "grad_norm": 0.15393443405628204, "learning_rate": 2.941559506389513e-07, "loss": 0.5035, "step": 4148 }, { "epoch": 0.89, "grad_norm": 0.20023614168167114, "learning_rate": 2.9297799862925136e-07, "loss": 0.5585, "step": 4149 }, { "epoch": 0.89, "grad_norm": 
0.13044221699237823, "learning_rate": 2.9180233872701247e-07, "loss": 0.4811, "step": 4150 }, { "epoch": 0.89, "grad_norm": 0.13851873576641083, "learning_rate": 2.906289715047267e-07, "loss": 0.5445, "step": 4151 }, { "epoch": 0.89, "grad_norm": 0.15511353313922882, "learning_rate": 2.894578975337703e-07, "loss": 0.5014, "step": 4152 }, { "epoch": 0.89, "grad_norm": 0.12846341729164124, "learning_rate": 2.8828911738440713e-07, "loss": 0.4618, "step": 4153 }, { "epoch": 0.89, "grad_norm": 0.19906170666217804, "learning_rate": 2.8712263162577636e-07, "loss": 0.5116, "step": 4154 }, { "epoch": 0.9, "grad_norm": 0.16838042438030243, "learning_rate": 2.8595844082590695e-07, "loss": 0.4673, "step": 4155 }, { "epoch": 0.9, "grad_norm": 0.14445045590400696, "learning_rate": 2.8479654555170546e-07, "loss": 0.5171, "step": 4156 }, { "epoch": 0.9, "grad_norm": 0.14654265344142914, "learning_rate": 2.836369463689631e-07, "loss": 0.5053, "step": 4157 }, { "epoch": 0.9, "grad_norm": 0.14703992009162903, "learning_rate": 2.8247964384235214e-07, "loss": 0.5053, "step": 4158 }, { "epoch": 0.9, "grad_norm": 0.15812741219997406, "learning_rate": 2.813246385354268e-07, "loss": 0.5083, "step": 4159 }, { "epoch": 0.9, "grad_norm": 0.18599557876586914, "learning_rate": 2.8017193101062377e-07, "loss": 0.5569, "step": 4160 }, { "epoch": 0.9, "grad_norm": 0.16005754470825195, "learning_rate": 2.7902152182925746e-07, "loss": 0.5608, "step": 4161 }, { "epoch": 0.9, "grad_norm": 0.1266726851463318, "learning_rate": 2.778734115515269e-07, "loss": 0.536, "step": 4162 }, { "epoch": 0.9, "grad_norm": 0.15541070699691772, "learning_rate": 2.7672760073650996e-07, "loss": 0.5001, "step": 4163 }, { "epoch": 0.9, "grad_norm": 0.12981733679771423, "learning_rate": 2.755840899421636e-07, "loss": 0.5071, "step": 4164 }, { "epoch": 0.9, "grad_norm": 0.14456294476985931, "learning_rate": 2.744428797253268e-07, "loss": 0.4775, "step": 4165 }, { "epoch": 0.9, "grad_norm": 0.16282424330711365, "learning_rate": 2.7330397064171787e-07, "loss": 0.5036, "step": 4166 }, { "epoch": 0.9, "grad_norm": 0.13770410418510437, "learning_rate": 2.7216736324593316e-07, "loss": 0.4779, "step": 4167 }, { "epoch": 0.9, "grad_norm": 0.1373995691537857, "learning_rate": 2.7103305809145106e-07, "loss": 0.5212, "step": 4168 }, { "epoch": 0.9, "grad_norm": 0.13789519667625427, "learning_rate": 2.699010557306253e-07, "loss": 0.5334, "step": 4169 }, { "epoch": 0.9, "grad_norm": 0.15480732917785645, "learning_rate": 2.687713567146899e-07, "loss": 0.5061, "step": 4170 }, { "epoch": 0.9, "grad_norm": 0.1765686720609665, "learning_rate": 2.676439615937582e-07, "loss": 0.4803, "step": 4171 }, { "epoch": 0.9, "grad_norm": 0.13667207956314087, "learning_rate": 2.665188709168215e-07, "loss": 0.529, "step": 4172 }, { "epoch": 0.9, "grad_norm": 0.1658671498298645, "learning_rate": 2.6539608523174665e-07, "loss": 0.5231, "step": 4173 }, { "epoch": 0.9, "grad_norm": 0.1876867711544037, "learning_rate": 2.642756050852796e-07, "loss": 0.5373, "step": 4174 }, { "epoch": 0.9, "grad_norm": 0.1558872014284134, "learning_rate": 2.631574310230456e-07, "loss": 0.5224, "step": 4175 }, { "epoch": 0.9, "grad_norm": 0.15014420449733734, "learning_rate": 2.620415635895429e-07, "loss": 0.5415, "step": 4176 }, { "epoch": 0.9, "grad_norm": 0.14560414850711823, "learning_rate": 2.6092800332814914e-07, "loss": 0.4483, "step": 4177 }, { "epoch": 0.9, "grad_norm": 0.19036678969860077, "learning_rate": 2.5981675078111835e-07, "loss": 0.545, "step": 4178 }, { "epoch": 0.9, "grad_norm": 
0.3053010404109955, "learning_rate": 2.587078064895804e-07, "loss": 0.5174, "step": 4179 }, { "epoch": 0.9, "grad_norm": 0.14357438683509827, "learning_rate": 2.5760117099354163e-07, "loss": 0.484, "step": 4180 }, { "epoch": 0.9, "grad_norm": 0.1921653300523758, "learning_rate": 2.5649684483188274e-07, "loss": 0.5016, "step": 4181 }, { "epoch": 0.9, "grad_norm": 0.15328127145767212, "learning_rate": 2.5539482854236076e-07, "loss": 0.4675, "step": 4182 }, { "epoch": 0.9, "grad_norm": 0.16819103062152863, "learning_rate": 2.5429512266160805e-07, "loss": 0.4982, "step": 4183 }, { "epoch": 0.9, "grad_norm": 0.16778436303138733, "learning_rate": 2.531977277251324e-07, "loss": 0.5475, "step": 4184 }, { "epoch": 0.9, "grad_norm": 0.1751207560300827, "learning_rate": 2.521026442673158e-07, "loss": 0.5167, "step": 4185 }, { "epoch": 0.9, "grad_norm": 0.16310207545757294, "learning_rate": 2.510098728214133e-07, "loss": 0.5168, "step": 4186 }, { "epoch": 0.9, "grad_norm": 0.17500171065330505, "learning_rate": 2.4991941391955654e-07, "loss": 0.5662, "step": 4187 }, { "epoch": 0.9, "grad_norm": 0.15705259144306183, "learning_rate": 2.488312680927485e-07, "loss": 0.5006, "step": 4188 }, { "epoch": 0.9, "grad_norm": 0.18603339791297913, "learning_rate": 2.4774543587086807e-07, "loss": 0.5122, "step": 4189 }, { "epoch": 0.9, "grad_norm": 0.18599247932434082, "learning_rate": 2.466619177826668e-07, "loss": 0.5189, "step": 4190 }, { "epoch": 0.9, "grad_norm": 0.1684497594833374, "learning_rate": 2.4558071435576813e-07, "loss": 0.5395, "step": 4191 }, { "epoch": 0.9, "grad_norm": 0.15068615972995758, "learning_rate": 2.4450182611667096e-07, "loss": 0.4888, "step": 4192 }, { "epoch": 0.9, "grad_norm": 0.1370285153388977, "learning_rate": 2.4342525359074385e-07, "loss": 0.5118, "step": 4193 }, { "epoch": 0.9, "grad_norm": 0.16400447487831116, "learning_rate": 2.423509973022292e-07, "loss": 0.4971, "step": 4194 }, { "epoch": 0.9, "grad_norm": 0.22781065106391907, "learning_rate": 2.4127905777424134e-07, "loss": 0.5495, "step": 4195 }, { "epoch": 0.9, "grad_norm": 0.1345801055431366, "learning_rate": 2.4020943552876706e-07, "loss": 0.509, "step": 4196 }, { "epoch": 0.9, "grad_norm": 0.15502989292144775, "learning_rate": 2.391421310866648e-07, "loss": 0.5564, "step": 4197 }, { "epoch": 0.9, "grad_norm": 0.13414627313613892, "learning_rate": 2.3807714496766165e-07, "loss": 0.5253, "step": 4198 }, { "epoch": 0.9, "grad_norm": 0.1705794483423233, "learning_rate": 2.370144776903599e-07, "loss": 0.4849, "step": 4199 }, { "epoch": 0.9, "grad_norm": 0.16182225942611694, "learning_rate": 2.3595412977222897e-07, "loss": 0.5487, "step": 4200 }, { "epoch": 0.9, "grad_norm": 0.18094182014465332, "learning_rate": 2.3489610172961143e-07, "loss": 0.4966, "step": 4201 }, { "epoch": 0.91, "grad_norm": 0.134856179356575, "learning_rate": 2.3384039407771896e-07, "loss": 0.5284, "step": 4202 }, { "epoch": 0.91, "grad_norm": 0.12742473185062408, "learning_rate": 2.327870073306332e-07, "loss": 0.5371, "step": 4203 }, { "epoch": 0.91, "grad_norm": 0.16482314467430115, "learning_rate": 2.317359420013071e-07, "loss": 0.5241, "step": 4204 }, { "epoch": 0.91, "grad_norm": 0.17178313434123993, "learning_rate": 2.306871986015613e-07, "loss": 0.5146, "step": 4205 }, { "epoch": 0.91, "grad_norm": 0.16056092083454132, "learning_rate": 2.2964077764208615e-07, "loss": 0.5389, "step": 4206 }, { "epoch": 0.91, "grad_norm": 0.18820203840732574, "learning_rate": 2.2859667963244236e-07, "loss": 0.4964, "step": 4207 }, { "epoch": 0.91, "grad_norm": 
0.18173396587371826, "learning_rate": 2.2755490508105716e-07, "loss": 0.5323, "step": 4208 }, { "epoch": 0.91, "grad_norm": 0.15220309793949127, "learning_rate": 2.2651545449522972e-07, "loss": 0.477, "step": 4209 }, { "epoch": 0.91, "grad_norm": 0.17373429238796234, "learning_rate": 2.254783283811246e-07, "loss": 0.5649, "step": 4210 }, { "epoch": 0.91, "grad_norm": 0.1504889577627182, "learning_rate": 2.2444352724377505e-07, "loss": 0.5183, "step": 4211 }, { "epoch": 0.91, "grad_norm": 0.1400587409734726, "learning_rate": 2.2341105158708408e-07, "loss": 0.5101, "step": 4212 }, { "epoch": 0.91, "grad_norm": 0.18521972000598907, "learning_rate": 2.22380901913819e-07, "loss": 0.4801, "step": 4213 }, { "epoch": 0.91, "grad_norm": 0.1672522872686386, "learning_rate": 2.2135307872561628e-07, "loss": 0.4725, "step": 4214 }, { "epoch": 0.91, "grad_norm": 0.15692496299743652, "learning_rate": 2.2032758252298115e-07, "loss": 0.5603, "step": 4215 }, { "epoch": 0.91, "grad_norm": 0.18013040721416473, "learning_rate": 2.1930441380528243e-07, "loss": 0.5292, "step": 4216 }, { "epoch": 0.91, "grad_norm": 0.14542804658412933, "learning_rate": 2.182835730707583e-07, "loss": 0.5056, "step": 4217 }, { "epoch": 0.91, "grad_norm": 0.16739703714847565, "learning_rate": 2.172650608165111e-07, "loss": 0.4897, "step": 4218 }, { "epoch": 0.91, "grad_norm": 0.16817772388458252, "learning_rate": 2.1624887753851186e-07, "loss": 0.515, "step": 4219 }, { "epoch": 0.91, "grad_norm": 0.1342426985502243, "learning_rate": 2.1523502373159367e-07, "loss": 0.4996, "step": 4220 }, { "epoch": 0.91, "grad_norm": 0.3892795741558075, "learning_rate": 2.142234998894588e-07, "loss": 0.4838, "step": 4221 }, { "epoch": 0.91, "grad_norm": 0.16268621385097504, "learning_rate": 2.1321430650467546e-07, "loss": 0.5302, "step": 4222 }, { "epoch": 0.91, "grad_norm": 0.12644894421100616, "learning_rate": 2.1220744406867278e-07, "loss": 0.5567, "step": 4223 }, { "epoch": 0.91, "grad_norm": 0.1844691038131714, "learning_rate": 2.112029130717491e-07, "loss": 0.6264, "step": 4224 }, { "epoch": 0.91, "grad_norm": 0.18971168994903564, "learning_rate": 2.1020071400306429e-07, "loss": 0.5327, "step": 4225 }, { "epoch": 0.91, "grad_norm": 0.11999719589948654, "learning_rate": 2.092008473506446e-07, "loss": 0.5153, "step": 4226 }, { "epoch": 0.91, "grad_norm": 0.12612876296043396, "learning_rate": 2.0820331360138058e-07, "loss": 0.4838, "step": 4227 }, { "epoch": 0.91, "grad_norm": 0.15707595646381378, "learning_rate": 2.072081132410253e-07, "loss": 0.5158, "step": 4228 }, { "epoch": 0.91, "grad_norm": 0.14865291118621826, "learning_rate": 2.062152467541978e-07, "loss": 0.512, "step": 4229 }, { "epoch": 0.91, "grad_norm": 0.17846401035785675, "learning_rate": 2.0522471462437798e-07, "loss": 0.4865, "step": 4230 }, { "epoch": 0.91, "grad_norm": 0.15374383330345154, "learning_rate": 2.042365173339117e-07, "loss": 0.5007, "step": 4231 }, { "epoch": 0.91, "grad_norm": 0.14291195571422577, "learning_rate": 2.0325065536400456e-07, "loss": 0.5102, "step": 4232 }, { "epoch": 0.91, "grad_norm": 0.12746839225292206, "learning_rate": 2.02267129194727e-07, "loss": 0.51, "step": 4233 }, { "epoch": 0.91, "grad_norm": 0.16647961735725403, "learning_rate": 2.0128593930501427e-07, "loss": 0.5033, "step": 4234 }, { "epoch": 0.91, "grad_norm": 0.16872666776180267, "learning_rate": 2.0030708617265971e-07, "loss": 0.4992, "step": 4235 }, { "epoch": 0.91, "grad_norm": 0.13757802546024323, "learning_rate": 1.9933057027432147e-07, "loss": 0.5519, "step": 4236 }, { "epoch": 
0.91, "grad_norm": 0.15467625856399536, "learning_rate": 1.9835639208551803e-07, "loss": 0.5208, "step": 4237 }, { "epoch": 0.91, "grad_norm": 0.14848686754703522, "learning_rate": 1.9738455208063055e-07, "loss": 0.5348, "step": 4238 }, { "epoch": 0.91, "grad_norm": 0.18028004467487335, "learning_rate": 1.9641505073290103e-07, "loss": 0.5313, "step": 4239 }, { "epoch": 0.91, "grad_norm": 0.19385437667369843, "learning_rate": 1.9544788851443342e-07, "loss": 0.5109, "step": 4240 }, { "epoch": 0.91, "grad_norm": 0.1776755303144455, "learning_rate": 1.944830658961927e-07, "loss": 0.4881, "step": 4241 }, { "epoch": 0.91, "grad_norm": 0.15795911848545074, "learning_rate": 1.9352058334800195e-07, "loss": 0.5299, "step": 4242 }, { "epoch": 0.91, "grad_norm": 0.1372847557067871, "learning_rate": 1.9256044133854846e-07, "loss": 0.5026, "step": 4243 }, { "epoch": 0.91, "grad_norm": 0.1478043794631958, "learning_rate": 1.9160264033537824e-07, "loss": 0.4663, "step": 4244 }, { "epoch": 0.91, "grad_norm": 0.16185085475444794, "learning_rate": 1.9064718080489596e-07, "loss": 0.4501, "step": 4245 }, { "epoch": 0.91, "grad_norm": 0.15890911221504211, "learning_rate": 1.8969406321236727e-07, "loss": 0.5688, "step": 4246 }, { "epoch": 0.91, "grad_norm": 0.1747117042541504, "learning_rate": 1.8874328802191867e-07, "loss": 0.5213, "step": 4247 }, { "epoch": 0.92, "grad_norm": 0.15179674327373505, "learning_rate": 1.8779485569653422e-07, "loss": 0.5252, "step": 4248 }, { "epoch": 0.92, "grad_norm": 0.1634942591190338, "learning_rate": 1.868487666980584e-07, "loss": 0.4914, "step": 4249 }, { "epoch": 0.92, "grad_norm": 0.13174600899219513, "learning_rate": 1.859050214871927e-07, "loss": 0.5337, "step": 4250 }, { "epoch": 0.92, "grad_norm": 0.1955437809228897, "learning_rate": 1.8496362052349893e-07, "loss": 0.4623, "step": 4251 }, { "epoch": 0.92, "grad_norm": 0.14583423733711243, "learning_rate": 1.8402456426539706e-07, "loss": 0.5413, "step": 4252 }, { "epoch": 0.92, "grad_norm": 0.15666338801383972, "learning_rate": 1.830878531701652e-07, "loss": 0.4953, "step": 4253 }, { "epoch": 0.92, "grad_norm": 0.15777826309204102, "learning_rate": 1.8215348769393904e-07, "loss": 0.5767, "step": 4254 }, { "epoch": 0.92, "grad_norm": 0.1892169713973999, "learning_rate": 1.8122146829171294e-07, "loss": 0.5119, "step": 4255 }, { "epoch": 0.92, "grad_norm": 0.13005930185317993, "learning_rate": 1.8029179541733833e-07, "loss": 0.5126, "step": 4256 }, { "epoch": 0.92, "grad_norm": 0.15139774978160858, "learning_rate": 1.7936446952352303e-07, "loss": 0.5505, "step": 4257 }, { "epoch": 0.92, "grad_norm": 0.18456581234931946, "learning_rate": 1.7843949106183368e-07, "loss": 0.4961, "step": 4258 }, { "epoch": 0.92, "grad_norm": 0.1589013636112213, "learning_rate": 1.7751686048269322e-07, "loss": 0.5622, "step": 4259 }, { "epoch": 0.92, "grad_norm": 0.16003479063510895, "learning_rate": 1.7659657823538067e-07, "loss": 0.5282, "step": 4260 }, { "epoch": 0.92, "grad_norm": 0.15125080943107605, "learning_rate": 1.7567864476803254e-07, "loss": 0.4712, "step": 4261 }, { "epoch": 0.92, "grad_norm": 0.14712797105312347, "learning_rate": 1.747630605276407e-07, "loss": 0.5371, "step": 4262 }, { "epoch": 0.92, "grad_norm": 0.16056658327579498, "learning_rate": 1.7384982596005352e-07, "loss": 0.5107, "step": 4263 }, { "epoch": 0.92, "grad_norm": 0.14780429005622864, "learning_rate": 1.7293894150997414e-07, "loss": 0.5311, "step": 4264 }, { "epoch": 0.92, "grad_norm": 0.14676974713802338, "learning_rate": 1.720304076209639e-07, "loss": 0.4981, 
"step": 4265 }, { "epoch": 0.92, "grad_norm": 0.14568917453289032, "learning_rate": 1.711242247354372e-07, "loss": 0.5237, "step": 4266 }, { "epoch": 0.92, "grad_norm": 0.13407346606254578, "learning_rate": 1.7022039329466333e-07, "loss": 0.5197, "step": 4267 }, { "epoch": 0.92, "grad_norm": 0.14667077362537384, "learning_rate": 1.6931891373876852e-07, "loss": 0.5155, "step": 4268 }, { "epoch": 0.92, "grad_norm": 0.14774075150489807, "learning_rate": 1.6841978650673218e-07, "loss": 0.5261, "step": 4269 }, { "epoch": 0.92, "grad_norm": 0.13004808127880096, "learning_rate": 1.6752301203638854e-07, "loss": 0.4793, "step": 4270 }, { "epoch": 0.92, "grad_norm": 0.1556776911020279, "learning_rate": 1.666285907644266e-07, "loss": 0.5356, "step": 4271 }, { "epoch": 0.92, "grad_norm": 0.1795538365840912, "learning_rate": 1.657365231263891e-07, "loss": 0.5119, "step": 4272 }, { "epoch": 0.92, "grad_norm": 0.15904632210731506, "learning_rate": 1.6484680955667354e-07, "loss": 0.5485, "step": 4273 }, { "epoch": 0.92, "grad_norm": 0.1445087045431137, "learning_rate": 1.6395945048852947e-07, "loss": 0.4932, "step": 4274 }, { "epoch": 0.92, "grad_norm": 0.13816164433956146, "learning_rate": 1.6307444635406011e-07, "loss": 0.5038, "step": 4275 }, { "epoch": 0.92, "grad_norm": 0.15279729664325714, "learning_rate": 1.6219179758422465e-07, "loss": 0.5235, "step": 4276 }, { "epoch": 0.92, "grad_norm": 0.15122798085212708, "learning_rate": 1.6131150460883038e-07, "loss": 0.4975, "step": 4277 }, { "epoch": 0.92, "grad_norm": 0.19103887677192688, "learning_rate": 1.6043356785654273e-07, "loss": 0.5026, "step": 4278 }, { "epoch": 0.92, "grad_norm": 0.1535024344921112, "learning_rate": 1.595579877548764e-07, "loss": 0.5348, "step": 4279 }, { "epoch": 0.92, "grad_norm": 0.17013922333717346, "learning_rate": 1.5868476473019922e-07, "loss": 0.528, "step": 4280 }, { "epoch": 0.92, "grad_norm": 0.13540351390838623, "learning_rate": 1.578138992077316e-07, "loss": 0.5253, "step": 4281 }, { "epoch": 0.92, "grad_norm": 0.14699843525886536, "learning_rate": 1.5694539161154598e-07, "loss": 0.4991, "step": 4282 }, { "epoch": 0.92, "grad_norm": 0.1623685657978058, "learning_rate": 1.560792423645663e-07, "loss": 0.5254, "step": 4283 }, { "epoch": 0.92, "grad_norm": 0.17117798328399658, "learning_rate": 1.5521545188856734e-07, "loss": 0.557, "step": 4284 }, { "epoch": 0.92, "grad_norm": 0.16229890286922455, "learning_rate": 1.5435402060417825e-07, "loss": 0.5552, "step": 4285 }, { "epoch": 0.92, "grad_norm": 0.28365910053253174, "learning_rate": 1.5349494893087514e-07, "loss": 0.5357, "step": 4286 }, { "epoch": 0.92, "grad_norm": 0.1524672657251358, "learning_rate": 1.526382372869878e-07, "loss": 0.5343, "step": 4287 }, { "epoch": 0.92, "grad_norm": 0.18612819910049438, "learning_rate": 1.517838860896964e-07, "loss": 0.4767, "step": 4288 }, { "epoch": 0.92, "grad_norm": 0.15579423308372498, "learning_rate": 1.50931895755031e-07, "loss": 0.5174, "step": 4289 }, { "epoch": 0.92, "grad_norm": 0.1574939489364624, "learning_rate": 1.500822666978735e-07, "loss": 0.4945, "step": 4290 }, { "epoch": 0.92, "grad_norm": 0.13923248648643494, "learning_rate": 1.492349993319536e-07, "loss": 0.5056, "step": 4291 }, { "epoch": 0.92, "grad_norm": 0.1429956555366516, "learning_rate": 1.4839009406985295e-07, "loss": 0.4775, "step": 4292 }, { "epoch": 0.92, "grad_norm": 0.1344211846590042, "learning_rate": 1.4754755132300292e-07, "loss": 0.5308, "step": 4293 }, { "epoch": 0.92, "grad_norm": 0.17861835658550262, "learning_rate": 
1.4670737150168257e-07, "loss": 0.4766, "step": 4294 }, { "epoch": 0.93, "grad_norm": 0.1777002215385437, "learning_rate": 1.4586955501502186e-07, "loss": 0.5361, "step": 4295 }, { "epoch": 0.93, "grad_norm": 0.14904451370239258, "learning_rate": 1.4503410227100057e-07, "loss": 0.4903, "step": 4296 }, { "epoch": 0.93, "grad_norm": 0.19658173620700836, "learning_rate": 1.4420101367644602e-07, "loss": 0.5013, "step": 4297 }, { "epoch": 0.93, "grad_norm": 0.12814252078533173, "learning_rate": 1.433702896370348e-07, "loss": 0.5173, "step": 4298 }, { "epoch": 0.93, "grad_norm": 0.1587502658367157, "learning_rate": 1.4254193055729171e-07, "loss": 0.5192, "step": 4299 }, { "epoch": 0.93, "grad_norm": 0.16808383166790009, "learning_rate": 1.417159368405907e-07, "loss": 0.54, "step": 4300 }, { "epoch": 0.93, "grad_norm": 0.14128008484840393, "learning_rate": 1.408923088891534e-07, "loss": 0.5069, "step": 4301 }, { "epoch": 0.93, "grad_norm": 0.1637185662984848, "learning_rate": 1.4007104710404838e-07, "loss": 0.514, "step": 4302 }, { "epoch": 0.93, "grad_norm": 0.1476011574268341, "learning_rate": 1.3925215188519525e-07, "loss": 0.5337, "step": 4303 }, { "epoch": 0.93, "grad_norm": 0.1344112902879715, "learning_rate": 1.384356236313572e-07, "loss": 0.4939, "step": 4304 }, { "epoch": 0.93, "grad_norm": 0.15150727331638336, "learning_rate": 1.3762146274014842e-07, "loss": 0.4818, "step": 4305 }, { "epoch": 0.93, "grad_norm": 0.14989051222801208, "learning_rate": 1.3680966960802623e-07, "loss": 0.4746, "step": 4306 }, { "epoch": 0.93, "grad_norm": 0.14494554698467255, "learning_rate": 1.3600024463029938e-07, "loss": 0.5037, "step": 4307 }, { "epoch": 0.93, "grad_norm": 0.17142927646636963, "learning_rate": 1.3519318820111983e-07, "loss": 0.5133, "step": 4308 }, { "epoch": 0.93, "grad_norm": 0.16990455985069275, "learning_rate": 1.3438850071348874e-07, "loss": 0.5251, "step": 4309 }, { "epoch": 0.93, "grad_norm": 0.1605384796857834, "learning_rate": 1.3358618255925214e-07, "loss": 0.5038, "step": 4310 }, { "epoch": 0.93, "grad_norm": 0.13191020488739014, "learning_rate": 1.3278623412910308e-07, "loss": 0.5257, "step": 4311 }, { "epoch": 0.93, "grad_norm": 0.1355755478143692, "learning_rate": 1.3198865581258046e-07, "loss": 0.5244, "step": 4312 }, { "epoch": 0.93, "grad_norm": 0.1625167280435562, "learning_rate": 1.311934479980681e-07, "loss": 0.4965, "step": 4313 }, { "epoch": 0.93, "grad_norm": 0.18114399909973145, "learning_rate": 1.3040061107279679e-07, "loss": 0.5235, "step": 4314 }, { "epoch": 0.93, "grad_norm": 0.15504209697246552, "learning_rate": 1.2961014542284266e-07, "loss": 0.5038, "step": 4315 }, { "epoch": 0.93, "grad_norm": 0.20267391204833984, "learning_rate": 1.2882205143312676e-07, "loss": 0.4623, "step": 4316 }, { "epoch": 0.93, "grad_norm": 0.1550229787826538, "learning_rate": 1.280363294874154e-07, "loss": 0.4784, "step": 4317 }, { "epoch": 0.93, "grad_norm": 0.1660616248846054, "learning_rate": 1.272529799683192e-07, "loss": 0.4987, "step": 4318 }, { "epoch": 0.93, "grad_norm": 0.15414029359817505, "learning_rate": 1.264720032572947e-07, "loss": 0.56, "step": 4319 }, { "epoch": 0.93, "grad_norm": 0.18424440920352936, "learning_rate": 1.2569339973464155e-07, "loss": 0.4993, "step": 4320 }, { "epoch": 0.93, "grad_norm": 0.1249246671795845, "learning_rate": 1.249171697795054e-07, "loss": 0.486, "step": 4321 }, { "epoch": 0.93, "grad_norm": 0.15937843918800354, "learning_rate": 1.2414331376987555e-07, "loss": 0.5439, "step": 4322 }, { "epoch": 0.93, "grad_norm": 0.16112691164016724, 
"learning_rate": 1.233718320825833e-07, "loss": 0.4971, "step": 4323 }, { "epoch": 0.93, "grad_norm": 0.13961079716682434, "learning_rate": 1.2260272509330707e-07, "loss": 0.5513, "step": 4324 }, { "epoch": 0.93, "grad_norm": 0.1391015648841858, "learning_rate": 1.218359931765667e-07, "loss": 0.5472, "step": 4325 }, { "epoch": 0.93, "grad_norm": 0.1630607694387436, "learning_rate": 1.2107163670572574e-07, "loss": 0.5002, "step": 4326 }, { "epoch": 0.93, "grad_norm": 0.16287516057491302, "learning_rate": 1.2030965605299204e-07, "loss": 0.4701, "step": 4327 }, { "epoch": 0.93, "grad_norm": 0.12734615802764893, "learning_rate": 1.195500515894149e-07, "loss": 0.5591, "step": 4328 }, { "epoch": 0.93, "grad_norm": 0.16435910761356354, "learning_rate": 1.1879282368488787e-07, "loss": 0.5503, "step": 4329 }, { "epoch": 0.93, "grad_norm": 0.16866935789585114, "learning_rate": 1.1803797270814765e-07, "loss": 0.518, "step": 4330 }, { "epoch": 0.93, "grad_norm": 0.17033065855503082, "learning_rate": 1.1728549902677133e-07, "loss": 0.4658, "step": 4331 }, { "epoch": 0.93, "grad_norm": 0.18168850243091583, "learning_rate": 1.165354030071808e-07, "loss": 0.5154, "step": 4332 }, { "epoch": 0.93, "grad_norm": 0.15495000779628754, "learning_rate": 1.1578768501463722e-07, "loss": 0.5399, "step": 4333 }, { "epoch": 0.93, "grad_norm": 0.14426656067371368, "learning_rate": 1.1504234541324765e-07, "loss": 0.4739, "step": 4334 }, { "epoch": 0.93, "grad_norm": 0.1701272577047348, "learning_rate": 1.1429938456595735e-07, "loss": 0.5633, "step": 4335 }, { "epoch": 0.93, "grad_norm": 0.16904759407043457, "learning_rate": 1.1355880283455523e-07, "loss": 0.528, "step": 4336 }, { "epoch": 0.93, "grad_norm": 0.1610129028558731, "learning_rate": 1.1282060057967226e-07, "loss": 0.5077, "step": 4337 }, { "epoch": 0.93, "grad_norm": 0.1449388712644577, "learning_rate": 1.1208477816077756e-07, "loss": 0.5261, "step": 4338 }, { "epoch": 0.93, "grad_norm": 0.17686261236667633, "learning_rate": 1.1135133593618508e-07, "loss": 0.5136, "step": 4339 }, { "epoch": 0.93, "grad_norm": 0.13631290197372437, "learning_rate": 1.1062027426304744e-07, "loss": 0.5105, "step": 4340 }, { "epoch": 0.94, "grad_norm": 0.15161027014255524, "learning_rate": 1.0989159349735879e-07, "loss": 0.5221, "step": 4341 }, { "epoch": 0.94, "grad_norm": 0.15384641289710999, "learning_rate": 1.091652939939547e-07, "loss": 0.5192, "step": 4342 }, { "epoch": 0.94, "grad_norm": 0.166702538728714, "learning_rate": 1.084413761065084e-07, "loss": 0.5481, "step": 4343 }, { "epoch": 0.94, "grad_norm": 0.15912270545959473, "learning_rate": 1.0771984018753733e-07, "loss": 0.6039, "step": 4344 }, { "epoch": 0.94, "grad_norm": 0.15669448673725128, "learning_rate": 1.0700068658839491e-07, "loss": 0.5047, "step": 4345 }, { "epoch": 0.94, "grad_norm": 0.16294890642166138, "learning_rate": 1.0628391565927765e-07, "loss": 0.5736, "step": 4346 }, { "epoch": 0.94, "grad_norm": 0.18943636119365692, "learning_rate": 1.0556952774922136e-07, "loss": 0.5612, "step": 4347 }, { "epoch": 0.94, "grad_norm": 0.1898173987865448, "learning_rate": 1.0485752320609944e-07, "loss": 0.5456, "step": 4348 }, { "epoch": 0.94, "grad_norm": 0.13543102145195007, "learning_rate": 1.0414790237662676e-07, "loss": 0.5888, "step": 4349 }, { "epoch": 0.94, "grad_norm": 0.1901504397392273, "learning_rate": 1.0344066560635635e-07, "loss": 0.5364, "step": 4350 }, { "epoch": 0.94, "grad_norm": 0.16581448912620544, "learning_rate": 1.0273581323968052e-07, "loss": 0.4779, "step": 4351 }, { "epoch": 0.94, 
"grad_norm": 0.16107046604156494, "learning_rate": 1.0203334561983025e-07, "loss": 0.5074, "step": 4352 }, { "epoch": 0.94, "grad_norm": 0.15327927470207214, "learning_rate": 1.0133326308887692e-07, "loss": 0.5471, "step": 4353 }, { "epoch": 0.94, "grad_norm": 0.1985284835100174, "learning_rate": 1.0063556598772839e-07, "loss": 0.5462, "step": 4354 }, { "epoch": 0.94, "grad_norm": 0.13533158600330353, "learning_rate": 9.994025465613122e-08, "loss": 0.5763, "step": 4355 }, { "epoch": 0.94, "grad_norm": 0.19730281829833984, "learning_rate": 9.924732943267068e-08, "loss": 0.535, "step": 4356 }, { "epoch": 0.94, "grad_norm": 0.18454429507255554, "learning_rate": 9.855679065477131e-08, "loss": 0.5222, "step": 4357 }, { "epoch": 0.94, "grad_norm": 0.15890662372112274, "learning_rate": 9.7868638658693e-08, "loss": 0.4811, "step": 4358 }, { "epoch": 0.94, "grad_norm": 0.181091770529747, "learning_rate": 9.71828737795355e-08, "loss": 0.5643, "step": 4359 }, { "epoch": 0.94, "grad_norm": 0.13532613217830658, "learning_rate": 9.6499496351235e-08, "loss": 0.5115, "step": 4360 }, { "epoch": 0.94, "grad_norm": 0.15786287188529968, "learning_rate": 9.581850670656644e-08, "loss": 0.5078, "step": 4361 }, { "epoch": 0.94, "grad_norm": 0.1745007038116455, "learning_rate": 9.513990517713955e-08, "loss": 0.5805, "step": 4362 }, { "epoch": 0.94, "grad_norm": 0.15297739207744598, "learning_rate": 9.446369209340334e-08, "loss": 0.4882, "step": 4363 }, { "epoch": 0.94, "grad_norm": 0.1355600208044052, "learning_rate": 9.378986778464327e-08, "loss": 0.4854, "step": 4364 }, { "epoch": 0.94, "grad_norm": 0.1561882495880127, "learning_rate": 9.311843257898134e-08, "loss": 0.491, "step": 4365 }, { "epoch": 0.94, "grad_norm": 0.17752040922641754, "learning_rate": 9.244938680337656e-08, "loss": 0.5178, "step": 4366 }, { "epoch": 0.94, "grad_norm": 0.12778738141059875, "learning_rate": 9.178273078362332e-08, "loss": 0.5, "step": 4367 }, { "epoch": 0.94, "grad_norm": 0.1494607776403427, "learning_rate": 9.111846484435361e-08, "loss": 0.5469, "step": 4368 }, { "epoch": 0.94, "grad_norm": 0.1332845240831375, "learning_rate": 9.045658930903477e-08, "loss": 0.5386, "step": 4369 }, { "epoch": 0.94, "grad_norm": 0.18359340727329254, "learning_rate": 8.979710449997014e-08, "loss": 0.5668, "step": 4370 }, { "epoch": 0.94, "grad_norm": 0.16064810752868652, "learning_rate": 8.914001073829892e-08, "loss": 0.5341, "step": 4371 }, { "epoch": 0.94, "grad_norm": 0.14224553108215332, "learning_rate": 8.848530834399683e-08, "loss": 0.5512, "step": 4372 }, { "epoch": 0.94, "grad_norm": 0.14381971955299377, "learning_rate": 8.783299763587439e-08, "loss": 0.5154, "step": 4373 }, { "epoch": 0.94, "grad_norm": 0.1366354078054428, "learning_rate": 8.718307893157696e-08, "loss": 0.5354, "step": 4374 }, { "epoch": 0.94, "grad_norm": 0.21582616865634918, "learning_rate": 8.653555254758583e-08, "loss": 0.5755, "step": 4375 }, { "epoch": 0.94, "grad_norm": 0.18118129670619965, "learning_rate": 8.589041879921711e-08, "loss": 0.5604, "step": 4376 }, { "epoch": 0.94, "grad_norm": 0.18753331899642944, "learning_rate": 8.524767800062228e-08, "loss": 0.5141, "step": 4377 }, { "epoch": 0.94, "grad_norm": 0.15496698021888733, "learning_rate": 8.460733046478653e-08, "loss": 0.5408, "step": 4378 }, { "epoch": 0.94, "grad_norm": 0.19295796751976013, "learning_rate": 8.396937650353042e-08, "loss": 0.5633, "step": 4379 }, { "epoch": 0.94, "grad_norm": 0.16296663880348206, "learning_rate": 8.333381642750882e-08, "loss": 0.4816, "step": 4380 }, { "epoch": 0.94, 
"grad_norm": 0.18352928757667542, "learning_rate": 8.270065054621135e-08, "loss": 0.521, "step": 4381 }, { "epoch": 0.94, "grad_norm": 0.1636262685060501, "learning_rate": 8.206987916796027e-08, "loss": 0.492, "step": 4382 }, { "epoch": 0.94, "grad_norm": 0.1417970508337021, "learning_rate": 8.144150259991323e-08, "loss": 0.4883, "step": 4383 }, { "epoch": 0.94, "grad_norm": 0.15278513729572296, "learning_rate": 8.081552114806101e-08, "loss": 0.5371, "step": 4384 }, { "epoch": 0.94, "grad_norm": 0.1443348526954651, "learning_rate": 8.019193511722922e-08, "loss": 0.4936, "step": 4385 }, { "epoch": 0.94, "grad_norm": 0.17426589131355286, "learning_rate": 7.957074481107551e-08, "loss": 0.5743, "step": 4386 }, { "epoch": 0.95, "grad_norm": 0.1521102637052536, "learning_rate": 7.895195053209126e-08, "loss": 0.5066, "step": 4387 }, { "epoch": 0.95, "grad_norm": 0.1313631236553192, "learning_rate": 7.833555258160208e-08, "loss": 0.4878, "step": 4388 }, { "epoch": 0.95, "grad_norm": 0.1430417150259018, "learning_rate": 7.77215512597651e-08, "loss": 0.5264, "step": 4389 }, { "epoch": 0.95, "grad_norm": 0.1771220713853836, "learning_rate": 7.710994686557172e-08, "loss": 0.5333, "step": 4390 }, { "epoch": 0.95, "grad_norm": 0.13800616562366486, "learning_rate": 7.650073969684646e-08, "loss": 0.5203, "step": 4391 }, { "epoch": 0.95, "grad_norm": 0.1415596306324005, "learning_rate": 7.589393005024482e-08, "loss": 0.5199, "step": 4392 }, { "epoch": 0.95, "grad_norm": 0.1424768567085266, "learning_rate": 7.528951822125596e-08, "loss": 0.4892, "step": 4393 }, { "epoch": 0.95, "grad_norm": 0.15463979542255402, "learning_rate": 7.468750450420114e-08, "loss": 0.4966, "step": 4394 }, { "epoch": 0.95, "grad_norm": 0.12930360436439514, "learning_rate": 7.40878891922342e-08, "loss": 0.525, "step": 4395 }, { "epoch": 0.95, "grad_norm": 0.12379728257656097, "learning_rate": 7.349067257733989e-08, "loss": 0.532, "step": 4396 }, { "epoch": 0.95, "grad_norm": 0.15126173198223114, "learning_rate": 7.289585495033668e-08, "loss": 0.5074, "step": 4397 }, { "epoch": 0.95, "grad_norm": 0.1340315043926239, "learning_rate": 7.230343660087402e-08, "loss": 0.5003, "step": 4398 }, { "epoch": 0.95, "grad_norm": 0.14905254542827606, "learning_rate": 7.171341781743224e-08, "loss": 0.5331, "step": 4399 }, { "epoch": 0.95, "grad_norm": 0.13680437207221985, "learning_rate": 7.11257988873243e-08, "loss": 0.5186, "step": 4400 }, { "epoch": 0.95, "grad_norm": 0.1248023733496666, "learning_rate": 7.054058009669407e-08, "loss": 0.4576, "step": 4401 }, { "epoch": 0.95, "grad_norm": 0.14953729510307312, "learning_rate": 6.995776173051583e-08, "loss": 0.4709, "step": 4402 }, { "epoch": 0.95, "grad_norm": 0.139199897646904, "learning_rate": 6.937734407259756e-08, "loss": 0.5412, "step": 4403 }, { "epoch": 0.95, "grad_norm": 0.1763693392276764, "learning_rate": 6.879932740557538e-08, "loss": 0.5147, "step": 4404 }, { "epoch": 0.95, "grad_norm": 0.17772704362869263, "learning_rate": 6.822371201091749e-08, "loss": 0.5658, "step": 4405 }, { "epoch": 0.95, "grad_norm": 0.16532278060913086, "learning_rate": 6.7650498168923e-08, "loss": 0.4743, "step": 4406 }, { "epoch": 0.95, "grad_norm": 0.17449362576007843, "learning_rate": 6.707968615872085e-08, "loss": 0.5396, "step": 4407 }, { "epoch": 0.95, "grad_norm": 0.18282443284988403, "learning_rate": 6.651127625827037e-08, "loss": 0.5423, "step": 4408 }, { "epoch": 0.95, "grad_norm": 0.13201217353343964, "learning_rate": 6.594526874436236e-08, "loss": 0.5364, "step": 4409 }, { "epoch": 0.95, 
"grad_norm": 0.1461392194032669, "learning_rate": 6.538166389261635e-08, "loss": 0.5235, "step": 4410 }, { "epoch": 0.95, "grad_norm": 0.15727302432060242, "learning_rate": 6.482046197748282e-08, "loss": 0.4949, "step": 4411 }, { "epoch": 0.95, "grad_norm": 0.1405402272939682, "learning_rate": 6.426166327224148e-08, "loss": 0.5097, "step": 4412 }, { "epoch": 0.95, "grad_norm": 0.14864054322242737, "learning_rate": 6.3705268049003e-08, "loss": 0.5318, "step": 4413 }, { "epoch": 0.95, "grad_norm": 0.13717585802078247, "learning_rate": 6.315127657870513e-08, "loss": 0.4753, "step": 4414 }, { "epoch": 0.95, "grad_norm": 0.15826748311519623, "learning_rate": 6.259968913111869e-08, "loss": 0.522, "step": 4415 }, { "epoch": 0.95, "grad_norm": 0.1390410214662552, "learning_rate": 6.205050597483997e-08, "loss": 0.5485, "step": 4416 }, { "epoch": 0.95, "grad_norm": 0.13676656782627106, "learning_rate": 6.150372737729781e-08, "loss": 0.5234, "step": 4417 }, { "epoch": 0.95, "grad_norm": 0.20203281939029694, "learning_rate": 6.095935360474814e-08, "loss": 0.5139, "step": 4418 }, { "epoch": 0.95, "grad_norm": 0.16294489800930023, "learning_rate": 6.041738492227666e-08, "loss": 0.5323, "step": 4419 }, { "epoch": 0.95, "grad_norm": 0.19889448583126068, "learning_rate": 5.98778215937973e-08, "loss": 0.5655, "step": 4420 }, { "epoch": 0.95, "grad_norm": 0.14263413846492767, "learning_rate": 5.9340663882053727e-08, "loss": 0.5585, "step": 4421 }, { "epoch": 0.95, "grad_norm": 0.14396370947360992, "learning_rate": 5.880591204861674e-08, "loss": 0.5063, "step": 4422 }, { "epoch": 0.95, "grad_norm": 0.1644524484872818, "learning_rate": 5.827356635388692e-08, "loss": 0.5243, "step": 4423 }, { "epoch": 0.95, "grad_norm": 0.16655051708221436, "learning_rate": 5.7743627057092463e-08, "loss": 0.5033, "step": 4424 }, { "epoch": 0.95, "grad_norm": 0.18211567401885986, "learning_rate": 5.721609441629028e-08, "loss": 0.5083, "step": 4425 }, { "epoch": 0.95, "grad_norm": 0.14441342651844025, "learning_rate": 5.669096868836377e-08, "loss": 0.4764, "step": 4426 }, { "epoch": 0.95, "grad_norm": 0.14789772033691406, "learning_rate": 5.616825012902616e-08, "loss": 0.4914, "step": 4427 }, { "epoch": 0.95, "grad_norm": 0.1412544995546341, "learning_rate": 5.564793899281884e-08, "loss": 0.4267, "step": 4428 }, { "epoch": 0.95, "grad_norm": 0.15629424154758453, "learning_rate": 5.5130035533108587e-08, "loss": 0.4807, "step": 4429 }, { "epoch": 0.95, "grad_norm": 0.1984405219554901, "learning_rate": 5.461454000209199e-08, "loss": 0.5074, "step": 4430 }, { "epoch": 0.95, "grad_norm": 0.15565001964569092, "learning_rate": 5.410145265079103e-08, "loss": 0.4852, "step": 4431 }, { "epoch": 0.95, "grad_norm": 0.16649481654167175, "learning_rate": 5.3590773729056965e-08, "loss": 0.5007, "step": 4432 }, { "epoch": 0.95, "grad_norm": 0.14113038778305054, "learning_rate": 5.3082503485566425e-08, "loss": 0.4873, "step": 4433 }, { "epoch": 0.96, "grad_norm": 0.14539320766925812, "learning_rate": 5.257664216782532e-08, "loss": 0.4856, "step": 4434 }, { "epoch": 0.96, "grad_norm": 0.1761976182460785, "learning_rate": 5.2073190022164933e-08, "loss": 0.4845, "step": 4435 }, { "epoch": 0.96, "grad_norm": 0.1391577571630478, "learning_rate": 5.157214729374305e-08, "loss": 0.4913, "step": 4436 }, { "epoch": 0.96, "grad_norm": 0.16997891664505005, "learning_rate": 5.107351422654561e-08, "loss": 0.5419, "step": 4437 }, { "epoch": 0.96, "grad_norm": 0.15367954969406128, "learning_rate": 5.057729106338505e-08, "loss": 0.4658, "step": 4438 }, { "epoch": 
0.96, "grad_norm": 0.15846063196659088, "learning_rate": 5.008347804589808e-08, "loss": 0.5814, "step": 4439 }, { "epoch": 0.96, "grad_norm": 0.18725064396858215, "learning_rate": 4.959207541455013e-08, "loss": 0.5488, "step": 4440 }, { "epoch": 0.96, "grad_norm": 0.14484313130378723, "learning_rate": 4.910308340863201e-08, "loss": 0.4471, "step": 4441 }, { "epoch": 0.96, "grad_norm": 0.1446012258529663, "learning_rate": 4.8616502266261026e-08, "loss": 0.5428, "step": 4442 }, { "epoch": 0.96, "grad_norm": 0.17468306422233582, "learning_rate": 4.813233222438041e-08, "loss": 0.5287, "step": 4443 }, { "epoch": 0.96, "grad_norm": 0.14374323189258575, "learning_rate": 4.765057351875879e-08, "loss": 0.5374, "step": 4444 }, { "epoch": 0.96, "grad_norm": 0.14365346729755402, "learning_rate": 4.7171226383990745e-08, "loss": 0.5042, "step": 4445 }, { "epoch": 0.96, "grad_norm": 0.16741974651813507, "learning_rate": 4.6694291053496766e-08, "loss": 0.5172, "step": 4446 }, { "epoch": 0.96, "grad_norm": 0.15114641189575195, "learning_rate": 4.621976775952386e-08, "loss": 0.4949, "step": 4447 }, { "epoch": 0.96, "grad_norm": 0.13638369739055634, "learning_rate": 4.5747656733142184e-08, "loss": 0.5654, "step": 4448 }, { "epoch": 0.96, "grad_norm": 0.15733817219734192, "learning_rate": 4.527795820424896e-08, "loss": 0.5382, "step": 4449 }, { "epoch": 0.96, "grad_norm": 0.18564561009407043, "learning_rate": 4.481067240156678e-08, "loss": 0.5269, "step": 4450 }, { "epoch": 0.96, "grad_norm": 0.2260461002588272, "learning_rate": 4.43457995526414e-08, "loss": 0.546, "step": 4451 }, { "epoch": 0.96, "grad_norm": 0.15831370651721954, "learning_rate": 4.3883339883846186e-08, "loss": 0.4982, "step": 4452 }, { "epoch": 0.96, "grad_norm": 0.16516351699829102, "learning_rate": 4.342329362037767e-08, "loss": 0.5072, "step": 4453 }, { "epoch": 0.96, "grad_norm": 0.16760680079460144, "learning_rate": 4.296566098625776e-08, "loss": 0.4515, "step": 4454 }, { "epoch": 0.96, "grad_norm": 0.12296677380800247, "learning_rate": 4.25104422043332e-08, "loss": 0.4929, "step": 4455 }, { "epoch": 0.96, "grad_norm": 0.16518919169902802, "learning_rate": 4.2057637496273896e-08, "loss": 0.524, "step": 4456 }, { "epoch": 0.96, "grad_norm": 0.17474794387817383, "learning_rate": 4.16072470825768e-08, "loss": 0.5345, "step": 4457 }, { "epoch": 0.96, "grad_norm": 0.13814187049865723, "learning_rate": 4.115927118256036e-08, "loss": 0.5051, "step": 4458 }, { "epoch": 0.96, "grad_norm": 0.1405845582485199, "learning_rate": 4.071371001436952e-08, "loss": 0.4459, "step": 4459 }, { "epoch": 0.96, "grad_norm": 0.13282142579555511, "learning_rate": 4.02705637949724e-08, "loss": 0.4892, "step": 4460 }, { "epoch": 0.96, "grad_norm": 0.17903049290180206, "learning_rate": 3.9829832740160834e-08, "loss": 0.5045, "step": 4461 }, { "epoch": 0.96, "grad_norm": 0.1339827924966812, "learning_rate": 3.939151706455146e-08, "loss": 0.5043, "step": 4462 }, { "epoch": 0.96, "grad_norm": 0.1924246847629547, "learning_rate": 3.895561698158357e-08, "loss": 0.4559, "step": 4463 }, { "epoch": 0.96, "grad_norm": 0.1578565388917923, "learning_rate": 3.8522132703521784e-08, "loss": 0.5406, "step": 4464 }, { "epoch": 0.96, "grad_norm": 0.17296722531318665, "learning_rate": 3.809106444145228e-08, "loss": 0.5006, "step": 4465 }, { "epoch": 0.96, "grad_norm": 0.15273821353912354, "learning_rate": 3.7662412405286567e-08, "loss": 0.5106, "step": 4466 }, { "epoch": 0.96, "grad_norm": 0.13537849485874176, "learning_rate": 3.723617680375935e-08, "loss": 0.5051, "step": 4467 }, { 
"epoch": 0.96, "grad_norm": 0.14120222628116608, "learning_rate": 3.6812357844427385e-08, "loss": 0.5358, "step": 4468 }, { "epoch": 0.96, "grad_norm": 0.1762859970331192, "learning_rate": 3.639095573367168e-08, "loss": 0.5097, "step": 4469 }, { "epoch": 0.96, "grad_norm": 0.13967743515968323, "learning_rate": 3.597197067669533e-08, "loss": 0.5434, "step": 4470 }, { "epoch": 0.96, "grad_norm": 0.18413011729717255, "learning_rate": 3.555540287752568e-08, "loss": 0.5334, "step": 4471 }, { "epoch": 0.96, "grad_norm": 0.2260027378797531, "learning_rate": 3.514125253901324e-08, "loss": 0.5245, "step": 4472 }, { "epoch": 0.96, "grad_norm": 0.14992649853229523, "learning_rate": 3.4729519862829466e-08, "loss": 0.4908, "step": 4473 }, { "epoch": 0.96, "grad_norm": 0.14780190587043762, "learning_rate": 3.432020504947064e-08, "loss": 0.5405, "step": 4474 }, { "epoch": 0.96, "grad_norm": 0.1981695592403412, "learning_rate": 3.3913308298253456e-08, "loss": 0.5227, "step": 4475 }, { "epoch": 0.96, "grad_norm": 0.23844297230243683, "learning_rate": 3.350882980731884e-08, "loss": 0.578, "step": 4476 }, { "epoch": 0.96, "grad_norm": 0.17352676391601562, "learning_rate": 3.310676977362925e-08, "loss": 0.5591, "step": 4477 }, { "epoch": 0.96, "grad_norm": 0.15774790942668915, "learning_rate": 3.27071283929703e-08, "loss": 0.5255, "step": 4478 }, { "epoch": 0.96, "grad_norm": 0.1580473780632019, "learning_rate": 3.230990585994964e-08, "loss": 0.5217, "step": 4479 }, { "epoch": 0.97, "grad_norm": 0.16399511694908142, "learning_rate": 3.191510236799589e-08, "loss": 0.4837, "step": 4480 }, { "epoch": 0.97, "grad_norm": 0.14228610694408417, "learning_rate": 3.152271810936081e-08, "loss": 0.5381, "step": 4481 }, { "epoch": 0.97, "grad_norm": 0.16083598136901855, "learning_rate": 3.113275327511767e-08, "loss": 0.4836, "step": 4482 }, { "epoch": 0.97, "grad_norm": 0.11880119889974594, "learning_rate": 3.074520805516235e-08, "loss": 0.497, "step": 4483 }, { "epoch": 0.97, "grad_norm": 0.1469106525182724, "learning_rate": 3.0360082638211666e-08, "loss": 0.5033, "step": 4484 }, { "epoch": 0.97, "grad_norm": 0.1515989750623703, "learning_rate": 2.997737721180338e-08, "loss": 0.5238, "step": 4485 }, { "epoch": 0.97, "grad_norm": 0.16306115686893463, "learning_rate": 2.959709196229954e-08, "loss": 0.5491, "step": 4486 }, { "epoch": 0.97, "grad_norm": 0.14227931201457977, "learning_rate": 2.921922707488034e-08, "loss": 0.5258, "step": 4487 }, { "epoch": 0.97, "grad_norm": 0.13914653658866882, "learning_rate": 2.8843782733549706e-08, "loss": 0.525, "step": 4488 }, { "epoch": 0.97, "grad_norm": 0.16191960871219635, "learning_rate": 2.847075912113195e-08, "loss": 0.491, "step": 4489 }, { "epoch": 0.97, "grad_norm": 0.2092602699995041, "learning_rate": 2.8100156419272885e-08, "loss": 0.5296, "step": 4490 }, { "epoch": 0.97, "grad_norm": 0.1607397496700287, "learning_rate": 2.7731974808439256e-08, "loss": 0.5445, "step": 4491 }, { "epoch": 0.97, "grad_norm": 0.15962082147598267, "learning_rate": 2.7366214467919318e-08, "loss": 0.5054, "step": 4492 }, { "epoch": 0.97, "grad_norm": 0.1509018987417221, "learning_rate": 2.7002875575820598e-08, "loss": 0.5188, "step": 4493 }, { "epoch": 0.97, "grad_norm": 0.1463000327348709, "learning_rate": 2.664195830907379e-08, "loss": 0.5455, "step": 4494 }, { "epoch": 0.97, "grad_norm": 0.15445829927921295, "learning_rate": 2.628346284342942e-08, "loss": 0.4282, "step": 4495 }, { "epoch": 0.97, "grad_norm": 0.1485372930765152, "learning_rate": 2.5927389353457842e-08, "loss": 0.5097, "step": 
4496 }, { "epoch": 0.97, "grad_norm": 0.1508139967918396, "learning_rate": 2.5573738012550918e-08, "loss": 0.5222, "step": 4497 }, { "epoch": 0.97, "grad_norm": 0.1392061859369278, "learning_rate": 2.5222508992922e-08, "loss": 0.4993, "step": 4498 }, { "epoch": 0.97, "grad_norm": 0.18485471606254578, "learning_rate": 2.4873702465602612e-08, "loss": 0.556, "step": 4499 }, { "epoch": 0.97, "grad_norm": 0.14879798889160156, "learning_rate": 2.4527318600446324e-08, "loss": 0.53, "step": 4500 }, { "epoch": 0.97, "grad_norm": 0.1910664439201355, "learning_rate": 2.4183357566125998e-08, "loss": 0.5285, "step": 4501 }, { "epoch": 0.97, "grad_norm": 0.13035574555397034, "learning_rate": 2.3841819530135424e-08, "loss": 0.4912, "step": 4502 }, { "epoch": 0.97, "grad_norm": 0.15490761399269104, "learning_rate": 2.350270465878879e-08, "loss": 0.4842, "step": 4503 }, { "epoch": 0.97, "grad_norm": 0.16533678770065308, "learning_rate": 2.3166013117218998e-08, "loss": 0.5493, "step": 4504 }, { "epoch": 0.97, "grad_norm": 0.15088367462158203, "learning_rate": 2.2831745069379907e-08, "loss": 0.5552, "step": 4505 }, { "epoch": 0.97, "grad_norm": 0.17368783056735992, "learning_rate": 2.249990067804464e-08, "loss": 0.5297, "step": 4506 }, { "epoch": 0.97, "grad_norm": 0.20666424930095673, "learning_rate": 2.2170480104807268e-08, "loss": 0.4992, "step": 4507 }, { "epoch": 0.97, "grad_norm": 0.17731893062591553, "learning_rate": 2.1843483510080032e-08, "loss": 0.4926, "step": 4508 }, { "epoch": 0.97, "grad_norm": 0.15629667043685913, "learning_rate": 2.151891105309556e-08, "loss": 0.4927, "step": 4509 }, { "epoch": 0.97, "grad_norm": 0.13626927137374878, "learning_rate": 2.119676289190631e-08, "loss": 0.4622, "step": 4510 }, { "epoch": 0.97, "grad_norm": 0.14321957528591156, "learning_rate": 2.0877039183384018e-08, "loss": 0.4869, "step": 4511 }, { "epoch": 0.97, "grad_norm": 0.15675011277198792, "learning_rate": 2.0559740083219147e-08, "loss": 0.4736, "step": 4512 }, { "epoch": 0.97, "grad_norm": 0.14412085711956024, "learning_rate": 2.024486574592255e-08, "loss": 0.457, "step": 4513 }, { "epoch": 0.97, "grad_norm": 0.15253007411956787, "learning_rate": 1.9932416324823235e-08, "loss": 0.5338, "step": 4514 }, { "epoch": 0.97, "grad_norm": 0.23936396837234497, "learning_rate": 1.9622391972071164e-08, "loss": 0.5333, "step": 4515 }, { "epoch": 0.97, "grad_norm": 0.16216091811656952, "learning_rate": 1.93147928386328e-08, "loss": 0.445, "step": 4516 }, { "epoch": 0.97, "grad_norm": 0.15972132980823517, "learning_rate": 1.9009619074296102e-08, "loss": 0.5372, "step": 4517 }, { "epoch": 0.97, "grad_norm": 0.13417501747608185, "learning_rate": 1.8706870827666646e-08, "loss": 0.4829, "step": 4518 }, { "epoch": 0.97, "grad_norm": 0.184243842959404, "learning_rate": 1.840654824616872e-08, "loss": 0.5087, "step": 4519 }, { "epoch": 0.97, "grad_norm": 0.14154835045337677, "learning_rate": 1.8108651476046457e-08, "loss": 0.5314, "step": 4520 }, { "epoch": 0.97, "grad_norm": 0.14898306131362915, "learning_rate": 1.781318066236215e-08, "loss": 0.536, "step": 4521 }, { "epoch": 0.97, "grad_norm": 0.14588691294193268, "learning_rate": 1.7520135948996263e-08, "loss": 0.5138, "step": 4522 }, { "epoch": 0.97, "grad_norm": 0.19737721979618073, "learning_rate": 1.722951747864854e-08, "loss": 0.5161, "step": 4523 }, { "epoch": 0.97, "grad_norm": 0.16627101600170135, "learning_rate": 1.6941325392837437e-08, "loss": 0.573, "step": 4524 }, { "epoch": 0.97, "grad_norm": 0.16760136187076569, "learning_rate": 1.6655559831899038e-08, "loss": 
0.5447, "step": 4525 }, { "epoch": 0.97, "grad_norm": 0.16920168697834015, "learning_rate": 1.6372220934988693e-08, "loss": 0.5242, "step": 4526 }, { "epoch": 0.98, "grad_norm": 0.1605527251958847, "learning_rate": 1.609130884007881e-08, "loss": 0.5065, "step": 4527 }, { "epoch": 0.98, "grad_norm": 0.16007407009601593, "learning_rate": 1.5812823683962198e-08, "loss": 0.4823, "step": 4528 }, { "epoch": 0.98, "grad_norm": 0.15912877023220062, "learning_rate": 1.5536765602248148e-08, "loss": 0.4905, "step": 4529 }, { "epoch": 0.98, "grad_norm": 0.1717677265405655, "learning_rate": 1.5263134729363582e-08, "loss": 0.5239, "step": 4530 }, { "epoch": 0.98, "grad_norm": 0.16128626465797424, "learning_rate": 1.49919311985558e-08, "loss": 0.5036, "step": 4531 }, { "epoch": 0.98, "grad_norm": 0.15915502607822418, "learning_rate": 1.472315514188749e-08, "loss": 0.4892, "step": 4532 }, { "epoch": 0.98, "grad_norm": 0.18323585391044617, "learning_rate": 1.4456806690241187e-08, "loss": 0.487, "step": 4533 }, { "epoch": 0.98, "grad_norm": 0.16480879485607147, "learning_rate": 1.4192885973315918e-08, "loss": 0.5064, "step": 4534 }, { "epoch": 0.98, "grad_norm": 0.15716604888439178, "learning_rate": 1.3931393119629987e-08, "loss": 0.4886, "step": 4535 }, { "epoch": 0.98, "grad_norm": 0.1497613936662674, "learning_rate": 1.3672328256518208e-08, "loss": 0.5372, "step": 4536 }, { "epoch": 0.98, "grad_norm": 0.14100497961044312, "learning_rate": 1.3415691510133555e-08, "loss": 0.4763, "step": 4537 }, { "epoch": 0.98, "grad_norm": 0.16514605283737183, "learning_rate": 1.3161483005446618e-08, "loss": 0.5506, "step": 4538 }, { "epoch": 0.98, "grad_norm": 0.17945894598960876, "learning_rate": 1.2909702866245045e-08, "loss": 0.5278, "step": 4539 }, { "epoch": 0.98, "grad_norm": 0.13528050482273102, "learning_rate": 1.2660351215135203e-08, "loss": 0.4926, "step": 4540 }, { "epoch": 0.98, "grad_norm": 0.1794215887784958, "learning_rate": 1.241342817353941e-08, "loss": 0.4929, "step": 4541 }, { "epoch": 0.98, "grad_norm": 0.14071007072925568, "learning_rate": 1.2168933861698151e-08, "loss": 0.502, "step": 4542 }, { "epoch": 0.98, "grad_norm": 0.16157187521457672, "learning_rate": 1.1926868398669522e-08, "loss": 0.5321, "step": 4543 }, { "epoch": 0.98, "grad_norm": 0.15778125822544098, "learning_rate": 1.1687231902328122e-08, "loss": 0.4767, "step": 4544 }, { "epoch": 0.98, "grad_norm": 0.16504241526126862, "learning_rate": 1.1450024489366163e-08, "loss": 0.5074, "step": 4545 }, { "epoch": 0.98, "grad_norm": 0.16821053624153137, "learning_rate": 1.1215246275292913e-08, "loss": 0.5654, "step": 4546 }, { "epoch": 0.98, "grad_norm": 0.18481959402561188, "learning_rate": 1.0982897374435252e-08, "loss": 0.5467, "step": 4547 }, { "epoch": 0.98, "grad_norm": 0.18275012075901031, "learning_rate": 1.0752977899936013e-08, "loss": 0.5656, "step": 4548 }, { "epoch": 0.98, "grad_norm": 0.15175531804561615, "learning_rate": 1.0525487963756186e-08, "loss": 0.5455, "step": 4549 }, { "epoch": 0.98, "grad_norm": 0.14397265017032623, "learning_rate": 1.0300427676672164e-08, "loss": 0.5424, "step": 4550 }, { "epoch": 0.98, "grad_norm": 0.14812220633029938, "learning_rate": 1.0077797148279056e-08, "loss": 0.5515, "step": 4551 }, { "epoch": 0.98, "grad_norm": 0.15202391147613525, "learning_rate": 9.85759648698792e-09, "loss": 0.5168, "step": 4552 }, { "epoch": 0.98, "grad_norm": 0.221909299492836, "learning_rate": 9.63982580002576e-09, "loss": 0.5585, "step": 4553 }, { "epoch": 0.98, "grad_norm": 0.1755637228488922, "learning_rate": 
9.42448519343775e-09, "loss": 0.4478, "step": 4554 }, { "epoch": 0.98, "grad_norm": 0.2023898959159851, "learning_rate": 9.211574772085009e-09, "loss": 0.5461, "step": 4555 }, { "epoch": 0.98, "grad_norm": 0.26978346705436707, "learning_rate": 9.001094639645158e-09, "loss": 0.5124, "step": 4556 }, { "epoch": 0.98, "grad_norm": 0.42439812421798706, "learning_rate": 8.793044898612324e-09, "loss": 0.4763, "step": 4557 }, { "epoch": 0.98, "grad_norm": 0.1549844741821289, "learning_rate": 8.587425650297688e-09, "loss": 0.5193, "step": 4558 }, { "epoch": 0.98, "grad_norm": 0.16406425833702087, "learning_rate": 8.384236994828376e-09, "loss": 0.5499, "step": 4559 }, { "epoch": 0.98, "grad_norm": 0.15720784664154053, "learning_rate": 8.183479031148022e-09, "loss": 0.5331, "step": 4560 }, { "epoch": 0.98, "grad_norm": 0.13800294697284698, "learning_rate": 7.98515185701676e-09, "loss": 0.493, "step": 4561 }, { "epoch": 0.98, "grad_norm": 0.17194029688835144, "learning_rate": 7.789255569011223e-09, "loss": 0.5432, "step": 4562 }, { "epoch": 0.98, "grad_norm": 0.13786643743515015, "learning_rate": 7.595790262523995e-09, "loss": 0.5465, "step": 4563 }, { "epoch": 0.98, "grad_norm": 0.13633519411087036, "learning_rate": 7.40475603176416e-09, "loss": 0.4532, "step": 4564 }, { "epoch": 0.98, "grad_norm": 0.1479789763689041, "learning_rate": 7.216152969755641e-09, "loss": 0.459, "step": 4565 }, { "epoch": 0.98, "grad_norm": 0.15188181400299072, "learning_rate": 7.029981168341082e-09, "loss": 0.5184, "step": 4566 }, { "epoch": 0.98, "grad_norm": 0.19617387652397156, "learning_rate": 6.846240718176855e-09, "loss": 0.4477, "step": 4567 }, { "epoch": 0.98, "grad_norm": 0.1465783268213272, "learning_rate": 6.664931708736943e-09, "loss": 0.5164, "step": 4568 }, { "epoch": 0.98, "grad_norm": 0.17284545302391052, "learning_rate": 6.486054228309613e-09, "loss": 0.5033, "step": 4569 }, { "epoch": 0.98, "grad_norm": 0.1475616693496704, "learning_rate": 6.309608364001296e-09, "loss": 0.5787, "step": 4570 }, { "epoch": 0.98, "grad_norm": 0.21322426199913025, "learning_rate": 6.1355942017321534e-09, "loss": 0.5463, "step": 4571 }, { "epoch": 0.98, "grad_norm": 0.13938239216804504, "learning_rate": 5.9640118262399575e-09, "loss": 0.4885, "step": 4572 }, { "epoch": 0.99, "grad_norm": 0.14598731696605682, "learning_rate": 5.794861321077872e-09, "loss": 0.5078, "step": 4573 }, { "epoch": 0.99, "grad_norm": 0.13491961359977722, "learning_rate": 5.628142768613343e-09, "loss": 0.5065, "step": 4574 }, { "epoch": 0.99, "grad_norm": 0.1279793381690979, "learning_rate": 5.4638562500319844e-09, "loss": 0.5256, "step": 4575 }, { "epoch": 0.99, "grad_norm": 0.12754946947097778, "learning_rate": 5.302001845333138e-09, "loss": 0.5323, "step": 4576 }, { "epoch": 0.99, "grad_norm": 0.1419224590063095, "learning_rate": 5.1425796333332e-09, "loss": 0.5048, "step": 4577 }, { "epoch": 0.99, "grad_norm": 0.14779268205165863, "learning_rate": 4.9855896916634065e-09, "loss": 0.5018, "step": 4578 }, { "epoch": 0.99, "grad_norm": 0.15771281719207764, "learning_rate": 4.831032096770383e-09, "loss": 0.54, "step": 4579 }, { "epoch": 0.99, "grad_norm": 0.14732089638710022, "learning_rate": 4.678906923916704e-09, "loss": 0.5594, "step": 4580 }, { "epoch": 0.99, "grad_norm": 0.13935159146785736, "learning_rate": 4.529214247181446e-09, "loss": 0.4833, "step": 4581 }, { "epoch": 0.99, "grad_norm": 0.1624104082584381, "learning_rate": 4.381954139457411e-09, "loss": 0.5112, "step": 4582 }, { "epoch": 0.99, "grad_norm": 0.14294303953647614, "learning_rate": 
4.237126672453351e-09, "loss": 0.5499, "step": 4583 }, { "epoch": 0.99, "grad_norm": 0.16533686220645905, "learning_rate": 4.094731916693962e-09, "loss": 0.5159, "step": 4584 }, { "epoch": 0.99, "grad_norm": 0.14635160565376282, "learning_rate": 3.9547699415198874e-09, "loss": 0.5057, "step": 4585 }, { "epoch": 0.99, "grad_norm": 0.149323508143425, "learning_rate": 3.817240815084944e-09, "loss": 0.4638, "step": 4586 }, { "epoch": 0.99, "grad_norm": 0.19318562746047974, "learning_rate": 3.68214460436056e-09, "loss": 0.5073, "step": 4587 }, { "epoch": 0.99, "grad_norm": 0.139207124710083, "learning_rate": 3.5494813751324466e-09, "loss": 0.5214, "step": 4588 }, { "epoch": 0.99, "grad_norm": 0.16351597011089325, "learning_rate": 3.4192511920011495e-09, "loss": 0.548, "step": 4589 }, { "epoch": 0.99, "grad_norm": 0.21700578927993774, "learning_rate": 3.291454118383164e-09, "loss": 0.5148, "step": 4590 }, { "epoch": 0.99, "grad_norm": 0.1478326916694641, "learning_rate": 3.1660902165098205e-09, "loss": 0.5089, "step": 4591 }, { "epoch": 0.99, "grad_norm": 0.13109348714351654, "learning_rate": 3.043159547427843e-09, "loss": 0.5693, "step": 4592 }, { "epoch": 0.99, "grad_norm": 0.18486939370632172, "learning_rate": 2.922662170998791e-09, "loss": 0.5101, "step": 4593 }, { "epoch": 0.99, "grad_norm": 0.23270238935947418, "learning_rate": 2.804598145899062e-09, "loss": 0.5173, "step": 4594 }, { "epoch": 0.99, "grad_norm": 0.13426940143108368, "learning_rate": 2.688967529621556e-09, "loss": 0.5174, "step": 4595 }, { "epoch": 0.99, "grad_norm": 0.1617153435945511, "learning_rate": 2.575770378472342e-09, "loss": 0.509, "step": 4596 }, { "epoch": 0.99, "grad_norm": 0.17806501686573029, "learning_rate": 2.4650067475734398e-09, "loss": 0.5115, "step": 4597 }, { "epoch": 0.99, "grad_norm": 0.16140803694725037, "learning_rate": 2.3566766908622586e-09, "loss": 0.5718, "step": 4598 }, { "epoch": 0.99, "grad_norm": 0.1904280185699463, "learning_rate": 2.25078026108938e-09, "loss": 0.4756, "step": 4599 }, { "epoch": 0.99, "grad_norm": 0.154715433716774, "learning_rate": 2.1473175098229993e-09, "loss": 0.5476, "step": 4600 }, { "epoch": 0.99, "grad_norm": 0.13388168811798096, "learning_rate": 2.046288487444481e-09, "loss": 0.5101, "step": 4601 }, { "epoch": 0.99, "grad_norm": 0.15827472507953644, "learning_rate": 1.9476932431500286e-09, "loss": 0.5506, "step": 4602 }, { "epoch": 0.99, "grad_norm": 0.1361783891916275, "learning_rate": 1.8515318249506809e-09, "loss": 0.4876, "step": 4603 }, { "epoch": 0.99, "grad_norm": 0.13444480299949646, "learning_rate": 1.7578042796739803e-09, "loss": 0.5557, "step": 4604 }, { "epoch": 0.99, "grad_norm": 0.14045651257038116, "learning_rate": 1.666510652960085e-09, "loss": 0.533, "step": 4605 }, { "epoch": 0.99, "grad_norm": 0.16582362353801727, "learning_rate": 1.5776509892645453e-09, "loss": 0.4909, "step": 4606 }, { "epoch": 0.99, "grad_norm": 0.18087778985500336, "learning_rate": 1.4912253318594138e-09, "loss": 0.5907, "step": 4607 }, { "epoch": 0.99, "grad_norm": 0.1612243801355362, "learning_rate": 1.4072337228282496e-09, "loss": 0.489, "step": 4608 }, { "epoch": 0.99, "grad_norm": 0.1329774111509323, "learning_rate": 1.3256762030727788e-09, "loss": 0.4982, "step": 4609 }, { "epoch": 0.99, "grad_norm": 0.15983089804649353, "learning_rate": 1.2465528123073445e-09, "loss": 0.554, "step": 4610 }, { "epoch": 0.99, "grad_norm": 0.14717040956020355, "learning_rate": 1.1698635890611264e-09, "loss": 0.5216, "step": 4611 }, { "epoch": 0.99, "grad_norm": 0.1585836112499237, 
"learning_rate": 1.0956085706781416e-09, "loss": 0.5291, "step": 4612 }, { "epoch": 0.99, "grad_norm": 0.1522228866815567, "learning_rate": 1.0237877933183538e-09, "loss": 0.4878, "step": 4613 }, { "epoch": 0.99, "grad_norm": 0.1510564535856247, "learning_rate": 9.54401291953788e-10, "loss": 0.454, "step": 4614 }, { "epoch": 0.99, "grad_norm": 0.15678934752941132, "learning_rate": 8.874491003735275e-10, "loss": 0.4875, "step": 4615 }, { "epoch": 0.99, "grad_norm": 0.14470191299915314, "learning_rate": 8.229312511803811e-10, "loss": 0.5576, "step": 4616 }, { "epoch": 0.99, "grad_norm": 0.14706255495548248, "learning_rate": 7.60847775790885e-10, "loss": 0.5316, "step": 4617 }, { "epoch": 0.99, "grad_norm": 0.18483339250087738, "learning_rate": 7.011987044369673e-10, "loss": 0.5227, "step": 4618 }, { "epoch": 0.99, "grad_norm": 0.15726877748966217, "learning_rate": 6.43984066165948e-10, "loss": 0.4667, "step": 4619 }, { "epoch": 1.0, "grad_norm": 0.17411313951015472, "learning_rate": 5.892038888377638e-10, "loss": 0.5399, "step": 4620 }, { "epoch": 1.0, "grad_norm": 0.12193353474140167, "learning_rate": 5.368581991282983e-10, "loss": 0.5359, "step": 4621 }, { "epoch": 1.0, "grad_norm": 0.1844024807214737, "learning_rate": 4.869470225277174e-10, "loss": 0.5551, "step": 4622 }, { "epoch": 1.0, "grad_norm": 0.144753098487854, "learning_rate": 4.3947038334046785e-10, "loss": 0.5273, "step": 4623 }, { "epoch": 1.0, "grad_norm": 0.18550604581832886, "learning_rate": 3.9442830468472414e-10, "loss": 0.5496, "step": 4624 }, { "epoch": 1.0, "grad_norm": 0.1372082531452179, "learning_rate": 3.5182080849516245e-10, "loss": 0.4912, "step": 4625 }, { "epoch": 1.0, "grad_norm": 0.16977030038833618, "learning_rate": 3.1164791551907545e-10, "loss": 0.4574, "step": 4626 }, { "epoch": 1.0, "grad_norm": 0.15275530517101288, "learning_rate": 2.739096453191481e-10, "loss": 0.5382, "step": 4627 }, { "epoch": 1.0, "grad_norm": 0.1399284303188324, "learning_rate": 2.386060162717918e-10, "loss": 0.5144, "step": 4628 }, { "epoch": 1.0, "grad_norm": 0.18410004675388336, "learning_rate": 2.05737045568255e-10, "loss": 0.5252, "step": 4629 }, { "epoch": 1.0, "grad_norm": 0.16072387993335724, "learning_rate": 1.7530274921462308e-10, "loss": 0.5003, "step": 4630 }, { "epoch": 1.0, "grad_norm": 0.18865381181240082, "learning_rate": 1.4730314203126318e-10, "loss": 0.5048, "step": 4631 }, { "epoch": 1.0, "grad_norm": 0.1395450234413147, "learning_rate": 1.21738237651714e-10, "loss": 0.4849, "step": 4632 }, { "epoch": 1.0, "grad_norm": 0.13772456347942352, "learning_rate": 9.860804852601658e-11, "loss": 0.5158, "step": 4633 }, { "epoch": 1.0, "grad_norm": 0.13184866309165955, "learning_rate": 7.791258591682837e-11, "loss": 0.526, "step": 4634 }, { "epoch": 1.0, "grad_norm": 0.15045422315597534, "learning_rate": 5.96518599021989e-11, "loss": 0.5058, "step": 4635 }, { "epoch": 1.0, "grad_norm": 0.15769906342029572, "learning_rate": 4.382587937445948e-11, "loss": 0.5534, "step": 4636 }, { "epoch": 1.0, "grad_norm": 0.172768235206604, "learning_rate": 3.0434652039668114e-11, "loss": 0.5297, "step": 4637 }, { "epoch": 1.0, "grad_norm": 0.13954681158065796, "learning_rate": 1.947818441927485e-11, "loss": 0.5442, "step": 4638 }, { "epoch": 1.0, "grad_norm": 0.12631604075431824, "learning_rate": 1.0956481847901323e-11, "loss": 0.5348, "step": 4639 }, { "epoch": 1.0, "grad_norm": 0.14625275135040283, "learning_rate": 4.8695484761163145e-12, "loss": 0.5268, "step": 4640 }, { "epoch": 1.0, "grad_norm": 0.16151678562164307, "learning_rate": 
1.2173872671050746e-12, "loss": 0.5801, "step": 4641 }, { "epoch": 1.0, "grad_norm": 0.16961205005645752, "learning_rate": 0.0, "loss": 0.5504, "step": 4642 }, { "epoch": 1.0, "step": 4642, "total_flos": 1.0981284140833309e+19, "train_loss": 0.5198679990626676, "train_runtime": 67897.289, "train_samples_per_second": 17.504, "train_steps_per_second": 0.068 } ], "logging_steps": 1.0, "max_steps": 4642, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8000, "total_flos": 1.0981284140833309e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }
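
The object above is the complete Trainer state: per-step records in "log_history" (each with "step", "loss", "learning_rate", "grad_norm") followed by a final run-summary entry ("train_loss", "train_runtime", "train_samples_per_second"). A minimal sketch of reading it back and plotting the loss and learning-rate curves follows; it assumes the file is saved under the Trainer's conventional name trainer_state.json (adjust the path as needed), and matplotlib is an added dependency, not something the log itself requires.

import json

import matplotlib.pyplot as plt

# Assumption: this JSON lives at "trainer_state.json" in the working directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry a "loss" key; the last entry is the run summary.
records = [r for r in state["log_history"] if "loss" in r]
summary = state["log_history"][-1]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
lrs = [r["learning_rate"] for r in records]

print(f"final train_loss={summary.get('train_loss')}, "
      f"runtime={summary.get('train_runtime')}s")

# Two stacked panels sharing the step axis: loss on top, LR schedule below.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
plt.tight_layout()
plt.show()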