{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 15784, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 9.532355308532715, "learning_rate": 2.109704641350211e-08, "loss": 1.3252, "step": 1 }, { "epoch": 0.0, "grad_norm": 9.528098106384277, "learning_rate": 4.219409282700422e-08, "loss": 1.3375, "step": 2 }, { "epoch": 0.0, "grad_norm": 9.426416397094727, "learning_rate": 6.329113924050633e-08, "loss": 1.3024, "step": 3 }, { "epoch": 0.0, "grad_norm": 8.792394638061523, "learning_rate": 8.438818565400844e-08, "loss": 1.2546, "step": 4 }, { "epoch": 0.0, "grad_norm": 9.00940227508545, "learning_rate": 1.0548523206751055e-07, "loss": 1.2509, "step": 5 }, { "epoch": 0.0, "grad_norm": 9.64754867553711, "learning_rate": 1.2658227848101266e-07, "loss": 1.3443, "step": 6 }, { "epoch": 0.0, "grad_norm": 8.994702339172363, "learning_rate": 1.4767932489451477e-07, "loss": 1.3474, "step": 7 }, { "epoch": 0.0, "grad_norm": 9.668071746826172, "learning_rate": 1.6877637130801689e-07, "loss": 1.2868, "step": 8 }, { "epoch": 0.0, "grad_norm": 10.72716999053955, "learning_rate": 1.89873417721519e-07, "loss": 1.3352, "step": 9 }, { "epoch": 0.0, "grad_norm": 9.297769546508789, "learning_rate": 2.109704641350211e-07, "loss": 1.3187, "step": 10 }, { "epoch": 0.0, "grad_norm": 10.302685737609863, "learning_rate": 2.3206751054852324e-07, "loss": 1.2814, "step": 11 }, { "epoch": 0.0, "grad_norm": 9.82099437713623, "learning_rate": 2.5316455696202533e-07, "loss": 1.3105, "step": 12 }, { "epoch": 0.0, "grad_norm": 9.666701316833496, "learning_rate": 2.7426160337552746e-07, "loss": 1.3054, "step": 13 }, { "epoch": 0.0, "grad_norm": 8.828336715698242, "learning_rate": 2.9535864978902955e-07, "loss": 1.2671, "step": 14 }, { "epoch": 0.0, "grad_norm": 9.0466890335083, "learning_rate": 3.164556962025317e-07, "loss": 1.3082, "step": 15 }, { "epoch": 0.0, "grad_norm": 10.455029487609863, "learning_rate": 3.3755274261603377e-07, "loss": 1.3541, "step": 16 }, { "epoch": 0.0, "grad_norm": 9.330451965332031, "learning_rate": 3.586497890295359e-07, "loss": 1.3523, "step": 17 }, { "epoch": 0.0, "grad_norm": 9.279951095581055, "learning_rate": 3.79746835443038e-07, "loss": 1.3471, "step": 18 }, { "epoch": 0.0, "grad_norm": 9.216424942016602, "learning_rate": 4.0084388185654013e-07, "loss": 1.2502, "step": 19 }, { "epoch": 0.0, "grad_norm": 9.830771446228027, "learning_rate": 4.219409282700422e-07, "loss": 1.3262, "step": 20 }, { "epoch": 0.0, "grad_norm": 8.596817970275879, "learning_rate": 4.4303797468354435e-07, "loss": 1.2861, "step": 21 }, { "epoch": 0.0, "grad_norm": 7.879246711730957, "learning_rate": 4.641350210970465e-07, "loss": 1.3028, "step": 22 }, { "epoch": 0.0, "grad_norm": 7.26005744934082, "learning_rate": 4.852320675105486e-07, "loss": 1.1705, "step": 23 }, { "epoch": 0.0, "grad_norm": 8.682889938354492, "learning_rate": 5.063291139240507e-07, "loss": 1.2799, "step": 24 }, { "epoch": 0.0, "grad_norm": 7.49697732925415, "learning_rate": 5.274261603375528e-07, "loss": 1.2697, "step": 25 }, { "epoch": 0.0, "grad_norm": 8.21834945678711, "learning_rate": 5.485232067510549e-07, "loss": 1.3346, "step": 26 }, { "epoch": 0.0, "grad_norm": 7.122488021850586, "learning_rate": 5.69620253164557e-07, "loss": 1.2485, "step": 27 }, { "epoch": 0.0, "grad_norm": 7.122411727905273, "learning_rate": 5.907172995780591e-07, "loss": 1.2263, "step": 28 }, { "epoch": 0.0, "grad_norm": 7.260400295257568, "learning_rate": 6.118143459915613e-07, "loss": 1.2086, "step": 29 }, { "epoch": 0.0, "grad_norm": 6.424813270568848, "learning_rate": 6.329113924050634e-07, "loss": 1.216, "step": 30 }, { "epoch": 0.0, "grad_norm": 6.159492492675781, "learning_rate": 6.540084388185656e-07, "loss": 1.1744, "step": 31 }, { "epoch": 0.0, "grad_norm": 6.250075817108154, "learning_rate": 6.751054852320675e-07, "loss": 1.2486, "step": 32 }, { "epoch": 0.0, "grad_norm": 6.240837097167969, "learning_rate": 6.962025316455696e-07, "loss": 1.1966, "step": 33 }, { "epoch": 0.0, "grad_norm": 6.257637023925781, "learning_rate": 7.172995780590718e-07, "loss": 1.2206, "step": 34 }, { "epoch": 0.0, "grad_norm": 6.090736389160156, "learning_rate": 7.383966244725739e-07, "loss": 1.1555, "step": 35 }, { "epoch": 0.0, "grad_norm": 6.092897891998291, "learning_rate": 7.59493670886076e-07, "loss": 1.2119, "step": 36 }, { "epoch": 0.0, "grad_norm": 5.601813793182373, "learning_rate": 7.805907172995782e-07, "loss": 1.2041, "step": 37 }, { "epoch": 0.0, "grad_norm": 5.4696221351623535, "learning_rate": 8.016877637130803e-07, "loss": 1.0983, "step": 38 }, { "epoch": 0.0, "grad_norm": 5.418956756591797, "learning_rate": 8.227848101265823e-07, "loss": 1.1278, "step": 39 }, { "epoch": 0.0, "grad_norm": 4.913137912750244, "learning_rate": 8.438818565400844e-07, "loss": 1.0996, "step": 40 }, { "epoch": 0.0, "grad_norm": 3.8683886528015137, "learning_rate": 8.649789029535865e-07, "loss": 1.1214, "step": 41 }, { "epoch": 0.0, "grad_norm": 3.4708452224731445, "learning_rate": 8.860759493670887e-07, "loss": 1.0712, "step": 42 }, { "epoch": 0.0, "grad_norm": 3.7991111278533936, "learning_rate": 9.071729957805908e-07, "loss": 1.0548, "step": 43 }, { "epoch": 0.0, "grad_norm": 3.196794033050537, "learning_rate": 9.28270042194093e-07, "loss": 1.0285, "step": 44 }, { "epoch": 0.0, "grad_norm": 3.9760518074035645, "learning_rate": 9.493670886075951e-07, "loss": 1.0126, "step": 45 }, { "epoch": 0.0, "grad_norm": 3.435934066772461, "learning_rate": 9.704641350210971e-07, "loss": 1.0275, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.9900312423706055, "learning_rate": 9.915611814345991e-07, "loss": 0.9641, "step": 47 }, { "epoch": 0.0, "grad_norm": 3.0222251415252686, "learning_rate": 1.0126582278481013e-06, "loss": 0.9919, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.916668653488159, "learning_rate": 1.0337552742616035e-06, "loss": 1.0351, "step": 49 }, { "epoch": 0.0, "grad_norm": 3.182502508163452, "learning_rate": 1.0548523206751057e-06, "loss": 1.0153, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.9524974822998047, "learning_rate": 1.0759493670886077e-06, "loss": 0.9676, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.7479350566864014, "learning_rate": 1.0970464135021099e-06, "loss": 1.0005, "step": 52 }, { "epoch": 0.0, "grad_norm": 1.9719606637954712, "learning_rate": 1.1181434599156118e-06, "loss": 1.0069, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.0742597579956055, "learning_rate": 1.139240506329114e-06, "loss": 0.9551, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.005128860473633, "learning_rate": 1.1603375527426162e-06, "loss": 0.9297, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.920249342918396, "learning_rate": 1.1814345991561182e-06, "loss": 0.878, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.9409500360488892, "learning_rate": 1.2025316455696204e-06, "loss": 0.9166, "step": 57 }, { "epoch": 0.0, "grad_norm": 1.9121850728988647, "learning_rate": 1.2236286919831226e-06, "loss": 0.9379, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.7869422435760498, "learning_rate": 1.2447257383966246e-06, "loss": 0.9852, "step": 59 }, { "epoch": 0.0, "grad_norm": 1.9101088047027588, "learning_rate": 1.2658227848101267e-06, "loss": 0.918, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.8155966997146606, "learning_rate": 1.286919831223629e-06, "loss": 0.8639, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.7884961366653442, "learning_rate": 1.3080168776371311e-06, "loss": 0.8911, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.5779091119766235, "learning_rate": 1.3291139240506329e-06, "loss": 0.8864, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.529002070426941, "learning_rate": 1.350210970464135e-06, "loss": 0.8961, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.548957109451294, "learning_rate": 1.371308016877637e-06, "loss": 0.8673, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.5970193147659302, "learning_rate": 1.3924050632911392e-06, "loss": 0.8815, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.5011041164398193, "learning_rate": 1.4135021097046414e-06, "loss": 0.889, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.4582399129867554, "learning_rate": 1.4345991561181436e-06, "loss": 0.878, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.4272208213806152, "learning_rate": 1.4556962025316456e-06, "loss": 0.8489, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.435996651649475, "learning_rate": 1.4767932489451478e-06, "loss": 0.9408, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.4072030782699585, "learning_rate": 1.49789029535865e-06, "loss": 0.8805, "step": 71 }, { "epoch": 0.0, "grad_norm": 1.4391857385635376, "learning_rate": 1.518987341772152e-06, "loss": 0.881, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.229433298110962, "learning_rate": 1.5400843881856542e-06, "loss": 0.8213, "step": 73 }, { "epoch": 0.0, "grad_norm": 1.2908027172088623, "learning_rate": 1.5611814345991563e-06, "loss": 0.8103, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.3904271125793457, "learning_rate": 1.5822784810126585e-06, "loss": 0.8272, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.211230754852295, "learning_rate": 1.6033755274261605e-06, "loss": 0.8327, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.340195655822754, "learning_rate": 1.6244725738396625e-06, "loss": 0.8251, "step": 77 }, { "epoch": 0.0, "grad_norm": 1.2882392406463623, "learning_rate": 1.6455696202531647e-06, "loss": 0.8505, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.3191945552825928, "learning_rate": 1.6666666666666667e-06, "loss": 0.92, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.2318845987319946, "learning_rate": 1.6877637130801689e-06, "loss": 0.854, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.2841240167617798, "learning_rate": 1.708860759493671e-06, "loss": 0.8995, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.2077780961990356, "learning_rate": 1.729957805907173e-06, "loss": 0.8117, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.1782020330429077, "learning_rate": 1.7510548523206752e-06, "loss": 0.8833, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.2451756000518799, "learning_rate": 1.7721518987341774e-06, "loss": 0.7938, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.2025009393692017, "learning_rate": 1.7932489451476796e-06, "loss": 0.8146, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.2381807565689087, "learning_rate": 1.8143459915611816e-06, "loss": 0.8662, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.2321275472640991, "learning_rate": 1.8354430379746838e-06, "loss": 0.8094, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.1794078350067139, "learning_rate": 1.856540084388186e-06, "loss": 0.8173, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.293404459953308, "learning_rate": 1.877637130801688e-06, "loss": 0.8645, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.269100308418274, "learning_rate": 1.8987341772151901e-06, "loss": 0.8404, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.262161374092102, "learning_rate": 1.919831223628692e-06, "loss": 0.8175, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.1058001518249512, "learning_rate": 1.9409282700421943e-06, "loss": 0.8211, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.223036289215088, "learning_rate": 1.9620253164556965e-06, "loss": 0.834, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.1103086471557617, "learning_rate": 1.9831223628691982e-06, "loss": 0.764, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.1673771142959595, "learning_rate": 2.0042194092827004e-06, "loss": 0.8599, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.1508725881576538, "learning_rate": 2.0253164556962026e-06, "loss": 0.8131, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.1243406534194946, "learning_rate": 2.046413502109705e-06, "loss": 0.695, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.088311791419983, "learning_rate": 2.067510548523207e-06, "loss": 0.7975, "step": 98 }, { "epoch": 0.01, "grad_norm": 1.161037564277649, "learning_rate": 2.088607594936709e-06, "loss": 0.76, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.1575514078140259, "learning_rate": 2.1097046413502114e-06, "loss": 0.8673, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.1434293985366821, "learning_rate": 2.130801687763713e-06, "loss": 0.7514, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.1016374826431274, "learning_rate": 2.1518987341772153e-06, "loss": 0.812, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.1565126180648804, "learning_rate": 2.1729957805907175e-06, "loss": 0.8014, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.1164343357086182, "learning_rate": 2.1940928270042197e-06, "loss": 0.7952, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.1574078798294067, "learning_rate": 2.2151898734177215e-06, "loss": 0.8017, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.1797399520874023, "learning_rate": 2.2362869198312237e-06, "loss": 0.7733, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.1039958000183105, "learning_rate": 2.257383966244726e-06, "loss": 0.77, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.1885851621627808, "learning_rate": 2.278481012658228e-06, "loss": 0.8172, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.1390385627746582, "learning_rate": 2.2995780590717302e-06, "loss": 0.7927, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.1092506647109985, "learning_rate": 2.3206751054852324e-06, "loss": 0.7741, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.2666289806365967, "learning_rate": 2.341772151898734e-06, "loss": 0.7964, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.0005278587341309, "learning_rate": 2.3628691983122364e-06, "loss": 0.7241, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.1468347311019897, "learning_rate": 2.3839662447257386e-06, "loss": 0.7699, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.0486509799957275, "learning_rate": 2.4050632911392408e-06, "loss": 0.7698, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.0754250288009644, "learning_rate": 2.426160337552743e-06, "loss": 0.7577, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.0635970830917358, "learning_rate": 2.447257383966245e-06, "loss": 0.7201, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.1205726861953735, "learning_rate": 2.4683544303797473e-06, "loss": 0.8335, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.070901870727539, "learning_rate": 2.489451476793249e-06, "loss": 0.7949, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.0840576887130737, "learning_rate": 2.5105485232067513e-06, "loss": 0.7587, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.1758044958114624, "learning_rate": 2.5316455696202535e-06, "loss": 0.8069, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.0169506072998047, "learning_rate": 2.5527426160337553e-06, "loss": 0.7996, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.0591812133789062, "learning_rate": 2.573839662447258e-06, "loss": 0.7231, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.1686660051345825, "learning_rate": 2.5949367088607596e-06, "loss": 0.7476, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.0610604286193848, "learning_rate": 2.6160337552742622e-06, "loss": 0.7395, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.0829620361328125, "learning_rate": 2.637130801687764e-06, "loss": 0.8376, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.088221788406372, "learning_rate": 2.6582278481012658e-06, "loss": 0.7102, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.0730963945388794, "learning_rate": 2.679324894514768e-06, "loss": 0.7688, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.0567976236343384, "learning_rate": 2.70042194092827e-06, "loss": 0.7138, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.079959511756897, "learning_rate": 2.7215189873417724e-06, "loss": 0.7664, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.0153189897537231, "learning_rate": 2.742616033755274e-06, "loss": 0.7066, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.1893813610076904, "learning_rate": 2.7637130801687767e-06, "loss": 0.6818, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.1453039646148682, "learning_rate": 2.7848101265822785e-06, "loss": 0.8184, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.1458662748336792, "learning_rate": 2.805907172995781e-06, "loss": 0.7722, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.098507046699524, "learning_rate": 2.827004219409283e-06, "loss": 0.7963, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.0800371170043945, "learning_rate": 2.848101265822785e-06, "loss": 0.7168, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.0287774801254272, "learning_rate": 2.8691983122362873e-06, "loss": 0.7627, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.0763013362884521, "learning_rate": 2.8902953586497895e-06, "loss": 0.8135, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.2011860609054565, "learning_rate": 2.9113924050632912e-06, "loss": 0.7457, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.1453096866607666, "learning_rate": 2.932489451476794e-06, "loss": 0.7488, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.077102541923523, "learning_rate": 2.9535864978902956e-06, "loss": 0.7586, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.0777740478515625, "learning_rate": 2.9746835443037974e-06, "loss": 0.7453, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.0961055755615234, "learning_rate": 2.9957805907173e-06, "loss": 0.7509, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.0943870544433594, "learning_rate": 3.0168776371308017e-06, "loss": 0.7719, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.9995068907737732, "learning_rate": 3.037974683544304e-06, "loss": 0.7562, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.0800971984863281, "learning_rate": 3.059071729957806e-06, "loss": 0.8042, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.1074168682098389, "learning_rate": 3.0801687763713083e-06, "loss": 0.7368, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.1656383275985718, "learning_rate": 3.10126582278481e-06, "loss": 0.7654, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.1436306238174438, "learning_rate": 3.1223628691983127e-06, "loss": 0.7259, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.070346474647522, "learning_rate": 3.1434599156118145e-06, "loss": 0.7054, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.1207927465438843, "learning_rate": 3.164556962025317e-06, "loss": 0.7837, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.1490936279296875, "learning_rate": 3.185654008438819e-06, "loss": 0.7669, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.1072289943695068, "learning_rate": 3.206751054852321e-06, "loss": 0.7523, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.0876787900924683, "learning_rate": 3.2278481012658232e-06, "loss": 0.7732, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.1508756875991821, "learning_rate": 3.248945147679325e-06, "loss": 0.7658, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.2632142305374146, "learning_rate": 3.270042194092827e-06, "loss": 0.7805, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.0064738988876343, "learning_rate": 3.2911392405063294e-06, "loss": 0.7025, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.0723457336425781, "learning_rate": 3.3122362869198316e-06, "loss": 0.8056, "step": 157 }, { "epoch": 0.01, "grad_norm": 1.0933613777160645, "learning_rate": 3.3333333333333333e-06, "loss": 0.7526, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.164417028427124, "learning_rate": 3.354430379746836e-06, "loss": 0.7202, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.1357107162475586, "learning_rate": 3.3755274261603377e-06, "loss": 0.6892, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.1236298084259033, "learning_rate": 3.39662447257384e-06, "loss": 0.8033, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.0229138135910034, "learning_rate": 3.417721518987342e-06, "loss": 0.7655, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.1775723695755005, "learning_rate": 3.4388185654008443e-06, "loss": 0.7653, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.0776041746139526, "learning_rate": 3.459915611814346e-06, "loss": 0.7152, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.0283170938491821, "learning_rate": 3.4810126582278487e-06, "loss": 0.7518, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.0690875053405762, "learning_rate": 3.5021097046413504e-06, "loss": 0.8014, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.068129301071167, "learning_rate": 3.523206751054853e-06, "loss": 0.7015, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.0456384420394897, "learning_rate": 3.544303797468355e-06, "loss": 0.7503, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.148189663887024, "learning_rate": 3.5654008438818566e-06, "loss": 0.7158, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.050423264503479, "learning_rate": 3.586497890295359e-06, "loss": 0.7687, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.0767706632614136, "learning_rate": 3.607594936708861e-06, "loss": 0.7401, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.0276440382003784, "learning_rate": 3.628691983122363e-06, "loss": 0.798, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.1560982465744019, "learning_rate": 3.649789029535865e-06, "loss": 0.7576, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.0696228742599487, "learning_rate": 3.6708860759493675e-06, "loss": 0.7687, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.0968499183654785, "learning_rate": 3.6919831223628693e-06, "loss": 0.7254, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.0895642042160034, "learning_rate": 3.713080168776372e-06, "loss": 0.7755, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.0401157140731812, "learning_rate": 3.7341772151898737e-06, "loss": 0.729, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.1051784753799438, "learning_rate": 3.755274261603376e-06, "loss": 0.7117, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.12349271774292, "learning_rate": 3.776371308016878e-06, "loss": 0.7652, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.0855416059494019, "learning_rate": 3.7974683544303802e-06, "loss": 0.6883, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.1745630502700806, "learning_rate": 3.818565400843882e-06, "loss": 0.7449, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.0661110877990723, "learning_rate": 3.839662447257384e-06, "loss": 0.7232, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.0649524927139282, "learning_rate": 3.860759493670886e-06, "loss": 0.7632, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.128460168838501, "learning_rate": 3.8818565400843886e-06, "loss": 0.7516, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.1216446161270142, "learning_rate": 3.902953586497891e-06, "loss": 0.673, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.1039458513259888, "learning_rate": 3.924050632911393e-06, "loss": 0.7259, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.0785599946975708, "learning_rate": 3.945147679324895e-06, "loss": 0.7689, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.1407884359359741, "learning_rate": 3.9662447257383965e-06, "loss": 0.7762, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.2207587957382202, "learning_rate": 3.9873417721518995e-06, "loss": 0.7593, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.126558542251587, "learning_rate": 4.008438818565401e-06, "loss": 0.7983, "step": 190 }, { "epoch": 0.01, "grad_norm": 1.0701903104782104, "learning_rate": 4.029535864978903e-06, "loss": 0.7166, "step": 191 }, { "epoch": 0.01, "grad_norm": 1.0827971696853638, "learning_rate": 4.050632911392405e-06, "loss": 0.7339, "step": 192 }, { "epoch": 0.01, "grad_norm": 0.9973072409629822, "learning_rate": 4.0717299578059074e-06, "loss": 0.6799, "step": 193 }, { "epoch": 0.01, "grad_norm": 1.05364990234375, "learning_rate": 4.09282700421941e-06, "loss": 0.7106, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.0435612201690674, "learning_rate": 4.113924050632912e-06, "loss": 0.8139, "step": 195 }, { "epoch": 0.01, "grad_norm": 1.0995101928710938, "learning_rate": 4.135021097046414e-06, "loss": 0.7309, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.0613794326782227, "learning_rate": 4.156118143459915e-06, "loss": 0.7698, "step": 197 }, { "epoch": 0.01, "grad_norm": 1.1413551568984985, "learning_rate": 4.177215189873418e-06, "loss": 0.7912, "step": 198 }, { "epoch": 0.01, "grad_norm": 1.0940371751785278, "learning_rate": 4.19831223628692e-06, "loss": 0.7316, "step": 199 }, { "epoch": 0.01, "grad_norm": 1.1012215614318848, "learning_rate": 4.219409282700423e-06, "loss": 0.7178, "step": 200 }, { "epoch": 0.01, "grad_norm": 1.1062922477722168, "learning_rate": 4.240506329113924e-06, "loss": 0.7409, "step": 201 }, { "epoch": 0.01, "grad_norm": 1.1517467498779297, "learning_rate": 4.261603375527426e-06, "loss": 0.8392, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.9980434775352478, "learning_rate": 4.2827004219409285e-06, "loss": 0.7081, "step": 203 }, { "epoch": 0.01, "grad_norm": 1.190421223640442, "learning_rate": 4.303797468354431e-06, "loss": 0.7573, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.1169999837875366, "learning_rate": 4.324894514767933e-06, "loss": 0.7444, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.1159812211990356, "learning_rate": 4.345991561181435e-06, "loss": 0.7336, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.016315221786499, "learning_rate": 4.367088607594937e-06, "loss": 0.7329, "step": 207 }, { "epoch": 0.01, "grad_norm": 1.1287728548049927, "learning_rate": 4.3881856540084394e-06, "loss": 0.7638, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.0038844347000122, "learning_rate": 4.409282700421942e-06, "loss": 0.6823, "step": 209 }, { "epoch": 0.01, "grad_norm": 1.0368938446044922, "learning_rate": 4.430379746835443e-06, "loss": 0.7563, "step": 210 }, { "epoch": 0.01, "grad_norm": 1.215239405632019, "learning_rate": 4.451476793248945e-06, "loss": 0.7747, "step": 211 }, { "epoch": 0.01, "grad_norm": 0.9736345410346985, "learning_rate": 4.472573839662447e-06, "loss": 0.6811, "step": 212 }, { "epoch": 0.01, "grad_norm": 1.133034110069275, "learning_rate": 4.4936708860759495e-06, "loss": 0.8181, "step": 213 }, { "epoch": 0.01, "grad_norm": 1.1740864515304565, "learning_rate": 4.514767932489452e-06, "loss": 0.7113, "step": 214 }, { "epoch": 0.01, "grad_norm": 1.0824915170669556, "learning_rate": 4.535864978902954e-06, "loss": 0.7522, "step": 215 }, { "epoch": 0.01, "grad_norm": 1.0215483903884888, "learning_rate": 4.556962025316456e-06, "loss": 0.7099, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.1021614074707031, "learning_rate": 4.578059071729958e-06, "loss": 0.6993, "step": 217 }, { "epoch": 0.01, "grad_norm": 1.0713529586791992, "learning_rate": 4.5991561181434605e-06, "loss": 0.7765, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.0062373876571655, "learning_rate": 4.620253164556963e-06, "loss": 0.7104, "step": 219 }, { "epoch": 0.01, "grad_norm": 1.0480597019195557, "learning_rate": 4.641350210970465e-06, "loss": 0.6808, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.0954842567443848, "learning_rate": 4.662447257383967e-06, "loss": 0.7508, "step": 221 }, { "epoch": 0.01, "grad_norm": 1.1099358797073364, "learning_rate": 4.683544303797468e-06, "loss": 0.7075, "step": 222 }, { "epoch": 0.01, "grad_norm": 1.0071532726287842, "learning_rate": 4.7046413502109714e-06, "loss": 0.6977, "step": 223 }, { "epoch": 0.01, "grad_norm": 1.2051150798797607, "learning_rate": 4.725738396624473e-06, "loss": 0.7822, "step": 224 }, { "epoch": 0.01, "grad_norm": 1.0721451044082642, "learning_rate": 4.746835443037975e-06, "loss": 0.7405, "step": 225 }, { "epoch": 0.01, "grad_norm": 1.093891978263855, "learning_rate": 4.767932489451477e-06, "loss": 0.696, "step": 226 }, { "epoch": 0.01, "grad_norm": 1.030121088027954, "learning_rate": 4.789029535864979e-06, "loss": 0.6639, "step": 227 }, { "epoch": 0.01, "grad_norm": 1.029396653175354, "learning_rate": 4.8101265822784815e-06, "loss": 0.6721, "step": 228 }, { "epoch": 0.01, "grad_norm": 1.075605034828186, "learning_rate": 4.831223628691984e-06, "loss": 0.767, "step": 229 }, { "epoch": 0.01, "grad_norm": 1.1397629976272583, "learning_rate": 4.852320675105486e-06, "loss": 0.7567, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.9947602152824402, "learning_rate": 4.873417721518987e-06, "loss": 0.6579, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.1511805057525635, "learning_rate": 4.89451476793249e-06, "loss": 0.775, "step": 232 }, { "epoch": 0.01, "grad_norm": 1.1157087087631226, "learning_rate": 4.915611814345992e-06, "loss": 0.7197, "step": 233 }, { "epoch": 0.01, "grad_norm": 1.041972041130066, "learning_rate": 4.936708860759495e-06, "loss": 0.6443, "step": 234 }, { "epoch": 0.01, "grad_norm": 1.1057621240615845, "learning_rate": 4.957805907172996e-06, "loss": 0.7262, "step": 235 }, { "epoch": 0.01, "grad_norm": 1.0165050029754639, "learning_rate": 4.978902953586498e-06, "loss": 0.6992, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.093250036239624, "learning_rate": 5e-06, "loss": 0.7268, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.0492795705795288, "learning_rate": 5.021097046413503e-06, "loss": 0.7542, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.102603793144226, "learning_rate": 5.042194092827004e-06, "loss": 0.7308, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.078536033630371, "learning_rate": 5.063291139240507e-06, "loss": 0.6885, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.0672438144683838, "learning_rate": 5.084388185654009e-06, "loss": 0.7401, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.0409996509552002, "learning_rate": 5.1054852320675105e-06, "loss": 0.6583, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.0213676691055298, "learning_rate": 5.126582278481013e-06, "loss": 0.6894, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.00473952293396, "learning_rate": 5.147679324894516e-06, "loss": 0.6331, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.0353198051452637, "learning_rate": 5.168776371308017e-06, "loss": 0.6998, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.0606788396835327, "learning_rate": 5.189873417721519e-06, "loss": 0.7192, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.0762742757797241, "learning_rate": 5.2109704641350215e-06, "loss": 0.7234, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.0901657342910767, "learning_rate": 5.2320675105485245e-06, "loss": 0.7279, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.0092108249664307, "learning_rate": 5.253164556962026e-06, "loss": 0.7626, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.0560524463653564, "learning_rate": 5.274261603375528e-06, "loss": 0.6912, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.0830129384994507, "learning_rate": 5.295358649789029e-06, "loss": 0.7379, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.0292439460754395, "learning_rate": 5.3164556962025316e-06, "loss": 0.7024, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.0570290088653564, "learning_rate": 5.337552742616035e-06, "loss": 0.7485, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.098766565322876, "learning_rate": 5.358649789029536e-06, "loss": 0.7215, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.038459062576294, "learning_rate": 5.379746835443038e-06, "loss": 0.7304, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.0401676893234253, "learning_rate": 5.40084388185654e-06, "loss": 0.711, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.1514108180999756, "learning_rate": 5.421940928270043e-06, "loss": 0.7406, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.0562361478805542, "learning_rate": 5.443037974683545e-06, "loss": 0.6916, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.0437403917312622, "learning_rate": 5.464135021097047e-06, "loss": 0.7645, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.0529121160507202, "learning_rate": 5.485232067510548e-06, "loss": 0.7039, "step": 260 }, { "epoch": 0.02, "grad_norm": 0.9928483963012695, "learning_rate": 5.506329113924051e-06, "loss": 0.7255, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.149880290031433, "learning_rate": 5.5274261603375535e-06, "loss": 0.6802, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.0575579404830933, "learning_rate": 5.548523206751056e-06, "loss": 0.6778, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.055155634880066, "learning_rate": 5.569620253164557e-06, "loss": 0.7686, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.0442339181900024, "learning_rate": 5.590717299578059e-06, "loss": 0.7301, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.128800868988037, "learning_rate": 5.611814345991562e-06, "loss": 0.7405, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.045422077178955, "learning_rate": 5.6329113924050636e-06, "loss": 0.7035, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.1714004278182983, "learning_rate": 5.654008438818566e-06, "loss": 0.6549, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.024259328842163, "learning_rate": 5.675105485232067e-06, "loss": 0.7305, "step": 269 }, { "epoch": 0.02, "grad_norm": 0.9970294237136841, "learning_rate": 5.69620253164557e-06, "loss": 0.7004, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.1413666009902954, "learning_rate": 5.717299578059072e-06, "loss": 0.7865, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.0278419256210327, "learning_rate": 5.7383966244725745e-06, "loss": 0.7116, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.1053383350372314, "learning_rate": 5.759493670886076e-06, "loss": 0.7703, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.0635850429534912, "learning_rate": 5.780590717299579e-06, "loss": 0.7128, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.2253458499908447, "learning_rate": 5.801687763713081e-06, "loss": 0.7689, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.152657151222229, "learning_rate": 5.8227848101265824e-06, "loss": 0.7243, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.0314563512802124, "learning_rate": 5.843881856540085e-06, "loss": 0.702, "step": 277 }, { "epoch": 0.02, "grad_norm": 0.9639416933059692, "learning_rate": 5.864978902953588e-06, "loss": 0.6504, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.0700966119766235, "learning_rate": 5.886075949367089e-06, "loss": 0.7275, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.0939685106277466, "learning_rate": 5.907172995780591e-06, "loss": 0.7193, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.0693538188934326, "learning_rate": 5.928270042194093e-06, "loss": 0.7746, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.057306170463562, "learning_rate": 5.949367088607595e-06, "loss": 0.7504, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.1040732860565186, "learning_rate": 5.970464135021098e-06, "loss": 0.7458, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.031706690788269, "learning_rate": 5.9915611814346e-06, "loss": 0.7131, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.0323573350906372, "learning_rate": 6.012658227848101e-06, "loss": 0.6868, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.0617097616195679, "learning_rate": 6.0337552742616035e-06, "loss": 0.63, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.11605703830719, "learning_rate": 6.0548523206751065e-06, "loss": 0.6858, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.0047672986984253, "learning_rate": 6.075949367088608e-06, "loss": 0.6639, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.131504774093628, "learning_rate": 6.09704641350211e-06, "loss": 0.7063, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.063057780265808, "learning_rate": 6.118143459915612e-06, "loss": 0.741, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.0435850620269775, "learning_rate": 6.139240506329115e-06, "loss": 0.6867, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.102751612663269, "learning_rate": 6.160337552742617e-06, "loss": 0.7544, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.1484432220458984, "learning_rate": 6.181434599156119e-06, "loss": 0.7241, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.1322616338729858, "learning_rate": 6.20253164556962e-06, "loss": 0.7311, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.077012300491333, "learning_rate": 6.223628691983122e-06, "loss": 0.7061, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.0520391464233398, "learning_rate": 6.244725738396625e-06, "loss": 0.7375, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.0452332496643066, "learning_rate": 6.265822784810128e-06, "loss": 0.6791, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.037235975265503, "learning_rate": 6.286919831223629e-06, "loss": 0.7208, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.0935461521148682, "learning_rate": 6.308016877637131e-06, "loss": 0.679, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.1446540355682373, "learning_rate": 6.329113924050634e-06, "loss": 0.7607, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.0081703662872314, "learning_rate": 6.3502109704641355e-06, "loss": 0.6801, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.042680025100708, "learning_rate": 6.371308016877638e-06, "loss": 0.709, "step": 302 }, { "epoch": 0.02, "grad_norm": 0.9972267150878906, "learning_rate": 6.392405063291139e-06, "loss": 0.7027, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.1144083738327026, "learning_rate": 6.413502109704642e-06, "loss": 0.7594, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.0191450119018555, "learning_rate": 6.434599156118144e-06, "loss": 0.6988, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.0765477418899536, "learning_rate": 6.4556962025316464e-06, "loss": 0.6887, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.0328569412231445, "learning_rate": 6.476793248945148e-06, "loss": 0.6941, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.060673475265503, "learning_rate": 6.49789029535865e-06, "loss": 0.7643, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.0403449535369873, "learning_rate": 6.518987341772153e-06, "loss": 0.7178, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.000282883644104, "learning_rate": 6.540084388185654e-06, "loss": 0.6678, "step": 310 }, { "epoch": 0.02, "grad_norm": 0.988453209400177, "learning_rate": 6.5611814345991565e-06, "loss": 0.6962, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.0525801181793213, "learning_rate": 6.582278481012659e-06, "loss": 0.6947, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.1088162660598755, "learning_rate": 6.603375527426161e-06, "loss": 0.7701, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.0845990180969238, "learning_rate": 6.624472573839663e-06, "loss": 0.6622, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.1164056062698364, "learning_rate": 6.645569620253165e-06, "loss": 0.6881, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.112685203552246, "learning_rate": 6.666666666666667e-06, "loss": 0.6864, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.0409352779388428, "learning_rate": 6.68776371308017e-06, "loss": 0.6724, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.0470497608184814, "learning_rate": 6.708860759493672e-06, "loss": 0.7243, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.1536136865615845, "learning_rate": 6.729957805907173e-06, "loss": 0.7587, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.1283690929412842, "learning_rate": 6.751054852320675e-06, "loss": 0.7378, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.1128261089324951, "learning_rate": 6.772151898734178e-06, "loss": 0.7279, "step": 321 }, { "epoch": 0.02, "grad_norm": 0.973404586315155, "learning_rate": 6.79324894514768e-06, "loss": 0.7067, "step": 322 }, { "epoch": 0.02, "grad_norm": 1.0499427318572998, "learning_rate": 6.814345991561182e-06, "loss": 0.6869, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.047536849975586, "learning_rate": 6.835443037974684e-06, "loss": 0.6862, "step": 324 }, { "epoch": 0.02, "grad_norm": 1.076842188835144, "learning_rate": 6.8565400843881855e-06, "loss": 0.6522, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.1328686475753784, "learning_rate": 6.8776371308016885e-06, "loss": 0.7842, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.05484938621521, "learning_rate": 6.898734177215191e-06, "loss": 0.7303, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.0914195775985718, "learning_rate": 6.919831223628692e-06, "loss": 0.7791, "step": 328 }, { "epoch": 0.02, "grad_norm": 1.1371619701385498, "learning_rate": 6.940928270042194e-06, "loss": 0.7417, "step": 329 }, { "epoch": 0.02, "grad_norm": 1.0946354866027832, "learning_rate": 6.962025316455697e-06, "loss": 0.7099, "step": 330 }, { "epoch": 0.02, "grad_norm": 1.048621416091919, "learning_rate": 6.9831223628691995e-06, "loss": 0.7406, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.0043079853057861, "learning_rate": 7.004219409282701e-06, "loss": 0.6941, "step": 332 }, { "epoch": 0.02, "grad_norm": 1.0117560625076294, "learning_rate": 7.025316455696203e-06, "loss": 0.7254, "step": 333 }, { "epoch": 0.02, "grad_norm": 0.9732447862625122, "learning_rate": 7.046413502109706e-06, "loss": 0.6738, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.0345726013183594, "learning_rate": 7.067510548523207e-06, "loss": 0.7732, "step": 335 }, { "epoch": 0.02, "grad_norm": 1.0557833909988403, "learning_rate": 7.08860759493671e-06, "loss": 0.6691, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.0420573949813843, "learning_rate": 7.109704641350211e-06, "loss": 0.6917, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.0846279859542847, "learning_rate": 7.130801687763713e-06, "loss": 0.7203, "step": 338 }, { "epoch": 0.02, "grad_norm": 1.0644689798355103, "learning_rate": 7.151898734177216e-06, "loss": 0.7264, "step": 339 }, { "epoch": 0.02, "grad_norm": 0.9768193960189819, "learning_rate": 7.172995780590718e-06, "loss": 0.635, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.0101126432418823, "learning_rate": 7.19409282700422e-06, "loss": 0.6507, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.208484411239624, "learning_rate": 7.215189873417722e-06, "loss": 0.7535, "step": 342 }, { "epoch": 0.02, "grad_norm": 1.0944318771362305, "learning_rate": 7.236286919831225e-06, "loss": 0.7649, "step": 343 }, { "epoch": 0.02, "grad_norm": 0.9345969557762146, "learning_rate": 7.257383966244726e-06, "loss": 0.6838, "step": 344 }, { "epoch": 0.02, "grad_norm": 1.0136656761169434, "learning_rate": 7.2784810126582285e-06, "loss": 0.6863, "step": 345 }, { "epoch": 0.02, "grad_norm": 1.0391440391540527, "learning_rate": 7.29957805907173e-06, "loss": 0.7484, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.0096555948257446, "learning_rate": 7.320675105485233e-06, "loss": 0.6641, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.0947874784469604, "learning_rate": 7.341772151898735e-06, "loss": 0.6892, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.0220927000045776, "learning_rate": 7.362869198312237e-06, "loss": 0.6493, "step": 349 }, { "epoch": 0.02, "grad_norm": 1.0061572790145874, "learning_rate": 7.3839662447257386e-06, "loss": 0.6969, "step": 350 }, { "epoch": 0.02, "grad_norm": 1.142774224281311, "learning_rate": 7.405063291139241e-06, "loss": 0.7036, "step": 351 }, { "epoch": 0.02, "grad_norm": 1.1005827188491821, "learning_rate": 7.426160337552744e-06, "loss": 0.6813, "step": 352 }, { "epoch": 0.02, "grad_norm": 0.9909974932670593, "learning_rate": 7.447257383966245e-06, "loss": 0.6848, "step": 353 }, { "epoch": 0.02, "grad_norm": 1.0379297733306885, "learning_rate": 7.468354430379747e-06, "loss": 0.6472, "step": 354 }, { "epoch": 0.02, "grad_norm": 1.0365216732025146, "learning_rate": 7.4894514767932495e-06, "loss": 0.7371, "step": 355 }, { "epoch": 0.02, "grad_norm": 1.0643117427825928, "learning_rate": 7.510548523206752e-06, "loss": 0.7383, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.0581454038619995, "learning_rate": 7.531645569620254e-06, "loss": 0.6001, "step": 357 }, { "epoch": 0.02, "grad_norm": 1.147464632987976, "learning_rate": 7.552742616033756e-06, "loss": 0.7362, "step": 358 }, { "epoch": 0.02, "grad_norm": 1.1402760744094849, "learning_rate": 7.5738396624472574e-06, "loss": 0.6973, "step": 359 }, { "epoch": 0.02, "grad_norm": 0.987370491027832, "learning_rate": 7.5949367088607605e-06, "loss": 0.6186, "step": 360 }, { "epoch": 0.02, "grad_norm": 1.1057859659194946, "learning_rate": 7.616033755274263e-06, "loss": 0.758, "step": 361 }, { "epoch": 0.02, "grad_norm": 1.1299041509628296, "learning_rate": 7.637130801687764e-06, "loss": 0.6524, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.0638086795806885, "learning_rate": 7.658227848101265e-06, "loss": 0.6748, "step": 363 }, { "epoch": 0.02, "grad_norm": 1.1084306240081787, "learning_rate": 7.679324894514768e-06, "loss": 0.7294, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.0223629474639893, "learning_rate": 7.700421940928271e-06, "loss": 0.7385, "step": 365 }, { "epoch": 0.02, "grad_norm": 0.9762625694274902, "learning_rate": 7.721518987341773e-06, "loss": 0.657, "step": 366 }, { "epoch": 0.02, "grad_norm": 1.045155644416809, "learning_rate": 7.742616033755274e-06, "loss": 0.6697, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.101192831993103, "learning_rate": 7.763713080168777e-06, "loss": 0.7388, "step": 368 }, { "epoch": 0.02, "grad_norm": 0.9734575152397156, "learning_rate": 7.78481012658228e-06, "loss": 0.7006, "step": 369 }, { "epoch": 0.02, "grad_norm": 1.0786582231521606, "learning_rate": 7.805907172995782e-06, "loss": 0.7253, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.0659972429275513, "learning_rate": 7.827004219409283e-06, "loss": 0.6975, "step": 371 }, { "epoch": 0.02, "grad_norm": 1.0065947771072388, "learning_rate": 7.848101265822786e-06, "loss": 0.6478, "step": 372 }, { "epoch": 0.02, "grad_norm": 1.066577672958374, "learning_rate": 7.869198312236287e-06, "loss": 0.6615, "step": 373 }, { "epoch": 0.02, "grad_norm": 0.9906947612762451, "learning_rate": 7.89029535864979e-06, "loss": 0.6473, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.1715826988220215, "learning_rate": 7.911392405063292e-06, "loss": 0.7237, "step": 375 }, { "epoch": 0.02, "grad_norm": 0.943070650100708, "learning_rate": 7.932489451476793e-06, "loss": 0.6124, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.0601487159729004, "learning_rate": 7.953586497890296e-06, "loss": 0.7008, "step": 377 }, { "epoch": 0.02, "grad_norm": 0.9950490593910217, "learning_rate": 7.974683544303799e-06, "loss": 0.676, "step": 378 }, { "epoch": 0.02, "grad_norm": 1.107460856437683, "learning_rate": 7.9957805907173e-06, "loss": 0.6832, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.111391305923462, "learning_rate": 8.016877637130802e-06, "loss": 0.7273, "step": 380 }, { "epoch": 0.02, "grad_norm": 0.9769160747528076, "learning_rate": 8.037974683544305e-06, "loss": 0.6351, "step": 381 }, { "epoch": 0.02, "grad_norm": 1.040307879447937, "learning_rate": 8.059071729957806e-06, "loss": 0.6979, "step": 382 }, { "epoch": 0.02, "grad_norm": 1.1144888401031494, "learning_rate": 8.080168776371309e-06, "loss": 0.733, "step": 383 }, { "epoch": 0.02, "grad_norm": 0.9809250831604004, "learning_rate": 8.10126582278481e-06, "loss": 0.7225, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.014562964439392, "learning_rate": 8.122362869198312e-06, "loss": 0.6472, "step": 385 }, { "epoch": 0.02, "grad_norm": 1.0310808420181274, "learning_rate": 8.143459915611815e-06, "loss": 0.7224, "step": 386 }, { "epoch": 0.02, "grad_norm": 1.026007890701294, "learning_rate": 8.164556962025318e-06, "loss": 0.6932, "step": 387 }, { "epoch": 0.02, "grad_norm": 1.062725305557251, "learning_rate": 8.18565400843882e-06, "loss": 0.7098, "step": 388 }, { "epoch": 0.02, "grad_norm": 1.1098779439926147, "learning_rate": 8.20675105485232e-06, "loss": 0.6994, "step": 389 }, { "epoch": 0.02, "grad_norm": 1.0307683944702148, "learning_rate": 8.227848101265824e-06, "loss": 0.784, "step": 390 }, { "epoch": 0.02, "grad_norm": 1.0046746730804443, "learning_rate": 8.248945147679327e-06, "loss": 0.6689, "step": 391 }, { "epoch": 0.02, "grad_norm": 1.090871810913086, "learning_rate": 8.270042194092828e-06, "loss": 0.6986, "step": 392 }, { "epoch": 0.02, "grad_norm": 0.9817966222763062, "learning_rate": 8.29113924050633e-06, "loss": 0.6288, "step": 393 }, { "epoch": 0.02, "grad_norm": 1.004731297492981, "learning_rate": 8.31223628691983e-06, "loss": 0.6876, "step": 394 }, { "epoch": 0.03, "grad_norm": 0.9606344103813171, "learning_rate": 8.333333333333334e-06, "loss": 0.6722, "step": 395 }, { "epoch": 0.03, "grad_norm": 0.8888660669326782, "learning_rate": 8.354430379746837e-06, "loss": 0.6257, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.0444602966308594, "learning_rate": 8.375527426160338e-06, "loss": 0.7283, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.0116087198257446, "learning_rate": 8.39662447257384e-06, "loss": 0.6951, "step": 398 }, { "epoch": 0.03, "grad_norm": 0.9750226736068726, "learning_rate": 8.417721518987342e-06, "loss": 0.6172, "step": 399 }, { "epoch": 0.03, "grad_norm": 0.9671033024787903, "learning_rate": 8.438818565400846e-06, "loss": 0.6415, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.0681933164596558, "learning_rate": 8.459915611814347e-06, "loss": 0.6908, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.0316442251205444, "learning_rate": 8.481012658227848e-06, "loss": 0.6733, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.077885389328003, "learning_rate": 8.502109704641351e-06, "loss": 0.7253, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.03955078125, "learning_rate": 8.523206751054853e-06, "loss": 0.6548, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.1187814474105835, "learning_rate": 8.544303797468356e-06, "loss": 0.6971, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.0932029485702515, "learning_rate": 8.565400843881857e-06, "loss": 0.7234, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.0664738416671753, "learning_rate": 8.586497890295358e-06, "loss": 0.7203, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.0771609544754028, "learning_rate": 8.607594936708861e-06, "loss": 0.6594, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.0817670822143555, "learning_rate": 8.628691983122364e-06, "loss": 0.7253, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.0627022981643677, "learning_rate": 8.649789029535866e-06, "loss": 0.7192, "step": 410 }, { "epoch": 0.03, "grad_norm": 0.9740765690803528, "learning_rate": 8.670886075949367e-06, "loss": 0.7057, "step": 411 }, { "epoch": 0.03, "grad_norm": 0.9944364428520203, "learning_rate": 8.69198312236287e-06, "loss": 0.7844, "step": 412 }, { "epoch": 0.03, "grad_norm": 0.9714933633804321, "learning_rate": 8.713080168776371e-06, "loss": 0.7249, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.0184885263442993, "learning_rate": 8.734177215189874e-06, "loss": 0.647, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.0341577529907227, "learning_rate": 8.755274261603376e-06, "loss": 0.6934, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.0786633491516113, "learning_rate": 8.776371308016879e-06, "loss": 0.6879, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.0012767314910889, "learning_rate": 8.79746835443038e-06, "loss": 0.7064, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.0431938171386719, "learning_rate": 8.818565400843883e-06, "loss": 0.7309, "step": 418 }, { "epoch": 0.03, "grad_norm": 0.9838118553161621, "learning_rate": 8.839662447257385e-06, "loss": 0.6781, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.0146209001541138, "learning_rate": 8.860759493670886e-06, "loss": 0.699, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.0076758861541748, "learning_rate": 8.881856540084389e-06, "loss": 0.7561, "step": 421 }, { "epoch": 0.03, "grad_norm": 0.9734664559364319, "learning_rate": 8.90295358649789e-06, "loss": 0.6111, "step": 422 }, { "epoch": 0.03, "grad_norm": 0.9639232158660889, "learning_rate": 8.924050632911393e-06, "loss": 0.6722, "step": 423 }, { "epoch": 0.03, "grad_norm": 0.9442266225814819, "learning_rate": 8.945147679324895e-06, "loss": 0.6588, "step": 424 }, { "epoch": 0.03, "grad_norm": 0.9883629083633423, "learning_rate": 8.966244725738398e-06, "loss": 0.6536, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.013269066810608, "learning_rate": 8.987341772151899e-06, "loss": 0.6572, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.1119914054870605, "learning_rate": 9.008438818565402e-06, "loss": 0.7522, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.1306815147399902, "learning_rate": 9.029535864978903e-06, "loss": 0.6473, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.0609233379364014, "learning_rate": 9.050632911392407e-06, "loss": 0.6881, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.033166527748108, "learning_rate": 9.071729957805908e-06, "loss": 0.7441, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.1041021347045898, "learning_rate": 9.09282700421941e-06, "loss": 0.7217, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.1988205909729004, "learning_rate": 9.113924050632912e-06, "loss": 0.7092, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.054067611694336, "learning_rate": 9.135021097046414e-06, "loss": 0.6541, "step": 433 }, { "epoch": 0.03, "grad_norm": 0.967709481716156, "learning_rate": 9.156118143459917e-06, "loss": 0.6873, "step": 434 }, { "epoch": 0.03, "grad_norm": 1.0264620780944824, "learning_rate": 9.177215189873418e-06, "loss": 0.6761, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.0361930131912231, "learning_rate": 9.198312236286921e-06, "loss": 0.646, "step": 436 }, { "epoch": 0.03, "grad_norm": 0.9842200875282288, "learning_rate": 9.219409282700422e-06, "loss": 0.6717, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.0381004810333252, "learning_rate": 9.240506329113925e-06, "loss": 0.6861, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.0402230024337769, "learning_rate": 9.261603375527427e-06, "loss": 0.7462, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.0186004638671875, "learning_rate": 9.28270042194093e-06, "loss": 0.6941, "step": 440 }, { "epoch": 0.03, "grad_norm": 0.9940155148506165, "learning_rate": 9.303797468354431e-06, "loss": 0.7359, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.0634286403656006, "learning_rate": 9.324894514767934e-06, "loss": 0.7119, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.0823959112167358, "learning_rate": 9.345991561181435e-06, "loss": 0.6938, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.054929256439209, "learning_rate": 9.367088607594937e-06, "loss": 0.6777, "step": 444 }, { "epoch": 0.03, "grad_norm": 0.9768496751785278, "learning_rate": 9.38818565400844e-06, "loss": 0.6919, "step": 445 }, { "epoch": 0.03, "grad_norm": 0.9983486533164978, "learning_rate": 9.409282700421943e-06, "loss": 0.6621, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.0734831094741821, "learning_rate": 9.430379746835444e-06, "loss": 0.7321, "step": 447 }, { "epoch": 0.03, "grad_norm": 0.9543827772140503, "learning_rate": 9.451476793248946e-06, "loss": 0.68, "step": 448 }, { "epoch": 0.03, "grad_norm": 0.9443538188934326, "learning_rate": 9.472573839662449e-06, "loss": 0.6456, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.0233805179595947, "learning_rate": 9.49367088607595e-06, "loss": 0.7291, "step": 450 }, { "epoch": 0.03, "grad_norm": 0.9053292870521545, "learning_rate": 9.514767932489453e-06, "loss": 0.6394, "step": 451 }, { "epoch": 0.03, "grad_norm": 0.9343140125274658, "learning_rate": 9.535864978902954e-06, "loss": 0.6988, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.014218807220459, "learning_rate": 9.556962025316456e-06, "loss": 0.6681, "step": 453 }, { "epoch": 0.03, "grad_norm": 1.0326037406921387, "learning_rate": 9.578059071729959e-06, "loss": 0.7683, "step": 454 }, { "epoch": 0.03, "grad_norm": 1.0093090534210205, "learning_rate": 9.599156118143462e-06, "loss": 0.7108, "step": 455 }, { "epoch": 0.03, "grad_norm": 1.0127726793289185, "learning_rate": 9.620253164556963e-06, "loss": 0.7319, "step": 456 }, { "epoch": 0.03, "grad_norm": 1.0509214401245117, "learning_rate": 9.641350210970464e-06, "loss": 0.7353, "step": 457 }, { "epoch": 0.03, "grad_norm": 1.0279244184494019, "learning_rate": 9.662447257383967e-06, "loss": 0.675, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.001551866531372, "learning_rate": 9.68354430379747e-06, "loss": 0.619, "step": 459 }, { "epoch": 0.03, "grad_norm": 1.1098026037216187, "learning_rate": 9.704641350210972e-06, "loss": 0.7643, "step": 460 }, { "epoch": 0.03, "grad_norm": 1.007842779159546, "learning_rate": 9.725738396624473e-06, "loss": 0.6524, "step": 461 }, { "epoch": 0.03, "grad_norm": 1.0313695669174194, "learning_rate": 9.746835443037975e-06, "loss": 0.6565, "step": 462 }, { "epoch": 0.03, "grad_norm": 1.0242507457733154, "learning_rate": 9.767932489451478e-06, "loss": 0.7165, "step": 463 }, { "epoch": 0.03, "grad_norm": 1.0414601564407349, "learning_rate": 9.78902953586498e-06, "loss": 0.6985, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.0553888082504272, "learning_rate": 9.810126582278482e-06, "loss": 0.7361, "step": 465 }, { "epoch": 0.03, "grad_norm": 1.0666173696517944, "learning_rate": 9.831223628691983e-06, "loss": 0.681, "step": 466 }, { "epoch": 0.03, "grad_norm": 0.92351233959198, "learning_rate": 9.852320675105486e-06, "loss": 0.6019, "step": 467 }, { "epoch": 0.03, "grad_norm": 1.0312128067016602, "learning_rate": 9.87341772151899e-06, "loss": 0.7481, "step": 468 }, { "epoch": 0.03, "grad_norm": 1.0440195798873901, "learning_rate": 9.89451476793249e-06, "loss": 0.6794, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.0302846431732178, "learning_rate": 9.915611814345992e-06, "loss": 0.6839, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.0816489458084106, "learning_rate": 9.936708860759493e-06, "loss": 0.7631, "step": 471 }, { "epoch": 0.03, "grad_norm": 1.0711013078689575, "learning_rate": 9.957805907172996e-06, "loss": 0.7376, "step": 472 }, { "epoch": 0.03, "grad_norm": 0.9822407960891724, "learning_rate": 9.9789029535865e-06, "loss": 0.7144, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.0089426040649414, "learning_rate": 1e-05, "loss": 0.7019, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.0129270553588867, "learning_rate": 9.999999894733699e-06, "loss": 0.7017, "step": 475 }, { "epoch": 0.03, "grad_norm": 1.04535973072052, "learning_rate": 9.999999578934793e-06, "loss": 0.7501, "step": 476 }, { "epoch": 0.03, "grad_norm": 0.9517323970794678, "learning_rate": 9.9999990526033e-06, "loss": 0.6579, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.0189645290374756, "learning_rate": 9.99999831573924e-06, "loss": 0.6476, "step": 478 }, { "epoch": 0.03, "grad_norm": 1.0060005187988281, "learning_rate": 9.999997368342644e-06, "loss": 0.7177, "step": 479 }, { "epoch": 0.03, "grad_norm": 1.0013384819030762, "learning_rate": 9.999996210413553e-06, "loss": 0.6951, "step": 480 }, { "epoch": 0.03, "grad_norm": 1.0175272226333618, "learning_rate": 9.999994841952016e-06, "loss": 0.6826, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.017972707748413, "learning_rate": 9.99999326295809e-06, "loss": 0.7456, "step": 482 }, { "epoch": 0.03, "grad_norm": 1.0452282428741455, "learning_rate": 9.99999147343184e-06, "loss": 0.6436, "step": 483 }, { "epoch": 0.03, "grad_norm": 1.0681228637695312, "learning_rate": 9.999989473373344e-06, "loss": 0.6529, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.0214364528656006, "learning_rate": 9.999987262782684e-06, "loss": 0.6911, "step": 485 }, { "epoch": 0.03, "grad_norm": 1.0358929634094238, "learning_rate": 9.999984841659955e-06, "loss": 0.7087, "step": 486 }, { "epoch": 0.03, "grad_norm": 1.1352185010910034, "learning_rate": 9.999982210005258e-06, "loss": 0.674, "step": 487 }, { "epoch": 0.03, "grad_norm": 0.992149293422699, "learning_rate": 9.999979367818704e-06, "loss": 0.6709, "step": 488 }, { "epoch": 0.03, "grad_norm": 0.9382144808769226, "learning_rate": 9.999976315100412e-06, "loss": 0.6415, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.002973198890686, "learning_rate": 9.99997305185051e-06, "loss": 0.6603, "step": 490 }, { "epoch": 0.03, "grad_norm": 1.0173821449279785, "learning_rate": 9.999969578069137e-06, "loss": 0.7378, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.0466855764389038, "learning_rate": 9.99996589375644e-06, "loss": 0.6665, "step": 492 }, { "epoch": 0.03, "grad_norm": 1.0387037992477417, "learning_rate": 9.999961998912573e-06, "loss": 0.6855, "step": 493 }, { "epoch": 0.03, "grad_norm": 1.0338493585586548, "learning_rate": 9.999957893537697e-06, "loss": 0.7131, "step": 494 }, { "epoch": 0.03, "grad_norm": 1.038668155670166, "learning_rate": 9.999953577631991e-06, "loss": 0.725, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.1043860912322998, "learning_rate": 9.999949051195631e-06, "loss": 0.7042, "step": 496 }, { "epoch": 0.03, "grad_norm": 0.9517979025840759, "learning_rate": 9.999944314228811e-06, "loss": 0.6591, "step": 497 }, { "epoch": 0.03, "grad_norm": 0.9950364232063293, "learning_rate": 9.99993936673173e-06, "loss": 0.6464, "step": 498 }, { "epoch": 0.03, "grad_norm": 0.9766575694084167, "learning_rate": 9.999934208704595e-06, "loss": 0.6531, "step": 499 }, { "epoch": 0.03, "grad_norm": 0.9440507888793945, "learning_rate": 9.999928840147624e-06, "loss": 0.6836, "step": 500 }, { "epoch": 0.03, "grad_norm": 0.9839968681335449, "learning_rate": 9.999923261061043e-06, "loss": 0.6737, "step": 501 }, { "epoch": 0.03, "grad_norm": 0.9848559498786926, "learning_rate": 9.999917471445086e-06, "loss": 0.6518, "step": 502 }, { "epoch": 0.03, "grad_norm": 1.0254359245300293, "learning_rate": 9.999911471299998e-06, "loss": 0.6916, "step": 503 }, { "epoch": 0.03, "grad_norm": 1.0536845922470093, "learning_rate": 9.999905260626033e-06, "loss": 0.7099, "step": 504 }, { "epoch": 0.03, "grad_norm": 1.1166943311691284, "learning_rate": 9.99989883942345e-06, "loss": 0.7506, "step": 505 }, { "epoch": 0.03, "grad_norm": 1.0335949659347534, "learning_rate": 9.999892207692521e-06, "loss": 0.666, "step": 506 }, { "epoch": 0.03, "grad_norm": 1.14674711227417, "learning_rate": 9.999885365433523e-06, "loss": 0.6926, "step": 507 }, { "epoch": 0.03, "grad_norm": 1.104755163192749, "learning_rate": 9.999878312646748e-06, "loss": 0.7511, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.075617790222168, "learning_rate": 9.999871049332488e-06, "loss": 0.7566, "step": 509 }, { "epoch": 0.03, "grad_norm": 1.0128673315048218, "learning_rate": 9.999863575491053e-06, "loss": 0.7372, "step": 510 }, { "epoch": 0.03, "grad_norm": 1.0448622703552246, "learning_rate": 9.999855891122754e-06, "loss": 0.7545, "step": 511 }, { "epoch": 0.03, "grad_norm": 1.0948630571365356, "learning_rate": 9.999847996227918e-06, "loss": 0.7373, "step": 512 }, { "epoch": 0.03, "grad_norm": 1.0382819175720215, "learning_rate": 9.999839890806877e-06, "loss": 0.7146, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.0122886896133423, "learning_rate": 9.99983157485997e-06, "loss": 0.6998, "step": 514 }, { "epoch": 0.03, "grad_norm": 1.016714334487915, "learning_rate": 9.99982304838755e-06, "loss": 0.685, "step": 515 }, { "epoch": 0.03, "grad_norm": 1.0722309350967407, "learning_rate": 9.999814311389973e-06, "loss": 0.6384, "step": 516 }, { "epoch": 0.03, "grad_norm": 1.0432019233703613, "learning_rate": 9.99980536386761e-06, "loss": 0.7179, "step": 517 }, { "epoch": 0.03, "grad_norm": 0.9547827243804932, "learning_rate": 9.999796205820835e-06, "loss": 0.6755, "step": 518 }, { "epoch": 0.03, "grad_norm": 1.041869878768921, "learning_rate": 9.999786837250034e-06, "loss": 0.6711, "step": 519 }, { "epoch": 0.03, "grad_norm": 1.1629664897918701, "learning_rate": 9.999777258155604e-06, "loss": 0.7044, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.1089982986450195, "learning_rate": 9.999767468537947e-06, "loss": 0.6838, "step": 521 }, { "epoch": 0.03, "grad_norm": 1.0850619077682495, "learning_rate": 9.999757468397473e-06, "loss": 0.703, "step": 522 }, { "epoch": 0.03, "grad_norm": 0.9639663100242615, "learning_rate": 9.999747257734605e-06, "loss": 0.6591, "step": 523 }, { "epoch": 0.03, "grad_norm": 1.1336872577667236, "learning_rate": 9.999736836549773e-06, "loss": 0.6886, "step": 524 }, { "epoch": 0.03, "grad_norm": 0.9170753955841064, "learning_rate": 9.999726204843417e-06, "loss": 0.645, "step": 525 }, { "epoch": 0.03, "grad_norm": 0.9949559569358826, "learning_rate": 9.999715362615983e-06, "loss": 0.6407, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.0204219818115234, "learning_rate": 9.999704309867926e-06, "loss": 0.7058, "step": 527 }, { "epoch": 0.03, "grad_norm": 0.9866481423377991, "learning_rate": 9.999693046599715e-06, "loss": 0.635, "step": 528 }, { "epoch": 0.03, "grad_norm": 1.0090922117233276, "learning_rate": 9.99968157281182e-06, "loss": 0.6621, "step": 529 }, { "epoch": 0.03, "grad_norm": 1.0550429821014404, "learning_rate": 9.999669888504731e-06, "loss": 0.6731, "step": 530 }, { "epoch": 0.03, "grad_norm": 0.9702327251434326, "learning_rate": 9.999657993678932e-06, "loss": 0.6619, "step": 531 }, { "epoch": 0.03, "grad_norm": 0.999877393245697, "learning_rate": 9.999645888334927e-06, "loss": 0.7003, "step": 532 }, { "epoch": 0.03, "grad_norm": 0.9928595423698425, "learning_rate": 9.999633572473228e-06, "loss": 0.7044, "step": 533 }, { "epoch": 0.03, "grad_norm": 1.1135807037353516, "learning_rate": 9.999621046094353e-06, "loss": 0.7308, "step": 534 }, { "epoch": 0.03, "grad_norm": 1.073427438735962, "learning_rate": 9.999608309198827e-06, "loss": 0.6319, "step": 535 }, { "epoch": 0.03, "grad_norm": 1.006304144859314, "learning_rate": 9.999595361787187e-06, "loss": 0.6534, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.0457631349563599, "learning_rate": 9.999582203859977e-06, "loss": 0.6932, "step": 537 }, { "epoch": 0.03, "grad_norm": 1.0090550184249878, "learning_rate": 9.999568835417755e-06, "loss": 0.6825, "step": 538 }, { "epoch": 0.03, "grad_norm": 0.9906076192855835, "learning_rate": 9.99955525646108e-06, "loss": 0.7105, "step": 539 }, { "epoch": 0.03, "grad_norm": 1.0822291374206543, "learning_rate": 9.999541466990526e-06, "loss": 0.6908, "step": 540 }, { "epoch": 0.03, "grad_norm": 0.9858079552650452, "learning_rate": 9.999527467006674e-06, "loss": 0.6584, "step": 541 }, { "epoch": 0.03, "grad_norm": 0.99437415599823, "learning_rate": 9.999513256510112e-06, "loss": 0.6743, "step": 542 }, { "epoch": 0.03, "grad_norm": 1.1383122205734253, "learning_rate": 9.999498835501438e-06, "loss": 0.7183, "step": 543 }, { "epoch": 0.03, "grad_norm": 1.0107368230819702, "learning_rate": 9.99948420398126e-06, "loss": 0.7656, "step": 544 }, { "epoch": 0.03, "grad_norm": 1.0026836395263672, "learning_rate": 9.999469361950195e-06, "loss": 0.7228, "step": 545 }, { "epoch": 0.03, "grad_norm": 0.9779771566390991, "learning_rate": 9.999454309408868e-06, "loss": 0.7003, "step": 546 }, { "epoch": 0.03, "grad_norm": 1.0403019189834595, "learning_rate": 9.999439046357908e-06, "loss": 0.6832, "step": 547 }, { "epoch": 0.03, "grad_norm": 0.9707220196723938, "learning_rate": 9.999423572797964e-06, "loss": 0.6621, "step": 548 }, { "epoch": 0.03, "grad_norm": 1.0087053775787354, "learning_rate": 9.999407888729686e-06, "loss": 0.698, "step": 549 }, { "epoch": 0.03, "grad_norm": 0.9814850091934204, "learning_rate": 9.999391994153734e-06, "loss": 0.6615, "step": 550 }, { "epoch": 0.03, "grad_norm": 0.9962078332901001, "learning_rate": 9.999375889070773e-06, "loss": 0.6748, "step": 551 }, { "epoch": 0.03, "grad_norm": 0.9213439226150513, "learning_rate": 9.99935957348149e-06, "loss": 0.6722, "step": 552 }, { "epoch": 0.04, "grad_norm": 0.9168039560317993, "learning_rate": 9.999343047386562e-06, "loss": 0.6371, "step": 553 }, { "epoch": 0.04, "grad_norm": 0.9854353666305542, "learning_rate": 9.999326310786692e-06, "loss": 0.6603, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.073123574256897, "learning_rate": 9.999309363682582e-06, "loss": 0.6385, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.007665753364563, "learning_rate": 9.999292206074946e-06, "loss": 0.6184, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.0501381158828735, "learning_rate": 9.999274837964507e-06, "loss": 0.6922, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.044360876083374, "learning_rate": 9.999257259351995e-06, "loss": 0.7034, "step": 558 }, { "epoch": 0.04, "grad_norm": 0.970619261264801, "learning_rate": 9.999239470238151e-06, "loss": 0.6615, "step": 559 }, { "epoch": 0.04, "grad_norm": 0.9886036515235901, "learning_rate": 9.999221470623726e-06, "loss": 0.6686, "step": 560 }, { "epoch": 0.04, "grad_norm": 0.9825366735458374, "learning_rate": 9.999203260509473e-06, "loss": 0.6488, "step": 561 }, { "epoch": 0.04, "grad_norm": 0.9640889167785645, "learning_rate": 9.999184839896163e-06, "loss": 0.7087, "step": 562 }, { "epoch": 0.04, "grad_norm": 0.9821694493293762, "learning_rate": 9.99916620878457e-06, "loss": 0.6697, "step": 563 }, { "epoch": 0.04, "grad_norm": 0.999758780002594, "learning_rate": 9.99914736717548e-06, "loss": 0.7016, "step": 564 }, { "epoch": 0.04, "grad_norm": 0.9824835658073425, "learning_rate": 9.999128315069684e-06, "loss": 0.6894, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.0214180946350098, "learning_rate": 9.999109052467986e-06, "loss": 0.689, "step": 566 }, { "epoch": 0.04, "grad_norm": 0.9945915341377258, "learning_rate": 9.999089579371195e-06, "loss": 0.673, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.036805272102356, "learning_rate": 9.999069895780133e-06, "loss": 0.6567, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.0524154901504517, "learning_rate": 9.99905000169563e-06, "loss": 0.6371, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.024978518486023, "learning_rate": 9.99902989711852e-06, "loss": 0.607, "step": 570 }, { "epoch": 0.04, "grad_norm": 0.9848310351371765, "learning_rate": 9.99900958204965e-06, "loss": 0.6629, "step": 571 }, { "epoch": 0.04, "grad_norm": 0.9815685749053955, "learning_rate": 9.99898905648988e-06, "loss": 0.66, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.0205934047698975, "learning_rate": 9.998968320440068e-06, "loss": 0.6797, "step": 573 }, { "epoch": 0.04, "grad_norm": 0.999477207660675, "learning_rate": 9.998947373901092e-06, "loss": 0.6605, "step": 574 }, { "epoch": 0.04, "grad_norm": 0.8807664513587952, "learning_rate": 9.998926216873833e-06, "loss": 0.6118, "step": 575 }, { "epoch": 0.04, "grad_norm": 0.9839978218078613, "learning_rate": 9.998904849359179e-06, "loss": 0.7242, "step": 576 }, { "epoch": 0.04, "grad_norm": 1.015552043914795, "learning_rate": 9.998883271358033e-06, "loss": 0.6737, "step": 577 }, { "epoch": 0.04, "grad_norm": 0.9544459581375122, "learning_rate": 9.998861482871303e-06, "loss": 0.654, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.0526336431503296, "learning_rate": 9.998839483899904e-06, "loss": 0.6855, "step": 579 }, { "epoch": 0.04, "grad_norm": 0.9297081828117371, "learning_rate": 9.998817274444765e-06, "loss": 0.671, "step": 580 }, { "epoch": 0.04, "grad_norm": 0.9545259475708008, "learning_rate": 9.998794854506819e-06, "loss": 0.6331, "step": 581 }, { "epoch": 0.04, "grad_norm": 0.94922935962677, "learning_rate": 9.998772224087011e-06, "loss": 0.6739, "step": 582 }, { "epoch": 0.04, "grad_norm": 0.9858238101005554, "learning_rate": 9.998749383186296e-06, "loss": 0.6402, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.0650100708007812, "learning_rate": 9.998726331805632e-06, "loss": 0.646, "step": 584 }, { "epoch": 0.04, "grad_norm": 1.0530078411102295, "learning_rate": 9.998703069945995e-06, "loss": 0.6362, "step": 585 }, { "epoch": 0.04, "grad_norm": 1.036247968673706, "learning_rate": 9.998679597608357e-06, "loss": 0.7401, "step": 586 }, { "epoch": 0.04, "grad_norm": 0.990145206451416, "learning_rate": 9.998655914793711e-06, "loss": 0.6178, "step": 587 }, { "epoch": 0.04, "grad_norm": 0.988255500793457, "learning_rate": 9.998632021503055e-06, "loss": 0.6291, "step": 588 }, { "epoch": 0.04, "grad_norm": 0.9244970083236694, "learning_rate": 9.998607917737393e-06, "loss": 0.6075, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.020477056503296, "learning_rate": 9.99858360349774e-06, "loss": 0.6414, "step": 590 }, { "epoch": 0.04, "grad_norm": 1.138732671737671, "learning_rate": 9.99855907878512e-06, "loss": 0.6744, "step": 591 }, { "epoch": 0.04, "grad_norm": 1.045698881149292, "learning_rate": 9.998534343600567e-06, "loss": 0.6219, "step": 592 }, { "epoch": 0.04, "grad_norm": 0.9490424990653992, "learning_rate": 9.99850939794512e-06, "loss": 0.6726, "step": 593 }, { "epoch": 0.04, "grad_norm": 0.944223165512085, "learning_rate": 9.998484241819833e-06, "loss": 0.6593, "step": 594 }, { "epoch": 0.04, "grad_norm": 0.941078245639801, "learning_rate": 9.99845887522576e-06, "loss": 0.6168, "step": 595 }, { "epoch": 0.04, "grad_norm": 0.9710420966148376, "learning_rate": 9.998433298163974e-06, "loss": 0.7174, "step": 596 }, { "epoch": 0.04, "grad_norm": 0.958227276802063, "learning_rate": 9.99840751063555e-06, "loss": 0.6664, "step": 597 }, { "epoch": 0.04, "grad_norm": 1.0510207414627075, "learning_rate": 9.998381512641574e-06, "loss": 0.6356, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.0442863702774048, "learning_rate": 9.99835530418314e-06, "loss": 0.6988, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.0224828720092773, "learning_rate": 9.998328885261352e-06, "loss": 0.7323, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.007456660270691, "learning_rate": 9.998302255877323e-06, "loss": 0.7505, "step": 601 }, { "epoch": 0.04, "grad_norm": 1.0247341394424438, "learning_rate": 9.998275416032176e-06, "loss": 0.6373, "step": 602 }, { "epoch": 0.04, "grad_norm": 0.9564207792282104, "learning_rate": 9.998248365727037e-06, "loss": 0.6353, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.015875220298767, "learning_rate": 9.998221104963047e-06, "loss": 0.6737, "step": 604 }, { "epoch": 0.04, "grad_norm": 0.9237945079803467, "learning_rate": 9.998193633741353e-06, "loss": 0.63, "step": 605 }, { "epoch": 0.04, "grad_norm": 0.961380660533905, "learning_rate": 9.998165952063113e-06, "loss": 0.6423, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.033104658126831, "learning_rate": 9.998138059929493e-06, "loss": 0.6457, "step": 607 }, { "epoch": 0.04, "grad_norm": 0.9923886060714722, "learning_rate": 9.998109957341665e-06, "loss": 0.6845, "step": 608 }, { "epoch": 0.04, "grad_norm": 1.0205433368682861, "learning_rate": 9.998081644300815e-06, "loss": 0.6861, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.0005221366882324, "learning_rate": 9.998053120808133e-06, "loss": 0.6066, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.06248140335083, "learning_rate": 9.998024386864821e-06, "loss": 0.6687, "step": 611 }, { "epoch": 0.04, "grad_norm": 0.9576296806335449, "learning_rate": 9.99799544247209e-06, "loss": 0.6272, "step": 612 }, { "epoch": 0.04, "grad_norm": 1.0306636095046997, "learning_rate": 9.997966287631157e-06, "loss": 0.6418, "step": 613 }, { "epoch": 0.04, "grad_norm": 0.9254865050315857, "learning_rate": 9.997936922343253e-06, "loss": 0.6598, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.0606073141098022, "learning_rate": 9.997907346609608e-06, "loss": 0.6819, "step": 615 }, { "epoch": 0.04, "grad_norm": 0.9714949727058411, "learning_rate": 9.997877560431472e-06, "loss": 0.6894, "step": 616 }, { "epoch": 0.04, "grad_norm": 0.9910696148872375, "learning_rate": 9.9978475638101e-06, "loss": 0.6925, "step": 617 }, { "epoch": 0.04, "grad_norm": 0.986289918422699, "learning_rate": 9.997817356746751e-06, "loss": 0.6504, "step": 618 }, { "epoch": 0.04, "grad_norm": 0.9627223014831543, "learning_rate": 9.9977869392427e-06, "loss": 0.6327, "step": 619 }, { "epoch": 0.04, "grad_norm": 1.0320593118667603, "learning_rate": 9.997756311299229e-06, "loss": 0.711, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.0085593461990356, "learning_rate": 9.997725472917623e-06, "loss": 0.6989, "step": 621 }, { "epoch": 0.04, "grad_norm": 0.9825278520584106, "learning_rate": 9.997694424099184e-06, "loss": 0.6127, "step": 622 }, { "epoch": 0.04, "grad_norm": 0.9646775126457214, "learning_rate": 9.99766316484522e-06, "loss": 0.6, "step": 623 }, { "epoch": 0.04, "grad_norm": 0.963337242603302, "learning_rate": 9.997631695157043e-06, "loss": 0.668, "step": 624 }, { "epoch": 0.04, "grad_norm": 0.9412251710891724, "learning_rate": 9.997600015035982e-06, "loss": 0.7373, "step": 625 }, { "epoch": 0.04, "grad_norm": 0.9189504981040955, "learning_rate": 9.99756812448337e-06, "loss": 0.6178, "step": 626 }, { "epoch": 0.04, "grad_norm": 1.0134235620498657, "learning_rate": 9.99753602350055e-06, "loss": 0.7062, "step": 627 }, { "epoch": 0.04, "grad_norm": 0.9411819577217102, "learning_rate": 9.997503712088873e-06, "loss": 0.6587, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.1199616193771362, "learning_rate": 9.9974711902497e-06, "loss": 0.7078, "step": 629 }, { "epoch": 0.04, "grad_norm": 1.0165560245513916, "learning_rate": 9.997438457984398e-06, "loss": 0.7244, "step": 630 }, { "epoch": 0.04, "grad_norm": 0.9950897097587585, "learning_rate": 9.997405515294349e-06, "loss": 0.6421, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.0164552927017212, "learning_rate": 9.99737236218094e-06, "loss": 0.7025, "step": 632 }, { "epoch": 0.04, "grad_norm": 0.993774950504303, "learning_rate": 9.997338998645562e-06, "loss": 0.6816, "step": 633 }, { "epoch": 0.04, "grad_norm": 1.4380717277526855, "learning_rate": 9.997305424689626e-06, "loss": 0.6834, "step": 634 }, { "epoch": 0.04, "grad_norm": 0.9469321966171265, "learning_rate": 9.997271640314542e-06, "loss": 0.6764, "step": 635 }, { "epoch": 0.04, "grad_norm": 0.992761492729187, "learning_rate": 9.997237645521733e-06, "loss": 0.6855, "step": 636 }, { "epoch": 0.04, "grad_norm": 0.998432993888855, "learning_rate": 9.997203440312632e-06, "loss": 0.6508, "step": 637 }, { "epoch": 0.04, "grad_norm": 1.0937362909317017, "learning_rate": 9.997169024688678e-06, "loss": 0.6324, "step": 638 }, { "epoch": 0.04, "grad_norm": 0.9640938639640808, "learning_rate": 9.997134398651318e-06, "loss": 0.644, "step": 639 }, { "epoch": 0.04, "grad_norm": 0.98480224609375, "learning_rate": 9.997099562202015e-06, "loss": 0.6983, "step": 640 }, { "epoch": 0.04, "grad_norm": 0.9848492741584778, "learning_rate": 9.997064515342232e-06, "loss": 0.6504, "step": 641 }, { "epoch": 0.04, "grad_norm": 1.068900465965271, "learning_rate": 9.997029258073445e-06, "loss": 0.6797, "step": 642 }, { "epoch": 0.04, "grad_norm": 0.9294676780700684, "learning_rate": 9.99699379039714e-06, "loss": 0.6919, "step": 643 }, { "epoch": 0.04, "grad_norm": 1.054355263710022, "learning_rate": 9.996958112314811e-06, "loss": 0.6627, "step": 644 }, { "epoch": 0.04, "grad_norm": 0.9877650737762451, "learning_rate": 9.996922223827958e-06, "loss": 0.6641, "step": 645 }, { "epoch": 0.04, "grad_norm": 0.9646854400634766, "learning_rate": 9.996886124938092e-06, "loss": 0.689, "step": 646 }, { "epoch": 0.04, "grad_norm": 0.9384362101554871, "learning_rate": 9.996849815646736e-06, "loss": 0.6542, "step": 647 }, { "epoch": 0.04, "grad_norm": 0.9519203305244446, "learning_rate": 9.996813295955417e-06, "loss": 0.6862, "step": 648 }, { "epoch": 0.04, "grad_norm": 0.9722259640693665, "learning_rate": 9.996776565865671e-06, "loss": 0.647, "step": 649 }, { "epoch": 0.04, "grad_norm": 0.9623055458068848, "learning_rate": 9.996739625379049e-06, "loss": 0.66, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.0324175357818604, "learning_rate": 9.9967024744971e-06, "loss": 0.6562, "step": 651 }, { "epoch": 0.04, "grad_norm": 0.9541144371032715, "learning_rate": 9.996665113221396e-06, "loss": 0.6685, "step": 652 }, { "epoch": 0.04, "grad_norm": 0.9493923187255859, "learning_rate": 9.996627541553504e-06, "loss": 0.6545, "step": 653 }, { "epoch": 0.04, "grad_norm": 0.8991278409957886, "learning_rate": 9.996589759495008e-06, "loss": 0.627, "step": 654 }, { "epoch": 0.04, "grad_norm": 1.066519021987915, "learning_rate": 9.9965517670475e-06, "loss": 0.7353, "step": 655 }, { "epoch": 0.04, "grad_norm": 1.0149874687194824, "learning_rate": 9.996513564212577e-06, "loss": 0.6611, "step": 656 }, { "epoch": 0.04, "grad_norm": 0.9408120512962341, "learning_rate": 9.996475150991852e-06, "loss": 0.6596, "step": 657 }, { "epoch": 0.04, "grad_norm": 1.0212035179138184, "learning_rate": 9.99643652738694e-06, "loss": 0.7073, "step": 658 }, { "epoch": 0.04, "grad_norm": 1.066163182258606, "learning_rate": 9.996397693399465e-06, "loss": 0.6822, "step": 659 }, { "epoch": 0.04, "grad_norm": 0.9859758615493774, "learning_rate": 9.996358649031066e-06, "loss": 0.6691, "step": 660 }, { "epoch": 0.04, "grad_norm": 0.9595489501953125, "learning_rate": 9.996319394283384e-06, "loss": 0.6466, "step": 661 }, { "epoch": 0.04, "grad_norm": 1.0166726112365723, "learning_rate": 9.996279929158074e-06, "loss": 0.7591, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.0264617204666138, "learning_rate": 9.996240253656796e-06, "loss": 0.6632, "step": 663 }, { "epoch": 0.04, "grad_norm": 0.9634756445884705, "learning_rate": 9.996200367781224e-06, "loss": 0.6782, "step": 664 }, { "epoch": 0.04, "grad_norm": 0.9677413105964661, "learning_rate": 9.996160271533033e-06, "loss": 0.6686, "step": 665 }, { "epoch": 0.04, "grad_norm": 1.0524028539657593, "learning_rate": 9.996119964913914e-06, "loss": 0.6577, "step": 666 }, { "epoch": 0.04, "grad_norm": 1.0887914896011353, "learning_rate": 9.996079447925563e-06, "loss": 0.7091, "step": 667 }, { "epoch": 0.04, "grad_norm": 1.1000274419784546, "learning_rate": 9.996038720569688e-06, "loss": 0.6642, "step": 668 }, { "epoch": 0.04, "grad_norm": 0.8987544775009155, "learning_rate": 9.995997782848e-06, "loss": 0.6831, "step": 669 }, { "epoch": 0.04, "grad_norm": 0.9039768576622009, "learning_rate": 9.995956634762227e-06, "loss": 0.6322, "step": 670 }, { "epoch": 0.04, "grad_norm": 0.9678673148155212, "learning_rate": 9.995915276314099e-06, "loss": 0.6755, "step": 671 }, { "epoch": 0.04, "grad_norm": 1.0128499269485474, "learning_rate": 9.995873707505358e-06, "loss": 0.6625, "step": 672 }, { "epoch": 0.04, "grad_norm": 0.9772645235061646, "learning_rate": 9.995831928337756e-06, "loss": 0.6798, "step": 673 }, { "epoch": 0.04, "grad_norm": 0.9836506843566895, "learning_rate": 9.99578993881305e-06, "loss": 0.7197, "step": 674 }, { "epoch": 0.04, "grad_norm": 1.006994605064392, "learning_rate": 9.995747738933009e-06, "loss": 0.6148, "step": 675 }, { "epoch": 0.04, "grad_norm": 0.9389720559120178, "learning_rate": 9.995705328699408e-06, "loss": 0.7033, "step": 676 }, { "epoch": 0.04, "grad_norm": 1.027573823928833, "learning_rate": 9.995662708114036e-06, "loss": 0.7308, "step": 677 }, { "epoch": 0.04, "grad_norm": 1.0021096467971802, "learning_rate": 9.995619877178685e-06, "loss": 0.6952, "step": 678 }, { "epoch": 0.04, "grad_norm": 0.9737790822982788, "learning_rate": 9.99557683589516e-06, "loss": 0.741, "step": 679 }, { "epoch": 0.04, "grad_norm": 0.9685630202293396, "learning_rate": 9.995533584265273e-06, "loss": 0.6778, "step": 680 }, { "epoch": 0.04, "grad_norm": 1.057919979095459, "learning_rate": 9.995490122290845e-06, "loss": 0.7612, "step": 681 }, { "epoch": 0.04, "grad_norm": 0.9825607538223267, "learning_rate": 9.995446449973705e-06, "loss": 0.6765, "step": 682 }, { "epoch": 0.04, "grad_norm": 0.9538717269897461, "learning_rate": 9.995402567315695e-06, "loss": 0.6369, "step": 683 }, { "epoch": 0.04, "grad_norm": 0.9630232453346252, "learning_rate": 9.99535847431866e-06, "loss": 0.6993, "step": 684 }, { "epoch": 0.04, "grad_norm": 0.9161614179611206, "learning_rate": 9.995314170984457e-06, "loss": 0.6651, "step": 685 }, { "epoch": 0.04, "grad_norm": 1.0125501155853271, "learning_rate": 9.99526965731495e-06, "loss": 0.6547, "step": 686 }, { "epoch": 0.04, "grad_norm": 0.9450063109397888, "learning_rate": 9.995224933312016e-06, "loss": 0.6821, "step": 687 }, { "epoch": 0.04, "grad_norm": 0.9932665228843689, "learning_rate": 9.995179998977537e-06, "loss": 0.6668, "step": 688 }, { "epoch": 0.04, "grad_norm": 1.0881638526916504, "learning_rate": 9.995134854313407e-06, "loss": 0.6555, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.0094410181045532, "learning_rate": 9.995089499321521e-06, "loss": 0.7013, "step": 690 }, { "epoch": 0.04, "grad_norm": 0.9489861726760864, "learning_rate": 9.995043934003796e-06, "loss": 0.6614, "step": 691 }, { "epoch": 0.04, "grad_norm": 0.9359292984008789, "learning_rate": 9.994998158362148e-06, "loss": 0.6638, "step": 692 }, { "epoch": 0.04, "grad_norm": 1.031830906867981, "learning_rate": 9.994952172398502e-06, "loss": 0.7097, "step": 693 }, { "epoch": 0.04, "grad_norm": 0.9993519186973572, "learning_rate": 9.994905976114799e-06, "loss": 0.7518, "step": 694 }, { "epoch": 0.04, "grad_norm": 0.9693325757980347, "learning_rate": 9.994859569512978e-06, "loss": 0.6672, "step": 695 }, { "epoch": 0.04, "grad_norm": 0.9676175117492676, "learning_rate": 9.994812952594998e-06, "loss": 0.6357, "step": 696 }, { "epoch": 0.04, "grad_norm": 0.9201942682266235, "learning_rate": 9.994766125362821e-06, "loss": 0.6413, "step": 697 }, { "epoch": 0.04, "grad_norm": 0.9743843674659729, "learning_rate": 9.994719087818416e-06, "loss": 0.6913, "step": 698 }, { "epoch": 0.04, "grad_norm": 0.9933353066444397, "learning_rate": 9.994671839963766e-06, "loss": 0.645, "step": 699 }, { "epoch": 0.04, "grad_norm": 0.9530336856842041, "learning_rate": 9.994624381800861e-06, "loss": 0.6292, "step": 700 }, { "epoch": 0.04, "grad_norm": 0.9179254770278931, "learning_rate": 9.994576713331699e-06, "loss": 0.6347, "step": 701 }, { "epoch": 0.04, "grad_norm": 1.0408787727355957, "learning_rate": 9.994528834558285e-06, "loss": 0.7041, "step": 702 }, { "epoch": 0.04, "grad_norm": 0.9889572262763977, "learning_rate": 9.994480745482636e-06, "loss": 0.7333, "step": 703 }, { "epoch": 0.04, "grad_norm": 0.9241334795951843, "learning_rate": 9.99443244610678e-06, "loss": 0.6409, "step": 704 }, { "epoch": 0.04, "grad_norm": 0.9185009598731995, "learning_rate": 9.994383936432745e-06, "loss": 0.6136, "step": 705 }, { "epoch": 0.04, "grad_norm": 1.019323468208313, "learning_rate": 9.994335216462579e-06, "loss": 0.6535, "step": 706 }, { "epoch": 0.04, "grad_norm": 0.9755436182022095, "learning_rate": 9.99428628619833e-06, "loss": 0.666, "step": 707 }, { "epoch": 0.04, "grad_norm": 1.0832858085632324, "learning_rate": 9.994237145642058e-06, "loss": 0.731, "step": 708 }, { "epoch": 0.04, "grad_norm": 1.036895513534546, "learning_rate": 9.994187794795835e-06, "loss": 0.6424, "step": 709 }, { "epoch": 0.04, "grad_norm": 1.0125101804733276, "learning_rate": 9.994138233661737e-06, "loss": 0.7033, "step": 710 }, { "epoch": 0.05, "grad_norm": 0.9705720543861389, "learning_rate": 9.994088462241851e-06, "loss": 0.7013, "step": 711 }, { "epoch": 0.05, "grad_norm": 0.9837139248847961, "learning_rate": 9.994038480538274e-06, "loss": 0.6626, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.0453715324401855, "learning_rate": 9.993988288553109e-06, "loss": 0.6823, "step": 713 }, { "epoch": 0.05, "grad_norm": 0.9410306811332703, "learning_rate": 9.993937886288471e-06, "loss": 0.6091, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.074299693107605, "learning_rate": 9.99388727374648e-06, "loss": 0.7144, "step": 715 }, { "epoch": 0.05, "grad_norm": 0.9971576929092407, "learning_rate": 9.993836450929268e-06, "loss": 0.6564, "step": 716 }, { "epoch": 0.05, "grad_norm": 0.9908046126365662, "learning_rate": 9.993785417838978e-06, "loss": 0.5828, "step": 717 }, { "epoch": 0.05, "grad_norm": 0.985888659954071, "learning_rate": 9.993734174477752e-06, "loss": 0.6428, "step": 718 }, { "epoch": 0.05, "grad_norm": 0.9888742566108704, "learning_rate": 9.993682720847755e-06, "loss": 0.6407, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.1355136632919312, "learning_rate": 9.99363105695115e-06, "loss": 0.6762, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.0007946491241455, "learning_rate": 9.993579182790111e-06, "loss": 0.6832, "step": 721 }, { "epoch": 0.05, "grad_norm": 0.9047017097473145, "learning_rate": 9.993527098366826e-06, "loss": 0.6187, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.020936369895935, "learning_rate": 9.993474803683486e-06, "loss": 0.7007, "step": 723 }, { "epoch": 0.05, "grad_norm": 1.0052990913391113, "learning_rate": 9.993422298742293e-06, "loss": 0.6472, "step": 724 }, { "epoch": 0.05, "grad_norm": 0.9748853445053101, "learning_rate": 9.993369583545456e-06, "loss": 0.6705, "step": 725 }, { "epoch": 0.05, "grad_norm": 1.0031288862228394, "learning_rate": 9.993316658095198e-06, "loss": 0.6891, "step": 726 }, { "epoch": 0.05, "grad_norm": 1.018814206123352, "learning_rate": 9.993263522393745e-06, "loss": 0.7134, "step": 727 }, { "epoch": 0.05, "grad_norm": 0.9574296474456787, "learning_rate": 9.993210176443338e-06, "loss": 0.6798, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.0445259809494019, "learning_rate": 9.993156620246219e-06, "loss": 0.7278, "step": 729 }, { "epoch": 0.05, "grad_norm": 0.9974734783172607, "learning_rate": 9.993102853804643e-06, "loss": 0.687, "step": 730 }, { "epoch": 0.05, "grad_norm": 0.9887290000915527, "learning_rate": 9.993048877120876e-06, "loss": 0.6525, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.0144176483154297, "learning_rate": 9.992994690197192e-06, "loss": 0.6687, "step": 732 }, { "epoch": 0.05, "grad_norm": 0.9862350225448608, "learning_rate": 9.992940293035871e-06, "loss": 0.6795, "step": 733 }, { "epoch": 0.05, "grad_norm": 0.9350804090499878, "learning_rate": 9.992885685639203e-06, "loss": 0.6152, "step": 734 }, { "epoch": 0.05, "grad_norm": 0.937683641910553, "learning_rate": 9.992830868009487e-06, "loss": 0.6963, "step": 735 }, { "epoch": 0.05, "grad_norm": 0.9010510444641113, "learning_rate": 9.992775840149031e-06, "loss": 0.6196, "step": 736 }, { "epoch": 0.05, "grad_norm": 0.9523539543151855, "learning_rate": 9.992720602060155e-06, "loss": 0.6837, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.0255656242370605, "learning_rate": 9.992665153745182e-06, "loss": 0.7196, "step": 738 }, { "epoch": 0.05, "grad_norm": 0.9392181038856506, "learning_rate": 9.992609495206448e-06, "loss": 0.7169, "step": 739 }, { "epoch": 0.05, "grad_norm": 0.9734467267990112, "learning_rate": 9.992553626446296e-06, "loss": 0.6623, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.0188223123550415, "learning_rate": 9.992497547467079e-06, "loss": 0.7326, "step": 741 }, { "epoch": 0.05, "grad_norm": 0.9624093770980835, "learning_rate": 9.992441258271157e-06, "loss": 0.6977, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.082211971282959, "learning_rate": 9.992384758860902e-06, "loss": 0.6825, "step": 743 }, { "epoch": 0.05, "grad_norm": 0.9943745732307434, "learning_rate": 9.99232804923869e-06, "loss": 0.6755, "step": 744 }, { "epoch": 0.05, "grad_norm": 1.016251564025879, "learning_rate": 9.992271129406914e-06, "loss": 0.6997, "step": 745 }, { "epoch": 0.05, "grad_norm": 0.9673280715942383, "learning_rate": 9.992213999367965e-06, "loss": 0.6589, "step": 746 }, { "epoch": 0.05, "grad_norm": 0.984897255897522, "learning_rate": 9.992156659124253e-06, "loss": 0.6589, "step": 747 }, { "epoch": 0.05, "grad_norm": 0.991965651512146, "learning_rate": 9.99209910867819e-06, "loss": 0.6864, "step": 748 }, { "epoch": 0.05, "grad_norm": 0.8924134373664856, "learning_rate": 9.9920413480322e-06, "loss": 0.6364, "step": 749 }, { "epoch": 0.05, "grad_norm": 0.9772643446922302, "learning_rate": 9.991983377188715e-06, "loss": 0.6503, "step": 750 }, { "epoch": 0.05, "grad_norm": 0.9954730868339539, "learning_rate": 9.991925196150174e-06, "loss": 0.6672, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.0930321216583252, "learning_rate": 9.99186680491903e-06, "loss": 0.6267, "step": 752 }, { "epoch": 0.05, "grad_norm": 0.9928365349769592, "learning_rate": 9.99180820349774e-06, "loss": 0.7189, "step": 753 }, { "epoch": 0.05, "grad_norm": 0.9924033880233765, "learning_rate": 9.991749391888772e-06, "loss": 0.7041, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.0099812746047974, "learning_rate": 9.991690370094603e-06, "loss": 0.7206, "step": 755 }, { "epoch": 0.05, "grad_norm": 1.0289372205734253, "learning_rate": 9.991631138117715e-06, "loss": 0.7392, "step": 756 }, { "epoch": 0.05, "grad_norm": 1.0022187232971191, "learning_rate": 9.991571695960606e-06, "loss": 0.6903, "step": 757 }, { "epoch": 0.05, "grad_norm": 0.8819312453269958, "learning_rate": 9.991512043625777e-06, "loss": 0.6078, "step": 758 }, { "epoch": 0.05, "grad_norm": 0.9569171071052551, "learning_rate": 9.991452181115739e-06, "loss": 0.6521, "step": 759 }, { "epoch": 0.05, "grad_norm": 0.9458112120628357, "learning_rate": 9.991392108433016e-06, "loss": 0.7201, "step": 760 }, { "epoch": 0.05, "grad_norm": 0.9446436762809753, "learning_rate": 9.991331825580132e-06, "loss": 0.7091, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.0510190725326538, "learning_rate": 9.99127133255963e-06, "loss": 0.6936, "step": 762 }, { "epoch": 0.05, "grad_norm": 0.9923396110534668, "learning_rate": 9.991210629374058e-06, "loss": 0.65, "step": 763 }, { "epoch": 0.05, "grad_norm": 0.9962745904922485, "learning_rate": 9.991149716025967e-06, "loss": 0.6632, "step": 764 }, { "epoch": 0.05, "grad_norm": 0.9799250960350037, "learning_rate": 9.991088592517924e-06, "loss": 0.6631, "step": 765 }, { "epoch": 0.05, "grad_norm": 0.922505795955658, "learning_rate": 9.991027258852505e-06, "loss": 0.6594, "step": 766 }, { "epoch": 0.05, "grad_norm": 0.919065535068512, "learning_rate": 9.990965715032289e-06, "loss": 0.5974, "step": 767 }, { "epoch": 0.05, "grad_norm": 0.9268805384635925, "learning_rate": 9.99090396105987e-06, "loss": 0.6345, "step": 768 }, { "epoch": 0.05, "grad_norm": 0.9739397168159485, "learning_rate": 9.990841996937846e-06, "loss": 0.6757, "step": 769 }, { "epoch": 0.05, "grad_norm": 1.0037983655929565, "learning_rate": 9.990779822668827e-06, "loss": 0.714, "step": 770 }, { "epoch": 0.05, "grad_norm": 0.9694761633872986, "learning_rate": 9.990717438255435e-06, "loss": 0.6614, "step": 771 }, { "epoch": 0.05, "grad_norm": 0.962847113609314, "learning_rate": 9.99065484370029e-06, "loss": 0.7379, "step": 772 }, { "epoch": 0.05, "grad_norm": 0.9772030115127563, "learning_rate": 9.99059203900603e-06, "loss": 0.651, "step": 773 }, { "epoch": 0.05, "grad_norm": 0.9414916634559631, "learning_rate": 9.990529024175303e-06, "loss": 0.5766, "step": 774 }, { "epoch": 0.05, "grad_norm": 0.970845103263855, "learning_rate": 9.990465799210757e-06, "loss": 0.6302, "step": 775 }, { "epoch": 0.05, "grad_norm": 1.0166348218917847, "learning_rate": 9.99040236411506e-06, "loss": 0.745, "step": 776 }, { "epoch": 0.05, "grad_norm": 1.0761529207229614, "learning_rate": 9.990338718890878e-06, "loss": 0.6747, "step": 777 }, { "epoch": 0.05, "grad_norm": 0.9640312194824219, "learning_rate": 9.990274863540891e-06, "loss": 0.6303, "step": 778 }, { "epoch": 0.05, "grad_norm": 1.0130929946899414, "learning_rate": 9.990210798067792e-06, "loss": 0.6996, "step": 779 }, { "epoch": 0.05, "grad_norm": 1.0341368913650513, "learning_rate": 9.990146522474273e-06, "loss": 0.6914, "step": 780 }, { "epoch": 0.05, "grad_norm": 0.9964221119880676, "learning_rate": 9.990082036763046e-06, "loss": 0.6798, "step": 781 }, { "epoch": 0.05, "grad_norm": 0.9139377474784851, "learning_rate": 9.990017340936823e-06, "loss": 0.6492, "step": 782 }, { "epoch": 0.05, "grad_norm": 0.9424077868461609, "learning_rate": 9.989952434998328e-06, "loss": 0.7023, "step": 783 }, { "epoch": 0.05, "grad_norm": 0.9752070307731628, "learning_rate": 9.989887318950295e-06, "loss": 0.6834, "step": 784 }, { "epoch": 0.05, "grad_norm": 0.9262385964393616, "learning_rate": 9.989821992795467e-06, "loss": 0.667, "step": 785 }, { "epoch": 0.05, "grad_norm": 0.9452196955680847, "learning_rate": 9.989756456536593e-06, "loss": 0.6285, "step": 786 }, { "epoch": 0.05, "grad_norm": 1.0110949277877808, "learning_rate": 9.989690710176433e-06, "loss": 0.6696, "step": 787 }, { "epoch": 0.05, "grad_norm": 0.9881210327148438, "learning_rate": 9.989624753717752e-06, "loss": 0.6981, "step": 788 }, { "epoch": 0.05, "grad_norm": 0.9513158798217773, "learning_rate": 9.989558587163332e-06, "loss": 0.6507, "step": 789 }, { "epoch": 0.05, "grad_norm": 0.9474494457244873, "learning_rate": 9.989492210515958e-06, "loss": 0.6531, "step": 790 }, { "epoch": 0.05, "grad_norm": 0.9730693101882935, "learning_rate": 9.989425623778423e-06, "loss": 0.6714, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.0155802965164185, "learning_rate": 9.989358826953533e-06, "loss": 0.6823, "step": 792 }, { "epoch": 0.05, "grad_norm": 0.9369945526123047, "learning_rate": 9.989291820044099e-06, "loss": 0.6641, "step": 793 }, { "epoch": 0.05, "grad_norm": 0.9431589245796204, "learning_rate": 9.989224603052943e-06, "loss": 0.6021, "step": 794 }, { "epoch": 0.05, "grad_norm": 1.0468026399612427, "learning_rate": 9.989157175982896e-06, "loss": 0.7079, "step": 795 }, { "epoch": 0.05, "grad_norm": 0.9889234900474548, "learning_rate": 9.989089538836795e-06, "loss": 0.6192, "step": 796 }, { "epoch": 0.05, "grad_norm": 0.9767878651618958, "learning_rate": 9.98902169161749e-06, "loss": 0.6864, "step": 797 }, { "epoch": 0.05, "grad_norm": 0.9971264004707336, "learning_rate": 9.988953634327836e-06, "loss": 0.7311, "step": 798 }, { "epoch": 0.05, "grad_norm": 0.9408350586891174, "learning_rate": 9.988885366970701e-06, "loss": 0.7097, "step": 799 }, { "epoch": 0.05, "grad_norm": 0.9743333458900452, "learning_rate": 9.988816889548958e-06, "loss": 0.6899, "step": 800 }, { "epoch": 0.05, "grad_norm": 0.943428635597229, "learning_rate": 9.98874820206549e-06, "loss": 0.6598, "step": 801 }, { "epoch": 0.05, "grad_norm": 0.9481057524681091, "learning_rate": 9.988679304523192e-06, "loss": 0.6171, "step": 802 }, { "epoch": 0.05, "grad_norm": 0.9587128758430481, "learning_rate": 9.988610196924962e-06, "loss": 0.6731, "step": 803 }, { "epoch": 0.05, "grad_norm": 0.9046414494514465, "learning_rate": 9.98854087927371e-06, "loss": 0.6688, "step": 804 }, { "epoch": 0.05, "grad_norm": 0.9200078248977661, "learning_rate": 9.988471351572355e-06, "loss": 0.6444, "step": 805 }, { "epoch": 0.05, "grad_norm": 1.0290337800979614, "learning_rate": 9.988401613823825e-06, "loss": 0.6902, "step": 806 }, { "epoch": 0.05, "grad_norm": 0.9850117564201355, "learning_rate": 9.988331666031056e-06, "loss": 0.6844, "step": 807 }, { "epoch": 0.05, "grad_norm": 1.0165541172027588, "learning_rate": 9.988261508196994e-06, "loss": 0.6588, "step": 808 }, { "epoch": 0.05, "grad_norm": 0.9509625434875488, "learning_rate": 9.988191140324595e-06, "loss": 0.6395, "step": 809 }, { "epoch": 0.05, "grad_norm": 0.8759293556213379, "learning_rate": 9.988120562416817e-06, "loss": 0.6624, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.0014184713363647, "learning_rate": 9.988049774476636e-06, "loss": 0.658, "step": 811 }, { "epoch": 0.05, "grad_norm": 0.9247162938117981, "learning_rate": 9.98797877650703e-06, "loss": 0.6577, "step": 812 }, { "epoch": 0.05, "grad_norm": 0.9179931879043579, "learning_rate": 9.987907568510991e-06, "loss": 0.6404, "step": 813 }, { "epoch": 0.05, "grad_norm": 1.043821930885315, "learning_rate": 9.987836150491515e-06, "loss": 0.708, "step": 814 }, { "epoch": 0.05, "grad_norm": 1.0630831718444824, "learning_rate": 9.98776452245161e-06, "loss": 0.6716, "step": 815 }, { "epoch": 0.05, "grad_norm": 0.825951874256134, "learning_rate": 9.987692684394294e-06, "loss": 0.6198, "step": 816 }, { "epoch": 0.05, "grad_norm": 1.03840172290802, "learning_rate": 9.987620636322589e-06, "loss": 0.6358, "step": 817 }, { "epoch": 0.05, "grad_norm": 0.9848338961601257, "learning_rate": 9.987548378239529e-06, "loss": 0.7417, "step": 818 }, { "epoch": 0.05, "grad_norm": 1.0349116325378418, "learning_rate": 9.987475910148156e-06, "loss": 0.6809, "step": 819 }, { "epoch": 0.05, "grad_norm": 0.8631666898727417, "learning_rate": 9.987403232051525e-06, "loss": 0.6175, "step": 820 }, { "epoch": 0.05, "grad_norm": 0.972474992275238, "learning_rate": 9.987330343952692e-06, "loss": 0.6336, "step": 821 }, { "epoch": 0.05, "grad_norm": 0.9713488817214966, "learning_rate": 9.987257245854729e-06, "loss": 0.6455, "step": 822 }, { "epoch": 0.05, "grad_norm": 0.9748914241790771, "learning_rate": 9.987183937760713e-06, "loss": 0.6871, "step": 823 }, { "epoch": 0.05, "grad_norm": 0.9042195081710815, "learning_rate": 9.98711041967373e-06, "loss": 0.674, "step": 824 }, { "epoch": 0.05, "grad_norm": 0.9888205528259277, "learning_rate": 9.987036691596877e-06, "loss": 0.6587, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.0192331075668335, "learning_rate": 9.986962753533257e-06, "loss": 0.6968, "step": 826 }, { "epoch": 0.05, "grad_norm": 0.9436393976211548, "learning_rate": 9.986888605485983e-06, "loss": 0.615, "step": 827 }, { "epoch": 0.05, "grad_norm": 0.9654821753501892, "learning_rate": 9.986814247458177e-06, "loss": 0.6473, "step": 828 }, { "epoch": 0.05, "grad_norm": 1.0658471584320068, "learning_rate": 9.986739679452973e-06, "loss": 0.7004, "step": 829 }, { "epoch": 0.05, "grad_norm": 0.9651502966880798, "learning_rate": 9.986664901473508e-06, "loss": 0.5893, "step": 830 }, { "epoch": 0.05, "grad_norm": 0.9251554608345032, "learning_rate": 9.98658991352293e-06, "loss": 0.6431, "step": 831 }, { "epoch": 0.05, "grad_norm": 0.973141610622406, "learning_rate": 9.986514715604401e-06, "loss": 0.6458, "step": 832 }, { "epoch": 0.05, "grad_norm": 1.0335615873336792, "learning_rate": 9.986439307721083e-06, "loss": 0.6615, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.0050101280212402, "learning_rate": 9.98636368987615e-06, "loss": 0.6729, "step": 834 }, { "epoch": 0.05, "grad_norm": 0.9446513652801514, "learning_rate": 9.98628786207279e-06, "loss": 0.6221, "step": 835 }, { "epoch": 0.05, "grad_norm": 0.9160159230232239, "learning_rate": 9.986211824314193e-06, "loss": 0.6568, "step": 836 }, { "epoch": 0.05, "grad_norm": 0.9735708236694336, "learning_rate": 9.986135576603564e-06, "loss": 0.7023, "step": 837 }, { "epoch": 0.05, "grad_norm": 0.996498167514801, "learning_rate": 9.98605911894411e-06, "loss": 0.6713, "step": 838 }, { "epoch": 0.05, "grad_norm": 1.0343009233474731, "learning_rate": 9.985982451339054e-06, "loss": 0.6776, "step": 839 }, { "epoch": 0.05, "grad_norm": 0.9924929141998291, "learning_rate": 9.985905573791619e-06, "loss": 0.6904, "step": 840 }, { "epoch": 0.05, "grad_norm": 0.9932686686515808, "learning_rate": 9.985828486305046e-06, "loss": 0.692, "step": 841 }, { "epoch": 0.05, "grad_norm": 0.9492565989494324, "learning_rate": 9.98575118888258e-06, "loss": 0.6513, "step": 842 }, { "epoch": 0.05, "grad_norm": 0.9628197550773621, "learning_rate": 9.985673681527474e-06, "loss": 0.6683, "step": 843 }, { "epoch": 0.05, "grad_norm": 1.0421559810638428, "learning_rate": 9.985595964242996e-06, "loss": 0.6264, "step": 844 }, { "epoch": 0.05, "grad_norm": 1.0385863780975342, "learning_rate": 9.985518037032413e-06, "loss": 0.7485, "step": 845 }, { "epoch": 0.05, "grad_norm": 0.9698561429977417, "learning_rate": 9.98543989989901e-06, "loss": 0.6673, "step": 846 }, { "epoch": 0.05, "grad_norm": 1.1064468622207642, "learning_rate": 9.985361552846076e-06, "loss": 0.679, "step": 847 }, { "epoch": 0.05, "grad_norm": 0.9422234296798706, "learning_rate": 9.98528299587691e-06, "loss": 0.621, "step": 848 }, { "epoch": 0.05, "grad_norm": 0.9520366787910461, "learning_rate": 9.98520422899482e-06, "loss": 0.6739, "step": 849 }, { "epoch": 0.05, "grad_norm": 1.012834072113037, "learning_rate": 9.985125252203122e-06, "loss": 0.6486, "step": 850 }, { "epoch": 0.05, "grad_norm": 0.9515576958656311, "learning_rate": 9.985046065505141e-06, "loss": 0.6631, "step": 851 }, { "epoch": 0.05, "grad_norm": 0.9131986498832703, "learning_rate": 9.984966668904211e-06, "loss": 0.6183, "step": 852 }, { "epoch": 0.05, "grad_norm": 1.0273786783218384, "learning_rate": 9.984887062403678e-06, "loss": 0.6952, "step": 853 }, { "epoch": 0.05, "grad_norm": 1.0616415739059448, "learning_rate": 9.984807246006891e-06, "loss": 0.6604, "step": 854 }, { "epoch": 0.05, "grad_norm": 1.0264229774475098, "learning_rate": 9.984727219717212e-06, "loss": 0.6836, "step": 855 }, { "epoch": 0.05, "grad_norm": 0.9620240926742554, "learning_rate": 9.984646983538009e-06, "loss": 0.6725, "step": 856 }, { "epoch": 0.05, "grad_norm": 1.026843786239624, "learning_rate": 9.984566537472662e-06, "loss": 0.6973, "step": 857 }, { "epoch": 0.05, "grad_norm": 0.910517156124115, "learning_rate": 9.98448588152456e-06, "loss": 0.6826, "step": 858 }, { "epoch": 0.05, "grad_norm": 0.8792157769203186, "learning_rate": 9.984405015697097e-06, "loss": 0.6207, "step": 859 }, { "epoch": 0.05, "grad_norm": 1.026893973350525, "learning_rate": 9.984323939993678e-06, "loss": 0.6576, "step": 860 }, { "epoch": 0.05, "grad_norm": 0.9753056168556213, "learning_rate": 9.984242654417716e-06, "loss": 0.7123, "step": 861 }, { "epoch": 0.05, "grad_norm": 0.9340549111366272, "learning_rate": 9.984161158972636e-06, "loss": 0.6311, "step": 862 }, { "epoch": 0.05, "grad_norm": 0.958814799785614, "learning_rate": 9.984079453661869e-06, "loss": 0.6848, "step": 863 }, { "epoch": 0.05, "grad_norm": 1.02495539188385, "learning_rate": 9.983997538488851e-06, "loss": 0.6614, "step": 864 }, { "epoch": 0.05, "grad_norm": 0.9181699156761169, "learning_rate": 9.983915413457036e-06, "loss": 0.6182, "step": 865 }, { "epoch": 0.05, "grad_norm": 0.9128296375274658, "learning_rate": 9.983833078569883e-06, "loss": 0.6696, "step": 866 }, { "epoch": 0.05, "grad_norm": 0.9677926301956177, "learning_rate": 9.983750533830856e-06, "loss": 0.7208, "step": 867 }, { "epoch": 0.05, "grad_norm": 1.0382143259048462, "learning_rate": 9.98366777924343e-06, "loss": 0.7064, "step": 868 }, { "epoch": 0.06, "grad_norm": 1.0093235969543457, "learning_rate": 9.983584814811092e-06, "loss": 0.6637, "step": 869 }, { "epoch": 0.06, "grad_norm": 0.9793532490730286, "learning_rate": 9.983501640537333e-06, "loss": 0.7384, "step": 870 }, { "epoch": 0.06, "grad_norm": 0.9115772247314453, "learning_rate": 9.983418256425656e-06, "loss": 0.6931, "step": 871 }, { "epoch": 0.06, "grad_norm": 1.007819414138794, "learning_rate": 9.983334662479572e-06, "loss": 0.6364, "step": 872 }, { "epoch": 0.06, "grad_norm": 0.9625717997550964, "learning_rate": 9.983250858702603e-06, "loss": 0.6219, "step": 873 }, { "epoch": 0.06, "grad_norm": 0.9227074980735779, "learning_rate": 9.983166845098275e-06, "loss": 0.6472, "step": 874 }, { "epoch": 0.06, "grad_norm": 1.024123191833496, "learning_rate": 9.983082621670126e-06, "loss": 0.6085, "step": 875 }, { "epoch": 0.06, "grad_norm": 0.9824538826942444, "learning_rate": 9.982998188421702e-06, "loss": 0.7077, "step": 876 }, { "epoch": 0.06, "grad_norm": 0.9731464982032776, "learning_rate": 9.98291354535656e-06, "loss": 0.6783, "step": 877 }, { "epoch": 0.06, "grad_norm": 0.9325253367424011, "learning_rate": 9.982828692478261e-06, "loss": 0.6467, "step": 878 }, { "epoch": 0.06, "grad_norm": 1.0967223644256592, "learning_rate": 9.982743629790382e-06, "loss": 0.6342, "step": 879 }, { "epoch": 0.06, "grad_norm": 0.9630370140075684, "learning_rate": 9.982658357296502e-06, "loss": 0.6359, "step": 880 }, { "epoch": 0.06, "grad_norm": 0.9850766062736511, "learning_rate": 9.982572875000212e-06, "loss": 0.6571, "step": 881 }, { "epoch": 0.06, "grad_norm": 1.0202929973602295, "learning_rate": 9.98248718290511e-06, "loss": 0.7151, "step": 882 }, { "epoch": 0.06, "grad_norm": 0.9093936085700989, "learning_rate": 9.982401281014806e-06, "loss": 0.6123, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.017791748046875, "learning_rate": 9.982315169332918e-06, "loss": 0.7209, "step": 884 }, { "epoch": 0.06, "grad_norm": 1.0141305923461914, "learning_rate": 9.982228847863069e-06, "loss": 0.6597, "step": 885 }, { "epoch": 0.06, "grad_norm": 0.9242052435874939, "learning_rate": 9.982142316608897e-06, "loss": 0.6469, "step": 886 }, { "epoch": 0.06, "grad_norm": 0.9739558100700378, "learning_rate": 9.982055575574042e-06, "loss": 0.6735, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.027789831161499, "learning_rate": 9.981968624762159e-06, "loss": 0.7039, "step": 888 }, { "epoch": 0.06, "grad_norm": 1.0447962284088135, "learning_rate": 9.981881464176908e-06, "loss": 0.6446, "step": 889 }, { "epoch": 0.06, "grad_norm": 1.0435072183609009, "learning_rate": 9.981794093821957e-06, "loss": 0.6343, "step": 890 }, { "epoch": 0.06, "grad_norm": 0.9777998924255371, "learning_rate": 9.981706513700989e-06, "loss": 0.6912, "step": 891 }, { "epoch": 0.06, "grad_norm": 0.9571147561073303, "learning_rate": 9.98161872381769e-06, "loss": 0.6138, "step": 892 }, { "epoch": 0.06, "grad_norm": 0.9109900593757629, "learning_rate": 9.981530724175756e-06, "loss": 0.5711, "step": 893 }, { "epoch": 0.06, "grad_norm": 0.95176762342453, "learning_rate": 9.981442514778892e-06, "loss": 0.6553, "step": 894 }, { "epoch": 0.06, "grad_norm": 0.890281081199646, "learning_rate": 9.981354095630816e-06, "loss": 0.6194, "step": 895 }, { "epoch": 0.06, "grad_norm": 0.9268616437911987, "learning_rate": 9.981265466735244e-06, "loss": 0.6303, "step": 896 }, { "epoch": 0.06, "grad_norm": 0.961388885974884, "learning_rate": 9.981176628095913e-06, "loss": 0.6764, "step": 897 }, { "epoch": 0.06, "grad_norm": 1.0211546421051025, "learning_rate": 9.981087579716564e-06, "loss": 0.6545, "step": 898 }, { "epoch": 0.06, "grad_norm": 0.9296051859855652, "learning_rate": 9.980998321600944e-06, "loss": 0.613, "step": 899 }, { "epoch": 0.06, "grad_norm": 0.9536461234092712, "learning_rate": 9.98090885375281e-06, "loss": 0.7281, "step": 900 }, { "epoch": 0.06, "grad_norm": 0.9516177773475647, "learning_rate": 9.980819176175932e-06, "loss": 0.6636, "step": 901 }, { "epoch": 0.06, "grad_norm": 0.9591559767723083, "learning_rate": 9.980729288874088e-06, "loss": 0.6949, "step": 902 }, { "epoch": 0.06, "grad_norm": 1.0467829704284668, "learning_rate": 9.98063919185106e-06, "loss": 0.6268, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.0166997909545898, "learning_rate": 9.980548885110641e-06, "loss": 0.6232, "step": 904 }, { "epoch": 0.06, "grad_norm": 0.9611027240753174, "learning_rate": 9.980458368656635e-06, "loss": 0.7333, "step": 905 }, { "epoch": 0.06, "grad_norm": 0.9568818807601929, "learning_rate": 9.980367642492852e-06, "loss": 0.7402, "step": 906 }, { "epoch": 0.06, "grad_norm": 0.9771458506584167, "learning_rate": 9.980276706623114e-06, "loss": 0.6486, "step": 907 }, { "epoch": 0.06, "grad_norm": 0.8996546268463135, "learning_rate": 9.98018556105125e-06, "loss": 0.6198, "step": 908 }, { "epoch": 0.06, "grad_norm": 1.0799936056137085, "learning_rate": 9.980094205781094e-06, "loss": 0.6487, "step": 909 }, { "epoch": 0.06, "grad_norm": 0.9570757150650024, "learning_rate": 9.980002640816498e-06, "loss": 0.6478, "step": 910 }, { "epoch": 0.06, "grad_norm": 0.9597965478897095, "learning_rate": 9.979910866161313e-06, "loss": 0.7147, "step": 911 }, { "epoch": 0.06, "grad_norm": 0.9495083689689636, "learning_rate": 9.979818881819408e-06, "loss": 0.6683, "step": 912 }, { "epoch": 0.06, "grad_norm": 0.9899744987487793, "learning_rate": 9.979726687794651e-06, "loss": 0.752, "step": 913 }, { "epoch": 0.06, "grad_norm": 0.9405885934829712, "learning_rate": 9.979634284090927e-06, "loss": 0.6088, "step": 914 }, { "epoch": 0.06, "grad_norm": 0.9481942057609558, "learning_rate": 9.979541670712125e-06, "loss": 0.706, "step": 915 }, { "epoch": 0.06, "grad_norm": 1.053983211517334, "learning_rate": 9.979448847662148e-06, "loss": 0.6401, "step": 916 }, { "epoch": 0.06, "grad_norm": 0.963388204574585, "learning_rate": 9.979355814944901e-06, "loss": 0.7043, "step": 917 }, { "epoch": 0.06, "grad_norm": 1.0153307914733887, "learning_rate": 9.979262572564303e-06, "loss": 0.62, "step": 918 }, { "epoch": 0.06, "grad_norm": 0.9814804196357727, "learning_rate": 9.979169120524279e-06, "loss": 0.637, "step": 919 }, { "epoch": 0.06, "grad_norm": 0.9698815941810608, "learning_rate": 9.979075458828765e-06, "loss": 0.6741, "step": 920 }, { "epoch": 0.06, "grad_norm": 0.9134169816970825, "learning_rate": 9.978981587481705e-06, "loss": 0.6723, "step": 921 }, { "epoch": 0.06, "grad_norm": 1.1175658702850342, "learning_rate": 9.978887506487049e-06, "loss": 0.744, "step": 922 }, { "epoch": 0.06, "grad_norm": 0.9176881909370422, "learning_rate": 9.978793215848763e-06, "loss": 0.6377, "step": 923 }, { "epoch": 0.06, "grad_norm": 1.0039703845977783, "learning_rate": 9.978698715570814e-06, "loss": 0.6938, "step": 924 }, { "epoch": 0.06, "grad_norm": 0.969764232635498, "learning_rate": 9.97860400565718e-06, "loss": 0.6361, "step": 925 }, { "epoch": 0.06, "grad_norm": 0.9281071424484253, "learning_rate": 9.978509086111852e-06, "loss": 0.6727, "step": 926 }, { "epoch": 0.06, "grad_norm": 0.9164725542068481, "learning_rate": 9.978413956938824e-06, "loss": 0.6378, "step": 927 }, { "epoch": 0.06, "grad_norm": 0.9597803354263306, "learning_rate": 9.978318618142104e-06, "loss": 0.6448, "step": 928 }, { "epoch": 0.06, "grad_norm": 0.9781709313392639, "learning_rate": 9.978223069725706e-06, "loss": 0.626, "step": 929 }, { "epoch": 0.06, "grad_norm": 0.9814243912696838, "learning_rate": 9.978127311693653e-06, "loss": 0.5937, "step": 930 }, { "epoch": 0.06, "grad_norm": 0.9959214925765991, "learning_rate": 9.978031344049975e-06, "loss": 0.6537, "step": 931 }, { "epoch": 0.06, "grad_norm": 1.018471121788025, "learning_rate": 9.977935166798714e-06, "loss": 0.6903, "step": 932 }, { "epoch": 0.06, "grad_norm": 0.9168115854263306, "learning_rate": 9.977838779943921e-06, "loss": 0.6261, "step": 933 }, { "epoch": 0.06, "grad_norm": 1.0428775548934937, "learning_rate": 9.977742183489653e-06, "loss": 0.7146, "step": 934 }, { "epoch": 0.06, "grad_norm": 0.9918137192726135, "learning_rate": 9.977645377439979e-06, "loss": 0.6469, "step": 935 }, { "epoch": 0.06, "grad_norm": 0.9870752096176147, "learning_rate": 9.977548361798975e-06, "loss": 0.6586, "step": 936 }, { "epoch": 0.06, "grad_norm": 0.9916146993637085, "learning_rate": 9.977451136570722e-06, "loss": 0.6951, "step": 937 }, { "epoch": 0.06, "grad_norm": 0.894088089466095, "learning_rate": 9.97735370175932e-06, "loss": 0.6327, "step": 938 }, { "epoch": 0.06, "grad_norm": 0.9738256335258484, "learning_rate": 9.977256057368865e-06, "loss": 0.6731, "step": 939 }, { "epoch": 0.06, "grad_norm": 0.9997884631156921, "learning_rate": 9.977158203403475e-06, "loss": 0.6635, "step": 940 }, { "epoch": 0.06, "grad_norm": 0.9893252849578857, "learning_rate": 9.977060139867268e-06, "loss": 0.6278, "step": 941 }, { "epoch": 0.06, "grad_norm": 0.8766104578971863, "learning_rate": 9.97696186676437e-06, "loss": 0.5792, "step": 942 }, { "epoch": 0.06, "grad_norm": 0.9894992113113403, "learning_rate": 9.976863384098923e-06, "loss": 0.6855, "step": 943 }, { "epoch": 0.06, "grad_norm": 1.0352369546890259, "learning_rate": 9.976764691875072e-06, "loss": 0.7075, "step": 944 }, { "epoch": 0.06, "grad_norm": 0.996104896068573, "learning_rate": 9.976665790096971e-06, "loss": 0.6464, "step": 945 }, { "epoch": 0.06, "grad_norm": 0.9106736779212952, "learning_rate": 9.976566678768787e-06, "loss": 0.5984, "step": 946 }, { "epoch": 0.06, "grad_norm": 0.9899172186851501, "learning_rate": 9.976467357894693e-06, "loss": 0.6433, "step": 947 }, { "epoch": 0.06, "grad_norm": 0.9473981261253357, "learning_rate": 9.97636782747887e-06, "loss": 0.7038, "step": 948 }, { "epoch": 0.06, "grad_norm": 1.0071048736572266, "learning_rate": 9.976268087525509e-06, "loss": 0.7246, "step": 949 }, { "epoch": 0.06, "grad_norm": 0.9809601902961731, "learning_rate": 9.976168138038812e-06, "loss": 0.6786, "step": 950 }, { "epoch": 0.06, "grad_norm": 0.9237947463989258, "learning_rate": 9.976067979022983e-06, "loss": 0.685, "step": 951 }, { "epoch": 0.06, "grad_norm": 0.9583842754364014, "learning_rate": 9.975967610482243e-06, "loss": 0.6639, "step": 952 }, { "epoch": 0.06, "grad_norm": 0.9147073030471802, "learning_rate": 9.975867032420816e-06, "loss": 0.6512, "step": 953 }, { "epoch": 0.06, "grad_norm": 0.9555047750473022, "learning_rate": 9.97576624484294e-06, "loss": 0.7145, "step": 954 }, { "epoch": 0.06, "grad_norm": 0.9743271470069885, "learning_rate": 9.975665247752855e-06, "loss": 0.6743, "step": 955 }, { "epoch": 0.06, "grad_norm": 0.9561975598335266, "learning_rate": 9.975564041154817e-06, "loss": 0.6491, "step": 956 }, { "epoch": 0.06, "grad_norm": 0.971872091293335, "learning_rate": 9.975462625053085e-06, "loss": 0.696, "step": 957 }, { "epoch": 0.06, "grad_norm": 0.9306091666221619, "learning_rate": 9.97536099945193e-06, "loss": 0.6438, "step": 958 }, { "epoch": 0.06, "grad_norm": 0.9069042205810547, "learning_rate": 9.975259164355632e-06, "loss": 0.6829, "step": 959 }, { "epoch": 0.06, "grad_norm": 1.0041426420211792, "learning_rate": 9.97515711976848e-06, "loss": 0.6783, "step": 960 }, { "epoch": 0.06, "grad_norm": 1.1071757078170776, "learning_rate": 9.975054865694767e-06, "loss": 0.6785, "step": 961 }, { "epoch": 0.06, "grad_norm": 0.9503558278083801, "learning_rate": 9.9749524021388e-06, "loss": 0.7091, "step": 962 }, { "epoch": 0.06, "grad_norm": 0.9102316498756409, "learning_rate": 9.974849729104894e-06, "loss": 0.7051, "step": 963 }, { "epoch": 0.06, "grad_norm": 1.003288984298706, "learning_rate": 9.974746846597373e-06, "loss": 0.7456, "step": 964 }, { "epoch": 0.06, "grad_norm": 0.9375484585762024, "learning_rate": 9.974643754620567e-06, "loss": 0.6835, "step": 965 }, { "epoch": 0.06, "grad_norm": 1.0092264413833618, "learning_rate": 9.97454045317882e-06, "loss": 0.672, "step": 966 }, { "epoch": 0.06, "grad_norm": 0.9897353053092957, "learning_rate": 9.974436942276477e-06, "loss": 0.6498, "step": 967 }, { "epoch": 0.06, "grad_norm": 0.9781389236450195, "learning_rate": 9.974333221917903e-06, "loss": 0.6988, "step": 968 }, { "epoch": 0.06, "grad_norm": 0.8853163719177246, "learning_rate": 9.974229292107458e-06, "loss": 0.6391, "step": 969 }, { "epoch": 0.06, "grad_norm": 1.3123575448989868, "learning_rate": 9.974125152849523e-06, "loss": 0.689, "step": 970 }, { "epoch": 0.06, "grad_norm": 0.9711446166038513, "learning_rate": 9.974020804148482e-06, "loss": 0.645, "step": 971 }, { "epoch": 0.06, "grad_norm": 0.9429543614387512, "learning_rate": 9.973916246008727e-06, "loss": 0.6561, "step": 972 }, { "epoch": 0.06, "grad_norm": 0.9982245564460754, "learning_rate": 9.973811478434662e-06, "loss": 0.6915, "step": 973 }, { "epoch": 0.06, "grad_norm": 0.9405156373977661, "learning_rate": 9.9737065014307e-06, "loss": 0.6852, "step": 974 }, { "epoch": 0.06, "grad_norm": 0.9267737865447998, "learning_rate": 9.973601315001258e-06, "loss": 0.6724, "step": 975 }, { "epoch": 0.06, "grad_norm": 0.9884217977523804, "learning_rate": 9.973495919150766e-06, "loss": 0.6892, "step": 976 }, { "epoch": 0.06, "grad_norm": 0.8952025771141052, "learning_rate": 9.973390313883664e-06, "loss": 0.6149, "step": 977 }, { "epoch": 0.06, "grad_norm": 0.916035532951355, "learning_rate": 9.973284499204396e-06, "loss": 0.6147, "step": 978 }, { "epoch": 0.06, "grad_norm": 0.9775811433792114, "learning_rate": 9.973178475117419e-06, "loss": 0.6582, "step": 979 }, { "epoch": 0.06, "grad_norm": 0.942755401134491, "learning_rate": 9.973072241627196e-06, "loss": 0.7021, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.0862394571304321, "learning_rate": 9.972965798738202e-06, "loss": 0.7022, "step": 981 }, { "epoch": 0.06, "grad_norm": 0.8991437554359436, "learning_rate": 9.972859146454917e-06, "loss": 0.6008, "step": 982 }, { "epoch": 0.06, "grad_norm": 0.9662036895751953, "learning_rate": 9.972752284781832e-06, "loss": 0.6783, "step": 983 }, { "epoch": 0.06, "grad_norm": 0.9464378952980042, "learning_rate": 9.97264521372345e-06, "loss": 0.6452, "step": 984 }, { "epoch": 0.06, "grad_norm": 0.9118345975875854, "learning_rate": 9.972537933284274e-06, "loss": 0.6253, "step": 985 }, { "epoch": 0.06, "grad_norm": 0.9645686745643616, "learning_rate": 9.972430443468826e-06, "loss": 0.6812, "step": 986 }, { "epoch": 0.06, "grad_norm": 0.8795304894447327, "learning_rate": 9.972322744281628e-06, "loss": 0.5986, "step": 987 }, { "epoch": 0.06, "grad_norm": 0.9460269808769226, "learning_rate": 9.972214835727218e-06, "loss": 0.6976, "step": 988 }, { "epoch": 0.06, "grad_norm": 0.9464765191078186, "learning_rate": 9.972106717810137e-06, "loss": 0.6538, "step": 989 }, { "epoch": 0.06, "grad_norm": 0.9307100176811218, "learning_rate": 9.97199839053494e-06, "loss": 0.7183, "step": 990 }, { "epoch": 0.06, "grad_norm": 0.978036105632782, "learning_rate": 9.971889853906186e-06, "loss": 0.6879, "step": 991 }, { "epoch": 0.06, "grad_norm": 0.9477901458740234, "learning_rate": 9.971781107928447e-06, "loss": 0.6093, "step": 992 }, { "epoch": 0.06, "grad_norm": 0.9999047517776489, "learning_rate": 9.9716721526063e-06, "loss": 0.6431, "step": 993 }, { "epoch": 0.06, "grad_norm": 0.9746558666229248, "learning_rate": 9.971562987944336e-06, "loss": 0.6916, "step": 994 }, { "epoch": 0.06, "grad_norm": 0.9560214281082153, "learning_rate": 9.971453613947147e-06, "loss": 0.6746, "step": 995 }, { "epoch": 0.06, "grad_norm": 1.0287420749664307, "learning_rate": 9.971344030619342e-06, "loss": 0.6463, "step": 996 }, { "epoch": 0.06, "grad_norm": 0.9547033309936523, "learning_rate": 9.971234237965534e-06, "loss": 0.6567, "step": 997 }, { "epoch": 0.06, "grad_norm": 0.9555925130844116, "learning_rate": 9.971124235990346e-06, "loss": 0.6834, "step": 998 }, { "epoch": 0.06, "grad_norm": 0.9444142580032349, "learning_rate": 9.971014024698408e-06, "loss": 0.6531, "step": 999 }, { "epoch": 0.06, "grad_norm": 1.0372717380523682, "learning_rate": 9.970903604094365e-06, "loss": 0.6779, "step": 1000 }, { "epoch": 0.06, "grad_norm": 0.9465329051017761, "learning_rate": 9.970792974182863e-06, "loss": 0.6849, "step": 1001 }, { "epoch": 0.06, "grad_norm": 0.9567575454711914, "learning_rate": 9.97068213496856e-06, "loss": 0.6118, "step": 1002 }, { "epoch": 0.06, "grad_norm": 0.9264045357704163, "learning_rate": 9.970571086456124e-06, "loss": 0.6617, "step": 1003 }, { "epoch": 0.06, "grad_norm": 0.9825202226638794, "learning_rate": 9.970459828650232e-06, "loss": 0.6393, "step": 1004 }, { "epoch": 0.06, "grad_norm": 0.9990862011909485, "learning_rate": 9.970348361555566e-06, "loss": 0.633, "step": 1005 }, { "epoch": 0.06, "grad_norm": 0.9339586496353149, "learning_rate": 9.970236685176821e-06, "loss": 0.6845, "step": 1006 }, { "epoch": 0.06, "grad_norm": 0.9837610125541687, "learning_rate": 9.9701247995187e-06, "loss": 0.6371, "step": 1007 }, { "epoch": 0.06, "grad_norm": 1.0499521493911743, "learning_rate": 9.970012704585916e-06, "loss": 0.6754, "step": 1008 }, { "epoch": 0.06, "grad_norm": 1.0463135242462158, "learning_rate": 9.969900400383183e-06, "loss": 0.6943, "step": 1009 }, { "epoch": 0.06, "grad_norm": 0.9164577126502991, "learning_rate": 9.969787886915236e-06, "loss": 0.6393, "step": 1010 }, { "epoch": 0.06, "grad_norm": 0.9350469708442688, "learning_rate": 9.969675164186807e-06, "loss": 0.6697, "step": 1011 }, { "epoch": 0.06, "grad_norm": 0.9471501111984253, "learning_rate": 9.969562232202647e-06, "loss": 0.6617, "step": 1012 }, { "epoch": 0.06, "grad_norm": 0.9739314913749695, "learning_rate": 9.969449090967509e-06, "loss": 0.6864, "step": 1013 }, { "epoch": 0.06, "grad_norm": 0.9026748538017273, "learning_rate": 9.969335740486157e-06, "loss": 0.5679, "step": 1014 }, { "epoch": 0.06, "grad_norm": 0.9329193830490112, "learning_rate": 9.969222180763363e-06, "loss": 0.6714, "step": 1015 }, { "epoch": 0.06, "grad_norm": 1.0058557987213135, "learning_rate": 9.96910841180391e-06, "loss": 0.6131, "step": 1016 }, { "epoch": 0.06, "grad_norm": 1.0142805576324463, "learning_rate": 9.968994433612589e-06, "loss": 0.7093, "step": 1017 }, { "epoch": 0.06, "grad_norm": 0.9591155648231506, "learning_rate": 9.968880246194198e-06, "loss": 0.6828, "step": 1018 }, { "epoch": 0.06, "grad_norm": 0.9221545457839966, "learning_rate": 9.968765849553544e-06, "loss": 0.6824, "step": 1019 }, { "epoch": 0.06, "grad_norm": 0.8757246136665344, "learning_rate": 9.968651243695446e-06, "loss": 0.6282, "step": 1020 }, { "epoch": 0.06, "grad_norm": 1.0177953243255615, "learning_rate": 9.968536428624729e-06, "loss": 0.6673, "step": 1021 }, { "epoch": 0.06, "grad_norm": 0.9448785781860352, "learning_rate": 9.968421404346228e-06, "loss": 0.6768, "step": 1022 }, { "epoch": 0.06, "grad_norm": 1.0076022148132324, "learning_rate": 9.968306170864786e-06, "loss": 0.7581, "step": 1023 }, { "epoch": 0.06, "grad_norm": 1.0110529661178589, "learning_rate": 9.968190728185251e-06, "loss": 0.6518, "step": 1024 }, { "epoch": 0.06, "grad_norm": 0.8694904446601868, "learning_rate": 9.968075076312492e-06, "loss": 0.6453, "step": 1025 }, { "epoch": 0.07, "grad_norm": 0.9269656538963318, "learning_rate": 9.96795921525137e-06, "loss": 0.6668, "step": 1026 }, { "epoch": 0.07, "grad_norm": 0.9647197723388672, "learning_rate": 9.967843145006771e-06, "loss": 0.6586, "step": 1027 }, { "epoch": 0.07, "grad_norm": 1.0203245878219604, "learning_rate": 9.967726865583578e-06, "loss": 0.6558, "step": 1028 }, { "epoch": 0.07, "grad_norm": 0.9874720573425293, "learning_rate": 9.967610376986687e-06, "loss": 0.651, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.0147123336791992, "learning_rate": 9.967493679221006e-06, "loss": 0.7248, "step": 1030 }, { "epoch": 0.07, "grad_norm": 0.9211717844009399, "learning_rate": 9.967376772291446e-06, "loss": 0.6315, "step": 1031 }, { "epoch": 0.07, "grad_norm": 0.9259415864944458, "learning_rate": 9.96725965620293e-06, "loss": 0.6483, "step": 1032 }, { "epoch": 0.07, "grad_norm": 0.9673779010772705, "learning_rate": 9.96714233096039e-06, "loss": 0.648, "step": 1033 }, { "epoch": 0.07, "grad_norm": 0.9705022573471069, "learning_rate": 9.967024796568766e-06, "loss": 0.7079, "step": 1034 }, { "epoch": 0.07, "grad_norm": 0.9708682894706726, "learning_rate": 9.966907053033006e-06, "loss": 0.6949, "step": 1035 }, { "epoch": 0.07, "grad_norm": 0.9706398844718933, "learning_rate": 9.96678910035807e-06, "loss": 0.6657, "step": 1036 }, { "epoch": 0.07, "grad_norm": 0.9681613445281982, "learning_rate": 9.966670938548923e-06, "loss": 0.6909, "step": 1037 }, { "epoch": 0.07, "grad_norm": 1.0291481018066406, "learning_rate": 9.96655256761054e-06, "loss": 0.644, "step": 1038 }, { "epoch": 0.07, "grad_norm": 0.9305548667907715, "learning_rate": 9.966433987547906e-06, "loss": 0.6166, "step": 1039 }, { "epoch": 0.07, "grad_norm": 0.9347004890441895, "learning_rate": 9.966315198366011e-06, "loss": 0.6963, "step": 1040 }, { "epoch": 0.07, "grad_norm": 1.0041406154632568, "learning_rate": 9.966196200069863e-06, "loss": 0.6962, "step": 1041 }, { "epoch": 0.07, "grad_norm": 1.0330625772476196, "learning_rate": 9.966076992664469e-06, "loss": 0.6894, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.0031425952911377, "learning_rate": 9.965957576154848e-06, "loss": 0.6555, "step": 1043 }, { "epoch": 0.07, "grad_norm": 1.005267858505249, "learning_rate": 9.96583795054603e-06, "loss": 0.6799, "step": 1044 }, { "epoch": 0.07, "grad_norm": 0.9837890267372131, "learning_rate": 9.965718115843048e-06, "loss": 0.6881, "step": 1045 }, { "epoch": 0.07, "grad_norm": 0.983871340751648, "learning_rate": 9.965598072050953e-06, "loss": 0.6925, "step": 1046 }, { "epoch": 0.07, "grad_norm": 0.9536935687065125, "learning_rate": 9.965477819174796e-06, "loss": 0.6616, "step": 1047 }, { "epoch": 0.07, "grad_norm": 0.9697441458702087, "learning_rate": 9.96535735721964e-06, "loss": 0.6473, "step": 1048 }, { "epoch": 0.07, "grad_norm": 0.9311846494674683, "learning_rate": 9.965236686190563e-06, "loss": 0.734, "step": 1049 }, { "epoch": 0.07, "grad_norm": 0.9542213678359985, "learning_rate": 9.965115806092638e-06, "loss": 0.6183, "step": 1050 }, { "epoch": 0.07, "grad_norm": 0.940539538860321, "learning_rate": 9.96499471693096e-06, "loss": 0.64, "step": 1051 }, { "epoch": 0.07, "grad_norm": 0.9398403763771057, "learning_rate": 9.964873418710628e-06, "loss": 0.573, "step": 1052 }, { "epoch": 0.07, "grad_norm": 0.9776535034179688, "learning_rate": 9.964751911436748e-06, "loss": 0.6572, "step": 1053 }, { "epoch": 0.07, "grad_norm": 0.9758483171463013, "learning_rate": 9.964630195114432e-06, "loss": 0.6424, "step": 1054 }, { "epoch": 0.07, "grad_norm": 0.9551203846931458, "learning_rate": 9.964508269748814e-06, "loss": 0.6773, "step": 1055 }, { "epoch": 0.07, "grad_norm": 0.953066885471344, "learning_rate": 9.96438613534502e-06, "loss": 0.6551, "step": 1056 }, { "epoch": 0.07, "grad_norm": 1.0139051675796509, "learning_rate": 9.964263791908198e-06, "loss": 0.654, "step": 1057 }, { "epoch": 0.07, "grad_norm": 0.9728212952613831, "learning_rate": 9.964141239443497e-06, "loss": 0.612, "step": 1058 }, { "epoch": 0.07, "grad_norm": 0.9843549132347107, "learning_rate": 9.964018477956075e-06, "loss": 0.6867, "step": 1059 }, { "epoch": 0.07, "grad_norm": 0.9203978776931763, "learning_rate": 9.963895507451104e-06, "loss": 0.6567, "step": 1060 }, { "epoch": 0.07, "grad_norm": 0.9760221242904663, "learning_rate": 9.963772327933764e-06, "loss": 0.6684, "step": 1061 }, { "epoch": 0.07, "grad_norm": 0.9449279308319092, "learning_rate": 9.963648939409236e-06, "loss": 0.6811, "step": 1062 }, { "epoch": 0.07, "grad_norm": 0.8834384083747864, "learning_rate": 9.96352534188272e-06, "loss": 0.6195, "step": 1063 }, { "epoch": 0.07, "grad_norm": 0.9562472701072693, "learning_rate": 9.963401535359418e-06, "loss": 0.6353, "step": 1064 }, { "epoch": 0.07, "grad_norm": 0.9446646571159363, "learning_rate": 9.963277519844544e-06, "loss": 0.617, "step": 1065 }, { "epoch": 0.07, "grad_norm": 1.0598340034484863, "learning_rate": 9.963153295343319e-06, "loss": 0.7194, "step": 1066 }, { "epoch": 0.07, "grad_norm": 0.9084300994873047, "learning_rate": 9.963028861860975e-06, "loss": 0.6881, "step": 1067 }, { "epoch": 0.07, "grad_norm": 0.9703001976013184, "learning_rate": 9.962904219402752e-06, "loss": 0.7235, "step": 1068 }, { "epoch": 0.07, "grad_norm": 0.9932600259780884, "learning_rate": 9.962779367973896e-06, "loss": 0.6662, "step": 1069 }, { "epoch": 0.07, "grad_norm": 0.9670122265815735, "learning_rate": 9.962654307579665e-06, "loss": 0.6604, "step": 1070 }, { "epoch": 0.07, "grad_norm": 0.9765552282333374, "learning_rate": 9.962529038225324e-06, "loss": 0.7055, "step": 1071 }, { "epoch": 0.07, "grad_norm": 0.9492608904838562, "learning_rate": 9.962403559916149e-06, "loss": 0.6433, "step": 1072 }, { "epoch": 0.07, "grad_norm": 0.9491517543792725, "learning_rate": 9.962277872657422e-06, "loss": 0.6836, "step": 1073 }, { "epoch": 0.07, "grad_norm": 0.950981616973877, "learning_rate": 9.962151976454439e-06, "loss": 0.6404, "step": 1074 }, { "epoch": 0.07, "grad_norm": 1.0310513973236084, "learning_rate": 9.962025871312497e-06, "loss": 0.7125, "step": 1075 }, { "epoch": 0.07, "grad_norm": 0.9882835149765015, "learning_rate": 9.961899557236907e-06, "loss": 0.6861, "step": 1076 }, { "epoch": 0.07, "grad_norm": 0.9338645935058594, "learning_rate": 9.961773034232987e-06, "loss": 0.6555, "step": 1077 }, { "epoch": 0.07, "grad_norm": 0.9319179058074951, "learning_rate": 9.961646302306066e-06, "loss": 0.6467, "step": 1078 }, { "epoch": 0.07, "grad_norm": 0.9263783097267151, "learning_rate": 9.961519361461481e-06, "loss": 0.6384, "step": 1079 }, { "epoch": 0.07, "grad_norm": 0.9706323742866516, "learning_rate": 9.961392211704573e-06, "loss": 0.6704, "step": 1080 }, { "epoch": 0.07, "grad_norm": 0.9975467920303345, "learning_rate": 9.9612648530407e-06, "loss": 0.6444, "step": 1081 }, { "epoch": 0.07, "grad_norm": 0.9272286295890808, "learning_rate": 9.961137285475223e-06, "loss": 0.6558, "step": 1082 }, { "epoch": 0.07, "grad_norm": 0.9486019611358643, "learning_rate": 9.961009509013512e-06, "loss": 0.677, "step": 1083 }, { "epoch": 0.07, "grad_norm": 0.8935267329216003, "learning_rate": 9.96088152366095e-06, "loss": 0.6781, "step": 1084 }, { "epoch": 0.07, "grad_norm": 0.9707071781158447, "learning_rate": 9.960753329422925e-06, "loss": 0.6578, "step": 1085 }, { "epoch": 0.07, "grad_norm": 0.9859130382537842, "learning_rate": 9.960624926304834e-06, "loss": 0.6421, "step": 1086 }, { "epoch": 0.07, "grad_norm": 0.9117816090583801, "learning_rate": 9.960496314312085e-06, "loss": 0.6667, "step": 1087 }, { "epoch": 0.07, "grad_norm": 0.9339293837547302, "learning_rate": 9.96036749345009e-06, "loss": 0.6645, "step": 1088 }, { "epoch": 0.07, "grad_norm": 0.9403766393661499, "learning_rate": 9.960238463724278e-06, "loss": 0.6389, "step": 1089 }, { "epoch": 0.07, "grad_norm": 0.9280768632888794, "learning_rate": 9.96010922514008e-06, "loss": 0.6599, "step": 1090 }, { "epoch": 0.07, "grad_norm": 0.9073335528373718, "learning_rate": 9.959979777702935e-06, "loss": 0.6426, "step": 1091 }, { "epoch": 0.07, "grad_norm": 0.8961593508720398, "learning_rate": 9.959850121418298e-06, "loss": 0.6284, "step": 1092 }, { "epoch": 0.07, "grad_norm": 0.9164572954177856, "learning_rate": 9.959720256291626e-06, "loss": 0.6326, "step": 1093 }, { "epoch": 0.07, "grad_norm": 0.9504919052124023, "learning_rate": 9.959590182328387e-06, "loss": 0.6923, "step": 1094 }, { "epoch": 0.07, "grad_norm": 0.9429534077644348, "learning_rate": 9.95945989953406e-06, "loss": 0.6815, "step": 1095 }, { "epoch": 0.07, "grad_norm": 0.9838384389877319, "learning_rate": 9.959329407914129e-06, "loss": 0.6425, "step": 1096 }, { "epoch": 0.07, "grad_norm": 0.9929714798927307, "learning_rate": 9.959198707474087e-06, "loss": 0.6323, "step": 1097 }, { "epoch": 0.07, "grad_norm": 0.9384214878082275, "learning_rate": 9.959067798219442e-06, "loss": 0.6735, "step": 1098 }, { "epoch": 0.07, "grad_norm": 0.9922656416893005, "learning_rate": 9.958936680155702e-06, "loss": 0.6697, "step": 1099 }, { "epoch": 0.07, "grad_norm": 0.9068803787231445, "learning_rate": 9.958805353288388e-06, "loss": 0.6207, "step": 1100 }, { "epoch": 0.07, "grad_norm": 0.9993635416030884, "learning_rate": 9.958673817623033e-06, "loss": 0.6229, "step": 1101 }, { "epoch": 0.07, "grad_norm": 1.0061472654342651, "learning_rate": 9.958542073165172e-06, "loss": 0.6769, "step": 1102 }, { "epoch": 0.07, "grad_norm": 0.9775166511535645, "learning_rate": 9.958410119920355e-06, "loss": 0.6552, "step": 1103 }, { "epoch": 0.07, "grad_norm": 0.8982160091400146, "learning_rate": 9.958277957894137e-06, "loss": 0.635, "step": 1104 }, { "epoch": 0.07, "grad_norm": 0.9272829294204712, "learning_rate": 9.958145587092082e-06, "loss": 0.586, "step": 1105 }, { "epoch": 0.07, "grad_norm": 0.9862303137779236, "learning_rate": 9.958013007519764e-06, "loss": 0.7076, "step": 1106 }, { "epoch": 0.07, "grad_norm": 0.8938865661621094, "learning_rate": 9.957880219182767e-06, "loss": 0.5773, "step": 1107 }, { "epoch": 0.07, "grad_norm": 0.9800034761428833, "learning_rate": 9.957747222086682e-06, "loss": 0.7099, "step": 1108 }, { "epoch": 0.07, "grad_norm": 1.0491564273834229, "learning_rate": 9.957614016237106e-06, "loss": 0.6926, "step": 1109 }, { "epoch": 0.07, "grad_norm": 0.930266261100769, "learning_rate": 9.957480601639652e-06, "loss": 0.7086, "step": 1110 }, { "epoch": 0.07, "grad_norm": 0.9674487113952637, "learning_rate": 9.957346978299935e-06, "loss": 0.6541, "step": 1111 }, { "epoch": 0.07, "grad_norm": 1.0485286712646484, "learning_rate": 9.957213146223581e-06, "loss": 0.6749, "step": 1112 }, { "epoch": 0.07, "grad_norm": 0.913090169429779, "learning_rate": 9.957079105416228e-06, "loss": 0.648, "step": 1113 }, { "epoch": 0.07, "grad_norm": 0.9890965819358826, "learning_rate": 9.956944855883516e-06, "loss": 0.6896, "step": 1114 }, { "epoch": 0.07, "grad_norm": 0.9297420978546143, "learning_rate": 9.956810397631103e-06, "loss": 0.7024, "step": 1115 }, { "epoch": 0.07, "grad_norm": 1.0065919160842896, "learning_rate": 9.956675730664646e-06, "loss": 0.6224, "step": 1116 }, { "epoch": 0.07, "grad_norm": 0.992179274559021, "learning_rate": 9.956540854989817e-06, "loss": 0.6667, "step": 1117 }, { "epoch": 0.07, "grad_norm": 0.9736528992652893, "learning_rate": 9.956405770612295e-06, "loss": 0.6669, "step": 1118 }, { "epoch": 0.07, "grad_norm": 0.9248270988464355, "learning_rate": 9.956270477537768e-06, "loss": 0.6868, "step": 1119 }, { "epoch": 0.07, "grad_norm": 0.9713013172149658, "learning_rate": 9.956134975771934e-06, "loss": 0.6949, "step": 1120 }, { "epoch": 0.07, "grad_norm": 1.0682833194732666, "learning_rate": 9.955999265320495e-06, "loss": 0.7197, "step": 1121 }, { "epoch": 0.07, "grad_norm": 0.9445773363113403, "learning_rate": 9.95586334618917e-06, "loss": 0.6679, "step": 1122 }, { "epoch": 0.07, "grad_norm": 0.9021993279457092, "learning_rate": 9.95572721838368e-06, "loss": 0.6233, "step": 1123 }, { "epoch": 0.07, "grad_norm": 0.9834071397781372, "learning_rate": 9.955590881909753e-06, "loss": 0.6273, "step": 1124 }, { "epoch": 0.07, "grad_norm": 0.9453941583633423, "learning_rate": 9.955454336773136e-06, "loss": 0.6062, "step": 1125 }, { "epoch": 0.07, "grad_norm": 0.8219738602638245, "learning_rate": 9.955317582979575e-06, "loss": 0.5888, "step": 1126 }, { "epoch": 0.07, "grad_norm": 0.9637846350669861, "learning_rate": 9.95518062053483e-06, "loss": 0.6445, "step": 1127 }, { "epoch": 0.07, "grad_norm": 0.9680708050727844, "learning_rate": 9.955043449444665e-06, "loss": 0.6871, "step": 1128 }, { "epoch": 0.07, "grad_norm": 0.8683537840843201, "learning_rate": 9.95490606971486e-06, "loss": 0.6406, "step": 1129 }, { "epoch": 0.07, "grad_norm": 0.896604061126709, "learning_rate": 9.954768481351196e-06, "loss": 0.6409, "step": 1130 }, { "epoch": 0.07, "grad_norm": 0.9782860279083252, "learning_rate": 9.954630684359468e-06, "loss": 0.6409, "step": 1131 }, { "epoch": 0.07, "grad_norm": 1.0023294687271118, "learning_rate": 9.954492678745477e-06, "loss": 0.6668, "step": 1132 }, { "epoch": 0.07, "grad_norm": 0.9098303914070129, "learning_rate": 9.954354464515035e-06, "loss": 0.6047, "step": 1133 }, { "epoch": 0.07, "grad_norm": 0.9375096559524536, "learning_rate": 9.95421604167396e-06, "loss": 0.605, "step": 1134 }, { "epoch": 0.07, "grad_norm": 1.070643663406372, "learning_rate": 9.954077410228084e-06, "loss": 0.7336, "step": 1135 }, { "epoch": 0.07, "grad_norm": 0.941253125667572, "learning_rate": 9.95393857018324e-06, "loss": 0.6458, "step": 1136 }, { "epoch": 0.07, "grad_norm": 0.9726730585098267, "learning_rate": 9.95379952154528e-06, "loss": 0.6416, "step": 1137 }, { "epoch": 0.07, "grad_norm": 1.0929338932037354, "learning_rate": 9.953660264320053e-06, "loss": 0.7103, "step": 1138 }, { "epoch": 0.07, "grad_norm": 0.98173588514328, "learning_rate": 9.953520798513425e-06, "loss": 0.7664, "step": 1139 }, { "epoch": 0.07, "grad_norm": 0.9463081955909729, "learning_rate": 9.953381124131269e-06, "loss": 0.6432, "step": 1140 }, { "epoch": 0.07, "grad_norm": 0.967021644115448, "learning_rate": 9.953241241179462e-06, "loss": 0.6412, "step": 1141 }, { "epoch": 0.07, "grad_norm": 0.9214240312576294, "learning_rate": 9.953101149663902e-06, "loss": 0.6414, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.2380410432815552, "learning_rate": 9.95296084959048e-06, "loss": 0.5983, "step": 1143 }, { "epoch": 0.07, "grad_norm": 0.9872441291809082, "learning_rate": 9.952820340965109e-06, "loss": 0.653, "step": 1144 }, { "epoch": 0.07, "grad_norm": 0.8805240392684937, "learning_rate": 9.952679623793702e-06, "loss": 0.6532, "step": 1145 }, { "epoch": 0.07, "grad_norm": 0.9314813017845154, "learning_rate": 9.952538698082185e-06, "loss": 0.6649, "step": 1146 }, { "epoch": 0.07, "grad_norm": 0.9278802871704102, "learning_rate": 9.95239756383649e-06, "loss": 0.6832, "step": 1147 }, { "epoch": 0.07, "grad_norm": 0.9457370042800903, "learning_rate": 9.952256221062566e-06, "loss": 0.5822, "step": 1148 }, { "epoch": 0.07, "grad_norm": 1.0056676864624023, "learning_rate": 9.952114669766357e-06, "loss": 0.6653, "step": 1149 }, { "epoch": 0.07, "grad_norm": 0.877746045589447, "learning_rate": 9.951972909953828e-06, "loss": 0.5947, "step": 1150 }, { "epoch": 0.07, "grad_norm": 0.949008047580719, "learning_rate": 9.951830941630946e-06, "loss": 0.6857, "step": 1151 }, { "epoch": 0.07, "grad_norm": 1.0061525106430054, "learning_rate": 9.951688764803689e-06, "loss": 0.7064, "step": 1152 }, { "epoch": 0.07, "grad_norm": 1.1196439266204834, "learning_rate": 9.951546379478044e-06, "loss": 0.7059, "step": 1153 }, { "epoch": 0.07, "grad_norm": 1.003239393234253, "learning_rate": 9.951403785660005e-06, "loss": 0.7148, "step": 1154 }, { "epoch": 0.07, "grad_norm": 0.9646631479263306, "learning_rate": 9.95126098335558e-06, "loss": 0.6991, "step": 1155 }, { "epoch": 0.07, "grad_norm": 0.9327188730239868, "learning_rate": 9.951117972570776e-06, "loss": 0.6421, "step": 1156 }, { "epoch": 0.07, "grad_norm": 0.9369673132896423, "learning_rate": 9.95097475331162e-06, "loss": 0.7018, "step": 1157 }, { "epoch": 0.07, "grad_norm": 0.9307648539543152, "learning_rate": 9.950831325584138e-06, "loss": 0.6523, "step": 1158 }, { "epoch": 0.07, "grad_norm": 0.9541182518005371, "learning_rate": 9.950687689394373e-06, "loss": 0.6665, "step": 1159 }, { "epoch": 0.07, "grad_norm": 0.9604858756065369, "learning_rate": 9.950543844748372e-06, "loss": 0.6852, "step": 1160 }, { "epoch": 0.07, "grad_norm": 0.9503093957901001, "learning_rate": 9.950399791652193e-06, "loss": 0.6652, "step": 1161 }, { "epoch": 0.07, "grad_norm": 1.0395541191101074, "learning_rate": 9.950255530111896e-06, "loss": 0.7136, "step": 1162 }, { "epoch": 0.07, "grad_norm": 0.8791881799697876, "learning_rate": 9.950111060133562e-06, "loss": 0.6324, "step": 1163 }, { "epoch": 0.07, "grad_norm": 0.8195285201072693, "learning_rate": 9.94996638172327e-06, "loss": 0.6415, "step": 1164 }, { "epoch": 0.07, "grad_norm": 1.003089427947998, "learning_rate": 9.949821494887116e-06, "loss": 0.6959, "step": 1165 }, { "epoch": 0.07, "grad_norm": 0.9248343706130981, "learning_rate": 9.949676399631197e-06, "loss": 0.6429, "step": 1166 }, { "epoch": 0.07, "grad_norm": 0.9678802490234375, "learning_rate": 9.949531095961621e-06, "loss": 0.6973, "step": 1167 }, { "epoch": 0.07, "grad_norm": 0.968346118927002, "learning_rate": 9.94938558388451e-06, "loss": 0.6681, "step": 1168 }, { "epoch": 0.07, "grad_norm": 0.9063887000083923, "learning_rate": 9.94923986340599e-06, "loss": 0.6448, "step": 1169 }, { "epoch": 0.07, "grad_norm": 0.8939194083213806, "learning_rate": 9.949093934532196e-06, "loss": 0.6124, "step": 1170 }, { "epoch": 0.07, "grad_norm": 0.9329174757003784, "learning_rate": 9.948947797269275e-06, "loss": 0.6408, "step": 1171 }, { "epoch": 0.07, "grad_norm": 0.8747833967208862, "learning_rate": 9.948801451623376e-06, "loss": 0.6501, "step": 1172 }, { "epoch": 0.07, "grad_norm": 0.9457252621650696, "learning_rate": 9.948654897600664e-06, "loss": 0.6845, "step": 1173 }, { "epoch": 0.07, "grad_norm": 0.9753620028495789, "learning_rate": 9.94850813520731e-06, "loss": 0.6558, "step": 1174 }, { "epoch": 0.07, "grad_norm": 0.9322195053100586, "learning_rate": 9.948361164449493e-06, "loss": 0.6277, "step": 1175 }, { "epoch": 0.07, "grad_norm": 0.9404197335243225, "learning_rate": 9.948213985333403e-06, "loss": 0.6757, "step": 1176 }, { "epoch": 0.07, "grad_norm": 1.0010050535202026, "learning_rate": 9.948066597865234e-06, "loss": 0.6567, "step": 1177 }, { "epoch": 0.07, "grad_norm": 0.9383962750434875, "learning_rate": 9.947919002051194e-06, "loss": 0.6576, "step": 1178 }, { "epoch": 0.07, "grad_norm": 0.9984327554702759, "learning_rate": 9.947771197897495e-06, "loss": 0.6455, "step": 1179 }, { "epoch": 0.07, "grad_norm": 0.968433678150177, "learning_rate": 9.947623185410366e-06, "loss": 0.6596, "step": 1180 }, { "epoch": 0.07, "grad_norm": 0.9923335313796997, "learning_rate": 9.947474964596036e-06, "loss": 0.7174, "step": 1181 }, { "epoch": 0.07, "grad_norm": 0.9914311170578003, "learning_rate": 9.947326535460744e-06, "loss": 0.6712, "step": 1182 }, { "epoch": 0.07, "grad_norm": 0.9265934824943542, "learning_rate": 9.947177898010745e-06, "loss": 0.6429, "step": 1183 }, { "epoch": 0.08, "grad_norm": 0.9598346948623657, "learning_rate": 9.947029052252293e-06, "loss": 0.633, "step": 1184 }, { "epoch": 0.08, "grad_norm": 0.907012939453125, "learning_rate": 9.946879998191656e-06, "loss": 0.6516, "step": 1185 }, { "epoch": 0.08, "grad_norm": 0.9562612771987915, "learning_rate": 9.946730735835112e-06, "loss": 0.7378, "step": 1186 }, { "epoch": 0.08, "grad_norm": 0.9579821228981018, "learning_rate": 9.946581265188947e-06, "loss": 0.6426, "step": 1187 }, { "epoch": 0.08, "grad_norm": 0.9079206585884094, "learning_rate": 9.946431586259451e-06, "loss": 0.6513, "step": 1188 }, { "epoch": 0.08, "grad_norm": 0.9543782472610474, "learning_rate": 9.946281699052928e-06, "loss": 0.6829, "step": 1189 }, { "epoch": 0.08, "grad_norm": 0.9957901239395142, "learning_rate": 9.946131603575691e-06, "loss": 0.7518, "step": 1190 }, { "epoch": 0.08, "grad_norm": 0.971076250076294, "learning_rate": 9.945981299834058e-06, "loss": 0.6697, "step": 1191 }, { "epoch": 0.08, "grad_norm": 0.9861418008804321, "learning_rate": 9.945830787834358e-06, "loss": 0.6971, "step": 1192 }, { "epoch": 0.08, "grad_norm": 0.9396786093711853, "learning_rate": 9.945680067582928e-06, "loss": 0.6282, "step": 1193 }, { "epoch": 0.08, "grad_norm": 0.8945440053939819, "learning_rate": 9.945529139086116e-06, "loss": 0.6315, "step": 1194 }, { "epoch": 0.08, "grad_norm": 1.0057705640792847, "learning_rate": 9.945378002350277e-06, "loss": 0.6682, "step": 1195 }, { "epoch": 0.08, "grad_norm": 1.003580093383789, "learning_rate": 9.945226657381773e-06, "loss": 0.6859, "step": 1196 }, { "epoch": 0.08, "grad_norm": 0.8602601289749146, "learning_rate": 9.945075104186978e-06, "loss": 0.6245, "step": 1197 }, { "epoch": 0.08, "grad_norm": 0.8855172395706177, "learning_rate": 9.944923342772272e-06, "loss": 0.656, "step": 1198 }, { "epoch": 0.08, "grad_norm": 0.916766881942749, "learning_rate": 9.944771373144047e-06, "loss": 0.635, "step": 1199 }, { "epoch": 0.08, "grad_norm": 0.8637309670448303, "learning_rate": 9.944619195308701e-06, "loss": 0.6163, "step": 1200 }, { "epoch": 0.08, "grad_norm": 0.958526611328125, "learning_rate": 9.944466809272642e-06, "loss": 0.6275, "step": 1201 }, { "epoch": 0.08, "grad_norm": 0.900386393070221, "learning_rate": 9.944314215042286e-06, "loss": 0.6159, "step": 1202 }, { "epoch": 0.08, "grad_norm": 0.9339030385017395, "learning_rate": 9.944161412624059e-06, "loss": 0.6542, "step": 1203 }, { "epoch": 0.08, "grad_norm": 0.9049733281135559, "learning_rate": 9.944008402024395e-06, "loss": 0.688, "step": 1204 }, { "epoch": 0.08, "grad_norm": 1.030328392982483, "learning_rate": 9.943855183249734e-06, "loss": 0.6951, "step": 1205 }, { "epoch": 0.08, "grad_norm": 0.9944655299186707, "learning_rate": 9.94370175630653e-06, "loss": 0.7017, "step": 1206 }, { "epoch": 0.08, "grad_norm": 0.9828429222106934, "learning_rate": 9.943548121201243e-06, "loss": 0.6717, "step": 1207 }, { "epoch": 0.08, "grad_norm": 0.9837692975997925, "learning_rate": 9.943394277940344e-06, "loss": 0.6156, "step": 1208 }, { "epoch": 0.08, "grad_norm": 1.0148766040802002, "learning_rate": 9.943240226530306e-06, "loss": 0.7246, "step": 1209 }, { "epoch": 0.08, "grad_norm": 1.0045223236083984, "learning_rate": 9.94308596697762e-06, "loss": 0.6648, "step": 1210 }, { "epoch": 0.08, "grad_norm": 0.9967672824859619, "learning_rate": 9.942931499288779e-06, "loss": 0.6908, "step": 1211 }, { "epoch": 0.08, "grad_norm": 0.931037187576294, "learning_rate": 9.942776823470288e-06, "loss": 0.6155, "step": 1212 }, { "epoch": 0.08, "grad_norm": 0.9639803171157837, "learning_rate": 9.94262193952866e-06, "loss": 0.6644, "step": 1213 }, { "epoch": 0.08, "grad_norm": 0.9461570978164673, "learning_rate": 9.942466847470415e-06, "loss": 0.6588, "step": 1214 }, { "epoch": 0.08, "grad_norm": 0.8313033580780029, "learning_rate": 9.942311547302087e-06, "loss": 0.5843, "step": 1215 }, { "epoch": 0.08, "grad_norm": 0.9389255046844482, "learning_rate": 9.94215603903021e-06, "loss": 0.65, "step": 1216 }, { "epoch": 0.08, "grad_norm": 0.9553146362304688, "learning_rate": 9.942000322661339e-06, "loss": 0.6667, "step": 1217 }, { "epoch": 0.08, "grad_norm": 0.9988784193992615, "learning_rate": 9.941844398202022e-06, "loss": 0.6178, "step": 1218 }, { "epoch": 0.08, "grad_norm": 0.9160767197608948, "learning_rate": 9.941688265658832e-06, "loss": 0.6256, "step": 1219 }, { "epoch": 0.08, "grad_norm": 0.9464467167854309, "learning_rate": 9.941531925038337e-06, "loss": 0.6528, "step": 1220 }, { "epoch": 0.08, "grad_norm": 0.9644220471382141, "learning_rate": 9.941375376347124e-06, "loss": 0.6812, "step": 1221 }, { "epoch": 0.08, "grad_norm": 0.9486405849456787, "learning_rate": 9.941218619591783e-06, "loss": 0.6619, "step": 1222 }, { "epoch": 0.08, "grad_norm": 1.004610538482666, "learning_rate": 9.941061654778917e-06, "loss": 0.6183, "step": 1223 }, { "epoch": 0.08, "grad_norm": 0.991166889667511, "learning_rate": 9.940904481915132e-06, "loss": 0.6616, "step": 1224 }, { "epoch": 0.08, "grad_norm": 0.913848876953125, "learning_rate": 9.940747101007049e-06, "loss": 0.6455, "step": 1225 }, { "epoch": 0.08, "grad_norm": 0.8956865072250366, "learning_rate": 9.940589512061292e-06, "loss": 0.6446, "step": 1226 }, { "epoch": 0.08, "grad_norm": 0.9563295841217041, "learning_rate": 9.940431715084498e-06, "loss": 0.6442, "step": 1227 }, { "epoch": 0.08, "grad_norm": 0.8902249336242676, "learning_rate": 9.94027371008331e-06, "loss": 0.63, "step": 1228 }, { "epoch": 0.08, "grad_norm": 0.964384138584137, "learning_rate": 9.940115497064383e-06, "loss": 0.679, "step": 1229 }, { "epoch": 0.08, "grad_norm": 1.0036017894744873, "learning_rate": 9.939957076034379e-06, "loss": 0.6915, "step": 1230 }, { "epoch": 0.08, "grad_norm": 1.0034871101379395, "learning_rate": 9.939798446999965e-06, "loss": 0.6486, "step": 1231 }, { "epoch": 0.08, "grad_norm": 0.8999437689781189, "learning_rate": 9.939639609967825e-06, "loss": 0.6805, "step": 1232 }, { "epoch": 0.08, "grad_norm": 0.9058635830879211, "learning_rate": 9.939480564944642e-06, "loss": 0.6467, "step": 1233 }, { "epoch": 0.08, "grad_norm": 0.9270319938659668, "learning_rate": 9.939321311937117e-06, "loss": 0.636, "step": 1234 }, { "epoch": 0.08, "grad_norm": 0.9657304883003235, "learning_rate": 9.939161850951955e-06, "loss": 0.6824, "step": 1235 }, { "epoch": 0.08, "grad_norm": 0.9326258301734924, "learning_rate": 9.939002181995869e-06, "loss": 0.6247, "step": 1236 }, { "epoch": 0.08, "grad_norm": 0.8920637965202332, "learning_rate": 9.938842305075583e-06, "loss": 0.6373, "step": 1237 }, { "epoch": 0.08, "grad_norm": 0.9409562349319458, "learning_rate": 9.938682220197828e-06, "loss": 0.6077, "step": 1238 }, { "epoch": 0.08, "grad_norm": 0.8801417946815491, "learning_rate": 9.938521927369344e-06, "loss": 0.6141, "step": 1239 }, { "epoch": 0.08, "grad_norm": 0.9063442945480347, "learning_rate": 9.938361426596883e-06, "loss": 0.6628, "step": 1240 }, { "epoch": 0.08, "grad_norm": 0.9930490851402283, "learning_rate": 9.938200717887202e-06, "loss": 0.6316, "step": 1241 }, { "epoch": 0.08, "grad_norm": 0.969541609287262, "learning_rate": 9.938039801247066e-06, "loss": 0.6512, "step": 1242 }, { "epoch": 0.08, "grad_norm": 1.0176066160202026, "learning_rate": 9.937878676683254e-06, "loss": 0.706, "step": 1243 }, { "epoch": 0.08, "grad_norm": 0.9394564032554626, "learning_rate": 9.937717344202548e-06, "loss": 0.5894, "step": 1244 }, { "epoch": 0.08, "grad_norm": 0.9627434611320496, "learning_rate": 9.93755580381174e-06, "loss": 0.7024, "step": 1245 }, { "epoch": 0.08, "grad_norm": 0.8768373131752014, "learning_rate": 9.937394055517635e-06, "loss": 0.6532, "step": 1246 }, { "epoch": 0.08, "grad_norm": 0.941260039806366, "learning_rate": 9.937232099327044e-06, "loss": 0.5845, "step": 1247 }, { "epoch": 0.08, "grad_norm": 0.9536455869674683, "learning_rate": 9.937069935246782e-06, "loss": 0.6578, "step": 1248 }, { "epoch": 0.08, "grad_norm": 1.0150847434997559, "learning_rate": 9.93690756328368e-06, "loss": 0.6162, "step": 1249 }, { "epoch": 0.08, "grad_norm": 0.8984355926513672, "learning_rate": 9.936744983444576e-06, "loss": 0.6779, "step": 1250 }, { "epoch": 0.08, "grad_norm": 0.9334084987640381, "learning_rate": 9.936582195736314e-06, "loss": 0.6434, "step": 1251 }, { "epoch": 0.08, "grad_norm": 0.9719336628913879, "learning_rate": 9.936419200165748e-06, "loss": 0.608, "step": 1252 }, { "epoch": 0.08, "grad_norm": 0.9293937087059021, "learning_rate": 9.936255996739743e-06, "loss": 0.6417, "step": 1253 }, { "epoch": 0.08, "grad_norm": 0.9545564651489258, "learning_rate": 9.93609258546517e-06, "loss": 0.68, "step": 1254 }, { "epoch": 0.08, "grad_norm": 0.9403777122497559, "learning_rate": 9.93592896634891e-06, "loss": 0.6945, "step": 1255 }, { "epoch": 0.08, "grad_norm": 1.053970456123352, "learning_rate": 9.93576513939785e-06, "loss": 0.6557, "step": 1256 }, { "epoch": 0.08, "grad_norm": 0.9495333433151245, "learning_rate": 9.935601104618892e-06, "loss": 0.7352, "step": 1257 }, { "epoch": 0.08, "grad_norm": 0.9049481153488159, "learning_rate": 9.93543686201894e-06, "loss": 0.664, "step": 1258 }, { "epoch": 0.08, "grad_norm": 0.96930330991745, "learning_rate": 9.935272411604913e-06, "loss": 0.6956, "step": 1259 }, { "epoch": 0.08, "grad_norm": 0.8767880201339722, "learning_rate": 9.935107753383733e-06, "loss": 0.6015, "step": 1260 }, { "epoch": 0.08, "grad_norm": 0.9508046507835388, "learning_rate": 9.93494288736233e-06, "loss": 0.6727, "step": 1261 }, { "epoch": 0.08, "grad_norm": 0.9858577847480774, "learning_rate": 9.934777813547653e-06, "loss": 0.6565, "step": 1262 }, { "epoch": 0.08, "grad_norm": 0.9373133778572083, "learning_rate": 9.934612531946648e-06, "loss": 0.6131, "step": 1263 }, { "epoch": 0.08, "grad_norm": 0.9406293630599976, "learning_rate": 9.934447042566275e-06, "loss": 0.6567, "step": 1264 }, { "epoch": 0.08, "grad_norm": 0.990612804889679, "learning_rate": 9.934281345413504e-06, "loss": 0.6956, "step": 1265 }, { "epoch": 0.08, "grad_norm": 0.9480006694793701, "learning_rate": 9.934115440495311e-06, "loss": 0.6838, "step": 1266 }, { "epoch": 0.08, "grad_norm": 0.9584172964096069, "learning_rate": 9.93394932781868e-06, "loss": 0.6654, "step": 1267 }, { "epoch": 0.08, "grad_norm": 0.9526914954185486, "learning_rate": 9.933783007390608e-06, "loss": 0.6718, "step": 1268 }, { "epoch": 0.08, "grad_norm": 0.9477076530456543, "learning_rate": 9.933616479218095e-06, "loss": 0.6368, "step": 1269 }, { "epoch": 0.08, "grad_norm": 0.9243208765983582, "learning_rate": 9.933449743308155e-06, "loss": 0.6717, "step": 1270 }, { "epoch": 0.08, "grad_norm": 0.9876498579978943, "learning_rate": 9.93328279966781e-06, "loss": 0.6841, "step": 1271 }, { "epoch": 0.08, "grad_norm": 0.9659183025360107, "learning_rate": 9.933115648304087e-06, "loss": 0.6039, "step": 1272 }, { "epoch": 0.08, "grad_norm": 0.8511553406715393, "learning_rate": 9.932948289224025e-06, "loss": 0.6388, "step": 1273 }, { "epoch": 0.08, "grad_norm": 0.9214879274368286, "learning_rate": 9.932780722434671e-06, "loss": 0.6694, "step": 1274 }, { "epoch": 0.08, "grad_norm": 0.8896194696426392, "learning_rate": 9.932612947943084e-06, "loss": 0.6285, "step": 1275 }, { "epoch": 0.08, "grad_norm": 1.1229159832000732, "learning_rate": 9.932444965756321e-06, "loss": 0.6201, "step": 1276 }, { "epoch": 0.08, "grad_norm": 0.9542286396026611, "learning_rate": 9.93227677588146e-06, "loss": 0.6795, "step": 1277 }, { "epoch": 0.08, "grad_norm": 0.9124268293380737, "learning_rate": 9.932108378325582e-06, "loss": 0.6349, "step": 1278 }, { "epoch": 0.08, "grad_norm": 0.8908014893531799, "learning_rate": 9.931939773095779e-06, "loss": 0.6293, "step": 1279 }, { "epoch": 0.08, "grad_norm": 0.9545292258262634, "learning_rate": 9.93177096019915e-06, "loss": 0.6677, "step": 1280 }, { "epoch": 0.08, "grad_norm": 0.9467611908912659, "learning_rate": 9.9316019396428e-06, "loss": 0.7047, "step": 1281 }, { "epoch": 0.08, "grad_norm": 0.8944831490516663, "learning_rate": 9.931432711433849e-06, "loss": 0.6674, "step": 1282 }, { "epoch": 0.08, "grad_norm": 0.9431514739990234, "learning_rate": 9.93126327557942e-06, "loss": 0.7063, "step": 1283 }, { "epoch": 0.08, "grad_norm": 0.9257122874259949, "learning_rate": 9.931093632086651e-06, "loss": 0.6482, "step": 1284 }, { "epoch": 0.08, "grad_norm": 0.916651725769043, "learning_rate": 9.930923780962683e-06, "loss": 0.6141, "step": 1285 }, { "epoch": 0.08, "grad_norm": 0.9624120593070984, "learning_rate": 9.930753722214668e-06, "loss": 0.6743, "step": 1286 }, { "epoch": 0.08, "grad_norm": 0.8737559914588928, "learning_rate": 9.930583455849766e-06, "loss": 0.5961, "step": 1287 }, { "epoch": 0.08, "grad_norm": 1.0165629386901855, "learning_rate": 9.930412981875148e-06, "loss": 0.6855, "step": 1288 }, { "epoch": 0.08, "grad_norm": 0.9609097242355347, "learning_rate": 9.93024230029799e-06, "loss": 0.7235, "step": 1289 }, { "epoch": 0.08, "grad_norm": 0.9250974059104919, "learning_rate": 9.93007141112548e-06, "loss": 0.6568, "step": 1290 }, { "epoch": 0.08, "grad_norm": 0.9859768748283386, "learning_rate": 9.929900314364813e-06, "loss": 0.6838, "step": 1291 }, { "epoch": 0.08, "grad_norm": 0.9288014769554138, "learning_rate": 9.929729010023195e-06, "loss": 0.6676, "step": 1292 }, { "epoch": 0.08, "grad_norm": 0.9711349010467529, "learning_rate": 9.929557498107836e-06, "loss": 0.6951, "step": 1293 }, { "epoch": 0.08, "grad_norm": 0.9767155051231384, "learning_rate": 9.929385778625959e-06, "loss": 0.6707, "step": 1294 }, { "epoch": 0.08, "grad_norm": 0.9217318296432495, "learning_rate": 9.929213851584798e-06, "loss": 0.6735, "step": 1295 }, { "epoch": 0.08, "grad_norm": 0.9826382398605347, "learning_rate": 9.929041716991587e-06, "loss": 0.6452, "step": 1296 }, { "epoch": 0.08, "grad_norm": 0.9821561574935913, "learning_rate": 9.928869374853576e-06, "loss": 0.6308, "step": 1297 }, { "epoch": 0.08, "grad_norm": 0.9427945613861084, "learning_rate": 9.928696825178021e-06, "loss": 0.6526, "step": 1298 }, { "epoch": 0.08, "grad_norm": 0.9248101711273193, "learning_rate": 9.92852406797219e-06, "loss": 0.6777, "step": 1299 }, { "epoch": 0.08, "grad_norm": 1.0439354181289673, "learning_rate": 9.928351103243356e-06, "loss": 0.693, "step": 1300 }, { "epoch": 0.08, "grad_norm": 0.8999105095863342, "learning_rate": 9.928177930998801e-06, "loss": 0.6325, "step": 1301 }, { "epoch": 0.08, "grad_norm": 0.9729776382446289, "learning_rate": 9.928004551245818e-06, "loss": 0.6127, "step": 1302 }, { "epoch": 0.08, "grad_norm": 0.9380604028701782, "learning_rate": 9.927830963991704e-06, "loss": 0.6486, "step": 1303 }, { "epoch": 0.08, "grad_norm": 0.9806588888168335, "learning_rate": 9.927657169243773e-06, "loss": 0.7019, "step": 1304 }, { "epoch": 0.08, "grad_norm": 0.9600833654403687, "learning_rate": 9.92748316700934e-06, "loss": 0.7007, "step": 1305 }, { "epoch": 0.08, "grad_norm": 0.8898829817771912, "learning_rate": 9.927308957295733e-06, "loss": 0.6332, "step": 1306 }, { "epoch": 0.08, "grad_norm": 0.9268112182617188, "learning_rate": 9.927134540110286e-06, "loss": 0.6576, "step": 1307 }, { "epoch": 0.08, "grad_norm": 0.8421509861946106, "learning_rate": 9.926959915460344e-06, "loss": 0.6011, "step": 1308 }, { "epoch": 0.08, "grad_norm": 0.8830382823944092, "learning_rate": 9.926785083353258e-06, "loss": 0.5837, "step": 1309 }, { "epoch": 0.08, "grad_norm": 0.9646912217140198, "learning_rate": 9.926610043796394e-06, "loss": 0.6313, "step": 1310 }, { "epoch": 0.08, "grad_norm": 0.9010607004165649, "learning_rate": 9.926434796797117e-06, "loss": 0.622, "step": 1311 }, { "epoch": 0.08, "grad_norm": 0.976517379283905, "learning_rate": 9.92625934236281e-06, "loss": 0.6945, "step": 1312 }, { "epoch": 0.08, "grad_norm": 1.0138803720474243, "learning_rate": 9.92608368050086e-06, "loss": 0.6774, "step": 1313 }, { "epoch": 0.08, "grad_norm": 0.9496868848800659, "learning_rate": 9.925907811218661e-06, "loss": 0.6395, "step": 1314 }, { "epoch": 0.08, "grad_norm": 0.9517762064933777, "learning_rate": 9.925731734523621e-06, "loss": 0.6851, "step": 1315 }, { "epoch": 0.08, "grad_norm": 0.8854117393493652, "learning_rate": 9.925555450423153e-06, "loss": 0.6039, "step": 1316 }, { "epoch": 0.08, "grad_norm": 0.975888729095459, "learning_rate": 9.92537895892468e-06, "loss": 0.6392, "step": 1317 }, { "epoch": 0.08, "grad_norm": 0.9728052616119385, "learning_rate": 9.925202260035632e-06, "loss": 0.6934, "step": 1318 }, { "epoch": 0.08, "grad_norm": 0.8676881194114685, "learning_rate": 9.925025353763452e-06, "loss": 0.608, "step": 1319 }, { "epoch": 0.08, "grad_norm": 0.9743890762329102, "learning_rate": 9.924848240115585e-06, "loss": 0.6726, "step": 1320 }, { "epoch": 0.08, "grad_norm": 0.8968915939331055, "learning_rate": 9.924670919099493e-06, "loss": 0.6049, "step": 1321 }, { "epoch": 0.08, "grad_norm": 0.8923019766807556, "learning_rate": 9.92449339072264e-06, "loss": 0.6528, "step": 1322 }, { "epoch": 0.08, "grad_norm": 0.9763730764389038, "learning_rate": 9.924315654992501e-06, "loss": 0.6355, "step": 1323 }, { "epoch": 0.08, "grad_norm": 0.9337408542633057, "learning_rate": 9.924137711916559e-06, "loss": 0.6283, "step": 1324 }, { "epoch": 0.08, "grad_norm": 0.9374693036079407, "learning_rate": 9.92395956150231e-06, "loss": 0.6538, "step": 1325 }, { "epoch": 0.08, "grad_norm": 1.0116811990737915, "learning_rate": 9.923781203757253e-06, "loss": 0.6373, "step": 1326 }, { "epoch": 0.08, "grad_norm": 0.9618993997573853, "learning_rate": 9.923602638688897e-06, "loss": 0.6516, "step": 1327 }, { "epoch": 0.08, "grad_norm": 0.9200368523597717, "learning_rate": 9.923423866304761e-06, "loss": 0.6495, "step": 1328 }, { "epoch": 0.08, "grad_norm": 0.9342839121818542, "learning_rate": 9.923244886612375e-06, "loss": 0.6653, "step": 1329 }, { "epoch": 0.08, "grad_norm": 0.9107709527015686, "learning_rate": 9.923065699619273e-06, "loss": 0.6235, "step": 1330 }, { "epoch": 0.08, "grad_norm": 0.9179040193557739, "learning_rate": 9.922886305333e-06, "loss": 0.6449, "step": 1331 }, { "epoch": 0.08, "grad_norm": 0.9737808108329773, "learning_rate": 9.922706703761111e-06, "loss": 0.6355, "step": 1332 }, { "epoch": 0.08, "grad_norm": 0.9478714466094971, "learning_rate": 9.922526894911166e-06, "loss": 0.6301, "step": 1333 }, { "epoch": 0.08, "grad_norm": 0.9034336805343628, "learning_rate": 9.922346878790739e-06, "loss": 0.6711, "step": 1334 }, { "epoch": 0.08, "grad_norm": 0.9243572354316711, "learning_rate": 9.922166655407408e-06, "loss": 0.6703, "step": 1335 }, { "epoch": 0.08, "grad_norm": 0.9634230136871338, "learning_rate": 9.921986224768762e-06, "loss": 0.629, "step": 1336 }, { "epoch": 0.08, "grad_norm": 0.975191593170166, "learning_rate": 9.9218055868824e-06, "loss": 0.6973, "step": 1337 }, { "epoch": 0.08, "grad_norm": 0.9010855555534363, "learning_rate": 9.921624741755924e-06, "loss": 0.6279, "step": 1338 }, { "epoch": 0.08, "grad_norm": 0.9838567972183228, "learning_rate": 9.921443689396952e-06, "loss": 0.6527, "step": 1339 }, { "epoch": 0.08, "grad_norm": 0.9376393556594849, "learning_rate": 9.921262429813107e-06, "loss": 0.5999, "step": 1340 }, { "epoch": 0.08, "grad_norm": 1.01847505569458, "learning_rate": 9.921080963012021e-06, "loss": 0.637, "step": 1341 }, { "epoch": 0.09, "grad_norm": 0.9339948892593384, "learning_rate": 9.920899289001335e-06, "loss": 0.7109, "step": 1342 }, { "epoch": 0.09, "grad_norm": 0.8605727553367615, "learning_rate": 9.9207174077887e-06, "loss": 0.6148, "step": 1343 }, { "epoch": 0.09, "grad_norm": 0.9776952862739563, "learning_rate": 9.92053531938177e-06, "loss": 0.6101, "step": 1344 }, { "epoch": 0.09, "grad_norm": 0.9384501576423645, "learning_rate": 9.920353023788216e-06, "loss": 0.6861, "step": 1345 }, { "epoch": 0.09, "grad_norm": 1.0112212896347046, "learning_rate": 9.920170521015714e-06, "loss": 0.6201, "step": 1346 }, { "epoch": 0.09, "grad_norm": 0.993434488773346, "learning_rate": 9.919987811071946e-06, "loss": 0.7198, "step": 1347 }, { "epoch": 0.09, "grad_norm": 1.0135927200317383, "learning_rate": 9.919804893964607e-06, "loss": 0.7262, "step": 1348 }, { "epoch": 0.09, "grad_norm": 0.9492275714874268, "learning_rate": 9.9196217697014e-06, "loss": 0.6953, "step": 1349 }, { "epoch": 0.09, "grad_norm": 0.9331912398338318, "learning_rate": 9.919438438290032e-06, "loss": 0.6936, "step": 1350 }, { "epoch": 0.09, "grad_norm": 0.9214736223220825, "learning_rate": 9.919254899738227e-06, "loss": 0.6333, "step": 1351 }, { "epoch": 0.09, "grad_norm": 0.921393096446991, "learning_rate": 9.91907115405371e-06, "loss": 0.6357, "step": 1352 }, { "epoch": 0.09, "grad_norm": 0.867919385433197, "learning_rate": 9.918887201244219e-06, "loss": 0.6465, "step": 1353 }, { "epoch": 0.09, "grad_norm": 0.9736660718917847, "learning_rate": 9.918703041317498e-06, "loss": 0.6553, "step": 1354 }, { "epoch": 0.09, "grad_norm": 0.9158203601837158, "learning_rate": 9.918518674281305e-06, "loss": 0.6443, "step": 1355 }, { "epoch": 0.09, "grad_norm": 0.9227981567382812, "learning_rate": 9.9183341001434e-06, "loss": 0.6795, "step": 1356 }, { "epoch": 0.09, "grad_norm": 0.9615574479103088, "learning_rate": 9.918149318911557e-06, "loss": 0.6565, "step": 1357 }, { "epoch": 0.09, "grad_norm": 0.9969097375869751, "learning_rate": 9.917964330593553e-06, "loss": 0.653, "step": 1358 }, { "epoch": 0.09, "grad_norm": 0.9658487439155579, "learning_rate": 9.917779135197181e-06, "loss": 0.6578, "step": 1359 }, { "epoch": 0.09, "grad_norm": 0.9004530906677246, "learning_rate": 9.917593732730236e-06, "loss": 0.6623, "step": 1360 }, { "epoch": 0.09, "grad_norm": 1.01588773727417, "learning_rate": 9.917408123200527e-06, "loss": 0.6957, "step": 1361 }, { "epoch": 0.09, "grad_norm": 0.9513044357299805, "learning_rate": 9.917222306615868e-06, "loss": 0.5935, "step": 1362 }, { "epoch": 0.09, "grad_norm": 0.9240687489509583, "learning_rate": 9.917036282984084e-06, "loss": 0.6705, "step": 1363 }, { "epoch": 0.09, "grad_norm": 0.9190331697463989, "learning_rate": 9.916850052313007e-06, "loss": 0.6988, "step": 1364 }, { "epoch": 0.09, "grad_norm": 0.9454796314239502, "learning_rate": 9.916663614610478e-06, "loss": 0.6126, "step": 1365 }, { "epoch": 0.09, "grad_norm": 0.930030345916748, "learning_rate": 9.916476969884348e-06, "loss": 0.6659, "step": 1366 }, { "epoch": 0.09, "grad_norm": 0.8999435901641846, "learning_rate": 9.916290118142478e-06, "loss": 0.6424, "step": 1367 }, { "epoch": 0.09, "grad_norm": 0.9197795987129211, "learning_rate": 9.91610305939273e-06, "loss": 0.6463, "step": 1368 }, { "epoch": 0.09, "grad_norm": 0.8855273127555847, "learning_rate": 9.915915793642987e-06, "loss": 0.6438, "step": 1369 }, { "epoch": 0.09, "grad_norm": 0.992560863494873, "learning_rate": 9.91572832090113e-06, "loss": 0.6389, "step": 1370 }, { "epoch": 0.09, "grad_norm": 0.9714159965515137, "learning_rate": 9.915540641175055e-06, "loss": 0.6747, "step": 1371 }, { "epoch": 0.09, "grad_norm": 0.9169105291366577, "learning_rate": 9.915352754472662e-06, "loss": 0.6518, "step": 1372 }, { "epoch": 0.09, "grad_norm": 0.9008778929710388, "learning_rate": 9.915164660801865e-06, "loss": 0.6317, "step": 1373 }, { "epoch": 0.09, "grad_norm": 0.8850517272949219, "learning_rate": 9.914976360170583e-06, "loss": 0.6009, "step": 1374 }, { "epoch": 0.09, "grad_norm": 0.9355595707893372, "learning_rate": 9.914787852586744e-06, "loss": 0.6217, "step": 1375 }, { "epoch": 0.09, "grad_norm": 0.9078220129013062, "learning_rate": 9.914599138058285e-06, "loss": 0.6433, "step": 1376 }, { "epoch": 0.09, "grad_norm": 0.9317836761474609, "learning_rate": 9.914410216593154e-06, "loss": 0.6114, "step": 1377 }, { "epoch": 0.09, "grad_norm": 0.941766083240509, "learning_rate": 9.914221088199304e-06, "loss": 0.6318, "step": 1378 }, { "epoch": 0.09, "grad_norm": 0.9302542805671692, "learning_rate": 9.9140317528847e-06, "loss": 0.6385, "step": 1379 }, { "epoch": 0.09, "grad_norm": 0.9480794072151184, "learning_rate": 9.913842210657314e-06, "loss": 0.6457, "step": 1380 }, { "epoch": 0.09, "grad_norm": 0.9591321349143982, "learning_rate": 9.913652461525126e-06, "loss": 0.6889, "step": 1381 }, { "epoch": 0.09, "grad_norm": 0.9544585347175598, "learning_rate": 9.913462505496126e-06, "loss": 0.6533, "step": 1382 }, { "epoch": 0.09, "grad_norm": 0.9712944030761719, "learning_rate": 9.913272342578312e-06, "loss": 0.6232, "step": 1383 }, { "epoch": 0.09, "grad_norm": 0.9564849138259888, "learning_rate": 9.913081972779692e-06, "loss": 0.6481, "step": 1384 }, { "epoch": 0.09, "grad_norm": 0.9358051419258118, "learning_rate": 9.912891396108281e-06, "loss": 0.6599, "step": 1385 }, { "epoch": 0.09, "grad_norm": 0.9481112360954285, "learning_rate": 9.912700612572106e-06, "loss": 0.6148, "step": 1386 }, { "epoch": 0.09, "grad_norm": 0.9976912140846252, "learning_rate": 9.912509622179197e-06, "loss": 0.6802, "step": 1387 }, { "epoch": 0.09, "grad_norm": 0.9644153118133545, "learning_rate": 9.912318424937596e-06, "loss": 0.674, "step": 1388 }, { "epoch": 0.09, "grad_norm": 0.9347633719444275, "learning_rate": 9.912127020855356e-06, "loss": 0.6715, "step": 1389 }, { "epoch": 0.09, "grad_norm": 0.9877498149871826, "learning_rate": 9.911935409940536e-06, "loss": 0.6818, "step": 1390 }, { "epoch": 0.09, "grad_norm": 0.999534010887146, "learning_rate": 9.911743592201203e-06, "loss": 0.6524, "step": 1391 }, { "epoch": 0.09, "grad_norm": 0.9707819223403931, "learning_rate": 9.911551567645433e-06, "loss": 0.6186, "step": 1392 }, { "epoch": 0.09, "grad_norm": 0.9409770965576172, "learning_rate": 9.911359336281312e-06, "loss": 0.6754, "step": 1393 }, { "epoch": 0.09, "grad_norm": 0.9744927883148193, "learning_rate": 9.911166898116935e-06, "loss": 0.6842, "step": 1394 }, { "epoch": 0.09, "grad_norm": 0.8977870941162109, "learning_rate": 9.910974253160405e-06, "loss": 0.6315, "step": 1395 }, { "epoch": 0.09, "grad_norm": 0.9655022025108337, "learning_rate": 9.910781401419835e-06, "loss": 0.6493, "step": 1396 }, { "epoch": 0.09, "grad_norm": 0.9334004521369934, "learning_rate": 9.910588342903342e-06, "loss": 0.6679, "step": 1397 }, { "epoch": 0.09, "grad_norm": 0.9170674085617065, "learning_rate": 9.910395077619057e-06, "loss": 0.652, "step": 1398 }, { "epoch": 0.09, "grad_norm": 0.8829054832458496, "learning_rate": 9.910201605575116e-06, "loss": 0.5469, "step": 1399 }, { "epoch": 0.09, "grad_norm": 0.9742071032524109, "learning_rate": 9.910007926779669e-06, "loss": 0.6423, "step": 1400 }, { "epoch": 0.09, "grad_norm": 0.9407263398170471, "learning_rate": 9.909814041240867e-06, "loss": 0.7067, "step": 1401 }, { "epoch": 0.09, "grad_norm": 0.9770819544792175, "learning_rate": 9.909619948966875e-06, "loss": 0.6211, "step": 1402 }, { "epoch": 0.09, "grad_norm": 0.8862954378128052, "learning_rate": 9.909425649965869e-06, "loss": 0.6222, "step": 1403 }, { "epoch": 0.09, "grad_norm": 0.8932839035987854, "learning_rate": 9.909231144246026e-06, "loss": 0.6406, "step": 1404 }, { "epoch": 0.09, "grad_norm": 0.972990870475769, "learning_rate": 9.909036431815538e-06, "loss": 0.6454, "step": 1405 }, { "epoch": 0.09, "grad_norm": 0.9712089896202087, "learning_rate": 9.908841512682602e-06, "loss": 0.636, "step": 1406 }, { "epoch": 0.09, "grad_norm": 0.9432918429374695, "learning_rate": 9.908646386855427e-06, "loss": 0.639, "step": 1407 }, { "epoch": 0.09, "grad_norm": 0.896690845489502, "learning_rate": 9.90845105434223e-06, "loss": 0.6778, "step": 1408 }, { "epoch": 0.09, "grad_norm": 0.8984845280647278, "learning_rate": 9.908255515151232e-06, "loss": 0.6641, "step": 1409 }, { "epoch": 0.09, "grad_norm": 0.9165180921554565, "learning_rate": 9.90805976929067e-06, "loss": 0.6136, "step": 1410 }, { "epoch": 0.09, "grad_norm": 0.9825322031974792, "learning_rate": 9.907863816768786e-06, "loss": 0.68, "step": 1411 }, { "epoch": 0.09, "grad_norm": 0.9669419527053833, "learning_rate": 9.907667657593828e-06, "loss": 0.6393, "step": 1412 }, { "epoch": 0.09, "grad_norm": 0.9365913271903992, "learning_rate": 9.907471291774058e-06, "loss": 0.6369, "step": 1413 }, { "epoch": 0.09, "grad_norm": 0.9577059745788574, "learning_rate": 9.907274719317746e-06, "loss": 0.6428, "step": 1414 }, { "epoch": 0.09, "grad_norm": 0.9230369329452515, "learning_rate": 9.907077940233162e-06, "loss": 0.6102, "step": 1415 }, { "epoch": 0.09, "grad_norm": 1.0820754766464233, "learning_rate": 9.906880954528601e-06, "loss": 0.7506, "step": 1416 }, { "epoch": 0.09, "grad_norm": 0.98191237449646, "learning_rate": 9.90668376221235e-06, "loss": 0.6625, "step": 1417 }, { "epoch": 0.09, "grad_norm": 1.2556596994400024, "learning_rate": 9.906486363292718e-06, "loss": 0.6521, "step": 1418 }, { "epoch": 0.09, "grad_norm": 0.9278602004051208, "learning_rate": 9.906288757778012e-06, "loss": 0.6897, "step": 1419 }, { "epoch": 0.09, "grad_norm": 0.932048499584198, "learning_rate": 9.906090945676552e-06, "loss": 0.6475, "step": 1420 }, { "epoch": 0.09, "grad_norm": 0.9188955426216125, "learning_rate": 9.905892926996672e-06, "loss": 0.6607, "step": 1421 }, { "epoch": 0.09, "grad_norm": 0.8783012628555298, "learning_rate": 9.905694701746706e-06, "loss": 0.67, "step": 1422 }, { "epoch": 0.09, "grad_norm": 0.9844834208488464, "learning_rate": 9.905496269935002e-06, "loss": 0.6815, "step": 1423 }, { "epoch": 0.09, "grad_norm": 0.8767088055610657, "learning_rate": 9.905297631569915e-06, "loss": 0.6505, "step": 1424 }, { "epoch": 0.09, "grad_norm": 0.9569482207298279, "learning_rate": 9.905098786659809e-06, "loss": 0.6456, "step": 1425 }, { "epoch": 0.09, "grad_norm": 0.991873562335968, "learning_rate": 9.904899735213058e-06, "loss": 0.6747, "step": 1426 }, { "epoch": 0.09, "grad_norm": 0.8848119974136353, "learning_rate": 9.90470047723804e-06, "loss": 0.6224, "step": 1427 }, { "epoch": 0.09, "grad_norm": 0.8410341143608093, "learning_rate": 9.904501012743149e-06, "loss": 0.5621, "step": 1428 }, { "epoch": 0.09, "grad_norm": 0.951511561870575, "learning_rate": 9.90430134173678e-06, "loss": 0.651, "step": 1429 }, { "epoch": 0.09, "grad_norm": 0.93990159034729, "learning_rate": 9.904101464227342e-06, "loss": 0.6245, "step": 1430 }, { "epoch": 0.09, "grad_norm": 0.9799292087554932, "learning_rate": 9.903901380223254e-06, "loss": 0.6582, "step": 1431 }, { "epoch": 0.09, "grad_norm": 0.9411140084266663, "learning_rate": 9.903701089732937e-06, "loss": 0.6366, "step": 1432 }, { "epoch": 0.09, "grad_norm": 0.8953483700752258, "learning_rate": 9.903500592764825e-06, "loss": 0.5984, "step": 1433 }, { "epoch": 0.09, "grad_norm": 0.9429217576980591, "learning_rate": 9.903299889327362e-06, "loss": 0.6379, "step": 1434 }, { "epoch": 0.09, "grad_norm": 0.9783498644828796, "learning_rate": 9.903098979428998e-06, "loss": 0.6302, "step": 1435 }, { "epoch": 0.09, "grad_norm": 0.9483500719070435, "learning_rate": 9.902897863078192e-06, "loss": 0.5857, "step": 1436 }, { "epoch": 0.09, "grad_norm": 0.9564317464828491, "learning_rate": 9.902696540283414e-06, "loss": 0.6902, "step": 1437 }, { "epoch": 0.09, "grad_norm": 0.8706897497177124, "learning_rate": 9.90249501105314e-06, "loss": 0.5932, "step": 1438 }, { "epoch": 0.09, "grad_norm": 0.913366436958313, "learning_rate": 9.902293275395854e-06, "loss": 0.6527, "step": 1439 }, { "epoch": 0.09, "grad_norm": 0.9667909741401672, "learning_rate": 9.902091333320053e-06, "loss": 0.6133, "step": 1440 }, { "epoch": 0.09, "grad_norm": 0.9342280626296997, "learning_rate": 9.90188918483424e-06, "loss": 0.6631, "step": 1441 }, { "epoch": 0.09, "grad_norm": 0.9814968705177307, "learning_rate": 9.901686829946924e-06, "loss": 0.6715, "step": 1442 }, { "epoch": 0.09, "grad_norm": 0.9427680373191833, "learning_rate": 9.901484268666628e-06, "loss": 0.6623, "step": 1443 }, { "epoch": 0.09, "grad_norm": 0.9245345592498779, "learning_rate": 9.90128150100188e-06, "loss": 0.7061, "step": 1444 }, { "epoch": 0.09, "grad_norm": 0.9452770948410034, "learning_rate": 9.90107852696122e-06, "loss": 0.6333, "step": 1445 }, { "epoch": 0.09, "grad_norm": 1.0394012928009033, "learning_rate": 9.900875346553192e-06, "loss": 0.675, "step": 1446 }, { "epoch": 0.09, "grad_norm": 0.9171515107154846, "learning_rate": 9.900671959786352e-06, "loss": 0.6535, "step": 1447 }, { "epoch": 0.09, "grad_norm": 0.9841604828834534, "learning_rate": 9.900468366669264e-06, "loss": 0.6465, "step": 1448 }, { "epoch": 0.09, "grad_norm": 0.9761607050895691, "learning_rate": 9.900264567210501e-06, "loss": 0.6161, "step": 1449 }, { "epoch": 0.09, "grad_norm": 0.8922892808914185, "learning_rate": 9.900060561418643e-06, "loss": 0.6091, "step": 1450 }, { "epoch": 0.09, "grad_norm": 0.984734833240509, "learning_rate": 9.89985634930228e-06, "loss": 0.6684, "step": 1451 }, { "epoch": 0.09, "grad_norm": 0.9379397630691528, "learning_rate": 9.899651930870014e-06, "loss": 0.6442, "step": 1452 }, { "epoch": 0.09, "grad_norm": 0.9366482496261597, "learning_rate": 9.899447306130447e-06, "loss": 0.6081, "step": 1453 }, { "epoch": 0.09, "grad_norm": 1.0072358846664429, "learning_rate": 9.8992424750922e-06, "loss": 0.6793, "step": 1454 }, { "epoch": 0.09, "grad_norm": 0.9230679869651794, "learning_rate": 9.899037437763894e-06, "loss": 0.6299, "step": 1455 }, { "epoch": 0.09, "grad_norm": 0.9557128548622131, "learning_rate": 9.898832194154165e-06, "loss": 0.6412, "step": 1456 }, { "epoch": 0.09, "grad_norm": 0.8996137976646423, "learning_rate": 9.898626744271654e-06, "loss": 0.5912, "step": 1457 }, { "epoch": 0.09, "grad_norm": 0.9441683292388916, "learning_rate": 9.898421088125012e-06, "loss": 0.6139, "step": 1458 }, { "epoch": 0.09, "grad_norm": 0.9505671858787537, "learning_rate": 9.898215225722899e-06, "loss": 0.6811, "step": 1459 }, { "epoch": 0.09, "grad_norm": 1.0145865678787231, "learning_rate": 9.898009157073982e-06, "loss": 0.6746, "step": 1460 }, { "epoch": 0.09, "grad_norm": 0.9370318651199341, "learning_rate": 9.897802882186938e-06, "loss": 0.6384, "step": 1461 }, { "epoch": 0.09, "grad_norm": 0.890518307685852, "learning_rate": 9.897596401070452e-06, "loss": 0.6382, "step": 1462 }, { "epoch": 0.09, "grad_norm": 0.8634839653968811, "learning_rate": 9.89738971373322e-06, "loss": 0.6107, "step": 1463 }, { "epoch": 0.09, "grad_norm": 0.9820486307144165, "learning_rate": 9.897182820183944e-06, "loss": 0.6614, "step": 1464 }, { "epoch": 0.09, "grad_norm": 0.9493029117584229, "learning_rate": 9.896975720431334e-06, "loss": 0.6184, "step": 1465 }, { "epoch": 0.09, "grad_norm": 0.9800208210945129, "learning_rate": 9.896768414484115e-06, "loss": 0.6639, "step": 1466 }, { "epoch": 0.09, "grad_norm": 0.9691864848136902, "learning_rate": 9.896560902351009e-06, "loss": 0.6536, "step": 1467 }, { "epoch": 0.09, "grad_norm": 0.964766263961792, "learning_rate": 9.89635318404076e-06, "loss": 0.6796, "step": 1468 }, { "epoch": 0.09, "grad_norm": 0.8742868900299072, "learning_rate": 9.896145259562111e-06, "loss": 0.6627, "step": 1469 }, { "epoch": 0.09, "grad_norm": 0.9007435441017151, "learning_rate": 9.895937128923816e-06, "loss": 0.6388, "step": 1470 }, { "epoch": 0.09, "grad_norm": 0.931812584400177, "learning_rate": 9.895728792134642e-06, "loss": 0.6514, "step": 1471 }, { "epoch": 0.09, "grad_norm": 0.9516260027885437, "learning_rate": 9.895520249203358e-06, "loss": 0.6737, "step": 1472 }, { "epoch": 0.09, "grad_norm": 0.9379490613937378, "learning_rate": 9.895311500138749e-06, "loss": 0.6273, "step": 1473 }, { "epoch": 0.09, "grad_norm": 0.9107145667076111, "learning_rate": 9.8951025449496e-06, "loss": 0.6562, "step": 1474 }, { "epoch": 0.09, "grad_norm": 0.8951176404953003, "learning_rate": 9.894893383644713e-06, "loss": 0.5977, "step": 1475 }, { "epoch": 0.09, "grad_norm": 0.9197559356689453, "learning_rate": 9.894684016232893e-06, "loss": 0.6614, "step": 1476 }, { "epoch": 0.09, "grad_norm": 0.9731332063674927, "learning_rate": 9.894474442722956e-06, "loss": 0.6992, "step": 1477 }, { "epoch": 0.09, "grad_norm": 0.9306113719940186, "learning_rate": 9.89426466312373e-06, "loss": 0.6351, "step": 1478 }, { "epoch": 0.09, "grad_norm": 0.9742302298545837, "learning_rate": 9.89405467744404e-06, "loss": 0.7297, "step": 1479 }, { "epoch": 0.09, "grad_norm": 0.891633927822113, "learning_rate": 9.893844485692736e-06, "loss": 0.6004, "step": 1480 }, { "epoch": 0.09, "grad_norm": 1.000712275505066, "learning_rate": 9.893634087878665e-06, "loss": 0.6486, "step": 1481 }, { "epoch": 0.09, "grad_norm": 0.9697219729423523, "learning_rate": 9.893423484010685e-06, "loss": 0.6353, "step": 1482 }, { "epoch": 0.09, "grad_norm": 0.9428305625915527, "learning_rate": 9.893212674097666e-06, "loss": 0.6327, "step": 1483 }, { "epoch": 0.09, "grad_norm": 0.8775177001953125, "learning_rate": 9.893001658148482e-06, "loss": 0.5795, "step": 1484 }, { "epoch": 0.09, "grad_norm": 0.8842799663543701, "learning_rate": 9.892790436172022e-06, "loss": 0.6095, "step": 1485 }, { "epoch": 0.09, "grad_norm": 0.9339465498924255, "learning_rate": 9.892579008177176e-06, "loss": 0.6589, "step": 1486 }, { "epoch": 0.09, "grad_norm": 0.940681517124176, "learning_rate": 9.892367374172849e-06, "loss": 0.7008, "step": 1487 }, { "epoch": 0.09, "grad_norm": 0.942547619342804, "learning_rate": 9.89215553416795e-06, "loss": 0.6456, "step": 1488 }, { "epoch": 0.09, "grad_norm": 0.9299596548080444, "learning_rate": 9.8919434881714e-06, "loss": 0.6347, "step": 1489 }, { "epoch": 0.09, "grad_norm": 0.9081819653511047, "learning_rate": 9.891731236192127e-06, "loss": 0.6871, "step": 1490 }, { "epoch": 0.09, "grad_norm": 0.903308093547821, "learning_rate": 9.89151877823907e-06, "loss": 0.644, "step": 1491 }, { "epoch": 0.09, "grad_norm": 0.8742372989654541, "learning_rate": 9.891306114321175e-06, "loss": 0.6389, "step": 1492 }, { "epoch": 0.09, "grad_norm": 0.9535795450210571, "learning_rate": 9.891093244447393e-06, "loss": 0.6408, "step": 1493 }, { "epoch": 0.09, "grad_norm": 0.9566690921783447, "learning_rate": 9.890880168626691e-06, "loss": 0.6521, "step": 1494 }, { "epoch": 0.09, "grad_norm": 0.9142457246780396, "learning_rate": 9.890666886868038e-06, "loss": 0.6411, "step": 1495 }, { "epoch": 0.09, "grad_norm": 0.8510489463806152, "learning_rate": 9.890453399180415e-06, "loss": 0.6156, "step": 1496 }, { "epoch": 0.09, "grad_norm": 0.9929180145263672, "learning_rate": 9.890239705572815e-06, "loss": 0.6782, "step": 1497 }, { "epoch": 0.09, "grad_norm": 0.93791264295578, "learning_rate": 9.89002580605423e-06, "loss": 0.6499, "step": 1498 }, { "epoch": 0.09, "grad_norm": 0.9413290619850159, "learning_rate": 9.88981170063367e-06, "loss": 0.6317, "step": 1499 }, { "epoch": 0.1, "grad_norm": 0.9057697057723999, "learning_rate": 9.88959738932015e-06, "loss": 0.5684, "step": 1500 }, { "epoch": 0.1, "grad_norm": 0.9506174921989441, "learning_rate": 9.889382872122693e-06, "loss": 0.7017, "step": 1501 }, { "epoch": 0.1, "grad_norm": 0.9269284605979919, "learning_rate": 9.889168149050334e-06, "loss": 0.6496, "step": 1502 }, { "epoch": 0.1, "grad_norm": 0.9708095192909241, "learning_rate": 9.88895322011211e-06, "loss": 0.7373, "step": 1503 }, { "epoch": 0.1, "grad_norm": 0.9477187991142273, "learning_rate": 9.888738085317075e-06, "loss": 0.7015, "step": 1504 }, { "epoch": 0.1, "grad_norm": 0.8957401514053345, "learning_rate": 9.888522744674286e-06, "loss": 0.6327, "step": 1505 }, { "epoch": 0.1, "grad_norm": 0.9387091994285583, "learning_rate": 9.888307198192808e-06, "loss": 0.6296, "step": 1506 }, { "epoch": 0.1, "grad_norm": 0.9464743733406067, "learning_rate": 9.888091445881723e-06, "loss": 0.6616, "step": 1507 }, { "epoch": 0.1, "grad_norm": 0.944981575012207, "learning_rate": 9.887875487750108e-06, "loss": 0.637, "step": 1508 }, { "epoch": 0.1, "grad_norm": 0.9454977512359619, "learning_rate": 9.887659323807062e-06, "loss": 0.6645, "step": 1509 }, { "epoch": 0.1, "grad_norm": 0.9209526777267456, "learning_rate": 9.887442954061684e-06, "loss": 0.6978, "step": 1510 }, { "epoch": 0.1, "grad_norm": 0.9140705466270447, "learning_rate": 9.887226378523085e-06, "loss": 0.6424, "step": 1511 }, { "epoch": 0.1, "grad_norm": 0.9300777316093445, "learning_rate": 9.887009597200385e-06, "loss": 0.6293, "step": 1512 }, { "epoch": 0.1, "grad_norm": 0.883039653301239, "learning_rate": 9.88679261010271e-06, "loss": 0.561, "step": 1513 }, { "epoch": 0.1, "grad_norm": 0.9039274454116821, "learning_rate": 9.886575417239202e-06, "loss": 0.6245, "step": 1514 }, { "epoch": 0.1, "grad_norm": 0.9318472743034363, "learning_rate": 9.886358018619e-06, "loss": 0.6637, "step": 1515 }, { "epoch": 0.1, "grad_norm": 0.853915810585022, "learning_rate": 9.886140414251259e-06, "loss": 0.6292, "step": 1516 }, { "epoch": 0.1, "grad_norm": 0.996114194393158, "learning_rate": 9.885922604145143e-06, "loss": 0.6856, "step": 1517 }, { "epoch": 0.1, "grad_norm": 0.9068061113357544, "learning_rate": 9.885704588309825e-06, "loss": 0.6218, "step": 1518 }, { "epoch": 0.1, "grad_norm": 0.9396615624427795, "learning_rate": 9.885486366754482e-06, "loss": 0.6889, "step": 1519 }, { "epoch": 0.1, "grad_norm": 0.9767167568206787, "learning_rate": 9.885267939488303e-06, "loss": 0.669, "step": 1520 }, { "epoch": 0.1, "grad_norm": 0.9243539571762085, "learning_rate": 9.885049306520487e-06, "loss": 0.571, "step": 1521 }, { "epoch": 0.1, "grad_norm": 0.9698777794837952, "learning_rate": 9.884830467860238e-06, "loss": 0.6195, "step": 1522 }, { "epoch": 0.1, "grad_norm": 0.9754754900932312, "learning_rate": 9.88461142351677e-06, "loss": 0.6712, "step": 1523 }, { "epoch": 0.1, "grad_norm": 0.9276134371757507, "learning_rate": 9.884392173499308e-06, "loss": 0.6022, "step": 1524 }, { "epoch": 0.1, "grad_norm": 0.8962921500205994, "learning_rate": 9.884172717817085e-06, "loss": 0.6694, "step": 1525 }, { "epoch": 0.1, "grad_norm": 0.9050678610801697, "learning_rate": 9.883953056479336e-06, "loss": 0.6422, "step": 1526 }, { "epoch": 0.1, "grad_norm": 1.006984829902649, "learning_rate": 9.883733189495316e-06, "loss": 0.6856, "step": 1527 }, { "epoch": 0.1, "grad_norm": 0.9265629053115845, "learning_rate": 9.88351311687428e-06, "loss": 0.6231, "step": 1528 }, { "epoch": 0.1, "grad_norm": 0.9328793883323669, "learning_rate": 9.883292838625495e-06, "loss": 0.6304, "step": 1529 }, { "epoch": 0.1, "grad_norm": 0.9297760128974915, "learning_rate": 9.883072354758237e-06, "loss": 0.6102, "step": 1530 }, { "epoch": 0.1, "grad_norm": 0.9190971851348877, "learning_rate": 9.88285166528179e-06, "loss": 0.6895, "step": 1531 }, { "epoch": 0.1, "grad_norm": 0.9351559281349182, "learning_rate": 9.882630770205444e-06, "loss": 0.5951, "step": 1532 }, { "epoch": 0.1, "grad_norm": 0.9502492547035217, "learning_rate": 9.882409669538503e-06, "loss": 0.6165, "step": 1533 }, { "epoch": 0.1, "grad_norm": 0.9726475477218628, "learning_rate": 9.882188363290273e-06, "loss": 0.6672, "step": 1534 }, { "epoch": 0.1, "grad_norm": 1.011664628982544, "learning_rate": 9.881966851470077e-06, "loss": 0.6367, "step": 1535 }, { "epoch": 0.1, "grad_norm": 0.9154837727546692, "learning_rate": 9.881745134087239e-06, "loss": 0.6487, "step": 1536 }, { "epoch": 0.1, "grad_norm": 0.9829793572425842, "learning_rate": 9.881523211151097e-06, "loss": 0.6535, "step": 1537 }, { "epoch": 0.1, "grad_norm": 0.9159083366394043, "learning_rate": 9.881301082670992e-06, "loss": 0.6405, "step": 1538 }, { "epoch": 0.1, "grad_norm": 0.9753561615943909, "learning_rate": 9.881078748656282e-06, "loss": 0.673, "step": 1539 }, { "epoch": 0.1, "grad_norm": 0.961372971534729, "learning_rate": 9.880856209116324e-06, "loss": 0.6218, "step": 1540 }, { "epoch": 0.1, "grad_norm": 0.9532050490379333, "learning_rate": 9.880633464060492e-06, "loss": 0.6335, "step": 1541 }, { "epoch": 0.1, "grad_norm": 0.9294744729995728, "learning_rate": 9.880410513498163e-06, "loss": 0.6179, "step": 1542 }, { "epoch": 0.1, "grad_norm": 0.979083240032196, "learning_rate": 9.880187357438722e-06, "loss": 0.6624, "step": 1543 }, { "epoch": 0.1, "grad_norm": 0.9284359216690063, "learning_rate": 9.87996399589157e-06, "loss": 0.646, "step": 1544 }, { "epoch": 0.1, "grad_norm": 0.9217939376831055, "learning_rate": 9.87974042886611e-06, "loss": 0.6153, "step": 1545 }, { "epoch": 0.1, "grad_norm": 0.8446288704872131, "learning_rate": 9.879516656371758e-06, "loss": 0.5636, "step": 1546 }, { "epoch": 0.1, "grad_norm": 1.0131950378417969, "learning_rate": 9.879292678417934e-06, "loss": 0.6842, "step": 1547 }, { "epoch": 0.1, "grad_norm": 0.9436971545219421, "learning_rate": 9.879068495014068e-06, "loss": 0.6342, "step": 1548 }, { "epoch": 0.1, "grad_norm": 0.9207556843757629, "learning_rate": 9.878844106169601e-06, "loss": 0.717, "step": 1549 }, { "epoch": 0.1, "grad_norm": 0.9688981771469116, "learning_rate": 9.87861951189398e-06, "loss": 0.6763, "step": 1550 }, { "epoch": 0.1, "grad_norm": 0.9991617798805237, "learning_rate": 9.878394712196665e-06, "loss": 0.6928, "step": 1551 }, { "epoch": 0.1, "grad_norm": 0.9736862182617188, "learning_rate": 9.878169707087116e-06, "loss": 0.6552, "step": 1552 }, { "epoch": 0.1, "grad_norm": 0.9318220019340515, "learning_rate": 9.877944496574813e-06, "loss": 0.5917, "step": 1553 }, { "epoch": 0.1, "grad_norm": 0.8690041303634644, "learning_rate": 9.877719080669235e-06, "loss": 0.6064, "step": 1554 }, { "epoch": 0.1, "grad_norm": 0.9481027126312256, "learning_rate": 9.877493459379876e-06, "loss": 0.6604, "step": 1555 }, { "epoch": 0.1, "grad_norm": 1.0152469873428345, "learning_rate": 9.877267632716235e-06, "loss": 0.7071, "step": 1556 }, { "epoch": 0.1, "grad_norm": 0.9626147150993347, "learning_rate": 9.87704160068782e-06, "loss": 0.623, "step": 1557 }, { "epoch": 0.1, "grad_norm": 0.9506250619888306, "learning_rate": 9.87681536330415e-06, "loss": 0.6372, "step": 1558 }, { "epoch": 0.1, "grad_norm": 0.9783592224121094, "learning_rate": 9.87658892057475e-06, "loss": 0.6661, "step": 1559 }, { "epoch": 0.1, "grad_norm": 0.9387713670730591, "learning_rate": 9.876362272509154e-06, "loss": 0.7131, "step": 1560 }, { "epoch": 0.1, "grad_norm": 0.9006531238555908, "learning_rate": 9.876135419116908e-06, "loss": 0.6329, "step": 1561 }, { "epoch": 0.1, "grad_norm": 0.9375502467155457, "learning_rate": 9.87590836040756e-06, "loss": 0.6184, "step": 1562 }, { "epoch": 0.1, "grad_norm": 0.9109377264976501, "learning_rate": 9.875681096390676e-06, "loss": 0.6317, "step": 1563 }, { "epoch": 0.1, "grad_norm": 0.9256362915039062, "learning_rate": 9.87545362707582e-06, "loss": 0.6397, "step": 1564 }, { "epoch": 0.1, "grad_norm": 0.9016781449317932, "learning_rate": 9.875225952472574e-06, "loss": 0.6329, "step": 1565 }, { "epoch": 0.1, "grad_norm": 0.9019981026649475, "learning_rate": 9.874998072590521e-06, "loss": 0.6723, "step": 1566 }, { "epoch": 0.1, "grad_norm": 0.9814824461936951, "learning_rate": 9.874769987439259e-06, "loss": 0.6784, "step": 1567 }, { "epoch": 0.1, "grad_norm": 0.9205242395401001, "learning_rate": 9.87454169702839e-06, "loss": 0.6541, "step": 1568 }, { "epoch": 0.1, "grad_norm": 1.0002273321151733, "learning_rate": 9.87431320136753e-06, "loss": 0.5924, "step": 1569 }, { "epoch": 0.1, "grad_norm": 0.9376479983329773, "learning_rate": 9.874084500466295e-06, "loss": 0.6596, "step": 1570 }, { "epoch": 0.1, "grad_norm": 0.872928261756897, "learning_rate": 9.873855594334319e-06, "loss": 0.6838, "step": 1571 }, { "epoch": 0.1, "grad_norm": 0.902869701385498, "learning_rate": 9.873626482981238e-06, "loss": 0.6284, "step": 1572 }, { "epoch": 0.1, "grad_norm": 0.9037356972694397, "learning_rate": 9.873397166416698e-06, "loss": 0.6083, "step": 1573 }, { "epoch": 0.1, "grad_norm": 0.8765510320663452, "learning_rate": 9.87316764465036e-06, "loss": 0.6122, "step": 1574 }, { "epoch": 0.1, "grad_norm": 0.9921714067459106, "learning_rate": 9.872937917691883e-06, "loss": 0.5799, "step": 1575 }, { "epoch": 0.1, "grad_norm": 0.9323515295982361, "learning_rate": 9.872707985550942e-06, "loss": 0.5727, "step": 1576 }, { "epoch": 0.1, "grad_norm": 0.9624417424201965, "learning_rate": 9.872477848237221e-06, "loss": 0.6477, "step": 1577 }, { "epoch": 0.1, "grad_norm": 0.9209104180335999, "learning_rate": 9.872247505760405e-06, "loss": 0.6059, "step": 1578 }, { "epoch": 0.1, "grad_norm": 0.9874113202095032, "learning_rate": 9.872016958130197e-06, "loss": 0.6308, "step": 1579 }, { "epoch": 0.1, "grad_norm": 0.981163740158081, "learning_rate": 9.871786205356303e-06, "loss": 0.6446, "step": 1580 }, { "epoch": 0.1, "grad_norm": 0.9238435626029968, "learning_rate": 9.871555247448442e-06, "loss": 0.6831, "step": 1581 }, { "epoch": 0.1, "grad_norm": 1.0302647352218628, "learning_rate": 9.871324084416332e-06, "loss": 0.6095, "step": 1582 }, { "epoch": 0.1, "grad_norm": 0.9587137699127197, "learning_rate": 9.871092716269714e-06, "loss": 0.6613, "step": 1583 }, { "epoch": 0.1, "grad_norm": 0.9017694592475891, "learning_rate": 9.870861143018327e-06, "loss": 0.6368, "step": 1584 }, { "epoch": 0.1, "grad_norm": 0.9232084155082703, "learning_rate": 9.87062936467192e-06, "loss": 0.6588, "step": 1585 }, { "epoch": 0.1, "grad_norm": 0.9299889206886292, "learning_rate": 9.870397381240256e-06, "loss": 0.6222, "step": 1586 }, { "epoch": 0.1, "grad_norm": 0.8801997900009155, "learning_rate": 9.870165192733101e-06, "loss": 0.6371, "step": 1587 }, { "epoch": 0.1, "grad_norm": 0.9414759278297424, "learning_rate": 9.869932799160232e-06, "loss": 0.6735, "step": 1588 }, { "epoch": 0.1, "grad_norm": 1.5504239797592163, "learning_rate": 9.869700200531431e-06, "loss": 0.6738, "step": 1589 }, { "epoch": 0.1, "grad_norm": 0.9341895580291748, "learning_rate": 9.869467396856499e-06, "loss": 0.6024, "step": 1590 }, { "epoch": 0.1, "grad_norm": 0.921317994594574, "learning_rate": 9.869234388145232e-06, "loss": 0.6963, "step": 1591 }, { "epoch": 0.1, "grad_norm": 0.9685119986534119, "learning_rate": 9.869001174407444e-06, "loss": 0.5984, "step": 1592 }, { "epoch": 0.1, "grad_norm": 0.8759018182754517, "learning_rate": 9.868767755652955e-06, "loss": 0.6223, "step": 1593 }, { "epoch": 0.1, "grad_norm": 0.8878785371780396, "learning_rate": 9.868534131891594e-06, "loss": 0.6196, "step": 1594 }, { "epoch": 0.1, "grad_norm": 0.9563702344894409, "learning_rate": 9.868300303133195e-06, "loss": 0.6902, "step": 1595 }, { "epoch": 0.1, "grad_norm": 0.9496309757232666, "learning_rate": 9.868066269387609e-06, "loss": 0.6131, "step": 1596 }, { "epoch": 0.1, "grad_norm": 0.9410830140113831, "learning_rate": 9.867832030664685e-06, "loss": 0.6433, "step": 1597 }, { "epoch": 0.1, "grad_norm": 1.0077545642852783, "learning_rate": 9.867597586974288e-06, "loss": 0.6728, "step": 1598 }, { "epoch": 0.1, "grad_norm": 0.9375026226043701, "learning_rate": 9.86736293832629e-06, "loss": 0.6316, "step": 1599 }, { "epoch": 0.1, "grad_norm": 0.9416118264198303, "learning_rate": 9.86712808473057e-06, "loss": 0.662, "step": 1600 }, { "epoch": 0.1, "grad_norm": 0.9092760682106018, "learning_rate": 9.86689302619702e-06, "loss": 0.6317, "step": 1601 }, { "epoch": 0.1, "grad_norm": 0.9220471978187561, "learning_rate": 9.866657762735534e-06, "loss": 0.6576, "step": 1602 }, { "epoch": 0.1, "grad_norm": 0.9349024295806885, "learning_rate": 9.866422294356019e-06, "loss": 0.6976, "step": 1603 }, { "epoch": 0.1, "grad_norm": 0.8267975449562073, "learning_rate": 9.866186621068391e-06, "loss": 0.5685, "step": 1604 }, { "epoch": 0.1, "grad_norm": 0.9613746404647827, "learning_rate": 9.865950742882574e-06, "loss": 0.6038, "step": 1605 }, { "epoch": 0.1, "grad_norm": 0.9852586984634399, "learning_rate": 9.865714659808497e-06, "loss": 0.6429, "step": 1606 }, { "epoch": 0.1, "grad_norm": 0.9281002283096313, "learning_rate": 9.865478371856102e-06, "loss": 0.6473, "step": 1607 }, { "epoch": 0.1, "grad_norm": 0.8990695476531982, "learning_rate": 9.86524187903534e-06, "loss": 0.6667, "step": 1608 }, { "epoch": 0.1, "grad_norm": 0.9048877358436584, "learning_rate": 9.865005181356166e-06, "loss": 0.6437, "step": 1609 }, { "epoch": 0.1, "grad_norm": 0.957685649394989, "learning_rate": 9.864768278828548e-06, "loss": 0.6228, "step": 1610 }, { "epoch": 0.1, "grad_norm": 0.8602043986320496, "learning_rate": 9.864531171462462e-06, "loss": 0.5928, "step": 1611 }, { "epoch": 0.1, "grad_norm": 0.9182524085044861, "learning_rate": 9.86429385926789e-06, "loss": 0.6859, "step": 1612 }, { "epoch": 0.1, "grad_norm": 0.9720632433891296, "learning_rate": 9.864056342254827e-06, "loss": 0.6562, "step": 1613 }, { "epoch": 0.1, "grad_norm": 0.9607463479042053, "learning_rate": 9.86381862043327e-06, "loss": 0.6783, "step": 1614 }, { "epoch": 0.1, "grad_norm": 0.9194375276565552, "learning_rate": 9.863580693813232e-06, "loss": 0.6433, "step": 1615 }, { "epoch": 0.1, "grad_norm": 0.9061447381973267, "learning_rate": 9.86334256240473e-06, "loss": 0.6577, "step": 1616 }, { "epoch": 0.1, "grad_norm": 0.911880373954773, "learning_rate": 9.86310422621779e-06, "loss": 0.6437, "step": 1617 }, { "epoch": 0.1, "grad_norm": 0.9014673233032227, "learning_rate": 9.86286568526245e-06, "loss": 0.6688, "step": 1618 }, { "epoch": 0.1, "grad_norm": 0.8759530782699585, "learning_rate": 9.862626939548751e-06, "loss": 0.6889, "step": 1619 }, { "epoch": 0.1, "grad_norm": 0.8872689008712769, "learning_rate": 9.862387989086749e-06, "loss": 0.6351, "step": 1620 }, { "epoch": 0.1, "grad_norm": 0.912520706653595, "learning_rate": 9.862148833886504e-06, "loss": 0.6573, "step": 1621 }, { "epoch": 0.1, "grad_norm": 0.935406506061554, "learning_rate": 9.861909473958084e-06, "loss": 0.6349, "step": 1622 }, { "epoch": 0.1, "grad_norm": 0.9377623796463013, "learning_rate": 9.861669909311571e-06, "loss": 0.6324, "step": 1623 }, { "epoch": 0.1, "grad_norm": 0.8664435744285583, "learning_rate": 9.861430139957052e-06, "loss": 0.6517, "step": 1624 }, { "epoch": 0.1, "grad_norm": 0.9497208595275879, "learning_rate": 9.861190165904617e-06, "loss": 0.6703, "step": 1625 }, { "epoch": 0.1, "grad_norm": 0.9303921461105347, "learning_rate": 9.860949987164379e-06, "loss": 0.611, "step": 1626 }, { "epoch": 0.1, "grad_norm": 0.944831371307373, "learning_rate": 9.860709603746445e-06, "loss": 0.6534, "step": 1627 }, { "epoch": 0.1, "grad_norm": 0.9013164043426514, "learning_rate": 9.86046901566094e-06, "loss": 0.6457, "step": 1628 }, { "epoch": 0.1, "grad_norm": 0.9437874555587769, "learning_rate": 9.860228222917992e-06, "loss": 0.6238, "step": 1629 }, { "epoch": 0.1, "grad_norm": 0.901542067527771, "learning_rate": 9.859987225527742e-06, "loss": 0.6299, "step": 1630 }, { "epoch": 0.1, "grad_norm": 0.963375449180603, "learning_rate": 9.859746023500337e-06, "loss": 0.6798, "step": 1631 }, { "epoch": 0.1, "grad_norm": 0.9021002054214478, "learning_rate": 9.85950461684593e-06, "loss": 0.6386, "step": 1632 }, { "epoch": 0.1, "grad_norm": 0.932859480381012, "learning_rate": 9.85926300557469e-06, "loss": 0.6516, "step": 1633 }, { "epoch": 0.1, "grad_norm": 0.8896989822387695, "learning_rate": 9.85902118969679e-06, "loss": 0.6372, "step": 1634 }, { "epoch": 0.1, "grad_norm": 0.9466985464096069, "learning_rate": 9.85877916922241e-06, "loss": 0.6244, "step": 1635 }, { "epoch": 0.1, "grad_norm": 0.9208292961120605, "learning_rate": 9.858536944161743e-06, "loss": 0.6742, "step": 1636 }, { "epoch": 0.1, "grad_norm": 0.9316291213035583, "learning_rate": 9.858294514524987e-06, "loss": 0.6306, "step": 1637 }, { "epoch": 0.1, "grad_norm": 0.9085369110107422, "learning_rate": 9.858051880322347e-06, "loss": 0.5967, "step": 1638 }, { "epoch": 0.1, "grad_norm": 0.9222848415374756, "learning_rate": 9.857809041564044e-06, "loss": 0.656, "step": 1639 }, { "epoch": 0.1, "grad_norm": 0.9137614369392395, "learning_rate": 9.857565998260302e-06, "loss": 0.6778, "step": 1640 }, { "epoch": 0.1, "grad_norm": 0.8836297392845154, "learning_rate": 9.857322750421353e-06, "loss": 0.6172, "step": 1641 }, { "epoch": 0.1, "grad_norm": 0.9377101063728333, "learning_rate": 9.857079298057442e-06, "loss": 0.6562, "step": 1642 }, { "epoch": 0.1, "grad_norm": 0.938580334186554, "learning_rate": 9.856835641178816e-06, "loss": 0.6937, "step": 1643 }, { "epoch": 0.1, "grad_norm": 0.9680647253990173, "learning_rate": 9.856591779795738e-06, "loss": 0.6493, "step": 1644 }, { "epoch": 0.1, "grad_norm": 0.9074171781539917, "learning_rate": 9.856347713918475e-06, "loss": 0.6752, "step": 1645 }, { "epoch": 0.1, "grad_norm": 0.8547381162643433, "learning_rate": 9.856103443557304e-06, "loss": 0.623, "step": 1646 }, { "epoch": 0.1, "grad_norm": 1.0403729677200317, "learning_rate": 9.85585896872251e-06, "loss": 0.6593, "step": 1647 }, { "epoch": 0.1, "grad_norm": 0.9444959163665771, "learning_rate": 9.855614289424386e-06, "loss": 0.634, "step": 1648 }, { "epoch": 0.1, "grad_norm": 0.9254828095436096, "learning_rate": 9.855369405673236e-06, "loss": 0.5757, "step": 1649 }, { "epoch": 0.1, "grad_norm": 0.9265499711036682, "learning_rate": 9.855124317479372e-06, "loss": 0.6326, "step": 1650 }, { "epoch": 0.1, "grad_norm": 0.9064761400222778, "learning_rate": 9.854879024853113e-06, "loss": 0.6488, "step": 1651 }, { "epoch": 0.1, "grad_norm": 0.9563080072402954, "learning_rate": 9.854633527804787e-06, "loss": 0.642, "step": 1652 }, { "epoch": 0.1, "grad_norm": 0.838525116443634, "learning_rate": 9.85438782634473e-06, "loss": 0.5696, "step": 1653 }, { "epoch": 0.1, "grad_norm": 0.8792423009872437, "learning_rate": 9.854141920483289e-06, "loss": 0.6282, "step": 1654 }, { "epoch": 0.1, "grad_norm": 0.9897140264511108, "learning_rate": 9.853895810230818e-06, "loss": 0.6317, "step": 1655 }, { "epoch": 0.1, "grad_norm": 0.9357428550720215, "learning_rate": 9.853649495597682e-06, "loss": 0.691, "step": 1656 }, { "epoch": 0.1, "grad_norm": 0.8924740552902222, "learning_rate": 9.853402976594248e-06, "loss": 0.6754, "step": 1657 }, { "epoch": 0.11, "grad_norm": 0.9512656331062317, "learning_rate": 9.8531562532309e-06, "loss": 0.6218, "step": 1658 }, { "epoch": 0.11, "grad_norm": 0.9587389826774597, "learning_rate": 9.852909325518022e-06, "loss": 0.6707, "step": 1659 }, { "epoch": 0.11, "grad_norm": 0.9361017942428589, "learning_rate": 9.852662193466019e-06, "loss": 0.6475, "step": 1660 }, { "epoch": 0.11, "grad_norm": 0.9150497913360596, "learning_rate": 9.852414857085288e-06, "loss": 0.7143, "step": 1661 }, { "epoch": 0.11, "grad_norm": 0.9618809223175049, "learning_rate": 9.85216731638625e-06, "loss": 0.646, "step": 1662 }, { "epoch": 0.11, "grad_norm": 0.8974446654319763, "learning_rate": 9.851919571379326e-06, "loss": 0.6958, "step": 1663 }, { "epoch": 0.11, "grad_norm": 0.9085642099380493, "learning_rate": 9.851671622074947e-06, "loss": 0.6291, "step": 1664 }, { "epoch": 0.11, "grad_norm": 0.9605396389961243, "learning_rate": 9.851423468483554e-06, "loss": 0.669, "step": 1665 }, { "epoch": 0.11, "grad_norm": 1.0041121244430542, "learning_rate": 9.851175110615594e-06, "loss": 0.5982, "step": 1666 }, { "epoch": 0.11, "grad_norm": 0.9205458164215088, "learning_rate": 9.850926548481528e-06, "loss": 0.6587, "step": 1667 }, { "epoch": 0.11, "grad_norm": 0.9921442270278931, "learning_rate": 9.850677782091818e-06, "loss": 0.6505, "step": 1668 }, { "epoch": 0.11, "grad_norm": 0.9092791080474854, "learning_rate": 9.850428811456943e-06, "loss": 0.5881, "step": 1669 }, { "epoch": 0.11, "grad_norm": 1.0009846687316895, "learning_rate": 9.850179636587383e-06, "loss": 0.657, "step": 1670 }, { "epoch": 0.11, "grad_norm": 0.9284378886222839, "learning_rate": 9.849930257493632e-06, "loss": 0.616, "step": 1671 }, { "epoch": 0.11, "grad_norm": 1.0465761423110962, "learning_rate": 9.849680674186188e-06, "loss": 0.7, "step": 1672 }, { "epoch": 0.11, "grad_norm": 0.9236946105957031, "learning_rate": 9.849430886675564e-06, "loss": 0.6498, "step": 1673 }, { "epoch": 0.11, "grad_norm": 0.8840457201004028, "learning_rate": 9.849180894972272e-06, "loss": 0.6517, "step": 1674 }, { "epoch": 0.11, "grad_norm": 0.8201990723609924, "learning_rate": 9.848930699086846e-06, "loss": 0.6403, "step": 1675 }, { "epoch": 0.11, "grad_norm": 0.9330858588218689, "learning_rate": 9.848680299029813e-06, "loss": 0.6374, "step": 1676 }, { "epoch": 0.11, "grad_norm": 0.9151015877723694, "learning_rate": 9.848429694811721e-06, "loss": 0.5886, "step": 1677 }, { "epoch": 0.11, "grad_norm": 0.9654482007026672, "learning_rate": 9.84817888644312e-06, "loss": 0.6554, "step": 1678 }, { "epoch": 0.11, "grad_norm": 0.9523151516914368, "learning_rate": 9.847927873934573e-06, "loss": 0.6361, "step": 1679 }, { "epoch": 0.11, "grad_norm": 0.9912353157997131, "learning_rate": 9.847676657296647e-06, "loss": 0.6584, "step": 1680 }, { "epoch": 0.11, "grad_norm": 0.937496542930603, "learning_rate": 9.847425236539922e-06, "loss": 0.6502, "step": 1681 }, { "epoch": 0.11, "grad_norm": 0.8653977513313293, "learning_rate": 9.847173611674982e-06, "loss": 0.605, "step": 1682 }, { "epoch": 0.11, "grad_norm": 0.9031038880348206, "learning_rate": 9.846921782712424e-06, "loss": 0.6144, "step": 1683 }, { "epoch": 0.11, "grad_norm": 0.9280396699905396, "learning_rate": 9.846669749662851e-06, "loss": 0.615, "step": 1684 }, { "epoch": 0.11, "grad_norm": 0.9092390537261963, "learning_rate": 9.846417512536874e-06, "loss": 0.6235, "step": 1685 }, { "epoch": 0.11, "grad_norm": 0.9543402791023254, "learning_rate": 9.846165071345118e-06, "loss": 0.6555, "step": 1686 }, { "epoch": 0.11, "grad_norm": 1.045227289199829, "learning_rate": 9.845912426098206e-06, "loss": 0.6761, "step": 1687 }, { "epoch": 0.11, "grad_norm": 0.9226247072219849, "learning_rate": 9.845659576806781e-06, "loss": 0.6211, "step": 1688 }, { "epoch": 0.11, "grad_norm": 0.9279161691665649, "learning_rate": 9.845406523481488e-06, "loss": 0.6579, "step": 1689 }, { "epoch": 0.11, "grad_norm": 0.91354900598526, "learning_rate": 9.845153266132981e-06, "loss": 0.6024, "step": 1690 }, { "epoch": 0.11, "grad_norm": 0.9093358516693115, "learning_rate": 9.844899804771927e-06, "loss": 0.6232, "step": 1691 }, { "epoch": 0.11, "grad_norm": 0.9965054988861084, "learning_rate": 9.844646139408995e-06, "loss": 0.6429, "step": 1692 }, { "epoch": 0.11, "grad_norm": 0.9603714346885681, "learning_rate": 9.844392270054868e-06, "loss": 0.622, "step": 1693 }, { "epoch": 0.11, "grad_norm": 0.9581913948059082, "learning_rate": 9.844138196720236e-06, "loss": 0.6845, "step": 1694 }, { "epoch": 0.11, "grad_norm": 0.911685585975647, "learning_rate": 9.843883919415795e-06, "loss": 0.685, "step": 1695 }, { "epoch": 0.11, "grad_norm": 0.9244683980941772, "learning_rate": 9.843629438152252e-06, "loss": 0.6441, "step": 1696 }, { "epoch": 0.11, "grad_norm": 0.9643012285232544, "learning_rate": 9.843374752940323e-06, "loss": 0.6346, "step": 1697 }, { "epoch": 0.11, "grad_norm": 0.9334665536880493, "learning_rate": 9.843119863790733e-06, "loss": 0.7161, "step": 1698 }, { "epoch": 0.11, "grad_norm": 0.9710047245025635, "learning_rate": 9.842864770714213e-06, "loss": 0.6233, "step": 1699 }, { "epoch": 0.11, "grad_norm": 0.9540897607803345, "learning_rate": 9.842609473721505e-06, "loss": 0.6271, "step": 1700 }, { "epoch": 0.11, "grad_norm": 0.9325253367424011, "learning_rate": 9.842353972823358e-06, "loss": 0.6153, "step": 1701 }, { "epoch": 0.11, "grad_norm": 0.8942682147026062, "learning_rate": 9.842098268030532e-06, "loss": 0.5922, "step": 1702 }, { "epoch": 0.11, "grad_norm": 0.930939793586731, "learning_rate": 9.84184235935379e-06, "loss": 0.6366, "step": 1703 }, { "epoch": 0.11, "grad_norm": 0.9117228984832764, "learning_rate": 9.84158624680391e-06, "loss": 0.647, "step": 1704 }, { "epoch": 0.11, "grad_norm": 0.9259521961212158, "learning_rate": 9.841329930391678e-06, "loss": 0.6384, "step": 1705 }, { "epoch": 0.11, "grad_norm": 0.867400050163269, "learning_rate": 9.841073410127884e-06, "loss": 0.6741, "step": 1706 }, { "epoch": 0.11, "grad_norm": 1.0280332565307617, "learning_rate": 9.840816686023329e-06, "loss": 0.64, "step": 1707 }, { "epoch": 0.11, "grad_norm": 0.9076325297355652, "learning_rate": 9.840559758088821e-06, "loss": 0.5936, "step": 1708 }, { "epoch": 0.11, "grad_norm": 0.9110800623893738, "learning_rate": 9.840302626335182e-06, "loss": 0.6145, "step": 1709 }, { "epoch": 0.11, "grad_norm": 0.8760718107223511, "learning_rate": 9.84004529077324e-06, "loss": 0.6375, "step": 1710 }, { "epoch": 0.11, "grad_norm": 0.9137043356895447, "learning_rate": 9.839787751413825e-06, "loss": 0.6016, "step": 1711 }, { "epoch": 0.11, "grad_norm": 0.8688681125640869, "learning_rate": 9.839530008267785e-06, "loss": 0.6208, "step": 1712 }, { "epoch": 0.11, "grad_norm": 0.9339778423309326, "learning_rate": 9.839272061345974e-06, "loss": 0.6514, "step": 1713 }, { "epoch": 0.11, "grad_norm": 0.9327898025512695, "learning_rate": 9.839013910659249e-06, "loss": 0.6528, "step": 1714 }, { "epoch": 0.11, "grad_norm": 0.9317489266395569, "learning_rate": 9.838755556218483e-06, "loss": 0.6433, "step": 1715 }, { "epoch": 0.11, "grad_norm": 0.9479151368141174, "learning_rate": 9.838496998034552e-06, "loss": 0.692, "step": 1716 }, { "epoch": 0.11, "grad_norm": 0.9237775802612305, "learning_rate": 9.838238236118344e-06, "loss": 0.621, "step": 1717 }, { "epoch": 0.11, "grad_norm": 0.9310511946678162, "learning_rate": 9.837979270480758e-06, "loss": 0.6333, "step": 1718 }, { "epoch": 0.11, "grad_norm": 0.877641499042511, "learning_rate": 9.837720101132692e-06, "loss": 0.6943, "step": 1719 }, { "epoch": 0.11, "grad_norm": 0.9281149506568909, "learning_rate": 9.837460728085062e-06, "loss": 0.6446, "step": 1720 }, { "epoch": 0.11, "grad_norm": 0.9767260551452637, "learning_rate": 9.83720115134879e-06, "loss": 0.7114, "step": 1721 }, { "epoch": 0.11, "grad_norm": 0.9054911732673645, "learning_rate": 9.836941370934806e-06, "loss": 0.6143, "step": 1722 }, { "epoch": 0.11, "grad_norm": 0.9925005435943604, "learning_rate": 9.836681386854045e-06, "loss": 0.6386, "step": 1723 }, { "epoch": 0.11, "grad_norm": 0.9308101534843445, "learning_rate": 9.836421199117456e-06, "loss": 0.6501, "step": 1724 }, { "epoch": 0.11, "grad_norm": 0.9074007868766785, "learning_rate": 9.836160807735997e-06, "loss": 0.6792, "step": 1725 }, { "epoch": 0.11, "grad_norm": 0.9303346276283264, "learning_rate": 9.83590021272063e-06, "loss": 0.6218, "step": 1726 }, { "epoch": 0.11, "grad_norm": 0.9566819667816162, "learning_rate": 9.835639414082327e-06, "loss": 0.6525, "step": 1727 }, { "epoch": 0.11, "grad_norm": 0.9563994407653809, "learning_rate": 9.83537841183207e-06, "loss": 0.6473, "step": 1728 }, { "epoch": 0.11, "grad_norm": 0.9133448600769043, "learning_rate": 9.83511720598085e-06, "loss": 0.61, "step": 1729 }, { "epoch": 0.11, "grad_norm": 0.9543222784996033, "learning_rate": 9.834855796539665e-06, "loss": 0.614, "step": 1730 }, { "epoch": 0.11, "grad_norm": 0.9356175661087036, "learning_rate": 9.834594183519521e-06, "loss": 0.6181, "step": 1731 }, { "epoch": 0.11, "grad_norm": 0.9626755118370056, "learning_rate": 9.834332366931435e-06, "loss": 0.6355, "step": 1732 }, { "epoch": 0.11, "grad_norm": 0.9340695142745972, "learning_rate": 9.834070346786428e-06, "loss": 0.6235, "step": 1733 }, { "epoch": 0.11, "grad_norm": 0.916644811630249, "learning_rate": 9.833808123095538e-06, "loss": 0.6401, "step": 1734 }, { "epoch": 0.11, "grad_norm": 0.9744462370872498, "learning_rate": 9.833545695869802e-06, "loss": 0.6916, "step": 1735 }, { "epoch": 0.11, "grad_norm": 0.9321338534355164, "learning_rate": 9.833283065120272e-06, "loss": 0.6363, "step": 1736 }, { "epoch": 0.11, "grad_norm": 0.9485877752304077, "learning_rate": 9.833020230858005e-06, "loss": 0.6865, "step": 1737 }, { "epoch": 0.11, "grad_norm": 0.8846791982650757, "learning_rate": 9.832757193094072e-06, "loss": 0.6522, "step": 1738 }, { "epoch": 0.11, "grad_norm": 0.8400406837463379, "learning_rate": 9.832493951839541e-06, "loss": 0.626, "step": 1739 }, { "epoch": 0.11, "grad_norm": 0.9057971239089966, "learning_rate": 9.832230507105504e-06, "loss": 0.6248, "step": 1740 }, { "epoch": 0.11, "grad_norm": 0.9419105052947998, "learning_rate": 9.831966858903049e-06, "loss": 0.6535, "step": 1741 }, { "epoch": 0.11, "grad_norm": 0.8836336135864258, "learning_rate": 9.83170300724328e-06, "loss": 0.6482, "step": 1742 }, { "epoch": 0.11, "grad_norm": 0.861971378326416, "learning_rate": 9.831438952137304e-06, "loss": 0.6039, "step": 1743 }, { "epoch": 0.11, "grad_norm": 0.8943654298782349, "learning_rate": 9.831174693596241e-06, "loss": 0.6038, "step": 1744 }, { "epoch": 0.11, "grad_norm": 0.9814664721488953, "learning_rate": 9.83091023163122e-06, "loss": 0.6729, "step": 1745 }, { "epoch": 0.11, "grad_norm": 0.8936158418655396, "learning_rate": 9.830645566253374e-06, "loss": 0.6335, "step": 1746 }, { "epoch": 0.11, "grad_norm": 0.9102863073348999, "learning_rate": 9.830380697473848e-06, "loss": 0.6611, "step": 1747 }, { "epoch": 0.11, "grad_norm": 0.9278464913368225, "learning_rate": 9.830115625303793e-06, "loss": 0.6865, "step": 1748 }, { "epoch": 0.11, "grad_norm": 0.921346127986908, "learning_rate": 9.829850349754373e-06, "loss": 0.6441, "step": 1749 }, { "epoch": 0.11, "grad_norm": 0.9085983037948608, "learning_rate": 9.829584870836756e-06, "loss": 0.6905, "step": 1750 }, { "epoch": 0.11, "grad_norm": 0.8647844195365906, "learning_rate": 9.82931918856212e-06, "loss": 0.6083, "step": 1751 }, { "epoch": 0.11, "grad_norm": 0.9357777237892151, "learning_rate": 9.829053302941656e-06, "loss": 0.6546, "step": 1752 }, { "epoch": 0.11, "grad_norm": 0.9371474981307983, "learning_rate": 9.828787213986554e-06, "loss": 0.6826, "step": 1753 }, { "epoch": 0.11, "grad_norm": 0.8982768654823303, "learning_rate": 9.82852092170802e-06, "loss": 0.6184, "step": 1754 }, { "epoch": 0.11, "grad_norm": 0.8816835284233093, "learning_rate": 9.82825442611727e-06, "loss": 0.6204, "step": 1755 }, { "epoch": 0.11, "grad_norm": 1.0181394815444946, "learning_rate": 9.82798772722552e-06, "loss": 0.6937, "step": 1756 }, { "epoch": 0.11, "grad_norm": 0.9692837595939636, "learning_rate": 9.827720825044003e-06, "loss": 0.6443, "step": 1757 }, { "epoch": 0.11, "grad_norm": 0.9221457242965698, "learning_rate": 9.827453719583957e-06, "loss": 0.6109, "step": 1758 }, { "epoch": 0.11, "grad_norm": 0.8878170847892761, "learning_rate": 9.827186410856627e-06, "loss": 0.5887, "step": 1759 }, { "epoch": 0.11, "grad_norm": 0.9767280220985413, "learning_rate": 9.82691889887327e-06, "loss": 0.5916, "step": 1760 }, { "epoch": 0.11, "grad_norm": 0.9061947464942932, "learning_rate": 9.82665118364515e-06, "loss": 0.6084, "step": 1761 }, { "epoch": 0.11, "grad_norm": 0.9700713753700256, "learning_rate": 9.82638326518354e-06, "loss": 0.6003, "step": 1762 }, { "epoch": 0.11, "grad_norm": 0.977722704410553, "learning_rate": 9.826115143499721e-06, "loss": 0.6788, "step": 1763 }, { "epoch": 0.11, "grad_norm": 0.9503071904182434, "learning_rate": 9.82584681860498e-06, "loss": 0.6288, "step": 1764 }, { "epoch": 0.11, "grad_norm": 0.9503450393676758, "learning_rate": 9.82557829051062e-06, "loss": 0.6407, "step": 1765 }, { "epoch": 0.11, "grad_norm": 1.0136359930038452, "learning_rate": 9.825309559227944e-06, "loss": 0.7054, "step": 1766 }, { "epoch": 0.11, "grad_norm": 1.0087224245071411, "learning_rate": 9.825040624768267e-06, "loss": 0.6528, "step": 1767 }, { "epoch": 0.11, "grad_norm": 0.9373607039451599, "learning_rate": 9.824771487142917e-06, "loss": 0.6851, "step": 1768 }, { "epoch": 0.11, "grad_norm": 0.863404393196106, "learning_rate": 9.824502146363222e-06, "loss": 0.6083, "step": 1769 }, { "epoch": 0.11, "grad_norm": 0.9172773361206055, "learning_rate": 9.824232602440524e-06, "loss": 0.647, "step": 1770 }, { "epoch": 0.11, "grad_norm": 0.8769250512123108, "learning_rate": 9.823962855386175e-06, "loss": 0.6657, "step": 1771 }, { "epoch": 0.11, "grad_norm": 0.9721053838729858, "learning_rate": 9.823692905211533e-06, "loss": 0.5903, "step": 1772 }, { "epoch": 0.11, "grad_norm": 0.9308158159255981, "learning_rate": 9.823422751927961e-06, "loss": 0.6218, "step": 1773 }, { "epoch": 0.11, "grad_norm": 0.9058137536048889, "learning_rate": 9.823152395546836e-06, "loss": 0.6584, "step": 1774 }, { "epoch": 0.11, "grad_norm": 0.9022964239120483, "learning_rate": 9.822881836079543e-06, "loss": 0.6114, "step": 1775 }, { "epoch": 0.11, "grad_norm": 0.9127461910247803, "learning_rate": 9.822611073537474e-06, "loss": 0.653, "step": 1776 }, { "epoch": 0.11, "grad_norm": 0.9295786023139954, "learning_rate": 9.822340107932028e-06, "loss": 0.569, "step": 1777 }, { "epoch": 0.11, "grad_norm": 0.945152759552002, "learning_rate": 9.822068939274616e-06, "loss": 0.6499, "step": 1778 }, { "epoch": 0.11, "grad_norm": 0.9397128224372864, "learning_rate": 9.821797567576656e-06, "loss": 0.6069, "step": 1779 }, { "epoch": 0.11, "grad_norm": 1.0003842115402222, "learning_rate": 9.821525992849575e-06, "loss": 0.6875, "step": 1780 }, { "epoch": 0.11, "grad_norm": 0.9174728393554688, "learning_rate": 9.821254215104808e-06, "loss": 0.673, "step": 1781 }, { "epoch": 0.11, "grad_norm": 0.9309795498847961, "learning_rate": 9.820982234353795e-06, "loss": 0.6023, "step": 1782 }, { "epoch": 0.11, "grad_norm": 0.9999585747718811, "learning_rate": 9.820710050607994e-06, "loss": 0.6542, "step": 1783 }, { "epoch": 0.11, "grad_norm": 0.8771758675575256, "learning_rate": 9.820437663878862e-06, "loss": 0.632, "step": 1784 }, { "epoch": 0.11, "grad_norm": 0.9545724391937256, "learning_rate": 9.820165074177867e-06, "loss": 0.673, "step": 1785 }, { "epoch": 0.11, "grad_norm": 0.9740934371948242, "learning_rate": 9.819892281516491e-06, "loss": 0.6621, "step": 1786 }, { "epoch": 0.11, "grad_norm": 0.9247666597366333, "learning_rate": 9.819619285906217e-06, "loss": 0.609, "step": 1787 }, { "epoch": 0.11, "grad_norm": 0.9412689208984375, "learning_rate": 9.819346087358542e-06, "loss": 0.689, "step": 1788 }, { "epoch": 0.11, "grad_norm": 1.0129718780517578, "learning_rate": 9.819072685884969e-06, "loss": 0.6117, "step": 1789 }, { "epoch": 0.11, "grad_norm": 0.9594516754150391, "learning_rate": 9.818799081497008e-06, "loss": 0.6672, "step": 1790 }, { "epoch": 0.11, "grad_norm": 0.9422503709793091, "learning_rate": 9.818525274206184e-06, "loss": 0.6601, "step": 1791 }, { "epoch": 0.11, "grad_norm": 0.9016688466072083, "learning_rate": 9.818251264024018e-06, "loss": 0.6811, "step": 1792 }, { "epoch": 0.11, "grad_norm": 0.9061679244041443, "learning_rate": 9.817977050962058e-06, "loss": 0.6095, "step": 1793 }, { "epoch": 0.11, "grad_norm": 0.973602831363678, "learning_rate": 9.817702635031842e-06, "loss": 0.6499, "step": 1794 }, { "epoch": 0.11, "grad_norm": 0.9249684810638428, "learning_rate": 9.817428016244928e-06, "loss": 0.6369, "step": 1795 }, { "epoch": 0.11, "grad_norm": 0.9385564923286438, "learning_rate": 9.81715319461288e-06, "loss": 0.7354, "step": 1796 }, { "epoch": 0.11, "grad_norm": 0.9195820093154907, "learning_rate": 9.816878170147268e-06, "loss": 0.6723, "step": 1797 }, { "epoch": 0.11, "grad_norm": 1.0029053688049316, "learning_rate": 9.816602942859672e-06, "loss": 0.6807, "step": 1798 }, { "epoch": 0.11, "grad_norm": 1.0146007537841797, "learning_rate": 9.816327512761683e-06, "loss": 0.6377, "step": 1799 }, { "epoch": 0.11, "grad_norm": 0.9209313988685608, "learning_rate": 9.816051879864896e-06, "loss": 0.5904, "step": 1800 }, { "epoch": 0.11, "grad_norm": 0.8723121881484985, "learning_rate": 9.81577604418092e-06, "loss": 0.5807, "step": 1801 }, { "epoch": 0.11, "grad_norm": 0.9405813217163086, "learning_rate": 9.815500005721365e-06, "loss": 0.6401, "step": 1802 }, { "epoch": 0.11, "grad_norm": 1.0136600732803345, "learning_rate": 9.815223764497859e-06, "loss": 0.6395, "step": 1803 }, { "epoch": 0.11, "grad_norm": 0.8953354358673096, "learning_rate": 9.814947320522031e-06, "loss": 0.6236, "step": 1804 }, { "epoch": 0.11, "grad_norm": 0.9782286882400513, "learning_rate": 9.81467067380552e-06, "loss": 0.6592, "step": 1805 }, { "epoch": 0.11, "grad_norm": 0.8998913168907166, "learning_rate": 9.814393824359975e-06, "loss": 0.6448, "step": 1806 }, { "epoch": 0.11, "grad_norm": 0.8747649788856506, "learning_rate": 9.814116772197058e-06, "loss": 0.6038, "step": 1807 }, { "epoch": 0.11, "grad_norm": 0.980236828327179, "learning_rate": 9.813839517328428e-06, "loss": 0.6272, "step": 1808 }, { "epoch": 0.11, "grad_norm": 0.9255844354629517, "learning_rate": 9.813562059765762e-06, "loss": 0.6626, "step": 1809 }, { "epoch": 0.11, "grad_norm": 0.9551252722740173, "learning_rate": 9.813284399520744e-06, "loss": 0.6511, "step": 1810 }, { "epoch": 0.11, "grad_norm": 0.9699724912643433, "learning_rate": 9.813006536605063e-06, "loss": 0.6487, "step": 1811 }, { "epoch": 0.11, "grad_norm": 0.9553450345993042, "learning_rate": 9.812728471030421e-06, "loss": 0.6733, "step": 1812 }, { "epoch": 0.11, "grad_norm": 0.9273084402084351, "learning_rate": 9.812450202808525e-06, "loss": 0.6379, "step": 1813 }, { "epoch": 0.11, "grad_norm": 0.9686819911003113, "learning_rate": 9.812171731951092e-06, "loss": 0.6156, "step": 1814 }, { "epoch": 0.11, "grad_norm": 0.9068811535835266, "learning_rate": 9.811893058469848e-06, "loss": 0.6301, "step": 1815 }, { "epoch": 0.12, "grad_norm": 0.8845064043998718, "learning_rate": 9.811614182376527e-06, "loss": 0.6841, "step": 1816 }, { "epoch": 0.12, "grad_norm": 0.9105836153030396, "learning_rate": 9.811335103682872e-06, "loss": 0.6024, "step": 1817 }, { "epoch": 0.12, "grad_norm": 0.9044576287269592, "learning_rate": 9.81105582240063e-06, "loss": 0.6668, "step": 1818 }, { "epoch": 0.12, "grad_norm": 0.9328505992889404, "learning_rate": 9.810776338541566e-06, "loss": 0.6684, "step": 1819 }, { "epoch": 0.12, "grad_norm": 0.9516772031784058, "learning_rate": 9.810496652117445e-06, "loss": 0.6531, "step": 1820 }, { "epoch": 0.12, "grad_norm": 0.8713773488998413, "learning_rate": 9.810216763140046e-06, "loss": 0.586, "step": 1821 }, { "epoch": 0.12, "grad_norm": 0.9502965807914734, "learning_rate": 9.809936671621151e-06, "loss": 0.5754, "step": 1822 }, { "epoch": 0.12, "grad_norm": 0.9351384043693542, "learning_rate": 9.809656377572556e-06, "loss": 0.6786, "step": 1823 }, { "epoch": 0.12, "grad_norm": 0.8560097217559814, "learning_rate": 9.809375881006063e-06, "loss": 0.5677, "step": 1824 }, { "epoch": 0.12, "grad_norm": 0.8590288162231445, "learning_rate": 9.809095181933482e-06, "loss": 0.6032, "step": 1825 }, { "epoch": 0.12, "grad_norm": 1.0070056915283203, "learning_rate": 9.808814280366632e-06, "loss": 0.6919, "step": 1826 }, { "epoch": 0.12, "grad_norm": 0.9655309915542603, "learning_rate": 9.808533176317341e-06, "loss": 0.6631, "step": 1827 }, { "epoch": 0.12, "grad_norm": 1.0063858032226562, "learning_rate": 9.808251869797445e-06, "loss": 0.6876, "step": 1828 }, { "epoch": 0.12, "grad_norm": 0.9091975092887878, "learning_rate": 9.807970360818791e-06, "loss": 0.6122, "step": 1829 }, { "epoch": 0.12, "grad_norm": 1.0076450109481812, "learning_rate": 9.80768864939323e-06, "loss": 0.6995, "step": 1830 }, { "epoch": 0.12, "grad_norm": 0.8727695345878601, "learning_rate": 9.807406735532625e-06, "loss": 0.6056, "step": 1831 }, { "epoch": 0.12, "grad_norm": 0.9693520069122314, "learning_rate": 9.807124619248847e-06, "loss": 0.6708, "step": 1832 }, { "epoch": 0.12, "grad_norm": 0.993155300617218, "learning_rate": 9.806842300553772e-06, "loss": 0.6415, "step": 1833 }, { "epoch": 0.12, "grad_norm": 0.9352355599403381, "learning_rate": 9.806559779459291e-06, "loss": 0.6858, "step": 1834 }, { "epoch": 0.12, "grad_norm": 0.8845545649528503, "learning_rate": 9.806277055977299e-06, "loss": 0.6022, "step": 1835 }, { "epoch": 0.12, "grad_norm": 0.9431570768356323, "learning_rate": 9.8059941301197e-06, "loss": 0.6488, "step": 1836 }, { "epoch": 0.12, "grad_norm": 0.9353639483451843, "learning_rate": 9.805711001898406e-06, "loss": 0.6399, "step": 1837 }, { "epoch": 0.12, "grad_norm": 0.9036180973052979, "learning_rate": 9.805427671325339e-06, "loss": 0.6234, "step": 1838 }, { "epoch": 0.12, "grad_norm": 0.8938383460044861, "learning_rate": 9.80514413841243e-06, "loss": 0.5884, "step": 1839 }, { "epoch": 0.12, "grad_norm": 1.001819372177124, "learning_rate": 9.804860403171617e-06, "loss": 0.6657, "step": 1840 }, { "epoch": 0.12, "grad_norm": 0.9125610589981079, "learning_rate": 9.804576465614848e-06, "loss": 0.6196, "step": 1841 }, { "epoch": 0.12, "grad_norm": 0.9416166543960571, "learning_rate": 9.804292325754079e-06, "loss": 0.6596, "step": 1842 }, { "epoch": 0.12, "grad_norm": 0.9415349960327148, "learning_rate": 9.804007983601271e-06, "loss": 0.6558, "step": 1843 }, { "epoch": 0.12, "grad_norm": 0.8949640393257141, "learning_rate": 9.8037234391684e-06, "loss": 0.6629, "step": 1844 }, { "epoch": 0.12, "grad_norm": 0.9415730237960815, "learning_rate": 9.803438692467446e-06, "loss": 0.6158, "step": 1845 }, { "epoch": 0.12, "grad_norm": 0.8983997106552124, "learning_rate": 9.8031537435104e-06, "loss": 0.6057, "step": 1846 }, { "epoch": 0.12, "grad_norm": 0.8986216187477112, "learning_rate": 9.802868592309255e-06, "loss": 0.6404, "step": 1847 }, { "epoch": 0.12, "grad_norm": 0.9952399134635925, "learning_rate": 9.802583238876024e-06, "loss": 0.6655, "step": 1848 }, { "epoch": 0.12, "grad_norm": 0.908902108669281, "learning_rate": 9.80229768322272e-06, "loss": 0.6155, "step": 1849 }, { "epoch": 0.12, "grad_norm": 0.9122000932693481, "learning_rate": 9.802011925361366e-06, "loss": 0.6594, "step": 1850 }, { "epoch": 0.12, "grad_norm": 0.970879077911377, "learning_rate": 9.801725965303995e-06, "loss": 0.5872, "step": 1851 }, { "epoch": 0.12, "grad_norm": 0.9796939492225647, "learning_rate": 9.801439803062646e-06, "loss": 0.6749, "step": 1852 }, { "epoch": 0.12, "grad_norm": 0.8634384274482727, "learning_rate": 9.801153438649371e-06, "loss": 0.6442, "step": 1853 }, { "epoch": 0.12, "grad_norm": 0.9319069981575012, "learning_rate": 9.800866872076227e-06, "loss": 0.6265, "step": 1854 }, { "epoch": 0.12, "grad_norm": 0.8886886239051819, "learning_rate": 9.80058010335528e-06, "loss": 0.6443, "step": 1855 }, { "epoch": 0.12, "grad_norm": 0.885466992855072, "learning_rate": 9.800293132498603e-06, "loss": 0.6565, "step": 1856 }, { "epoch": 0.12, "grad_norm": 0.9097492694854736, "learning_rate": 9.800005959518284e-06, "loss": 0.6244, "step": 1857 }, { "epoch": 0.12, "grad_norm": 0.9563896059989929, "learning_rate": 9.79971858442641e-06, "loss": 0.6734, "step": 1858 }, { "epoch": 0.12, "grad_norm": 0.9626286625862122, "learning_rate": 9.799431007235086e-06, "loss": 0.6489, "step": 1859 }, { "epoch": 0.12, "grad_norm": 0.9317120313644409, "learning_rate": 9.799143227956416e-06, "loss": 0.6892, "step": 1860 }, { "epoch": 0.12, "grad_norm": 0.9715713262557983, "learning_rate": 9.798855246602522e-06, "loss": 0.6715, "step": 1861 }, { "epoch": 0.12, "grad_norm": 0.9262539148330688, "learning_rate": 9.798567063185525e-06, "loss": 0.6057, "step": 1862 }, { "epoch": 0.12, "grad_norm": 0.9007180333137512, "learning_rate": 9.798278677717562e-06, "loss": 0.6343, "step": 1863 }, { "epoch": 0.12, "grad_norm": 0.9322105646133423, "learning_rate": 9.797990090210777e-06, "loss": 0.6516, "step": 1864 }, { "epoch": 0.12, "grad_norm": 0.8944317102432251, "learning_rate": 9.79770130067732e-06, "loss": 0.6814, "step": 1865 }, { "epoch": 0.12, "grad_norm": 0.8687607049942017, "learning_rate": 9.797412309129351e-06, "loss": 0.6282, "step": 1866 }, { "epoch": 0.12, "grad_norm": 0.9158695340156555, "learning_rate": 9.79712311557904e-06, "loss": 0.6178, "step": 1867 }, { "epoch": 0.12, "grad_norm": 0.9163758754730225, "learning_rate": 9.79683372003856e-06, "loss": 0.6176, "step": 1868 }, { "epoch": 0.12, "grad_norm": 0.9717338681221008, "learning_rate": 9.796544122520101e-06, "loss": 0.6533, "step": 1869 }, { "epoch": 0.12, "grad_norm": 0.880803108215332, "learning_rate": 9.796254323035854e-06, "loss": 0.5912, "step": 1870 }, { "epoch": 0.12, "grad_norm": 0.8988786935806274, "learning_rate": 9.795964321598023e-06, "loss": 0.6374, "step": 1871 }, { "epoch": 0.12, "grad_norm": 0.870625376701355, "learning_rate": 9.795674118218819e-06, "loss": 0.5529, "step": 1872 }, { "epoch": 0.12, "grad_norm": 0.8748095631599426, "learning_rate": 9.795383712910458e-06, "loss": 0.6148, "step": 1873 }, { "epoch": 0.12, "grad_norm": 0.962794303894043, "learning_rate": 9.795093105685175e-06, "loss": 0.658, "step": 1874 }, { "epoch": 0.12, "grad_norm": 0.9512926340103149, "learning_rate": 9.794802296555198e-06, "loss": 0.6414, "step": 1875 }, { "epoch": 0.12, "grad_norm": 0.8706688284873962, "learning_rate": 9.79451128553278e-06, "loss": 0.575, "step": 1876 }, { "epoch": 0.12, "grad_norm": 0.9835572838783264, "learning_rate": 9.794220072630168e-06, "loss": 0.6807, "step": 1877 }, { "epoch": 0.12, "grad_norm": 0.9953154921531677, "learning_rate": 9.793928657859627e-06, "loss": 0.6794, "step": 1878 }, { "epoch": 0.12, "grad_norm": 0.9724439382553101, "learning_rate": 9.793637041233428e-06, "loss": 0.6771, "step": 1879 }, { "epoch": 0.12, "grad_norm": 0.9492095708847046, "learning_rate": 9.793345222763847e-06, "loss": 0.6477, "step": 1880 }, { "epoch": 0.12, "grad_norm": 0.8991506099700928, "learning_rate": 9.793053202463176e-06, "loss": 0.6047, "step": 1881 }, { "epoch": 0.12, "grad_norm": 1.0061862468719482, "learning_rate": 9.792760980343708e-06, "loss": 0.6526, "step": 1882 }, { "epoch": 0.12, "grad_norm": 0.9687420725822449, "learning_rate": 9.792468556417746e-06, "loss": 0.593, "step": 1883 }, { "epoch": 0.12, "grad_norm": 0.9339932203292847, "learning_rate": 9.792175930697608e-06, "loss": 0.683, "step": 1884 }, { "epoch": 0.12, "grad_norm": 0.900341808795929, "learning_rate": 9.79188310319561e-06, "loss": 0.6468, "step": 1885 }, { "epoch": 0.12, "grad_norm": 1.0057995319366455, "learning_rate": 9.791590073924086e-06, "loss": 0.7412, "step": 1886 }, { "epoch": 0.12, "grad_norm": 0.9101889729499817, "learning_rate": 9.79129684289537e-06, "loss": 0.6269, "step": 1887 }, { "epoch": 0.12, "grad_norm": 0.8851762413978577, "learning_rate": 9.791003410121815e-06, "loss": 0.6335, "step": 1888 }, { "epoch": 0.12, "grad_norm": 0.9171597361564636, "learning_rate": 9.79070977561577e-06, "loss": 0.6272, "step": 1889 }, { "epoch": 0.12, "grad_norm": 0.8824100494384766, "learning_rate": 9.790415939389604e-06, "loss": 0.6263, "step": 1890 }, { "epoch": 0.12, "grad_norm": 0.8969504237174988, "learning_rate": 9.790121901455687e-06, "loss": 0.6059, "step": 1891 }, { "epoch": 0.12, "grad_norm": 0.9351462125778198, "learning_rate": 9.7898276618264e-06, "loss": 0.5978, "step": 1892 }, { "epoch": 0.12, "grad_norm": 0.8654520511627197, "learning_rate": 9.789533220514132e-06, "loss": 0.5934, "step": 1893 }, { "epoch": 0.12, "grad_norm": 0.9118187427520752, "learning_rate": 9.789238577531284e-06, "loss": 0.6832, "step": 1894 }, { "epoch": 0.12, "grad_norm": 0.9300076365470886, "learning_rate": 9.788943732890258e-06, "loss": 0.5968, "step": 1895 }, { "epoch": 0.12, "grad_norm": 0.9657106995582581, "learning_rate": 9.788648686603472e-06, "loss": 0.6519, "step": 1896 }, { "epoch": 0.12, "grad_norm": 0.9614534378051758, "learning_rate": 9.788353438683346e-06, "loss": 0.6579, "step": 1897 }, { "epoch": 0.12, "grad_norm": 0.9790334701538086, "learning_rate": 9.788057989142317e-06, "loss": 0.6839, "step": 1898 }, { "epoch": 0.12, "grad_norm": 0.8710220456123352, "learning_rate": 9.787762337992821e-06, "loss": 0.6316, "step": 1899 }, { "epoch": 0.12, "grad_norm": 0.9904646873474121, "learning_rate": 9.78746648524731e-06, "loss": 0.6722, "step": 1900 }, { "epoch": 0.12, "grad_norm": 0.858887255191803, "learning_rate": 9.787170430918239e-06, "loss": 0.6155, "step": 1901 }, { "epoch": 0.12, "grad_norm": 0.9751510620117188, "learning_rate": 9.786874175018073e-06, "loss": 0.6484, "step": 1902 }, { "epoch": 0.12, "grad_norm": 0.9207701086997986, "learning_rate": 9.78657771755929e-06, "loss": 0.6447, "step": 1903 }, { "epoch": 0.12, "grad_norm": 0.8809829354286194, "learning_rate": 9.786281058554369e-06, "loss": 0.6035, "step": 1904 }, { "epoch": 0.12, "grad_norm": 0.8563817143440247, "learning_rate": 9.785984198015804e-06, "loss": 0.5981, "step": 1905 }, { "epoch": 0.12, "grad_norm": 0.9085856676101685, "learning_rate": 9.785687135956092e-06, "loss": 0.6318, "step": 1906 }, { "epoch": 0.12, "grad_norm": 0.9032816290855408, "learning_rate": 9.785389872387745e-06, "loss": 0.5691, "step": 1907 }, { "epoch": 0.12, "grad_norm": 0.9462535977363586, "learning_rate": 9.785092407323276e-06, "loss": 0.6807, "step": 1908 }, { "epoch": 0.12, "grad_norm": 0.9293099641799927, "learning_rate": 9.784794740775212e-06, "loss": 0.663, "step": 1909 }, { "epoch": 0.12, "grad_norm": 0.9306168556213379, "learning_rate": 9.784496872756086e-06, "loss": 0.6242, "step": 1910 }, { "epoch": 0.12, "grad_norm": 0.9209849834442139, "learning_rate": 9.784198803278442e-06, "loss": 0.6387, "step": 1911 }, { "epoch": 0.12, "grad_norm": 0.8757005929946899, "learning_rate": 9.78390053235483e-06, "loss": 0.6034, "step": 1912 }, { "epoch": 0.12, "grad_norm": 0.9847443699836731, "learning_rate": 9.783602059997808e-06, "loss": 0.6675, "step": 1913 }, { "epoch": 0.12, "grad_norm": 0.9457899928092957, "learning_rate": 9.783303386219942e-06, "loss": 0.6446, "step": 1914 }, { "epoch": 0.12, "grad_norm": 0.9456826448440552, "learning_rate": 9.783004511033814e-06, "loss": 0.6877, "step": 1915 }, { "epoch": 0.12, "grad_norm": 1.0723676681518555, "learning_rate": 9.782705434452002e-06, "loss": 0.6977, "step": 1916 }, { "epoch": 0.12, "grad_norm": 0.9328003525733948, "learning_rate": 9.782406156487104e-06, "loss": 0.6618, "step": 1917 }, { "epoch": 0.12, "grad_norm": 1.0295826196670532, "learning_rate": 9.782106677151717e-06, "loss": 0.674, "step": 1918 }, { "epoch": 0.12, "grad_norm": 0.919144868850708, "learning_rate": 9.781806996458456e-06, "loss": 0.6598, "step": 1919 }, { "epoch": 0.12, "grad_norm": 0.8945218324661255, "learning_rate": 9.781507114419937e-06, "loss": 0.6114, "step": 1920 }, { "epoch": 0.12, "grad_norm": 0.9285868406295776, "learning_rate": 9.781207031048785e-06, "loss": 0.6274, "step": 1921 }, { "epoch": 0.12, "grad_norm": 0.974398136138916, "learning_rate": 9.78090674635764e-06, "loss": 0.631, "step": 1922 }, { "epoch": 0.12, "grad_norm": 0.8931386470794678, "learning_rate": 9.780606260359141e-06, "loss": 0.6597, "step": 1923 }, { "epoch": 0.12, "grad_norm": 1.076026201248169, "learning_rate": 9.780305573065945e-06, "loss": 0.7182, "step": 1924 }, { "epoch": 0.12, "grad_norm": 0.8796352744102478, "learning_rate": 9.78000468449071e-06, "loss": 0.6023, "step": 1925 }, { "epoch": 0.12, "grad_norm": 0.9814146161079407, "learning_rate": 9.779703594646106e-06, "loss": 0.7202, "step": 1926 }, { "epoch": 0.12, "grad_norm": 0.9146006107330322, "learning_rate": 9.779402303544811e-06, "loss": 0.6295, "step": 1927 }, { "epoch": 0.12, "grad_norm": 0.9009500741958618, "learning_rate": 9.77910081119951e-06, "loss": 0.5973, "step": 1928 }, { "epoch": 0.12, "grad_norm": 1.0086877346038818, "learning_rate": 9.7787991176229e-06, "loss": 0.6795, "step": 1929 }, { "epoch": 0.12, "grad_norm": 0.8697808384895325, "learning_rate": 9.778497222827685e-06, "loss": 0.5967, "step": 1930 }, { "epoch": 0.12, "grad_norm": 0.8777212500572205, "learning_rate": 9.778195126826574e-06, "loss": 0.6801, "step": 1931 }, { "epoch": 0.12, "grad_norm": 0.9132078289985657, "learning_rate": 9.777892829632288e-06, "loss": 0.6482, "step": 1932 }, { "epoch": 0.12, "grad_norm": 0.9318856000900269, "learning_rate": 9.777590331257557e-06, "loss": 0.7193, "step": 1933 }, { "epoch": 0.12, "grad_norm": 0.9304954409599304, "learning_rate": 9.777287631715117e-06, "loss": 0.5924, "step": 1934 }, { "epoch": 0.12, "grad_norm": 0.9359629154205322, "learning_rate": 9.776984731017714e-06, "loss": 0.7213, "step": 1935 }, { "epoch": 0.12, "grad_norm": 0.8828993439674377, "learning_rate": 9.7766816291781e-06, "loss": 0.6008, "step": 1936 }, { "epoch": 0.12, "grad_norm": 0.9245378375053406, "learning_rate": 9.77637832620904e-06, "loss": 0.6614, "step": 1937 }, { "epoch": 0.12, "grad_norm": 0.9555390477180481, "learning_rate": 9.776074822123306e-06, "loss": 0.6417, "step": 1938 }, { "epoch": 0.12, "grad_norm": 0.9190395474433899, "learning_rate": 9.775771116933674e-06, "loss": 0.6063, "step": 1939 }, { "epoch": 0.12, "grad_norm": 0.9256815314292908, "learning_rate": 9.775467210652936e-06, "loss": 0.6503, "step": 1940 }, { "epoch": 0.12, "grad_norm": 0.8923386335372925, "learning_rate": 9.775163103293885e-06, "loss": 0.6111, "step": 1941 }, { "epoch": 0.12, "grad_norm": 0.9070592522621155, "learning_rate": 9.774858794869328e-06, "loss": 0.6668, "step": 1942 }, { "epoch": 0.12, "grad_norm": 0.8983462452888489, "learning_rate": 9.774554285392078e-06, "loss": 0.6129, "step": 1943 }, { "epoch": 0.12, "grad_norm": 0.8370616436004639, "learning_rate": 9.774249574874957e-06, "loss": 0.6213, "step": 1944 }, { "epoch": 0.12, "grad_norm": 0.8787031769752502, "learning_rate": 9.773944663330793e-06, "loss": 0.6145, "step": 1945 }, { "epoch": 0.12, "grad_norm": 0.9314898252487183, "learning_rate": 9.773639550772428e-06, "loss": 0.6159, "step": 1946 }, { "epoch": 0.12, "grad_norm": 0.9421966671943665, "learning_rate": 9.773334237212707e-06, "loss": 0.6402, "step": 1947 }, { "epoch": 0.12, "grad_norm": 0.9963151812553406, "learning_rate": 9.773028722664486e-06, "loss": 0.6342, "step": 1948 }, { "epoch": 0.12, "grad_norm": 0.8582517504692078, "learning_rate": 9.77272300714063e-06, "loss": 0.6762, "step": 1949 }, { "epoch": 0.12, "grad_norm": 0.905519425868988, "learning_rate": 9.77241709065401e-06, "loss": 0.6098, "step": 1950 }, { "epoch": 0.12, "grad_norm": 0.9416316151618958, "learning_rate": 9.772110973217512e-06, "loss": 0.6413, "step": 1951 }, { "epoch": 0.12, "grad_norm": 0.9484925270080566, "learning_rate": 9.77180465484402e-06, "loss": 0.6415, "step": 1952 }, { "epoch": 0.12, "grad_norm": 0.8854299187660217, "learning_rate": 9.771498135546433e-06, "loss": 0.6387, "step": 1953 }, { "epoch": 0.12, "grad_norm": 0.896232545375824, "learning_rate": 9.77119141533766e-06, "loss": 0.611, "step": 1954 }, { "epoch": 0.12, "grad_norm": 0.9634320735931396, "learning_rate": 9.770884494230614e-06, "loss": 0.6216, "step": 1955 }, { "epoch": 0.12, "grad_norm": 0.9145449995994568, "learning_rate": 9.770577372238217e-06, "loss": 0.5922, "step": 1956 }, { "epoch": 0.12, "grad_norm": 0.9219470620155334, "learning_rate": 9.770270049373403e-06, "loss": 0.6517, "step": 1957 }, { "epoch": 0.12, "grad_norm": 0.8602051734924316, "learning_rate": 9.769962525649112e-06, "loss": 0.57, "step": 1958 }, { "epoch": 0.12, "grad_norm": 0.9885112643241882, "learning_rate": 9.769654801078294e-06, "loss": 0.6788, "step": 1959 }, { "epoch": 0.12, "grad_norm": 0.8877094984054565, "learning_rate": 9.769346875673903e-06, "loss": 0.602, "step": 1960 }, { "epoch": 0.12, "grad_norm": 0.9231418967247009, "learning_rate": 9.769038749448907e-06, "loss": 0.6285, "step": 1961 }, { "epoch": 0.12, "grad_norm": 0.9032172560691833, "learning_rate": 9.76873042241628e-06, "loss": 0.6017, "step": 1962 }, { "epoch": 0.12, "grad_norm": 0.9338173866271973, "learning_rate": 9.768421894589003e-06, "loss": 0.6577, "step": 1963 }, { "epoch": 0.12, "grad_norm": 0.99520343542099, "learning_rate": 9.76811316598007e-06, "loss": 0.6539, "step": 1964 }, { "epoch": 0.12, "grad_norm": 0.8935354351997375, "learning_rate": 9.767804236602476e-06, "loss": 0.6299, "step": 1965 }, { "epoch": 0.12, "grad_norm": 0.8823718428611755, "learning_rate": 9.767495106469233e-06, "loss": 0.6356, "step": 1966 }, { "epoch": 0.12, "grad_norm": 0.9498067498207092, "learning_rate": 9.767185775593356e-06, "loss": 0.6466, "step": 1967 }, { "epoch": 0.12, "grad_norm": 0.9748334884643555, "learning_rate": 9.76687624398787e-06, "loss": 0.6011, "step": 1968 }, { "epoch": 0.12, "grad_norm": 0.9265943765640259, "learning_rate": 9.766566511665808e-06, "loss": 0.6582, "step": 1969 }, { "epoch": 0.12, "grad_norm": 0.8861657381057739, "learning_rate": 9.766256578640212e-06, "loss": 0.6416, "step": 1970 }, { "epoch": 0.12, "grad_norm": 0.9129331707954407, "learning_rate": 9.76594644492413e-06, "loss": 0.6252, "step": 1971 }, { "epoch": 0.12, "grad_norm": 0.9186064004898071, "learning_rate": 9.765636110530626e-06, "loss": 0.6536, "step": 1972 }, { "epoch": 0.12, "grad_norm": 1.0016237497329712, "learning_rate": 9.765325575472761e-06, "loss": 0.6365, "step": 1973 }, { "epoch": 0.13, "grad_norm": 0.9075548052787781, "learning_rate": 9.765014839763616e-06, "loss": 0.6497, "step": 1974 }, { "epoch": 0.13, "grad_norm": 0.9337440729141235, "learning_rate": 9.764703903416271e-06, "loss": 0.6143, "step": 1975 }, { "epoch": 0.13, "grad_norm": 0.9033395648002625, "learning_rate": 9.76439276644382e-06, "loss": 0.6251, "step": 1976 }, { "epoch": 0.13, "grad_norm": 0.9360528588294983, "learning_rate": 9.764081428859363e-06, "loss": 0.6793, "step": 1977 }, { "epoch": 0.13, "grad_norm": 0.9267653226852417, "learning_rate": 9.763769890676011e-06, "loss": 0.6137, "step": 1978 }, { "epoch": 0.13, "grad_norm": 0.9287899136543274, "learning_rate": 9.76345815190688e-06, "loss": 0.6879, "step": 1979 }, { "epoch": 0.13, "grad_norm": 0.9556732773780823, "learning_rate": 9.763146212565097e-06, "loss": 0.6544, "step": 1980 }, { "epoch": 0.13, "grad_norm": 0.9532358646392822, "learning_rate": 9.762834072663798e-06, "loss": 0.629, "step": 1981 }, { "epoch": 0.13, "grad_norm": 0.8710858821868896, "learning_rate": 9.762521732216124e-06, "loss": 0.6043, "step": 1982 }, { "epoch": 0.13, "grad_norm": 0.9163749814033508, "learning_rate": 9.762209191235227e-06, "loss": 0.7024, "step": 1983 }, { "epoch": 0.13, "grad_norm": 0.9370541572570801, "learning_rate": 9.761896449734269e-06, "loss": 0.6327, "step": 1984 }, { "epoch": 0.13, "grad_norm": 0.9257699251174927, "learning_rate": 9.761583507726416e-06, "loss": 0.6479, "step": 1985 }, { "epoch": 0.13, "grad_norm": 0.9155780673027039, "learning_rate": 9.761270365224846e-06, "loss": 0.6547, "step": 1986 }, { "epoch": 0.13, "grad_norm": 0.8561526536941528, "learning_rate": 9.760957022242746e-06, "loss": 0.565, "step": 1987 }, { "epoch": 0.13, "grad_norm": 1.0075304508209229, "learning_rate": 9.760643478793305e-06, "loss": 0.6894, "step": 1988 }, { "epoch": 0.13, "grad_norm": 0.9076879620552063, "learning_rate": 9.760329734889729e-06, "loss": 0.6435, "step": 1989 }, { "epoch": 0.13, "grad_norm": 0.9092305302619934, "learning_rate": 9.760015790545227e-06, "loss": 0.6086, "step": 1990 }, { "epoch": 0.13, "grad_norm": 0.9335655570030212, "learning_rate": 9.759701645773022e-06, "loss": 0.6191, "step": 1991 }, { "epoch": 0.13, "grad_norm": 0.9743184447288513, "learning_rate": 9.759387300586336e-06, "loss": 0.6072, "step": 1992 }, { "epoch": 0.13, "grad_norm": 0.7978373169898987, "learning_rate": 9.759072754998407e-06, "loss": 0.558, "step": 1993 }, { "epoch": 0.13, "grad_norm": 0.9291953444480896, "learning_rate": 9.758758009022482e-06, "loss": 0.6755, "step": 1994 }, { "epoch": 0.13, "grad_norm": 0.8679872751235962, "learning_rate": 9.758443062671809e-06, "loss": 0.657, "step": 1995 }, { "epoch": 0.13, "grad_norm": 0.9098535180091858, "learning_rate": 9.758127915959655e-06, "loss": 0.6372, "step": 1996 }, { "epoch": 0.13, "grad_norm": 0.899311363697052, "learning_rate": 9.757812568899285e-06, "loss": 0.6732, "step": 1997 }, { "epoch": 0.13, "grad_norm": 0.8860989809036255, "learning_rate": 9.75749702150398e-06, "loss": 0.6065, "step": 1998 }, { "epoch": 0.13, "grad_norm": 0.9011684656143188, "learning_rate": 9.757181273787024e-06, "loss": 0.6352, "step": 1999 }, { "epoch": 0.13, "grad_norm": 0.8687819242477417, "learning_rate": 9.756865325761715e-06, "loss": 0.6535, "step": 2000 }, { "epoch": 0.13, "grad_norm": 0.9379962682723999, "learning_rate": 9.756549177441354e-06, "loss": 0.602, "step": 2001 }, { "epoch": 0.13, "grad_norm": 0.9150758385658264, "learning_rate": 9.756232828839256e-06, "loss": 0.621, "step": 2002 }, { "epoch": 0.13, "grad_norm": 0.9422044157981873, "learning_rate": 9.755916279968738e-06, "loss": 0.6499, "step": 2003 }, { "epoch": 0.13, "grad_norm": 0.906806230545044, "learning_rate": 9.75559953084313e-06, "loss": 0.6025, "step": 2004 }, { "epoch": 0.13, "grad_norm": 0.9322741627693176, "learning_rate": 9.755282581475769e-06, "loss": 0.6008, "step": 2005 }, { "epoch": 0.13, "grad_norm": 0.8540508151054382, "learning_rate": 9.75496543188e-06, "loss": 0.6156, "step": 2006 }, { "epoch": 0.13, "grad_norm": 0.8853635191917419, "learning_rate": 9.754648082069181e-06, "loss": 0.5934, "step": 2007 }, { "epoch": 0.13, "grad_norm": 0.9560227990150452, "learning_rate": 9.75433053205667e-06, "loss": 0.6117, "step": 2008 }, { "epoch": 0.13, "grad_norm": 0.934593915939331, "learning_rate": 9.754012781855837e-06, "loss": 0.6571, "step": 2009 }, { "epoch": 0.13, "grad_norm": 0.9356120824813843, "learning_rate": 9.753694831480067e-06, "loss": 0.6051, "step": 2010 }, { "epoch": 0.13, "grad_norm": 0.9165395498275757, "learning_rate": 9.753376680942744e-06, "loss": 0.6509, "step": 2011 }, { "epoch": 0.13, "grad_norm": 0.907821536064148, "learning_rate": 9.753058330257263e-06, "loss": 0.6233, "step": 2012 }, { "epoch": 0.13, "grad_norm": 0.924810528755188, "learning_rate": 9.752739779437032e-06, "loss": 0.6273, "step": 2013 }, { "epoch": 0.13, "grad_norm": 0.9630839824676514, "learning_rate": 9.752421028495461e-06, "loss": 0.6812, "step": 2014 }, { "epoch": 0.13, "grad_norm": 0.9571135640144348, "learning_rate": 9.752102077445974e-06, "loss": 0.6364, "step": 2015 }, { "epoch": 0.13, "grad_norm": 0.9181431531906128, "learning_rate": 9.751782926302e-06, "loss": 0.6296, "step": 2016 }, { "epoch": 0.13, "grad_norm": 0.9492517113685608, "learning_rate": 9.751463575076977e-06, "loss": 0.6416, "step": 2017 }, { "epoch": 0.13, "grad_norm": 0.9193875789642334, "learning_rate": 9.75114402378435e-06, "loss": 0.6329, "step": 2018 }, { "epoch": 0.13, "grad_norm": 1.0199710130691528, "learning_rate": 9.75082427243758e-06, "loss": 0.6989, "step": 2019 }, { "epoch": 0.13, "grad_norm": 1.0100188255310059, "learning_rate": 9.750504321050126e-06, "loss": 0.6863, "step": 2020 }, { "epoch": 0.13, "grad_norm": 0.957966685295105, "learning_rate": 9.75018416963546e-06, "loss": 0.6497, "step": 2021 }, { "epoch": 0.13, "grad_norm": 0.9822169542312622, "learning_rate": 9.749863818207061e-06, "loss": 0.6548, "step": 2022 }, { "epoch": 0.13, "grad_norm": 0.9881288409233093, "learning_rate": 9.749543266778424e-06, "loss": 0.6411, "step": 2023 }, { "epoch": 0.13, "grad_norm": 0.9414603114128113, "learning_rate": 9.749222515363041e-06, "loss": 0.6502, "step": 2024 }, { "epoch": 0.13, "grad_norm": 0.935268223285675, "learning_rate": 9.748901563974418e-06, "loss": 0.6292, "step": 2025 }, { "epoch": 0.13, "grad_norm": 0.9612113833427429, "learning_rate": 9.748580412626072e-06, "loss": 0.6809, "step": 2026 }, { "epoch": 0.13, "grad_norm": 0.9209766983985901, "learning_rate": 9.748259061331524e-06, "loss": 0.6361, "step": 2027 }, { "epoch": 0.13, "grad_norm": 0.9090907573699951, "learning_rate": 9.747937510104305e-06, "loss": 0.6418, "step": 2028 }, { "epoch": 0.13, "grad_norm": 1.0263921022415161, "learning_rate": 9.74761575895795e-06, "loss": 0.6523, "step": 2029 }, { "epoch": 0.13, "grad_norm": 0.9746382236480713, "learning_rate": 9.747293807906017e-06, "loss": 0.6635, "step": 2030 }, { "epoch": 0.13, "grad_norm": 0.8786625266075134, "learning_rate": 9.746971656962053e-06, "loss": 0.6328, "step": 2031 }, { "epoch": 0.13, "grad_norm": 0.9323434233665466, "learning_rate": 9.746649306139627e-06, "loss": 0.6359, "step": 2032 }, { "epoch": 0.13, "grad_norm": 0.9723234176635742, "learning_rate": 9.74632675545231e-06, "loss": 0.6284, "step": 2033 }, { "epoch": 0.13, "grad_norm": 0.9053655862808228, "learning_rate": 9.746004004913688e-06, "loss": 0.6266, "step": 2034 }, { "epoch": 0.13, "grad_norm": 0.835522472858429, "learning_rate": 9.745681054537345e-06, "loss": 0.5554, "step": 2035 }, { "epoch": 0.13, "grad_norm": 0.9256971478462219, "learning_rate": 9.745357904336882e-06, "loss": 0.6404, "step": 2036 }, { "epoch": 0.13, "grad_norm": 0.9099552035331726, "learning_rate": 9.745034554325905e-06, "loss": 0.6096, "step": 2037 }, { "epoch": 0.13, "grad_norm": 0.9474760293960571, "learning_rate": 9.74471100451803e-06, "loss": 0.6383, "step": 2038 }, { "epoch": 0.13, "grad_norm": 0.8943539261817932, "learning_rate": 9.744387254926882e-06, "loss": 0.6159, "step": 2039 }, { "epoch": 0.13, "grad_norm": 0.9860721230506897, "learning_rate": 9.74406330556609e-06, "loss": 0.6816, "step": 2040 }, { "epoch": 0.13, "grad_norm": 0.9628056883811951, "learning_rate": 9.743739156449294e-06, "loss": 0.6745, "step": 2041 }, { "epoch": 0.13, "grad_norm": 0.8961864709854126, "learning_rate": 9.743414807590145e-06, "loss": 0.6283, "step": 2042 }, { "epoch": 0.13, "grad_norm": 0.9150246381759644, "learning_rate": 9.743090259002302e-06, "loss": 0.6118, "step": 2043 }, { "epoch": 0.13, "grad_norm": 0.9093335270881653, "learning_rate": 9.742765510699425e-06, "loss": 0.6072, "step": 2044 }, { "epoch": 0.13, "grad_norm": 0.9687802195549011, "learning_rate": 9.742440562695194e-06, "loss": 0.6055, "step": 2045 }, { "epoch": 0.13, "grad_norm": 0.9072078466415405, "learning_rate": 9.742115415003288e-06, "loss": 0.6052, "step": 2046 }, { "epoch": 0.13, "grad_norm": 0.9311554431915283, "learning_rate": 9.741790067637398e-06, "loss": 0.7248, "step": 2047 }, { "epoch": 0.13, "grad_norm": 0.9264607429504395, "learning_rate": 9.741464520611223e-06, "loss": 0.6605, "step": 2048 }, { "epoch": 0.13, "grad_norm": 0.9030102491378784, "learning_rate": 9.741138773938472e-06, "loss": 0.605, "step": 2049 }, { "epoch": 0.13, "grad_norm": 0.9147754907608032, "learning_rate": 9.74081282763286e-06, "loss": 0.6209, "step": 2050 }, { "epoch": 0.13, "grad_norm": 0.9356055855751038, "learning_rate": 9.740486681708114e-06, "loss": 0.6877, "step": 2051 }, { "epoch": 0.13, "grad_norm": 0.9533680081367493, "learning_rate": 9.740160336177962e-06, "loss": 0.677, "step": 2052 }, { "epoch": 0.13, "grad_norm": 0.9570329189300537, "learning_rate": 9.73983379105615e-06, "loss": 0.6264, "step": 2053 }, { "epoch": 0.13, "grad_norm": 0.8931095600128174, "learning_rate": 9.739507046356424e-06, "loss": 0.65, "step": 2054 }, { "epoch": 0.13, "grad_norm": 0.9158161282539368, "learning_rate": 9.739180102092544e-06, "loss": 0.6391, "step": 2055 }, { "epoch": 0.13, "grad_norm": 0.9462281465530396, "learning_rate": 9.738852958278278e-06, "loss": 0.6517, "step": 2056 }, { "epoch": 0.13, "grad_norm": 0.9567736983299255, "learning_rate": 9.738525614927399e-06, "loss": 0.6521, "step": 2057 }, { "epoch": 0.13, "grad_norm": 0.8848094344139099, "learning_rate": 9.73819807205369e-06, "loss": 0.5847, "step": 2058 }, { "epoch": 0.13, "grad_norm": 0.9664223194122314, "learning_rate": 9.737870329670942e-06, "loss": 0.6546, "step": 2059 }, { "epoch": 0.13, "grad_norm": 1.0148460865020752, "learning_rate": 9.737542387792957e-06, "loss": 0.6764, "step": 2060 }, { "epoch": 0.13, "grad_norm": 0.9341017007827759, "learning_rate": 9.737214246433544e-06, "loss": 0.6472, "step": 2061 }, { "epoch": 0.13, "grad_norm": 0.9515483975410461, "learning_rate": 9.736885905606516e-06, "loss": 0.6375, "step": 2062 }, { "epoch": 0.13, "grad_norm": 0.914443850517273, "learning_rate": 9.736557365325703e-06, "loss": 0.6448, "step": 2063 }, { "epoch": 0.13, "grad_norm": 0.8859198689460754, "learning_rate": 9.736228625604938e-06, "loss": 0.6072, "step": 2064 }, { "epoch": 0.13, "grad_norm": 0.9599637985229492, "learning_rate": 9.735899686458059e-06, "loss": 0.6274, "step": 2065 }, { "epoch": 0.13, "grad_norm": 0.890293538570404, "learning_rate": 9.73557054789892e-06, "loss": 0.6138, "step": 2066 }, { "epoch": 0.13, "grad_norm": 0.9055455923080444, "learning_rate": 9.73524120994138e-06, "loss": 0.6443, "step": 2067 }, { "epoch": 0.13, "grad_norm": 0.9043434262275696, "learning_rate": 9.734911672599304e-06, "loss": 0.6233, "step": 2068 }, { "epoch": 0.13, "grad_norm": 0.9188245534896851, "learning_rate": 9.73458193588657e-06, "loss": 0.6832, "step": 2069 }, { "epoch": 0.13, "grad_norm": 0.928259551525116, "learning_rate": 9.734251999817061e-06, "loss": 0.6329, "step": 2070 }, { "epoch": 0.13, "grad_norm": 0.9406484365463257, "learning_rate": 9.733921864404669e-06, "loss": 0.5915, "step": 2071 }, { "epoch": 0.13, "grad_norm": 1.0036033391952515, "learning_rate": 9.733591529663295e-06, "loss": 0.6564, "step": 2072 }, { "epoch": 0.13, "grad_norm": 0.9120476245880127, "learning_rate": 9.73326099560685e-06, "loss": 0.5945, "step": 2073 }, { "epoch": 0.13, "grad_norm": 0.8819807767868042, "learning_rate": 9.732930262249249e-06, "loss": 0.5899, "step": 2074 }, { "epoch": 0.13, "grad_norm": 0.953350841999054, "learning_rate": 9.73259932960442e-06, "loss": 0.6316, "step": 2075 }, { "epoch": 0.13, "grad_norm": 0.9032095074653625, "learning_rate": 9.732268197686296e-06, "loss": 0.6505, "step": 2076 }, { "epoch": 0.13, "grad_norm": 0.9567223787307739, "learning_rate": 9.731936866508822e-06, "loss": 0.6194, "step": 2077 }, { "epoch": 0.13, "grad_norm": 0.980812132358551, "learning_rate": 9.731605336085947e-06, "loss": 0.6625, "step": 2078 }, { "epoch": 0.13, "grad_norm": 0.8719442486763, "learning_rate": 9.73127360643163e-06, "loss": 0.6017, "step": 2079 }, { "epoch": 0.13, "grad_norm": 0.9464851021766663, "learning_rate": 9.730941677559843e-06, "loss": 0.6396, "step": 2080 }, { "epoch": 0.13, "grad_norm": 0.9146105051040649, "learning_rate": 9.730609549484558e-06, "loss": 0.672, "step": 2081 }, { "epoch": 0.13, "grad_norm": 0.8443781733512878, "learning_rate": 9.730277222219762e-06, "loss": 0.5547, "step": 2082 }, { "epoch": 0.13, "grad_norm": 0.9289159774780273, "learning_rate": 9.729944695779448e-06, "loss": 0.6482, "step": 2083 }, { "epoch": 0.13, "grad_norm": 0.9049432873725891, "learning_rate": 9.729611970177615e-06, "loss": 0.5846, "step": 2084 }, { "epoch": 0.13, "grad_norm": 0.9440188407897949, "learning_rate": 9.729279045428277e-06, "loss": 0.6773, "step": 2085 }, { "epoch": 0.13, "grad_norm": 0.8901217579841614, "learning_rate": 9.72894592154545e-06, "loss": 0.6139, "step": 2086 }, { "epoch": 0.13, "grad_norm": 0.9457870125770569, "learning_rate": 9.72861259854316e-06, "loss": 0.5882, "step": 2087 }, { "epoch": 0.13, "grad_norm": 0.8994535207748413, "learning_rate": 9.728279076435446e-06, "loss": 0.5914, "step": 2088 }, { "epoch": 0.13, "grad_norm": 0.9447741508483887, "learning_rate": 9.727945355236345e-06, "loss": 0.5963, "step": 2089 }, { "epoch": 0.13, "grad_norm": 0.9336423277854919, "learning_rate": 9.727611434959914e-06, "loss": 0.6375, "step": 2090 }, { "epoch": 0.13, "grad_norm": 0.9569306969642639, "learning_rate": 9.727277315620212e-06, "loss": 0.6664, "step": 2091 }, { "epoch": 0.13, "grad_norm": 0.8650494813919067, "learning_rate": 9.726942997231308e-06, "loss": 0.6182, "step": 2092 }, { "epoch": 0.13, "grad_norm": 0.8966164588928223, "learning_rate": 9.726608479807278e-06, "loss": 0.6107, "step": 2093 }, { "epoch": 0.13, "grad_norm": 0.9104940295219421, "learning_rate": 9.726273763362206e-06, "loss": 0.6679, "step": 2094 }, { "epoch": 0.13, "grad_norm": 0.9482932686805725, "learning_rate": 9.725938847910187e-06, "loss": 0.65, "step": 2095 }, { "epoch": 0.13, "grad_norm": 0.8956882357597351, "learning_rate": 9.725603733465325e-06, "loss": 0.6163, "step": 2096 }, { "epoch": 0.13, "grad_norm": 0.9436829686164856, "learning_rate": 9.725268420041728e-06, "loss": 0.6822, "step": 2097 }, { "epoch": 0.13, "grad_norm": 0.9164643883705139, "learning_rate": 9.724932907653516e-06, "loss": 0.6358, "step": 2098 }, { "epoch": 0.13, "grad_norm": 0.8321818113327026, "learning_rate": 9.724597196314817e-06, "loss": 0.5925, "step": 2099 }, { "epoch": 0.13, "grad_norm": 0.8644357919692993, "learning_rate": 9.724261286039766e-06, "loss": 0.6181, "step": 2100 }, { "epoch": 0.13, "grad_norm": 0.9723076820373535, "learning_rate": 9.723925176842506e-06, "loss": 0.6353, "step": 2101 }, { "epoch": 0.13, "grad_norm": 0.9631821513175964, "learning_rate": 9.72358886873719e-06, "loss": 0.6319, "step": 2102 }, { "epoch": 0.13, "grad_norm": 0.8844379782676697, "learning_rate": 9.723252361737977e-06, "loss": 0.6011, "step": 2103 }, { "epoch": 0.13, "grad_norm": 0.8965840935707092, "learning_rate": 9.722915655859042e-06, "loss": 0.6657, "step": 2104 }, { "epoch": 0.13, "grad_norm": 0.878998875617981, "learning_rate": 9.722578751114556e-06, "loss": 0.609, "step": 2105 }, { "epoch": 0.13, "grad_norm": 0.9274184107780457, "learning_rate": 9.72224164751871e-06, "loss": 0.6772, "step": 2106 }, { "epoch": 0.13, "grad_norm": 0.9545007944107056, "learning_rate": 9.721904345085692e-06, "loss": 0.7068, "step": 2107 }, { "epoch": 0.13, "grad_norm": 0.8995804786682129, "learning_rate": 9.72156684382971e-06, "loss": 0.594, "step": 2108 }, { "epoch": 0.13, "grad_norm": 0.8433576822280884, "learning_rate": 9.721229143764975e-06, "loss": 0.5871, "step": 2109 }, { "epoch": 0.13, "grad_norm": 0.8701801300048828, "learning_rate": 9.720891244905701e-06, "loss": 0.6579, "step": 2110 }, { "epoch": 0.13, "grad_norm": 0.9737119078636169, "learning_rate": 9.720553147266123e-06, "loss": 0.6812, "step": 2111 }, { "epoch": 0.13, "grad_norm": 0.9231463074684143, "learning_rate": 9.720214850860473e-06, "loss": 0.6731, "step": 2112 }, { "epoch": 0.13, "grad_norm": 0.9718281030654907, "learning_rate": 9.719876355702993e-06, "loss": 0.6299, "step": 2113 }, { "epoch": 0.13, "grad_norm": 0.9555742144584656, "learning_rate": 9.719537661807942e-06, "loss": 0.6401, "step": 2114 }, { "epoch": 0.13, "grad_norm": 0.9553225636482239, "learning_rate": 9.719198769189577e-06, "loss": 0.5957, "step": 2115 }, { "epoch": 0.13, "grad_norm": 0.9114801287651062, "learning_rate": 9.718859677862169e-06, "loss": 0.6514, "step": 2116 }, { "epoch": 0.13, "grad_norm": 0.9563703536987305, "learning_rate": 9.718520387839996e-06, "loss": 0.6367, "step": 2117 }, { "epoch": 0.13, "grad_norm": 0.9876435399055481, "learning_rate": 9.718180899137344e-06, "loss": 0.6547, "step": 2118 }, { "epoch": 0.13, "grad_norm": 0.91056889295578, "learning_rate": 9.717841211768505e-06, "loss": 0.6011, "step": 2119 }, { "epoch": 0.13, "grad_norm": 0.9514434337615967, "learning_rate": 9.71750132574779e-06, "loss": 0.6384, "step": 2120 }, { "epoch": 0.13, "grad_norm": 0.97725510597229, "learning_rate": 9.717161241089501e-06, "loss": 0.6478, "step": 2121 }, { "epoch": 0.13, "grad_norm": 0.8876969814300537, "learning_rate": 9.716820957807963e-06, "loss": 0.6272, "step": 2122 }, { "epoch": 0.13, "grad_norm": 0.9173263311386108, "learning_rate": 9.716480475917504e-06, "loss": 0.6316, "step": 2123 }, { "epoch": 0.13, "grad_norm": 0.898065984249115, "learning_rate": 9.716139795432459e-06, "loss": 0.6161, "step": 2124 }, { "epoch": 0.13, "grad_norm": 0.9070072770118713, "learning_rate": 9.715798916367174e-06, "loss": 0.6324, "step": 2125 }, { "epoch": 0.13, "grad_norm": 0.871792733669281, "learning_rate": 9.715457838736e-06, "loss": 0.6324, "step": 2126 }, { "epoch": 0.13, "grad_norm": 0.8872711658477783, "learning_rate": 9.715116562553302e-06, "loss": 0.6484, "step": 2127 }, { "epoch": 0.13, "grad_norm": 0.903907299041748, "learning_rate": 9.714775087833446e-06, "loss": 0.6243, "step": 2128 }, { "epoch": 0.13, "grad_norm": 0.9561790227890015, "learning_rate": 9.714433414590816e-06, "loss": 0.6232, "step": 2129 }, { "epoch": 0.13, "grad_norm": 0.9328345656394958, "learning_rate": 9.714091542839792e-06, "loss": 0.6487, "step": 2130 }, { "epoch": 0.14, "grad_norm": 0.8860450387001038, "learning_rate": 9.713749472594773e-06, "loss": 0.6337, "step": 2131 }, { "epoch": 0.14, "grad_norm": 0.9177609086036682, "learning_rate": 9.713407203870163e-06, "loss": 0.6369, "step": 2132 }, { "epoch": 0.14, "grad_norm": 0.8619222640991211, "learning_rate": 9.713064736680372e-06, "loss": 0.5794, "step": 2133 }, { "epoch": 0.14, "grad_norm": 0.9057930707931519, "learning_rate": 9.71272207103982e-06, "loss": 0.5988, "step": 2134 }, { "epoch": 0.14, "grad_norm": 0.9182736873626709, "learning_rate": 9.712379206962936e-06, "loss": 0.7029, "step": 2135 }, { "epoch": 0.14, "grad_norm": 0.8596693277359009, "learning_rate": 9.712036144464157e-06, "loss": 0.598, "step": 2136 }, { "epoch": 0.14, "grad_norm": 0.9068416953086853, "learning_rate": 9.711692883557928e-06, "loss": 0.6706, "step": 2137 }, { "epoch": 0.14, "grad_norm": 0.9520207643508911, "learning_rate": 9.711349424258702e-06, "loss": 0.6514, "step": 2138 }, { "epoch": 0.14, "grad_norm": 0.8763338327407837, "learning_rate": 9.711005766580942e-06, "loss": 0.5938, "step": 2139 }, { "epoch": 0.14, "grad_norm": 0.9876976013183594, "learning_rate": 9.710661910539117e-06, "loss": 0.6066, "step": 2140 }, { "epoch": 0.14, "grad_norm": 0.9245547652244568, "learning_rate": 9.710317856147707e-06, "loss": 0.6224, "step": 2141 }, { "epoch": 0.14, "grad_norm": 0.8814427852630615, "learning_rate": 9.709973603421196e-06, "loss": 0.5874, "step": 2142 }, { "epoch": 0.14, "grad_norm": 0.8934566378593445, "learning_rate": 9.709629152374084e-06, "loss": 0.6272, "step": 2143 }, { "epoch": 0.14, "grad_norm": 0.8956682682037354, "learning_rate": 9.70928450302087e-06, "loss": 0.6705, "step": 2144 }, { "epoch": 0.14, "grad_norm": 0.8444738984107971, "learning_rate": 9.708939655376069e-06, "loss": 0.5762, "step": 2145 }, { "epoch": 0.14, "grad_norm": 0.9432147145271301, "learning_rate": 9.708594609454201e-06, "loss": 0.6278, "step": 2146 }, { "epoch": 0.14, "grad_norm": 0.872987687587738, "learning_rate": 9.708249365269793e-06, "loss": 0.6689, "step": 2147 }, { "epoch": 0.14, "grad_norm": 0.9760878086090088, "learning_rate": 9.707903922837382e-06, "loss": 0.665, "step": 2148 }, { "epoch": 0.14, "grad_norm": 1.0128514766693115, "learning_rate": 9.707558282171517e-06, "loss": 0.7365, "step": 2149 }, { "epoch": 0.14, "grad_norm": 0.9022131562232971, "learning_rate": 9.707212443286746e-06, "loss": 0.6428, "step": 2150 }, { "epoch": 0.14, "grad_norm": 0.9122216105461121, "learning_rate": 9.706866406197637e-06, "loss": 0.617, "step": 2151 }, { "epoch": 0.14, "grad_norm": 0.9093108773231506, "learning_rate": 9.706520170918756e-06, "loss": 0.6826, "step": 2152 }, { "epoch": 0.14, "grad_norm": 0.9202459454536438, "learning_rate": 9.706173737464683e-06, "loss": 0.5962, "step": 2153 }, { "epoch": 0.14, "grad_norm": 0.9246529936790466, "learning_rate": 9.705827105850008e-06, "loss": 0.6299, "step": 2154 }, { "epoch": 0.14, "grad_norm": 0.9624621868133545, "learning_rate": 9.705480276089323e-06, "loss": 0.5714, "step": 2155 }, { "epoch": 0.14, "grad_norm": 0.8589086532592773, "learning_rate": 9.705133248197232e-06, "loss": 0.6083, "step": 2156 }, { "epoch": 0.14, "grad_norm": 0.8764198422431946, "learning_rate": 9.704786022188346e-06, "loss": 0.6235, "step": 2157 }, { "epoch": 0.14, "grad_norm": 0.9193335771560669, "learning_rate": 9.704438598077291e-06, "loss": 0.5962, "step": 2158 }, { "epoch": 0.14, "grad_norm": 0.8846250176429749, "learning_rate": 9.70409097587869e-06, "loss": 0.6036, "step": 2159 }, { "epoch": 0.14, "grad_norm": 0.8875699043273926, "learning_rate": 9.703743155607182e-06, "loss": 0.5966, "step": 2160 }, { "epoch": 0.14, "grad_norm": 0.9193180799484253, "learning_rate": 9.703395137277414e-06, "loss": 0.6257, "step": 2161 }, { "epoch": 0.14, "grad_norm": 0.9386597275733948, "learning_rate": 9.703046920904038e-06, "loss": 0.6208, "step": 2162 }, { "epoch": 0.14, "grad_norm": 1.0000402927398682, "learning_rate": 9.702698506501717e-06, "loss": 0.6224, "step": 2163 }, { "epoch": 0.14, "grad_norm": 0.9253415465354919, "learning_rate": 9.702349894085122e-06, "loss": 0.6126, "step": 2164 }, { "epoch": 0.14, "grad_norm": 0.9393420219421387, "learning_rate": 9.702001083668931e-06, "loss": 0.6636, "step": 2165 }, { "epoch": 0.14, "grad_norm": 0.9388704299926758, "learning_rate": 9.701652075267832e-06, "loss": 0.6852, "step": 2166 }, { "epoch": 0.14, "grad_norm": 0.9847069382667542, "learning_rate": 9.701302868896518e-06, "loss": 0.6375, "step": 2167 }, { "epoch": 0.14, "grad_norm": 0.8902013301849365, "learning_rate": 9.700953464569698e-06, "loss": 0.6506, "step": 2168 }, { "epoch": 0.14, "grad_norm": 0.8558127880096436, "learning_rate": 9.700603862302078e-06, "loss": 0.6022, "step": 2169 }, { "epoch": 0.14, "grad_norm": 0.9520554542541504, "learning_rate": 9.700254062108383e-06, "loss": 0.6399, "step": 2170 }, { "epoch": 0.14, "grad_norm": 0.90887451171875, "learning_rate": 9.69990406400334e-06, "loss": 0.6224, "step": 2171 }, { "epoch": 0.14, "grad_norm": 0.9194844365119934, "learning_rate": 9.699553868001688e-06, "loss": 0.6657, "step": 2172 }, { "epoch": 0.14, "grad_norm": 0.9208309054374695, "learning_rate": 9.699203474118168e-06, "loss": 0.5959, "step": 2173 }, { "epoch": 0.14, "grad_norm": 0.9791555404663086, "learning_rate": 9.69885288236754e-06, "loss": 0.6241, "step": 2174 }, { "epoch": 0.14, "grad_norm": 0.94879150390625, "learning_rate": 9.698502092764562e-06, "loss": 0.6352, "step": 2175 }, { "epoch": 0.14, "grad_norm": 0.919954240322113, "learning_rate": 9.698151105324006e-06, "loss": 0.5973, "step": 2176 }, { "epoch": 0.14, "grad_norm": 0.863923192024231, "learning_rate": 9.697799920060651e-06, "loss": 0.6602, "step": 2177 }, { "epoch": 0.14, "grad_norm": 0.861292839050293, "learning_rate": 9.697448536989284e-06, "loss": 0.6121, "step": 2178 }, { "epoch": 0.14, "grad_norm": 0.8456393480300903, "learning_rate": 9.697096956124699e-06, "loss": 0.6424, "step": 2179 }, { "epoch": 0.14, "grad_norm": 0.9356247186660767, "learning_rate": 9.696745177481703e-06, "loss": 0.5969, "step": 2180 }, { "epoch": 0.14, "grad_norm": 0.8527323007583618, "learning_rate": 9.696393201075105e-06, "loss": 0.6084, "step": 2181 }, { "epoch": 0.14, "grad_norm": 0.8870608806610107, "learning_rate": 9.696041026919727e-06, "loss": 0.5883, "step": 2182 }, { "epoch": 0.14, "grad_norm": 0.9454874992370605, "learning_rate": 9.695688655030397e-06, "loss": 0.6827, "step": 2183 }, { "epoch": 0.14, "grad_norm": 0.9070287942886353, "learning_rate": 9.695336085421953e-06, "loss": 0.6381, "step": 2184 }, { "epoch": 0.14, "grad_norm": 0.8830955624580383, "learning_rate": 9.694983318109242e-06, "loss": 0.6082, "step": 2185 }, { "epoch": 0.14, "grad_norm": 0.8843045830726624, "learning_rate": 9.694630353107115e-06, "loss": 0.6811, "step": 2186 }, { "epoch": 0.14, "grad_norm": 0.9212061166763306, "learning_rate": 9.694277190430437e-06, "loss": 0.6432, "step": 2187 }, { "epoch": 0.14, "grad_norm": 0.8803088068962097, "learning_rate": 9.693923830094074e-06, "loss": 0.6415, "step": 2188 }, { "epoch": 0.14, "grad_norm": 0.9358056783676147, "learning_rate": 9.693570272112908e-06, "loss": 0.661, "step": 2189 }, { "epoch": 0.14, "grad_norm": 0.9013800024986267, "learning_rate": 9.693216516501827e-06, "loss": 0.6218, "step": 2190 }, { "epoch": 0.14, "grad_norm": 0.944242000579834, "learning_rate": 9.692862563275725e-06, "loss": 0.6864, "step": 2191 }, { "epoch": 0.14, "grad_norm": 0.8731442093849182, "learning_rate": 9.692508412449505e-06, "loss": 0.6566, "step": 2192 }, { "epoch": 0.14, "grad_norm": 0.9164943099021912, "learning_rate": 9.692154064038079e-06, "loss": 0.6176, "step": 2193 }, { "epoch": 0.14, "grad_norm": 0.8906972408294678, "learning_rate": 9.691799518056369e-06, "loss": 0.6736, "step": 2194 }, { "epoch": 0.14, "grad_norm": 0.927988588809967, "learning_rate": 9.691444774519302e-06, "loss": 0.6201, "step": 2195 }, { "epoch": 0.14, "grad_norm": 0.9599518775939941, "learning_rate": 9.691089833441818e-06, "loss": 0.662, "step": 2196 }, { "epoch": 0.14, "grad_norm": 0.842663049697876, "learning_rate": 9.69073469483886e-06, "loss": 0.6176, "step": 2197 }, { "epoch": 0.14, "grad_norm": 0.9649078845977783, "learning_rate": 9.690379358725379e-06, "loss": 0.6787, "step": 2198 }, { "epoch": 0.14, "grad_norm": 0.9011525511741638, "learning_rate": 9.69002382511634e-06, "loss": 0.6612, "step": 2199 }, { "epoch": 0.14, "grad_norm": 0.8732843399047852, "learning_rate": 9.689668094026716e-06, "loss": 0.6005, "step": 2200 }, { "epoch": 0.14, "grad_norm": 0.9425661563873291, "learning_rate": 9.689312165471483e-06, "loss": 0.5944, "step": 2201 }, { "epoch": 0.14, "grad_norm": 0.8813802003860474, "learning_rate": 9.688956039465626e-06, "loss": 0.6291, "step": 2202 }, { "epoch": 0.14, "grad_norm": 0.9538077712059021, "learning_rate": 9.688599716024141e-06, "loss": 0.6214, "step": 2203 }, { "epoch": 0.14, "grad_norm": 0.8900435566902161, "learning_rate": 9.688243195162033e-06, "loss": 0.62, "step": 2204 }, { "epoch": 0.14, "grad_norm": 0.8894834518432617, "learning_rate": 9.687886476894314e-06, "loss": 0.5676, "step": 2205 }, { "epoch": 0.14, "grad_norm": 1.0278310775756836, "learning_rate": 9.687529561236004e-06, "loss": 0.6704, "step": 2206 }, { "epoch": 0.14, "grad_norm": 0.9716306924819946, "learning_rate": 9.687172448202129e-06, "loss": 0.6479, "step": 2207 }, { "epoch": 0.14, "grad_norm": 0.8720564246177673, "learning_rate": 9.68681513780773e-06, "loss": 0.6445, "step": 2208 }, { "epoch": 0.14, "grad_norm": 0.9259105324745178, "learning_rate": 9.686457630067848e-06, "loss": 0.6582, "step": 2209 }, { "epoch": 0.14, "grad_norm": 0.9476026892662048, "learning_rate": 9.686099924997538e-06, "loss": 0.6086, "step": 2210 }, { "epoch": 0.14, "grad_norm": 0.8634487390518188, "learning_rate": 9.685742022611864e-06, "loss": 0.5746, "step": 2211 }, { "epoch": 0.14, "grad_norm": 0.9387729167938232, "learning_rate": 9.685383922925892e-06, "loss": 0.6432, "step": 2212 }, { "epoch": 0.14, "grad_norm": 0.897686779499054, "learning_rate": 9.685025625954703e-06, "loss": 0.6607, "step": 2213 }, { "epoch": 0.14, "grad_norm": 0.9364752769470215, "learning_rate": 9.684667131713381e-06, "loss": 0.6227, "step": 2214 }, { "epoch": 0.14, "grad_norm": 0.9455356597900391, "learning_rate": 9.684308440217026e-06, "loss": 0.6756, "step": 2215 }, { "epoch": 0.14, "grad_norm": 0.9486604928970337, "learning_rate": 9.683949551480736e-06, "loss": 0.5791, "step": 2216 }, { "epoch": 0.14, "grad_norm": 0.9534194469451904, "learning_rate": 9.683590465519625e-06, "loss": 0.6675, "step": 2217 }, { "epoch": 0.14, "grad_norm": 0.9059990048408508, "learning_rate": 9.683231182348813e-06, "loss": 0.6342, "step": 2218 }, { "epoch": 0.14, "grad_norm": 0.8662623167037964, "learning_rate": 9.682871701983428e-06, "loss": 0.6061, "step": 2219 }, { "epoch": 0.14, "grad_norm": 1.0336980819702148, "learning_rate": 9.682512024438607e-06, "loss": 0.6481, "step": 2220 }, { "epoch": 0.14, "grad_norm": 0.9470313787460327, "learning_rate": 9.682152149729491e-06, "loss": 0.679, "step": 2221 }, { "epoch": 0.14, "grad_norm": 0.9740751385688782, "learning_rate": 9.681792077871238e-06, "loss": 0.6369, "step": 2222 }, { "epoch": 0.14, "grad_norm": 0.9472583532333374, "learning_rate": 9.681431808879007e-06, "loss": 0.6351, "step": 2223 }, { "epoch": 0.14, "grad_norm": 0.9514747262001038, "learning_rate": 9.681071342767967e-06, "loss": 0.6496, "step": 2224 }, { "epoch": 0.14, "grad_norm": 0.9216861724853516, "learning_rate": 9.6807106795533e-06, "loss": 0.6447, "step": 2225 }, { "epoch": 0.14, "grad_norm": 0.9888139367103577, "learning_rate": 9.680349819250185e-06, "loss": 0.6117, "step": 2226 }, { "epoch": 0.14, "grad_norm": 0.9336743354797363, "learning_rate": 9.679988761873824e-06, "loss": 0.609, "step": 2227 }, { "epoch": 0.14, "grad_norm": 0.8719781637191772, "learning_rate": 9.679627507439416e-06, "loss": 0.6476, "step": 2228 }, { "epoch": 0.14, "grad_norm": 0.8691688179969788, "learning_rate": 9.679266055962174e-06, "loss": 0.6706, "step": 2229 }, { "epoch": 0.14, "grad_norm": 0.8492668271064758, "learning_rate": 9.678904407457314e-06, "loss": 0.621, "step": 2230 }, { "epoch": 0.14, "grad_norm": 0.9780930876731873, "learning_rate": 9.678542561940067e-06, "loss": 0.6739, "step": 2231 }, { "epoch": 0.14, "grad_norm": 0.983424186706543, "learning_rate": 9.678180519425669e-06, "loss": 0.6331, "step": 2232 }, { "epoch": 0.14, "grad_norm": 0.8755106329917908, "learning_rate": 9.677818279929363e-06, "loss": 0.5712, "step": 2233 }, { "epoch": 0.14, "grad_norm": 0.8746523857116699, "learning_rate": 9.677455843466402e-06, "loss": 0.6365, "step": 2234 }, { "epoch": 0.14, "grad_norm": 0.9087699055671692, "learning_rate": 9.677093210052048e-06, "loss": 0.5855, "step": 2235 }, { "epoch": 0.14, "grad_norm": 0.8961308598518372, "learning_rate": 9.676730379701567e-06, "loss": 0.6563, "step": 2236 }, { "epoch": 0.14, "grad_norm": 0.917649507522583, "learning_rate": 9.676367352430242e-06, "loss": 0.6651, "step": 2237 }, { "epoch": 0.14, "grad_norm": 0.8809880018234253, "learning_rate": 9.676004128253354e-06, "loss": 0.6311, "step": 2238 }, { "epoch": 0.14, "grad_norm": 0.946129322052002, "learning_rate": 9.675640707186199e-06, "loss": 0.6366, "step": 2239 }, { "epoch": 0.14, "grad_norm": 0.920985221862793, "learning_rate": 9.67527708924408e-06, "loss": 0.5958, "step": 2240 }, { "epoch": 0.14, "grad_norm": 0.8754940629005432, "learning_rate": 9.674913274442305e-06, "loss": 0.6765, "step": 2241 }, { "epoch": 0.14, "grad_norm": 0.9010186791419983, "learning_rate": 9.674549262796196e-06, "loss": 0.6457, "step": 2242 }, { "epoch": 0.14, "grad_norm": 0.8508507013320923, "learning_rate": 9.674185054321079e-06, "loss": 0.5684, "step": 2243 }, { "epoch": 0.14, "grad_norm": 0.9368433952331543, "learning_rate": 9.67382064903229e-06, "loss": 0.6338, "step": 2244 }, { "epoch": 0.14, "grad_norm": 1.0419481992721558, "learning_rate": 9.67345604694517e-06, "loss": 0.6102, "step": 2245 }, { "epoch": 0.14, "grad_norm": 0.9164296984672546, "learning_rate": 9.673091248075077e-06, "loss": 0.6279, "step": 2246 }, { "epoch": 0.14, "grad_norm": 0.9411850571632385, "learning_rate": 9.672726252437368e-06, "loss": 0.6252, "step": 2247 }, { "epoch": 0.14, "grad_norm": 0.8734287023544312, "learning_rate": 9.67236106004741e-06, "loss": 0.6179, "step": 2248 }, { "epoch": 0.14, "grad_norm": 0.8806835412979126, "learning_rate": 9.671995670920582e-06, "loss": 0.5995, "step": 2249 }, { "epoch": 0.14, "grad_norm": 0.9245673418045044, "learning_rate": 9.671630085072268e-06, "loss": 0.6239, "step": 2250 }, { "epoch": 0.14, "grad_norm": 0.941852331161499, "learning_rate": 9.671264302517864e-06, "loss": 0.656, "step": 2251 }, { "epoch": 0.14, "grad_norm": 0.8769700527191162, "learning_rate": 9.67089832327277e-06, "loss": 0.6233, "step": 2252 }, { "epoch": 0.14, "grad_norm": 0.9232833385467529, "learning_rate": 9.670532147352399e-06, "loss": 0.6819, "step": 2253 }, { "epoch": 0.14, "grad_norm": 0.9096298813819885, "learning_rate": 9.670165774772164e-06, "loss": 0.6606, "step": 2254 }, { "epoch": 0.14, "grad_norm": 0.8869082927703857, "learning_rate": 9.669799205547494e-06, "loss": 0.6617, "step": 2255 }, { "epoch": 0.14, "grad_norm": 0.8901436924934387, "learning_rate": 9.669432439693827e-06, "loss": 0.6609, "step": 2256 }, { "epoch": 0.14, "grad_norm": 0.9619342088699341, "learning_rate": 9.669065477226602e-06, "loss": 0.6758, "step": 2257 }, { "epoch": 0.14, "grad_norm": 0.9121052026748657, "learning_rate": 9.668698318161271e-06, "loss": 0.6162, "step": 2258 }, { "epoch": 0.14, "grad_norm": 0.9086534380912781, "learning_rate": 9.668330962513297e-06, "loss": 0.6098, "step": 2259 }, { "epoch": 0.14, "grad_norm": 1.0334198474884033, "learning_rate": 9.667963410298147e-06, "loss": 0.6498, "step": 2260 }, { "epoch": 0.14, "grad_norm": 0.942879319190979, "learning_rate": 9.667595661531294e-06, "loss": 0.6464, "step": 2261 }, { "epoch": 0.14, "grad_norm": 0.8824305534362793, "learning_rate": 9.667227716228228e-06, "loss": 0.6543, "step": 2262 }, { "epoch": 0.14, "grad_norm": 0.8903138041496277, "learning_rate": 9.666859574404434e-06, "loss": 0.6208, "step": 2263 }, { "epoch": 0.14, "grad_norm": 0.9194402694702148, "learning_rate": 9.666491236075423e-06, "loss": 0.6187, "step": 2264 }, { "epoch": 0.14, "grad_norm": 0.9723901152610779, "learning_rate": 9.666122701256697e-06, "loss": 0.6729, "step": 2265 }, { "epoch": 0.14, "grad_norm": 0.9405593276023865, "learning_rate": 9.665753969963779e-06, "loss": 0.6383, "step": 2266 }, { "epoch": 0.14, "grad_norm": 0.9103307127952576, "learning_rate": 9.66538504221219e-06, "loss": 0.6709, "step": 2267 }, { "epoch": 0.14, "grad_norm": 0.8941056132316589, "learning_rate": 9.665015918017467e-06, "loss": 0.6022, "step": 2268 }, { "epoch": 0.14, "grad_norm": 0.9082260727882385, "learning_rate": 9.664646597395151e-06, "loss": 0.6307, "step": 2269 }, { "epoch": 0.14, "grad_norm": 0.9310553073883057, "learning_rate": 9.664277080360796e-06, "loss": 0.6667, "step": 2270 }, { "epoch": 0.14, "grad_norm": 0.893653154373169, "learning_rate": 9.663907366929958e-06, "loss": 0.6233, "step": 2271 }, { "epoch": 0.14, "grad_norm": 0.9378598928451538, "learning_rate": 9.663537457118206e-06, "loss": 0.6922, "step": 2272 }, { "epoch": 0.14, "grad_norm": 1.0078368186950684, "learning_rate": 9.663167350941114e-06, "loss": 0.7198, "step": 2273 }, { "epoch": 0.14, "grad_norm": 0.9074714183807373, "learning_rate": 9.662797048414267e-06, "loss": 0.6979, "step": 2274 }, { "epoch": 0.14, "grad_norm": 0.9271409511566162, "learning_rate": 9.662426549553257e-06, "loss": 0.6478, "step": 2275 }, { "epoch": 0.14, "grad_norm": 0.8915387392044067, "learning_rate": 9.662055854373684e-06, "loss": 0.6721, "step": 2276 }, { "epoch": 0.14, "grad_norm": 0.8576652407646179, "learning_rate": 9.661684962891158e-06, "loss": 0.6245, "step": 2277 }, { "epoch": 0.14, "grad_norm": 0.8375203013420105, "learning_rate": 9.661313875121294e-06, "loss": 0.5757, "step": 2278 }, { "epoch": 0.14, "grad_norm": 0.9374811053276062, "learning_rate": 9.66094259107972e-06, "loss": 0.673, "step": 2279 }, { "epoch": 0.14, "grad_norm": 0.9571980834007263, "learning_rate": 9.660571110782066e-06, "loss": 0.6396, "step": 2280 }, { "epoch": 0.14, "grad_norm": 0.9160385131835938, "learning_rate": 9.660199434243977e-06, "loss": 0.6462, "step": 2281 }, { "epoch": 0.14, "grad_norm": 0.8740729689598083, "learning_rate": 9.6598275614811e-06, "loss": 0.6305, "step": 2282 }, { "epoch": 0.14, "grad_norm": 0.9242905974388123, "learning_rate": 9.659455492509096e-06, "loss": 0.6148, "step": 2283 }, { "epoch": 0.14, "grad_norm": 0.9448089599609375, "learning_rate": 9.659083227343628e-06, "loss": 0.6224, "step": 2284 }, { "epoch": 0.14, "grad_norm": 0.9231502413749695, "learning_rate": 9.658710766000375e-06, "loss": 0.6241, "step": 2285 }, { "epoch": 0.14, "grad_norm": 0.9591917991638184, "learning_rate": 9.658338108495018e-06, "loss": 0.6087, "step": 2286 }, { "epoch": 0.14, "grad_norm": 0.9254891276359558, "learning_rate": 9.65796525484325e-06, "loss": 0.6346, "step": 2287 }, { "epoch": 0.14, "grad_norm": 0.9530578255653381, "learning_rate": 9.657592205060766e-06, "loss": 0.6213, "step": 2288 }, { "epoch": 0.15, "grad_norm": 0.9184418320655823, "learning_rate": 9.657218959163278e-06, "loss": 0.5876, "step": 2289 }, { "epoch": 0.15, "grad_norm": 0.9244976043701172, "learning_rate": 9.656845517166502e-06, "loss": 0.6017, "step": 2290 }, { "epoch": 0.15, "grad_norm": 0.9175297617912292, "learning_rate": 9.656471879086158e-06, "loss": 0.6249, "step": 2291 }, { "epoch": 0.15, "grad_norm": 0.931868314743042, "learning_rate": 9.656098044937985e-06, "loss": 0.6413, "step": 2292 }, { "epoch": 0.15, "grad_norm": 0.9301477074623108, "learning_rate": 9.65572401473772e-06, "loss": 0.6678, "step": 2293 }, { "epoch": 0.15, "grad_norm": 0.8930208086967468, "learning_rate": 9.655349788501112e-06, "loss": 0.6502, "step": 2294 }, { "epoch": 0.15, "grad_norm": 0.9450199007987976, "learning_rate": 9.654975366243919e-06, "loss": 0.622, "step": 2295 }, { "epoch": 0.15, "grad_norm": 0.8430439829826355, "learning_rate": 9.654600747981908e-06, "loss": 0.5621, "step": 2296 }, { "epoch": 0.15, "grad_norm": 0.9476586580276489, "learning_rate": 9.654225933730852e-06, "loss": 0.6427, "step": 2297 }, { "epoch": 0.15, "grad_norm": 0.8823800086975098, "learning_rate": 9.653850923506532e-06, "loss": 0.6457, "step": 2298 }, { "epoch": 0.15, "grad_norm": 0.8883811235427856, "learning_rate": 9.653475717324739e-06, "loss": 0.6332, "step": 2299 }, { "epoch": 0.15, "grad_norm": 0.8883042335510254, "learning_rate": 9.65310031520127e-06, "loss": 0.6744, "step": 2300 }, { "epoch": 0.15, "grad_norm": 0.9382773041725159, "learning_rate": 9.652724717151938e-06, "loss": 0.6717, "step": 2301 }, { "epoch": 0.15, "grad_norm": 0.9416858553886414, "learning_rate": 9.652348923192551e-06, "loss": 0.648, "step": 2302 }, { "epoch": 0.15, "grad_norm": 0.8762007355690002, "learning_rate": 9.651972933338935e-06, "loss": 0.5897, "step": 2303 }, { "epoch": 0.15, "grad_norm": 0.9719755053520203, "learning_rate": 9.651596747606924e-06, "loss": 0.6991, "step": 2304 }, { "epoch": 0.15, "grad_norm": 0.9252588152885437, "learning_rate": 9.651220366012354e-06, "loss": 0.6186, "step": 2305 }, { "epoch": 0.15, "grad_norm": 0.9560814499855042, "learning_rate": 9.650843788571076e-06, "loss": 0.6411, "step": 2306 }, { "epoch": 0.15, "grad_norm": 1.036543369293213, "learning_rate": 9.650467015298943e-06, "loss": 0.6339, "step": 2307 }, { "epoch": 0.15, "grad_norm": 0.9324323534965515, "learning_rate": 9.650090046211822e-06, "loss": 0.6649, "step": 2308 }, { "epoch": 0.15, "grad_norm": 0.8707371950149536, "learning_rate": 9.649712881325587e-06, "loss": 0.5718, "step": 2309 }, { "epoch": 0.15, "grad_norm": 0.9522401690483093, "learning_rate": 9.649335520656118e-06, "loss": 0.6915, "step": 2310 }, { "epoch": 0.15, "grad_norm": 0.9509444236755371, "learning_rate": 9.648957964219303e-06, "loss": 0.6725, "step": 2311 }, { "epoch": 0.15, "grad_norm": 0.9052115678787231, "learning_rate": 9.64858021203104e-06, "loss": 0.6543, "step": 2312 }, { "epoch": 0.15, "grad_norm": 0.914665162563324, "learning_rate": 9.648202264107239e-06, "loss": 0.6265, "step": 2313 }, { "epoch": 0.15, "grad_norm": 0.895332396030426, "learning_rate": 9.647824120463806e-06, "loss": 0.6248, "step": 2314 }, { "epoch": 0.15, "grad_norm": 0.9358121752738953, "learning_rate": 9.64744578111667e-06, "loss": 0.5782, "step": 2315 }, { "epoch": 0.15, "grad_norm": 0.9630364179611206, "learning_rate": 9.647067246081761e-06, "loss": 0.6326, "step": 2316 }, { "epoch": 0.15, "grad_norm": 0.9551122784614563, "learning_rate": 9.646688515375014e-06, "loss": 0.6224, "step": 2317 }, { "epoch": 0.15, "grad_norm": 0.9448221325874329, "learning_rate": 9.646309589012379e-06, "loss": 0.6124, "step": 2318 }, { "epoch": 0.15, "grad_norm": 0.8628481030464172, "learning_rate": 9.64593046700981e-06, "loss": 0.5868, "step": 2319 }, { "epoch": 0.15, "grad_norm": 0.9186686873435974, "learning_rate": 9.645551149383272e-06, "loss": 0.6142, "step": 2320 }, { "epoch": 0.15, "grad_norm": 0.8454536199569702, "learning_rate": 9.645171636148736e-06, "loss": 0.5603, "step": 2321 }, { "epoch": 0.15, "grad_norm": 0.904983639717102, "learning_rate": 9.644791927322182e-06, "loss": 0.6052, "step": 2322 }, { "epoch": 0.15, "grad_norm": 0.9742248058319092, "learning_rate": 9.644412022919597e-06, "loss": 0.5941, "step": 2323 }, { "epoch": 0.15, "grad_norm": 0.8749731183052063, "learning_rate": 9.644031922956979e-06, "loss": 0.625, "step": 2324 }, { "epoch": 0.15, "grad_norm": 0.9458450078964233, "learning_rate": 9.64365162745033e-06, "loss": 0.6475, "step": 2325 }, { "epoch": 0.15, "grad_norm": 0.8835443258285522, "learning_rate": 9.643271136415668e-06, "loss": 0.6253, "step": 2326 }, { "epoch": 0.15, "grad_norm": 0.9501144886016846, "learning_rate": 9.642890449869008e-06, "loss": 0.6576, "step": 2327 }, { "epoch": 0.15, "grad_norm": 0.8323443531990051, "learning_rate": 9.642509567826386e-06, "loss": 0.6001, "step": 2328 }, { "epoch": 0.15, "grad_norm": 0.8868235945701599, "learning_rate": 9.642128490303834e-06, "loss": 0.5987, "step": 2329 }, { "epoch": 0.15, "grad_norm": 0.8260801434516907, "learning_rate": 9.6417472173174e-06, "loss": 0.6312, "step": 2330 }, { "epoch": 0.15, "grad_norm": 0.8221123218536377, "learning_rate": 9.64136574888314e-06, "loss": 0.6371, "step": 2331 }, { "epoch": 0.15, "grad_norm": 0.911744236946106, "learning_rate": 9.640984085017113e-06, "loss": 0.6679, "step": 2332 }, { "epoch": 0.15, "grad_norm": 0.8895740509033203, "learning_rate": 9.640602225735391e-06, "loss": 0.6627, "step": 2333 }, { "epoch": 0.15, "grad_norm": 0.8667907118797302, "learning_rate": 9.640220171054054e-06, "loss": 0.6181, "step": 2334 }, { "epoch": 0.15, "grad_norm": 0.9176861643791199, "learning_rate": 9.639837920989188e-06, "loss": 0.6174, "step": 2335 }, { "epoch": 0.15, "grad_norm": 1.0207765102386475, "learning_rate": 9.639455475556887e-06, "loss": 0.6571, "step": 2336 }, { "epoch": 0.15, "grad_norm": 0.9681141972541809, "learning_rate": 9.639072834773254e-06, "loss": 0.6719, "step": 2337 }, { "epoch": 0.15, "grad_norm": 0.8755819797515869, "learning_rate": 9.638689998654404e-06, "loss": 0.5911, "step": 2338 }, { "epoch": 0.15, "grad_norm": 0.9221803545951843, "learning_rate": 9.638306967216453e-06, "loss": 0.6486, "step": 2339 }, { "epoch": 0.15, "grad_norm": 0.8622904419898987, "learning_rate": 9.637923740475534e-06, "loss": 0.5772, "step": 2340 }, { "epoch": 0.15, "grad_norm": 0.888806164264679, "learning_rate": 9.637540318447778e-06, "loss": 0.6504, "step": 2341 }, { "epoch": 0.15, "grad_norm": 0.8896088004112244, "learning_rate": 9.637156701149333e-06, "loss": 0.6623, "step": 2342 }, { "epoch": 0.15, "grad_norm": 0.9848870635032654, "learning_rate": 9.636772888596352e-06, "loss": 0.6652, "step": 2343 }, { "epoch": 0.15, "grad_norm": 0.814385712146759, "learning_rate": 9.636388880804991e-06, "loss": 0.6293, "step": 2344 }, { "epoch": 0.15, "grad_norm": 0.9577558040618896, "learning_rate": 9.636004677791427e-06, "loss": 0.6725, "step": 2345 }, { "epoch": 0.15, "grad_norm": 0.9663403630256653, "learning_rate": 9.635620279571833e-06, "loss": 0.6702, "step": 2346 }, { "epoch": 0.15, "grad_norm": 0.9322980642318726, "learning_rate": 9.635235686162395e-06, "loss": 0.6654, "step": 2347 }, { "epoch": 0.15, "grad_norm": 0.8965892791748047, "learning_rate": 9.634850897579304e-06, "loss": 0.6208, "step": 2348 }, { "epoch": 0.15, "grad_norm": 0.858284592628479, "learning_rate": 9.63446591383877e-06, "loss": 0.6063, "step": 2349 }, { "epoch": 0.15, "grad_norm": 0.932563304901123, "learning_rate": 9.634080734956993e-06, "loss": 0.6188, "step": 2350 }, { "epoch": 0.15, "grad_norm": 0.9433985948562622, "learning_rate": 9.633695360950202e-06, "loss": 0.6515, "step": 2351 }, { "epoch": 0.15, "grad_norm": 0.9088814854621887, "learning_rate": 9.633309791834617e-06, "loss": 0.5985, "step": 2352 }, { "epoch": 0.15, "grad_norm": 0.9924407601356506, "learning_rate": 9.632924027626474e-06, "loss": 0.6527, "step": 2353 }, { "epoch": 0.15, "grad_norm": 0.989184558391571, "learning_rate": 9.632538068342018e-06, "loss": 0.6183, "step": 2354 }, { "epoch": 0.15, "grad_norm": 0.898025631904602, "learning_rate": 9.632151913997498e-06, "loss": 0.6068, "step": 2355 }, { "epoch": 0.15, "grad_norm": 0.8926374912261963, "learning_rate": 9.631765564609177e-06, "loss": 0.588, "step": 2356 }, { "epoch": 0.15, "grad_norm": 0.9426562190055847, "learning_rate": 9.63137902019332e-06, "loss": 0.6104, "step": 2357 }, { "epoch": 0.15, "grad_norm": 0.9089484810829163, "learning_rate": 9.630992280766202e-06, "loss": 0.5981, "step": 2358 }, { "epoch": 0.15, "grad_norm": 0.9309037923812866, "learning_rate": 9.630605346344113e-06, "loss": 0.6064, "step": 2359 }, { "epoch": 0.15, "grad_norm": 0.9744449257850647, "learning_rate": 9.630218216943338e-06, "loss": 0.6856, "step": 2360 }, { "epoch": 0.15, "grad_norm": 0.9766737222671509, "learning_rate": 9.629830892580183e-06, "loss": 0.669, "step": 2361 }, { "epoch": 0.15, "grad_norm": 0.9922558665275574, "learning_rate": 9.629443373270954e-06, "loss": 0.631, "step": 2362 }, { "epoch": 0.15, "grad_norm": 0.960340678691864, "learning_rate": 9.62905565903197e-06, "loss": 0.653, "step": 2363 }, { "epoch": 0.15, "grad_norm": 0.9167748689651489, "learning_rate": 9.628667749879555e-06, "loss": 0.6177, "step": 2364 }, { "epoch": 0.15, "grad_norm": 0.8925089836120605, "learning_rate": 9.628279645830044e-06, "loss": 0.6215, "step": 2365 }, { "epoch": 0.15, "grad_norm": 0.9379563331604004, "learning_rate": 9.627891346899775e-06, "loss": 0.5828, "step": 2366 }, { "epoch": 0.15, "grad_norm": 0.8987218141555786, "learning_rate": 9.627502853105104e-06, "loss": 0.6567, "step": 2367 }, { "epoch": 0.15, "grad_norm": 0.8803840279579163, "learning_rate": 9.627114164462385e-06, "loss": 0.6219, "step": 2368 }, { "epoch": 0.15, "grad_norm": 0.9460154175758362, "learning_rate": 9.626725280987985e-06, "loss": 0.6922, "step": 2369 }, { "epoch": 0.15, "grad_norm": 0.8633837103843689, "learning_rate": 9.626336202698277e-06, "loss": 0.6041, "step": 2370 }, { "epoch": 0.15, "grad_norm": 0.9062354564666748, "learning_rate": 9.625946929609647e-06, "loss": 0.6013, "step": 2371 }, { "epoch": 0.15, "grad_norm": 1.0080102682113647, "learning_rate": 9.625557461738484e-06, "loss": 0.6919, "step": 2372 }, { "epoch": 0.15, "grad_norm": 0.9922934174537659, "learning_rate": 9.625167799101188e-06, "loss": 0.6966, "step": 2373 }, { "epoch": 0.15, "grad_norm": 0.9306240081787109, "learning_rate": 9.624777941714165e-06, "loss": 0.7226, "step": 2374 }, { "epoch": 0.15, "grad_norm": 0.9547491073608398, "learning_rate": 9.624387889593832e-06, "loss": 0.6127, "step": 2375 }, { "epoch": 0.15, "grad_norm": 0.9361152052879333, "learning_rate": 9.62399764275661e-06, "loss": 0.6275, "step": 2376 }, { "epoch": 0.15, "grad_norm": 0.9301709532737732, "learning_rate": 9.623607201218934e-06, "loss": 0.6553, "step": 2377 }, { "epoch": 0.15, "grad_norm": 0.9561883807182312, "learning_rate": 9.623216564997244e-06, "loss": 0.6708, "step": 2378 }, { "epoch": 0.15, "grad_norm": 0.8827099800109863, "learning_rate": 9.622825734107987e-06, "loss": 0.6176, "step": 2379 }, { "epoch": 0.15, "grad_norm": 0.9545076489448547, "learning_rate": 9.62243470856762e-06, "loss": 0.6568, "step": 2380 }, { "epoch": 0.15, "grad_norm": 0.947793185710907, "learning_rate": 9.622043488392607e-06, "loss": 0.6247, "step": 2381 }, { "epoch": 0.15, "grad_norm": 0.8860893249511719, "learning_rate": 9.621652073599423e-06, "loss": 0.6495, "step": 2382 }, { "epoch": 0.15, "grad_norm": 0.852778434753418, "learning_rate": 9.621260464204548e-06, "loss": 0.6111, "step": 2383 }, { "epoch": 0.15, "grad_norm": 0.8790839910507202, "learning_rate": 9.620868660224468e-06, "loss": 0.6269, "step": 2384 }, { "epoch": 0.15, "grad_norm": 0.9253284931182861, "learning_rate": 9.620476661675685e-06, "loss": 0.6211, "step": 2385 }, { "epoch": 0.15, "grad_norm": 0.892335832118988, "learning_rate": 9.620084468574704e-06, "loss": 0.6312, "step": 2386 }, { "epoch": 0.15, "grad_norm": 0.9835995435714722, "learning_rate": 9.619692080938039e-06, "loss": 0.5984, "step": 2387 }, { "epoch": 0.15, "grad_norm": 0.9870280027389526, "learning_rate": 9.61929949878221e-06, "loss": 0.6646, "step": 2388 }, { "epoch": 0.15, "grad_norm": 1.0109413862228394, "learning_rate": 9.618906722123748e-06, "loss": 0.6489, "step": 2389 }, { "epoch": 0.15, "grad_norm": 0.9506871700286865, "learning_rate": 9.618513750979193e-06, "loss": 0.649, "step": 2390 }, { "epoch": 0.15, "grad_norm": 0.8704227209091187, "learning_rate": 9.61812058536509e-06, "loss": 0.5762, "step": 2391 }, { "epoch": 0.15, "grad_norm": 0.9024654626846313, "learning_rate": 9.617727225297994e-06, "loss": 0.6464, "step": 2392 }, { "epoch": 0.15, "grad_norm": 0.9265242218971252, "learning_rate": 9.617333670794468e-06, "loss": 0.627, "step": 2393 }, { "epoch": 0.15, "grad_norm": 0.8859432935714722, "learning_rate": 9.616939921871087e-06, "loss": 0.6211, "step": 2394 }, { "epoch": 0.15, "grad_norm": 0.9842885732650757, "learning_rate": 9.616545978544424e-06, "loss": 0.6308, "step": 2395 }, { "epoch": 0.15, "grad_norm": 0.8890007138252258, "learning_rate": 9.616151840831069e-06, "loss": 0.5769, "step": 2396 }, { "epoch": 0.15, "grad_norm": 0.9050889015197754, "learning_rate": 9.61575750874762e-06, "loss": 0.6224, "step": 2397 }, { "epoch": 0.15, "grad_norm": 0.8961501717567444, "learning_rate": 9.615362982310679e-06, "loss": 0.5271, "step": 2398 }, { "epoch": 0.15, "grad_norm": 0.8966047167778015, "learning_rate": 9.614968261536858e-06, "loss": 0.6134, "step": 2399 }, { "epoch": 0.15, "grad_norm": 1.0056560039520264, "learning_rate": 9.61457334644278e-06, "loss": 0.6931, "step": 2400 }, { "epoch": 0.15, "grad_norm": 0.9624162316322327, "learning_rate": 9.61417823704507e-06, "loss": 0.6242, "step": 2401 }, { "epoch": 0.15, "grad_norm": 0.9640290141105652, "learning_rate": 9.613782933360365e-06, "loss": 0.6799, "step": 2402 }, { "epoch": 0.15, "grad_norm": 0.9172433018684387, "learning_rate": 9.613387435405312e-06, "loss": 0.5416, "step": 2403 }, { "epoch": 0.15, "grad_norm": 0.965398371219635, "learning_rate": 9.612991743196562e-06, "loss": 0.6174, "step": 2404 }, { "epoch": 0.15, "grad_norm": 0.909716010093689, "learning_rate": 9.612595856750776e-06, "loss": 0.6275, "step": 2405 }, { "epoch": 0.15, "grad_norm": 0.9636967182159424, "learning_rate": 9.612199776084627e-06, "loss": 0.6389, "step": 2406 }, { "epoch": 0.15, "grad_norm": 0.8924964070320129, "learning_rate": 9.611803501214789e-06, "loss": 0.6796, "step": 2407 }, { "epoch": 0.15, "grad_norm": 0.9327677488327026, "learning_rate": 9.61140703215795e-06, "loss": 0.612, "step": 2408 }, { "epoch": 0.15, "grad_norm": 0.943336546421051, "learning_rate": 9.611010368930801e-06, "loss": 0.6227, "step": 2409 }, { "epoch": 0.15, "grad_norm": 0.9563452005386353, "learning_rate": 9.610613511550047e-06, "loss": 0.6554, "step": 2410 }, { "epoch": 0.15, "grad_norm": 0.9521295428276062, "learning_rate": 9.610216460032398e-06, "loss": 0.6661, "step": 2411 }, { "epoch": 0.15, "grad_norm": 1.0174225568771362, "learning_rate": 9.60981921439457e-06, "loss": 0.716, "step": 2412 }, { "epoch": 0.15, "grad_norm": 1.0105873346328735, "learning_rate": 9.609421774653291e-06, "loss": 0.6864, "step": 2413 }, { "epoch": 0.15, "grad_norm": 0.9458989500999451, "learning_rate": 9.609024140825299e-06, "loss": 0.5787, "step": 2414 }, { "epoch": 0.15, "grad_norm": 1.0147578716278076, "learning_rate": 9.608626312927331e-06, "loss": 0.6836, "step": 2415 }, { "epoch": 0.15, "grad_norm": 0.9052198529243469, "learning_rate": 9.608228290976143e-06, "loss": 0.6575, "step": 2416 }, { "epoch": 0.15, "grad_norm": 0.9532240629196167, "learning_rate": 9.607830074988491e-06, "loss": 0.7125, "step": 2417 }, { "epoch": 0.15, "grad_norm": 0.9613702297210693, "learning_rate": 9.607431664981144e-06, "loss": 0.6158, "step": 2418 }, { "epoch": 0.15, "grad_norm": 1.0011951923370361, "learning_rate": 9.607033060970878e-06, "loss": 0.6624, "step": 2419 }, { "epoch": 0.15, "grad_norm": 1.0187532901763916, "learning_rate": 9.606634262974477e-06, "loss": 0.6558, "step": 2420 }, { "epoch": 0.15, "grad_norm": 0.9332427382469177, "learning_rate": 9.606235271008732e-06, "loss": 0.5966, "step": 2421 }, { "epoch": 0.15, "grad_norm": 1.0140283107757568, "learning_rate": 9.605836085090445e-06, "loss": 0.6317, "step": 2422 }, { "epoch": 0.15, "grad_norm": 0.9521609544754028, "learning_rate": 9.605436705236421e-06, "loss": 0.624, "step": 2423 }, { "epoch": 0.15, "grad_norm": 0.8743317127227783, "learning_rate": 9.60503713146348e-06, "loss": 0.6424, "step": 2424 }, { "epoch": 0.15, "grad_norm": 0.9343128800392151, "learning_rate": 9.604637363788444e-06, "loss": 0.6336, "step": 2425 }, { "epoch": 0.15, "grad_norm": 0.876990795135498, "learning_rate": 9.604237402228149e-06, "loss": 0.6946, "step": 2426 }, { "epoch": 0.15, "grad_norm": 1.0633113384246826, "learning_rate": 9.603837246799431e-06, "loss": 0.6597, "step": 2427 }, { "epoch": 0.15, "grad_norm": 0.9568866491317749, "learning_rate": 9.603436897519145e-06, "loss": 0.6364, "step": 2428 }, { "epoch": 0.15, "grad_norm": 0.8877198100090027, "learning_rate": 9.603036354404145e-06, "loss": 0.6024, "step": 2429 }, { "epoch": 0.15, "grad_norm": 0.844281792640686, "learning_rate": 9.602635617471295e-06, "loss": 0.6393, "step": 2430 }, { "epoch": 0.15, "grad_norm": 0.8881232738494873, "learning_rate": 9.602234686737473e-06, "loss": 0.6738, "step": 2431 }, { "epoch": 0.15, "grad_norm": 0.8689331412315369, "learning_rate": 9.601833562219556e-06, "loss": 0.6245, "step": 2432 }, { "epoch": 0.15, "grad_norm": 0.8485287427902222, "learning_rate": 9.601432243934437e-06, "loss": 0.5738, "step": 2433 }, { "epoch": 0.15, "grad_norm": 0.910656213760376, "learning_rate": 9.601030731899014e-06, "loss": 0.6129, "step": 2434 }, { "epoch": 0.15, "grad_norm": 0.8227107524871826, "learning_rate": 9.600629026130192e-06, "loss": 0.5835, "step": 2435 }, { "epoch": 0.15, "grad_norm": 0.948371410369873, "learning_rate": 9.600227126644887e-06, "loss": 0.7007, "step": 2436 }, { "epoch": 0.15, "grad_norm": 0.8964093327522278, "learning_rate": 9.59982503346002e-06, "loss": 0.6071, "step": 2437 }, { "epoch": 0.15, "grad_norm": 0.9090175628662109, "learning_rate": 9.599422746592522e-06, "loss": 0.6698, "step": 2438 }, { "epoch": 0.15, "grad_norm": 1.0446149110794067, "learning_rate": 9.599020266059334e-06, "loss": 0.699, "step": 2439 }, { "epoch": 0.15, "grad_norm": 0.8509514331817627, "learning_rate": 9.5986175918774e-06, "loss": 0.6503, "step": 2440 }, { "epoch": 0.15, "grad_norm": 0.9461331367492676, "learning_rate": 9.598214724063678e-06, "loss": 0.6716, "step": 2441 }, { "epoch": 0.15, "grad_norm": 0.8966230750083923, "learning_rate": 9.597811662635128e-06, "loss": 0.6537, "step": 2442 }, { "epoch": 0.15, "grad_norm": 1.0068098306655884, "learning_rate": 9.597408407608725e-06, "loss": 0.6665, "step": 2443 }, { "epoch": 0.15, "grad_norm": 0.9178805351257324, "learning_rate": 9.597004959001447e-06, "loss": 0.628, "step": 2444 }, { "epoch": 0.15, "grad_norm": 0.9293497204780579, "learning_rate": 9.596601316830282e-06, "loss": 0.6272, "step": 2445 }, { "epoch": 0.15, "grad_norm": 0.9563755989074707, "learning_rate": 9.596197481112225e-06, "loss": 0.6115, "step": 2446 }, { "epoch": 0.16, "grad_norm": 0.8711754083633423, "learning_rate": 9.59579345186428e-06, "loss": 0.5987, "step": 2447 }, { "epoch": 0.16, "grad_norm": 0.9303868412971497, "learning_rate": 9.595389229103464e-06, "loss": 0.6427, "step": 2448 }, { "epoch": 0.16, "grad_norm": 0.8827221393585205, "learning_rate": 9.594984812846792e-06, "loss": 0.6017, "step": 2449 }, { "epoch": 0.16, "grad_norm": 0.9278771877288818, "learning_rate": 9.594580203111294e-06, "loss": 0.5994, "step": 2450 }, { "epoch": 0.16, "grad_norm": 0.9450991153717041, "learning_rate": 9.594175399914008e-06, "loss": 0.6128, "step": 2451 }, { "epoch": 0.16, "grad_norm": 0.9174882173538208, "learning_rate": 9.593770403271977e-06, "loss": 0.661, "step": 2452 }, { "epoch": 0.16, "grad_norm": 0.9412451982498169, "learning_rate": 9.593365213202255e-06, "loss": 0.6346, "step": 2453 }, { "epoch": 0.16, "grad_norm": 0.8439229726791382, "learning_rate": 9.592959829721903e-06, "loss": 0.6032, "step": 2454 }, { "epoch": 0.16, "grad_norm": 0.8956865072250366, "learning_rate": 9.59255425284799e-06, "loss": 0.6588, "step": 2455 }, { "epoch": 0.16, "grad_norm": 0.8552918434143066, "learning_rate": 9.592148482597595e-06, "loss": 0.6176, "step": 2456 }, { "epoch": 0.16, "grad_norm": 0.9776921272277832, "learning_rate": 9.591742518987802e-06, "loss": 0.6922, "step": 2457 }, { "epoch": 0.16, "grad_norm": 0.8479081392288208, "learning_rate": 9.591336362035703e-06, "loss": 0.5635, "step": 2458 }, { "epoch": 0.16, "grad_norm": 0.8601279854774475, "learning_rate": 9.590930011758403e-06, "loss": 0.6025, "step": 2459 }, { "epoch": 0.16, "grad_norm": 0.9203231334686279, "learning_rate": 9.590523468173011e-06, "loss": 0.6317, "step": 2460 }, { "epoch": 0.16, "grad_norm": 0.9199931621551514, "learning_rate": 9.590116731296646e-06, "loss": 0.578, "step": 2461 }, { "epoch": 0.16, "grad_norm": 0.8737656474113464, "learning_rate": 9.589709801146432e-06, "loss": 0.6047, "step": 2462 }, { "epoch": 0.16, "grad_norm": 0.8943954706192017, "learning_rate": 9.589302677739506e-06, "loss": 0.6203, "step": 2463 }, { "epoch": 0.16, "grad_norm": 0.9137763381004333, "learning_rate": 9.588895361093009e-06, "loss": 0.6568, "step": 2464 }, { "epoch": 0.16, "grad_norm": 0.9582598805427551, "learning_rate": 9.588487851224091e-06, "loss": 0.6377, "step": 2465 }, { "epoch": 0.16, "grad_norm": 0.9316682815551758, "learning_rate": 9.588080148149912e-06, "loss": 0.6544, "step": 2466 }, { "epoch": 0.16, "grad_norm": 1.0368373394012451, "learning_rate": 9.587672251887639e-06, "loss": 0.7225, "step": 2467 }, { "epoch": 0.16, "grad_norm": 0.8449527621269226, "learning_rate": 9.587264162454447e-06, "loss": 0.5722, "step": 2468 }, { "epoch": 0.16, "grad_norm": 0.8870164155960083, "learning_rate": 9.586855879867519e-06, "loss": 0.6279, "step": 2469 }, { "epoch": 0.16, "grad_norm": 0.9462539553642273, "learning_rate": 9.586447404144046e-06, "loss": 0.6945, "step": 2470 }, { "epoch": 0.16, "grad_norm": 0.9636325240135193, "learning_rate": 9.58603873530123e-06, "loss": 0.626, "step": 2471 }, { "epoch": 0.16, "grad_norm": 0.8742256164550781, "learning_rate": 9.585629873356273e-06, "loss": 0.5091, "step": 2472 }, { "epoch": 0.16, "grad_norm": 0.937807559967041, "learning_rate": 9.585220818326395e-06, "loss": 0.6507, "step": 2473 }, { "epoch": 0.16, "grad_norm": 0.8809791207313538, "learning_rate": 9.58481157022882e-06, "loss": 0.6041, "step": 2474 }, { "epoch": 0.16, "grad_norm": 0.9614810347557068, "learning_rate": 9.584402129080779e-06, "loss": 0.6466, "step": 2475 }, { "epoch": 0.16, "grad_norm": 0.8808587789535522, "learning_rate": 9.583992494899513e-06, "loss": 0.6032, "step": 2476 }, { "epoch": 0.16, "grad_norm": 0.9078788161277771, "learning_rate": 9.583582667702269e-06, "loss": 0.6371, "step": 2477 }, { "epoch": 0.16, "grad_norm": 0.8558230996131897, "learning_rate": 9.583172647506305e-06, "loss": 0.6056, "step": 2478 }, { "epoch": 0.16, "grad_norm": 0.8734446167945862, "learning_rate": 9.582762434328883e-06, "loss": 0.6081, "step": 2479 }, { "epoch": 0.16, "grad_norm": 0.8628250360488892, "learning_rate": 9.582352028187278e-06, "loss": 0.665, "step": 2480 }, { "epoch": 0.16, "grad_norm": 0.8482995629310608, "learning_rate": 9.581941429098769e-06, "loss": 0.5588, "step": 2481 }, { "epoch": 0.16, "grad_norm": 0.9192953109741211, "learning_rate": 9.581530637080647e-06, "loss": 0.6463, "step": 2482 }, { "epoch": 0.16, "grad_norm": 0.9629647135734558, "learning_rate": 9.581119652150208e-06, "loss": 0.6296, "step": 2483 }, { "epoch": 0.16, "grad_norm": 0.9503898620605469, "learning_rate": 9.580708474324755e-06, "loss": 0.6034, "step": 2484 }, { "epoch": 0.16, "grad_norm": 0.8851401209831238, "learning_rate": 9.580297103621605e-06, "loss": 0.6284, "step": 2485 }, { "epoch": 0.16, "grad_norm": 0.9362215399742126, "learning_rate": 9.579885540058079e-06, "loss": 0.6451, "step": 2486 }, { "epoch": 0.16, "grad_norm": 0.8985670804977417, "learning_rate": 9.579473783651503e-06, "loss": 0.6017, "step": 2487 }, { "epoch": 0.16, "grad_norm": 0.977086067199707, "learning_rate": 9.579061834419217e-06, "loss": 0.6823, "step": 2488 }, { "epoch": 0.16, "grad_norm": 0.9364843368530273, "learning_rate": 9.578649692378567e-06, "loss": 0.706, "step": 2489 }, { "epoch": 0.16, "grad_norm": 0.9483008980751038, "learning_rate": 9.578237357546907e-06, "loss": 0.6172, "step": 2490 }, { "epoch": 0.16, "grad_norm": 0.9181289672851562, "learning_rate": 9.577824829941597e-06, "loss": 0.6565, "step": 2491 }, { "epoch": 0.16, "grad_norm": 0.9168728590011597, "learning_rate": 9.577412109580009e-06, "loss": 0.6018, "step": 2492 }, { "epoch": 0.16, "grad_norm": 0.9681271910667419, "learning_rate": 9.57699919647952e-06, "loss": 0.6707, "step": 2493 }, { "epoch": 0.16, "grad_norm": 1.0229047536849976, "learning_rate": 9.576586090657519e-06, "loss": 0.6503, "step": 2494 }, { "epoch": 0.16, "grad_norm": 0.9658745527267456, "learning_rate": 9.576172792131397e-06, "loss": 0.6538, "step": 2495 }, { "epoch": 0.16, "grad_norm": 0.9022778272628784, "learning_rate": 9.57575930091856e-06, "loss": 0.6287, "step": 2496 }, { "epoch": 0.16, "grad_norm": 0.9274746179580688, "learning_rate": 9.575345617036413e-06, "loss": 0.659, "step": 2497 }, { "epoch": 0.16, "grad_norm": 0.8899304270744324, "learning_rate": 9.574931740502383e-06, "loss": 0.6294, "step": 2498 }, { "epoch": 0.16, "grad_norm": 1.072940468788147, "learning_rate": 9.57451767133389e-06, "loss": 0.6603, "step": 2499 }, { "epoch": 0.16, "grad_norm": 0.8845842480659485, "learning_rate": 9.57410340954837e-06, "loss": 0.6408, "step": 2500 }, { "epoch": 0.16, "grad_norm": 0.8758795857429504, "learning_rate": 9.57368895516327e-06, "loss": 0.6419, "step": 2501 }, { "epoch": 0.16, "grad_norm": 0.9652571082115173, "learning_rate": 9.573274308196037e-06, "loss": 0.6189, "step": 2502 }, { "epoch": 0.16, "grad_norm": 0.8658424615859985, "learning_rate": 9.572859468664133e-06, "loss": 0.5963, "step": 2503 }, { "epoch": 0.16, "grad_norm": 0.9083049893379211, "learning_rate": 9.572444436585025e-06, "loss": 0.6744, "step": 2504 }, { "epoch": 0.16, "grad_norm": 0.8568194508552551, "learning_rate": 9.572029211976189e-06, "loss": 0.6413, "step": 2505 }, { "epoch": 0.16, "grad_norm": 0.8805359601974487, "learning_rate": 9.571613794855105e-06, "loss": 0.6408, "step": 2506 }, { "epoch": 0.16, "grad_norm": 0.9113273620605469, "learning_rate": 9.57119818523927e-06, "loss": 0.6041, "step": 2507 }, { "epoch": 0.16, "grad_norm": 0.925477147102356, "learning_rate": 9.570782383146183e-06, "loss": 0.6571, "step": 2508 }, { "epoch": 0.16, "grad_norm": 1.012748122215271, "learning_rate": 9.570366388593347e-06, "loss": 0.6822, "step": 2509 }, { "epoch": 0.16, "grad_norm": 1.0008292198181152, "learning_rate": 9.569950201598283e-06, "loss": 0.6183, "step": 2510 }, { "epoch": 0.16, "grad_norm": 0.8939400911331177, "learning_rate": 9.569533822178513e-06, "loss": 0.6556, "step": 2511 }, { "epoch": 0.16, "grad_norm": 0.8361603021621704, "learning_rate": 9.569117250351571e-06, "loss": 0.6179, "step": 2512 }, { "epoch": 0.16, "grad_norm": 0.9382283687591553, "learning_rate": 9.568700486134996e-06, "loss": 0.6307, "step": 2513 }, { "epoch": 0.16, "grad_norm": 0.9003825783729553, "learning_rate": 9.568283529546336e-06, "loss": 0.5918, "step": 2514 }, { "epoch": 0.16, "grad_norm": 0.9097765684127808, "learning_rate": 9.56786638060315e-06, "loss": 0.6467, "step": 2515 }, { "epoch": 0.16, "grad_norm": 0.938727080821991, "learning_rate": 9.567449039323e-06, "loss": 0.6822, "step": 2516 }, { "epoch": 0.16, "grad_norm": 0.8862230181694031, "learning_rate": 9.56703150572346e-06, "loss": 0.6319, "step": 2517 }, { "epoch": 0.16, "grad_norm": 0.8898985981941223, "learning_rate": 9.56661377982211e-06, "loss": 0.6129, "step": 2518 }, { "epoch": 0.16, "grad_norm": 0.9016578197479248, "learning_rate": 9.566195861636542e-06, "loss": 0.668, "step": 2519 }, { "epoch": 0.16, "grad_norm": 0.8894520401954651, "learning_rate": 9.56577775118435e-06, "loss": 0.6323, "step": 2520 }, { "epoch": 0.16, "grad_norm": 0.9632962346076965, "learning_rate": 9.56535944848314e-06, "loss": 0.7104, "step": 2521 }, { "epoch": 0.16, "grad_norm": 0.8559346199035645, "learning_rate": 9.564940953550525e-06, "loss": 0.6451, "step": 2522 }, { "epoch": 0.16, "grad_norm": 0.9069300293922424, "learning_rate": 9.564522266404127e-06, "loss": 0.6152, "step": 2523 }, { "epoch": 0.16, "grad_norm": 0.9622822403907776, "learning_rate": 9.564103387061575e-06, "loss": 0.5734, "step": 2524 }, { "epoch": 0.16, "grad_norm": 0.9601327776908875, "learning_rate": 9.563684315540507e-06, "loss": 0.6096, "step": 2525 }, { "epoch": 0.16, "grad_norm": 0.905097246170044, "learning_rate": 9.563265051858569e-06, "loss": 0.6449, "step": 2526 }, { "epoch": 0.16, "grad_norm": 0.9115608334541321, "learning_rate": 9.562845596033413e-06, "loss": 0.6879, "step": 2527 }, { "epoch": 0.16, "grad_norm": 0.9223030209541321, "learning_rate": 9.562425948082702e-06, "loss": 0.6029, "step": 2528 }, { "epoch": 0.16, "grad_norm": 0.8907862901687622, "learning_rate": 9.562006108024106e-06, "loss": 0.6018, "step": 2529 }, { "epoch": 0.16, "grad_norm": 0.9722427129745483, "learning_rate": 9.561586075875304e-06, "loss": 0.649, "step": 2530 }, { "epoch": 0.16, "grad_norm": 0.9734516739845276, "learning_rate": 9.56116585165398e-06, "loss": 0.595, "step": 2531 }, { "epoch": 0.16, "grad_norm": 0.9580360651016235, "learning_rate": 9.560745435377828e-06, "loss": 0.604, "step": 2532 }, { "epoch": 0.16, "grad_norm": 0.8849531412124634, "learning_rate": 9.560324827064553e-06, "loss": 0.6313, "step": 2533 }, { "epoch": 0.16, "grad_norm": 0.8849808573722839, "learning_rate": 9.559904026731862e-06, "loss": 0.5895, "step": 2534 }, { "epoch": 0.16, "grad_norm": 0.8286584615707397, "learning_rate": 9.559483034397477e-06, "loss": 0.6168, "step": 2535 }, { "epoch": 0.16, "grad_norm": 0.8422954678535461, "learning_rate": 9.559061850079121e-06, "loss": 0.5688, "step": 2536 }, { "epoch": 0.16, "grad_norm": 0.9304640293121338, "learning_rate": 9.558640473794533e-06, "loss": 0.5911, "step": 2537 }, { "epoch": 0.16, "grad_norm": 0.9410046339035034, "learning_rate": 9.558218905561452e-06, "loss": 0.6099, "step": 2538 }, { "epoch": 0.16, "grad_norm": 0.8600730895996094, "learning_rate": 9.557797145397629e-06, "loss": 0.635, "step": 2539 }, { "epoch": 0.16, "grad_norm": 0.923870325088501, "learning_rate": 9.557375193320824e-06, "loss": 0.6513, "step": 2540 }, { "epoch": 0.16, "grad_norm": 0.9524445533752441, "learning_rate": 9.556953049348803e-06, "loss": 0.6036, "step": 2541 }, { "epoch": 0.16, "grad_norm": 0.945360004901886, "learning_rate": 9.556530713499341e-06, "loss": 0.6471, "step": 2542 }, { "epoch": 0.16, "grad_norm": 1.020447850227356, "learning_rate": 9.556108185790223e-06, "loss": 0.7046, "step": 2543 }, { "epoch": 0.16, "grad_norm": 0.9810319542884827, "learning_rate": 9.55568546623924e-06, "loss": 0.6746, "step": 2544 }, { "epoch": 0.16, "grad_norm": 0.9337319135665894, "learning_rate": 9.555262554864188e-06, "loss": 0.6229, "step": 2545 }, { "epoch": 0.16, "grad_norm": 0.890835165977478, "learning_rate": 9.554839451682876e-06, "loss": 0.5636, "step": 2546 }, { "epoch": 0.16, "grad_norm": 0.8403000831604004, "learning_rate": 9.554416156713121e-06, "loss": 0.6144, "step": 2547 }, { "epoch": 0.16, "grad_norm": 0.8973768353462219, "learning_rate": 9.553992669972744e-06, "loss": 0.6128, "step": 2548 }, { "epoch": 0.16, "grad_norm": 0.912047803401947, "learning_rate": 9.55356899147958e-06, "loss": 0.6295, "step": 2549 }, { "epoch": 0.16, "grad_norm": 0.8875672817230225, "learning_rate": 9.553145121251465e-06, "loss": 0.6375, "step": 2550 }, { "epoch": 0.16, "grad_norm": 0.8986533284187317, "learning_rate": 9.552721059306248e-06, "loss": 0.6332, "step": 2551 }, { "epoch": 0.16, "grad_norm": 0.8964718580245972, "learning_rate": 9.552296805661787e-06, "loss": 0.6369, "step": 2552 }, { "epoch": 0.16, "grad_norm": 0.9571990370750427, "learning_rate": 9.551872360335941e-06, "loss": 0.6474, "step": 2553 }, { "epoch": 0.16, "grad_norm": 0.927249550819397, "learning_rate": 9.551447723346587e-06, "loss": 0.624, "step": 2554 }, { "epoch": 0.16, "grad_norm": 0.9312215447425842, "learning_rate": 9.5510228947116e-06, "loss": 0.6383, "step": 2555 }, { "epoch": 0.16, "grad_norm": 0.9223430156707764, "learning_rate": 9.550597874448874e-06, "loss": 0.6332, "step": 2556 }, { "epoch": 0.16, "grad_norm": 0.8620796799659729, "learning_rate": 9.5501726625763e-06, "loss": 0.6429, "step": 2557 }, { "epoch": 0.16, "grad_norm": 0.8788149356842041, "learning_rate": 9.549747259111786e-06, "loss": 0.6188, "step": 2558 }, { "epoch": 0.16, "grad_norm": 0.9338142275810242, "learning_rate": 9.54932166407324e-06, "loss": 0.6234, "step": 2559 }, { "epoch": 0.16, "grad_norm": 0.8641449213027954, "learning_rate": 9.548895877478585e-06, "loss": 0.6202, "step": 2560 }, { "epoch": 0.16, "grad_norm": 0.9130368828773499, "learning_rate": 9.54846989934575e-06, "loss": 0.6636, "step": 2561 }, { "epoch": 0.16, "grad_norm": 0.9087523818016052, "learning_rate": 9.54804372969267e-06, "loss": 0.6419, "step": 2562 }, { "epoch": 0.16, "grad_norm": 0.8906131982803345, "learning_rate": 9.54761736853729e-06, "loss": 0.5957, "step": 2563 }, { "epoch": 0.16, "grad_norm": 0.8853945732116699, "learning_rate": 9.547190815897563e-06, "loss": 0.5888, "step": 2564 }, { "epoch": 0.16, "grad_norm": 0.951070249080658, "learning_rate": 9.54676407179145e-06, "loss": 0.6681, "step": 2565 }, { "epoch": 0.16, "grad_norm": 0.9170838594436646, "learning_rate": 9.546337136236916e-06, "loss": 0.6224, "step": 2566 }, { "epoch": 0.16, "grad_norm": 0.955334484577179, "learning_rate": 9.545910009251945e-06, "loss": 0.6488, "step": 2567 }, { "epoch": 0.16, "grad_norm": 0.8778351545333862, "learning_rate": 9.545482690854513e-06, "loss": 0.6396, "step": 2568 }, { "epoch": 0.16, "grad_norm": 0.8910854458808899, "learning_rate": 9.545055181062621e-06, "loss": 0.6397, "step": 2569 }, { "epoch": 0.16, "grad_norm": 1.0262346267700195, "learning_rate": 9.544627479894264e-06, "loss": 0.6648, "step": 2570 }, { "epoch": 0.16, "grad_norm": 0.881415843963623, "learning_rate": 9.544199587367455e-06, "loss": 0.6112, "step": 2571 }, { "epoch": 0.16, "grad_norm": 0.8958014249801636, "learning_rate": 9.54377150350021e-06, "loss": 0.6493, "step": 2572 }, { "epoch": 0.16, "grad_norm": 0.9083918929100037, "learning_rate": 9.543343228310551e-06, "loss": 0.578, "step": 2573 }, { "epoch": 0.16, "grad_norm": 0.9322221279144287, "learning_rate": 9.542914761816518e-06, "loss": 0.6487, "step": 2574 }, { "epoch": 0.16, "grad_norm": 0.914716362953186, "learning_rate": 9.542486104036143e-06, "loss": 0.6269, "step": 2575 }, { "epoch": 0.16, "grad_norm": 0.9125852584838867, "learning_rate": 9.542057254987485e-06, "loss": 0.6308, "step": 2576 }, { "epoch": 0.16, "grad_norm": 0.9945306777954102, "learning_rate": 9.541628214688595e-06, "loss": 0.6203, "step": 2577 }, { "epoch": 0.16, "grad_norm": 0.9009057283401489, "learning_rate": 9.541198983157538e-06, "loss": 0.6603, "step": 2578 }, { "epoch": 0.16, "grad_norm": 0.8918367028236389, "learning_rate": 9.54076956041239e-06, "loss": 0.6313, "step": 2579 }, { "epoch": 0.16, "grad_norm": 0.8985729217529297, "learning_rate": 9.540339946471235e-06, "loss": 0.6205, "step": 2580 }, { "epoch": 0.16, "grad_norm": 0.8877277970314026, "learning_rate": 9.539910141352156e-06, "loss": 0.6364, "step": 2581 }, { "epoch": 0.16, "grad_norm": 0.9015381336212158, "learning_rate": 9.539480145073257e-06, "loss": 0.5959, "step": 2582 }, { "epoch": 0.16, "grad_norm": 0.9096758365631104, "learning_rate": 9.53904995765264e-06, "loss": 0.6062, "step": 2583 }, { "epoch": 0.16, "grad_norm": 0.9512980580329895, "learning_rate": 9.538619579108417e-06, "loss": 0.6782, "step": 2584 }, { "epoch": 0.16, "grad_norm": 0.9591136574745178, "learning_rate": 9.538189009458715e-06, "loss": 0.6716, "step": 2585 }, { "epoch": 0.16, "grad_norm": 0.9070512056350708, "learning_rate": 9.53775824872166e-06, "loss": 0.5908, "step": 2586 }, { "epoch": 0.16, "grad_norm": 0.8964409232139587, "learning_rate": 9.53732729691539e-06, "loss": 0.622, "step": 2587 }, { "epoch": 0.16, "grad_norm": 0.9467551708221436, "learning_rate": 9.536896154058053e-06, "loss": 0.6137, "step": 2588 }, { "epoch": 0.16, "grad_norm": 0.8990939259529114, "learning_rate": 9.536464820167804e-06, "loss": 0.6319, "step": 2589 }, { "epoch": 0.16, "grad_norm": 0.8276720643043518, "learning_rate": 9.536033295262799e-06, "loss": 0.5556, "step": 2590 }, { "epoch": 0.16, "grad_norm": 0.8583798408508301, "learning_rate": 9.535601579361214e-06, "loss": 0.5774, "step": 2591 }, { "epoch": 0.16, "grad_norm": 0.9028250575065613, "learning_rate": 9.535169672481222e-06, "loss": 0.629, "step": 2592 }, { "epoch": 0.16, "grad_norm": 0.9669902920722961, "learning_rate": 9.534737574641014e-06, "loss": 0.6514, "step": 2593 }, { "epoch": 0.16, "grad_norm": 0.9334651827812195, "learning_rate": 9.53430528585878e-06, "loss": 0.6385, "step": 2594 }, { "epoch": 0.16, "grad_norm": 0.8801825046539307, "learning_rate": 9.533872806152727e-06, "loss": 0.6043, "step": 2595 }, { "epoch": 0.16, "grad_norm": 0.9169769883155823, "learning_rate": 9.533440135541059e-06, "loss": 0.6202, "step": 2596 }, { "epoch": 0.16, "grad_norm": 0.90007483959198, "learning_rate": 9.533007274042e-06, "loss": 0.6977, "step": 2597 }, { "epoch": 0.16, "grad_norm": 0.887588620185852, "learning_rate": 9.532574221673772e-06, "loss": 0.6228, "step": 2598 }, { "epoch": 0.16, "grad_norm": 0.9043447971343994, "learning_rate": 9.532140978454614e-06, "loss": 0.6192, "step": 2599 }, { "epoch": 0.16, "grad_norm": 0.9651160836219788, "learning_rate": 9.531707544402762e-06, "loss": 0.6675, "step": 2600 }, { "epoch": 0.16, "grad_norm": 0.9440380334854126, "learning_rate": 9.531273919536473e-06, "loss": 0.6294, "step": 2601 }, { "epoch": 0.16, "grad_norm": 0.9147106409072876, "learning_rate": 9.530840103874001e-06, "loss": 0.6483, "step": 2602 }, { "epoch": 0.16, "grad_norm": 0.9056714177131653, "learning_rate": 9.530406097433615e-06, "loss": 0.5734, "step": 2603 }, { "epoch": 0.16, "grad_norm": 0.9497922658920288, "learning_rate": 9.529971900233587e-06, "loss": 0.5915, "step": 2604 }, { "epoch": 0.17, "grad_norm": 0.8961224555969238, "learning_rate": 9.529537512292201e-06, "loss": 0.6239, "step": 2605 }, { "epoch": 0.17, "grad_norm": 0.9149653315544128, "learning_rate": 9.529102933627747e-06, "loss": 0.6477, "step": 2606 }, { "epoch": 0.17, "grad_norm": 0.904569685459137, "learning_rate": 9.528668164258525e-06, "loss": 0.6361, "step": 2607 }, { "epoch": 0.17, "grad_norm": 0.8962168097496033, "learning_rate": 9.528233204202842e-06, "loss": 0.6214, "step": 2608 }, { "epoch": 0.17, "grad_norm": 0.891830325126648, "learning_rate": 9.527798053479009e-06, "loss": 0.5854, "step": 2609 }, { "epoch": 0.17, "grad_norm": 0.9612575173377991, "learning_rate": 9.527362712105353e-06, "loss": 0.6016, "step": 2610 }, { "epoch": 0.17, "grad_norm": 0.9431421756744385, "learning_rate": 9.5269271801002e-06, "loss": 0.6032, "step": 2611 }, { "epoch": 0.17, "grad_norm": 0.8791323304176331, "learning_rate": 9.526491457481895e-06, "loss": 0.6002, "step": 2612 }, { "epoch": 0.17, "grad_norm": 0.9468672275543213, "learning_rate": 9.526055544268778e-06, "loss": 0.6101, "step": 2613 }, { "epoch": 0.17, "grad_norm": 0.8586993217468262, "learning_rate": 9.525619440479209e-06, "loss": 0.5971, "step": 2614 }, { "epoch": 0.17, "grad_norm": 0.88875812292099, "learning_rate": 9.525183146131549e-06, "loss": 0.6711, "step": 2615 }, { "epoch": 0.17, "grad_norm": 0.9012202620506287, "learning_rate": 9.524746661244166e-06, "loss": 0.6357, "step": 2616 }, { "epoch": 0.17, "grad_norm": 0.9038097858428955, "learning_rate": 9.524309985835444e-06, "loss": 0.6106, "step": 2617 }, { "epoch": 0.17, "grad_norm": 0.9143322706222534, "learning_rate": 9.523873119923768e-06, "loss": 0.5951, "step": 2618 }, { "epoch": 0.17, "grad_norm": 0.9046504497528076, "learning_rate": 9.523436063527531e-06, "loss": 0.5902, "step": 2619 }, { "epoch": 0.17, "grad_norm": 0.959321916103363, "learning_rate": 9.522998816665137e-06, "loss": 0.6532, "step": 2620 }, { "epoch": 0.17, "grad_norm": 0.8277800679206848, "learning_rate": 9.522561379354997e-06, "loss": 0.6249, "step": 2621 }, { "epoch": 0.17, "grad_norm": 0.9031876921653748, "learning_rate": 9.522123751615532e-06, "loss": 0.6575, "step": 2622 }, { "epoch": 0.17, "grad_norm": 0.9128404855728149, "learning_rate": 9.521685933465166e-06, "loss": 0.6547, "step": 2623 }, { "epoch": 0.17, "grad_norm": 0.8987665772438049, "learning_rate": 9.521247924922334e-06, "loss": 0.6212, "step": 2624 }, { "epoch": 0.17, "grad_norm": 0.944159209728241, "learning_rate": 9.520809726005481e-06, "loss": 0.5963, "step": 2625 }, { "epoch": 0.17, "grad_norm": 0.9575842022895813, "learning_rate": 9.52037133673306e-06, "loss": 0.5637, "step": 2626 }, { "epoch": 0.17, "grad_norm": 0.8398919105529785, "learning_rate": 9.519932757123523e-06, "loss": 0.664, "step": 2627 }, { "epoch": 0.17, "grad_norm": 0.9531906843185425, "learning_rate": 9.519493987195343e-06, "loss": 0.5932, "step": 2628 }, { "epoch": 0.17, "grad_norm": 0.9427643418312073, "learning_rate": 9.519055026966995e-06, "loss": 0.5979, "step": 2629 }, { "epoch": 0.17, "grad_norm": 0.9445648193359375, "learning_rate": 9.518615876456958e-06, "loss": 0.6406, "step": 2630 }, { "epoch": 0.17, "grad_norm": 0.8915479183197021, "learning_rate": 9.518176535683727e-06, "loss": 0.5887, "step": 2631 }, { "epoch": 0.17, "grad_norm": 0.9278690218925476, "learning_rate": 9.5177370046658e-06, "loss": 0.6604, "step": 2632 }, { "epoch": 0.17, "grad_norm": 0.9619773626327515, "learning_rate": 9.517297283421681e-06, "loss": 0.6622, "step": 2633 }, { "epoch": 0.17, "grad_norm": 0.9478781819343567, "learning_rate": 9.51685737196989e-06, "loss": 0.6336, "step": 2634 }, { "epoch": 0.17, "grad_norm": 0.8679977059364319, "learning_rate": 9.516417270328948e-06, "loss": 0.6031, "step": 2635 }, { "epoch": 0.17, "grad_norm": 0.9029505252838135, "learning_rate": 9.515976978517387e-06, "loss": 0.6204, "step": 2636 }, { "epoch": 0.17, "grad_norm": 0.8872044086456299, "learning_rate": 9.515536496553744e-06, "loss": 0.578, "step": 2637 }, { "epoch": 0.17, "grad_norm": 0.9961317777633667, "learning_rate": 9.515095824456568e-06, "loss": 0.6484, "step": 2638 }, { "epoch": 0.17, "grad_norm": 0.8571626543998718, "learning_rate": 9.514654962244414e-06, "loss": 0.6417, "step": 2639 }, { "epoch": 0.17, "grad_norm": 0.8865385055541992, "learning_rate": 9.514213909935843e-06, "loss": 0.5677, "step": 2640 }, { "epoch": 0.17, "grad_norm": 0.9392569661140442, "learning_rate": 9.51377266754943e-06, "loss": 0.6493, "step": 2641 }, { "epoch": 0.17, "grad_norm": 0.9384260773658752, "learning_rate": 9.513331235103751e-06, "loss": 0.6117, "step": 2642 }, { "epoch": 0.17, "grad_norm": 1.0064356327056885, "learning_rate": 9.512889612617397e-06, "loss": 0.6214, "step": 2643 }, { "epoch": 0.17, "grad_norm": 0.8559515476226807, "learning_rate": 9.512447800108958e-06, "loss": 0.6171, "step": 2644 }, { "epoch": 0.17, "grad_norm": 0.9168458580970764, "learning_rate": 9.512005797597042e-06, "loss": 0.6406, "step": 2645 }, { "epoch": 0.17, "grad_norm": 0.9505908489227295, "learning_rate": 9.511563605100255e-06, "loss": 0.63, "step": 2646 }, { "epoch": 0.17, "grad_norm": 0.9313047528266907, "learning_rate": 9.511121222637222e-06, "loss": 0.6543, "step": 2647 }, { "epoch": 0.17, "grad_norm": 0.8740178346633911, "learning_rate": 9.510678650226567e-06, "loss": 0.5734, "step": 2648 }, { "epoch": 0.17, "grad_norm": 0.9065948128700256, "learning_rate": 9.510235887886923e-06, "loss": 0.6048, "step": 2649 }, { "epoch": 0.17, "grad_norm": 0.9390092492103577, "learning_rate": 9.509792935636939e-06, "loss": 0.5976, "step": 2650 }, { "epoch": 0.17, "grad_norm": 0.9297692179679871, "learning_rate": 9.50934979349526e-06, "loss": 0.5868, "step": 2651 }, { "epoch": 0.17, "grad_norm": 0.9775800704956055, "learning_rate": 9.508906461480549e-06, "loss": 0.6938, "step": 2652 }, { "epoch": 0.17, "grad_norm": 0.934540867805481, "learning_rate": 9.508462939611473e-06, "loss": 0.6, "step": 2653 }, { "epoch": 0.17, "grad_norm": 0.9152988195419312, "learning_rate": 9.508019227906706e-06, "loss": 0.6573, "step": 2654 }, { "epoch": 0.17, "grad_norm": 0.9159802794456482, "learning_rate": 9.507575326384932e-06, "loss": 0.5607, "step": 2655 }, { "epoch": 0.17, "grad_norm": 0.9005085229873657, "learning_rate": 9.507131235064842e-06, "loss": 0.6402, "step": 2656 }, { "epoch": 0.17, "grad_norm": 0.9148140549659729, "learning_rate": 9.506686953965134e-06, "loss": 0.6254, "step": 2657 }, { "epoch": 0.17, "grad_norm": 0.8619657754898071, "learning_rate": 9.506242483104517e-06, "loss": 0.534, "step": 2658 }, { "epoch": 0.17, "grad_norm": 0.8992459774017334, "learning_rate": 9.505797822501704e-06, "loss": 0.6414, "step": 2659 }, { "epoch": 0.17, "grad_norm": 0.9422406554222107, "learning_rate": 9.505352972175419e-06, "loss": 0.6557, "step": 2660 }, { "epoch": 0.17, "grad_norm": 0.9567902088165283, "learning_rate": 9.504907932144394e-06, "loss": 0.6674, "step": 2661 }, { "epoch": 0.17, "grad_norm": 0.9111477136611938, "learning_rate": 9.504462702427369e-06, "loss": 0.634, "step": 2662 }, { "epoch": 0.17, "grad_norm": 0.9020829796791077, "learning_rate": 9.504017283043087e-06, "loss": 0.6443, "step": 2663 }, { "epoch": 0.17, "grad_norm": 0.9128588438034058, "learning_rate": 9.503571674010305e-06, "loss": 0.651, "step": 2664 }, { "epoch": 0.17, "grad_norm": 0.908065676689148, "learning_rate": 9.503125875347789e-06, "loss": 0.6225, "step": 2665 }, { "epoch": 0.17, "grad_norm": 0.9279728531837463, "learning_rate": 9.502679887074306e-06, "loss": 0.6425, "step": 2666 }, { "epoch": 0.17, "grad_norm": 0.8896051645278931, "learning_rate": 9.502233709208637e-06, "loss": 0.6823, "step": 2667 }, { "epoch": 0.17, "grad_norm": 0.9090619087219238, "learning_rate": 9.50178734176957e-06, "loss": 0.5903, "step": 2668 }, { "epoch": 0.17, "grad_norm": 0.8844740986824036, "learning_rate": 9.501340784775896e-06, "loss": 0.6276, "step": 2669 }, { "epoch": 0.17, "grad_norm": 0.9212251901626587, "learning_rate": 9.500894038246424e-06, "loss": 0.5796, "step": 2670 }, { "epoch": 0.17, "grad_norm": 0.9225980639457703, "learning_rate": 9.50044710219996e-06, "loss": 0.6326, "step": 2671 }, { "epoch": 0.17, "grad_norm": 0.9283084869384766, "learning_rate": 9.499999976655324e-06, "loss": 0.6165, "step": 2672 }, { "epoch": 0.17, "grad_norm": 0.8648502826690674, "learning_rate": 9.499552661631342e-06, "loss": 0.6137, "step": 2673 }, { "epoch": 0.17, "grad_norm": 0.88034588098526, "learning_rate": 9.49910515714685e-06, "loss": 0.594, "step": 2674 }, { "epoch": 0.17, "grad_norm": 0.841262698173523, "learning_rate": 9.498657463220694e-06, "loss": 0.5953, "step": 2675 }, { "epoch": 0.17, "grad_norm": 0.9340731501579285, "learning_rate": 9.49820957987172e-06, "loss": 0.6236, "step": 2676 }, { "epoch": 0.17, "grad_norm": 0.898252546787262, "learning_rate": 9.49776150711879e-06, "loss": 0.5813, "step": 2677 }, { "epoch": 0.17, "grad_norm": 0.8751718997955322, "learning_rate": 9.497313244980768e-06, "loss": 0.5712, "step": 2678 }, { "epoch": 0.17, "grad_norm": 0.8850248456001282, "learning_rate": 9.496864793476532e-06, "loss": 0.6464, "step": 2679 }, { "epoch": 0.17, "grad_norm": 0.9821275472640991, "learning_rate": 9.49641615262496e-06, "loss": 0.6297, "step": 2680 }, { "epoch": 0.17, "grad_norm": 0.8436826467514038, "learning_rate": 9.49596732244495e-06, "loss": 0.5828, "step": 2681 }, { "epoch": 0.17, "grad_norm": 0.9077553749084473, "learning_rate": 9.495518302955393e-06, "loss": 0.6651, "step": 2682 }, { "epoch": 0.17, "grad_norm": 0.9323903322219849, "learning_rate": 9.4950690941752e-06, "loss": 0.6516, "step": 2683 }, { "epoch": 0.17, "grad_norm": 1.0304430723190308, "learning_rate": 9.494619696123286e-06, "loss": 0.6534, "step": 2684 }, { "epoch": 0.17, "grad_norm": 0.9509037137031555, "learning_rate": 9.49417010881857e-06, "loss": 0.6013, "step": 2685 }, { "epoch": 0.17, "grad_norm": 0.8547189831733704, "learning_rate": 9.493720332279987e-06, "loss": 0.5765, "step": 2686 }, { "epoch": 0.17, "grad_norm": 0.9771583676338196, "learning_rate": 9.493270366526471e-06, "loss": 0.6383, "step": 2687 }, { "epoch": 0.17, "grad_norm": 0.9149676561355591, "learning_rate": 9.492820211576971e-06, "loss": 0.6117, "step": 2688 }, { "epoch": 0.17, "grad_norm": 0.8924671411514282, "learning_rate": 9.492369867450444e-06, "loss": 0.5931, "step": 2689 }, { "epoch": 0.17, "grad_norm": 0.9182107448577881, "learning_rate": 9.491919334165846e-06, "loss": 0.6233, "step": 2690 }, { "epoch": 0.17, "grad_norm": 0.9452329277992249, "learning_rate": 9.491468611742154e-06, "loss": 0.7153, "step": 2691 }, { "epoch": 0.17, "grad_norm": 0.9435275197029114, "learning_rate": 9.491017700198343e-06, "loss": 0.6737, "step": 2692 }, { "epoch": 0.17, "grad_norm": 0.9835942387580872, "learning_rate": 9.490566599553399e-06, "loss": 0.6323, "step": 2693 }, { "epoch": 0.17, "grad_norm": 1.015770673751831, "learning_rate": 9.490115309826317e-06, "loss": 0.6106, "step": 2694 }, { "epoch": 0.17, "grad_norm": 0.9940273761749268, "learning_rate": 9.4896638310361e-06, "loss": 0.6326, "step": 2695 }, { "epoch": 0.17, "grad_norm": 0.9595569968223572, "learning_rate": 9.489212163201758e-06, "loss": 0.6314, "step": 2696 }, { "epoch": 0.17, "grad_norm": 0.918870747089386, "learning_rate": 9.488760306342307e-06, "loss": 0.6369, "step": 2697 }, { "epoch": 0.17, "grad_norm": 0.9247921705245972, "learning_rate": 9.488308260476776e-06, "loss": 0.5877, "step": 2698 }, { "epoch": 0.17, "grad_norm": 0.8694366812705994, "learning_rate": 9.487856025624196e-06, "loss": 0.6188, "step": 2699 }, { "epoch": 0.17, "grad_norm": 0.9364984631538391, "learning_rate": 9.487403601803614e-06, "loss": 0.5841, "step": 2700 }, { "epoch": 0.17, "grad_norm": 0.8980706930160522, "learning_rate": 9.486950989034074e-06, "loss": 0.6324, "step": 2701 }, { "epoch": 0.17, "grad_norm": 0.8469223380088806, "learning_rate": 9.486498187334636e-06, "loss": 0.5997, "step": 2702 }, { "epoch": 0.17, "grad_norm": 0.9805670976638794, "learning_rate": 9.48604519672437e-06, "loss": 0.6745, "step": 2703 }, { "epoch": 0.17, "grad_norm": 0.9122759103775024, "learning_rate": 9.485592017222344e-06, "loss": 0.6904, "step": 2704 }, { "epoch": 0.17, "grad_norm": 0.9132962822914124, "learning_rate": 9.485138648847643e-06, "loss": 0.5926, "step": 2705 }, { "epoch": 0.17, "grad_norm": 0.8468869924545288, "learning_rate": 9.484685091619358e-06, "loss": 0.6072, "step": 2706 }, { "epoch": 0.17, "grad_norm": 0.9402836561203003, "learning_rate": 9.484231345556582e-06, "loss": 0.6308, "step": 2707 }, { "epoch": 0.17, "grad_norm": 0.8940732479095459, "learning_rate": 9.483777410678427e-06, "loss": 0.63, "step": 2708 }, { "epoch": 0.17, "grad_norm": 0.886562705039978, "learning_rate": 9.483323287004001e-06, "loss": 0.5811, "step": 2709 }, { "epoch": 0.17, "grad_norm": 0.9191167950630188, "learning_rate": 9.482868974552427e-06, "loss": 0.6349, "step": 2710 }, { "epoch": 0.17, "grad_norm": 0.936594545841217, "learning_rate": 9.482414473342835e-06, "loss": 0.72, "step": 2711 }, { "epoch": 0.17, "grad_norm": 0.9029736518859863, "learning_rate": 9.481959783394365e-06, "loss": 0.6818, "step": 2712 }, { "epoch": 0.17, "grad_norm": 0.9597886800765991, "learning_rate": 9.48150490472616e-06, "loss": 0.6462, "step": 2713 }, { "epoch": 0.17, "grad_norm": 0.9007745385169983, "learning_rate": 9.481049837357371e-06, "loss": 0.6234, "step": 2714 }, { "epoch": 0.17, "grad_norm": 0.8033143877983093, "learning_rate": 9.480594581307164e-06, "loss": 0.5724, "step": 2715 }, { "epoch": 0.17, "grad_norm": 0.856959879398346, "learning_rate": 9.480139136594706e-06, "loss": 0.5977, "step": 2716 }, { "epoch": 0.17, "grad_norm": 0.9320681095123291, "learning_rate": 9.479683503239172e-06, "loss": 0.6452, "step": 2717 }, { "epoch": 0.17, "grad_norm": 0.8906647562980652, "learning_rate": 9.479227681259751e-06, "loss": 0.6675, "step": 2718 }, { "epoch": 0.17, "grad_norm": 0.8599271774291992, "learning_rate": 9.478771670675635e-06, "loss": 0.6287, "step": 2719 }, { "epoch": 0.17, "grad_norm": 0.8469679355621338, "learning_rate": 9.478315471506023e-06, "loss": 0.5967, "step": 2720 }, { "epoch": 0.17, "grad_norm": 0.8832866549491882, "learning_rate": 9.477859083770126e-06, "loss": 0.6506, "step": 2721 }, { "epoch": 0.17, "grad_norm": 0.8781976699829102, "learning_rate": 9.477402507487162e-06, "loss": 0.6026, "step": 2722 }, { "epoch": 0.17, "grad_norm": 0.9236262440681458, "learning_rate": 9.476945742676352e-06, "loss": 0.5791, "step": 2723 }, { "epoch": 0.17, "grad_norm": 0.9180050492286682, "learning_rate": 9.476488789356933e-06, "loss": 0.5972, "step": 2724 }, { "epoch": 0.17, "grad_norm": 0.8968567848205566, "learning_rate": 9.47603164754814e-06, "loss": 0.6701, "step": 2725 }, { "epoch": 0.17, "grad_norm": 0.9011199474334717, "learning_rate": 9.47557431726923e-06, "loss": 0.6389, "step": 2726 }, { "epoch": 0.17, "grad_norm": 0.964178204536438, "learning_rate": 9.475116798539451e-06, "loss": 0.6804, "step": 2727 }, { "epoch": 0.17, "grad_norm": 0.9103108048439026, "learning_rate": 9.474659091378074e-06, "loss": 0.5935, "step": 2728 }, { "epoch": 0.17, "grad_norm": 0.9424949884414673, "learning_rate": 9.474201195804367e-06, "loss": 0.6662, "step": 2729 }, { "epoch": 0.17, "grad_norm": 0.9513722658157349, "learning_rate": 9.473743111837612e-06, "loss": 0.6526, "step": 2730 }, { "epoch": 0.17, "grad_norm": 0.9301340579986572, "learning_rate": 9.4732848394971e-06, "loss": 0.5824, "step": 2731 }, { "epoch": 0.17, "grad_norm": 0.9112258553504944, "learning_rate": 9.472826378802122e-06, "loss": 0.6287, "step": 2732 }, { "epoch": 0.17, "grad_norm": 0.9196444749832153, "learning_rate": 9.472367729771987e-06, "loss": 0.6376, "step": 2733 }, { "epoch": 0.17, "grad_norm": 0.9066518545150757, "learning_rate": 9.471908892426005e-06, "loss": 0.6648, "step": 2734 }, { "epoch": 0.17, "grad_norm": 0.8786914944648743, "learning_rate": 9.471449866783495e-06, "loss": 0.6161, "step": 2735 }, { "epoch": 0.17, "grad_norm": 0.9169754385948181, "learning_rate": 9.470990652863787e-06, "loss": 0.6643, "step": 2736 }, { "epoch": 0.17, "grad_norm": 0.9611136317253113, "learning_rate": 9.470531250686216e-06, "loss": 0.6446, "step": 2737 }, { "epoch": 0.17, "grad_norm": 0.9478945732116699, "learning_rate": 9.470071660270126e-06, "loss": 0.6436, "step": 2738 }, { "epoch": 0.17, "grad_norm": 0.8549840450286865, "learning_rate": 9.469611881634868e-06, "loss": 0.607, "step": 2739 }, { "epoch": 0.17, "grad_norm": 0.9151300191879272, "learning_rate": 9.469151914799803e-06, "loss": 0.5987, "step": 2740 }, { "epoch": 0.17, "grad_norm": 0.87184077501297, "learning_rate": 9.468691759784298e-06, "loss": 0.6307, "step": 2741 }, { "epoch": 0.17, "grad_norm": 0.9251417517662048, "learning_rate": 9.468231416607727e-06, "loss": 0.5822, "step": 2742 }, { "epoch": 0.17, "grad_norm": 0.9144605994224548, "learning_rate": 9.467770885289477e-06, "loss": 0.5699, "step": 2743 }, { "epoch": 0.17, "grad_norm": 0.8591218590736389, "learning_rate": 9.467310165848935e-06, "loss": 0.6483, "step": 2744 }, { "epoch": 0.17, "grad_norm": 0.8842750787734985, "learning_rate": 9.466849258305504e-06, "loss": 0.6478, "step": 2745 }, { "epoch": 0.17, "grad_norm": 0.8982271552085876, "learning_rate": 9.46638816267859e-06, "loss": 0.6189, "step": 2746 }, { "epoch": 0.17, "grad_norm": 1.1078075170516968, "learning_rate": 9.465926878987609e-06, "loss": 0.652, "step": 2747 }, { "epoch": 0.17, "grad_norm": 0.9062262773513794, "learning_rate": 9.46546540725198e-06, "loss": 0.6205, "step": 2748 }, { "epoch": 0.17, "grad_norm": 0.9785717725753784, "learning_rate": 9.465003747491138e-06, "loss": 0.6586, "step": 2749 }, { "epoch": 0.17, "grad_norm": 0.9226608276367188, "learning_rate": 9.464541899724522e-06, "loss": 0.6167, "step": 2750 }, { "epoch": 0.17, "grad_norm": 0.9549429416656494, "learning_rate": 9.464079863971576e-06, "loss": 0.6093, "step": 2751 }, { "epoch": 0.17, "grad_norm": 0.9625465869903564, "learning_rate": 9.463617640251756e-06, "loss": 0.6058, "step": 2752 }, { "epoch": 0.17, "grad_norm": 0.917473316192627, "learning_rate": 9.463155228584526e-06, "loss": 0.608, "step": 2753 }, { "epoch": 0.17, "grad_norm": 0.969939649105072, "learning_rate": 9.462692628989356e-06, "loss": 0.5676, "step": 2754 }, { "epoch": 0.17, "grad_norm": 0.9174929857254028, "learning_rate": 9.462229841485723e-06, "loss": 0.6664, "step": 2755 }, { "epoch": 0.17, "grad_norm": 0.9567301273345947, "learning_rate": 9.461766866093117e-06, "loss": 0.6435, "step": 2756 }, { "epoch": 0.17, "grad_norm": 0.8922646045684814, "learning_rate": 9.461303702831026e-06, "loss": 0.5949, "step": 2757 }, { "epoch": 0.17, "grad_norm": 0.8556625843048096, "learning_rate": 9.460840351718958e-06, "loss": 0.5995, "step": 2758 }, { "epoch": 0.17, "grad_norm": 0.9240930676460266, "learning_rate": 9.46037681277642e-06, "loss": 0.6158, "step": 2759 }, { "epoch": 0.17, "grad_norm": 0.9151474833488464, "learning_rate": 9.459913086022931e-06, "loss": 0.6091, "step": 2760 }, { "epoch": 0.17, "grad_norm": 0.937988817691803, "learning_rate": 9.459449171478017e-06, "loss": 0.5562, "step": 2761 }, { "epoch": 0.17, "grad_norm": 0.8838707804679871, "learning_rate": 9.458985069161212e-06, "loss": 0.5736, "step": 2762 }, { "epoch": 0.18, "grad_norm": 0.9612347483634949, "learning_rate": 9.458520779092057e-06, "loss": 0.5838, "step": 2763 }, { "epoch": 0.18, "grad_norm": 1.0034922361373901, "learning_rate": 9.458056301290102e-06, "loss": 0.6895, "step": 2764 }, { "epoch": 0.18, "grad_norm": 0.9068509340286255, "learning_rate": 9.457591635774905e-06, "loss": 0.687, "step": 2765 }, { "epoch": 0.18, "grad_norm": 0.9105919599533081, "learning_rate": 9.457126782566031e-06, "loss": 0.6629, "step": 2766 }, { "epoch": 0.18, "grad_norm": 0.9419427514076233, "learning_rate": 9.456661741683054e-06, "loss": 0.6553, "step": 2767 }, { "epoch": 0.18, "grad_norm": 0.9317494034767151, "learning_rate": 9.456196513145553e-06, "loss": 0.619, "step": 2768 }, { "epoch": 0.18, "grad_norm": 0.9247744679450989, "learning_rate": 9.455731096973119e-06, "loss": 0.6352, "step": 2769 }, { "epoch": 0.18, "grad_norm": 0.9570684432983398, "learning_rate": 9.455265493185349e-06, "loss": 0.6674, "step": 2770 }, { "epoch": 0.18, "grad_norm": 0.9092298150062561, "learning_rate": 9.454799701801849e-06, "loss": 0.6136, "step": 2771 }, { "epoch": 0.18, "grad_norm": 0.9638829827308655, "learning_rate": 9.45433372284223e-06, "loss": 0.6206, "step": 2772 }, { "epoch": 0.18, "grad_norm": 1.1069514751434326, "learning_rate": 9.453867556326113e-06, "loss": 0.6166, "step": 2773 }, { "epoch": 0.18, "grad_norm": 0.958802342414856, "learning_rate": 9.453401202273127e-06, "loss": 0.6009, "step": 2774 }, { "epoch": 0.18, "grad_norm": 0.8832184076309204, "learning_rate": 9.45293466070291e-06, "loss": 0.609, "step": 2775 }, { "epoch": 0.18, "grad_norm": 0.9852387309074402, "learning_rate": 9.452467931635104e-06, "loss": 0.6633, "step": 2776 }, { "epoch": 0.18, "grad_norm": 0.8827134370803833, "learning_rate": 9.452001015089363e-06, "loss": 0.6112, "step": 2777 }, { "epoch": 0.18, "grad_norm": 0.9104273915290833, "learning_rate": 9.451533911085346e-06, "loss": 0.6043, "step": 2778 }, { "epoch": 0.18, "grad_norm": 0.9635795950889587, "learning_rate": 9.451066619642721e-06, "loss": 0.628, "step": 2779 }, { "epoch": 0.18, "grad_norm": 0.9080226421356201, "learning_rate": 9.450599140781166e-06, "loss": 0.6428, "step": 2780 }, { "epoch": 0.18, "grad_norm": 0.8342934846878052, "learning_rate": 9.450131474520364e-06, "loss": 0.6056, "step": 2781 }, { "epoch": 0.18, "grad_norm": 0.8714557886123657, "learning_rate": 9.449663620880006e-06, "loss": 0.6105, "step": 2782 }, { "epoch": 0.18, "grad_norm": 0.8582709431648254, "learning_rate": 9.449195579879793e-06, "loss": 0.6117, "step": 2783 }, { "epoch": 0.18, "grad_norm": 1.0167529582977295, "learning_rate": 9.448727351539431e-06, "loss": 0.6551, "step": 2784 }, { "epoch": 0.18, "grad_norm": 0.8866241574287415, "learning_rate": 9.448258935878635e-06, "loss": 0.623, "step": 2785 }, { "epoch": 0.18, "grad_norm": 0.9443932771682739, "learning_rate": 9.44779033291713e-06, "loss": 0.6456, "step": 2786 }, { "epoch": 0.18, "grad_norm": 0.9517203569412231, "learning_rate": 9.447321542674647e-06, "loss": 0.6439, "step": 2787 }, { "epoch": 0.18, "grad_norm": 0.9734207987785339, "learning_rate": 9.446852565170928e-06, "loss": 0.6553, "step": 2788 }, { "epoch": 0.18, "grad_norm": 0.898755669593811, "learning_rate": 9.446383400425713e-06, "loss": 0.6615, "step": 2789 }, { "epoch": 0.18, "grad_norm": 0.9627699851989746, "learning_rate": 9.445914048458764e-06, "loss": 0.5574, "step": 2790 }, { "epoch": 0.18, "grad_norm": 0.8621180057525635, "learning_rate": 9.445444509289838e-06, "loss": 0.6064, "step": 2791 }, { "epoch": 0.18, "grad_norm": 0.9321991205215454, "learning_rate": 9.44497478293871e-06, "loss": 0.6189, "step": 2792 }, { "epoch": 0.18, "grad_norm": 0.9137430787086487, "learning_rate": 9.444504869425154e-06, "loss": 0.6378, "step": 2793 }, { "epoch": 0.18, "grad_norm": 0.9660084843635559, "learning_rate": 9.44403476876896e-06, "loss": 0.6376, "step": 2794 }, { "epoch": 0.18, "grad_norm": 0.8711713552474976, "learning_rate": 9.443564480989924e-06, "loss": 0.6145, "step": 2795 }, { "epoch": 0.18, "grad_norm": 0.8694255352020264, "learning_rate": 9.443094006107844e-06, "loss": 0.6109, "step": 2796 }, { "epoch": 0.18, "grad_norm": 0.9288530945777893, "learning_rate": 9.442623344142534e-06, "loss": 0.6055, "step": 2797 }, { "epoch": 0.18, "grad_norm": 0.9127347469329834, "learning_rate": 9.442152495113808e-06, "loss": 0.6153, "step": 2798 }, { "epoch": 0.18, "grad_norm": 0.8872652053833008, "learning_rate": 9.441681459041494e-06, "loss": 0.6426, "step": 2799 }, { "epoch": 0.18, "grad_norm": 0.9660980105400085, "learning_rate": 9.441210235945425e-06, "loss": 0.6255, "step": 2800 }, { "epoch": 0.18, "grad_norm": 0.8567848801612854, "learning_rate": 9.440738825845441e-06, "loss": 0.6009, "step": 2801 }, { "epoch": 0.18, "grad_norm": 0.9663728475570679, "learning_rate": 9.440267228761395e-06, "loss": 0.6588, "step": 2802 }, { "epoch": 0.18, "grad_norm": 0.9529426097869873, "learning_rate": 9.439795444713143e-06, "loss": 0.6628, "step": 2803 }, { "epoch": 0.18, "grad_norm": 0.929195761680603, "learning_rate": 9.43932347372055e-06, "loss": 0.6209, "step": 2804 }, { "epoch": 0.18, "grad_norm": 0.9078366160392761, "learning_rate": 9.438851315803488e-06, "loss": 0.5669, "step": 2805 }, { "epoch": 0.18, "grad_norm": 0.9016088247299194, "learning_rate": 9.438378970981839e-06, "loss": 0.6074, "step": 2806 }, { "epoch": 0.18, "grad_norm": 0.9534980654716492, "learning_rate": 9.43790643927549e-06, "loss": 0.7098, "step": 2807 }, { "epoch": 0.18, "grad_norm": 0.8913077116012573, "learning_rate": 9.437433720704342e-06, "loss": 0.586, "step": 2808 }, { "epoch": 0.18, "grad_norm": 1.0161441564559937, "learning_rate": 9.436960815288294e-06, "loss": 0.6038, "step": 2809 }, { "epoch": 0.18, "grad_norm": 0.8946830034255981, "learning_rate": 9.436487723047263e-06, "loss": 0.6169, "step": 2810 }, { "epoch": 0.18, "grad_norm": 0.9344162344932556, "learning_rate": 9.436014444001167e-06, "loss": 0.6332, "step": 2811 }, { "epoch": 0.18, "grad_norm": 0.8833682537078857, "learning_rate": 9.435540978169933e-06, "loss": 0.6148, "step": 2812 }, { "epoch": 0.18, "grad_norm": 0.9014259576797485, "learning_rate": 9.435067325573499e-06, "loss": 0.6617, "step": 2813 }, { "epoch": 0.18, "grad_norm": 0.8786671757698059, "learning_rate": 9.43459348623181e-06, "loss": 0.6741, "step": 2814 }, { "epoch": 0.18, "grad_norm": 0.9095485806465149, "learning_rate": 9.434119460164816e-06, "loss": 0.5859, "step": 2815 }, { "epoch": 0.18, "grad_norm": 0.9492687582969666, "learning_rate": 9.433645247392476e-06, "loss": 0.6005, "step": 2816 }, { "epoch": 0.18, "grad_norm": 0.9836667776107788, "learning_rate": 9.433170847934759e-06, "loss": 0.673, "step": 2817 }, { "epoch": 0.18, "grad_norm": 0.9654482007026672, "learning_rate": 9.432696261811637e-06, "loss": 0.6462, "step": 2818 }, { "epoch": 0.18, "grad_norm": 0.919657826423645, "learning_rate": 9.432221489043097e-06, "loss": 0.6495, "step": 2819 }, { "epoch": 0.18, "grad_norm": 0.928325355052948, "learning_rate": 9.43174652964913e-06, "loss": 0.6354, "step": 2820 }, { "epoch": 0.18, "grad_norm": 1.0097019672393799, "learning_rate": 9.431271383649731e-06, "loss": 0.636, "step": 2821 }, { "epoch": 0.18, "grad_norm": 0.8387419581413269, "learning_rate": 9.430796051064913e-06, "loss": 0.6435, "step": 2822 }, { "epoch": 0.18, "grad_norm": 0.9152708649635315, "learning_rate": 9.430320531914683e-06, "loss": 0.6436, "step": 2823 }, { "epoch": 0.18, "grad_norm": 0.9267799854278564, "learning_rate": 9.42984482621907e-06, "loss": 0.6528, "step": 2824 }, { "epoch": 0.18, "grad_norm": 0.8546323776245117, "learning_rate": 9.4293689339981e-06, "loss": 0.5591, "step": 2825 }, { "epoch": 0.18, "grad_norm": 1.015834093093872, "learning_rate": 9.428892855271813e-06, "loss": 0.7004, "step": 2826 }, { "epoch": 0.18, "grad_norm": 0.9022856950759888, "learning_rate": 9.428416590060256e-06, "loss": 0.6214, "step": 2827 }, { "epoch": 0.18, "grad_norm": 0.9249994158744812, "learning_rate": 9.427940138383482e-06, "loss": 0.6688, "step": 2828 }, { "epoch": 0.18, "grad_norm": 0.8863480091094971, "learning_rate": 9.427463500261551e-06, "loss": 0.6651, "step": 2829 }, { "epoch": 0.18, "grad_norm": 0.8578901290893555, "learning_rate": 9.426986675714535e-06, "loss": 0.5767, "step": 2830 }, { "epoch": 0.18, "grad_norm": 0.8513709902763367, "learning_rate": 9.426509664762509e-06, "loss": 0.545, "step": 2831 }, { "epoch": 0.18, "grad_norm": 0.9681910872459412, "learning_rate": 9.42603246742556e-06, "loss": 0.6421, "step": 2832 }, { "epoch": 0.18, "grad_norm": 0.9950567483901978, "learning_rate": 9.425555083723783e-06, "loss": 0.6663, "step": 2833 }, { "epoch": 0.18, "grad_norm": 0.9001085162162781, "learning_rate": 9.425077513677276e-06, "loss": 0.61, "step": 2834 }, { "epoch": 0.18, "grad_norm": 0.9015680551528931, "learning_rate": 9.424599757306148e-06, "loss": 0.6296, "step": 2835 }, { "epoch": 0.18, "grad_norm": 0.862308144569397, "learning_rate": 9.424121814630516e-06, "loss": 0.5494, "step": 2836 }, { "epoch": 0.18, "grad_norm": 0.913428008556366, "learning_rate": 9.423643685670504e-06, "loss": 0.6652, "step": 2837 }, { "epoch": 0.18, "grad_norm": 0.8796103000640869, "learning_rate": 9.423165370446249e-06, "loss": 0.5867, "step": 2838 }, { "epoch": 0.18, "grad_norm": 0.9445327520370483, "learning_rate": 9.422686868977884e-06, "loss": 0.5812, "step": 2839 }, { "epoch": 0.18, "grad_norm": 1.0006681680679321, "learning_rate": 9.42220818128556e-06, "loss": 0.6484, "step": 2840 }, { "epoch": 0.18, "grad_norm": 0.9889962077140808, "learning_rate": 9.421729307389435e-06, "loss": 0.6266, "step": 2841 }, { "epoch": 0.18, "grad_norm": 0.8913476467132568, "learning_rate": 9.42125024730967e-06, "loss": 0.6197, "step": 2842 }, { "epoch": 0.18, "grad_norm": 1.0092391967773438, "learning_rate": 9.420771001066439e-06, "loss": 0.6748, "step": 2843 }, { "epoch": 0.18, "grad_norm": 0.9135981202125549, "learning_rate": 9.420291568679917e-06, "loss": 0.6796, "step": 2844 }, { "epoch": 0.18, "grad_norm": 0.9135114550590515, "learning_rate": 9.419811950170294e-06, "loss": 0.6444, "step": 2845 }, { "epoch": 0.18, "grad_norm": 0.9234583377838135, "learning_rate": 9.419332145557768e-06, "loss": 0.652, "step": 2846 }, { "epoch": 0.18, "grad_norm": 1.013744831085205, "learning_rate": 9.418852154862538e-06, "loss": 0.6552, "step": 2847 }, { "epoch": 0.18, "grad_norm": 0.8808279633522034, "learning_rate": 9.418371978104816e-06, "loss": 0.6126, "step": 2848 }, { "epoch": 0.18, "grad_norm": 0.9165722131729126, "learning_rate": 9.41789161530482e-06, "loss": 0.6558, "step": 2849 }, { "epoch": 0.18, "grad_norm": 0.9362298250198364, "learning_rate": 9.417411066482777e-06, "loss": 0.6204, "step": 2850 }, { "epoch": 0.18, "grad_norm": 0.9138143658638, "learning_rate": 9.41693033165892e-06, "loss": 0.6359, "step": 2851 }, { "epoch": 0.18, "grad_norm": 0.8916357755661011, "learning_rate": 9.416449410853495e-06, "loss": 0.6234, "step": 2852 }, { "epoch": 0.18, "grad_norm": 1.0022516250610352, "learning_rate": 9.415968304086746e-06, "loss": 0.6353, "step": 2853 }, { "epoch": 0.18, "grad_norm": 0.8648804426193237, "learning_rate": 9.415487011378935e-06, "loss": 0.6154, "step": 2854 }, { "epoch": 0.18, "grad_norm": 0.9364731311798096, "learning_rate": 9.415005532750326e-06, "loss": 0.5895, "step": 2855 }, { "epoch": 0.18, "grad_norm": 0.961506187915802, "learning_rate": 9.414523868221192e-06, "loss": 0.6945, "step": 2856 }, { "epoch": 0.18, "grad_norm": 0.8515611886978149, "learning_rate": 9.414042017811817e-06, "loss": 0.6253, "step": 2857 }, { "epoch": 0.18, "grad_norm": 0.8460178375244141, "learning_rate": 9.413559981542486e-06, "loss": 0.6468, "step": 2858 }, { "epoch": 0.18, "grad_norm": 0.9305799603462219, "learning_rate": 9.413077759433498e-06, "loss": 0.644, "step": 2859 }, { "epoch": 0.18, "grad_norm": 0.9662857055664062, "learning_rate": 9.412595351505158e-06, "loss": 0.6078, "step": 2860 }, { "epoch": 0.18, "grad_norm": 0.8659998178482056, "learning_rate": 9.412112757777777e-06, "loss": 0.6536, "step": 2861 }, { "epoch": 0.18, "grad_norm": 0.9392401576042175, "learning_rate": 9.411629978271679e-06, "loss": 0.6528, "step": 2862 }, { "epoch": 0.18, "grad_norm": 0.9797030687332153, "learning_rate": 9.411147013007188e-06, "loss": 0.6421, "step": 2863 }, { "epoch": 0.18, "grad_norm": 0.9174354672431946, "learning_rate": 9.41066386200464e-06, "loss": 0.6395, "step": 2864 }, { "epoch": 0.18, "grad_norm": 0.8441389203071594, "learning_rate": 9.410180525284384e-06, "loss": 0.5613, "step": 2865 }, { "epoch": 0.18, "grad_norm": 0.8536418080329895, "learning_rate": 9.409697002866765e-06, "loss": 0.5965, "step": 2866 }, { "epoch": 0.18, "grad_norm": 0.9425634145736694, "learning_rate": 9.409213294772147e-06, "loss": 0.6575, "step": 2867 }, { "epoch": 0.18, "grad_norm": 0.9369651079177856, "learning_rate": 9.408729401020896e-06, "loss": 0.6457, "step": 2868 }, { "epoch": 0.18, "grad_norm": 0.9151921272277832, "learning_rate": 9.408245321633385e-06, "loss": 0.6186, "step": 2869 }, { "epoch": 0.18, "grad_norm": 0.8802269697189331, "learning_rate": 9.407761056629999e-06, "loss": 0.5875, "step": 2870 }, { "epoch": 0.18, "grad_norm": 0.9625882506370544, "learning_rate": 9.40727660603113e-06, "loss": 0.6164, "step": 2871 }, { "epoch": 0.18, "grad_norm": 1.045422911643982, "learning_rate": 9.406791969857173e-06, "loss": 0.6814, "step": 2872 }, { "epoch": 0.18, "grad_norm": 0.9631166458129883, "learning_rate": 9.406307148128537e-06, "loss": 0.6423, "step": 2873 }, { "epoch": 0.18, "grad_norm": 1.0093629360198975, "learning_rate": 9.405822140865636e-06, "loss": 0.6567, "step": 2874 }, { "epoch": 0.18, "grad_norm": 0.8591984510421753, "learning_rate": 9.40533694808889e-06, "loss": 0.5637, "step": 2875 }, { "epoch": 0.18, "grad_norm": 0.8643238544464111, "learning_rate": 9.404851569818731e-06, "loss": 0.6406, "step": 2876 }, { "epoch": 0.18, "grad_norm": 1.0501185655593872, "learning_rate": 9.404366006075596e-06, "loss": 0.7109, "step": 2877 }, { "epoch": 0.18, "grad_norm": 0.9728371500968933, "learning_rate": 9.403880256879931e-06, "loss": 0.6682, "step": 2878 }, { "epoch": 0.18, "grad_norm": 0.9601030349731445, "learning_rate": 9.403394322252186e-06, "loss": 0.6103, "step": 2879 }, { "epoch": 0.18, "grad_norm": 0.8728528618812561, "learning_rate": 9.402908202212826e-06, "loss": 0.5668, "step": 2880 }, { "epoch": 0.18, "grad_norm": 0.9390819072723389, "learning_rate": 9.402421896782319e-06, "loss": 0.6126, "step": 2881 }, { "epoch": 0.18, "grad_norm": 0.8935672044754028, "learning_rate": 9.401935405981138e-06, "loss": 0.6379, "step": 2882 }, { "epoch": 0.18, "grad_norm": 0.8943515419960022, "learning_rate": 9.401448729829773e-06, "loss": 0.6343, "step": 2883 }, { "epoch": 0.18, "grad_norm": 0.8738617300987244, "learning_rate": 9.400961868348713e-06, "loss": 0.6473, "step": 2884 }, { "epoch": 0.18, "grad_norm": 0.8747914433479309, "learning_rate": 9.400474821558457e-06, "loss": 0.5848, "step": 2885 }, { "epoch": 0.18, "grad_norm": 0.8880560994148254, "learning_rate": 9.399987589479516e-06, "loss": 0.6521, "step": 2886 }, { "epoch": 0.18, "grad_norm": 0.9082310199737549, "learning_rate": 9.399500172132403e-06, "loss": 0.6059, "step": 2887 }, { "epoch": 0.18, "grad_norm": 0.945617139339447, "learning_rate": 9.399012569537643e-06, "loss": 0.6031, "step": 2888 }, { "epoch": 0.18, "grad_norm": 0.9733775854110718, "learning_rate": 9.398524781715767e-06, "loss": 0.6393, "step": 2889 }, { "epoch": 0.18, "grad_norm": 0.9139953851699829, "learning_rate": 9.398036808687314e-06, "loss": 0.5984, "step": 2890 }, { "epoch": 0.18, "grad_norm": 0.896920919418335, "learning_rate": 9.39754865047283e-06, "loss": 0.6455, "step": 2891 }, { "epoch": 0.18, "grad_norm": 0.8665996193885803, "learning_rate": 9.39706030709287e-06, "loss": 0.5565, "step": 2892 }, { "epoch": 0.18, "grad_norm": 0.9414594173431396, "learning_rate": 9.396571778567997e-06, "loss": 0.6157, "step": 2893 }, { "epoch": 0.18, "grad_norm": 0.9462769031524658, "learning_rate": 9.396083064918782e-06, "loss": 0.6198, "step": 2894 }, { "epoch": 0.18, "grad_norm": 0.9393496513366699, "learning_rate": 9.3955941661658e-06, "loss": 0.6631, "step": 2895 }, { "epoch": 0.18, "grad_norm": 0.8664448857307434, "learning_rate": 9.39510508232964e-06, "loss": 0.5953, "step": 2896 }, { "epoch": 0.18, "grad_norm": 0.8992229700088501, "learning_rate": 9.394615813430895e-06, "loss": 0.604, "step": 2897 }, { "epoch": 0.18, "grad_norm": 0.9086971282958984, "learning_rate": 9.394126359490166e-06, "loss": 0.6294, "step": 2898 }, { "epoch": 0.18, "grad_norm": 0.9720740914344788, "learning_rate": 9.393636720528061e-06, "loss": 0.6259, "step": 2899 }, { "epoch": 0.18, "grad_norm": 0.9365057349205017, "learning_rate": 9.393146896565197e-06, "loss": 0.6621, "step": 2900 }, { "epoch": 0.18, "grad_norm": 1.003066897392273, "learning_rate": 9.392656887622202e-06, "loss": 0.639, "step": 2901 }, { "epoch": 0.18, "grad_norm": 0.9764630794525146, "learning_rate": 9.392166693719706e-06, "loss": 0.6113, "step": 2902 }, { "epoch": 0.18, "grad_norm": 0.901775598526001, "learning_rate": 9.391676314878348e-06, "loss": 0.6274, "step": 2903 }, { "epoch": 0.18, "grad_norm": 0.8916495442390442, "learning_rate": 9.391185751118782e-06, "loss": 0.6762, "step": 2904 }, { "epoch": 0.18, "grad_norm": 0.9183511137962341, "learning_rate": 9.390695002461657e-06, "loss": 0.6358, "step": 2905 }, { "epoch": 0.18, "grad_norm": 0.8651520609855652, "learning_rate": 9.390204068927638e-06, "loss": 0.629, "step": 2906 }, { "epoch": 0.18, "grad_norm": 0.9377386569976807, "learning_rate": 9.389712950537399e-06, "loss": 0.638, "step": 2907 }, { "epoch": 0.18, "grad_norm": 0.9569635987281799, "learning_rate": 9.38922164731162e-06, "loss": 0.6532, "step": 2908 }, { "epoch": 0.18, "grad_norm": 0.8829088807106018, "learning_rate": 9.388730159270984e-06, "loss": 0.524, "step": 2909 }, { "epoch": 0.18, "grad_norm": 0.8493028879165649, "learning_rate": 9.38823848643619e-06, "loss": 0.6161, "step": 2910 }, { "epoch": 0.18, "grad_norm": 0.9601280689239502, "learning_rate": 9.38774662882794e-06, "loss": 0.7469, "step": 2911 }, { "epoch": 0.18, "grad_norm": 0.9152663946151733, "learning_rate": 9.387254586466942e-06, "loss": 0.6003, "step": 2912 }, { "epoch": 0.18, "grad_norm": 0.8507892489433289, "learning_rate": 9.386762359373915e-06, "loss": 0.6036, "step": 2913 }, { "epoch": 0.18, "grad_norm": 0.8200054168701172, "learning_rate": 9.386269947569585e-06, "loss": 0.5787, "step": 2914 }, { "epoch": 0.18, "grad_norm": 0.9165661334991455, "learning_rate": 9.385777351074688e-06, "loss": 0.6487, "step": 2915 }, { "epoch": 0.18, "grad_norm": 0.9168198108673096, "learning_rate": 9.385284569909963e-06, "loss": 0.628, "step": 2916 }, { "epoch": 0.18, "grad_norm": 0.8360828161239624, "learning_rate": 9.38479160409616e-06, "loss": 0.6177, "step": 2917 }, { "epoch": 0.18, "grad_norm": 0.9536049962043762, "learning_rate": 9.384298453654037e-06, "loss": 0.6575, "step": 2918 }, { "epoch": 0.18, "grad_norm": 0.8601536154747009, "learning_rate": 9.383805118604357e-06, "loss": 0.6362, "step": 2919 }, { "epoch": 0.18, "grad_norm": 0.90377277135849, "learning_rate": 9.383311598967892e-06, "loss": 0.6232, "step": 2920 }, { "epoch": 0.19, "grad_norm": 0.9626878499984741, "learning_rate": 9.382817894765426e-06, "loss": 0.7203, "step": 2921 }, { "epoch": 0.19, "grad_norm": 0.9490284323692322, "learning_rate": 9.382324006017746e-06, "loss": 0.6665, "step": 2922 }, { "epoch": 0.19, "grad_norm": 0.9522665739059448, "learning_rate": 9.381829932745646e-06, "loss": 0.5931, "step": 2923 }, { "epoch": 0.19, "grad_norm": 0.9432849884033203, "learning_rate": 9.38133567496993e-06, "loss": 0.6349, "step": 2924 }, { "epoch": 0.19, "grad_norm": 0.9463351368904114, "learning_rate": 9.380841232711412e-06, "loss": 0.6388, "step": 2925 }, { "epoch": 0.19, "grad_norm": 0.9243980646133423, "learning_rate": 9.38034660599091e-06, "loss": 0.5969, "step": 2926 }, { "epoch": 0.19, "grad_norm": 0.8849940299987793, "learning_rate": 9.379851794829247e-06, "loss": 0.59, "step": 2927 }, { "epoch": 0.19, "grad_norm": 0.8930582404136658, "learning_rate": 9.379356799247263e-06, "loss": 0.6975, "step": 2928 }, { "epoch": 0.19, "grad_norm": 0.8990230560302734, "learning_rate": 9.3788616192658e-06, "loss": 0.6572, "step": 2929 }, { "epoch": 0.19, "grad_norm": 0.9136034250259399, "learning_rate": 9.378366254905706e-06, "loss": 0.6293, "step": 2930 }, { "epoch": 0.19, "grad_norm": 0.8949812054634094, "learning_rate": 9.377870706187841e-06, "loss": 0.5834, "step": 2931 }, { "epoch": 0.19, "grad_norm": 0.9137758016586304, "learning_rate": 9.37737497313307e-06, "loss": 0.5751, "step": 2932 }, { "epoch": 0.19, "grad_norm": 0.8495166301727295, "learning_rate": 9.376879055762267e-06, "loss": 0.5503, "step": 2933 }, { "epoch": 0.19, "grad_norm": 0.900567889213562, "learning_rate": 9.376382954096312e-06, "loss": 0.6444, "step": 2934 }, { "epoch": 0.19, "grad_norm": 0.8367151021957397, "learning_rate": 9.375886668156095e-06, "loss": 0.6396, "step": 2935 }, { "epoch": 0.19, "grad_norm": 0.9150660634040833, "learning_rate": 9.375390197962514e-06, "loss": 0.5965, "step": 2936 }, { "epoch": 0.19, "grad_norm": 0.9402359127998352, "learning_rate": 9.374893543536471e-06, "loss": 0.5989, "step": 2937 }, { "epoch": 0.19, "grad_norm": 0.9302038550376892, "learning_rate": 9.374396704898883e-06, "loss": 0.6621, "step": 2938 }, { "epoch": 0.19, "grad_norm": 0.9301861524581909, "learning_rate": 9.373899682070664e-06, "loss": 0.6565, "step": 2939 }, { "epoch": 0.19, "grad_norm": 0.9140589237213135, "learning_rate": 9.373402475072746e-06, "loss": 0.6323, "step": 2940 }, { "epoch": 0.19, "grad_norm": 0.8676826357841492, "learning_rate": 9.372905083926064e-06, "loss": 0.6269, "step": 2941 }, { "epoch": 0.19, "grad_norm": 0.8796793818473816, "learning_rate": 9.372407508651561e-06, "loss": 0.6185, "step": 2942 }, { "epoch": 0.19, "grad_norm": 0.8520810604095459, "learning_rate": 9.371909749270189e-06, "loss": 0.5918, "step": 2943 }, { "epoch": 0.19, "grad_norm": 0.8967190384864807, "learning_rate": 9.371411805802907e-06, "loss": 0.6327, "step": 2944 }, { "epoch": 0.19, "grad_norm": 0.9223288893699646, "learning_rate": 9.370913678270678e-06, "loss": 0.5942, "step": 2945 }, { "epoch": 0.19, "grad_norm": 0.8543033599853516, "learning_rate": 9.370415366694483e-06, "loss": 0.6218, "step": 2946 }, { "epoch": 0.19, "grad_norm": 0.9239391684532166, "learning_rate": 9.369916871095299e-06, "loss": 0.6351, "step": 2947 }, { "epoch": 0.19, "grad_norm": 0.8336774110794067, "learning_rate": 9.369418191494117e-06, "loss": 0.6284, "step": 2948 }, { "epoch": 0.19, "grad_norm": 0.9170488119125366, "learning_rate": 9.368919327911934e-06, "loss": 0.6324, "step": 2949 }, { "epoch": 0.19, "grad_norm": 0.9082837104797363, "learning_rate": 9.368420280369759e-06, "loss": 0.6565, "step": 2950 }, { "epoch": 0.19, "grad_norm": 0.8999912738800049, "learning_rate": 9.3679210488886e-06, "loss": 0.6246, "step": 2951 }, { "epoch": 0.19, "grad_norm": 0.8772600293159485, "learning_rate": 9.367421633489482e-06, "loss": 0.6999, "step": 2952 }, { "epoch": 0.19, "grad_norm": 0.8400874733924866, "learning_rate": 9.366922034193431e-06, "loss": 0.6037, "step": 2953 }, { "epoch": 0.19, "grad_norm": 0.9329245090484619, "learning_rate": 9.366422251021486e-06, "loss": 0.6171, "step": 2954 }, { "epoch": 0.19, "grad_norm": 0.8737487196922302, "learning_rate": 9.365922283994689e-06, "loss": 0.6164, "step": 2955 }, { "epoch": 0.19, "grad_norm": 0.9520554542541504, "learning_rate": 9.365422133134093e-06, "loss": 0.6257, "step": 2956 }, { "epoch": 0.19, "grad_norm": 0.8844176530838013, "learning_rate": 9.364921798460756e-06, "loss": 0.6177, "step": 2957 }, { "epoch": 0.19, "grad_norm": 0.9074828028678894, "learning_rate": 9.364421279995747e-06, "loss": 0.6601, "step": 2958 }, { "epoch": 0.19, "grad_norm": 0.8782038688659668, "learning_rate": 9.36392057776014e-06, "loss": 0.6358, "step": 2959 }, { "epoch": 0.19, "grad_norm": 0.9199265241622925, "learning_rate": 9.36341969177502e-06, "loss": 0.5666, "step": 2960 }, { "epoch": 0.19, "grad_norm": 0.887235164642334, "learning_rate": 9.362918622061475e-06, "loss": 0.6011, "step": 2961 }, { "epoch": 0.19, "grad_norm": 0.882902204990387, "learning_rate": 9.362417368640604e-06, "loss": 0.5865, "step": 2962 }, { "epoch": 0.19, "grad_norm": 0.8558497428894043, "learning_rate": 9.361915931533513e-06, "loss": 0.6236, "step": 2963 }, { "epoch": 0.19, "grad_norm": 0.879278838634491, "learning_rate": 9.36141431076132e-06, "loss": 0.6493, "step": 2964 }, { "epoch": 0.19, "grad_norm": 0.929413378238678, "learning_rate": 9.360912506345139e-06, "loss": 0.6075, "step": 2965 }, { "epoch": 0.19, "grad_norm": 0.8678581118583679, "learning_rate": 9.360410518306103e-06, "loss": 0.5635, "step": 2966 }, { "epoch": 0.19, "grad_norm": 0.8952652812004089, "learning_rate": 9.359908346665349e-06, "loss": 0.5775, "step": 2967 }, { "epoch": 0.19, "grad_norm": 0.9281927943229675, "learning_rate": 9.359405991444022e-06, "loss": 0.6704, "step": 2968 }, { "epoch": 0.19, "grad_norm": 0.958543598651886, "learning_rate": 9.358903452663273e-06, "loss": 0.6666, "step": 2969 }, { "epoch": 0.19, "grad_norm": 0.8834668397903442, "learning_rate": 9.358400730344265e-06, "loss": 0.6254, "step": 2970 }, { "epoch": 0.19, "grad_norm": 0.8528700470924377, "learning_rate": 9.357897824508163e-06, "loss": 0.5954, "step": 2971 }, { "epoch": 0.19, "grad_norm": 0.9526364207267761, "learning_rate": 9.357394735176144e-06, "loss": 0.6404, "step": 2972 }, { "epoch": 0.19, "grad_norm": 0.8991536498069763, "learning_rate": 9.356891462369391e-06, "loss": 0.6091, "step": 2973 }, { "epoch": 0.19, "grad_norm": 0.916277289390564, "learning_rate": 9.356388006109094e-06, "loss": 0.6429, "step": 2974 }, { "epoch": 0.19, "grad_norm": 0.8340256214141846, "learning_rate": 9.355884366416454e-06, "loss": 0.6042, "step": 2975 }, { "epoch": 0.19, "grad_norm": 0.9151265621185303, "learning_rate": 9.355380543312676e-06, "loss": 0.6805, "step": 2976 }, { "epoch": 0.19, "grad_norm": 0.9069379568099976, "learning_rate": 9.354876536818974e-06, "loss": 0.6437, "step": 2977 }, { "epoch": 0.19, "grad_norm": 0.8915377259254456, "learning_rate": 9.35437234695657e-06, "loss": 0.6179, "step": 2978 }, { "epoch": 0.19, "grad_norm": 0.9558995366096497, "learning_rate": 9.353867973746696e-06, "loss": 0.6258, "step": 2979 }, { "epoch": 0.19, "grad_norm": 0.8677237629890442, "learning_rate": 9.353363417210587e-06, "loss": 0.5676, "step": 2980 }, { "epoch": 0.19, "grad_norm": 0.9419227838516235, "learning_rate": 9.352858677369488e-06, "loss": 0.655, "step": 2981 }, { "epoch": 0.19, "grad_norm": 0.8914104700088501, "learning_rate": 9.352353754244654e-06, "loss": 0.5995, "step": 2982 }, { "epoch": 0.19, "grad_norm": 0.8787503838539124, "learning_rate": 9.351848647857343e-06, "loss": 0.6032, "step": 2983 }, { "epoch": 0.19, "grad_norm": 0.856158971786499, "learning_rate": 9.351343358228825e-06, "loss": 0.5997, "step": 2984 }, { "epoch": 0.19, "grad_norm": 0.8458609580993652, "learning_rate": 9.350837885380375e-06, "loss": 0.6437, "step": 2985 }, { "epoch": 0.19, "grad_norm": 0.9018574953079224, "learning_rate": 9.350332229333276e-06, "loss": 0.607, "step": 2986 }, { "epoch": 0.19, "grad_norm": 0.8946293592453003, "learning_rate": 9.349826390108823e-06, "loss": 0.6058, "step": 2987 }, { "epoch": 0.19, "grad_norm": 0.8331573605537415, "learning_rate": 9.349320367728312e-06, "loss": 0.5638, "step": 2988 }, { "epoch": 0.19, "grad_norm": 0.8785397410392761, "learning_rate": 9.34881416221305e-06, "loss": 0.6192, "step": 2989 }, { "epoch": 0.19, "grad_norm": 0.9098055958747864, "learning_rate": 9.348307773584351e-06, "loss": 0.6112, "step": 2990 }, { "epoch": 0.19, "grad_norm": 0.9168455600738525, "learning_rate": 9.34780120186354e-06, "loss": 0.6383, "step": 2991 }, { "epoch": 0.19, "grad_norm": 0.9003625512123108, "learning_rate": 9.347294447071945e-06, "loss": 0.6049, "step": 2992 }, { "epoch": 0.19, "grad_norm": 0.9203317165374756, "learning_rate": 9.346787509230903e-06, "loss": 0.6291, "step": 2993 }, { "epoch": 0.19, "grad_norm": 0.8706764578819275, "learning_rate": 9.346280388361761e-06, "loss": 0.6065, "step": 2994 }, { "epoch": 0.19, "grad_norm": 0.8694605827331543, "learning_rate": 9.34577308448587e-06, "loss": 0.6128, "step": 2995 }, { "epoch": 0.19, "grad_norm": 0.8985933065414429, "learning_rate": 9.345265597624595e-06, "loss": 0.6279, "step": 2996 }, { "epoch": 0.19, "grad_norm": 0.9904145002365112, "learning_rate": 9.344757927799299e-06, "loss": 0.6427, "step": 2997 }, { "epoch": 0.19, "grad_norm": 0.943004846572876, "learning_rate": 9.344250075031362e-06, "loss": 0.6155, "step": 2998 }, { "epoch": 0.19, "grad_norm": 0.8913044929504395, "learning_rate": 9.343742039342168e-06, "loss": 0.5856, "step": 2999 }, { "epoch": 0.19, "grad_norm": 0.8846127986907959, "learning_rate": 9.343233820753107e-06, "loss": 0.6393, "step": 3000 }, { "epoch": 0.19, "grad_norm": 0.8753595948219299, "learning_rate": 9.34272541928558e-06, "loss": 0.6275, "step": 3001 }, { "epoch": 0.19, "grad_norm": 0.9006514549255371, "learning_rate": 9.34221683496099e-06, "loss": 0.5802, "step": 3002 }, { "epoch": 0.19, "grad_norm": 0.9199402332305908, "learning_rate": 9.341708067800757e-06, "loss": 0.5987, "step": 3003 }, { "epoch": 0.19, "grad_norm": 0.9146287441253662, "learning_rate": 9.341199117826298e-06, "loss": 0.6328, "step": 3004 }, { "epoch": 0.19, "grad_norm": 0.8784115314483643, "learning_rate": 9.340689985059048e-06, "loss": 0.6419, "step": 3005 }, { "epoch": 0.19, "grad_norm": 0.8956212997436523, "learning_rate": 9.340180669520443e-06, "loss": 0.6327, "step": 3006 }, { "epoch": 0.19, "grad_norm": 0.9466882944107056, "learning_rate": 9.339671171231929e-06, "loss": 0.6233, "step": 3007 }, { "epoch": 0.19, "grad_norm": 0.8755168318748474, "learning_rate": 9.339161490214957e-06, "loss": 0.6351, "step": 3008 }, { "epoch": 0.19, "grad_norm": 0.8940410614013672, "learning_rate": 9.33865162649099e-06, "loss": 0.6076, "step": 3009 }, { "epoch": 0.19, "grad_norm": 0.833020031452179, "learning_rate": 9.338141580081496e-06, "loss": 0.5649, "step": 3010 }, { "epoch": 0.19, "grad_norm": 0.950567364692688, "learning_rate": 9.337631351007953e-06, "loss": 0.5768, "step": 3011 }, { "epoch": 0.19, "grad_norm": 0.9161326289176941, "learning_rate": 9.337120939291842e-06, "loss": 0.6158, "step": 3012 }, { "epoch": 0.19, "grad_norm": 0.9443663954734802, "learning_rate": 9.336610344954656e-06, "loss": 0.5929, "step": 3013 }, { "epoch": 0.19, "grad_norm": 0.9030787348747253, "learning_rate": 9.336099568017895e-06, "loss": 0.583, "step": 3014 }, { "epoch": 0.19, "grad_norm": 0.984470546245575, "learning_rate": 9.335588608503065e-06, "loss": 0.6265, "step": 3015 }, { "epoch": 0.19, "grad_norm": 0.9294076561927795, "learning_rate": 9.33507746643168e-06, "loss": 0.6035, "step": 3016 }, { "epoch": 0.19, "grad_norm": 0.8897981643676758, "learning_rate": 9.334566141825266e-06, "loss": 0.649, "step": 3017 }, { "epoch": 0.19, "grad_norm": 0.8603422045707703, "learning_rate": 9.334054634705347e-06, "loss": 0.5839, "step": 3018 }, { "epoch": 0.19, "grad_norm": 0.9341859817504883, "learning_rate": 9.333542945093468e-06, "loss": 0.6234, "step": 3019 }, { "epoch": 0.19, "grad_norm": 0.885899007320404, "learning_rate": 9.333031073011169e-06, "loss": 0.6592, "step": 3020 }, { "epoch": 0.19, "grad_norm": 0.9095667004585266, "learning_rate": 9.332519018480005e-06, "loss": 0.6809, "step": 3021 }, { "epoch": 0.19, "grad_norm": 0.8997942805290222, "learning_rate": 9.332006781521537e-06, "loss": 0.5953, "step": 3022 }, { "epoch": 0.19, "grad_norm": 0.8883410692214966, "learning_rate": 9.331494362157335e-06, "loss": 0.5812, "step": 3023 }, { "epoch": 0.19, "grad_norm": 0.9656973481178284, "learning_rate": 9.330981760408972e-06, "loss": 0.6112, "step": 3024 }, { "epoch": 0.19, "grad_norm": 0.862815260887146, "learning_rate": 9.330468976298033e-06, "loss": 0.6099, "step": 3025 }, { "epoch": 0.19, "grad_norm": 0.9055874347686768, "learning_rate": 9.329956009846111e-06, "loss": 0.6427, "step": 3026 }, { "epoch": 0.19, "grad_norm": 0.9218257665634155, "learning_rate": 9.329442861074803e-06, "loss": 0.6451, "step": 3027 }, { "epoch": 0.19, "grad_norm": 0.8925780057907104, "learning_rate": 9.328929530005717e-06, "loss": 0.6358, "step": 3028 }, { "epoch": 0.19, "grad_norm": 0.9461687803268433, "learning_rate": 9.328416016660471e-06, "loss": 0.6319, "step": 3029 }, { "epoch": 0.19, "grad_norm": 0.8816470503807068, "learning_rate": 9.327902321060681e-06, "loss": 0.5662, "step": 3030 }, { "epoch": 0.19, "grad_norm": 0.8994545936584473, "learning_rate": 9.327388443227981e-06, "loss": 0.6594, "step": 3031 }, { "epoch": 0.19, "grad_norm": 0.9947099685668945, "learning_rate": 9.326874383184006e-06, "loss": 0.6412, "step": 3032 }, { "epoch": 0.19, "grad_norm": 0.9207108020782471, "learning_rate": 9.326360140950406e-06, "loss": 0.6029, "step": 3033 }, { "epoch": 0.19, "grad_norm": 0.9070324897766113, "learning_rate": 9.325845716548827e-06, "loss": 0.648, "step": 3034 }, { "epoch": 0.19, "grad_norm": 0.959884762763977, "learning_rate": 9.325331110000937e-06, "loss": 0.6598, "step": 3035 }, { "epoch": 0.19, "grad_norm": 0.9339284896850586, "learning_rate": 9.324816321328398e-06, "loss": 0.6646, "step": 3036 }, { "epoch": 0.19, "grad_norm": 0.852982759475708, "learning_rate": 9.324301350552889e-06, "loss": 0.6305, "step": 3037 }, { "epoch": 0.19, "grad_norm": 0.9116036891937256, "learning_rate": 9.323786197696094e-06, "loss": 0.6283, "step": 3038 }, { "epoch": 0.19, "grad_norm": 0.9738210439682007, "learning_rate": 9.323270862779704e-06, "loss": 0.6546, "step": 3039 }, { "epoch": 0.19, "grad_norm": 0.8786873817443848, "learning_rate": 9.322755345825418e-06, "loss": 0.5488, "step": 3040 }, { "epoch": 0.19, "grad_norm": 0.891937255859375, "learning_rate": 9.32223964685494e-06, "loss": 0.6073, "step": 3041 }, { "epoch": 0.19, "grad_norm": 0.9519621133804321, "learning_rate": 9.321723765889987e-06, "loss": 0.6103, "step": 3042 }, { "epoch": 0.19, "grad_norm": 0.9369633197784424, "learning_rate": 9.321207702952281e-06, "loss": 0.6551, "step": 3043 }, { "epoch": 0.19, "grad_norm": 1.0724352598190308, "learning_rate": 9.320691458063552e-06, "loss": 0.644, "step": 3044 }, { "epoch": 0.19, "grad_norm": 0.8826418519020081, "learning_rate": 9.320175031245535e-06, "loss": 0.6231, "step": 3045 }, { "epoch": 0.19, "grad_norm": 0.9791775345802307, "learning_rate": 9.319658422519977e-06, "loss": 0.6646, "step": 3046 }, { "epoch": 0.19, "grad_norm": 1.0244020223617554, "learning_rate": 9.319141631908628e-06, "loss": 0.6662, "step": 3047 }, { "epoch": 0.19, "grad_norm": 0.8816352486610413, "learning_rate": 9.318624659433254e-06, "loss": 0.6291, "step": 3048 }, { "epoch": 0.19, "grad_norm": 0.9350719451904297, "learning_rate": 9.318107505115615e-06, "loss": 0.6312, "step": 3049 }, { "epoch": 0.19, "grad_norm": 0.8697081208229065, "learning_rate": 9.317590168977492e-06, "loss": 0.6504, "step": 3050 }, { "epoch": 0.19, "grad_norm": 0.9508548378944397, "learning_rate": 9.317072651040666e-06, "loss": 0.6127, "step": 3051 }, { "epoch": 0.19, "grad_norm": 0.9957895278930664, "learning_rate": 9.31655495132693e-06, "loss": 0.6096, "step": 3052 }, { "epoch": 0.19, "grad_norm": 0.906047523021698, "learning_rate": 9.31603706985808e-06, "loss": 0.6054, "step": 3053 }, { "epoch": 0.19, "grad_norm": 0.9574893116950989, "learning_rate": 9.315519006655925e-06, "loss": 0.6771, "step": 3054 }, { "epoch": 0.19, "grad_norm": 0.8845919966697693, "learning_rate": 9.315000761742276e-06, "loss": 0.6098, "step": 3055 }, { "epoch": 0.19, "grad_norm": 0.9109580516815186, "learning_rate": 9.314482335138954e-06, "loss": 0.6058, "step": 3056 }, { "epoch": 0.19, "grad_norm": 0.8939434289932251, "learning_rate": 9.313963726867793e-06, "loss": 0.6442, "step": 3057 }, { "epoch": 0.19, "grad_norm": 0.9312341213226318, "learning_rate": 9.313444936950626e-06, "loss": 0.5768, "step": 3058 }, { "epoch": 0.19, "grad_norm": 0.9663771986961365, "learning_rate": 9.312925965409297e-06, "loss": 0.6526, "step": 3059 }, { "epoch": 0.19, "grad_norm": 1.0164662599563599, "learning_rate": 9.312406812265659e-06, "loss": 0.6253, "step": 3060 }, { "epoch": 0.19, "grad_norm": 0.8911099433898926, "learning_rate": 9.311887477541574e-06, "loss": 0.6088, "step": 3061 }, { "epoch": 0.19, "grad_norm": 0.8796306848526001, "learning_rate": 9.311367961258906e-06, "loss": 0.6459, "step": 3062 }, { "epoch": 0.19, "grad_norm": 0.8623282313346863, "learning_rate": 9.31084826343953e-06, "loss": 0.6009, "step": 3063 }, { "epoch": 0.19, "grad_norm": 0.9488338828086853, "learning_rate": 9.310328384105331e-06, "loss": 0.6573, "step": 3064 }, { "epoch": 0.19, "grad_norm": 0.9341808557510376, "learning_rate": 9.309808323278199e-06, "loss": 0.5932, "step": 3065 }, { "epoch": 0.19, "grad_norm": 0.8781132698059082, "learning_rate": 9.30928808098003e-06, "loss": 0.6225, "step": 3066 }, { "epoch": 0.19, "grad_norm": 0.9114009737968445, "learning_rate": 9.308767657232733e-06, "loss": 0.6765, "step": 3067 }, { "epoch": 0.19, "grad_norm": 0.9022600650787354, "learning_rate": 9.308247052058217e-06, "loss": 0.6255, "step": 3068 }, { "epoch": 0.19, "grad_norm": 0.9390726089477539, "learning_rate": 9.307726265478405e-06, "loss": 0.6543, "step": 3069 }, { "epoch": 0.19, "grad_norm": 0.8956183791160583, "learning_rate": 9.307205297515225e-06, "loss": 0.6966, "step": 3070 }, { "epoch": 0.19, "grad_norm": 0.9602479338645935, "learning_rate": 9.306684148190616e-06, "loss": 0.6754, "step": 3071 }, { "epoch": 0.19, "grad_norm": 0.885344922542572, "learning_rate": 9.306162817526519e-06, "loss": 0.6952, "step": 3072 }, { "epoch": 0.19, "grad_norm": 0.8164680004119873, "learning_rate": 9.305641305544884e-06, "loss": 0.5571, "step": 3073 }, { "epoch": 0.19, "grad_norm": 0.8812573552131653, "learning_rate": 9.305119612267673e-06, "loss": 0.6224, "step": 3074 }, { "epoch": 0.19, "grad_norm": 0.9333205819129944, "learning_rate": 9.30459773771685e-06, "loss": 0.66, "step": 3075 }, { "epoch": 0.19, "grad_norm": 0.9016597270965576, "learning_rate": 9.304075681914392e-06, "loss": 0.5679, "step": 3076 }, { "epoch": 0.19, "grad_norm": 0.8855369687080383, "learning_rate": 9.30355344488228e-06, "loss": 0.5928, "step": 3077 }, { "epoch": 0.2, "grad_norm": 0.885812520980835, "learning_rate": 9.303031026642504e-06, "loss": 0.5906, "step": 3078 }, { "epoch": 0.2, "grad_norm": 0.8560954332351685, "learning_rate": 9.302508427217059e-06, "loss": 0.6125, "step": 3079 }, { "epoch": 0.2, "grad_norm": 0.9503233432769775, "learning_rate": 9.301985646627953e-06, "loss": 0.6551, "step": 3080 }, { "epoch": 0.2, "grad_norm": 0.8623626828193665, "learning_rate": 9.301462684897195e-06, "loss": 0.613, "step": 3081 }, { "epoch": 0.2, "grad_norm": 0.9079574346542358, "learning_rate": 9.300939542046808e-06, "loss": 0.601, "step": 3082 }, { "epoch": 0.2, "grad_norm": 0.950981616973877, "learning_rate": 9.30041621809882e-06, "loss": 0.6574, "step": 3083 }, { "epoch": 0.2, "grad_norm": 0.9608682990074158, "learning_rate": 9.299892713075263e-06, "loss": 0.6698, "step": 3084 }, { "epoch": 0.2, "grad_norm": 0.9275756478309631, "learning_rate": 9.299369026998184e-06, "loss": 0.6307, "step": 3085 }, { "epoch": 0.2, "grad_norm": 0.9137438535690308, "learning_rate": 9.298845159889632e-06, "loss": 0.612, "step": 3086 }, { "epoch": 0.2, "grad_norm": 0.9493110179901123, "learning_rate": 9.298321111771664e-06, "loss": 0.6554, "step": 3087 }, { "epoch": 0.2, "grad_norm": 0.9397709369659424, "learning_rate": 9.297796882666346e-06, "loss": 0.6389, "step": 3088 }, { "epoch": 0.2, "grad_norm": 0.9529610276222229, "learning_rate": 9.297272472595753e-06, "loss": 0.6638, "step": 3089 }, { "epoch": 0.2, "grad_norm": 0.8715389370918274, "learning_rate": 9.296747881581965e-06, "loss": 0.6121, "step": 3090 }, { "epoch": 0.2, "grad_norm": 0.8829297423362732, "learning_rate": 9.29622310964707e-06, "loss": 0.6215, "step": 3091 }, { "epoch": 0.2, "grad_norm": 0.9472043514251709, "learning_rate": 9.295698156813167e-06, "loss": 0.6624, "step": 3092 }, { "epoch": 0.2, "grad_norm": 1.0828962326049805, "learning_rate": 9.295173023102358e-06, "loss": 0.6576, "step": 3093 }, { "epoch": 0.2, "grad_norm": 1.0033841133117676, "learning_rate": 9.294647708536754e-06, "loss": 0.6569, "step": 3094 }, { "epoch": 0.2, "grad_norm": 0.9431530237197876, "learning_rate": 9.294122213138475e-06, "loss": 0.5798, "step": 3095 }, { "epoch": 0.2, "grad_norm": 0.951475977897644, "learning_rate": 9.29359653692965e-06, "loss": 0.6196, "step": 3096 }, { "epoch": 0.2, "grad_norm": 0.9203341007232666, "learning_rate": 9.293070679932407e-06, "loss": 0.6916, "step": 3097 }, { "epoch": 0.2, "grad_norm": 0.9140121936798096, "learning_rate": 9.292544642168896e-06, "loss": 0.5716, "step": 3098 }, { "epoch": 0.2, "grad_norm": 0.8884128928184509, "learning_rate": 9.292018423661261e-06, "loss": 0.6084, "step": 3099 }, { "epoch": 0.2, "grad_norm": 0.9457247257232666, "learning_rate": 9.291492024431661e-06, "loss": 0.6486, "step": 3100 }, { "epoch": 0.2, "grad_norm": 0.9811872839927673, "learning_rate": 9.290965444502263e-06, "loss": 0.6772, "step": 3101 }, { "epoch": 0.2, "grad_norm": 0.8778002858161926, "learning_rate": 9.290438683895236e-06, "loss": 0.6258, "step": 3102 }, { "epoch": 0.2, "grad_norm": 0.962437093257904, "learning_rate": 9.28991174263276e-06, "loss": 0.6443, "step": 3103 }, { "epoch": 0.2, "grad_norm": 0.961173415184021, "learning_rate": 9.289384620737025e-06, "loss": 0.6779, "step": 3104 }, { "epoch": 0.2, "grad_norm": 0.9411439299583435, "learning_rate": 9.288857318230225e-06, "loss": 0.6218, "step": 3105 }, { "epoch": 0.2, "grad_norm": 0.9473131895065308, "learning_rate": 9.288329835134563e-06, "loss": 0.6684, "step": 3106 }, { "epoch": 0.2, "grad_norm": 0.8709444999694824, "learning_rate": 9.28780217147225e-06, "loss": 0.5879, "step": 3107 }, { "epoch": 0.2, "grad_norm": 0.902916431427002, "learning_rate": 9.287274327265505e-06, "loss": 0.64, "step": 3108 }, { "epoch": 0.2, "grad_norm": 0.9012413024902344, "learning_rate": 9.286746302536551e-06, "loss": 0.6239, "step": 3109 }, { "epoch": 0.2, "grad_norm": 0.9633440971374512, "learning_rate": 9.286218097307623e-06, "loss": 0.6198, "step": 3110 }, { "epoch": 0.2, "grad_norm": 0.9164153337478638, "learning_rate": 9.285689711600961e-06, "loss": 0.6444, "step": 3111 }, { "epoch": 0.2, "grad_norm": 0.9006356000900269, "learning_rate": 9.285161145438815e-06, "loss": 0.6407, "step": 3112 }, { "epoch": 0.2, "grad_norm": 0.8857513666152954, "learning_rate": 9.284632398843439e-06, "loss": 0.5797, "step": 3113 }, { "epoch": 0.2, "grad_norm": 0.9555025696754456, "learning_rate": 9.284103471837097e-06, "loss": 0.644, "step": 3114 }, { "epoch": 0.2, "grad_norm": 0.9066913723945618, "learning_rate": 9.283574364442066e-06, "loss": 0.5744, "step": 3115 }, { "epoch": 0.2, "grad_norm": 0.9738790392875671, "learning_rate": 9.283045076680614e-06, "loss": 0.6821, "step": 3116 }, { "epoch": 0.2, "grad_norm": 0.9615656733512878, "learning_rate": 9.282515608575038e-06, "loss": 0.6394, "step": 3117 }, { "epoch": 0.2, "grad_norm": 0.9014465808868408, "learning_rate": 9.281985960147625e-06, "loss": 0.621, "step": 3118 }, { "epoch": 0.2, "grad_norm": 0.8829550743103027, "learning_rate": 9.28145613142068e-06, "loss": 0.6598, "step": 3119 }, { "epoch": 0.2, "grad_norm": 0.9063881039619446, "learning_rate": 9.28092612241651e-06, "loss": 0.6228, "step": 3120 }, { "epoch": 0.2, "grad_norm": 0.9000431895256042, "learning_rate": 9.280395933157436e-06, "loss": 0.5869, "step": 3121 }, { "epoch": 0.2, "grad_norm": 0.8334502577781677, "learning_rate": 9.279865563665778e-06, "loss": 0.593, "step": 3122 }, { "epoch": 0.2, "grad_norm": 0.8414939045906067, "learning_rate": 9.27933501396387e-06, "loss": 0.5866, "step": 3123 }, { "epoch": 0.2, "grad_norm": 0.8854286670684814, "learning_rate": 9.27880428407405e-06, "loss": 0.639, "step": 3124 }, { "epoch": 0.2, "grad_norm": 0.9396377801895142, "learning_rate": 9.278273374018669e-06, "loss": 0.6313, "step": 3125 }, { "epoch": 0.2, "grad_norm": 0.9234236478805542, "learning_rate": 9.277742283820077e-06, "loss": 0.6578, "step": 3126 }, { "epoch": 0.2, "grad_norm": 0.9088654518127441, "learning_rate": 9.27721101350064e-06, "loss": 0.6698, "step": 3127 }, { "epoch": 0.2, "grad_norm": 0.8841193318367004, "learning_rate": 9.276679563082726e-06, "loss": 0.6511, "step": 3128 }, { "epoch": 0.2, "grad_norm": 0.8056107759475708, "learning_rate": 9.276147932588712e-06, "loss": 0.573, "step": 3129 }, { "epoch": 0.2, "grad_norm": 0.8983877301216125, "learning_rate": 9.275616122040985e-06, "loss": 0.5889, "step": 3130 }, { "epoch": 0.2, "grad_norm": 0.9186437129974365, "learning_rate": 9.275084131461938e-06, "loss": 0.6494, "step": 3131 }, { "epoch": 0.2, "grad_norm": 0.9824482202529907, "learning_rate": 9.27455196087397e-06, "loss": 0.6029, "step": 3132 }, { "epoch": 0.2, "grad_norm": 0.8984844088554382, "learning_rate": 9.274019610299487e-06, "loss": 0.6178, "step": 3133 }, { "epoch": 0.2, "grad_norm": 0.8482160568237305, "learning_rate": 9.273487079760908e-06, "loss": 0.5949, "step": 3134 }, { "epoch": 0.2, "grad_norm": 0.9106261134147644, "learning_rate": 9.272954369280654e-06, "loss": 0.6198, "step": 3135 }, { "epoch": 0.2, "grad_norm": 0.8580856323242188, "learning_rate": 9.272421478881158e-06, "loss": 0.6239, "step": 3136 }, { "epoch": 0.2, "grad_norm": 0.9911568760871887, "learning_rate": 9.271888408584852e-06, "loss": 0.6535, "step": 3137 }, { "epoch": 0.2, "grad_norm": 0.8783669471740723, "learning_rate": 9.27135515841419e-06, "loss": 0.6204, "step": 3138 }, { "epoch": 0.2, "grad_norm": 0.8823960423469543, "learning_rate": 9.27082172839162e-06, "loss": 0.6155, "step": 3139 }, { "epoch": 0.2, "grad_norm": 0.9094551801681519, "learning_rate": 9.270288118539603e-06, "loss": 0.636, "step": 3140 }, { "epoch": 0.2, "grad_norm": 0.906217098236084, "learning_rate": 9.26975432888061e-06, "loss": 0.5808, "step": 3141 }, { "epoch": 0.2, "grad_norm": 0.8561161160469055, "learning_rate": 9.269220359437114e-06, "loss": 0.5757, "step": 3142 }, { "epoch": 0.2, "grad_norm": 0.9296371340751648, "learning_rate": 9.2686862102316e-06, "loss": 0.6503, "step": 3143 }, { "epoch": 0.2, "grad_norm": 0.9102144837379456, "learning_rate": 9.268151881286561e-06, "loss": 0.6316, "step": 3144 }, { "epoch": 0.2, "grad_norm": 0.8436759114265442, "learning_rate": 9.267617372624494e-06, "loss": 0.6341, "step": 3145 }, { "epoch": 0.2, "grad_norm": 0.9814794659614563, "learning_rate": 9.267082684267905e-06, "loss": 0.6439, "step": 3146 }, { "epoch": 0.2, "grad_norm": 0.968041181564331, "learning_rate": 9.266547816239309e-06, "loss": 0.6738, "step": 3147 }, { "epoch": 0.2, "grad_norm": 0.9353750348091125, "learning_rate": 9.266012768561225e-06, "loss": 0.6825, "step": 3148 }, { "epoch": 0.2, "grad_norm": 0.9027935266494751, "learning_rate": 9.265477541256184e-06, "loss": 0.6578, "step": 3149 }, { "epoch": 0.2, "grad_norm": 0.9193140864372253, "learning_rate": 9.264942134346723e-06, "loss": 0.6061, "step": 3150 }, { "epoch": 0.2, "grad_norm": 0.8780162930488586, "learning_rate": 9.264406547855386e-06, "loss": 0.5997, "step": 3151 }, { "epoch": 0.2, "grad_norm": 0.9127413630485535, "learning_rate": 9.263870781804723e-06, "loss": 0.6124, "step": 3152 }, { "epoch": 0.2, "grad_norm": 0.9376271963119507, "learning_rate": 9.263334836217295e-06, "loss": 0.5963, "step": 3153 }, { "epoch": 0.2, "grad_norm": 0.8163601160049438, "learning_rate": 9.262798711115667e-06, "loss": 0.626, "step": 3154 }, { "epoch": 0.2, "grad_norm": 0.8610231280326843, "learning_rate": 9.262262406522415e-06, "loss": 0.5428, "step": 3155 }, { "epoch": 0.2, "grad_norm": 0.950401782989502, "learning_rate": 9.261725922460121e-06, "loss": 0.6314, "step": 3156 }, { "epoch": 0.2, "grad_norm": 0.9740757942199707, "learning_rate": 9.261189258951372e-06, "loss": 0.637, "step": 3157 }, { "epoch": 0.2, "grad_norm": 0.9891514778137207, "learning_rate": 9.26065241601877e-06, "loss": 0.6871, "step": 3158 }, { "epoch": 0.2, "grad_norm": 0.8673012852668762, "learning_rate": 9.260115393684914e-06, "loss": 0.6357, "step": 3159 }, { "epoch": 0.2, "grad_norm": 0.9474377036094666, "learning_rate": 9.25957819197242e-06, "loss": 0.616, "step": 3160 }, { "epoch": 0.2, "grad_norm": 0.8863465189933777, "learning_rate": 9.259040810903906e-06, "loss": 0.6046, "step": 3161 }, { "epoch": 0.2, "grad_norm": 0.8935105800628662, "learning_rate": 9.258503250501998e-06, "loss": 0.6472, "step": 3162 }, { "epoch": 0.2, "grad_norm": 0.9094743728637695, "learning_rate": 9.257965510789334e-06, "loss": 0.5834, "step": 3163 }, { "epoch": 0.2, "grad_norm": 0.9533581137657166, "learning_rate": 9.257427591788555e-06, "loss": 0.6631, "step": 3164 }, { "epoch": 0.2, "grad_norm": 0.8987277746200562, "learning_rate": 9.25688949352231e-06, "loss": 0.6267, "step": 3165 }, { "epoch": 0.2, "grad_norm": 0.8459535241127014, "learning_rate": 9.256351216013257e-06, "loss": 0.6366, "step": 3166 }, { "epoch": 0.2, "grad_norm": 0.8984457850456238, "learning_rate": 9.255812759284062e-06, "loss": 0.5716, "step": 3167 }, { "epoch": 0.2, "grad_norm": 0.8287543654441833, "learning_rate": 9.255274123357396e-06, "loss": 0.5677, "step": 3168 }, { "epoch": 0.2, "grad_norm": 0.9328951239585876, "learning_rate": 9.254735308255937e-06, "loss": 0.7044, "step": 3169 }, { "epoch": 0.2, "grad_norm": 0.9265501499176025, "learning_rate": 9.254196314002379e-06, "loss": 0.625, "step": 3170 }, { "epoch": 0.2, "grad_norm": 0.959682285785675, "learning_rate": 9.253657140619412e-06, "loss": 0.6506, "step": 3171 }, { "epoch": 0.2, "grad_norm": 0.9735859036445618, "learning_rate": 9.25311778812974e-06, "loss": 0.6281, "step": 3172 }, { "epoch": 0.2, "grad_norm": 0.9741908311843872, "learning_rate": 9.252578256556075e-06, "loss": 0.6645, "step": 3173 }, { "epoch": 0.2, "grad_norm": 0.9076485633850098, "learning_rate": 9.252038545921131e-06, "loss": 0.5691, "step": 3174 }, { "epoch": 0.2, "grad_norm": 0.9652928113937378, "learning_rate": 9.251498656247636e-06, "loss": 0.6645, "step": 3175 }, { "epoch": 0.2, "grad_norm": 0.9393512010574341, "learning_rate": 9.250958587558326e-06, "loss": 0.6011, "step": 3176 }, { "epoch": 0.2, "grad_norm": 0.9639145731925964, "learning_rate": 9.250418339875934e-06, "loss": 0.6379, "step": 3177 }, { "epoch": 0.2, "grad_norm": 0.8793298602104187, "learning_rate": 9.249877913223213e-06, "loss": 0.6104, "step": 3178 }, { "epoch": 0.2, "grad_norm": 0.8683106899261475, "learning_rate": 9.249337307622916e-06, "loss": 0.62, "step": 3179 }, { "epoch": 0.2, "grad_norm": 0.9256559014320374, "learning_rate": 9.24879652309781e-06, "loss": 0.642, "step": 3180 }, { "epoch": 0.2, "grad_norm": 0.8257124423980713, "learning_rate": 9.248255559670661e-06, "loss": 0.5951, "step": 3181 }, { "epoch": 0.2, "grad_norm": 0.8258576989173889, "learning_rate": 9.247714417364251e-06, "loss": 0.6086, "step": 3182 }, { "epoch": 0.2, "grad_norm": 0.8258581161499023, "learning_rate": 9.24717309620136e-06, "loss": 0.576, "step": 3183 }, { "epoch": 0.2, "grad_norm": 0.9140012860298157, "learning_rate": 9.246631596204788e-06, "loss": 0.5943, "step": 3184 }, { "epoch": 0.2, "grad_norm": 0.9424448609352112, "learning_rate": 9.246089917397332e-06, "loss": 0.6434, "step": 3185 }, { "epoch": 0.2, "grad_norm": 0.9118272066116333, "learning_rate": 9.2455480598018e-06, "loss": 0.6748, "step": 3186 }, { "epoch": 0.2, "grad_norm": 0.9356390833854675, "learning_rate": 9.245006023441008e-06, "loss": 0.6076, "step": 3187 }, { "epoch": 0.2, "grad_norm": 0.9224506616592407, "learning_rate": 9.24446380833778e-06, "loss": 0.6892, "step": 3188 }, { "epoch": 0.2, "grad_norm": 0.8446199893951416, "learning_rate": 9.243921414514947e-06, "loss": 0.5628, "step": 3189 }, { "epoch": 0.2, "grad_norm": 0.8842172026634216, "learning_rate": 9.243378841995346e-06, "loss": 0.5721, "step": 3190 }, { "epoch": 0.2, "grad_norm": 0.9054396748542786, "learning_rate": 9.242836090801823e-06, "loss": 0.6142, "step": 3191 }, { "epoch": 0.2, "grad_norm": 0.832400918006897, "learning_rate": 9.242293160957231e-06, "loss": 0.6044, "step": 3192 }, { "epoch": 0.2, "grad_norm": 0.9510114789009094, "learning_rate": 9.241750052484435e-06, "loss": 0.6215, "step": 3193 }, { "epoch": 0.2, "grad_norm": 0.9344449043273926, "learning_rate": 9.241206765406298e-06, "loss": 0.5672, "step": 3194 }, { "epoch": 0.2, "grad_norm": 0.9397872090339661, "learning_rate": 9.2406632997457e-06, "loss": 0.5928, "step": 3195 }, { "epoch": 0.2, "grad_norm": 0.9468801021575928, "learning_rate": 9.240119655525522e-06, "loss": 0.6609, "step": 3196 }, { "epoch": 0.2, "grad_norm": 0.9130421280860901, "learning_rate": 9.239575832768655e-06, "loss": 0.6606, "step": 3197 }, { "epoch": 0.2, "grad_norm": 0.8960924744606018, "learning_rate": 9.239031831498e-06, "loss": 0.588, "step": 3198 }, { "epoch": 0.2, "grad_norm": 0.9796780347824097, "learning_rate": 9.238487651736458e-06, "loss": 0.6605, "step": 3199 }, { "epoch": 0.2, "grad_norm": 0.9915714859962463, "learning_rate": 9.237943293506948e-06, "loss": 0.6517, "step": 3200 }, { "epoch": 0.2, "grad_norm": 0.8834147453308105, "learning_rate": 9.237398756832387e-06, "loss": 0.6094, "step": 3201 }, { "epoch": 0.2, "grad_norm": 0.9595925211906433, "learning_rate": 9.236854041735706e-06, "loss": 0.5947, "step": 3202 }, { "epoch": 0.2, "grad_norm": 0.9719516038894653, "learning_rate": 9.236309148239839e-06, "loss": 0.6186, "step": 3203 }, { "epoch": 0.2, "grad_norm": 0.912463366985321, "learning_rate": 9.235764076367732e-06, "loss": 0.5836, "step": 3204 }, { "epoch": 0.2, "grad_norm": 0.9614611864089966, "learning_rate": 9.235218826142337e-06, "loss": 0.6131, "step": 3205 }, { "epoch": 0.2, "grad_norm": 0.8435421586036682, "learning_rate": 9.234673397586606e-06, "loss": 0.6088, "step": 3206 }, { "epoch": 0.2, "grad_norm": 0.9031780958175659, "learning_rate": 9.234127790723512e-06, "loss": 0.6154, "step": 3207 }, { "epoch": 0.2, "grad_norm": 0.9034252166748047, "learning_rate": 9.233582005576028e-06, "loss": 0.5843, "step": 3208 }, { "epoch": 0.2, "grad_norm": 0.912809431552887, "learning_rate": 9.233036042167131e-06, "loss": 0.634, "step": 3209 }, { "epoch": 0.2, "grad_norm": 0.924806535243988, "learning_rate": 9.232489900519812e-06, "loss": 0.6497, "step": 3210 }, { "epoch": 0.2, "grad_norm": 0.9530941247940063, "learning_rate": 9.231943580657069e-06, "loss": 0.6562, "step": 3211 }, { "epoch": 0.2, "grad_norm": 0.8763086795806885, "learning_rate": 9.2313970826019e-06, "loss": 0.5798, "step": 3212 }, { "epoch": 0.2, "grad_norm": 0.8387221097946167, "learning_rate": 9.230850406377323e-06, "loss": 0.5948, "step": 3213 }, { "epoch": 0.2, "grad_norm": 0.8941132426261902, "learning_rate": 9.230303552006352e-06, "loss": 0.5912, "step": 3214 }, { "epoch": 0.2, "grad_norm": 0.9189191460609436, "learning_rate": 9.229756519512014e-06, "loss": 0.6402, "step": 3215 }, { "epoch": 0.2, "grad_norm": 0.8971881866455078, "learning_rate": 9.229209308917343e-06, "loss": 0.6072, "step": 3216 }, { "epoch": 0.2, "grad_norm": 0.910284161567688, "learning_rate": 9.228661920245383e-06, "loss": 0.5816, "step": 3217 }, { "epoch": 0.2, "grad_norm": 0.8809064626693726, "learning_rate": 9.22811435351918e-06, "loss": 0.5952, "step": 3218 }, { "epoch": 0.2, "grad_norm": 0.9248557686805725, "learning_rate": 9.227566608761786e-06, "loss": 0.6442, "step": 3219 }, { "epoch": 0.2, "grad_norm": 0.9311677813529968, "learning_rate": 9.227018685996272e-06, "loss": 0.6215, "step": 3220 }, { "epoch": 0.2, "grad_norm": 0.8465821146965027, "learning_rate": 9.226470585245706e-06, "loss": 0.6053, "step": 3221 }, { "epoch": 0.2, "grad_norm": 0.9327176213264465, "learning_rate": 9.225922306533164e-06, "loss": 0.6709, "step": 3222 }, { "epoch": 0.2, "grad_norm": 0.8878608345985413, "learning_rate": 9.225373849881739e-06, "loss": 0.6033, "step": 3223 }, { "epoch": 0.2, "grad_norm": 0.8926795721054077, "learning_rate": 9.224825215314515e-06, "loss": 0.6279, "step": 3224 }, { "epoch": 0.2, "grad_norm": 0.9022210240364075, "learning_rate": 9.224276402854601e-06, "loss": 0.641, "step": 3225 }, { "epoch": 0.2, "grad_norm": 0.9377365708351135, "learning_rate": 9.223727412525103e-06, "loss": 0.6236, "step": 3226 }, { "epoch": 0.2, "grad_norm": 0.9372929334640503, "learning_rate": 9.223178244349135e-06, "loss": 0.6389, "step": 3227 }, { "epoch": 0.2, "grad_norm": 0.8741313219070435, "learning_rate": 9.222628898349825e-06, "loss": 0.6063, "step": 3228 }, { "epoch": 0.2, "grad_norm": 0.8348528742790222, "learning_rate": 9.2220793745503e-06, "loss": 0.5962, "step": 3229 }, { "epoch": 0.2, "grad_norm": 0.8601580858230591, "learning_rate": 9.221529672973701e-06, "loss": 0.589, "step": 3230 }, { "epoch": 0.2, "grad_norm": 1.0050134658813477, "learning_rate": 9.220979793643173e-06, "loss": 0.6276, "step": 3231 }, { "epoch": 0.2, "grad_norm": 0.9667968153953552, "learning_rate": 9.220429736581869e-06, "loss": 0.654, "step": 3232 }, { "epoch": 0.2, "grad_norm": 0.9288026690483093, "learning_rate": 9.219879501812952e-06, "loss": 0.5867, "step": 3233 }, { "epoch": 0.2, "grad_norm": 0.9640477895736694, "learning_rate": 9.219329089359588e-06, "loss": 0.6481, "step": 3234 }, { "epoch": 0.2, "grad_norm": 1.0388972759246826, "learning_rate": 9.218778499244953e-06, "loss": 0.6895, "step": 3235 }, { "epoch": 0.21, "grad_norm": 0.9421420693397522, "learning_rate": 9.218227731492234e-06, "loss": 0.6852, "step": 3236 }, { "epoch": 0.21, "grad_norm": 0.9457274079322815, "learning_rate": 9.217676786124616e-06, "loss": 0.6247, "step": 3237 }, { "epoch": 0.21, "grad_norm": 0.9554296135902405, "learning_rate": 9.217125663165303e-06, "loss": 0.6557, "step": 3238 }, { "epoch": 0.21, "grad_norm": 0.930719792842865, "learning_rate": 9.216574362637498e-06, "loss": 0.7073, "step": 3239 }, { "epoch": 0.21, "grad_norm": 0.880737841129303, "learning_rate": 9.216022884564414e-06, "loss": 0.6405, "step": 3240 }, { "epoch": 0.21, "grad_norm": 0.9576687812805176, "learning_rate": 9.215471228969275e-06, "loss": 0.6455, "step": 3241 }, { "epoch": 0.21, "grad_norm": 0.8890754580497742, "learning_rate": 9.214919395875306e-06, "loss": 0.674, "step": 3242 }, { "epoch": 0.21, "grad_norm": 0.8603907823562622, "learning_rate": 9.214367385305744e-06, "loss": 0.5467, "step": 3243 }, { "epoch": 0.21, "grad_norm": 0.925334632396698, "learning_rate": 9.213815197283834e-06, "loss": 0.6226, "step": 3244 }, { "epoch": 0.21, "grad_norm": 0.9949658513069153, "learning_rate": 9.21326283183282e-06, "loss": 0.6078, "step": 3245 }, { "epoch": 0.21, "grad_norm": 0.8808592557907104, "learning_rate": 9.21271028897597e-06, "loss": 0.6003, "step": 3246 }, { "epoch": 0.21, "grad_norm": 0.9511841535568237, "learning_rate": 9.212157568736542e-06, "loss": 0.6048, "step": 3247 }, { "epoch": 0.21, "grad_norm": 0.8486485481262207, "learning_rate": 9.211604671137812e-06, "loss": 0.6194, "step": 3248 }, { "epoch": 0.21, "grad_norm": 0.9710730314254761, "learning_rate": 9.211051596203061e-06, "loss": 0.6914, "step": 3249 }, { "epoch": 0.21, "grad_norm": 0.8839832544326782, "learning_rate": 9.210498343955576e-06, "loss": 0.6243, "step": 3250 }, { "epoch": 0.21, "grad_norm": 0.9419470429420471, "learning_rate": 9.209944914418653e-06, "loss": 0.6847, "step": 3251 }, { "epoch": 0.21, "grad_norm": 0.8645347356796265, "learning_rate": 9.209391307615596e-06, "loss": 0.574, "step": 3252 }, { "epoch": 0.21, "grad_norm": 0.9880130887031555, "learning_rate": 9.208837523569713e-06, "loss": 0.6631, "step": 3253 }, { "epoch": 0.21, "grad_norm": 0.8735252618789673, "learning_rate": 9.208283562304326e-06, "loss": 0.5747, "step": 3254 }, { "epoch": 0.21, "grad_norm": 0.9930894374847412, "learning_rate": 9.207729423842755e-06, "loss": 0.6138, "step": 3255 }, { "epoch": 0.21, "grad_norm": 0.9474650025367737, "learning_rate": 9.207175108208334e-06, "loss": 0.6524, "step": 3256 }, { "epoch": 0.21, "grad_norm": 0.9482831358909607, "learning_rate": 9.20662061542441e-06, "loss": 0.6654, "step": 3257 }, { "epoch": 0.21, "grad_norm": 0.8777074813842773, "learning_rate": 9.206065945514321e-06, "loss": 0.6201, "step": 3258 }, { "epoch": 0.21, "grad_norm": 0.9118297696113586, "learning_rate": 9.20551109850143e-06, "loss": 0.6074, "step": 3259 }, { "epoch": 0.21, "grad_norm": 0.973640501499176, "learning_rate": 9.204956074409095e-06, "loss": 0.6246, "step": 3260 }, { "epoch": 0.21, "grad_norm": 0.9374106526374817, "learning_rate": 9.204400873260688e-06, "loss": 0.6306, "step": 3261 }, { "epoch": 0.21, "grad_norm": 0.8689625859260559, "learning_rate": 9.203845495079587e-06, "loss": 0.6317, "step": 3262 }, { "epoch": 0.21, "grad_norm": 0.8900114893913269, "learning_rate": 9.203289939889175e-06, "loss": 0.6137, "step": 3263 }, { "epoch": 0.21, "grad_norm": 0.862295389175415, "learning_rate": 9.202734207712847e-06, "loss": 0.6081, "step": 3264 }, { "epoch": 0.21, "grad_norm": 0.8732759952545166, "learning_rate": 9.202178298574e-06, "loss": 0.5917, "step": 3265 }, { "epoch": 0.21, "grad_norm": 0.9287835359573364, "learning_rate": 9.201622212496043e-06, "loss": 0.6226, "step": 3266 }, { "epoch": 0.21, "grad_norm": 0.9804710745811462, "learning_rate": 9.201065949502394e-06, "loss": 0.6828, "step": 3267 }, { "epoch": 0.21, "grad_norm": 0.907406210899353, "learning_rate": 9.20050950961647e-06, "loss": 0.6004, "step": 3268 }, { "epoch": 0.21, "grad_norm": 0.870427668094635, "learning_rate": 9.199952892861706e-06, "loss": 0.6121, "step": 3269 }, { "epoch": 0.21, "grad_norm": 0.8811596035957336, "learning_rate": 9.199396099261532e-06, "loss": 0.6258, "step": 3270 }, { "epoch": 0.21, "grad_norm": 0.9226367473602295, "learning_rate": 9.198839128839399e-06, "loss": 0.6473, "step": 3271 }, { "epoch": 0.21, "grad_norm": 0.8732794523239136, "learning_rate": 9.198281981618757e-06, "loss": 0.5947, "step": 3272 }, { "epoch": 0.21, "grad_norm": 0.9539616703987122, "learning_rate": 9.197724657623066e-06, "loss": 0.6034, "step": 3273 }, { "epoch": 0.21, "grad_norm": 0.9064382910728455, "learning_rate": 9.197167156875793e-06, "loss": 0.6329, "step": 3274 }, { "epoch": 0.21, "grad_norm": 0.9909444451332092, "learning_rate": 9.19660947940041e-06, "loss": 0.6264, "step": 3275 }, { "epoch": 0.21, "grad_norm": 0.8617537021636963, "learning_rate": 9.196051625220401e-06, "loss": 0.5953, "step": 3276 }, { "epoch": 0.21, "grad_norm": 0.8831681609153748, "learning_rate": 9.195493594359254e-06, "loss": 0.6043, "step": 3277 }, { "epoch": 0.21, "grad_norm": 0.9826748371124268, "learning_rate": 9.19493538684047e-06, "loss": 0.6699, "step": 3278 }, { "epoch": 0.21, "grad_norm": 0.8774879574775696, "learning_rate": 9.194377002687547e-06, "loss": 0.6173, "step": 3279 }, { "epoch": 0.21, "grad_norm": 0.976276159286499, "learning_rate": 9.193818441924003e-06, "loss": 0.6248, "step": 3280 }, { "epoch": 0.21, "grad_norm": 0.8805941343307495, "learning_rate": 9.19325970457335e-06, "loss": 0.5739, "step": 3281 }, { "epoch": 0.21, "grad_norm": 0.8417159914970398, "learning_rate": 9.192700790659121e-06, "loss": 0.5478, "step": 3282 }, { "epoch": 0.21, "grad_norm": 0.958260178565979, "learning_rate": 9.192141700204844e-06, "loss": 0.7037, "step": 3283 }, { "epoch": 0.21, "grad_norm": 0.8954302668571472, "learning_rate": 9.191582433234067e-06, "loss": 0.6518, "step": 3284 }, { "epoch": 0.21, "grad_norm": 0.8878317475318909, "learning_rate": 9.191022989770332e-06, "loss": 0.6168, "step": 3285 }, { "epoch": 0.21, "grad_norm": 0.9823928475379944, "learning_rate": 9.1904633698372e-06, "loss": 0.6546, "step": 3286 }, { "epoch": 0.21, "grad_norm": 0.8733540177345276, "learning_rate": 9.189903573458234e-06, "loss": 0.6393, "step": 3287 }, { "epoch": 0.21, "grad_norm": 0.9368897080421448, "learning_rate": 9.189343600657002e-06, "loss": 0.6342, "step": 3288 }, { "epoch": 0.21, "grad_norm": 0.9004266858100891, "learning_rate": 9.188783451457086e-06, "loss": 0.6332, "step": 3289 }, { "epoch": 0.21, "grad_norm": 0.8798797130584717, "learning_rate": 9.18822312588207e-06, "loss": 0.6061, "step": 3290 }, { "epoch": 0.21, "grad_norm": 0.8371910452842712, "learning_rate": 9.187662623955548e-06, "loss": 0.5436, "step": 3291 }, { "epoch": 0.21, "grad_norm": 0.9449594616889954, "learning_rate": 9.18710194570112e-06, "loss": 0.629, "step": 3292 }, { "epoch": 0.21, "grad_norm": 0.8512078523635864, "learning_rate": 9.186541091142397e-06, "loss": 0.5525, "step": 3293 }, { "epoch": 0.21, "grad_norm": 0.9958682656288147, "learning_rate": 9.18598006030299e-06, "loss": 0.6585, "step": 3294 }, { "epoch": 0.21, "grad_norm": 0.8955892324447632, "learning_rate": 9.185418853206528e-06, "loss": 0.5793, "step": 3295 }, { "epoch": 0.21, "grad_norm": 1.0014921426773071, "learning_rate": 9.184857469876635e-06, "loss": 0.6248, "step": 3296 }, { "epoch": 0.21, "grad_norm": 0.9238271117210388, "learning_rate": 9.184295910336953e-06, "loss": 0.6186, "step": 3297 }, { "epoch": 0.21, "grad_norm": 0.9311379790306091, "learning_rate": 9.183734174611125e-06, "loss": 0.702, "step": 3298 }, { "epoch": 0.21, "grad_norm": 0.9323460459709167, "learning_rate": 9.183172262722807e-06, "loss": 0.7064, "step": 3299 }, { "epoch": 0.21, "grad_norm": 0.8786803483963013, "learning_rate": 9.182610174695656e-06, "loss": 0.6119, "step": 3300 }, { "epoch": 0.21, "grad_norm": 0.8774591088294983, "learning_rate": 9.182047910553342e-06, "loss": 0.6455, "step": 3301 }, { "epoch": 0.21, "grad_norm": 0.8772428035736084, "learning_rate": 9.181485470319537e-06, "loss": 0.6336, "step": 3302 }, { "epoch": 0.21, "grad_norm": 0.9135443568229675, "learning_rate": 9.180922854017927e-06, "loss": 0.5964, "step": 3303 }, { "epoch": 0.21, "grad_norm": 0.9094753861427307, "learning_rate": 9.1803600616722e-06, "loss": 0.6492, "step": 3304 }, { "epoch": 0.21, "grad_norm": 1.001076340675354, "learning_rate": 9.179797093306053e-06, "loss": 0.6767, "step": 3305 }, { "epoch": 0.21, "grad_norm": 0.989811360836029, "learning_rate": 9.17923394894319e-06, "loss": 0.6325, "step": 3306 }, { "epoch": 0.21, "grad_norm": 0.8956232070922852, "learning_rate": 9.178670628607325e-06, "loss": 0.6254, "step": 3307 }, { "epoch": 0.21, "grad_norm": 0.8942602276802063, "learning_rate": 9.178107132322174e-06, "loss": 0.5803, "step": 3308 }, { "epoch": 0.21, "grad_norm": 0.9630834460258484, "learning_rate": 9.177543460111469e-06, "loss": 0.6188, "step": 3309 }, { "epoch": 0.21, "grad_norm": 0.912510097026825, "learning_rate": 9.17697961199894e-06, "loss": 0.6383, "step": 3310 }, { "epoch": 0.21, "grad_norm": 0.9050446152687073, "learning_rate": 9.176415588008332e-06, "loss": 0.67, "step": 3311 }, { "epoch": 0.21, "grad_norm": 0.9326666593551636, "learning_rate": 9.175851388163391e-06, "loss": 0.6253, "step": 3312 }, { "epoch": 0.21, "grad_norm": 0.933397114276886, "learning_rate": 9.175287012487874e-06, "loss": 0.6303, "step": 3313 }, { "epoch": 0.21, "grad_norm": 0.8903535604476929, "learning_rate": 9.174722461005546e-06, "loss": 0.6088, "step": 3314 }, { "epoch": 0.21, "grad_norm": 0.9588652849197388, "learning_rate": 9.174157733740178e-06, "loss": 0.6508, "step": 3315 }, { "epoch": 0.21, "grad_norm": 0.9236728549003601, "learning_rate": 9.173592830715548e-06, "loss": 0.5812, "step": 3316 }, { "epoch": 0.21, "grad_norm": 0.9767409563064575, "learning_rate": 9.173027751955444e-06, "loss": 0.6523, "step": 3317 }, { "epoch": 0.21, "grad_norm": 0.8604898452758789, "learning_rate": 9.172462497483658e-06, "loss": 0.555, "step": 3318 }, { "epoch": 0.21, "grad_norm": 0.9616580009460449, "learning_rate": 9.17189706732399e-06, "loss": 0.6045, "step": 3319 }, { "epoch": 0.21, "grad_norm": 1.0537388324737549, "learning_rate": 9.171331461500253e-06, "loss": 0.6657, "step": 3320 }, { "epoch": 0.21, "grad_norm": 0.8993361592292786, "learning_rate": 9.170765680036256e-06, "loss": 0.6046, "step": 3321 }, { "epoch": 0.21, "grad_norm": 0.8810584545135498, "learning_rate": 9.170199722955825e-06, "loss": 0.6191, "step": 3322 }, { "epoch": 0.21, "grad_norm": 0.8661196231842041, "learning_rate": 9.169633590282793e-06, "loss": 0.608, "step": 3323 }, { "epoch": 0.21, "grad_norm": 0.9606330990791321, "learning_rate": 9.169067282040994e-06, "loss": 0.649, "step": 3324 }, { "epoch": 0.21, "grad_norm": 0.8650776147842407, "learning_rate": 9.168500798254275e-06, "loss": 0.5795, "step": 3325 }, { "epoch": 0.21, "grad_norm": 0.9146811366081238, "learning_rate": 9.167934138946489e-06, "loss": 0.6595, "step": 3326 }, { "epoch": 0.21, "grad_norm": 1.0079501867294312, "learning_rate": 9.167367304141494e-06, "loss": 0.6568, "step": 3327 }, { "epoch": 0.21, "grad_norm": 0.9679005146026611, "learning_rate": 9.166800293863161e-06, "loss": 0.6695, "step": 3328 }, { "epoch": 0.21, "grad_norm": 0.9201866984367371, "learning_rate": 9.166233108135362e-06, "loss": 0.5872, "step": 3329 }, { "epoch": 0.21, "grad_norm": 0.9234635233879089, "learning_rate": 9.165665746981982e-06, "loss": 0.6317, "step": 3330 }, { "epoch": 0.21, "grad_norm": 0.8861828446388245, "learning_rate": 9.165098210426905e-06, "loss": 0.6627, "step": 3331 }, { "epoch": 0.21, "grad_norm": 0.8471156358718872, "learning_rate": 9.164530498494035e-06, "loss": 0.6388, "step": 3332 }, { "epoch": 0.21, "grad_norm": 0.898435115814209, "learning_rate": 9.163962611207272e-06, "loss": 0.6261, "step": 3333 }, { "epoch": 0.21, "grad_norm": 0.9174916744232178, "learning_rate": 9.163394548590529e-06, "loss": 0.6421, "step": 3334 }, { "epoch": 0.21, "grad_norm": 0.922631025314331, "learning_rate": 9.162826310667725e-06, "loss": 0.641, "step": 3335 }, { "epoch": 0.21, "grad_norm": 0.9309580326080322, "learning_rate": 9.162257897462784e-06, "loss": 0.6544, "step": 3336 }, { "epoch": 0.21, "grad_norm": 0.8594711422920227, "learning_rate": 9.161689308999646e-06, "loss": 0.6029, "step": 3337 }, { "epoch": 0.21, "grad_norm": 0.969755232334137, "learning_rate": 9.161120545302246e-06, "loss": 0.6378, "step": 3338 }, { "epoch": 0.21, "grad_norm": 0.9250763058662415, "learning_rate": 9.160551606394537e-06, "loss": 0.6754, "step": 3339 }, { "epoch": 0.21, "grad_norm": 0.910316526889801, "learning_rate": 9.159982492300473e-06, "loss": 0.6433, "step": 3340 }, { "epoch": 0.21, "grad_norm": 0.9393495321273804, "learning_rate": 9.159413203044017e-06, "loss": 0.614, "step": 3341 }, { "epoch": 0.21, "grad_norm": 0.9090781211853027, "learning_rate": 9.158843738649141e-06, "loss": 0.601, "step": 3342 }, { "epoch": 0.21, "grad_norm": 0.8957191705703735, "learning_rate": 9.158274099139823e-06, "loss": 0.6071, "step": 3343 }, { "epoch": 0.21, "grad_norm": 0.8667554259300232, "learning_rate": 9.157704284540047e-06, "loss": 0.5847, "step": 3344 }, { "epoch": 0.21, "grad_norm": 0.904606819152832, "learning_rate": 9.15713429487381e-06, "loss": 0.6435, "step": 3345 }, { "epoch": 0.21, "grad_norm": 0.8986235857009888, "learning_rate": 9.156564130165106e-06, "loss": 0.5976, "step": 3346 }, { "epoch": 0.21, "grad_norm": 0.9764082431793213, "learning_rate": 9.155993790437949e-06, "loss": 0.6332, "step": 3347 }, { "epoch": 0.21, "grad_norm": 0.8578452467918396, "learning_rate": 9.155423275716351e-06, "loss": 0.5981, "step": 3348 }, { "epoch": 0.21, "grad_norm": 0.8971353769302368, "learning_rate": 9.154852586024332e-06, "loss": 0.6173, "step": 3349 }, { "epoch": 0.21, "grad_norm": 0.8360897302627563, "learning_rate": 9.154281721385928e-06, "loss": 0.6029, "step": 3350 }, { "epoch": 0.21, "grad_norm": 0.9505079984664917, "learning_rate": 9.153710681825169e-06, "loss": 0.6472, "step": 3351 }, { "epoch": 0.21, "grad_norm": 0.8876816034317017, "learning_rate": 9.153139467366103e-06, "loss": 0.6335, "step": 3352 }, { "epoch": 0.21, "grad_norm": 0.8487616777420044, "learning_rate": 9.152568078032783e-06, "loss": 0.6377, "step": 3353 }, { "epoch": 0.21, "grad_norm": 0.9489740133285522, "learning_rate": 9.151996513849267e-06, "loss": 0.6148, "step": 3354 }, { "epoch": 0.21, "grad_norm": 0.7857329249382019, "learning_rate": 9.151424774839622e-06, "loss": 0.5779, "step": 3355 }, { "epoch": 0.21, "grad_norm": 0.8584344983100891, "learning_rate": 9.15085286102792e-06, "loss": 0.5893, "step": 3356 }, { "epoch": 0.21, "grad_norm": 0.8409185409545898, "learning_rate": 9.150280772438245e-06, "loss": 0.6258, "step": 3357 }, { "epoch": 0.21, "grad_norm": 0.8375939726829529, "learning_rate": 9.149708509094684e-06, "loss": 0.5923, "step": 3358 }, { "epoch": 0.21, "grad_norm": 0.9026387333869934, "learning_rate": 9.149136071021333e-06, "loss": 0.6251, "step": 3359 }, { "epoch": 0.21, "grad_norm": 0.8975716233253479, "learning_rate": 9.148563458242296e-06, "loss": 0.6533, "step": 3360 }, { "epoch": 0.21, "grad_norm": 0.8834118247032166, "learning_rate": 9.147990670781683e-06, "loss": 0.6297, "step": 3361 }, { "epoch": 0.21, "grad_norm": 0.8428575992584229, "learning_rate": 9.147417708663615e-06, "loss": 0.5204, "step": 3362 }, { "epoch": 0.21, "grad_norm": 0.8661702871322632, "learning_rate": 9.146844571912213e-06, "loss": 0.6314, "step": 3363 }, { "epoch": 0.21, "grad_norm": 0.9363715052604675, "learning_rate": 9.146271260551614e-06, "loss": 0.6431, "step": 3364 }, { "epoch": 0.21, "grad_norm": 0.9469258785247803, "learning_rate": 9.145697774605953e-06, "loss": 0.6139, "step": 3365 }, { "epoch": 0.21, "grad_norm": 0.9144854545593262, "learning_rate": 9.145124114099382e-06, "loss": 0.6105, "step": 3366 }, { "epoch": 0.21, "grad_norm": 1.001625657081604, "learning_rate": 9.144550279056055e-06, "loss": 0.5842, "step": 3367 }, { "epoch": 0.21, "grad_norm": 0.9497262239456177, "learning_rate": 9.143976269500133e-06, "loss": 0.5686, "step": 3368 }, { "epoch": 0.21, "grad_norm": 0.9559330940246582, "learning_rate": 9.143402085455785e-06, "loss": 0.6098, "step": 3369 }, { "epoch": 0.21, "grad_norm": 0.8853155374526978, "learning_rate": 9.142827726947193e-06, "loss": 0.6347, "step": 3370 }, { "epoch": 0.21, "grad_norm": 0.9385725855827332, "learning_rate": 9.142253193998533e-06, "loss": 0.6098, "step": 3371 }, { "epoch": 0.21, "grad_norm": 0.8995375037193298, "learning_rate": 9.141678486634002e-06, "loss": 0.6358, "step": 3372 }, { "epoch": 0.21, "grad_norm": 0.8732660412788391, "learning_rate": 9.1411036048778e-06, "loss": 0.6316, "step": 3373 }, { "epoch": 0.21, "grad_norm": 0.8813968896865845, "learning_rate": 9.140528548754128e-06, "loss": 0.6106, "step": 3374 }, { "epoch": 0.21, "grad_norm": 0.8351157307624817, "learning_rate": 9.139953318287204e-06, "loss": 0.5437, "step": 3375 }, { "epoch": 0.21, "grad_norm": 0.9750312566757202, "learning_rate": 9.139377913501247e-06, "loss": 0.63, "step": 3376 }, { "epoch": 0.21, "grad_norm": 0.8909156322479248, "learning_rate": 9.138802334420486e-06, "loss": 0.58, "step": 3377 }, { "epoch": 0.21, "grad_norm": 0.9233285188674927, "learning_rate": 9.138226581069158e-06, "loss": 0.6109, "step": 3378 }, { "epoch": 0.21, "grad_norm": 0.9473268985748291, "learning_rate": 9.137650653471505e-06, "loss": 0.6883, "step": 3379 }, { "epoch": 0.21, "grad_norm": 0.9071610569953918, "learning_rate": 9.137074551651774e-06, "loss": 0.6187, "step": 3380 }, { "epoch": 0.21, "grad_norm": 0.9321165084838867, "learning_rate": 9.136498275634226e-06, "loss": 0.649, "step": 3381 }, { "epoch": 0.21, "grad_norm": 0.885661780834198, "learning_rate": 9.135921825443125e-06, "loss": 0.6455, "step": 3382 }, { "epoch": 0.21, "grad_norm": 0.9413583278656006, "learning_rate": 9.135345201102745e-06, "loss": 0.5993, "step": 3383 }, { "epoch": 0.21, "grad_norm": 0.9824182391166687, "learning_rate": 9.134768402637366e-06, "loss": 0.6636, "step": 3384 }, { "epoch": 0.21, "grad_norm": 0.8217403292655945, "learning_rate": 9.13419143007127e-06, "loss": 0.6168, "step": 3385 }, { "epoch": 0.21, "grad_norm": 0.8587862253189087, "learning_rate": 9.133614283428757e-06, "loss": 0.58, "step": 3386 }, { "epoch": 0.21, "grad_norm": 1.0002095699310303, "learning_rate": 9.133036962734127e-06, "loss": 0.6247, "step": 3387 }, { "epoch": 0.21, "grad_norm": 0.8633260726928711, "learning_rate": 9.132459468011686e-06, "loss": 0.6379, "step": 3388 }, { "epoch": 0.21, "grad_norm": 0.9585233926773071, "learning_rate": 9.131881799285754e-06, "loss": 0.6496, "step": 3389 }, { "epoch": 0.21, "grad_norm": 1.0030509233474731, "learning_rate": 9.131303956580653e-06, "loss": 0.6768, "step": 3390 }, { "epoch": 0.21, "grad_norm": 0.8934270143508911, "learning_rate": 9.130725939920712e-06, "loss": 0.5957, "step": 3391 }, { "epoch": 0.21, "grad_norm": 0.9003897905349731, "learning_rate": 9.130147749330275e-06, "loss": 0.6296, "step": 3392 }, { "epoch": 0.21, "grad_norm": 0.8862766027450562, "learning_rate": 9.129569384833682e-06, "loss": 0.636, "step": 3393 }, { "epoch": 0.22, "grad_norm": 0.9102445840835571, "learning_rate": 9.128990846455287e-06, "loss": 0.66, "step": 3394 }, { "epoch": 0.22, "grad_norm": 0.9600756168365479, "learning_rate": 9.128412134219453e-06, "loss": 0.5945, "step": 3395 }, { "epoch": 0.22, "grad_norm": 0.9160851240158081, "learning_rate": 9.127833248150546e-06, "loss": 0.6609, "step": 3396 }, { "epoch": 0.22, "grad_norm": 0.9181495904922485, "learning_rate": 9.12725418827294e-06, "loss": 0.5946, "step": 3397 }, { "epoch": 0.22, "grad_norm": 0.870098888874054, "learning_rate": 9.126674954611016e-06, "loss": 0.6147, "step": 3398 }, { "epoch": 0.22, "grad_norm": 0.8894675374031067, "learning_rate": 9.12609554718917e-06, "loss": 0.6234, "step": 3399 }, { "epoch": 0.22, "grad_norm": 0.8688364028930664, "learning_rate": 9.12551596603179e-06, "loss": 0.6225, "step": 3400 }, { "epoch": 0.22, "grad_norm": 0.8926935195922852, "learning_rate": 9.124936211163284e-06, "loss": 0.623, "step": 3401 }, { "epoch": 0.22, "grad_norm": 0.8921478390693665, "learning_rate": 9.124356282608065e-06, "loss": 0.5813, "step": 3402 }, { "epoch": 0.22, "grad_norm": 0.9715839624404907, "learning_rate": 9.123776180390552e-06, "loss": 0.6453, "step": 3403 }, { "epoch": 0.22, "grad_norm": 0.95328289270401, "learning_rate": 9.123195904535167e-06, "loss": 0.5729, "step": 3404 }, { "epoch": 0.22, "grad_norm": 0.9020276665687561, "learning_rate": 9.122615455066348e-06, "loss": 0.6469, "step": 3405 }, { "epoch": 0.22, "grad_norm": 0.9650804996490479, "learning_rate": 9.122034832008532e-06, "loss": 0.6088, "step": 3406 }, { "epoch": 0.22, "grad_norm": 0.9247978329658508, "learning_rate": 9.12145403538617e-06, "loss": 0.6298, "step": 3407 }, { "epoch": 0.22, "grad_norm": 0.9034278988838196, "learning_rate": 9.120873065223716e-06, "loss": 0.6255, "step": 3408 }, { "epoch": 0.22, "grad_norm": 0.9190613031387329, "learning_rate": 9.120291921545633e-06, "loss": 0.5856, "step": 3409 }, { "epoch": 0.22, "grad_norm": 1.0126057863235474, "learning_rate": 9.11971060437639e-06, "loss": 0.6682, "step": 3410 }, { "epoch": 0.22, "grad_norm": 0.9093670845031738, "learning_rate": 9.119129113740463e-06, "loss": 0.5967, "step": 3411 }, { "epoch": 0.22, "grad_norm": 0.8827959299087524, "learning_rate": 9.118547449662342e-06, "loss": 0.6004, "step": 3412 }, { "epoch": 0.22, "grad_norm": 0.9230462312698364, "learning_rate": 9.117965612166514e-06, "loss": 0.636, "step": 3413 }, { "epoch": 0.22, "grad_norm": 0.8487642407417297, "learning_rate": 9.117383601277478e-06, "loss": 0.6186, "step": 3414 }, { "epoch": 0.22, "grad_norm": 0.8755055665969849, "learning_rate": 9.116801417019744e-06, "loss": 0.581, "step": 3415 }, { "epoch": 0.22, "grad_norm": 0.9088988304138184, "learning_rate": 9.116219059417821e-06, "loss": 0.6535, "step": 3416 }, { "epoch": 0.22, "grad_norm": 0.9298532009124756, "learning_rate": 9.115636528496236e-06, "loss": 0.6107, "step": 3417 }, { "epoch": 0.22, "grad_norm": 0.8741108775138855, "learning_rate": 9.115053824279511e-06, "loss": 0.6134, "step": 3418 }, { "epoch": 0.22, "grad_norm": 0.9260140657424927, "learning_rate": 9.114470946792187e-06, "loss": 0.6243, "step": 3419 }, { "epoch": 0.22, "grad_norm": 0.8403961062431335, "learning_rate": 9.113887896058805e-06, "loss": 0.6119, "step": 3420 }, { "epoch": 0.22, "grad_norm": 0.9289052486419678, "learning_rate": 9.11330467210391e-06, "loss": 0.6269, "step": 3421 }, { "epoch": 0.22, "grad_norm": 0.9038977026939392, "learning_rate": 9.11272127495207e-06, "loss": 0.5929, "step": 3422 }, { "epoch": 0.22, "grad_norm": 0.8994903564453125, "learning_rate": 9.112137704627842e-06, "loss": 0.6708, "step": 3423 }, { "epoch": 0.22, "grad_norm": 0.8599143028259277, "learning_rate": 9.1115539611558e-06, "loss": 0.597, "step": 3424 }, { "epoch": 0.22, "grad_norm": 0.9443843960762024, "learning_rate": 9.110970044560524e-06, "loss": 0.6339, "step": 3425 }, { "epoch": 0.22, "grad_norm": 0.8927295804023743, "learning_rate": 9.1103859548666e-06, "loss": 0.6615, "step": 3426 }, { "epoch": 0.22, "grad_norm": 1.0299628973007202, "learning_rate": 9.109801692098624e-06, "loss": 0.6202, "step": 3427 }, { "epoch": 0.22, "grad_norm": 0.8992637991905212, "learning_rate": 9.109217256281196e-06, "loss": 0.5849, "step": 3428 }, { "epoch": 0.22, "grad_norm": 0.9295695424079895, "learning_rate": 9.108632647438922e-06, "loss": 0.6528, "step": 3429 }, { "epoch": 0.22, "grad_norm": 0.9012725949287415, "learning_rate": 9.108047865596421e-06, "loss": 0.6073, "step": 3430 }, { "epoch": 0.22, "grad_norm": 0.9266906380653381, "learning_rate": 9.107462910778316e-06, "loss": 0.5892, "step": 3431 }, { "epoch": 0.22, "grad_norm": 0.8764145970344543, "learning_rate": 9.106877783009236e-06, "loss": 0.6318, "step": 3432 }, { "epoch": 0.22, "grad_norm": 0.9359897375106812, "learning_rate": 9.106292482313819e-06, "loss": 0.6241, "step": 3433 }, { "epoch": 0.22, "grad_norm": 0.8579049706459045, "learning_rate": 9.105707008716712e-06, "loss": 0.5882, "step": 3434 }, { "epoch": 0.22, "grad_norm": 0.9599249362945557, "learning_rate": 9.105121362242564e-06, "loss": 0.6201, "step": 3435 }, { "epoch": 0.22, "grad_norm": 0.8719608187675476, "learning_rate": 9.104535542916035e-06, "loss": 0.5912, "step": 3436 }, { "epoch": 0.22, "grad_norm": 0.9004802107810974, "learning_rate": 9.103949550761795e-06, "loss": 0.6307, "step": 3437 }, { "epoch": 0.22, "grad_norm": 0.9485353827476501, "learning_rate": 9.103363385804516e-06, "loss": 0.6454, "step": 3438 }, { "epoch": 0.22, "grad_norm": 0.9338861703872681, "learning_rate": 9.102777048068878e-06, "loss": 0.6493, "step": 3439 }, { "epoch": 0.22, "grad_norm": 0.9316098093986511, "learning_rate": 9.102190537579572e-06, "loss": 0.6294, "step": 3440 }, { "epoch": 0.22, "grad_norm": 0.880497932434082, "learning_rate": 9.101603854361291e-06, "loss": 0.6074, "step": 3441 }, { "epoch": 0.22, "grad_norm": 0.9129565954208374, "learning_rate": 9.101016998438743e-06, "loss": 0.5802, "step": 3442 }, { "epoch": 0.22, "grad_norm": 0.9664899706840515, "learning_rate": 9.100429969836636e-06, "loss": 0.6579, "step": 3443 }, { "epoch": 0.22, "grad_norm": 0.928165853023529, "learning_rate": 9.099842768579685e-06, "loss": 0.6038, "step": 3444 }, { "epoch": 0.22, "grad_norm": 0.909015953540802, "learning_rate": 9.099255394692618e-06, "loss": 0.5444, "step": 3445 }, { "epoch": 0.22, "grad_norm": 0.9096186757087708, "learning_rate": 9.098667848200167e-06, "loss": 0.5793, "step": 3446 }, { "epoch": 0.22, "grad_norm": 0.969042956829071, "learning_rate": 9.09808012912707e-06, "loss": 0.6421, "step": 3447 }, { "epoch": 0.22, "grad_norm": 0.9990017414093018, "learning_rate": 9.097492237498076e-06, "loss": 0.6331, "step": 3448 }, { "epoch": 0.22, "grad_norm": 0.8431956768035889, "learning_rate": 9.096904173337937e-06, "loss": 0.6034, "step": 3449 }, { "epoch": 0.22, "grad_norm": 0.967842698097229, "learning_rate": 9.096315936671416e-06, "loss": 0.6123, "step": 3450 }, { "epoch": 0.22, "grad_norm": 0.9783948063850403, "learning_rate": 9.095727527523282e-06, "loss": 0.6611, "step": 3451 }, { "epoch": 0.22, "grad_norm": 0.9480175971984863, "learning_rate": 9.095138945918309e-06, "loss": 0.6269, "step": 3452 }, { "epoch": 0.22, "grad_norm": 0.8809651732444763, "learning_rate": 9.094550191881281e-06, "loss": 0.5726, "step": 3453 }, { "epoch": 0.22, "grad_norm": 0.9356509447097778, "learning_rate": 9.093961265436988e-06, "loss": 0.6504, "step": 3454 }, { "epoch": 0.22, "grad_norm": 0.8500334024429321, "learning_rate": 9.093372166610229e-06, "loss": 0.619, "step": 3455 }, { "epoch": 0.22, "grad_norm": 0.8734151124954224, "learning_rate": 9.092782895425806e-06, "loss": 0.5817, "step": 3456 }, { "epoch": 0.22, "grad_norm": 0.8919950723648071, "learning_rate": 9.092193451908533e-06, "loss": 0.6438, "step": 3457 }, { "epoch": 0.22, "grad_norm": 0.9189222455024719, "learning_rate": 9.091603836083231e-06, "loss": 0.6717, "step": 3458 }, { "epoch": 0.22, "grad_norm": 0.941829264163971, "learning_rate": 9.091014047974725e-06, "loss": 0.5565, "step": 3459 }, { "epoch": 0.22, "grad_norm": 0.9333182573318481, "learning_rate": 9.090424087607848e-06, "loss": 0.6282, "step": 3460 }, { "epoch": 0.22, "grad_norm": 0.8771211504936218, "learning_rate": 9.089833955007443e-06, "loss": 0.5849, "step": 3461 }, { "epoch": 0.22, "grad_norm": 0.9246846437454224, "learning_rate": 9.089243650198359e-06, "loss": 0.6186, "step": 3462 }, { "epoch": 0.22, "grad_norm": 0.8576235771179199, "learning_rate": 9.088653173205449e-06, "loss": 0.5996, "step": 3463 }, { "epoch": 0.22, "grad_norm": 0.9263531565666199, "learning_rate": 9.088062524053575e-06, "loss": 0.6116, "step": 3464 }, { "epoch": 0.22, "grad_norm": 0.8749649524688721, "learning_rate": 9.087471702767612e-06, "loss": 0.5922, "step": 3465 }, { "epoch": 0.22, "grad_norm": 0.9297971725463867, "learning_rate": 9.086880709372434e-06, "loss": 0.6259, "step": 3466 }, { "epoch": 0.22, "grad_norm": 0.8290271759033203, "learning_rate": 9.086289543892928e-06, "loss": 0.5753, "step": 3467 }, { "epoch": 0.22, "grad_norm": 0.9221488833427429, "learning_rate": 9.085698206353983e-06, "loss": 0.5982, "step": 3468 }, { "epoch": 0.22, "grad_norm": 0.8664331436157227, "learning_rate": 9.085106696780499e-06, "loss": 0.5829, "step": 3469 }, { "epoch": 0.22, "grad_norm": 0.943659245967865, "learning_rate": 9.084515015197384e-06, "loss": 0.6722, "step": 3470 }, { "epoch": 0.22, "grad_norm": 0.9838310480117798, "learning_rate": 9.08392316162955e-06, "loss": 0.6407, "step": 3471 }, { "epoch": 0.22, "grad_norm": 0.9057297110557556, "learning_rate": 9.083331136101921e-06, "loss": 0.6113, "step": 3472 }, { "epoch": 0.22, "grad_norm": 0.872379720211029, "learning_rate": 9.08273893863942e-06, "loss": 0.5884, "step": 3473 }, { "epoch": 0.22, "grad_norm": 0.9110143184661865, "learning_rate": 9.082146569266988e-06, "loss": 0.5865, "step": 3474 }, { "epoch": 0.22, "grad_norm": 0.9769248366355896, "learning_rate": 9.081554028009562e-06, "loss": 0.6642, "step": 3475 }, { "epoch": 0.22, "grad_norm": 0.8390948176383972, "learning_rate": 9.080961314892096e-06, "loss": 0.6116, "step": 3476 }, { "epoch": 0.22, "grad_norm": 0.9101285338401794, "learning_rate": 9.080368429939546e-06, "loss": 0.6063, "step": 3477 }, { "epoch": 0.22, "grad_norm": 0.9952099323272705, "learning_rate": 9.079775373176874e-06, "loss": 0.6302, "step": 3478 }, { "epoch": 0.22, "grad_norm": 0.9361991286277771, "learning_rate": 9.079182144629055e-06, "loss": 0.6237, "step": 3479 }, { "epoch": 0.22, "grad_norm": 0.8918977975845337, "learning_rate": 9.078588744321067e-06, "loss": 0.5958, "step": 3480 }, { "epoch": 0.22, "grad_norm": 0.9270057082176208, "learning_rate": 9.077995172277894e-06, "loss": 0.598, "step": 3481 }, { "epoch": 0.22, "grad_norm": 0.9182881712913513, "learning_rate": 9.07740142852453e-06, "loss": 0.6355, "step": 3482 }, { "epoch": 0.22, "grad_norm": 0.9537854194641113, "learning_rate": 9.076807513085976e-06, "loss": 0.6256, "step": 3483 }, { "epoch": 0.22, "grad_norm": 0.9619026780128479, "learning_rate": 9.076213425987242e-06, "loss": 0.6517, "step": 3484 }, { "epoch": 0.22, "grad_norm": 0.9318684339523315, "learning_rate": 9.07561916725334e-06, "loss": 0.6745, "step": 3485 }, { "epoch": 0.22, "grad_norm": 0.9609551429748535, "learning_rate": 9.075024736909292e-06, "loss": 0.6062, "step": 3486 }, { "epoch": 0.22, "grad_norm": 0.9244940280914307, "learning_rate": 9.074430134980129e-06, "loss": 0.6348, "step": 3487 }, { "epoch": 0.22, "grad_norm": 0.9906083941459656, "learning_rate": 9.073835361490885e-06, "loss": 0.6681, "step": 3488 }, { "epoch": 0.22, "grad_norm": 0.9201457500457764, "learning_rate": 9.073240416466609e-06, "loss": 0.6429, "step": 3489 }, { "epoch": 0.22, "grad_norm": 0.8737314939498901, "learning_rate": 9.072645299932347e-06, "loss": 0.6151, "step": 3490 }, { "epoch": 0.22, "grad_norm": 0.8806108832359314, "learning_rate": 9.07205001191316e-06, "loss": 0.6441, "step": 3491 }, { "epoch": 0.22, "grad_norm": 0.8998177647590637, "learning_rate": 9.071454552434111e-06, "loss": 0.6407, "step": 3492 }, { "epoch": 0.22, "grad_norm": 0.8950275778770447, "learning_rate": 9.070858921520276e-06, "loss": 0.6341, "step": 3493 }, { "epoch": 0.22, "grad_norm": 0.8834101557731628, "learning_rate": 9.070263119196734e-06, "loss": 0.6065, "step": 3494 }, { "epoch": 0.22, "grad_norm": 0.9296960830688477, "learning_rate": 9.06966714548857e-06, "loss": 0.6307, "step": 3495 }, { "epoch": 0.22, "grad_norm": 0.8565431833267212, "learning_rate": 9.069071000420879e-06, "loss": 0.607, "step": 3496 }, { "epoch": 0.22, "grad_norm": 0.9660019874572754, "learning_rate": 9.068474684018765e-06, "loss": 0.5671, "step": 3497 }, { "epoch": 0.22, "grad_norm": 0.9031816124916077, "learning_rate": 9.067878196307334e-06, "loss": 0.6158, "step": 3498 }, { "epoch": 0.22, "grad_norm": 0.8707241415977478, "learning_rate": 9.067281537311705e-06, "loss": 0.5942, "step": 3499 }, { "epoch": 0.22, "grad_norm": 0.9110444188117981, "learning_rate": 9.066684707056999e-06, "loss": 0.5835, "step": 3500 }, { "epoch": 0.22, "grad_norm": 0.9043798446655273, "learning_rate": 9.066087705568346e-06, "loss": 0.6047, "step": 3501 }, { "epoch": 0.22, "grad_norm": 0.9171016216278076, "learning_rate": 9.065490532870884e-06, "loss": 0.5593, "step": 3502 }, { "epoch": 0.22, "grad_norm": 0.9416684508323669, "learning_rate": 9.06489318898976e-06, "loss": 0.6465, "step": 3503 }, { "epoch": 0.22, "grad_norm": 0.9238849878311157, "learning_rate": 9.064295673950125e-06, "loss": 0.653, "step": 3504 }, { "epoch": 0.22, "grad_norm": 0.9581873416900635, "learning_rate": 9.063697987777136e-06, "loss": 0.6547, "step": 3505 }, { "epoch": 0.22, "grad_norm": 0.907537579536438, "learning_rate": 9.063100130495962e-06, "loss": 0.6362, "step": 3506 }, { "epoch": 0.22, "grad_norm": 0.8580865859985352, "learning_rate": 9.062502102131777e-06, "loss": 0.6312, "step": 3507 }, { "epoch": 0.22, "grad_norm": 0.9068456888198853, "learning_rate": 9.06190390270976e-06, "loss": 0.6583, "step": 3508 }, { "epoch": 0.22, "grad_norm": 0.8349429368972778, "learning_rate": 9.0613055322551e-06, "loss": 0.6388, "step": 3509 }, { "epoch": 0.22, "grad_norm": 0.8973667621612549, "learning_rate": 9.060706990792993e-06, "loss": 0.6076, "step": 3510 }, { "epoch": 0.22, "grad_norm": 0.8447120189666748, "learning_rate": 9.06010827834864e-06, "loss": 0.6158, "step": 3511 }, { "epoch": 0.22, "grad_norm": 0.8853378295898438, "learning_rate": 9.059509394947252e-06, "loss": 0.6026, "step": 3512 }, { "epoch": 0.22, "grad_norm": 0.9272050857543945, "learning_rate": 9.058910340614045e-06, "loss": 0.6184, "step": 3513 }, { "epoch": 0.22, "grad_norm": 0.8689481616020203, "learning_rate": 9.058311115374244e-06, "loss": 0.6424, "step": 3514 }, { "epoch": 0.22, "grad_norm": 0.8084876537322998, "learning_rate": 9.057711719253077e-06, "loss": 0.5953, "step": 3515 }, { "epoch": 0.22, "grad_norm": 0.9154835343360901, "learning_rate": 9.057112152275788e-06, "loss": 0.6471, "step": 3516 }, { "epoch": 0.22, "grad_norm": 0.9204840660095215, "learning_rate": 9.05651241446762e-06, "loss": 0.6131, "step": 3517 }, { "epoch": 0.22, "grad_norm": 0.8655226826667786, "learning_rate": 9.055912505853826e-06, "loss": 0.577, "step": 3518 }, { "epoch": 0.22, "grad_norm": 0.9272779226303101, "learning_rate": 9.055312426459663e-06, "loss": 0.6352, "step": 3519 }, { "epoch": 0.22, "grad_norm": 0.955590009689331, "learning_rate": 9.054712176310405e-06, "loss": 0.7019, "step": 3520 }, { "epoch": 0.22, "grad_norm": 0.898430585861206, "learning_rate": 9.05411175543132e-06, "loss": 0.5869, "step": 3521 }, { "epoch": 0.22, "grad_norm": 0.908953845500946, "learning_rate": 9.053511163847694e-06, "loss": 0.5677, "step": 3522 }, { "epoch": 0.22, "grad_norm": 0.9534192085266113, "learning_rate": 9.052910401584812e-06, "loss": 0.6819, "step": 3523 }, { "epoch": 0.22, "grad_norm": 0.8306724429130554, "learning_rate": 9.052309468667974e-06, "loss": 0.591, "step": 3524 }, { "epoch": 0.22, "grad_norm": 0.7923970818519592, "learning_rate": 9.05170836512248e-06, "loss": 0.5629, "step": 3525 }, { "epoch": 0.22, "grad_norm": 0.9316359162330627, "learning_rate": 9.051107090973642e-06, "loss": 0.6095, "step": 3526 }, { "epoch": 0.22, "grad_norm": 0.9419963359832764, "learning_rate": 9.050505646246777e-06, "loss": 0.6759, "step": 3527 }, { "epoch": 0.22, "grad_norm": 0.8494296669960022, "learning_rate": 9.04990403096721e-06, "loss": 0.582, "step": 3528 }, { "epoch": 0.22, "grad_norm": 0.9111973643302917, "learning_rate": 9.049302245160273e-06, "loss": 0.6585, "step": 3529 }, { "epoch": 0.22, "grad_norm": 0.906576931476593, "learning_rate": 9.048700288851305e-06, "loss": 0.6302, "step": 3530 }, { "epoch": 0.22, "grad_norm": 0.8784658312797546, "learning_rate": 9.048098162065652e-06, "loss": 0.653, "step": 3531 }, { "epoch": 0.22, "grad_norm": 0.8768582344055176, "learning_rate": 9.047495864828668e-06, "loss": 0.6133, "step": 3532 }, { "epoch": 0.22, "grad_norm": 0.8660056591033936, "learning_rate": 9.046893397165713e-06, "loss": 0.6245, "step": 3533 }, { "epoch": 0.22, "grad_norm": 0.923427939414978, "learning_rate": 9.046290759102155e-06, "loss": 0.6139, "step": 3534 }, { "epoch": 0.22, "grad_norm": 0.8469942212104797, "learning_rate": 9.04568795066337e-06, "loss": 0.5996, "step": 3535 }, { "epoch": 0.22, "grad_norm": 0.9075682759284973, "learning_rate": 9.045084971874738e-06, "loss": 0.6336, "step": 3536 }, { "epoch": 0.22, "grad_norm": 0.9033473134040833, "learning_rate": 9.044481822761651e-06, "loss": 0.6277, "step": 3537 }, { "epoch": 0.22, "grad_norm": 0.9756919145584106, "learning_rate": 9.043878503349503e-06, "loss": 0.6174, "step": 3538 }, { "epoch": 0.22, "grad_norm": 0.8659248352050781, "learning_rate": 9.043275013663699e-06, "loss": 0.5844, "step": 3539 }, { "epoch": 0.22, "grad_norm": 0.9130862355232239, "learning_rate": 9.04267135372965e-06, "loss": 0.6517, "step": 3540 }, { "epoch": 0.22, "grad_norm": 0.9586864709854126, "learning_rate": 9.042067523572775e-06, "loss": 0.6081, "step": 3541 }, { "epoch": 0.22, "grad_norm": 0.9088827967643738, "learning_rate": 9.041463523218496e-06, "loss": 0.6973, "step": 3542 }, { "epoch": 0.22, "grad_norm": 0.9386407136917114, "learning_rate": 9.040859352692249e-06, "loss": 0.6771, "step": 3543 }, { "epoch": 0.22, "grad_norm": 0.9016104340553284, "learning_rate": 9.04025501201947e-06, "loss": 0.6504, "step": 3544 }, { "epoch": 0.22, "grad_norm": 0.8565789461135864, "learning_rate": 9.039650501225608e-06, "loss": 0.6128, "step": 3545 }, { "epoch": 0.22, "grad_norm": 0.8813103437423706, "learning_rate": 9.039045820336116e-06, "loss": 0.6283, "step": 3546 }, { "epoch": 0.22, "grad_norm": 0.883348286151886, "learning_rate": 9.038440969376456e-06, "loss": 0.6106, "step": 3547 }, { "epoch": 0.22, "grad_norm": 0.8444504737854004, "learning_rate": 9.037835948372095e-06, "loss": 0.5763, "step": 3548 }, { "epoch": 0.22, "grad_norm": 0.856566846370697, "learning_rate": 9.03723075734851e-06, "loss": 0.601, "step": 3549 }, { "epoch": 0.22, "grad_norm": 0.9262292385101318, "learning_rate": 9.03662539633118e-06, "loss": 0.5976, "step": 3550 }, { "epoch": 0.22, "grad_norm": 0.931098461151123, "learning_rate": 9.0360198653456e-06, "loss": 0.6391, "step": 3551 }, { "epoch": 0.23, "grad_norm": 0.9265716075897217, "learning_rate": 9.035414164417262e-06, "loss": 0.5899, "step": 3552 }, { "epoch": 0.23, "grad_norm": 0.9725390672683716, "learning_rate": 9.034808293571672e-06, "loss": 0.6615, "step": 3553 }, { "epoch": 0.23, "grad_norm": 0.9248775839805603, "learning_rate": 9.03420225283434e-06, "loss": 0.6472, "step": 3554 }, { "epoch": 0.23, "grad_norm": 0.851396381855011, "learning_rate": 9.033596042230788e-06, "loss": 0.5497, "step": 3555 }, { "epoch": 0.23, "grad_norm": 0.9172872304916382, "learning_rate": 9.032989661786535e-06, "loss": 0.65, "step": 3556 }, { "epoch": 0.23, "grad_norm": 0.8770195841789246, "learning_rate": 9.032383111527119e-06, "loss": 0.6253, "step": 3557 }, { "epoch": 0.23, "grad_norm": 0.9005029201507568, "learning_rate": 9.031776391478077e-06, "loss": 0.631, "step": 3558 }, { "epoch": 0.23, "grad_norm": 0.8701792359352112, "learning_rate": 9.031169501664958e-06, "loss": 0.6235, "step": 3559 }, { "epoch": 0.23, "grad_norm": 0.9129980206489563, "learning_rate": 9.030562442113313e-06, "loss": 0.6273, "step": 3560 }, { "epoch": 0.23, "grad_norm": 0.9068407416343689, "learning_rate": 9.029955212848706e-06, "loss": 0.6408, "step": 3561 }, { "epoch": 0.23, "grad_norm": 0.9272667169570923, "learning_rate": 9.029347813896704e-06, "loss": 0.5862, "step": 3562 }, { "epoch": 0.23, "grad_norm": 0.8602524399757385, "learning_rate": 9.028740245282881e-06, "loss": 0.6004, "step": 3563 }, { "epoch": 0.23, "grad_norm": 0.9108449220657349, "learning_rate": 9.028132507032823e-06, "loss": 0.6113, "step": 3564 }, { "epoch": 0.23, "grad_norm": 0.8397127985954285, "learning_rate": 9.027524599172117e-06, "loss": 0.601, "step": 3565 }, { "epoch": 0.23, "grad_norm": 0.9540258049964905, "learning_rate": 9.026916521726361e-06, "loss": 0.6869, "step": 3566 }, { "epoch": 0.23, "grad_norm": 0.9084812998771667, "learning_rate": 9.026308274721161e-06, "loss": 0.5817, "step": 3567 }, { "epoch": 0.23, "grad_norm": 0.9301480650901794, "learning_rate": 9.025699858182125e-06, "loss": 0.5917, "step": 3568 }, { "epoch": 0.23, "grad_norm": 0.8542090058326721, "learning_rate": 9.02509127213487e-06, "loss": 0.6182, "step": 3569 }, { "epoch": 0.23, "grad_norm": 0.8809559941291809, "learning_rate": 9.024482516605026e-06, "loss": 0.5781, "step": 3570 }, { "epoch": 0.23, "grad_norm": 0.9583331346511841, "learning_rate": 9.023873591618224e-06, "loss": 0.6249, "step": 3571 }, { "epoch": 0.23, "grad_norm": 0.9110972881317139, "learning_rate": 9.023264497200102e-06, "loss": 0.624, "step": 3572 }, { "epoch": 0.23, "grad_norm": 0.876470685005188, "learning_rate": 9.022655233376308e-06, "loss": 0.6066, "step": 3573 }, { "epoch": 0.23, "grad_norm": 0.8327741622924805, "learning_rate": 9.022045800172493e-06, "loss": 0.592, "step": 3574 }, { "epoch": 0.23, "grad_norm": 0.9016212821006775, "learning_rate": 9.021436197614326e-06, "loss": 0.5999, "step": 3575 }, { "epoch": 0.23, "grad_norm": 0.9393583536148071, "learning_rate": 9.020826425727468e-06, "loss": 0.6292, "step": 3576 }, { "epoch": 0.23, "grad_norm": 0.8916171193122864, "learning_rate": 9.020216484537595e-06, "loss": 0.6681, "step": 3577 }, { "epoch": 0.23, "grad_norm": 0.9707697629928589, "learning_rate": 9.019606374070394e-06, "loss": 0.6506, "step": 3578 }, { "epoch": 0.23, "grad_norm": 0.9742267727851868, "learning_rate": 9.01899609435155e-06, "loss": 0.6432, "step": 3579 }, { "epoch": 0.23, "grad_norm": 0.9248902201652527, "learning_rate": 9.018385645406765e-06, "loss": 0.627, "step": 3580 }, { "epoch": 0.23, "grad_norm": 0.8701397180557251, "learning_rate": 9.017775027261735e-06, "loss": 0.6343, "step": 3581 }, { "epoch": 0.23, "grad_norm": 0.8465285897254944, "learning_rate": 9.017164239942178e-06, "loss": 0.6101, "step": 3582 }, { "epoch": 0.23, "grad_norm": 0.8863876461982727, "learning_rate": 9.016553283473808e-06, "loss": 0.6401, "step": 3583 }, { "epoch": 0.23, "grad_norm": 0.8480295538902283, "learning_rate": 9.015942157882353e-06, "loss": 0.5718, "step": 3584 }, { "epoch": 0.23, "grad_norm": 0.8785873055458069, "learning_rate": 9.015330863193543e-06, "loss": 0.6074, "step": 3585 }, { "epoch": 0.23, "grad_norm": 0.8759261965751648, "learning_rate": 9.01471939943312e-06, "loss": 0.6138, "step": 3586 }, { "epoch": 0.23, "grad_norm": 0.8847134113311768, "learning_rate": 9.014107766626828e-06, "loss": 0.5651, "step": 3587 }, { "epoch": 0.23, "grad_norm": 0.8662316203117371, "learning_rate": 9.013495964800423e-06, "loss": 0.6643, "step": 3588 }, { "epoch": 0.23, "grad_norm": 0.8825305700302124, "learning_rate": 9.012883993979663e-06, "loss": 0.7025, "step": 3589 }, { "epoch": 0.23, "grad_norm": 0.8754686713218689, "learning_rate": 9.01227185419032e-06, "loss": 0.6276, "step": 3590 }, { "epoch": 0.23, "grad_norm": 0.9244438409805298, "learning_rate": 9.011659545458167e-06, "loss": 0.5912, "step": 3591 }, { "epoch": 0.23, "grad_norm": 0.9278584718704224, "learning_rate": 9.011047067808985e-06, "loss": 0.6441, "step": 3592 }, { "epoch": 0.23, "grad_norm": 0.853956401348114, "learning_rate": 9.010434421268564e-06, "loss": 0.5881, "step": 3593 }, { "epoch": 0.23, "grad_norm": 0.903804361820221, "learning_rate": 9.009821605862701e-06, "loss": 0.6268, "step": 3594 }, { "epoch": 0.23, "grad_norm": 0.884956955909729, "learning_rate": 9.0092086216172e-06, "loss": 0.5976, "step": 3595 }, { "epoch": 0.23, "grad_norm": 0.8600631952285767, "learning_rate": 9.00859546855787e-06, "loss": 0.5976, "step": 3596 }, { "epoch": 0.23, "grad_norm": 0.8109932541847229, "learning_rate": 9.007982146710533e-06, "loss": 0.5807, "step": 3597 }, { "epoch": 0.23, "grad_norm": 0.8790200352668762, "learning_rate": 9.007368656101006e-06, "loss": 0.6335, "step": 3598 }, { "epoch": 0.23, "grad_norm": 0.8840540647506714, "learning_rate": 9.006754996755129e-06, "loss": 0.5932, "step": 3599 }, { "epoch": 0.23, "grad_norm": 0.9371446967124939, "learning_rate": 9.006141168698735e-06, "loss": 0.6723, "step": 3600 }, { "epoch": 0.23, "grad_norm": 0.8178922533988953, "learning_rate": 9.005527171957676e-06, "loss": 0.5882, "step": 3601 }, { "epoch": 0.23, "grad_norm": 0.9067853093147278, "learning_rate": 9.004913006557798e-06, "loss": 0.6432, "step": 3602 }, { "epoch": 0.23, "grad_norm": 0.8906139135360718, "learning_rate": 9.004298672524967e-06, "loss": 0.6492, "step": 3603 }, { "epoch": 0.23, "grad_norm": 0.8456130623817444, "learning_rate": 9.003684169885049e-06, "loss": 0.6127, "step": 3604 }, { "epoch": 0.23, "grad_norm": 0.8719025254249573, "learning_rate": 9.00306949866392e-06, "loss": 0.6, "step": 3605 }, { "epoch": 0.23, "grad_norm": 0.8929893970489502, "learning_rate": 9.002454658887458e-06, "loss": 0.5915, "step": 3606 }, { "epoch": 0.23, "grad_norm": 0.9277382493019104, "learning_rate": 9.001839650581554e-06, "loss": 0.6316, "step": 3607 }, { "epoch": 0.23, "grad_norm": 0.9326600432395935, "learning_rate": 9.001224473772104e-06, "loss": 0.6662, "step": 3608 }, { "epoch": 0.23, "grad_norm": 0.8711685538291931, "learning_rate": 9.000609128485011e-06, "loss": 0.5557, "step": 3609 }, { "epoch": 0.23, "grad_norm": 0.9938933849334717, "learning_rate": 8.999993614746184e-06, "loss": 0.6923, "step": 3610 }, { "epoch": 0.23, "grad_norm": 0.8392737507820129, "learning_rate": 8.999377932581541e-06, "loss": 0.5789, "step": 3611 }, { "epoch": 0.23, "grad_norm": 0.9159629344940186, "learning_rate": 8.998762082017006e-06, "loss": 0.6179, "step": 3612 }, { "epoch": 0.23, "grad_norm": 0.9216225743293762, "learning_rate": 8.998146063078512e-06, "loss": 0.6627, "step": 3613 }, { "epoch": 0.23, "grad_norm": 0.8778311610221863, "learning_rate": 8.997529875791993e-06, "loss": 0.6039, "step": 3614 }, { "epoch": 0.23, "grad_norm": 0.9303637742996216, "learning_rate": 8.9969135201834e-06, "loss": 0.6187, "step": 3615 }, { "epoch": 0.23, "grad_norm": 0.9529017806053162, "learning_rate": 8.996296996278682e-06, "loss": 0.6698, "step": 3616 }, { "epoch": 0.23, "grad_norm": 0.8703224658966064, "learning_rate": 8.9956803041038e-06, "loss": 0.6047, "step": 3617 }, { "epoch": 0.23, "grad_norm": 0.8795974254608154, "learning_rate": 8.99506344368472e-06, "loss": 0.5653, "step": 3618 }, { "epoch": 0.23, "grad_norm": 0.8557493090629578, "learning_rate": 8.994446415047415e-06, "loss": 0.5735, "step": 3619 }, { "epoch": 0.23, "grad_norm": 0.8863241672515869, "learning_rate": 8.993829218217867e-06, "loss": 0.6194, "step": 3620 }, { "epoch": 0.23, "grad_norm": 0.8855205178260803, "learning_rate": 8.993211853222065e-06, "loss": 0.6443, "step": 3621 }, { "epoch": 0.23, "grad_norm": 0.9319906830787659, "learning_rate": 8.992594320086005e-06, "loss": 0.6551, "step": 3622 }, { "epoch": 0.23, "grad_norm": 0.863646924495697, "learning_rate": 8.991976618835685e-06, "loss": 0.6152, "step": 3623 }, { "epoch": 0.23, "grad_norm": 0.9434888362884521, "learning_rate": 8.991358749497117e-06, "loss": 0.6381, "step": 3624 }, { "epoch": 0.23, "grad_norm": 0.9003688097000122, "learning_rate": 8.990740712096317e-06, "loss": 0.6295, "step": 3625 }, { "epoch": 0.23, "grad_norm": 0.8399546146392822, "learning_rate": 8.99012250665931e-06, "loss": 0.5874, "step": 3626 }, { "epoch": 0.23, "grad_norm": 0.9593385457992554, "learning_rate": 8.989504133212123e-06, "loss": 0.7235, "step": 3627 }, { "epoch": 0.23, "grad_norm": 0.8997763991355896, "learning_rate": 8.988885591780795e-06, "loss": 0.6178, "step": 3628 }, { "epoch": 0.23, "grad_norm": 0.888486385345459, "learning_rate": 8.988266882391374e-06, "loss": 0.6022, "step": 3629 }, { "epoch": 0.23, "grad_norm": 0.8956373929977417, "learning_rate": 8.987648005069907e-06, "loss": 0.6401, "step": 3630 }, { "epoch": 0.23, "grad_norm": 0.9338024854660034, "learning_rate": 8.987028959842454e-06, "loss": 0.6308, "step": 3631 }, { "epoch": 0.23, "grad_norm": 0.9498031139373779, "learning_rate": 8.986409746735084e-06, "loss": 0.5903, "step": 3632 }, { "epoch": 0.23, "grad_norm": 0.9222273826599121, "learning_rate": 8.985790365773864e-06, "loss": 0.6238, "step": 3633 }, { "epoch": 0.23, "grad_norm": 0.8916066884994507, "learning_rate": 8.985170816984878e-06, "loss": 0.5938, "step": 3634 }, { "epoch": 0.23, "grad_norm": 0.901877760887146, "learning_rate": 8.984551100394212e-06, "loss": 0.6308, "step": 3635 }, { "epoch": 0.23, "grad_norm": 0.9297860264778137, "learning_rate": 8.98393121602796e-06, "loss": 0.6093, "step": 3636 }, { "epoch": 0.23, "grad_norm": 0.9041366577148438, "learning_rate": 8.983311163912227e-06, "loss": 0.5895, "step": 3637 }, { "epoch": 0.23, "grad_norm": 0.9007093906402588, "learning_rate": 8.982690944073113e-06, "loss": 0.5988, "step": 3638 }, { "epoch": 0.23, "grad_norm": 0.8943149447441101, "learning_rate": 8.982070556536741e-06, "loss": 0.6197, "step": 3639 }, { "epoch": 0.23, "grad_norm": 1.2114888429641724, "learning_rate": 8.98145000132923e-06, "loss": 0.639, "step": 3640 }, { "epoch": 0.23, "grad_norm": 0.8697226047515869, "learning_rate": 8.980829278476711e-06, "loss": 0.612, "step": 3641 }, { "epoch": 0.23, "grad_norm": 0.9321666359901428, "learning_rate": 8.980208388005318e-06, "loss": 0.6106, "step": 3642 }, { "epoch": 0.23, "grad_norm": 0.8679060935974121, "learning_rate": 8.979587329941197e-06, "loss": 0.635, "step": 3643 }, { "epoch": 0.23, "grad_norm": 0.8968499898910522, "learning_rate": 8.978966104310497e-06, "loss": 0.6707, "step": 3644 }, { "epoch": 0.23, "grad_norm": 0.8723733425140381, "learning_rate": 8.978344711139374e-06, "loss": 0.5988, "step": 3645 }, { "epoch": 0.23, "grad_norm": 0.9933862090110779, "learning_rate": 8.977723150453999e-06, "loss": 0.6475, "step": 3646 }, { "epoch": 0.23, "grad_norm": 0.9089076519012451, "learning_rate": 8.977101422280536e-06, "loss": 0.6124, "step": 3647 }, { "epoch": 0.23, "grad_norm": 0.8408138155937195, "learning_rate": 8.97647952664517e-06, "loss": 0.5773, "step": 3648 }, { "epoch": 0.23, "grad_norm": 0.8604863882064819, "learning_rate": 8.975857463574082e-06, "loss": 0.5365, "step": 3649 }, { "epoch": 0.23, "grad_norm": 0.8891851902008057, "learning_rate": 8.97523523309347e-06, "loss": 0.5713, "step": 3650 }, { "epoch": 0.23, "grad_norm": 0.8845424652099609, "learning_rate": 8.974612835229528e-06, "loss": 0.6176, "step": 3651 }, { "epoch": 0.23, "grad_norm": 0.8516875505447388, "learning_rate": 8.973990270008467e-06, "loss": 0.6297, "step": 3652 }, { "epoch": 0.23, "grad_norm": 0.8722536563873291, "learning_rate": 8.973367537456502e-06, "loss": 0.6413, "step": 3653 }, { "epoch": 0.23, "grad_norm": 0.9590871334075928, "learning_rate": 8.97274463759985e-06, "loss": 0.6862, "step": 3654 }, { "epoch": 0.23, "grad_norm": 0.952622652053833, "learning_rate": 8.972121570464744e-06, "loss": 0.6474, "step": 3655 }, { "epoch": 0.23, "grad_norm": 0.8545388579368591, "learning_rate": 8.971498336077415e-06, "loss": 0.6003, "step": 3656 }, { "epoch": 0.23, "grad_norm": 0.9351946115493774, "learning_rate": 8.970874934464108e-06, "loss": 0.6054, "step": 3657 }, { "epoch": 0.23, "grad_norm": 0.8703538179397583, "learning_rate": 8.970251365651071e-06, "loss": 0.6466, "step": 3658 }, { "epoch": 0.23, "grad_norm": 0.908089816570282, "learning_rate": 8.969627629664559e-06, "loss": 0.5536, "step": 3659 }, { "epoch": 0.23, "grad_norm": 0.8776571154594421, "learning_rate": 8.969003726530838e-06, "loss": 0.6136, "step": 3660 }, { "epoch": 0.23, "grad_norm": 0.901607096195221, "learning_rate": 8.968379656276177e-06, "loss": 0.6526, "step": 3661 }, { "epoch": 0.23, "grad_norm": 0.9027665853500366, "learning_rate": 8.967755418926854e-06, "loss": 0.6117, "step": 3662 }, { "epoch": 0.23, "grad_norm": 0.8891413807868958, "learning_rate": 8.967131014509152e-06, "loss": 0.586, "step": 3663 }, { "epoch": 0.23, "grad_norm": 0.9754297137260437, "learning_rate": 8.966506443049366e-06, "loss": 0.6608, "step": 3664 }, { "epoch": 0.23, "grad_norm": 0.9024408459663391, "learning_rate": 8.965881704573789e-06, "loss": 0.6286, "step": 3665 }, { "epoch": 0.23, "grad_norm": 1.0116610527038574, "learning_rate": 8.965256799108733e-06, "loss": 0.6086, "step": 3666 }, { "epoch": 0.23, "grad_norm": 0.9310095310211182, "learning_rate": 8.964631726680504e-06, "loss": 0.6619, "step": 3667 }, { "epoch": 0.23, "grad_norm": 0.9279587268829346, "learning_rate": 8.964006487315426e-06, "loss": 0.6823, "step": 3668 }, { "epoch": 0.23, "grad_norm": 0.8194244503974915, "learning_rate": 8.963381081039826e-06, "loss": 0.6129, "step": 3669 }, { "epoch": 0.23, "grad_norm": 0.8662837147712708, "learning_rate": 8.962755507880036e-06, "loss": 0.6013, "step": 3670 }, { "epoch": 0.23, "grad_norm": 0.8568682074546814, "learning_rate": 8.962129767862395e-06, "loss": 0.6053, "step": 3671 }, { "epoch": 0.23, "grad_norm": 0.9012435078620911, "learning_rate": 8.961503861013255e-06, "loss": 0.6542, "step": 3672 }, { "epoch": 0.23, "grad_norm": 0.8900840282440186, "learning_rate": 8.960877787358968e-06, "loss": 0.6021, "step": 3673 }, { "epoch": 0.23, "grad_norm": 0.9452951550483704, "learning_rate": 8.960251546925895e-06, "loss": 0.6342, "step": 3674 }, { "epoch": 0.23, "grad_norm": 0.7982982397079468, "learning_rate": 8.959625139740407e-06, "loss": 0.5353, "step": 3675 }, { "epoch": 0.23, "grad_norm": 0.9291501045227051, "learning_rate": 8.95899856582888e-06, "loss": 0.6417, "step": 3676 }, { "epoch": 0.23, "grad_norm": 0.8522927761077881, "learning_rate": 8.958371825217693e-06, "loss": 0.5758, "step": 3677 }, { "epoch": 0.23, "grad_norm": 0.8960750699043274, "learning_rate": 8.957744917933241e-06, "loss": 0.5945, "step": 3678 }, { "epoch": 0.23, "grad_norm": 0.8411138653755188, "learning_rate": 8.957117844001919e-06, "loss": 0.6068, "step": 3679 }, { "epoch": 0.23, "grad_norm": 0.9141689538955688, "learning_rate": 8.956490603450128e-06, "loss": 0.6117, "step": 3680 }, { "epoch": 0.23, "grad_norm": 0.9008049964904785, "learning_rate": 8.955863196304282e-06, "loss": 0.6095, "step": 3681 }, { "epoch": 0.23, "grad_norm": 0.9140220284461975, "learning_rate": 8.9552356225908e-06, "loss": 0.5998, "step": 3682 }, { "epoch": 0.23, "grad_norm": 0.8372965455055237, "learning_rate": 8.954607882336105e-06, "loss": 0.5772, "step": 3683 }, { "epoch": 0.23, "grad_norm": 0.9582294225692749, "learning_rate": 8.953979975566626e-06, "loss": 0.6542, "step": 3684 }, { "epoch": 0.23, "grad_norm": 0.9331498146057129, "learning_rate": 8.953351902308807e-06, "loss": 0.6334, "step": 3685 }, { "epoch": 0.23, "grad_norm": 0.9214125871658325, "learning_rate": 8.952723662589093e-06, "loss": 0.6551, "step": 3686 }, { "epoch": 0.23, "grad_norm": 0.9166949987411499, "learning_rate": 8.952095256433934e-06, "loss": 0.6552, "step": 3687 }, { "epoch": 0.23, "grad_norm": 0.9024720191955566, "learning_rate": 8.951466683869795e-06, "loss": 0.5499, "step": 3688 }, { "epoch": 0.23, "grad_norm": 0.8689588308334351, "learning_rate": 8.950837944923138e-06, "loss": 0.5789, "step": 3689 }, { "epoch": 0.23, "grad_norm": 0.9102311134338379, "learning_rate": 8.95020903962044e-06, "loss": 0.6514, "step": 3690 }, { "epoch": 0.23, "grad_norm": 0.9357526302337646, "learning_rate": 8.94957996798818e-06, "loss": 0.608, "step": 3691 }, { "epoch": 0.23, "grad_norm": 0.8915140628814697, "learning_rate": 8.948950730052847e-06, "loss": 0.6221, "step": 3692 }, { "epoch": 0.23, "grad_norm": 0.9464169144630432, "learning_rate": 8.948321325840937e-06, "loss": 0.6701, "step": 3693 }, { "epoch": 0.23, "grad_norm": 0.9279240965843201, "learning_rate": 8.94769175537895e-06, "loss": 0.6052, "step": 3694 }, { "epoch": 0.23, "grad_norm": 0.9310309886932373, "learning_rate": 8.9470620186934e-06, "loss": 0.6523, "step": 3695 }, { "epoch": 0.23, "grad_norm": 0.930351972579956, "learning_rate": 8.946432115810795e-06, "loss": 0.6639, "step": 3696 }, { "epoch": 0.23, "grad_norm": 0.9379802346229553, "learning_rate": 8.945802046757666e-06, "loss": 0.6714, "step": 3697 }, { "epoch": 0.23, "grad_norm": 0.903059184551239, "learning_rate": 8.945171811560535e-06, "loss": 0.6284, "step": 3698 }, { "epoch": 0.23, "grad_norm": 0.9489940404891968, "learning_rate": 8.944541410245947e-06, "loss": 0.5875, "step": 3699 }, { "epoch": 0.23, "grad_norm": 0.9724168181419373, "learning_rate": 8.943910842840439e-06, "loss": 0.6435, "step": 3700 }, { "epoch": 0.23, "grad_norm": 0.9340975284576416, "learning_rate": 8.943280109370568e-06, "loss": 0.6209, "step": 3701 }, { "epoch": 0.23, "grad_norm": 0.8607521653175354, "learning_rate": 8.942649209862888e-06, "loss": 0.5788, "step": 3702 }, { "epoch": 0.23, "grad_norm": 0.8896112442016602, "learning_rate": 8.942018144343965e-06, "loss": 0.6177, "step": 3703 }, { "epoch": 0.23, "grad_norm": 0.9297407865524292, "learning_rate": 8.941386912840372e-06, "loss": 0.6398, "step": 3704 }, { "epoch": 0.23, "grad_norm": 0.8317979574203491, "learning_rate": 8.940755515378687e-06, "loss": 0.6036, "step": 3705 }, { "epoch": 0.23, "grad_norm": 0.9319295287132263, "learning_rate": 8.940123951985495e-06, "loss": 0.608, "step": 3706 }, { "epoch": 0.23, "grad_norm": 0.9755576252937317, "learning_rate": 8.939492222687392e-06, "loss": 0.6238, "step": 3707 }, { "epoch": 0.23, "grad_norm": 0.8916385173797607, "learning_rate": 8.938860327510975e-06, "loss": 0.614, "step": 3708 }, { "epoch": 0.23, "grad_norm": 0.8873549699783325, "learning_rate": 8.938228266482852e-06, "loss": 0.6389, "step": 3709 }, { "epoch": 0.24, "grad_norm": 0.8616818785667419, "learning_rate": 8.937596039629637e-06, "loss": 0.6028, "step": 3710 }, { "epoch": 0.24, "grad_norm": 0.8916230797767639, "learning_rate": 8.93696364697795e-06, "loss": 0.6395, "step": 3711 }, { "epoch": 0.24, "grad_norm": 0.8822511434555054, "learning_rate": 8.936331088554419e-06, "loss": 0.5956, "step": 3712 }, { "epoch": 0.24, "grad_norm": 0.8785961270332336, "learning_rate": 8.93569836438568e-06, "loss": 0.5859, "step": 3713 }, { "epoch": 0.24, "grad_norm": 0.9163837432861328, "learning_rate": 8.935065474498375e-06, "loss": 0.6075, "step": 3714 }, { "epoch": 0.24, "grad_norm": 0.8735101819038391, "learning_rate": 8.934432418919153e-06, "loss": 0.6399, "step": 3715 }, { "epoch": 0.24, "grad_norm": 0.877932608127594, "learning_rate": 8.933799197674667e-06, "loss": 0.6058, "step": 3716 }, { "epoch": 0.24, "grad_norm": 0.9489808082580566, "learning_rate": 8.933165810791579e-06, "loss": 0.6173, "step": 3717 }, { "epoch": 0.24, "grad_norm": 0.8636232018470764, "learning_rate": 8.932532258296565e-06, "loss": 0.6418, "step": 3718 }, { "epoch": 0.24, "grad_norm": 0.9418687224388123, "learning_rate": 8.931898540216297e-06, "loss": 0.6438, "step": 3719 }, { "epoch": 0.24, "grad_norm": 0.9097021222114563, "learning_rate": 8.931264656577459e-06, "loss": 0.6215, "step": 3720 }, { "epoch": 0.24, "grad_norm": 0.8493873476982117, "learning_rate": 8.930630607406743e-06, "loss": 0.6228, "step": 3721 }, { "epoch": 0.24, "grad_norm": 0.9140156507492065, "learning_rate": 8.929996392730844e-06, "loss": 0.6362, "step": 3722 }, { "epoch": 0.24, "grad_norm": 0.8999550938606262, "learning_rate": 8.92936201257647e-06, "loss": 0.6398, "step": 3723 }, { "epoch": 0.24, "grad_norm": 0.8380311727523804, "learning_rate": 8.928727466970331e-06, "loss": 0.588, "step": 3724 }, { "epoch": 0.24, "grad_norm": 0.9263492822647095, "learning_rate": 8.928092755939145e-06, "loss": 0.6247, "step": 3725 }, { "epoch": 0.24, "grad_norm": 0.9678030014038086, "learning_rate": 8.927457879509638e-06, "loss": 0.624, "step": 3726 }, { "epoch": 0.24, "grad_norm": 0.8912070989608765, "learning_rate": 8.926822837708542e-06, "loss": 0.6393, "step": 3727 }, { "epoch": 0.24, "grad_norm": 0.844551682472229, "learning_rate": 8.926187630562597e-06, "loss": 0.6139, "step": 3728 }, { "epoch": 0.24, "grad_norm": 0.9056801199913025, "learning_rate": 8.925552258098549e-06, "loss": 0.5725, "step": 3729 }, { "epoch": 0.24, "grad_norm": 0.8850533962249756, "learning_rate": 8.924916720343151e-06, "loss": 0.6235, "step": 3730 }, { "epoch": 0.24, "grad_norm": 0.858784556388855, "learning_rate": 8.924281017323164e-06, "loss": 0.59, "step": 3731 }, { "epoch": 0.24, "grad_norm": 0.8923681974411011, "learning_rate": 8.923645149065354e-06, "loss": 0.5841, "step": 3732 }, { "epoch": 0.24, "grad_norm": 0.9098735451698303, "learning_rate": 8.923009115596498e-06, "loss": 0.5895, "step": 3733 }, { "epoch": 0.24, "grad_norm": 0.8857651352882385, "learning_rate": 8.922372916943374e-06, "loss": 0.6612, "step": 3734 }, { "epoch": 0.24, "grad_norm": 0.9229490756988525, "learning_rate": 8.921736553132772e-06, "loss": 0.6304, "step": 3735 }, { "epoch": 0.24, "grad_norm": 0.8978235125541687, "learning_rate": 8.921100024191486e-06, "loss": 0.5965, "step": 3736 }, { "epoch": 0.24, "grad_norm": 0.8856748938560486, "learning_rate": 8.920463330146318e-06, "loss": 0.6114, "step": 3737 }, { "epoch": 0.24, "grad_norm": 0.9307460784912109, "learning_rate": 8.919826471024078e-06, "loss": 0.6278, "step": 3738 }, { "epoch": 0.24, "grad_norm": 0.9287357926368713, "learning_rate": 8.919189446851583e-06, "loss": 0.5925, "step": 3739 }, { "epoch": 0.24, "grad_norm": 0.9358810782432556, "learning_rate": 8.918552257655652e-06, "loss": 0.608, "step": 3740 }, { "epoch": 0.24, "grad_norm": 0.9406039714813232, "learning_rate": 8.917914903463119e-06, "loss": 0.6813, "step": 3741 }, { "epoch": 0.24, "grad_norm": 0.9355833530426025, "learning_rate": 8.917277384300817e-06, "loss": 0.6438, "step": 3742 }, { "epoch": 0.24, "grad_norm": 0.8952451944351196, "learning_rate": 8.916639700195593e-06, "loss": 0.5932, "step": 3743 }, { "epoch": 0.24, "grad_norm": 0.8967479467391968, "learning_rate": 8.916001851174296e-06, "loss": 0.6132, "step": 3744 }, { "epoch": 0.24, "grad_norm": 0.9279077053070068, "learning_rate": 8.915363837263782e-06, "loss": 0.6351, "step": 3745 }, { "epoch": 0.24, "grad_norm": 0.8428364396095276, "learning_rate": 8.91472565849092e-06, "loss": 0.5932, "step": 3746 }, { "epoch": 0.24, "grad_norm": 0.892693817615509, "learning_rate": 8.914087314882578e-06, "loss": 0.585, "step": 3747 }, { "epoch": 0.24, "grad_norm": 0.8900630474090576, "learning_rate": 8.913448806465634e-06, "loss": 0.6317, "step": 3748 }, { "epoch": 0.24, "grad_norm": 0.8545112013816833, "learning_rate": 8.912810133266976e-06, "loss": 0.5925, "step": 3749 }, { "epoch": 0.24, "grad_norm": 0.9142085313796997, "learning_rate": 8.912171295313493e-06, "loss": 0.5952, "step": 3750 }, { "epoch": 0.24, "grad_norm": 0.8664583563804626, "learning_rate": 8.911532292632089e-06, "loss": 0.6369, "step": 3751 }, { "epoch": 0.24, "grad_norm": 0.8957768678665161, "learning_rate": 8.910893125249666e-06, "loss": 0.6155, "step": 3752 }, { "epoch": 0.24, "grad_norm": 0.9018309116363525, "learning_rate": 8.91025379319314e-06, "loss": 0.6574, "step": 3753 }, { "epoch": 0.24, "grad_norm": 0.8896942734718323, "learning_rate": 8.909614296489428e-06, "loss": 0.5785, "step": 3754 }, { "epoch": 0.24, "grad_norm": 0.9446683526039124, "learning_rate": 8.908974635165458e-06, "loss": 0.638, "step": 3755 }, { "epoch": 0.24, "grad_norm": 0.8614102602005005, "learning_rate": 8.908334809248165e-06, "loss": 0.5967, "step": 3756 }, { "epoch": 0.24, "grad_norm": 0.9426348805427551, "learning_rate": 8.90769481876449e-06, "loss": 0.6738, "step": 3757 }, { "epoch": 0.24, "grad_norm": 0.9114719033241272, "learning_rate": 8.90705466374138e-06, "loss": 0.6332, "step": 3758 }, { "epoch": 0.24, "grad_norm": 0.8968010544776917, "learning_rate": 8.906414344205789e-06, "loss": 0.6338, "step": 3759 }, { "epoch": 0.24, "grad_norm": 0.8845388293266296, "learning_rate": 8.905773860184679e-06, "loss": 0.6355, "step": 3760 }, { "epoch": 0.24, "grad_norm": 0.8717195391654968, "learning_rate": 8.905133211705019e-06, "loss": 0.593, "step": 3761 }, { "epoch": 0.24, "grad_norm": 0.8622083067893982, "learning_rate": 8.904492398793785e-06, "loss": 0.632, "step": 3762 }, { "epoch": 0.24, "grad_norm": 0.9208856225013733, "learning_rate": 8.903851421477959e-06, "loss": 0.6135, "step": 3763 }, { "epoch": 0.24, "grad_norm": 0.8842298984527588, "learning_rate": 8.90321027978453e-06, "loss": 0.6295, "step": 3764 }, { "epoch": 0.24, "grad_norm": 0.9459641575813293, "learning_rate": 8.902568973740495e-06, "loss": 0.5951, "step": 3765 }, { "epoch": 0.24, "grad_norm": 0.9696717858314514, "learning_rate": 8.901927503372855e-06, "loss": 0.5996, "step": 3766 }, { "epoch": 0.24, "grad_norm": 0.8983449935913086, "learning_rate": 8.901285868708622e-06, "loss": 0.6206, "step": 3767 }, { "epoch": 0.24, "grad_norm": 0.8596554398536682, "learning_rate": 8.900644069774815e-06, "loss": 0.5802, "step": 3768 }, { "epoch": 0.24, "grad_norm": 0.912438690662384, "learning_rate": 8.900002106598453e-06, "loss": 0.6058, "step": 3769 }, { "epoch": 0.24, "grad_norm": 0.931678056716919, "learning_rate": 8.89935997920657e-06, "loss": 0.6331, "step": 3770 }, { "epoch": 0.24, "grad_norm": 1.06976318359375, "learning_rate": 8.898717687626203e-06, "loss": 0.6401, "step": 3771 }, { "epoch": 0.24, "grad_norm": 0.9052533507347107, "learning_rate": 8.898075231884397e-06, "loss": 0.6218, "step": 3772 }, { "epoch": 0.24, "grad_norm": 0.8735697269439697, "learning_rate": 8.897432612008206e-06, "loss": 0.633, "step": 3773 }, { "epoch": 0.24, "grad_norm": 0.8962618112564087, "learning_rate": 8.896789828024682e-06, "loss": 0.6216, "step": 3774 }, { "epoch": 0.24, "grad_norm": 0.8915939927101135, "learning_rate": 8.896146879960896e-06, "loss": 0.6651, "step": 3775 }, { "epoch": 0.24, "grad_norm": 0.877487063407898, "learning_rate": 8.895503767843918e-06, "loss": 0.6433, "step": 3776 }, { "epoch": 0.24, "grad_norm": 0.9209939241409302, "learning_rate": 8.89486049170083e-06, "loss": 0.6573, "step": 3777 }, { "epoch": 0.24, "grad_norm": 0.8641723990440369, "learning_rate": 8.894217051558713e-06, "loss": 0.596, "step": 3778 }, { "epoch": 0.24, "grad_norm": 0.8952119946479797, "learning_rate": 8.893573447444663e-06, "loss": 0.6429, "step": 3779 }, { "epoch": 0.24, "grad_norm": 0.8626795411109924, "learning_rate": 8.892929679385783e-06, "loss": 0.5635, "step": 3780 }, { "epoch": 0.24, "grad_norm": 0.9164071679115295, "learning_rate": 8.892285747409172e-06, "loss": 0.5775, "step": 3781 }, { "epoch": 0.24, "grad_norm": 0.8823123574256897, "learning_rate": 8.891641651541953e-06, "loss": 0.5881, "step": 3782 }, { "epoch": 0.24, "grad_norm": 0.9153462648391724, "learning_rate": 8.89099739181124e-06, "loss": 0.5915, "step": 3783 }, { "epoch": 0.24, "grad_norm": 0.9311332106590271, "learning_rate": 8.890352968244162e-06, "loss": 0.601, "step": 3784 }, { "epoch": 0.24, "grad_norm": 0.9409120678901672, "learning_rate": 8.889708380867856e-06, "loss": 0.6608, "step": 3785 }, { "epoch": 0.24, "grad_norm": 0.8671489357948303, "learning_rate": 8.88906362970946e-06, "loss": 0.5921, "step": 3786 }, { "epoch": 0.24, "grad_norm": 0.8596804141998291, "learning_rate": 8.888418714796124e-06, "loss": 0.6256, "step": 3787 }, { "epoch": 0.24, "grad_norm": 0.8811514377593994, "learning_rate": 8.887773636155002e-06, "loss": 0.5861, "step": 3788 }, { "epoch": 0.24, "grad_norm": 0.900944709777832, "learning_rate": 8.887128393813257e-06, "loss": 0.641, "step": 3789 }, { "epoch": 0.24, "grad_norm": 0.915507435798645, "learning_rate": 8.886482987798059e-06, "loss": 0.6348, "step": 3790 }, { "epoch": 0.24, "grad_norm": 1.0151876211166382, "learning_rate": 8.885837418136581e-06, "loss": 0.5955, "step": 3791 }, { "epoch": 0.24, "grad_norm": 0.8506528735160828, "learning_rate": 8.885191684856007e-06, "loss": 0.5497, "step": 3792 }, { "epoch": 0.24, "grad_norm": 0.8645548224449158, "learning_rate": 8.884545787983528e-06, "loss": 0.6036, "step": 3793 }, { "epoch": 0.24, "grad_norm": 0.8505982160568237, "learning_rate": 8.88389972754634e-06, "loss": 0.6379, "step": 3794 }, { "epoch": 0.24, "grad_norm": 0.9695981740951538, "learning_rate": 8.883253503571643e-06, "loss": 0.632, "step": 3795 }, { "epoch": 0.24, "grad_norm": 0.9560012817382812, "learning_rate": 8.882607116086651e-06, "loss": 0.6135, "step": 3796 }, { "epoch": 0.24, "grad_norm": 0.8482503890991211, "learning_rate": 8.881960565118581e-06, "loss": 0.538, "step": 3797 }, { "epoch": 0.24, "grad_norm": 0.9212302565574646, "learning_rate": 8.881313850694653e-06, "loss": 0.6599, "step": 3798 }, { "epoch": 0.24, "grad_norm": 0.9493160843849182, "learning_rate": 8.880666972842105e-06, "loss": 0.6263, "step": 3799 }, { "epoch": 0.24, "grad_norm": 0.8596429824829102, "learning_rate": 8.880019931588167e-06, "loss": 0.6504, "step": 3800 }, { "epoch": 0.24, "grad_norm": 0.929779589176178, "learning_rate": 8.87937272696009e-06, "loss": 0.6391, "step": 3801 }, { "epoch": 0.24, "grad_norm": 0.8671481013298035, "learning_rate": 8.878725358985121e-06, "loss": 0.5667, "step": 3802 }, { "epoch": 0.24, "grad_norm": 0.9427719116210938, "learning_rate": 8.87807782769052e-06, "loss": 0.6795, "step": 3803 }, { "epoch": 0.24, "grad_norm": 0.9037208557128906, "learning_rate": 8.877430133103555e-06, "loss": 0.6183, "step": 3804 }, { "epoch": 0.24, "grad_norm": 0.8492844104766846, "learning_rate": 8.876782275251491e-06, "loss": 0.6273, "step": 3805 }, { "epoch": 0.24, "grad_norm": 0.8471344113349915, "learning_rate": 8.876134254161617e-06, "loss": 0.6053, "step": 3806 }, { "epoch": 0.24, "grad_norm": 0.8713465332984924, "learning_rate": 8.87548606986121e-06, "loss": 0.6451, "step": 3807 }, { "epoch": 0.24, "grad_norm": 0.9716042280197144, "learning_rate": 8.874837722377568e-06, "loss": 0.5792, "step": 3808 }, { "epoch": 0.24, "grad_norm": 0.8822860717773438, "learning_rate": 8.87418921173799e-06, "loss": 0.6024, "step": 3809 }, { "epoch": 0.24, "grad_norm": 0.8905455470085144, "learning_rate": 8.87354053796978e-06, "loss": 0.5976, "step": 3810 }, { "epoch": 0.24, "grad_norm": 0.807611346244812, "learning_rate": 8.872891701100253e-06, "loss": 0.6114, "step": 3811 }, { "epoch": 0.24, "grad_norm": 0.9287991523742676, "learning_rate": 8.872242701156731e-06, "loss": 0.6195, "step": 3812 }, { "epoch": 0.24, "grad_norm": 0.8870870471000671, "learning_rate": 8.871593538166538e-06, "loss": 0.6173, "step": 3813 }, { "epoch": 0.24, "grad_norm": 1.0158964395523071, "learning_rate": 8.870944212157008e-06, "loss": 0.6278, "step": 3814 }, { "epoch": 0.24, "grad_norm": 0.8998157382011414, "learning_rate": 8.870294723155486e-06, "loss": 0.6385, "step": 3815 }, { "epoch": 0.24, "grad_norm": 0.9535521268844604, "learning_rate": 8.869645071189316e-06, "loss": 0.6515, "step": 3816 }, { "epoch": 0.24, "grad_norm": 0.9406755566596985, "learning_rate": 8.868995256285853e-06, "loss": 0.6271, "step": 3817 }, { "epoch": 0.24, "grad_norm": 0.920963704586029, "learning_rate": 8.868345278472458e-06, "loss": 0.6204, "step": 3818 }, { "epoch": 0.24, "grad_norm": 0.9438266754150391, "learning_rate": 8.867695137776503e-06, "loss": 0.6417, "step": 3819 }, { "epoch": 0.24, "grad_norm": 0.9606151580810547, "learning_rate": 8.86704483422536e-06, "loss": 0.6408, "step": 3820 }, { "epoch": 0.24, "grad_norm": 0.8948151469230652, "learning_rate": 8.86639436784641e-06, "loss": 0.6276, "step": 3821 }, { "epoch": 0.24, "grad_norm": 0.9214081168174744, "learning_rate": 8.865743738667045e-06, "loss": 0.5498, "step": 3822 }, { "epoch": 0.24, "grad_norm": 0.8617424964904785, "learning_rate": 8.865092946714657e-06, "loss": 0.5677, "step": 3823 }, { "epoch": 0.24, "grad_norm": 0.9291020035743713, "learning_rate": 8.864441992016653e-06, "loss": 0.5911, "step": 3824 }, { "epoch": 0.24, "grad_norm": 0.9329352378845215, "learning_rate": 8.863790874600438e-06, "loss": 0.6073, "step": 3825 }, { "epoch": 0.24, "grad_norm": 0.8821927309036255, "learning_rate": 8.863139594493432e-06, "loss": 0.5866, "step": 3826 }, { "epoch": 0.24, "grad_norm": 0.8997513055801392, "learning_rate": 8.862488151723055e-06, "loss": 0.6081, "step": 3827 }, { "epoch": 0.24, "grad_norm": 0.9646042585372925, "learning_rate": 8.86183654631674e-06, "loss": 0.6295, "step": 3828 }, { "epoch": 0.24, "grad_norm": 0.9080867171287537, "learning_rate": 8.861184778301921e-06, "loss": 0.6282, "step": 3829 }, { "epoch": 0.24, "grad_norm": 0.8966723084449768, "learning_rate": 8.860532847706046e-06, "loss": 0.637, "step": 3830 }, { "epoch": 0.24, "grad_norm": 0.9197657704353333, "learning_rate": 8.85988075455656e-06, "loss": 0.5963, "step": 3831 }, { "epoch": 0.24, "grad_norm": 0.9177777767181396, "learning_rate": 8.859228498880923e-06, "loss": 0.6453, "step": 3832 }, { "epoch": 0.24, "grad_norm": 0.9342770576477051, "learning_rate": 8.8585760807066e-06, "loss": 0.6383, "step": 3833 }, { "epoch": 0.24, "grad_norm": 0.9254716038703918, "learning_rate": 8.85792350006106e-06, "loss": 0.6608, "step": 3834 }, { "epoch": 0.24, "grad_norm": 0.878808319568634, "learning_rate": 8.857270756971785e-06, "loss": 0.6036, "step": 3835 }, { "epoch": 0.24, "grad_norm": 0.9698695540428162, "learning_rate": 8.856617851466254e-06, "loss": 0.6553, "step": 3836 }, { "epoch": 0.24, "grad_norm": 0.8826630115509033, "learning_rate": 8.855964783571963e-06, "loss": 0.5691, "step": 3837 }, { "epoch": 0.24, "grad_norm": 0.9340159296989441, "learning_rate": 8.855311553316409e-06, "loss": 0.5863, "step": 3838 }, { "epoch": 0.24, "grad_norm": 0.8885470628738403, "learning_rate": 8.854658160727096e-06, "loss": 0.6368, "step": 3839 }, { "epoch": 0.24, "grad_norm": 0.846393346786499, "learning_rate": 8.854004605831536e-06, "loss": 0.5378, "step": 3840 }, { "epoch": 0.24, "grad_norm": 0.8811196684837341, "learning_rate": 8.853350888657251e-06, "loss": 0.6132, "step": 3841 }, { "epoch": 0.24, "grad_norm": 0.9290794134140015, "learning_rate": 8.852697009231766e-06, "loss": 0.5925, "step": 3842 }, { "epoch": 0.24, "grad_norm": 0.8985415697097778, "learning_rate": 8.852042967582611e-06, "loss": 0.6533, "step": 3843 }, { "epoch": 0.24, "grad_norm": 0.8721175789833069, "learning_rate": 8.851388763737328e-06, "loss": 0.5439, "step": 3844 }, { "epoch": 0.24, "grad_norm": 0.898200511932373, "learning_rate": 8.850734397723461e-06, "loss": 0.5901, "step": 3845 }, { "epoch": 0.24, "grad_norm": 1.0981974601745605, "learning_rate": 8.850079869568565e-06, "loss": 0.6579, "step": 3846 }, { "epoch": 0.24, "grad_norm": 0.8868777751922607, "learning_rate": 8.849425179300197e-06, "loss": 0.6113, "step": 3847 }, { "epoch": 0.24, "grad_norm": 0.8843356370925903, "learning_rate": 8.848770326945927e-06, "loss": 0.5933, "step": 3848 }, { "epoch": 0.24, "grad_norm": 0.9298630356788635, "learning_rate": 8.84811531253333e-06, "loss": 0.5776, "step": 3849 }, { "epoch": 0.24, "grad_norm": 1.0039656162261963, "learning_rate": 8.847460136089982e-06, "loss": 0.6304, "step": 3850 }, { "epoch": 0.24, "grad_norm": 0.8467380404472351, "learning_rate": 8.846804797643472e-06, "loss": 0.6048, "step": 3851 }, { "epoch": 0.24, "grad_norm": 0.8640190958976746, "learning_rate": 8.846149297221394e-06, "loss": 0.5943, "step": 3852 }, { "epoch": 0.24, "grad_norm": 0.9181884527206421, "learning_rate": 8.845493634851348e-06, "loss": 0.6935, "step": 3853 }, { "epoch": 0.24, "grad_norm": 0.8371793627738953, "learning_rate": 8.844837810560943e-06, "loss": 0.5877, "step": 3854 }, { "epoch": 0.24, "grad_norm": 0.9249871969223022, "learning_rate": 8.844181824377793e-06, "loss": 0.5707, "step": 3855 }, { "epoch": 0.24, "grad_norm": 0.8807600140571594, "learning_rate": 8.843525676329521e-06, "loss": 0.6436, "step": 3856 }, { "epoch": 0.24, "grad_norm": 0.8587551116943359, "learning_rate": 8.842869366443751e-06, "loss": 0.5952, "step": 3857 }, { "epoch": 0.24, "grad_norm": 0.8563278317451477, "learning_rate": 8.842212894748122e-06, "loss": 0.5835, "step": 3858 }, { "epoch": 0.24, "grad_norm": 0.8369274735450745, "learning_rate": 8.841556261270272e-06, "loss": 0.5843, "step": 3859 }, { "epoch": 0.24, "grad_norm": 0.9252521395683289, "learning_rate": 8.840899466037854e-06, "loss": 0.6468, "step": 3860 }, { "epoch": 0.24, "grad_norm": 0.874243438243866, "learning_rate": 8.840242509078521e-06, "loss": 0.5989, "step": 3861 }, { "epoch": 0.24, "grad_norm": 0.8621048331260681, "learning_rate": 8.839585390419933e-06, "loss": 0.5763, "step": 3862 }, { "epoch": 0.24, "grad_norm": 0.8070306777954102, "learning_rate": 8.838928110089763e-06, "loss": 0.6054, "step": 3863 }, { "epoch": 0.24, "grad_norm": 0.9515740275382996, "learning_rate": 8.838270668115685e-06, "loss": 0.6457, "step": 3864 }, { "epoch": 0.24, "grad_norm": 0.8527739644050598, "learning_rate": 8.837613064525381e-06, "loss": 0.5998, "step": 3865 }, { "epoch": 0.24, "grad_norm": 0.9535593390464783, "learning_rate": 8.83695529934654e-06, "loss": 0.6252, "step": 3866 }, { "epoch": 0.24, "grad_norm": 0.8122836351394653, "learning_rate": 8.83629737260686e-06, "loss": 0.5928, "step": 3867 }, { "epoch": 0.25, "grad_norm": 0.9340097904205322, "learning_rate": 8.835639284334043e-06, "loss": 0.5719, "step": 3868 }, { "epoch": 0.25, "grad_norm": 0.9119397401809692, "learning_rate": 8.834981034555799e-06, "loss": 0.6028, "step": 3869 }, { "epoch": 0.25, "grad_norm": 0.8478021025657654, "learning_rate": 8.834322623299844e-06, "loss": 0.5882, "step": 3870 }, { "epoch": 0.25, "grad_norm": 0.9423801898956299, "learning_rate": 8.833664050593904e-06, "loss": 0.5901, "step": 3871 }, { "epoch": 0.25, "grad_norm": 0.973012387752533, "learning_rate": 8.833005316465706e-06, "loss": 0.5702, "step": 3872 }, { "epoch": 0.25, "grad_norm": 0.870364785194397, "learning_rate": 8.832346420942987e-06, "loss": 0.5943, "step": 3873 }, { "epoch": 0.25, "grad_norm": 0.8896936774253845, "learning_rate": 8.831687364053493e-06, "loss": 0.6135, "step": 3874 }, { "epoch": 0.25, "grad_norm": 0.9121167063713074, "learning_rate": 8.831028145824974e-06, "loss": 0.639, "step": 3875 }, { "epoch": 0.25, "grad_norm": 0.9295619130134583, "learning_rate": 8.830368766285186e-06, "loss": 0.6404, "step": 3876 }, { "epoch": 0.25, "grad_norm": 0.9236605763435364, "learning_rate": 8.829709225461894e-06, "loss": 0.596, "step": 3877 }, { "epoch": 0.25, "grad_norm": 1.0370179414749146, "learning_rate": 8.829049523382871e-06, "loss": 0.6572, "step": 3878 }, { "epoch": 0.25, "grad_norm": 0.8750087022781372, "learning_rate": 8.828389660075891e-06, "loss": 0.6232, "step": 3879 }, { "epoch": 0.25, "grad_norm": 0.8742169141769409, "learning_rate": 8.82772963556874e-06, "loss": 0.6312, "step": 3880 }, { "epoch": 0.25, "grad_norm": 0.8765554428100586, "learning_rate": 8.827069449889211e-06, "loss": 0.58, "step": 3881 }, { "epoch": 0.25, "grad_norm": 0.9164361357688904, "learning_rate": 8.8264091030651e-06, "loss": 0.6194, "step": 3882 }, { "epoch": 0.25, "grad_norm": 0.914909839630127, "learning_rate": 8.825748595124214e-06, "loss": 0.6188, "step": 3883 }, { "epoch": 0.25, "grad_norm": 0.88898104429245, "learning_rate": 8.825087926094363e-06, "loss": 0.5854, "step": 3884 }, { "epoch": 0.25, "grad_norm": 0.8506219387054443, "learning_rate": 8.824427096003367e-06, "loss": 0.5805, "step": 3885 }, { "epoch": 0.25, "grad_norm": 0.9433155655860901, "learning_rate": 8.823766104879047e-06, "loss": 0.5827, "step": 3886 }, { "epoch": 0.25, "grad_norm": 0.8702185153961182, "learning_rate": 8.823104952749242e-06, "loss": 0.5661, "step": 3887 }, { "epoch": 0.25, "grad_norm": 0.8791462779045105, "learning_rate": 8.822443639641785e-06, "loss": 0.5424, "step": 3888 }, { "epoch": 0.25, "grad_norm": 0.8864879012107849, "learning_rate": 8.821782165584524e-06, "loss": 0.6041, "step": 3889 }, { "epoch": 0.25, "grad_norm": 0.9141310453414917, "learning_rate": 8.82112053060531e-06, "loss": 0.6335, "step": 3890 }, { "epoch": 0.25, "grad_norm": 0.9409934878349304, "learning_rate": 8.820458734732004e-06, "loss": 0.6872, "step": 3891 }, { "epoch": 0.25, "grad_norm": 0.9157419204711914, "learning_rate": 8.819796777992471e-06, "loss": 0.5836, "step": 3892 }, { "epoch": 0.25, "grad_norm": 0.957832396030426, "learning_rate": 8.819134660414585e-06, "loss": 0.6145, "step": 3893 }, { "epoch": 0.25, "grad_norm": 0.9433353543281555, "learning_rate": 8.818472382026222e-06, "loss": 0.573, "step": 3894 }, { "epoch": 0.25, "grad_norm": 0.919173002243042, "learning_rate": 8.817809942855272e-06, "loss": 0.5815, "step": 3895 }, { "epoch": 0.25, "grad_norm": 0.8651015758514404, "learning_rate": 8.817147342929626e-06, "loss": 0.5762, "step": 3896 }, { "epoch": 0.25, "grad_norm": 0.9625697135925293, "learning_rate": 8.816484582277184e-06, "loss": 0.6389, "step": 3897 }, { "epoch": 0.25, "grad_norm": 0.8946129083633423, "learning_rate": 8.815821660925853e-06, "loss": 0.6084, "step": 3898 }, { "epoch": 0.25, "grad_norm": 0.9177218079566956, "learning_rate": 8.815158578903548e-06, "loss": 0.6022, "step": 3899 }, { "epoch": 0.25, "grad_norm": 0.8781201243400574, "learning_rate": 8.814495336238185e-06, "loss": 0.6393, "step": 3900 }, { "epoch": 0.25, "grad_norm": 0.9286174774169922, "learning_rate": 8.813831932957696e-06, "loss": 0.6149, "step": 3901 }, { "epoch": 0.25, "grad_norm": 0.882340669631958, "learning_rate": 8.813168369090007e-06, "loss": 0.5349, "step": 3902 }, { "epoch": 0.25, "grad_norm": 0.8473665118217468, "learning_rate": 8.812504644663066e-06, "loss": 0.5991, "step": 3903 }, { "epoch": 0.25, "grad_norm": 1.013710618019104, "learning_rate": 8.811840759704816e-06, "loss": 0.6184, "step": 3904 }, { "epoch": 0.25, "grad_norm": 0.8682031631469727, "learning_rate": 8.811176714243213e-06, "loss": 0.6179, "step": 3905 }, { "epoch": 0.25, "grad_norm": 0.9201847314834595, "learning_rate": 8.810512508306216e-06, "loss": 0.5807, "step": 3906 }, { "epoch": 0.25, "grad_norm": 0.8606781959533691, "learning_rate": 8.809848141921793e-06, "loss": 0.5846, "step": 3907 }, { "epoch": 0.25, "grad_norm": 0.9430428743362427, "learning_rate": 8.809183615117919e-06, "loss": 0.6372, "step": 3908 }, { "epoch": 0.25, "grad_norm": 0.8924831748008728, "learning_rate": 8.808518927922574e-06, "loss": 0.6182, "step": 3909 }, { "epoch": 0.25, "grad_norm": 0.9287380576133728, "learning_rate": 8.807854080363745e-06, "loss": 0.6251, "step": 3910 }, { "epoch": 0.25, "grad_norm": 0.9271407723426819, "learning_rate": 8.807189072469428e-06, "loss": 0.6197, "step": 3911 }, { "epoch": 0.25, "grad_norm": 0.8575233817100525, "learning_rate": 8.806523904267623e-06, "loss": 0.6011, "step": 3912 }, { "epoch": 0.25, "grad_norm": 1.0265129804611206, "learning_rate": 8.80585857578634e-06, "loss": 0.6534, "step": 3913 }, { "epoch": 0.25, "grad_norm": 0.8787725567817688, "learning_rate": 8.80519308705359e-06, "loss": 0.5598, "step": 3914 }, { "epoch": 0.25, "grad_norm": 0.9931519031524658, "learning_rate": 8.804527438097396e-06, "loss": 0.624, "step": 3915 }, { "epoch": 0.25, "grad_norm": 0.9081161022186279, "learning_rate": 8.803861628945787e-06, "loss": 0.5939, "step": 3916 }, { "epoch": 0.25, "grad_norm": 0.9506007432937622, "learning_rate": 8.803195659626798e-06, "loss": 0.6593, "step": 3917 }, { "epoch": 0.25, "grad_norm": 0.8824777603149414, "learning_rate": 8.802529530168469e-06, "loss": 0.6381, "step": 3918 }, { "epoch": 0.25, "grad_norm": 0.8718113899230957, "learning_rate": 8.801863240598851e-06, "loss": 0.6002, "step": 3919 }, { "epoch": 0.25, "grad_norm": 0.880943238735199, "learning_rate": 8.801196790945999e-06, "loss": 0.587, "step": 3920 }, { "epoch": 0.25, "grad_norm": 0.9570931196212769, "learning_rate": 8.800530181237971e-06, "loss": 0.6615, "step": 3921 }, { "epoch": 0.25, "grad_norm": 0.9796764254570007, "learning_rate": 8.799863411502838e-06, "loss": 0.5868, "step": 3922 }, { "epoch": 0.25, "grad_norm": 0.8545233607292175, "learning_rate": 8.799196481768677e-06, "loss": 0.6008, "step": 3923 }, { "epoch": 0.25, "grad_norm": 0.8299331068992615, "learning_rate": 8.798529392063569e-06, "loss": 0.5954, "step": 3924 }, { "epoch": 0.25, "grad_norm": 0.8435283899307251, "learning_rate": 8.7978621424156e-06, "loss": 0.5738, "step": 3925 }, { "epoch": 0.25, "grad_norm": 0.9209175109863281, "learning_rate": 8.79719473285287e-06, "loss": 0.64, "step": 3926 }, { "epoch": 0.25, "grad_norm": 0.9299062490463257, "learning_rate": 8.796527163403479e-06, "loss": 0.6396, "step": 3927 }, { "epoch": 0.25, "grad_norm": 0.9327616691589355, "learning_rate": 8.795859434095535e-06, "loss": 0.6079, "step": 3928 }, { "epoch": 0.25, "grad_norm": 0.9269071221351624, "learning_rate": 8.795191544957156e-06, "loss": 0.6834, "step": 3929 }, { "epoch": 0.25, "grad_norm": 0.9255284667015076, "learning_rate": 8.794523496016465e-06, "loss": 0.6848, "step": 3930 }, { "epoch": 0.25, "grad_norm": 0.8709956407546997, "learning_rate": 8.793855287301588e-06, "loss": 0.6473, "step": 3931 }, { "epoch": 0.25, "grad_norm": 0.840023934841156, "learning_rate": 8.793186918840661e-06, "loss": 0.5463, "step": 3932 }, { "epoch": 0.25, "grad_norm": 0.9407967925071716, "learning_rate": 8.792518390661831e-06, "loss": 0.6065, "step": 3933 }, { "epoch": 0.25, "grad_norm": 0.8985733985900879, "learning_rate": 8.791849702793245e-06, "loss": 0.6236, "step": 3934 }, { "epoch": 0.25, "grad_norm": 0.8958050012588501, "learning_rate": 8.791180855263057e-06, "loss": 0.6322, "step": 3935 }, { "epoch": 0.25, "grad_norm": 0.9496500492095947, "learning_rate": 8.790511848099433e-06, "loss": 0.6399, "step": 3936 }, { "epoch": 0.25, "grad_norm": 0.9206477999687195, "learning_rate": 8.789842681330543e-06, "loss": 0.6232, "step": 3937 }, { "epoch": 0.25, "grad_norm": 0.9060776829719543, "learning_rate": 8.789173354984557e-06, "loss": 0.5971, "step": 3938 }, { "epoch": 0.25, "grad_norm": 0.9292250871658325, "learning_rate": 8.788503869089667e-06, "loss": 0.6796, "step": 3939 }, { "epoch": 0.25, "grad_norm": 0.8954676985740662, "learning_rate": 8.787834223674056e-06, "loss": 0.6061, "step": 3940 }, { "epoch": 0.25, "grad_norm": 0.8948878049850464, "learning_rate": 8.787164418765923e-06, "loss": 0.5726, "step": 3941 }, { "epoch": 0.25, "grad_norm": 0.9730081558227539, "learning_rate": 8.786494454393472e-06, "loss": 0.6271, "step": 3942 }, { "epoch": 0.25, "grad_norm": 0.9321277737617493, "learning_rate": 8.785824330584912e-06, "loss": 0.5956, "step": 3943 }, { "epoch": 0.25, "grad_norm": 0.9635143280029297, "learning_rate": 8.785154047368459e-06, "loss": 0.6239, "step": 3944 }, { "epoch": 0.25, "grad_norm": 0.9317022562026978, "learning_rate": 8.784483604772336e-06, "loss": 0.6193, "step": 3945 }, { "epoch": 0.25, "grad_norm": 0.893115222454071, "learning_rate": 8.783813002824773e-06, "loss": 0.617, "step": 3946 }, { "epoch": 0.25, "grad_norm": 0.899761974811554, "learning_rate": 8.783142241554009e-06, "loss": 0.5963, "step": 3947 }, { "epoch": 0.25, "grad_norm": 0.8902785181999207, "learning_rate": 8.782471320988284e-06, "loss": 0.6318, "step": 3948 }, { "epoch": 0.25, "grad_norm": 0.8925158381462097, "learning_rate": 8.781800241155851e-06, "loss": 0.5684, "step": 3949 }, { "epoch": 0.25, "grad_norm": 0.9196040630340576, "learning_rate": 8.781129002084965e-06, "loss": 0.5899, "step": 3950 }, { "epoch": 0.25, "grad_norm": 0.9138063192367554, "learning_rate": 8.780457603803892e-06, "loss": 0.6088, "step": 3951 }, { "epoch": 0.25, "grad_norm": 0.8876779675483704, "learning_rate": 8.779786046340898e-06, "loss": 0.6453, "step": 3952 }, { "epoch": 0.25, "grad_norm": 0.9258411526679993, "learning_rate": 8.779114329724265e-06, "loss": 0.6308, "step": 3953 }, { "epoch": 0.25, "grad_norm": 0.8825391530990601, "learning_rate": 8.778442453982272e-06, "loss": 0.5773, "step": 3954 }, { "epoch": 0.25, "grad_norm": 0.8886011242866516, "learning_rate": 8.777770419143214e-06, "loss": 0.646, "step": 3955 }, { "epoch": 0.25, "grad_norm": 0.9516189694404602, "learning_rate": 8.777098225235384e-06, "loss": 0.6543, "step": 3956 }, { "epoch": 0.25, "grad_norm": 0.9398981928825378, "learning_rate": 8.776425872287087e-06, "loss": 0.6067, "step": 3957 }, { "epoch": 0.25, "grad_norm": 0.9179983139038086, "learning_rate": 8.775753360326635e-06, "loss": 0.6425, "step": 3958 }, { "epoch": 0.25, "grad_norm": 0.8767003417015076, "learning_rate": 8.775080689382342e-06, "loss": 0.6333, "step": 3959 }, { "epoch": 0.25, "grad_norm": 0.8714125752449036, "learning_rate": 8.774407859482537e-06, "loss": 0.6302, "step": 3960 }, { "epoch": 0.25, "grad_norm": 0.8385068774223328, "learning_rate": 8.773734870655544e-06, "loss": 0.6087, "step": 3961 }, { "epoch": 0.25, "grad_norm": 0.9136397242546082, "learning_rate": 8.773061722929704e-06, "loss": 0.6035, "step": 3962 }, { "epoch": 0.25, "grad_norm": 0.805779755115509, "learning_rate": 8.772388416333361e-06, "loss": 0.5575, "step": 3963 }, { "epoch": 0.25, "grad_norm": 0.9417558908462524, "learning_rate": 8.771714950894865e-06, "loss": 0.5909, "step": 3964 }, { "epoch": 0.25, "grad_norm": 0.922148585319519, "learning_rate": 8.771041326642572e-06, "loss": 0.6075, "step": 3965 }, { "epoch": 0.25, "grad_norm": 0.9802806377410889, "learning_rate": 8.770367543604849e-06, "loss": 0.6446, "step": 3966 }, { "epoch": 0.25, "grad_norm": 0.924773633480072, "learning_rate": 8.769693601810066e-06, "loss": 0.6432, "step": 3967 }, { "epoch": 0.25, "grad_norm": 0.8747174143791199, "learning_rate": 8.769019501286598e-06, "loss": 0.5868, "step": 3968 }, { "epoch": 0.25, "grad_norm": 0.9166977405548096, "learning_rate": 8.768345242062828e-06, "loss": 0.6624, "step": 3969 }, { "epoch": 0.25, "grad_norm": 0.886821985244751, "learning_rate": 8.767670824167151e-06, "loss": 0.6774, "step": 3970 }, { "epoch": 0.25, "grad_norm": 0.8805607557296753, "learning_rate": 8.766996247627963e-06, "loss": 0.6026, "step": 3971 }, { "epoch": 0.25, "grad_norm": 0.8670737743377686, "learning_rate": 8.766321512473666e-06, "loss": 0.6216, "step": 3972 }, { "epoch": 0.25, "grad_norm": 0.9067496657371521, "learning_rate": 8.765646618732672e-06, "loss": 0.6065, "step": 3973 }, { "epoch": 0.25, "grad_norm": 0.8839542269706726, "learning_rate": 8.7649715664334e-06, "loss": 0.6026, "step": 3974 }, { "epoch": 0.25, "grad_norm": 0.915699303150177, "learning_rate": 8.764296355604273e-06, "loss": 0.6433, "step": 3975 }, { "epoch": 0.25, "grad_norm": 0.8603993654251099, "learning_rate": 8.76362098627372e-06, "loss": 0.6031, "step": 3976 }, { "epoch": 0.25, "grad_norm": 0.9431526064872742, "learning_rate": 8.76294545847018e-06, "loss": 0.6777, "step": 3977 }, { "epoch": 0.25, "grad_norm": 0.919879138469696, "learning_rate": 8.762269772222099e-06, "loss": 0.5918, "step": 3978 }, { "epoch": 0.25, "grad_norm": 0.8991773128509521, "learning_rate": 8.761593927557923e-06, "loss": 0.6318, "step": 3979 }, { "epoch": 0.25, "grad_norm": 0.8900842666625977, "learning_rate": 8.760917924506114e-06, "loss": 0.578, "step": 3980 }, { "epoch": 0.25, "grad_norm": 1.0085675716400146, "learning_rate": 8.760241763095135e-06, "loss": 0.6554, "step": 3981 }, { "epoch": 0.25, "grad_norm": 0.9195557832717896, "learning_rate": 8.759565443353454e-06, "loss": 0.6484, "step": 3982 }, { "epoch": 0.25, "grad_norm": 0.9673278331756592, "learning_rate": 8.758888965309554e-06, "loss": 0.6418, "step": 3983 }, { "epoch": 0.25, "grad_norm": 0.9138756394386292, "learning_rate": 8.758212328991913e-06, "loss": 0.6098, "step": 3984 }, { "epoch": 0.25, "grad_norm": 0.9599946737289429, "learning_rate": 8.757535534429027e-06, "loss": 0.6413, "step": 3985 }, { "epoch": 0.25, "grad_norm": 0.9634223580360413, "learning_rate": 8.756858581649391e-06, "loss": 0.6299, "step": 3986 }, { "epoch": 0.25, "grad_norm": 0.8933982253074646, "learning_rate": 8.756181470681507e-06, "loss": 0.642, "step": 3987 }, { "epoch": 0.25, "grad_norm": 0.8917509317398071, "learning_rate": 8.755504201553889e-06, "loss": 0.6301, "step": 3988 }, { "epoch": 0.25, "grad_norm": 0.9482274651527405, "learning_rate": 8.754826774295056e-06, "loss": 0.6465, "step": 3989 }, { "epoch": 0.25, "grad_norm": 0.9077238440513611, "learning_rate": 8.754149188933527e-06, "loss": 0.5909, "step": 3990 }, { "epoch": 0.25, "grad_norm": 0.9035444855690002, "learning_rate": 8.753471445497837e-06, "loss": 0.5492, "step": 3991 }, { "epoch": 0.25, "grad_norm": 0.8981434106826782, "learning_rate": 8.752793544016519e-06, "loss": 0.6003, "step": 3992 }, { "epoch": 0.25, "grad_norm": 0.9048976898193359, "learning_rate": 8.752115484518123e-06, "loss": 0.656, "step": 3993 }, { "epoch": 0.25, "grad_norm": 0.9182979464530945, "learning_rate": 8.751437267031194e-06, "loss": 0.6331, "step": 3994 }, { "epoch": 0.25, "grad_norm": 0.9162821173667908, "learning_rate": 8.750758891584293e-06, "loss": 0.6385, "step": 3995 }, { "epoch": 0.25, "grad_norm": 0.882770836353302, "learning_rate": 8.750080358205983e-06, "loss": 0.5651, "step": 3996 }, { "epoch": 0.25, "grad_norm": 0.8625838756561279, "learning_rate": 8.749401666924834e-06, "loss": 0.5548, "step": 3997 }, { "epoch": 0.25, "grad_norm": 0.8796778917312622, "learning_rate": 8.748722817769426e-06, "loss": 0.6218, "step": 3998 }, { "epoch": 0.25, "grad_norm": 0.8950878977775574, "learning_rate": 8.74804381076834e-06, "loss": 0.591, "step": 3999 }, { "epoch": 0.25, "grad_norm": 0.9669718742370605, "learning_rate": 8.747364645950168e-06, "loss": 0.6653, "step": 4000 }, { "epoch": 0.25, "grad_norm": 0.8975842595100403, "learning_rate": 8.746685323343507e-06, "loss": 0.6355, "step": 4001 }, { "epoch": 0.25, "grad_norm": 0.8494351506233215, "learning_rate": 8.74600584297696e-06, "loss": 0.5891, "step": 4002 }, { "epoch": 0.25, "grad_norm": 0.8773183226585388, "learning_rate": 8.745326204879139e-06, "loss": 0.6023, "step": 4003 }, { "epoch": 0.25, "grad_norm": 0.8296153545379639, "learning_rate": 8.74464640907866e-06, "loss": 0.5928, "step": 4004 }, { "epoch": 0.25, "grad_norm": 0.989004909992218, "learning_rate": 8.743966455604147e-06, "loss": 0.6234, "step": 4005 }, { "epoch": 0.25, "grad_norm": 0.8475044369697571, "learning_rate": 8.743286344484232e-06, "loss": 0.5406, "step": 4006 }, { "epoch": 0.25, "grad_norm": 0.8195810317993164, "learning_rate": 8.74260607574755e-06, "loss": 0.5639, "step": 4007 }, { "epoch": 0.25, "grad_norm": 0.909238874912262, "learning_rate": 8.741925649422746e-06, "loss": 0.5854, "step": 4008 }, { "epoch": 0.25, "grad_norm": 0.9121100902557373, "learning_rate": 8.741245065538471e-06, "loss": 0.614, "step": 4009 }, { "epoch": 0.25, "grad_norm": 0.8657447695732117, "learning_rate": 8.74056432412338e-06, "loss": 0.568, "step": 4010 }, { "epoch": 0.25, "grad_norm": 0.808589518070221, "learning_rate": 8.739883425206138e-06, "loss": 0.598, "step": 4011 }, { "epoch": 0.25, "grad_norm": 0.9169565439224243, "learning_rate": 8.739202368815416e-06, "loss": 0.6287, "step": 4012 }, { "epoch": 0.25, "grad_norm": 0.9050797820091248, "learning_rate": 8.738521154979889e-06, "loss": 0.6202, "step": 4013 }, { "epoch": 0.25, "grad_norm": 0.9150273203849792, "learning_rate": 8.737839783728242e-06, "loss": 0.6038, "step": 4014 }, { "epoch": 0.25, "grad_norm": 0.9774922132492065, "learning_rate": 8.737158255089164e-06, "loss": 0.6055, "step": 4015 }, { "epoch": 0.25, "grad_norm": 0.855354905128479, "learning_rate": 8.736476569091352e-06, "loss": 0.5831, "step": 4016 }, { "epoch": 0.25, "grad_norm": 0.8585079312324524, "learning_rate": 8.735794725763512e-06, "loss": 0.6254, "step": 4017 }, { "epoch": 0.25, "grad_norm": 0.9431387782096863, "learning_rate": 8.735112725134352e-06, "loss": 0.5971, "step": 4018 }, { "epoch": 0.25, "grad_norm": 0.9413880109786987, "learning_rate": 8.734430567232585e-06, "loss": 0.6035, "step": 4019 }, { "epoch": 0.25, "grad_norm": 0.8787413835525513, "learning_rate": 8.733748252086943e-06, "loss": 0.6233, "step": 4020 }, { "epoch": 0.25, "grad_norm": 0.9067035913467407, "learning_rate": 8.733065779726146e-06, "loss": 0.6869, "step": 4021 }, { "epoch": 0.25, "grad_norm": 0.8852519392967224, "learning_rate": 8.732383150178938e-06, "loss": 0.6373, "step": 4022 }, { "epoch": 0.25, "grad_norm": 0.9651377201080322, "learning_rate": 8.73170036347406e-06, "loss": 0.6788, "step": 4023 }, { "epoch": 0.25, "grad_norm": 0.8923559784889221, "learning_rate": 8.731017419640261e-06, "loss": 0.6376, "step": 4024 }, { "epoch": 0.26, "grad_norm": 0.9307226538658142, "learning_rate": 8.730334318706297e-06, "loss": 0.5993, "step": 4025 }, { "epoch": 0.26, "grad_norm": 0.9229474067687988, "learning_rate": 8.729651060700932e-06, "loss": 0.6617, "step": 4026 }, { "epoch": 0.26, "grad_norm": 0.863122284412384, "learning_rate": 8.728967645652936e-06, "loss": 0.5719, "step": 4027 }, { "epoch": 0.26, "grad_norm": 0.9152368903160095, "learning_rate": 8.728284073591083e-06, "loss": 0.6337, "step": 4028 }, { "epoch": 0.26, "grad_norm": 0.922824501991272, "learning_rate": 8.727600344544159e-06, "loss": 0.6418, "step": 4029 }, { "epoch": 0.26, "grad_norm": 0.8921812176704407, "learning_rate": 8.72691645854095e-06, "loss": 0.625, "step": 4030 }, { "epoch": 0.26, "grad_norm": 1.0182279348373413, "learning_rate": 8.726232415610257e-06, "loss": 0.6637, "step": 4031 }, { "epoch": 0.26, "grad_norm": 0.8648515343666077, "learning_rate": 8.725548215780877e-06, "loss": 0.5921, "step": 4032 }, { "epoch": 0.26, "grad_norm": 0.8446393609046936, "learning_rate": 8.724863859081622e-06, "loss": 0.6198, "step": 4033 }, { "epoch": 0.26, "grad_norm": 0.8602930307388306, "learning_rate": 8.724179345541308e-06, "loss": 0.5806, "step": 4034 }, { "epoch": 0.26, "grad_norm": 0.896755576133728, "learning_rate": 8.72349467518876e-06, "loss": 0.6625, "step": 4035 }, { "epoch": 0.26, "grad_norm": 0.8296254277229309, "learning_rate": 8.7228098480528e-06, "loss": 0.6138, "step": 4036 }, { "epoch": 0.26, "grad_norm": 0.9342056512832642, "learning_rate": 8.72212486416227e-06, "loss": 0.6454, "step": 4037 }, { "epoch": 0.26, "grad_norm": 0.9745578169822693, "learning_rate": 8.721439723546012e-06, "loss": 0.6671, "step": 4038 }, { "epoch": 0.26, "grad_norm": 0.9331424832344055, "learning_rate": 8.720754426232871e-06, "loss": 0.5942, "step": 4039 }, { "epoch": 0.26, "grad_norm": 0.9035102128982544, "learning_rate": 8.720068972251705e-06, "loss": 0.6128, "step": 4040 }, { "epoch": 0.26, "grad_norm": 0.8807538747787476, "learning_rate": 8.719383361631376e-06, "loss": 0.6637, "step": 4041 }, { "epoch": 0.26, "grad_norm": 0.8650099635124207, "learning_rate": 8.718697594400753e-06, "loss": 0.6391, "step": 4042 }, { "epoch": 0.26, "grad_norm": 0.8635523915290833, "learning_rate": 8.71801167058871e-06, "loss": 0.6161, "step": 4043 }, { "epoch": 0.26, "grad_norm": 0.8900404572486877, "learning_rate": 8.717325590224129e-06, "loss": 0.6197, "step": 4044 }, { "epoch": 0.26, "grad_norm": 0.9338827133178711, "learning_rate": 8.7166393533359e-06, "loss": 0.6293, "step": 4045 }, { "epoch": 0.26, "grad_norm": 0.9154714941978455, "learning_rate": 8.715952959952917e-06, "loss": 0.6291, "step": 4046 }, { "epoch": 0.26, "grad_norm": 0.8892160058021545, "learning_rate": 8.715266410104081e-06, "loss": 0.6401, "step": 4047 }, { "epoch": 0.26, "grad_norm": 0.8630048036575317, "learning_rate": 8.714579703818301e-06, "loss": 0.6683, "step": 4048 }, { "epoch": 0.26, "grad_norm": 0.8822508454322815, "learning_rate": 8.713892841124492e-06, "loss": 0.6048, "step": 4049 }, { "epoch": 0.26, "grad_norm": 0.9436633586883545, "learning_rate": 8.713205822051576e-06, "loss": 0.6598, "step": 4050 }, { "epoch": 0.26, "grad_norm": 0.8699237704277039, "learning_rate": 8.71251864662848e-06, "loss": 0.607, "step": 4051 }, { "epoch": 0.26, "grad_norm": 0.9008539915084839, "learning_rate": 8.711831314884137e-06, "loss": 0.6121, "step": 4052 }, { "epoch": 0.26, "grad_norm": 0.8727585077285767, "learning_rate": 8.711143826847491e-06, "loss": 0.6199, "step": 4053 }, { "epoch": 0.26, "grad_norm": 0.8655484914779663, "learning_rate": 8.71045618254749e-06, "loss": 0.58, "step": 4054 }, { "epoch": 0.26, "grad_norm": 0.9389612078666687, "learning_rate": 8.709768382013084e-06, "loss": 0.591, "step": 4055 }, { "epoch": 0.26, "grad_norm": 0.9785065650939941, "learning_rate": 8.709080425273238e-06, "loss": 0.6502, "step": 4056 }, { "epoch": 0.26, "grad_norm": 0.9361227750778198, "learning_rate": 8.708392312356919e-06, "loss": 0.6516, "step": 4057 }, { "epoch": 0.26, "grad_norm": 0.9632472395896912, "learning_rate": 8.7077040432931e-06, "loss": 0.7088, "step": 4058 }, { "epoch": 0.26, "grad_norm": 0.9708462953567505, "learning_rate": 8.707015618110761e-06, "loss": 0.6293, "step": 4059 }, { "epoch": 0.26, "grad_norm": 0.8739571571350098, "learning_rate": 8.706327036838891e-06, "loss": 0.6009, "step": 4060 }, { "epoch": 0.26, "grad_norm": 0.8531939387321472, "learning_rate": 8.705638299506482e-06, "loss": 0.5739, "step": 4061 }, { "epoch": 0.26, "grad_norm": 1.007936716079712, "learning_rate": 8.704949406142536e-06, "loss": 0.6615, "step": 4062 }, { "epoch": 0.26, "grad_norm": 0.9138129949569702, "learning_rate": 8.70426035677606e-06, "loss": 0.6759, "step": 4063 }, { "epoch": 0.26, "grad_norm": 0.8993487358093262, "learning_rate": 8.703571151436064e-06, "loss": 0.5449, "step": 4064 }, { "epoch": 0.26, "grad_norm": 0.9341960549354553, "learning_rate": 8.702881790151572e-06, "loss": 0.5835, "step": 4065 }, { "epoch": 0.26, "grad_norm": 0.9405590891838074, "learning_rate": 8.70219227295161e-06, "loss": 0.7077, "step": 4066 }, { "epoch": 0.26, "grad_norm": 0.8309204578399658, "learning_rate": 8.70150259986521e-06, "loss": 0.5906, "step": 4067 }, { "epoch": 0.26, "grad_norm": 0.9234378337860107, "learning_rate": 8.70081277092141e-06, "loss": 0.6099, "step": 4068 }, { "epoch": 0.26, "grad_norm": 0.9216628670692444, "learning_rate": 8.700122786149261e-06, "loss": 0.6042, "step": 4069 }, { "epoch": 0.26, "grad_norm": 1.0060338973999023, "learning_rate": 8.699432645577812e-06, "loss": 0.7019, "step": 4070 }, { "epoch": 0.26, "grad_norm": 0.8804594278335571, "learning_rate": 8.698742349236124e-06, "loss": 0.5648, "step": 4071 }, { "epoch": 0.26, "grad_norm": 0.9049243927001953, "learning_rate": 8.698051897153264e-06, "loss": 0.6575, "step": 4072 }, { "epoch": 0.26, "grad_norm": 0.8847574591636658, "learning_rate": 8.697361289358302e-06, "loss": 0.5955, "step": 4073 }, { "epoch": 0.26, "grad_norm": 0.943061351776123, "learning_rate": 8.696670525880318e-06, "loss": 0.6276, "step": 4074 }, { "epoch": 0.26, "grad_norm": 0.9238365292549133, "learning_rate": 8.695979606748398e-06, "loss": 0.5952, "step": 4075 }, { "epoch": 0.26, "grad_norm": 1.0495893955230713, "learning_rate": 8.695288531991633e-06, "loss": 0.6174, "step": 4076 }, { "epoch": 0.26, "grad_norm": 0.8936722278594971, "learning_rate": 8.694597301639125e-06, "loss": 0.6327, "step": 4077 }, { "epoch": 0.26, "grad_norm": 0.9051495790481567, "learning_rate": 8.693905915719976e-06, "loss": 0.6268, "step": 4078 }, { "epoch": 0.26, "grad_norm": 0.883945882320404, "learning_rate": 8.693214374263298e-06, "loss": 0.6083, "step": 4079 }, { "epoch": 0.26, "grad_norm": 0.8976554274559021, "learning_rate": 8.692522677298213e-06, "loss": 0.6267, "step": 4080 }, { "epoch": 0.26, "grad_norm": 0.8834015727043152, "learning_rate": 8.691830824853843e-06, "loss": 0.6268, "step": 4081 }, { "epoch": 0.26, "grad_norm": 0.8857308030128479, "learning_rate": 8.691138816959318e-06, "loss": 0.6356, "step": 4082 }, { "epoch": 0.26, "grad_norm": 0.9096359014511108, "learning_rate": 8.690446653643778e-06, "loss": 0.6125, "step": 4083 }, { "epoch": 0.26, "grad_norm": 0.9551771879196167, "learning_rate": 8.68975433493637e-06, "loss": 0.627, "step": 4084 }, { "epoch": 0.26, "grad_norm": 0.9362192749977112, "learning_rate": 8.689061860866242e-06, "loss": 0.5975, "step": 4085 }, { "epoch": 0.26, "grad_norm": 0.9172837734222412, "learning_rate": 8.68836923146255e-06, "loss": 0.6706, "step": 4086 }, { "epoch": 0.26, "grad_norm": 0.9365245699882507, "learning_rate": 8.687676446754464e-06, "loss": 0.6429, "step": 4087 }, { "epoch": 0.26, "grad_norm": 0.9222214221954346, "learning_rate": 8.686983506771149e-06, "loss": 0.6116, "step": 4088 }, { "epoch": 0.26, "grad_norm": 0.8810616731643677, "learning_rate": 8.686290411541785e-06, "loss": 0.5765, "step": 4089 }, { "epoch": 0.26, "grad_norm": 0.8715612888336182, "learning_rate": 8.685597161095555e-06, "loss": 0.5724, "step": 4090 }, { "epoch": 0.26, "grad_norm": 0.8744463920593262, "learning_rate": 8.68490375546165e-06, "loss": 0.5963, "step": 4091 }, { "epoch": 0.26, "grad_norm": 0.936255931854248, "learning_rate": 8.684210194669269e-06, "loss": 0.6308, "step": 4092 }, { "epoch": 0.26, "grad_norm": 0.9600224494934082, "learning_rate": 8.68351647874761e-06, "loss": 0.6213, "step": 4093 }, { "epoch": 0.26, "grad_norm": 0.9066085815429688, "learning_rate": 8.682822607725887e-06, "loss": 0.6384, "step": 4094 }, { "epoch": 0.26, "grad_norm": 0.9050360918045044, "learning_rate": 8.682128581633316e-06, "loss": 0.6109, "step": 4095 }, { "epoch": 0.26, "grad_norm": 0.8644648194313049, "learning_rate": 8.68143440049912e-06, "loss": 0.6181, "step": 4096 }, { "epoch": 0.26, "grad_norm": 0.8626109957695007, "learning_rate": 8.68074006435253e-06, "loss": 0.6159, "step": 4097 }, { "epoch": 0.26, "grad_norm": 0.9877548217773438, "learning_rate": 8.680045573222776e-06, "loss": 0.6638, "step": 4098 }, { "epoch": 0.26, "grad_norm": 0.9884246587753296, "learning_rate": 8.679350927139108e-06, "loss": 0.6608, "step": 4099 }, { "epoch": 0.26, "grad_norm": 0.8285159468650818, "learning_rate": 8.678656126130768e-06, "loss": 0.6032, "step": 4100 }, { "epoch": 0.26, "grad_norm": 0.9355902671813965, "learning_rate": 8.677961170227021e-06, "loss": 0.6451, "step": 4101 }, { "epoch": 0.26, "grad_norm": 0.8407034873962402, "learning_rate": 8.677266059457121e-06, "loss": 0.6093, "step": 4102 }, { "epoch": 0.26, "grad_norm": 0.8834094405174255, "learning_rate": 8.67657079385034e-06, "loss": 0.6294, "step": 4103 }, { "epoch": 0.26, "grad_norm": 0.953618049621582, "learning_rate": 8.675875373435951e-06, "loss": 0.5979, "step": 4104 }, { "epoch": 0.26, "grad_norm": 0.9017611742019653, "learning_rate": 8.67517979824324e-06, "loss": 0.5706, "step": 4105 }, { "epoch": 0.26, "grad_norm": 0.9118735194206238, "learning_rate": 8.674484068301492e-06, "loss": 0.6226, "step": 4106 }, { "epoch": 0.26, "grad_norm": 0.8959848284721375, "learning_rate": 8.673788183640001e-06, "loss": 0.6742, "step": 4107 }, { "epoch": 0.26, "grad_norm": 0.9677282571792603, "learning_rate": 8.673092144288071e-06, "loss": 0.6542, "step": 4108 }, { "epoch": 0.26, "grad_norm": 0.8986738324165344, "learning_rate": 8.672395950275008e-06, "loss": 0.6457, "step": 4109 }, { "epoch": 0.26, "grad_norm": 0.9157966375350952, "learning_rate": 8.671699601630127e-06, "loss": 0.6178, "step": 4110 }, { "epoch": 0.26, "grad_norm": 0.8645839095115662, "learning_rate": 8.67100309838275e-06, "loss": 0.5971, "step": 4111 }, { "epoch": 0.26, "grad_norm": 0.9088585376739502, "learning_rate": 8.670306440562202e-06, "loss": 0.635, "step": 4112 }, { "epoch": 0.26, "grad_norm": 0.8277181386947632, "learning_rate": 8.669609628197817e-06, "loss": 0.5686, "step": 4113 }, { "epoch": 0.26, "grad_norm": 0.9371722340583801, "learning_rate": 8.668912661318938e-06, "loss": 0.6229, "step": 4114 }, { "epoch": 0.26, "grad_norm": 0.8745486736297607, "learning_rate": 8.66821553995491e-06, "loss": 0.6389, "step": 4115 }, { "epoch": 0.26, "grad_norm": 0.8587163686752319, "learning_rate": 8.667518264135085e-06, "loss": 0.5837, "step": 4116 }, { "epoch": 0.26, "grad_norm": 0.895158588886261, "learning_rate": 8.666820833888825e-06, "loss": 0.5817, "step": 4117 }, { "epoch": 0.26, "grad_norm": 0.9290642738342285, "learning_rate": 8.666123249245495e-06, "loss": 0.6401, "step": 4118 }, { "epoch": 0.26, "grad_norm": 0.9100977778434753, "learning_rate": 8.665425510234469e-06, "loss": 0.6622, "step": 4119 }, { "epoch": 0.26, "grad_norm": 0.8745128512382507, "learning_rate": 8.664727616885126e-06, "loss": 0.613, "step": 4120 }, { "epoch": 0.26, "grad_norm": 0.8843961954116821, "learning_rate": 8.66402956922685e-06, "loss": 0.6118, "step": 4121 }, { "epoch": 0.26, "grad_norm": 0.9334408044815063, "learning_rate": 8.663331367289038e-06, "loss": 0.6604, "step": 4122 }, { "epoch": 0.26, "grad_norm": 0.8388084769248962, "learning_rate": 8.662633011101084e-06, "loss": 0.5691, "step": 4123 }, { "epoch": 0.26, "grad_norm": 0.8637480139732361, "learning_rate": 8.661934500692395e-06, "loss": 0.6299, "step": 4124 }, { "epoch": 0.26, "grad_norm": 0.8830687403678894, "learning_rate": 8.661235836092385e-06, "loss": 0.6879, "step": 4125 }, { "epoch": 0.26, "grad_norm": 0.91837477684021, "learning_rate": 8.660537017330468e-06, "loss": 0.6245, "step": 4126 }, { "epoch": 0.26, "grad_norm": 0.9373289346694946, "learning_rate": 8.659838044436074e-06, "loss": 0.6387, "step": 4127 }, { "epoch": 0.26, "grad_norm": 0.8295657634735107, "learning_rate": 8.65913891743863e-06, "loss": 0.546, "step": 4128 }, { "epoch": 0.26, "grad_norm": 0.8394411206245422, "learning_rate": 8.658439636367574e-06, "loss": 0.5775, "step": 4129 }, { "epoch": 0.26, "grad_norm": 0.9311953186988831, "learning_rate": 8.657740201252353e-06, "loss": 0.6699, "step": 4130 }, { "epoch": 0.26, "grad_norm": 0.9466168284416199, "learning_rate": 8.657040612122418e-06, "loss": 0.6419, "step": 4131 }, { "epoch": 0.26, "grad_norm": 0.9362534880638123, "learning_rate": 8.656340869007225e-06, "loss": 0.6982, "step": 4132 }, { "epoch": 0.26, "grad_norm": 0.9404389262199402, "learning_rate": 8.655640971936236e-06, "loss": 0.6242, "step": 4133 }, { "epoch": 0.26, "grad_norm": 0.9012186527252197, "learning_rate": 8.654940920938922e-06, "loss": 0.6187, "step": 4134 }, { "epoch": 0.26, "grad_norm": 0.8309886455535889, "learning_rate": 8.654240716044762e-06, "loss": 0.6226, "step": 4135 }, { "epoch": 0.26, "grad_norm": 0.9367273449897766, "learning_rate": 8.653540357283236e-06, "loss": 0.5919, "step": 4136 }, { "epoch": 0.26, "grad_norm": 0.8980950713157654, "learning_rate": 8.652839844683836e-06, "loss": 0.5913, "step": 4137 }, { "epoch": 0.26, "grad_norm": 0.8785884976387024, "learning_rate": 8.652139178276058e-06, "loss": 0.6348, "step": 4138 }, { "epoch": 0.26, "grad_norm": 0.8896494507789612, "learning_rate": 8.651438358089403e-06, "loss": 0.6578, "step": 4139 }, { "epoch": 0.26, "grad_norm": 0.9590379595756531, "learning_rate": 8.650737384153382e-06, "loss": 0.6917, "step": 4140 }, { "epoch": 0.26, "grad_norm": 0.9541071653366089, "learning_rate": 8.65003625649751e-06, "loss": 0.582, "step": 4141 }, { "epoch": 0.26, "grad_norm": 0.9491351246833801, "learning_rate": 8.649334975151307e-06, "loss": 0.6342, "step": 4142 }, { "epoch": 0.26, "grad_norm": 0.979164183139801, "learning_rate": 8.648633540144304e-06, "loss": 0.6439, "step": 4143 }, { "epoch": 0.26, "grad_norm": 0.8879642486572266, "learning_rate": 8.647931951506037e-06, "loss": 0.6109, "step": 4144 }, { "epoch": 0.26, "grad_norm": 0.8990030884742737, "learning_rate": 8.647230209266043e-06, "loss": 0.6334, "step": 4145 }, { "epoch": 0.26, "grad_norm": 0.9525482654571533, "learning_rate": 8.646528313453876e-06, "loss": 0.6203, "step": 4146 }, { "epoch": 0.26, "grad_norm": 0.8282102942466736, "learning_rate": 8.645826264099085e-06, "loss": 0.573, "step": 4147 }, { "epoch": 0.26, "grad_norm": 0.8854700922966003, "learning_rate": 8.645124061231234e-06, "loss": 0.6247, "step": 4148 }, { "epoch": 0.26, "grad_norm": 0.8921488523483276, "learning_rate": 8.644421704879889e-06, "loss": 0.5295, "step": 4149 }, { "epoch": 0.26, "grad_norm": 0.8470342755317688, "learning_rate": 8.643719195074622e-06, "loss": 0.5909, "step": 4150 }, { "epoch": 0.26, "grad_norm": 0.8630185127258301, "learning_rate": 8.643016531845017e-06, "loss": 0.6125, "step": 4151 }, { "epoch": 0.26, "grad_norm": 1.019774079322815, "learning_rate": 8.642313715220659e-06, "loss": 0.6089, "step": 4152 }, { "epoch": 0.26, "grad_norm": 0.8334149122238159, "learning_rate": 8.641610745231142e-06, "loss": 0.5966, "step": 4153 }, { "epoch": 0.26, "grad_norm": 0.8783389925956726, "learning_rate": 8.640907621906062e-06, "loss": 0.5849, "step": 4154 }, { "epoch": 0.26, "grad_norm": 0.9363436698913574, "learning_rate": 8.640204345275029e-06, "loss": 0.6535, "step": 4155 }, { "epoch": 0.26, "grad_norm": 0.9536002278327942, "learning_rate": 8.639500915367656e-06, "loss": 0.6491, "step": 4156 }, { "epoch": 0.26, "grad_norm": 0.9206741452217102, "learning_rate": 8.63879733221356e-06, "loss": 0.657, "step": 4157 }, { "epoch": 0.26, "grad_norm": 0.943328320980072, "learning_rate": 8.638093595842366e-06, "loss": 0.6666, "step": 4158 }, { "epoch": 0.26, "grad_norm": 0.9073593616485596, "learning_rate": 8.637389706283705e-06, "loss": 0.5944, "step": 4159 }, { "epoch": 0.26, "grad_norm": 0.9186743497848511, "learning_rate": 8.636685663567219e-06, "loss": 0.6469, "step": 4160 }, { "epoch": 0.26, "grad_norm": 0.8272576928138733, "learning_rate": 8.635981467722552e-06, "loss": 0.6093, "step": 4161 }, { "epoch": 0.26, "grad_norm": 0.827934741973877, "learning_rate": 8.635277118779353e-06, "loss": 0.5911, "step": 4162 }, { "epoch": 0.26, "grad_norm": 0.8880283832550049, "learning_rate": 8.63457261676728e-06, "loss": 0.6092, "step": 4163 }, { "epoch": 0.26, "grad_norm": 0.8852022886276245, "learning_rate": 8.633867961715998e-06, "loss": 0.5906, "step": 4164 }, { "epoch": 0.26, "grad_norm": 0.8944527506828308, "learning_rate": 8.633163153655178e-06, "loss": 0.6314, "step": 4165 }, { "epoch": 0.26, "grad_norm": 0.9245870113372803, "learning_rate": 8.632458192614495e-06, "loss": 0.6901, "step": 4166 }, { "epoch": 0.26, "grad_norm": 0.8997650146484375, "learning_rate": 8.631753078623635e-06, "loss": 0.5836, "step": 4167 }, { "epoch": 0.26, "grad_norm": 0.935129702091217, "learning_rate": 8.631047811712288e-06, "loss": 0.6776, "step": 4168 }, { "epoch": 0.26, "grad_norm": 0.9850293397903442, "learning_rate": 8.630342391910147e-06, "loss": 0.6637, "step": 4169 }, { "epoch": 0.26, "grad_norm": 0.9164685010910034, "learning_rate": 8.629636819246919e-06, "loss": 0.6207, "step": 4170 }, { "epoch": 0.26, "grad_norm": 0.8634175658226013, "learning_rate": 8.628931093752308e-06, "loss": 0.6029, "step": 4171 }, { "epoch": 0.26, "grad_norm": 0.8743361234664917, "learning_rate": 8.628225215456037e-06, "loss": 0.6149, "step": 4172 }, { "epoch": 0.26, "grad_norm": 0.9644536972045898, "learning_rate": 8.627519184387821e-06, "loss": 0.6623, "step": 4173 }, { "epoch": 0.26, "grad_norm": 0.9518513679504395, "learning_rate": 8.626813000577393e-06, "loss": 0.6665, "step": 4174 }, { "epoch": 0.26, "grad_norm": 0.9795065522193909, "learning_rate": 8.626106664054483e-06, "loss": 0.6404, "step": 4175 }, { "epoch": 0.26, "grad_norm": 0.8946532011032104, "learning_rate": 8.62540017484884e-06, "loss": 0.6109, "step": 4176 }, { "epoch": 0.26, "grad_norm": 0.8872295618057251, "learning_rate": 8.624693532990205e-06, "loss": 0.591, "step": 4177 }, { "epoch": 0.26, "grad_norm": 0.9337349534034729, "learning_rate": 8.623986738508334e-06, "loss": 0.641, "step": 4178 }, { "epoch": 0.26, "grad_norm": 0.8817663788795471, "learning_rate": 8.62327979143299e-06, "loss": 0.5987, "step": 4179 }, { "epoch": 0.26, "grad_norm": 0.9417575001716614, "learning_rate": 8.622572691793937e-06, "loss": 0.5693, "step": 4180 }, { "epoch": 0.26, "grad_norm": 0.8882385492324829, "learning_rate": 8.621865439620952e-06, "loss": 0.5992, "step": 4181 }, { "epoch": 0.26, "grad_norm": 0.8872155547142029, "learning_rate": 8.621158034943812e-06, "loss": 0.6055, "step": 4182 }, { "epoch": 0.27, "grad_norm": 0.8701667189598083, "learning_rate": 8.620450477792303e-06, "loss": 0.6059, "step": 4183 }, { "epoch": 0.27, "grad_norm": 0.8833332657814026, "learning_rate": 8.619742768196221e-06, "loss": 0.5834, "step": 4184 }, { "epoch": 0.27, "grad_norm": 0.9163500070571899, "learning_rate": 8.619034906185362e-06, "loss": 0.6927, "step": 4185 }, { "epoch": 0.27, "grad_norm": 0.9250738620758057, "learning_rate": 8.618326891789534e-06, "loss": 0.6408, "step": 4186 }, { "epoch": 0.27, "grad_norm": 0.9231948852539062, "learning_rate": 8.617618725038545e-06, "loss": 0.6151, "step": 4187 }, { "epoch": 0.27, "grad_norm": 0.8991936445236206, "learning_rate": 8.61691040596222e-06, "loss": 0.6433, "step": 4188 }, { "epoch": 0.27, "grad_norm": 0.9138967990875244, "learning_rate": 8.616201934590379e-06, "loss": 0.6513, "step": 4189 }, { "epoch": 0.27, "grad_norm": 0.9194620251655579, "learning_rate": 8.615493310952852e-06, "loss": 0.6536, "step": 4190 }, { "epoch": 0.27, "grad_norm": 0.888721227645874, "learning_rate": 8.614784535079482e-06, "loss": 0.606, "step": 4191 }, { "epoch": 0.27, "grad_norm": 0.9047959446907043, "learning_rate": 8.614075607000108e-06, "loss": 0.6485, "step": 4192 }, { "epoch": 0.27, "grad_norm": 0.9056040644645691, "learning_rate": 8.613366526744584e-06, "loss": 0.5843, "step": 4193 }, { "epoch": 0.27, "grad_norm": 0.9224606156349182, "learning_rate": 8.612657294342765e-06, "loss": 0.5978, "step": 4194 }, { "epoch": 0.27, "grad_norm": 0.9035705327987671, "learning_rate": 8.611947909824514e-06, "loss": 0.651, "step": 4195 }, { "epoch": 0.27, "grad_norm": 0.8923839330673218, "learning_rate": 8.611238373219703e-06, "loss": 0.5926, "step": 4196 }, { "epoch": 0.27, "grad_norm": 0.9223050475120544, "learning_rate": 8.610528684558206e-06, "loss": 0.5893, "step": 4197 }, { "epoch": 0.27, "grad_norm": 0.9211618900299072, "learning_rate": 8.609818843869907e-06, "loss": 0.6018, "step": 4198 }, { "epoch": 0.27, "grad_norm": 0.8177082538604736, "learning_rate": 8.609108851184693e-06, "loss": 0.587, "step": 4199 }, { "epoch": 0.27, "grad_norm": 0.8298165202140808, "learning_rate": 8.608398706532462e-06, "loss": 0.6308, "step": 4200 }, { "epoch": 0.27, "grad_norm": 0.8628758192062378, "learning_rate": 8.607688409943112e-06, "loss": 0.5662, "step": 4201 }, { "epoch": 0.27, "grad_norm": 0.8658290505409241, "learning_rate": 8.606977961446554e-06, "loss": 0.6113, "step": 4202 }, { "epoch": 0.27, "grad_norm": 0.9051910638809204, "learning_rate": 8.606267361072704e-06, "loss": 0.6256, "step": 4203 }, { "epoch": 0.27, "grad_norm": 0.8783097267150879, "learning_rate": 8.605556608851478e-06, "loss": 0.6607, "step": 4204 }, { "epoch": 0.27, "grad_norm": 0.9676861763000488, "learning_rate": 8.604845704812808e-06, "loss": 0.6564, "step": 4205 }, { "epoch": 0.27, "grad_norm": 0.9138243198394775, "learning_rate": 8.604134648986625e-06, "loss": 0.5926, "step": 4206 }, { "epoch": 0.27, "grad_norm": 0.9041840434074402, "learning_rate": 8.603423441402868e-06, "loss": 0.6202, "step": 4207 }, { "epoch": 0.27, "grad_norm": 0.8703333735466003, "learning_rate": 8.602712082091487e-06, "loss": 0.573, "step": 4208 }, { "epoch": 0.27, "grad_norm": 0.9118040204048157, "learning_rate": 8.602000571082432e-06, "loss": 0.6348, "step": 4209 }, { "epoch": 0.27, "grad_norm": 0.9517326354980469, "learning_rate": 8.601288908405665e-06, "loss": 0.622, "step": 4210 }, { "epoch": 0.27, "grad_norm": 0.9293259978294373, "learning_rate": 8.60057709409115e-06, "loss": 0.6272, "step": 4211 }, { "epoch": 0.27, "grad_norm": 0.8603157997131348, "learning_rate": 8.599865128168858e-06, "loss": 0.5833, "step": 4212 }, { "epoch": 0.27, "grad_norm": 0.8905279040336609, "learning_rate": 8.599153010668768e-06, "loss": 0.5917, "step": 4213 }, { "epoch": 0.27, "grad_norm": 0.9047275185585022, "learning_rate": 8.598440741620868e-06, "loss": 0.6405, "step": 4214 }, { "epoch": 0.27, "grad_norm": 0.8636517524719238, "learning_rate": 8.597728321055144e-06, "loss": 0.5763, "step": 4215 }, { "epoch": 0.27, "grad_norm": 0.8629072904586792, "learning_rate": 8.597015749001596e-06, "loss": 0.6013, "step": 4216 }, { "epoch": 0.27, "grad_norm": 0.8857645988464355, "learning_rate": 8.59630302549023e-06, "loss": 0.6191, "step": 4217 }, { "epoch": 0.27, "grad_norm": 0.9491539597511292, "learning_rate": 8.595590150551052e-06, "loss": 0.6271, "step": 4218 }, { "epoch": 0.27, "grad_norm": 0.9557621479034424, "learning_rate": 8.59487712421408e-06, "loss": 0.6135, "step": 4219 }, { "epoch": 0.27, "grad_norm": 0.9056437611579895, "learning_rate": 8.594163946509339e-06, "loss": 0.6211, "step": 4220 }, { "epoch": 0.27, "grad_norm": 0.8638589978218079, "learning_rate": 8.593450617466859e-06, "loss": 0.5999, "step": 4221 }, { "epoch": 0.27, "grad_norm": 0.9568116664886475, "learning_rate": 8.592737137116673e-06, "loss": 0.6038, "step": 4222 }, { "epoch": 0.27, "grad_norm": 0.9060722589492798, "learning_rate": 8.592023505488825e-06, "loss": 0.6373, "step": 4223 }, { "epoch": 0.27, "grad_norm": 0.7833012342453003, "learning_rate": 8.591309722613362e-06, "loss": 0.569, "step": 4224 }, { "epoch": 0.27, "grad_norm": 0.9138297438621521, "learning_rate": 8.590595788520342e-06, "loss": 0.5829, "step": 4225 }, { "epoch": 0.27, "grad_norm": 0.8410037755966187, "learning_rate": 8.589881703239821e-06, "loss": 0.5491, "step": 4226 }, { "epoch": 0.27, "grad_norm": 0.8916024565696716, "learning_rate": 8.58916746680187e-06, "loss": 0.6094, "step": 4227 }, { "epoch": 0.27, "grad_norm": 0.9920042157173157, "learning_rate": 8.588453079236565e-06, "loss": 0.6644, "step": 4228 }, { "epoch": 0.27, "grad_norm": 0.9212594032287598, "learning_rate": 8.587738540573984e-06, "loss": 0.5878, "step": 4229 }, { "epoch": 0.27, "grad_norm": 0.8286495804786682, "learning_rate": 8.587023850844212e-06, "loss": 0.6002, "step": 4230 }, { "epoch": 0.27, "grad_norm": 0.8914030194282532, "learning_rate": 8.586309010077345e-06, "loss": 0.6672, "step": 4231 }, { "epoch": 0.27, "grad_norm": 0.8013595342636108, "learning_rate": 8.585594018303482e-06, "loss": 0.6138, "step": 4232 }, { "epoch": 0.27, "grad_norm": 0.8565639853477478, "learning_rate": 8.584878875552727e-06, "loss": 0.6073, "step": 4233 }, { "epoch": 0.27, "grad_norm": 0.818520188331604, "learning_rate": 8.584163581855194e-06, "loss": 0.6158, "step": 4234 }, { "epoch": 0.27, "grad_norm": 0.9362378120422363, "learning_rate": 8.583448137241002e-06, "loss": 0.629, "step": 4235 }, { "epoch": 0.27, "grad_norm": 0.9456666111946106, "learning_rate": 8.582732541740273e-06, "loss": 0.617, "step": 4236 }, { "epoch": 0.27, "grad_norm": 0.8908970952033997, "learning_rate": 8.582016795383142e-06, "loss": 0.5931, "step": 4237 }, { "epoch": 0.27, "grad_norm": 0.8807900547981262, "learning_rate": 8.581300898199743e-06, "loss": 0.5685, "step": 4238 }, { "epoch": 0.27, "grad_norm": 0.8527096509933472, "learning_rate": 8.580584850220222e-06, "loss": 0.6016, "step": 4239 }, { "epoch": 0.27, "grad_norm": 0.942776620388031, "learning_rate": 8.57986865147473e-06, "loss": 0.5871, "step": 4240 }, { "epoch": 0.27, "grad_norm": 0.9495031237602234, "learning_rate": 8.57915230199342e-06, "loss": 0.6078, "step": 4241 }, { "epoch": 0.27, "grad_norm": 0.9065079092979431, "learning_rate": 8.578435801806461e-06, "loss": 0.6451, "step": 4242 }, { "epoch": 0.27, "grad_norm": 0.8677025437355042, "learning_rate": 8.577719150944017e-06, "loss": 0.6228, "step": 4243 }, { "epoch": 0.27, "grad_norm": 0.9314882755279541, "learning_rate": 8.577002349436264e-06, "loss": 0.5969, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.0232270956039429, "learning_rate": 8.57628539731339e-06, "loss": 0.6652, "step": 4245 }, { "epoch": 0.27, "grad_norm": 0.8840213418006897, "learning_rate": 8.575568294605574e-06, "loss": 0.6591, "step": 4246 }, { "epoch": 0.27, "grad_norm": 0.935551643371582, "learning_rate": 8.574851041343018e-06, "loss": 0.5936, "step": 4247 }, { "epoch": 0.27, "grad_norm": 0.9176490902900696, "learning_rate": 8.574133637555921e-06, "loss": 0.6103, "step": 4248 }, { "epoch": 0.27, "grad_norm": 0.8537380695343018, "learning_rate": 8.57341608327449e-06, "loss": 0.5831, "step": 4249 }, { "epoch": 0.27, "grad_norm": 0.8898982405662537, "learning_rate": 8.572698378528937e-06, "loss": 0.6522, "step": 4250 }, { "epoch": 0.27, "grad_norm": 0.8428791761398315, "learning_rate": 8.571980523349485e-06, "loss": 0.6097, "step": 4251 }, { "epoch": 0.27, "grad_norm": 0.9399141669273376, "learning_rate": 8.57126251776636e-06, "loss": 0.6514, "step": 4252 }, { "epoch": 0.27, "grad_norm": 0.9143974781036377, "learning_rate": 8.570544361809792e-06, "loss": 0.6807, "step": 4253 }, { "epoch": 0.27, "grad_norm": 0.9048095941543579, "learning_rate": 8.569826055510025e-06, "loss": 0.5986, "step": 4254 }, { "epoch": 0.27, "grad_norm": 0.8654329776763916, "learning_rate": 8.569107598897296e-06, "loss": 0.5274, "step": 4255 }, { "epoch": 0.27, "grad_norm": 0.9597179293632507, "learning_rate": 8.568388992001868e-06, "loss": 0.5958, "step": 4256 }, { "epoch": 0.27, "grad_norm": 0.8860706090927124, "learning_rate": 8.56767023485399e-06, "loss": 0.5915, "step": 4257 }, { "epoch": 0.27, "grad_norm": 0.8715736269950867, "learning_rate": 8.56695132748393e-06, "loss": 0.6533, "step": 4258 }, { "epoch": 0.27, "grad_norm": 0.9161938428878784, "learning_rate": 8.566232269921957e-06, "loss": 0.7043, "step": 4259 }, { "epoch": 0.27, "grad_norm": 0.8063193559646606, "learning_rate": 8.565513062198351e-06, "loss": 0.6129, "step": 4260 }, { "epoch": 0.27, "grad_norm": 0.9243603944778442, "learning_rate": 8.564793704343392e-06, "loss": 0.5744, "step": 4261 }, { "epoch": 0.27, "grad_norm": 0.9865625500679016, "learning_rate": 8.564074196387371e-06, "loss": 0.6796, "step": 4262 }, { "epoch": 0.27, "grad_norm": 0.8942394256591797, "learning_rate": 8.563354538360585e-06, "loss": 0.6083, "step": 4263 }, { "epoch": 0.27, "grad_norm": 0.9242582321166992, "learning_rate": 8.562634730293335e-06, "loss": 0.5982, "step": 4264 }, { "epoch": 0.27, "grad_norm": 0.9217408895492554, "learning_rate": 8.56191477221593e-06, "loss": 0.6569, "step": 4265 }, { "epoch": 0.27, "grad_norm": 0.9580654501914978, "learning_rate": 8.561194664158685e-06, "loss": 0.6733, "step": 4266 }, { "epoch": 0.27, "grad_norm": 0.9393530488014221, "learning_rate": 8.560474406151921e-06, "loss": 0.668, "step": 4267 }, { "epoch": 0.27, "grad_norm": 0.9454185962677002, "learning_rate": 8.559753998225965e-06, "loss": 0.6592, "step": 4268 }, { "epoch": 0.27, "grad_norm": 0.9573476910591125, "learning_rate": 8.559033440411155e-06, "loss": 0.5933, "step": 4269 }, { "epoch": 0.27, "grad_norm": 0.8191101551055908, "learning_rate": 8.558312732737825e-06, "loss": 0.5713, "step": 4270 }, { "epoch": 0.27, "grad_norm": 0.8455954790115356, "learning_rate": 8.557591875236323e-06, "loss": 0.5984, "step": 4271 }, { "epoch": 0.27, "grad_norm": 0.8731233477592468, "learning_rate": 8.556870867937006e-06, "loss": 0.5876, "step": 4272 }, { "epoch": 0.27, "grad_norm": 0.8729333281517029, "learning_rate": 8.55614971087023e-06, "loss": 0.6102, "step": 4273 }, { "epoch": 0.27, "grad_norm": 0.9293901324272156, "learning_rate": 8.555428404066359e-06, "loss": 0.6141, "step": 4274 }, { "epoch": 0.27, "grad_norm": 0.8134398460388184, "learning_rate": 8.554706947555766e-06, "loss": 0.5814, "step": 4275 }, { "epoch": 0.27, "grad_norm": 0.9086621999740601, "learning_rate": 8.553985341368832e-06, "loss": 0.6756, "step": 4276 }, { "epoch": 0.27, "grad_norm": 0.8340302109718323, "learning_rate": 8.553263585535937e-06, "loss": 0.6272, "step": 4277 }, { "epoch": 0.27, "grad_norm": 0.9644330143928528, "learning_rate": 8.552541680087472e-06, "loss": 0.611, "step": 4278 }, { "epoch": 0.27, "grad_norm": 0.9474432468414307, "learning_rate": 8.551819625053837e-06, "loss": 0.6581, "step": 4279 }, { "epoch": 0.27, "grad_norm": 0.8727615475654602, "learning_rate": 8.551097420465432e-06, "loss": 0.6059, "step": 4280 }, { "epoch": 0.27, "grad_norm": 0.9292715191841125, "learning_rate": 8.55037506635267e-06, "loss": 0.5987, "step": 4281 }, { "epoch": 0.27, "grad_norm": 0.8968216180801392, "learning_rate": 8.549652562745963e-06, "loss": 0.6109, "step": 4282 }, { "epoch": 0.27, "grad_norm": 0.919104278087616, "learning_rate": 8.548929909675736e-06, "loss": 0.6043, "step": 4283 }, { "epoch": 0.27, "grad_norm": 0.963595449924469, "learning_rate": 8.548207107172417e-06, "loss": 0.6421, "step": 4284 }, { "epoch": 0.27, "grad_norm": 0.9195282459259033, "learning_rate": 8.547484155266439e-06, "loss": 0.6284, "step": 4285 }, { "epoch": 0.27, "grad_norm": 0.9050331711769104, "learning_rate": 8.546761053988244e-06, "loss": 0.6787, "step": 4286 }, { "epoch": 0.27, "grad_norm": 0.8294732570648193, "learning_rate": 8.546037803368279e-06, "loss": 0.5982, "step": 4287 }, { "epoch": 0.27, "grad_norm": 0.8532490134239197, "learning_rate": 8.545314403436998e-06, "loss": 0.5664, "step": 4288 }, { "epoch": 0.27, "grad_norm": 0.9732022881507874, "learning_rate": 8.54459085422486e-06, "loss": 0.6, "step": 4289 }, { "epoch": 0.27, "grad_norm": 0.9613706469535828, "learning_rate": 8.543867155762335e-06, "loss": 0.6525, "step": 4290 }, { "epoch": 0.27, "grad_norm": 0.9835689663887024, "learning_rate": 8.543143308079888e-06, "loss": 0.6368, "step": 4291 }, { "epoch": 0.27, "grad_norm": 0.857182502746582, "learning_rate": 8.542419311208006e-06, "loss": 0.6265, "step": 4292 }, { "epoch": 0.27, "grad_norm": 0.8491384983062744, "learning_rate": 8.541695165177169e-06, "loss": 0.664, "step": 4293 }, { "epoch": 0.27, "grad_norm": 0.9267544150352478, "learning_rate": 8.540970870017867e-06, "loss": 0.6202, "step": 4294 }, { "epoch": 0.27, "grad_norm": 0.9041336178779602, "learning_rate": 8.540246425760602e-06, "loss": 0.5934, "step": 4295 }, { "epoch": 0.27, "grad_norm": 0.9102574586868286, "learning_rate": 8.539521832435874e-06, "loss": 0.5931, "step": 4296 }, { "epoch": 0.27, "grad_norm": 0.8750420212745667, "learning_rate": 8.538797090074196e-06, "loss": 0.6128, "step": 4297 }, { "epoch": 0.27, "grad_norm": 0.9216861724853516, "learning_rate": 8.538072198706081e-06, "loss": 0.6311, "step": 4298 }, { "epoch": 0.27, "grad_norm": 0.8805850744247437, "learning_rate": 8.537347158362056e-06, "loss": 0.58, "step": 4299 }, { "epoch": 0.27, "grad_norm": 0.8909803032875061, "learning_rate": 8.536621969072648e-06, "loss": 0.607, "step": 4300 }, { "epoch": 0.27, "grad_norm": 0.9267565608024597, "learning_rate": 8.53589663086839e-06, "loss": 0.6457, "step": 4301 }, { "epoch": 0.27, "grad_norm": 0.9968888759613037, "learning_rate": 8.535171143779828e-06, "loss": 0.6252, "step": 4302 }, { "epoch": 0.27, "grad_norm": 0.8970872163772583, "learning_rate": 8.534445507837505e-06, "loss": 0.6065, "step": 4303 }, { "epoch": 0.27, "grad_norm": 0.9261126518249512, "learning_rate": 8.533719723071979e-06, "loss": 0.6377, "step": 4304 }, { "epoch": 0.27, "grad_norm": 0.9060932993888855, "learning_rate": 8.532993789513805e-06, "loss": 0.6167, "step": 4305 }, { "epoch": 0.27, "grad_norm": 0.9795500636100769, "learning_rate": 8.532267707193555e-06, "loss": 0.6384, "step": 4306 }, { "epoch": 0.27, "grad_norm": 0.8952150940895081, "learning_rate": 8.5315414761418e-06, "loss": 0.6448, "step": 4307 }, { "epoch": 0.27, "grad_norm": 0.9257222414016724, "learning_rate": 8.530815096389118e-06, "loss": 0.5725, "step": 4308 }, { "epoch": 0.27, "grad_norm": 0.871077299118042, "learning_rate": 8.530088567966095e-06, "loss": 0.6262, "step": 4309 }, { "epoch": 0.27, "grad_norm": 0.8593372702598572, "learning_rate": 8.529361890903323e-06, "loss": 0.5855, "step": 4310 }, { "epoch": 0.27, "grad_norm": 0.9580448865890503, "learning_rate": 8.5286350652314e-06, "loss": 0.6397, "step": 4311 }, { "epoch": 0.27, "grad_norm": 0.8802589774131775, "learning_rate": 8.527908090980929e-06, "loss": 0.6593, "step": 4312 }, { "epoch": 0.27, "grad_norm": 0.9041280746459961, "learning_rate": 8.527180968182522e-06, "loss": 0.5961, "step": 4313 }, { "epoch": 0.27, "grad_norm": 0.8729889988899231, "learning_rate": 8.526453696866794e-06, "loss": 0.6, "step": 4314 }, { "epoch": 0.27, "grad_norm": 0.8576443195343018, "learning_rate": 8.525726277064368e-06, "loss": 0.5911, "step": 4315 }, { "epoch": 0.27, "grad_norm": 0.8359036445617676, "learning_rate": 8.524998708805874e-06, "loss": 0.5723, "step": 4316 }, { "epoch": 0.27, "grad_norm": 0.8947839736938477, "learning_rate": 8.524270992121948e-06, "loss": 0.6163, "step": 4317 }, { "epoch": 0.27, "grad_norm": 0.9303499460220337, "learning_rate": 8.523543127043228e-06, "loss": 0.6144, "step": 4318 }, { "epoch": 0.27, "grad_norm": 0.8773894309997559, "learning_rate": 8.522815113600366e-06, "loss": 0.5884, "step": 4319 }, { "epoch": 0.27, "grad_norm": 0.9222464561462402, "learning_rate": 8.522086951824014e-06, "loss": 0.6819, "step": 4320 }, { "epoch": 0.27, "grad_norm": 0.8709927797317505, "learning_rate": 8.521358641744834e-06, "loss": 0.5886, "step": 4321 }, { "epoch": 0.27, "grad_norm": 0.8806871175765991, "learning_rate": 8.520630183393492e-06, "loss": 0.616, "step": 4322 }, { "epoch": 0.27, "grad_norm": 0.9203693866729736, "learning_rate": 8.519901576800657e-06, "loss": 0.6442, "step": 4323 }, { "epoch": 0.27, "grad_norm": 0.9157525300979614, "learning_rate": 8.519172821997015e-06, "loss": 0.57, "step": 4324 }, { "epoch": 0.27, "grad_norm": 0.8757469058036804, "learning_rate": 8.518443919013247e-06, "loss": 0.625, "step": 4325 }, { "epoch": 0.27, "grad_norm": 0.8523043394088745, "learning_rate": 8.517714867880044e-06, "loss": 0.5748, "step": 4326 }, { "epoch": 0.27, "grad_norm": 0.8662055730819702, "learning_rate": 8.516985668628105e-06, "loss": 0.5595, "step": 4327 }, { "epoch": 0.27, "grad_norm": 0.8649899363517761, "learning_rate": 8.516256321288136e-06, "loss": 0.5697, "step": 4328 }, { "epoch": 0.27, "grad_norm": 0.8986943960189819, "learning_rate": 8.515526825890845e-06, "loss": 0.5607, "step": 4329 }, { "epoch": 0.27, "grad_norm": 0.9603455066680908, "learning_rate": 8.514797182466948e-06, "loss": 0.5942, "step": 4330 }, { "epoch": 0.27, "grad_norm": 0.9389190673828125, "learning_rate": 8.51406739104717e-06, "loss": 0.6389, "step": 4331 }, { "epoch": 0.27, "grad_norm": 0.9618402123451233, "learning_rate": 8.513337451662238e-06, "loss": 0.6588, "step": 4332 }, { "epoch": 0.27, "grad_norm": 0.9515010118484497, "learning_rate": 8.512607364342887e-06, "loss": 0.6097, "step": 4333 }, { "epoch": 0.27, "grad_norm": 0.8656193017959595, "learning_rate": 8.51187712911986e-06, "loss": 0.597, "step": 4334 }, { "epoch": 0.27, "grad_norm": 0.9110217094421387, "learning_rate": 8.511146746023905e-06, "loss": 0.5888, "step": 4335 }, { "epoch": 0.27, "grad_norm": 0.8885056376457214, "learning_rate": 8.510416215085775e-06, "loss": 0.6293, "step": 4336 }, { "epoch": 0.27, "grad_norm": 0.8254531621932983, "learning_rate": 8.509685536336229e-06, "loss": 0.5644, "step": 4337 }, { "epoch": 0.27, "grad_norm": 0.8862583041191101, "learning_rate": 8.508954709806034e-06, "loss": 0.633, "step": 4338 }, { "epoch": 0.27, "grad_norm": 0.9127135872840881, "learning_rate": 8.508223735525963e-06, "loss": 0.624, "step": 4339 }, { "epoch": 0.27, "grad_norm": 0.9787098169326782, "learning_rate": 8.507492613526795e-06, "loss": 0.6342, "step": 4340 }, { "epoch": 0.28, "grad_norm": 0.844140887260437, "learning_rate": 8.506761343839316e-06, "loss": 0.6042, "step": 4341 }, { "epoch": 0.28, "grad_norm": 0.9551699757575989, "learning_rate": 8.506029926494315e-06, "loss": 0.6294, "step": 4342 }, { "epoch": 0.28, "grad_norm": 0.8815372586250305, "learning_rate": 8.50529836152259e-06, "loss": 0.678, "step": 4343 }, { "epoch": 0.28, "grad_norm": 0.841645359992981, "learning_rate": 8.504566648954947e-06, "loss": 0.5792, "step": 4344 }, { "epoch": 0.28, "grad_norm": 0.8906237483024597, "learning_rate": 8.503834788822191e-06, "loss": 0.6074, "step": 4345 }, { "epoch": 0.28, "grad_norm": 0.871210515499115, "learning_rate": 8.503102781155141e-06, "loss": 0.5929, "step": 4346 }, { "epoch": 0.28, "grad_norm": 0.823668897151947, "learning_rate": 8.502370625984622e-06, "loss": 0.5886, "step": 4347 }, { "epoch": 0.28, "grad_norm": 0.9484293460845947, "learning_rate": 8.501638323341459e-06, "loss": 0.6557, "step": 4348 }, { "epoch": 0.28, "grad_norm": 0.8655977249145508, "learning_rate": 8.500905873256486e-06, "loss": 0.5899, "step": 4349 }, { "epoch": 0.28, "grad_norm": 0.9463286399841309, "learning_rate": 8.500173275760546e-06, "loss": 0.6128, "step": 4350 }, { "epoch": 0.28, "grad_norm": 0.8562267422676086, "learning_rate": 8.499440530884486e-06, "loss": 0.5932, "step": 4351 }, { "epoch": 0.28, "grad_norm": 0.9182244539260864, "learning_rate": 8.498707638659159e-06, "loss": 0.6024, "step": 4352 }, { "epoch": 0.28, "grad_norm": 0.8319056034088135, "learning_rate": 8.497974599115424e-06, "loss": 0.5626, "step": 4353 }, { "epoch": 0.28, "grad_norm": 0.9287349581718445, "learning_rate": 8.497241412284147e-06, "loss": 0.6092, "step": 4354 }, { "epoch": 0.28, "grad_norm": 0.8886022567749023, "learning_rate": 8.496508078196202e-06, "loss": 0.6414, "step": 4355 }, { "epoch": 0.28, "grad_norm": 0.916700005531311, "learning_rate": 8.495774596882462e-06, "loss": 0.5731, "step": 4356 }, { "epoch": 0.28, "grad_norm": 0.8386786580085754, "learning_rate": 8.495040968373817e-06, "loss": 0.6356, "step": 4357 }, { "epoch": 0.28, "grad_norm": 0.8589484095573425, "learning_rate": 8.494307192701154e-06, "loss": 0.5783, "step": 4358 }, { "epoch": 0.28, "grad_norm": 0.882973849773407, "learning_rate": 8.493573269895372e-06, "loss": 0.5763, "step": 4359 }, { "epoch": 0.28, "grad_norm": 0.8396306037902832, "learning_rate": 8.492839199987373e-06, "loss": 0.5836, "step": 4360 }, { "epoch": 0.28, "grad_norm": 0.8653340935707092, "learning_rate": 8.492104983008065e-06, "loss": 0.5815, "step": 4361 }, { "epoch": 0.28, "grad_norm": 0.8777982592582703, "learning_rate": 8.491370618988367e-06, "loss": 0.5753, "step": 4362 }, { "epoch": 0.28, "grad_norm": 0.9289289116859436, "learning_rate": 8.490636107959194e-06, "loss": 0.5963, "step": 4363 }, { "epoch": 0.28, "grad_norm": 0.9735289216041565, "learning_rate": 8.489901449951478e-06, "loss": 0.6477, "step": 4364 }, { "epoch": 0.28, "grad_norm": 0.9543069005012512, "learning_rate": 8.489166644996154e-06, "loss": 0.6315, "step": 4365 }, { "epoch": 0.28, "grad_norm": 0.9369723200798035, "learning_rate": 8.48843169312416e-06, "loss": 0.6351, "step": 4366 }, { "epoch": 0.28, "grad_norm": 0.9586085677146912, "learning_rate": 8.487696594366444e-06, "loss": 0.6317, "step": 4367 }, { "epoch": 0.28, "grad_norm": 0.8085949420928955, "learning_rate": 8.486961348753954e-06, "loss": 0.5292, "step": 4368 }, { "epoch": 0.28, "grad_norm": 0.9245449900627136, "learning_rate": 8.486225956317655e-06, "loss": 0.6529, "step": 4369 }, { "epoch": 0.28, "grad_norm": 0.8826268315315247, "learning_rate": 8.48549041708851e-06, "loss": 0.5822, "step": 4370 }, { "epoch": 0.28, "grad_norm": 0.8296921253204346, "learning_rate": 8.484754731097484e-06, "loss": 0.565, "step": 4371 }, { "epoch": 0.28, "grad_norm": 0.8971067667007446, "learning_rate": 8.484018898375561e-06, "loss": 0.606, "step": 4372 }, { "epoch": 0.28, "grad_norm": 0.8723403215408325, "learning_rate": 8.483282918953723e-06, "loss": 0.6579, "step": 4373 }, { "epoch": 0.28, "grad_norm": 0.9097625613212585, "learning_rate": 8.482546792862957e-06, "loss": 0.6365, "step": 4374 }, { "epoch": 0.28, "grad_norm": 0.8853545784950256, "learning_rate": 8.481810520134262e-06, "loss": 0.5961, "step": 4375 }, { "epoch": 0.28, "grad_norm": 0.8926584124565125, "learning_rate": 8.481074100798638e-06, "loss": 0.6374, "step": 4376 }, { "epoch": 0.28, "grad_norm": 0.9190264940261841, "learning_rate": 8.480337534887093e-06, "loss": 0.6332, "step": 4377 }, { "epoch": 0.28, "grad_norm": 0.9103266596794128, "learning_rate": 8.479600822430642e-06, "loss": 0.6575, "step": 4378 }, { "epoch": 0.28, "grad_norm": 0.8518051505088806, "learning_rate": 8.478863963460306e-06, "loss": 0.5637, "step": 4379 }, { "epoch": 0.28, "grad_norm": 0.8869740962982178, "learning_rate": 8.478126958007108e-06, "loss": 0.6089, "step": 4380 }, { "epoch": 0.28, "grad_norm": 0.8450909852981567, "learning_rate": 8.477389806102085e-06, "loss": 0.6446, "step": 4381 }, { "epoch": 0.28, "grad_norm": 0.9005980491638184, "learning_rate": 8.476652507776274e-06, "loss": 0.5715, "step": 4382 }, { "epoch": 0.28, "grad_norm": 0.8654862642288208, "learning_rate": 8.475915063060721e-06, "loss": 0.6625, "step": 4383 }, { "epoch": 0.28, "grad_norm": 0.9093218445777893, "learning_rate": 8.475177471986476e-06, "loss": 0.6045, "step": 4384 }, { "epoch": 0.28, "grad_norm": 0.9266924858093262, "learning_rate": 8.474439734584597e-06, "loss": 0.611, "step": 4385 }, { "epoch": 0.28, "grad_norm": 0.9059037566184998, "learning_rate": 8.473701850886147e-06, "loss": 0.6082, "step": 4386 }, { "epoch": 0.28, "grad_norm": 0.8820655941963196, "learning_rate": 8.472963820922195e-06, "loss": 0.5618, "step": 4387 }, { "epoch": 0.28, "grad_norm": 0.9292760491371155, "learning_rate": 8.47222564472382e-06, "loss": 0.636, "step": 4388 }, { "epoch": 0.28, "grad_norm": 0.8835957050323486, "learning_rate": 8.471487322322101e-06, "loss": 0.5778, "step": 4389 }, { "epoch": 0.28, "grad_norm": 0.8266465067863464, "learning_rate": 8.47074885374813e-06, "loss": 0.6343, "step": 4390 }, { "epoch": 0.28, "grad_norm": 0.894709587097168, "learning_rate": 8.470010239032995e-06, "loss": 0.6356, "step": 4391 }, { "epoch": 0.28, "grad_norm": 0.8928598761558533, "learning_rate": 8.469271478207801e-06, "loss": 0.5714, "step": 4392 }, { "epoch": 0.28, "grad_norm": 0.8108189702033997, "learning_rate": 8.468532571303655e-06, "loss": 0.5671, "step": 4393 }, { "epoch": 0.28, "grad_norm": 0.9048933386802673, "learning_rate": 8.467793518351668e-06, "loss": 0.6443, "step": 4394 }, { "epoch": 0.28, "grad_norm": 0.9767211675643921, "learning_rate": 8.46705431938296e-06, "loss": 0.6349, "step": 4395 }, { "epoch": 0.28, "grad_norm": 0.8677191138267517, "learning_rate": 8.466314974428655e-06, "loss": 0.6328, "step": 4396 }, { "epoch": 0.28, "grad_norm": 0.8989687561988831, "learning_rate": 8.465575483519883e-06, "loss": 0.5977, "step": 4397 }, { "epoch": 0.28, "grad_norm": 0.8818314075469971, "learning_rate": 8.464835846687786e-06, "loss": 0.6441, "step": 4398 }, { "epoch": 0.28, "grad_norm": 0.8356281518936157, "learning_rate": 8.464096063963503e-06, "loss": 0.5723, "step": 4399 }, { "epoch": 0.28, "grad_norm": 0.9221736192703247, "learning_rate": 8.463356135378187e-06, "loss": 0.5863, "step": 4400 }, { "epoch": 0.28, "grad_norm": 0.9067344069480896, "learning_rate": 8.462616060962992e-06, "loss": 0.6029, "step": 4401 }, { "epoch": 0.28, "grad_norm": 0.9068452715873718, "learning_rate": 8.46187584074908e-06, "loss": 0.6686, "step": 4402 }, { "epoch": 0.28, "grad_norm": 0.8604983687400818, "learning_rate": 8.461135474767618e-06, "loss": 0.6051, "step": 4403 }, { "epoch": 0.28, "grad_norm": 0.969758152961731, "learning_rate": 8.460394963049784e-06, "loss": 0.6334, "step": 4404 }, { "epoch": 0.28, "grad_norm": 0.8745808005332947, "learning_rate": 8.459654305626754e-06, "loss": 0.6052, "step": 4405 }, { "epoch": 0.28, "grad_norm": 0.8724889755249023, "learning_rate": 8.458913502529718e-06, "loss": 0.6038, "step": 4406 }, { "epoch": 0.28, "grad_norm": 0.977708101272583, "learning_rate": 8.458172553789866e-06, "loss": 0.646, "step": 4407 }, { "epoch": 0.28, "grad_norm": 0.900845468044281, "learning_rate": 8.457431459438398e-06, "loss": 0.6228, "step": 4408 }, { "epoch": 0.28, "grad_norm": 0.9241088032722473, "learning_rate": 8.456690219506519e-06, "loss": 0.5887, "step": 4409 }, { "epoch": 0.28, "grad_norm": 0.8947976231575012, "learning_rate": 8.45594883402544e-06, "loss": 0.6179, "step": 4410 }, { "epoch": 0.28, "grad_norm": 0.9319069385528564, "learning_rate": 8.455207303026378e-06, "loss": 0.6356, "step": 4411 }, { "epoch": 0.28, "grad_norm": 0.8791349530220032, "learning_rate": 8.454465626540555e-06, "loss": 0.5906, "step": 4412 }, { "epoch": 0.28, "grad_norm": 0.9056016802787781, "learning_rate": 8.453723804599203e-06, "loss": 0.6095, "step": 4413 }, { "epoch": 0.28, "grad_norm": 0.9093009233474731, "learning_rate": 8.452981837233555e-06, "loss": 0.6442, "step": 4414 }, { "epoch": 0.28, "grad_norm": 0.9653396010398865, "learning_rate": 8.452239724474856e-06, "loss": 0.6397, "step": 4415 }, { "epoch": 0.28, "grad_norm": 0.9115119576454163, "learning_rate": 8.451497466354349e-06, "loss": 0.5723, "step": 4416 }, { "epoch": 0.28, "grad_norm": 0.9298482537269592, "learning_rate": 8.450755062903293e-06, "loss": 0.6244, "step": 4417 }, { "epoch": 0.28, "grad_norm": 0.8901708126068115, "learning_rate": 8.450012514152943e-06, "loss": 0.6238, "step": 4418 }, { "epoch": 0.28, "grad_norm": 0.8972589373588562, "learning_rate": 8.44926982013457e-06, "loss": 0.6162, "step": 4419 }, { "epoch": 0.28, "grad_norm": 0.8598697185516357, "learning_rate": 8.448526980879444e-06, "loss": 0.5909, "step": 4420 }, { "epoch": 0.28, "grad_norm": 1.0167523622512817, "learning_rate": 8.447783996418843e-06, "loss": 0.6784, "step": 4421 }, { "epoch": 0.28, "grad_norm": 0.8606759905815125, "learning_rate": 8.447040866784051e-06, "loss": 0.5985, "step": 4422 }, { "epoch": 0.28, "grad_norm": 0.9100238084793091, "learning_rate": 8.446297592006361e-06, "loss": 0.5486, "step": 4423 }, { "epoch": 0.28, "grad_norm": 0.864998996257782, "learning_rate": 8.445554172117066e-06, "loss": 0.6308, "step": 4424 }, { "epoch": 0.28, "grad_norm": 0.8984532356262207, "learning_rate": 8.444810607147472e-06, "loss": 0.5894, "step": 4425 }, { "epoch": 0.28, "grad_norm": 0.8566537499427795, "learning_rate": 8.444066897128888e-06, "loss": 0.5764, "step": 4426 }, { "epoch": 0.28, "grad_norm": 0.8784050941467285, "learning_rate": 8.443323042092625e-06, "loss": 0.5923, "step": 4427 }, { "epoch": 0.28, "grad_norm": 0.9064181447029114, "learning_rate": 8.442579042070011e-06, "loss": 0.6279, "step": 4428 }, { "epoch": 0.28, "grad_norm": 0.8186553120613098, "learning_rate": 8.441834897092366e-06, "loss": 0.6041, "step": 4429 }, { "epoch": 0.28, "grad_norm": 0.9280451536178589, "learning_rate": 8.44109060719103e-06, "loss": 0.5901, "step": 4430 }, { "epoch": 0.28, "grad_norm": 0.9555798172950745, "learning_rate": 8.440346172397338e-06, "loss": 0.672, "step": 4431 }, { "epoch": 0.28, "grad_norm": 0.8926699161529541, "learning_rate": 8.439601592742637e-06, "loss": 0.6645, "step": 4432 }, { "epoch": 0.28, "grad_norm": 0.8857988119125366, "learning_rate": 8.438856868258278e-06, "loss": 0.6439, "step": 4433 }, { "epoch": 0.28, "grad_norm": 0.8523682951927185, "learning_rate": 8.438111998975618e-06, "loss": 0.6044, "step": 4434 }, { "epoch": 0.28, "grad_norm": 0.8690520524978638, "learning_rate": 8.437366984926023e-06, "loss": 0.618, "step": 4435 }, { "epoch": 0.28, "grad_norm": 0.8861067891120911, "learning_rate": 8.436621826140863e-06, "loss": 0.617, "step": 4436 }, { "epoch": 0.28, "grad_norm": 0.8998048901557922, "learning_rate": 8.435876522651512e-06, "loss": 0.6881, "step": 4437 }, { "epoch": 0.28, "grad_norm": 1.0284022092819214, "learning_rate": 8.435131074489353e-06, "loss": 0.6871, "step": 4438 }, { "epoch": 0.28, "grad_norm": 0.8755271434783936, "learning_rate": 8.434385481685776e-06, "loss": 0.5637, "step": 4439 }, { "epoch": 0.28, "grad_norm": 0.9131196737289429, "learning_rate": 8.43363974427217e-06, "loss": 0.6516, "step": 4440 }, { "epoch": 0.28, "grad_norm": 0.8995763063430786, "learning_rate": 8.432893862279943e-06, "loss": 0.5847, "step": 4441 }, { "epoch": 0.28, "grad_norm": 0.923299252986908, "learning_rate": 8.432147835740496e-06, "loss": 0.6213, "step": 4442 }, { "epoch": 0.28, "grad_norm": 0.9042030572891235, "learning_rate": 8.431401664685244e-06, "loss": 0.6172, "step": 4443 }, { "epoch": 0.28, "grad_norm": 0.963955283164978, "learning_rate": 8.430655349145604e-06, "loss": 0.6221, "step": 4444 }, { "epoch": 0.28, "grad_norm": 0.9096510410308838, "learning_rate": 8.429908889153003e-06, "loss": 0.6646, "step": 4445 }, { "epoch": 0.28, "grad_norm": 0.8882843852043152, "learning_rate": 8.429162284738868e-06, "loss": 0.6382, "step": 4446 }, { "epoch": 0.28, "grad_norm": 0.8437566757202148, "learning_rate": 8.42841553593464e-06, "loss": 0.6169, "step": 4447 }, { "epoch": 0.28, "grad_norm": 0.8963313102722168, "learning_rate": 8.42766864277176e-06, "loss": 0.6054, "step": 4448 }, { "epoch": 0.28, "grad_norm": 0.8515428900718689, "learning_rate": 8.426921605281677e-06, "loss": 0.6261, "step": 4449 }, { "epoch": 0.28, "grad_norm": 0.9076332449913025, "learning_rate": 8.426174423495848e-06, "loss": 0.6133, "step": 4450 }, { "epoch": 0.28, "grad_norm": 0.9798647165298462, "learning_rate": 8.425427097445733e-06, "loss": 0.6373, "step": 4451 }, { "epoch": 0.28, "grad_norm": 0.8840082883834839, "learning_rate": 8.424679627162798e-06, "loss": 0.65, "step": 4452 }, { "epoch": 0.28, "grad_norm": 0.8393424153327942, "learning_rate": 8.423932012678516e-06, "loss": 0.5844, "step": 4453 }, { "epoch": 0.28, "grad_norm": 0.9224118590354919, "learning_rate": 8.42318425402437e-06, "loss": 0.6875, "step": 4454 }, { "epoch": 0.28, "grad_norm": 0.8217747211456299, "learning_rate": 8.422436351231843e-06, "loss": 0.5858, "step": 4455 }, { "epoch": 0.28, "grad_norm": 0.8549429774284363, "learning_rate": 8.421688304332428e-06, "loss": 0.5739, "step": 4456 }, { "epoch": 0.28, "grad_norm": 0.88507080078125, "learning_rate": 8.42094011335762e-06, "loss": 0.5718, "step": 4457 }, { "epoch": 0.28, "grad_norm": 0.9432583451271057, "learning_rate": 8.420191778338924e-06, "loss": 0.5703, "step": 4458 }, { "epoch": 0.28, "grad_norm": 0.893008291721344, "learning_rate": 8.419443299307852e-06, "loss": 0.6452, "step": 4459 }, { "epoch": 0.28, "grad_norm": 0.8943834900856018, "learning_rate": 8.418694676295918e-06, "loss": 0.5895, "step": 4460 }, { "epoch": 0.28, "grad_norm": 0.8623561859130859, "learning_rate": 8.417945909334642e-06, "loss": 0.6079, "step": 4461 }, { "epoch": 0.28, "grad_norm": 0.8554010987281799, "learning_rate": 8.417196998455555e-06, "loss": 0.6034, "step": 4462 }, { "epoch": 0.28, "grad_norm": 0.8964874148368835, "learning_rate": 8.41644794369019e-06, "loss": 0.5709, "step": 4463 }, { "epoch": 0.28, "grad_norm": 0.8765043616294861, "learning_rate": 8.415698745070088e-06, "loss": 0.5924, "step": 4464 }, { "epoch": 0.28, "grad_norm": 0.9031361937522888, "learning_rate": 8.414949402626793e-06, "loss": 0.644, "step": 4465 }, { "epoch": 0.28, "grad_norm": 0.8381129503250122, "learning_rate": 8.41419991639186e-06, "loss": 0.5794, "step": 4466 }, { "epoch": 0.28, "grad_norm": 0.9276309013366699, "learning_rate": 8.413450286396845e-06, "loss": 0.5939, "step": 4467 }, { "epoch": 0.28, "grad_norm": 0.821047306060791, "learning_rate": 8.41270051267331e-06, "loss": 0.5748, "step": 4468 }, { "epoch": 0.28, "grad_norm": 0.8938078880310059, "learning_rate": 8.411950595252834e-06, "loss": 0.613, "step": 4469 }, { "epoch": 0.28, "grad_norm": 0.9239148497581482, "learning_rate": 8.411200534166983e-06, "loss": 0.6725, "step": 4470 }, { "epoch": 0.28, "grad_norm": 0.8708427548408508, "learning_rate": 8.410450329447346e-06, "loss": 0.6503, "step": 4471 }, { "epoch": 0.28, "grad_norm": 0.9489243626594543, "learning_rate": 8.409699981125509e-06, "loss": 0.6561, "step": 4472 }, { "epoch": 0.28, "grad_norm": 0.9252210259437561, "learning_rate": 8.408949489233068e-06, "loss": 0.6548, "step": 4473 }, { "epoch": 0.28, "grad_norm": 0.8737644553184509, "learning_rate": 8.408198853801623e-06, "loss": 0.5992, "step": 4474 }, { "epoch": 0.28, "grad_norm": 0.9438381195068359, "learning_rate": 8.40744807486278e-06, "loss": 0.6524, "step": 4475 }, { "epoch": 0.28, "grad_norm": 0.8789763450622559, "learning_rate": 8.406697152448152e-06, "loss": 0.6056, "step": 4476 }, { "epoch": 0.28, "grad_norm": 0.9246413707733154, "learning_rate": 8.405946086589359e-06, "loss": 0.6097, "step": 4477 }, { "epoch": 0.28, "grad_norm": 0.9346416592597961, "learning_rate": 8.405194877318023e-06, "loss": 0.6877, "step": 4478 }, { "epoch": 0.28, "grad_norm": 0.8847804069519043, "learning_rate": 8.404443524665777e-06, "loss": 0.6492, "step": 4479 }, { "epoch": 0.28, "grad_norm": 0.8874092698097229, "learning_rate": 8.40369202866426e-06, "loss": 0.5755, "step": 4480 }, { "epoch": 0.28, "grad_norm": 0.9419736266136169, "learning_rate": 8.40294038934511e-06, "loss": 0.6488, "step": 4481 }, { "epoch": 0.28, "grad_norm": 0.8550480604171753, "learning_rate": 8.402188606739977e-06, "loss": 0.5936, "step": 4482 }, { "epoch": 0.28, "grad_norm": 0.9512335062026978, "learning_rate": 8.401436680880518e-06, "loss": 0.6031, "step": 4483 }, { "epoch": 0.28, "grad_norm": 0.8816537857055664, "learning_rate": 8.400684611798395e-06, "loss": 0.5836, "step": 4484 }, { "epoch": 0.28, "grad_norm": 0.8356591463088989, "learning_rate": 8.39993239952527e-06, "loss": 0.5666, "step": 4485 }, { "epoch": 0.28, "grad_norm": 0.9029728770256042, "learning_rate": 8.399180044092821e-06, "loss": 0.5819, "step": 4486 }, { "epoch": 0.28, "grad_norm": 0.9611971378326416, "learning_rate": 8.398427545532726e-06, "loss": 0.589, "step": 4487 }, { "epoch": 0.28, "grad_norm": 0.9153091907501221, "learning_rate": 8.397674903876667e-06, "loss": 0.6034, "step": 4488 }, { "epoch": 0.28, "grad_norm": 0.9280160665512085, "learning_rate": 8.396922119156339e-06, "loss": 0.6534, "step": 4489 }, { "epoch": 0.28, "grad_norm": 0.9492883086204529, "learning_rate": 8.396169191403438e-06, "loss": 0.6022, "step": 4490 }, { "epoch": 0.28, "grad_norm": 0.9128872156143188, "learning_rate": 8.395416120649667e-06, "loss": 0.5807, "step": 4491 }, { "epoch": 0.28, "grad_norm": 0.8702619075775146, "learning_rate": 8.394662906926734e-06, "loss": 0.5689, "step": 4492 }, { "epoch": 0.28, "grad_norm": 0.8514307141304016, "learning_rate": 8.393909550266354e-06, "loss": 0.6161, "step": 4493 }, { "epoch": 0.28, "grad_norm": 0.8472135663032532, "learning_rate": 8.393156050700252e-06, "loss": 0.626, "step": 4494 }, { "epoch": 0.28, "grad_norm": 0.8964636921882629, "learning_rate": 8.39240240826015e-06, "loss": 0.6282, "step": 4495 }, { "epoch": 0.28, "grad_norm": 0.9277433753013611, "learning_rate": 8.391648622977787e-06, "loss": 0.6707, "step": 4496 }, { "epoch": 0.28, "grad_norm": 0.8764444589614868, "learning_rate": 8.390894694884896e-06, "loss": 0.5962, "step": 4497 }, { "epoch": 0.28, "grad_norm": 0.9093109965324402, "learning_rate": 8.390140624013228e-06, "loss": 0.6039, "step": 4498 }, { "epoch": 0.29, "grad_norm": 0.9418292045593262, "learning_rate": 8.38938641039453e-06, "loss": 0.6415, "step": 4499 }, { "epoch": 0.29, "grad_norm": 0.8592790961265564, "learning_rate": 8.388632054060562e-06, "loss": 0.5807, "step": 4500 }, { "epoch": 0.29, "grad_norm": 0.9306639432907104, "learning_rate": 8.387877555043086e-06, "loss": 0.6477, "step": 4501 }, { "epoch": 0.29, "grad_norm": 0.8802691698074341, "learning_rate": 8.38712291337387e-06, "loss": 0.6043, "step": 4502 }, { "epoch": 0.29, "grad_norm": 0.8935637474060059, "learning_rate": 8.386368129084695e-06, "loss": 0.5958, "step": 4503 }, { "epoch": 0.29, "grad_norm": 0.9453160166740417, "learning_rate": 8.385613202207336e-06, "loss": 0.6246, "step": 4504 }, { "epoch": 0.29, "grad_norm": 0.8884761929512024, "learning_rate": 8.384858132773582e-06, "loss": 0.6038, "step": 4505 }, { "epoch": 0.29, "grad_norm": 0.8889615535736084, "learning_rate": 8.38410292081523e-06, "loss": 0.563, "step": 4506 }, { "epoch": 0.29, "grad_norm": 0.8603700995445251, "learning_rate": 8.383347566364072e-06, "loss": 0.5701, "step": 4507 }, { "epoch": 0.29, "grad_norm": 0.8758067488670349, "learning_rate": 8.38259206945192e-06, "loss": 0.6018, "step": 4508 }, { "epoch": 0.29, "grad_norm": 0.9259410500526428, "learning_rate": 8.381836430110585e-06, "loss": 0.5987, "step": 4509 }, { "epoch": 0.29, "grad_norm": 0.913033127784729, "learning_rate": 8.38108064837188e-06, "loss": 0.6485, "step": 4510 }, { "epoch": 0.29, "grad_norm": 0.88724285364151, "learning_rate": 8.380324724267631e-06, "loss": 0.6141, "step": 4511 }, { "epoch": 0.29, "grad_norm": 0.8932639360427856, "learning_rate": 8.379568657829669e-06, "loss": 0.6432, "step": 4512 }, { "epoch": 0.29, "grad_norm": 0.8454581499099731, "learning_rate": 8.378812449089826e-06, "loss": 0.6136, "step": 4513 }, { "epoch": 0.29, "grad_norm": 0.9295586943626404, "learning_rate": 8.378056098079946e-06, "loss": 0.6594, "step": 4514 }, { "epoch": 0.29, "grad_norm": 0.9568715691566467, "learning_rate": 8.377299604831875e-06, "loss": 0.664, "step": 4515 }, { "epoch": 0.29, "grad_norm": 0.8817077875137329, "learning_rate": 8.376542969377465e-06, "loss": 0.6482, "step": 4516 }, { "epoch": 0.29, "grad_norm": 0.924589216709137, "learning_rate": 8.375786191748578e-06, "loss": 0.6711, "step": 4517 }, { "epoch": 0.29, "grad_norm": 0.9990555047988892, "learning_rate": 8.375029271977076e-06, "loss": 0.6757, "step": 4518 }, { "epoch": 0.29, "grad_norm": 0.8449356555938721, "learning_rate": 8.374272210094834e-06, "loss": 0.6219, "step": 4519 }, { "epoch": 0.29, "grad_norm": 0.8764523863792419, "learning_rate": 8.373515006133728e-06, "loss": 0.5765, "step": 4520 }, { "epoch": 0.29, "grad_norm": 0.9035282731056213, "learning_rate": 8.372757660125639e-06, "loss": 0.5511, "step": 4521 }, { "epoch": 0.29, "grad_norm": 0.9758896231651306, "learning_rate": 8.372000172102459e-06, "loss": 0.654, "step": 4522 }, { "epoch": 0.29, "grad_norm": 0.9765152931213379, "learning_rate": 8.37124254209608e-06, "loss": 0.6125, "step": 4523 }, { "epoch": 0.29, "grad_norm": 0.8869422674179077, "learning_rate": 8.370484770138407e-06, "loss": 0.6502, "step": 4524 }, { "epoch": 0.29, "grad_norm": 0.9507737755775452, "learning_rate": 8.369726856261346e-06, "loss": 0.6427, "step": 4525 }, { "epoch": 0.29, "grad_norm": 0.9340800642967224, "learning_rate": 8.36896880049681e-06, "loss": 0.576, "step": 4526 }, { "epoch": 0.29, "grad_norm": 0.9077014327049255, "learning_rate": 8.368210602876716e-06, "loss": 0.5923, "step": 4527 }, { "epoch": 0.29, "grad_norm": 0.8733184933662415, "learning_rate": 8.36745226343299e-06, "loss": 0.6116, "step": 4528 }, { "epoch": 0.29, "grad_norm": 0.9413378834724426, "learning_rate": 8.366693782197566e-06, "loss": 0.6095, "step": 4529 }, { "epoch": 0.29, "grad_norm": 0.9507108330726624, "learning_rate": 8.365935159202378e-06, "loss": 0.6222, "step": 4530 }, { "epoch": 0.29, "grad_norm": 0.950071394443512, "learning_rate": 8.365176394479368e-06, "loss": 0.6427, "step": 4531 }, { "epoch": 0.29, "grad_norm": 0.8926099538803101, "learning_rate": 8.364417488060488e-06, "loss": 0.6253, "step": 4532 }, { "epoch": 0.29, "grad_norm": 0.8760389089584351, "learning_rate": 8.363658439977693e-06, "loss": 0.5829, "step": 4533 }, { "epoch": 0.29, "grad_norm": 0.8548893332481384, "learning_rate": 8.36289925026294e-06, "loss": 0.5992, "step": 4534 }, { "epoch": 0.29, "grad_norm": 0.9305916428565979, "learning_rate": 8.362139918948198e-06, "loss": 0.6084, "step": 4535 }, { "epoch": 0.29, "grad_norm": 0.9650013446807861, "learning_rate": 8.36138044606544e-06, "loss": 0.6429, "step": 4536 }, { "epoch": 0.29, "grad_norm": 0.8791600465774536, "learning_rate": 8.360620831646647e-06, "loss": 0.6104, "step": 4537 }, { "epoch": 0.29, "grad_norm": 0.8988505601882935, "learning_rate": 8.359861075723801e-06, "loss": 0.6117, "step": 4538 }, { "epoch": 0.29, "grad_norm": 0.9081864953041077, "learning_rate": 8.359101178328893e-06, "loss": 0.6432, "step": 4539 }, { "epoch": 0.29, "grad_norm": 0.8397430181503296, "learning_rate": 8.358341139493919e-06, "loss": 0.5822, "step": 4540 }, { "epoch": 0.29, "grad_norm": 0.9269049167633057, "learning_rate": 8.357580959250882e-06, "loss": 0.5718, "step": 4541 }, { "epoch": 0.29, "grad_norm": 0.9173187017440796, "learning_rate": 8.356820637631792e-06, "loss": 0.6343, "step": 4542 }, { "epoch": 0.29, "grad_norm": 0.9146298766136169, "learning_rate": 8.356060174668663e-06, "loss": 0.5987, "step": 4543 }, { "epoch": 0.29, "grad_norm": 0.8490142226219177, "learning_rate": 8.355299570393515e-06, "loss": 0.5537, "step": 4544 }, { "epoch": 0.29, "grad_norm": 0.9867364764213562, "learning_rate": 8.354538824838373e-06, "loss": 0.6229, "step": 4545 }, { "epoch": 0.29, "grad_norm": 0.8394815921783447, "learning_rate": 8.353777938035272e-06, "loss": 0.5803, "step": 4546 }, { "epoch": 0.29, "grad_norm": 0.9035863280296326, "learning_rate": 8.353016910016247e-06, "loss": 0.6028, "step": 4547 }, { "epoch": 0.29, "grad_norm": 0.9454771876335144, "learning_rate": 8.352255740813347e-06, "loss": 0.6281, "step": 4548 }, { "epoch": 0.29, "grad_norm": 0.8816177845001221, "learning_rate": 8.351494430458617e-06, "loss": 0.5853, "step": 4549 }, { "epoch": 0.29, "grad_norm": 0.9621097445487976, "learning_rate": 8.350732978984116e-06, "loss": 0.64, "step": 4550 }, { "epoch": 0.29, "grad_norm": 0.8992953896522522, "learning_rate": 8.349971386421906e-06, "loss": 0.609, "step": 4551 }, { "epoch": 0.29, "grad_norm": 0.8685299754142761, "learning_rate": 8.349209652804055e-06, "loss": 0.5633, "step": 4552 }, { "epoch": 0.29, "grad_norm": 0.8441104292869568, "learning_rate": 8.348447778162636e-06, "loss": 0.6342, "step": 4553 }, { "epoch": 0.29, "grad_norm": 0.8986367583274841, "learning_rate": 8.347685762529729e-06, "loss": 0.6462, "step": 4554 }, { "epoch": 0.29, "grad_norm": 0.8934696316719055, "learning_rate": 8.34692360593742e-06, "loss": 0.5742, "step": 4555 }, { "epoch": 0.29, "grad_norm": 0.9160881042480469, "learning_rate": 8.346161308417805e-06, "loss": 0.6352, "step": 4556 }, { "epoch": 0.29, "grad_norm": 0.8555467128753662, "learning_rate": 8.345398870002972e-06, "loss": 0.6251, "step": 4557 }, { "epoch": 0.29, "grad_norm": 0.9095616340637207, "learning_rate": 8.344636290725035e-06, "loss": 0.6466, "step": 4558 }, { "epoch": 0.29, "grad_norm": 0.8506302237510681, "learning_rate": 8.343873570616097e-06, "loss": 0.6207, "step": 4559 }, { "epoch": 0.29, "grad_norm": 0.8929101228713989, "learning_rate": 8.343110709708275e-06, "loss": 0.6406, "step": 4560 }, { "epoch": 0.29, "grad_norm": 0.919562816619873, "learning_rate": 8.342347708033692e-06, "loss": 0.6297, "step": 4561 }, { "epoch": 0.29, "grad_norm": 0.9147757291793823, "learning_rate": 8.341584565624471e-06, "loss": 0.6507, "step": 4562 }, { "epoch": 0.29, "grad_norm": 0.8911783695220947, "learning_rate": 8.340821282512753e-06, "loss": 0.6204, "step": 4563 }, { "epoch": 0.29, "grad_norm": 0.9014183878898621, "learning_rate": 8.34005785873067e-06, "loss": 0.5942, "step": 4564 }, { "epoch": 0.29, "grad_norm": 0.9250972270965576, "learning_rate": 8.339294294310371e-06, "loss": 0.6221, "step": 4565 }, { "epoch": 0.29, "grad_norm": 0.8855701684951782, "learning_rate": 8.338530589284005e-06, "loss": 0.6531, "step": 4566 }, { "epoch": 0.29, "grad_norm": 0.8788840174674988, "learning_rate": 8.33776674368373e-06, "loss": 0.5342, "step": 4567 }, { "epoch": 0.29, "grad_norm": 0.9645684361457825, "learning_rate": 8.337002757541708e-06, "loss": 0.6643, "step": 4568 }, { "epoch": 0.29, "grad_norm": 0.8790433406829834, "learning_rate": 8.33623863089011e-06, "loss": 0.5891, "step": 4569 }, { "epoch": 0.29, "grad_norm": 0.8718952536582947, "learning_rate": 8.335474363761109e-06, "loss": 0.5621, "step": 4570 }, { "epoch": 0.29, "grad_norm": 0.9348157048225403, "learning_rate": 8.334709956186884e-06, "loss": 0.6079, "step": 4571 }, { "epoch": 0.29, "grad_norm": 0.8799747824668884, "learning_rate": 8.333945408199624e-06, "loss": 0.5964, "step": 4572 }, { "epoch": 0.29, "grad_norm": 0.8926383852958679, "learning_rate": 8.333180719831521e-06, "loss": 0.6197, "step": 4573 }, { "epoch": 0.29, "grad_norm": 0.9374673962593079, "learning_rate": 8.332415891114774e-06, "loss": 0.6275, "step": 4574 }, { "epoch": 0.29, "grad_norm": 0.9162465333938599, "learning_rate": 8.331650922081586e-06, "loss": 0.5774, "step": 4575 }, { "epoch": 0.29, "grad_norm": 0.8876767158508301, "learning_rate": 8.330885812764168e-06, "loss": 0.6274, "step": 4576 }, { "epoch": 0.29, "grad_norm": 0.8842494487762451, "learning_rate": 8.330120563194736e-06, "loss": 0.6357, "step": 4577 }, { "epoch": 0.29, "grad_norm": 0.8948314189910889, "learning_rate": 8.32935517340551e-06, "loss": 0.6393, "step": 4578 }, { "epoch": 0.29, "grad_norm": 0.8375378251075745, "learning_rate": 8.328589643428722e-06, "loss": 0.5923, "step": 4579 }, { "epoch": 0.29, "grad_norm": 0.904406726360321, "learning_rate": 8.327823973296601e-06, "loss": 0.6558, "step": 4580 }, { "epoch": 0.29, "grad_norm": 0.9058637619018555, "learning_rate": 8.32705816304139e-06, "loss": 0.6308, "step": 4581 }, { "epoch": 0.29, "grad_norm": 0.8600705862045288, "learning_rate": 8.326292212695335e-06, "loss": 0.663, "step": 4582 }, { "epoch": 0.29, "grad_norm": 0.9292261004447937, "learning_rate": 8.325526122290685e-06, "loss": 0.6219, "step": 4583 }, { "epoch": 0.29, "grad_norm": 0.8440708518028259, "learning_rate": 8.3247598918597e-06, "loss": 0.5731, "step": 4584 }, { "epoch": 0.29, "grad_norm": 0.9342061281204224, "learning_rate": 8.323993521434639e-06, "loss": 0.6052, "step": 4585 }, { "epoch": 0.29, "grad_norm": 0.9192177057266235, "learning_rate": 8.323227011047777e-06, "loss": 0.5993, "step": 4586 }, { "epoch": 0.29, "grad_norm": 0.8514859676361084, "learning_rate": 8.322460360731386e-06, "loss": 0.6584, "step": 4587 }, { "epoch": 0.29, "grad_norm": 0.8630070686340332, "learning_rate": 8.321693570517745e-06, "loss": 0.6247, "step": 4588 }, { "epoch": 0.29, "grad_norm": 0.9018881320953369, "learning_rate": 8.320926640439145e-06, "loss": 0.5849, "step": 4589 }, { "epoch": 0.29, "grad_norm": 0.8964559435844421, "learning_rate": 8.320159570527876e-06, "loss": 0.6018, "step": 4590 }, { "epoch": 0.29, "grad_norm": 0.8430085182189941, "learning_rate": 8.319392360816239e-06, "loss": 0.6033, "step": 4591 }, { "epoch": 0.29, "grad_norm": 0.8777481913566589, "learning_rate": 8.318625011336533e-06, "loss": 0.6466, "step": 4592 }, { "epoch": 0.29, "grad_norm": 0.9354075789451599, "learning_rate": 8.317857522121078e-06, "loss": 0.6187, "step": 4593 }, { "epoch": 0.29, "grad_norm": 0.9195157289505005, "learning_rate": 8.317089893202181e-06, "loss": 0.6094, "step": 4594 }, { "epoch": 0.29, "grad_norm": 0.9029771089553833, "learning_rate": 8.316322124612169e-06, "loss": 0.6705, "step": 4595 }, { "epoch": 0.29, "grad_norm": 0.9089044332504272, "learning_rate": 8.315554216383368e-06, "loss": 0.657, "step": 4596 }, { "epoch": 0.29, "grad_norm": 0.9705564975738525, "learning_rate": 8.314786168548115e-06, "loss": 0.645, "step": 4597 }, { "epoch": 0.29, "grad_norm": 0.8919417858123779, "learning_rate": 8.314017981138746e-06, "loss": 0.5922, "step": 4598 }, { "epoch": 0.29, "grad_norm": 0.9287596344947815, "learning_rate": 8.31324965418761e-06, "loss": 0.629, "step": 4599 }, { "epoch": 0.29, "grad_norm": 0.8940380811691284, "learning_rate": 8.312481187727055e-06, "loss": 0.6292, "step": 4600 }, { "epoch": 0.29, "grad_norm": 0.8414455652236938, "learning_rate": 8.311712581789442e-06, "loss": 0.5379, "step": 4601 }, { "epoch": 0.29, "grad_norm": 0.8981665372848511, "learning_rate": 8.310943836407132e-06, "loss": 0.6239, "step": 4602 }, { "epoch": 0.29, "grad_norm": 0.9226404428482056, "learning_rate": 8.310174951612495e-06, "loss": 0.5864, "step": 4603 }, { "epoch": 0.29, "grad_norm": 0.8723615407943726, "learning_rate": 8.309405927437906e-06, "loss": 0.5485, "step": 4604 }, { "epoch": 0.29, "grad_norm": 0.8954591751098633, "learning_rate": 8.308636763915746e-06, "loss": 0.6198, "step": 4605 }, { "epoch": 0.29, "grad_norm": 0.8918243050575256, "learning_rate": 8.307867461078402e-06, "loss": 0.6386, "step": 4606 }, { "epoch": 0.29, "grad_norm": 0.9272078275680542, "learning_rate": 8.307098018958266e-06, "loss": 0.6456, "step": 4607 }, { "epoch": 0.29, "grad_norm": 0.8392652869224548, "learning_rate": 8.306328437587738e-06, "loss": 0.6253, "step": 4608 }, { "epoch": 0.29, "grad_norm": 0.8458937406539917, "learning_rate": 8.305558716999221e-06, "loss": 0.619, "step": 4609 }, { "epoch": 0.29, "grad_norm": 0.9669510126113892, "learning_rate": 8.304788857225126e-06, "loss": 0.6159, "step": 4610 }, { "epoch": 0.29, "grad_norm": 0.8691350221633911, "learning_rate": 8.304018858297867e-06, "loss": 0.5951, "step": 4611 }, { "epoch": 0.29, "grad_norm": 0.9048541784286499, "learning_rate": 8.30324872024987e-06, "loss": 0.6122, "step": 4612 }, { "epoch": 0.29, "grad_norm": 0.897702693939209, "learning_rate": 8.30247844311356e-06, "loss": 0.5482, "step": 4613 }, { "epoch": 0.29, "grad_norm": 0.967581570148468, "learning_rate": 8.301708026921371e-06, "loss": 0.6607, "step": 4614 }, { "epoch": 0.29, "grad_norm": 0.9215171933174133, "learning_rate": 8.300937471705742e-06, "loss": 0.6724, "step": 4615 }, { "epoch": 0.29, "grad_norm": 0.8553723692893982, "learning_rate": 8.300166777499119e-06, "loss": 0.6192, "step": 4616 }, { "epoch": 0.29, "grad_norm": 0.9200363755226135, "learning_rate": 8.299395944333955e-06, "loss": 0.6755, "step": 4617 }, { "epoch": 0.29, "grad_norm": 0.883851170539856, "learning_rate": 8.298624972242704e-06, "loss": 0.5919, "step": 4618 }, { "epoch": 0.29, "grad_norm": 0.9365254044532776, "learning_rate": 8.297853861257831e-06, "loss": 0.5648, "step": 4619 }, { "epoch": 0.29, "grad_norm": 0.9034328460693359, "learning_rate": 8.297082611411805e-06, "loss": 0.6108, "step": 4620 }, { "epoch": 0.29, "grad_norm": 0.8502330780029297, "learning_rate": 8.296311222737099e-06, "loss": 0.5599, "step": 4621 }, { "epoch": 0.29, "grad_norm": 0.905636727809906, "learning_rate": 8.295539695266195e-06, "loss": 0.5831, "step": 4622 }, { "epoch": 0.29, "grad_norm": 0.8426777124404907, "learning_rate": 8.29476802903158e-06, "loss": 0.5742, "step": 4623 }, { "epoch": 0.29, "grad_norm": 0.8838980793952942, "learning_rate": 8.293996224065742e-06, "loss": 0.6093, "step": 4624 }, { "epoch": 0.29, "grad_norm": 0.866264283657074, "learning_rate": 8.293224280401185e-06, "loss": 0.6257, "step": 4625 }, { "epoch": 0.29, "grad_norm": 0.9148405194282532, "learning_rate": 8.292452198070406e-06, "loss": 0.6198, "step": 4626 }, { "epoch": 0.29, "grad_norm": 0.8394678831100464, "learning_rate": 8.291679977105922e-06, "loss": 0.5995, "step": 4627 }, { "epoch": 0.29, "grad_norm": 0.858686089515686, "learning_rate": 8.290907617540244e-06, "loss": 0.5912, "step": 4628 }, { "epoch": 0.29, "grad_norm": 0.9442601203918457, "learning_rate": 8.290135119405894e-06, "loss": 0.5746, "step": 4629 }, { "epoch": 0.29, "grad_norm": 0.8091254234313965, "learning_rate": 8.2893624827354e-06, "loss": 0.5738, "step": 4630 }, { "epoch": 0.29, "grad_norm": 0.8589221835136414, "learning_rate": 8.288589707561295e-06, "loss": 0.5908, "step": 4631 }, { "epoch": 0.29, "grad_norm": 0.8597394824028015, "learning_rate": 8.287816793916119e-06, "loss": 0.6029, "step": 4632 }, { "epoch": 0.29, "grad_norm": 0.9113194942474365, "learning_rate": 8.287043741832412e-06, "loss": 0.6494, "step": 4633 }, { "epoch": 0.29, "grad_norm": 0.8626760244369507, "learning_rate": 8.28627055134273e-06, "loss": 0.5452, "step": 4634 }, { "epoch": 0.29, "grad_norm": 0.8131372332572937, "learning_rate": 8.285497222479626e-06, "loss": 0.5791, "step": 4635 }, { "epoch": 0.29, "grad_norm": 0.8600938320159912, "learning_rate": 8.284723755275666e-06, "loss": 0.6142, "step": 4636 }, { "epoch": 0.29, "grad_norm": 0.8754161596298218, "learning_rate": 8.283950149763413e-06, "loss": 0.571, "step": 4637 }, { "epoch": 0.29, "grad_norm": 0.8484256863594055, "learning_rate": 8.283176405975444e-06, "loss": 0.5876, "step": 4638 }, { "epoch": 0.29, "grad_norm": 0.8761142492294312, "learning_rate": 8.282402523944338e-06, "loss": 0.5813, "step": 4639 }, { "epoch": 0.29, "grad_norm": 0.8704332709312439, "learning_rate": 8.28162850370268e-06, "loss": 0.5611, "step": 4640 }, { "epoch": 0.29, "grad_norm": 0.9601176977157593, "learning_rate": 8.28085434528306e-06, "loss": 0.6592, "step": 4641 }, { "epoch": 0.29, "grad_norm": 0.9141191244125366, "learning_rate": 8.28008004871808e-06, "loss": 0.5754, "step": 4642 }, { "epoch": 0.29, "grad_norm": 0.8856356143951416, "learning_rate": 8.279305614040337e-06, "loss": 0.6057, "step": 4643 }, { "epoch": 0.29, "grad_norm": 0.8992973566055298, "learning_rate": 8.278531041282445e-06, "loss": 0.5995, "step": 4644 }, { "epoch": 0.29, "grad_norm": 0.9188979864120483, "learning_rate": 8.277756330477013e-06, "loss": 0.6201, "step": 4645 }, { "epoch": 0.29, "grad_norm": 0.9234612584114075, "learning_rate": 8.276981481656668e-06, "loss": 0.6256, "step": 4646 }, { "epoch": 0.29, "grad_norm": 0.9108220934867859, "learning_rate": 8.276206494854029e-06, "loss": 0.6551, "step": 4647 }, { "epoch": 0.29, "grad_norm": 0.8664566874504089, "learning_rate": 8.275431370101734e-06, "loss": 0.5819, "step": 4648 }, { "epoch": 0.29, "grad_norm": 0.9078052639961243, "learning_rate": 8.274656107432418e-06, "loss": 0.6217, "step": 4649 }, { "epoch": 0.29, "grad_norm": 0.8800520896911621, "learning_rate": 8.273880706878724e-06, "loss": 0.6276, "step": 4650 }, { "epoch": 0.29, "grad_norm": 0.977785587310791, "learning_rate": 8.273105168473304e-06, "loss": 0.5943, "step": 4651 }, { "epoch": 0.29, "grad_norm": 0.9266806840896606, "learning_rate": 8.27232949224881e-06, "loss": 0.6371, "step": 4652 }, { "epoch": 0.29, "grad_norm": 0.9237861037254333, "learning_rate": 8.271553678237904e-06, "loss": 0.6344, "step": 4653 }, { "epoch": 0.29, "grad_norm": 0.9259735345840454, "learning_rate": 8.270777726473256e-06, "loss": 0.5926, "step": 4654 }, { "epoch": 0.29, "grad_norm": 0.9366374611854553, "learning_rate": 8.270001636987535e-06, "loss": 0.5992, "step": 4655 }, { "epoch": 0.29, "grad_norm": 0.9009166359901428, "learning_rate": 8.26922540981342e-06, "loss": 0.6138, "step": 4656 }, { "epoch": 0.3, "grad_norm": 0.8974087834358215, "learning_rate": 8.268449044983598e-06, "loss": 0.5916, "step": 4657 }, { "epoch": 0.3, "grad_norm": 0.8280764222145081, "learning_rate": 8.267672542530753e-06, "loss": 0.6036, "step": 4658 }, { "epoch": 0.3, "grad_norm": 0.8438900709152222, "learning_rate": 8.266895902487588e-06, "loss": 0.5817, "step": 4659 }, { "epoch": 0.3, "grad_norm": 0.8856135010719299, "learning_rate": 8.2661191248868e-06, "loss": 0.6245, "step": 4660 }, { "epoch": 0.3, "grad_norm": 0.8787485957145691, "learning_rate": 8.265342209761098e-06, "loss": 0.6138, "step": 4661 }, { "epoch": 0.3, "grad_norm": 0.8637370467185974, "learning_rate": 8.264565157143194e-06, "loss": 0.6444, "step": 4662 }, { "epoch": 0.3, "grad_norm": 0.9093601107597351, "learning_rate": 8.26378796706581e-06, "loss": 0.6109, "step": 4663 }, { "epoch": 0.3, "grad_norm": 0.8617517352104187, "learning_rate": 8.263010639561666e-06, "loss": 0.6166, "step": 4664 }, { "epoch": 0.3, "grad_norm": 0.8573476076126099, "learning_rate": 8.262233174663497e-06, "loss": 0.5985, "step": 4665 }, { "epoch": 0.3, "grad_norm": 0.9106038808822632, "learning_rate": 8.261455572404036e-06, "loss": 0.6184, "step": 4666 }, { "epoch": 0.3, "grad_norm": 0.9015377163887024, "learning_rate": 8.260677832816029e-06, "loss": 0.5952, "step": 4667 }, { "epoch": 0.3, "grad_norm": 0.8836144804954529, "learning_rate": 8.259899955932218e-06, "loss": 0.5964, "step": 4668 }, { "epoch": 0.3, "grad_norm": 0.9044028520584106, "learning_rate": 8.259121941785362e-06, "loss": 0.626, "step": 4669 }, { "epoch": 0.3, "grad_norm": 0.8526366353034973, "learning_rate": 8.25834379040822e-06, "loss": 0.5792, "step": 4670 }, { "epoch": 0.3, "grad_norm": 0.8615383505821228, "learning_rate": 8.257565501833555e-06, "loss": 0.6326, "step": 4671 }, { "epoch": 0.3, "grad_norm": 0.8761278986930847, "learning_rate": 8.256787076094138e-06, "loss": 0.5917, "step": 4672 }, { "epoch": 0.3, "grad_norm": 0.9185283780097961, "learning_rate": 8.256008513222747e-06, "loss": 0.5846, "step": 4673 }, { "epoch": 0.3, "grad_norm": 0.9208911061286926, "learning_rate": 8.255229813252167e-06, "loss": 0.6307, "step": 4674 }, { "epoch": 0.3, "grad_norm": 0.8966217637062073, "learning_rate": 8.25445097621518e-06, "loss": 0.6079, "step": 4675 }, { "epoch": 0.3, "grad_norm": 0.9059707522392273, "learning_rate": 8.253672002144584e-06, "loss": 0.5925, "step": 4676 }, { "epoch": 0.3, "grad_norm": 0.8802112340927124, "learning_rate": 8.25289289107318e-06, "loss": 0.632, "step": 4677 }, { "epoch": 0.3, "grad_norm": 0.8126215934753418, "learning_rate": 8.252113643033774e-06, "loss": 0.6072, "step": 4678 }, { "epoch": 0.3, "grad_norm": 0.9150187969207764, "learning_rate": 8.251334258059173e-06, "loss": 0.6969, "step": 4679 }, { "epoch": 0.3, "grad_norm": 0.8734495043754578, "learning_rate": 8.250554736182199e-06, "loss": 0.6336, "step": 4680 }, { "epoch": 0.3, "grad_norm": 0.8438607454299927, "learning_rate": 8.249775077435671e-06, "loss": 0.6163, "step": 4681 }, { "epoch": 0.3, "grad_norm": 0.9356812238693237, "learning_rate": 8.24899528185242e-06, "loss": 0.5518, "step": 4682 }, { "epoch": 0.3, "grad_norm": 0.9281412959098816, "learning_rate": 8.24821534946528e-06, "loss": 0.6216, "step": 4683 }, { "epoch": 0.3, "grad_norm": 0.897492527961731, "learning_rate": 8.247435280307093e-06, "loss": 0.6225, "step": 4684 }, { "epoch": 0.3, "grad_norm": 0.8860996961593628, "learning_rate": 8.246655074410703e-06, "loss": 0.6648, "step": 4685 }, { "epoch": 0.3, "grad_norm": 0.9270169138908386, "learning_rate": 8.24587473180896e-06, "loss": 0.6332, "step": 4686 }, { "epoch": 0.3, "grad_norm": 0.9059301018714905, "learning_rate": 8.245094252534727e-06, "loss": 0.605, "step": 4687 }, { "epoch": 0.3, "grad_norm": 0.9001350402832031, "learning_rate": 8.244313636620862e-06, "loss": 0.6293, "step": 4688 }, { "epoch": 0.3, "grad_norm": 0.9218695759773254, "learning_rate": 8.243532884100236e-06, "loss": 0.6064, "step": 4689 }, { "epoch": 0.3, "grad_norm": 0.8904708027839661, "learning_rate": 8.242751995005721e-06, "loss": 0.6136, "step": 4690 }, { "epoch": 0.3, "grad_norm": 0.8279531002044678, "learning_rate": 8.241970969370205e-06, "loss": 0.5799, "step": 4691 }, { "epoch": 0.3, "grad_norm": 0.887289822101593, "learning_rate": 8.241189807226566e-06, "loss": 0.6134, "step": 4692 }, { "epoch": 0.3, "grad_norm": 0.9113506078720093, "learning_rate": 8.240408508607703e-06, "loss": 0.6129, "step": 4693 }, { "epoch": 0.3, "grad_norm": 0.9350869655609131, "learning_rate": 8.239627073546507e-06, "loss": 0.6008, "step": 4694 }, { "epoch": 0.3, "grad_norm": 0.8350124955177307, "learning_rate": 8.238845502075886e-06, "loss": 0.6183, "step": 4695 }, { "epoch": 0.3, "grad_norm": 0.8774599432945251, "learning_rate": 8.238063794228748e-06, "loss": 0.5686, "step": 4696 }, { "epoch": 0.3, "grad_norm": 0.8577974438667297, "learning_rate": 8.237281950038008e-06, "loss": 0.686, "step": 4697 }, { "epoch": 0.3, "grad_norm": 0.8853060603141785, "learning_rate": 8.236499969536585e-06, "loss": 0.6389, "step": 4698 }, { "epoch": 0.3, "grad_norm": 0.8757472038269043, "learning_rate": 8.23571785275741e-06, "loss": 0.6098, "step": 4699 }, { "epoch": 0.3, "grad_norm": 0.9388381242752075, "learning_rate": 8.234935599733412e-06, "loss": 0.6703, "step": 4700 }, { "epoch": 0.3, "grad_norm": 0.9161108136177063, "learning_rate": 8.234153210497528e-06, "loss": 0.5443, "step": 4701 }, { "epoch": 0.3, "grad_norm": 0.8968355059623718, "learning_rate": 8.233370685082704e-06, "loss": 0.6287, "step": 4702 }, { "epoch": 0.3, "grad_norm": 0.8884924650192261, "learning_rate": 8.232588023521888e-06, "loss": 0.5913, "step": 4703 }, { "epoch": 0.3, "grad_norm": 0.8946593999862671, "learning_rate": 8.231805225848035e-06, "loss": 0.6407, "step": 4704 }, { "epoch": 0.3, "grad_norm": 0.8582884669303894, "learning_rate": 8.23102229209411e-06, "loss": 0.5737, "step": 4705 }, { "epoch": 0.3, "grad_norm": 0.9519075155258179, "learning_rate": 8.230239222293073e-06, "loss": 0.5501, "step": 4706 }, { "epoch": 0.3, "grad_norm": 0.9213956594467163, "learning_rate": 8.229456016477899e-06, "loss": 0.5993, "step": 4707 }, { "epoch": 0.3, "grad_norm": 0.9476253390312195, "learning_rate": 8.228672674681568e-06, "loss": 0.6097, "step": 4708 }, { "epoch": 0.3, "grad_norm": 0.9826415181159973, "learning_rate": 8.227889196937062e-06, "loss": 0.697, "step": 4709 }, { "epoch": 0.3, "grad_norm": 0.9420339465141296, "learning_rate": 8.227105583277372e-06, "loss": 0.5592, "step": 4710 }, { "epoch": 0.3, "grad_norm": 0.8203204870223999, "learning_rate": 8.22632183373549e-06, "loss": 0.6001, "step": 4711 }, { "epoch": 0.3, "grad_norm": 0.8779041171073914, "learning_rate": 8.225537948344423e-06, "loss": 0.6033, "step": 4712 }, { "epoch": 0.3, "grad_norm": 0.8583627343177795, "learning_rate": 8.224753927137171e-06, "loss": 0.642, "step": 4713 }, { "epoch": 0.3, "grad_norm": 0.9116830825805664, "learning_rate": 8.22396977014675e-06, "loss": 0.6005, "step": 4714 }, { "epoch": 0.3, "grad_norm": 0.9073758125305176, "learning_rate": 8.223185477406175e-06, "loss": 0.6414, "step": 4715 }, { "epoch": 0.3, "grad_norm": 0.9207981824874878, "learning_rate": 8.222401048948476e-06, "loss": 0.6138, "step": 4716 }, { "epoch": 0.3, "grad_norm": 0.8661931753158569, "learning_rate": 8.221616484806676e-06, "loss": 0.6059, "step": 4717 }, { "epoch": 0.3, "grad_norm": 0.8917931318283081, "learning_rate": 8.220831785013814e-06, "loss": 0.613, "step": 4718 }, { "epoch": 0.3, "grad_norm": 0.8399578928947449, "learning_rate": 8.22004694960293e-06, "loss": 0.5698, "step": 4719 }, { "epoch": 0.3, "grad_norm": 0.8338463306427002, "learning_rate": 8.21926197860707e-06, "loss": 0.5719, "step": 4720 }, { "epoch": 0.3, "grad_norm": 0.9184006452560425, "learning_rate": 8.218476872059288e-06, "loss": 0.5921, "step": 4721 }, { "epoch": 0.3, "grad_norm": 0.9487320780754089, "learning_rate": 8.217691629992641e-06, "loss": 0.6159, "step": 4722 }, { "epoch": 0.3, "grad_norm": 0.8477067351341248, "learning_rate": 8.216906252440193e-06, "loss": 0.5805, "step": 4723 }, { "epoch": 0.3, "grad_norm": 0.878990650177002, "learning_rate": 8.216120739435013e-06, "loss": 0.6369, "step": 4724 }, { "epoch": 0.3, "grad_norm": 0.8777364492416382, "learning_rate": 8.215335091010177e-06, "loss": 0.5752, "step": 4725 }, { "epoch": 0.3, "grad_norm": 0.8827346563339233, "learning_rate": 8.214549307198765e-06, "loss": 0.5918, "step": 4726 }, { "epoch": 0.3, "grad_norm": 0.9704633355140686, "learning_rate": 8.213763388033867e-06, "loss": 0.6544, "step": 4727 }, { "epoch": 0.3, "grad_norm": 0.946010172367096, "learning_rate": 8.212977333548569e-06, "loss": 0.6527, "step": 4728 }, { "epoch": 0.3, "grad_norm": 0.9947195053100586, "learning_rate": 8.212191143775973e-06, "loss": 0.6222, "step": 4729 }, { "epoch": 0.3, "grad_norm": 0.9032514691352844, "learning_rate": 8.211404818749184e-06, "loss": 0.6712, "step": 4730 }, { "epoch": 0.3, "grad_norm": 0.8361782431602478, "learning_rate": 8.21061835850131e-06, "loss": 0.5807, "step": 4731 }, { "epoch": 0.3, "grad_norm": 0.8890867233276367, "learning_rate": 8.209831763065465e-06, "loss": 0.6234, "step": 4732 }, { "epoch": 0.3, "grad_norm": 0.9419566988945007, "learning_rate": 8.209045032474773e-06, "loss": 0.5666, "step": 4733 }, { "epoch": 0.3, "grad_norm": 0.8726335763931274, "learning_rate": 8.208258166762355e-06, "loss": 0.5848, "step": 4734 }, { "epoch": 0.3, "grad_norm": 0.8878278136253357, "learning_rate": 8.207471165961347e-06, "loss": 0.6069, "step": 4735 }, { "epoch": 0.3, "grad_norm": 0.8903132081031799, "learning_rate": 8.206684030104886e-06, "loss": 0.6338, "step": 4736 }, { "epoch": 0.3, "grad_norm": 0.8635721206665039, "learning_rate": 8.205896759226115e-06, "loss": 0.6088, "step": 4737 }, { "epoch": 0.3, "grad_norm": 0.8765946626663208, "learning_rate": 8.205109353358186e-06, "loss": 0.637, "step": 4738 }, { "epoch": 0.3, "grad_norm": 0.8656042218208313, "learning_rate": 8.20432181253425e-06, "loss": 0.5851, "step": 4739 }, { "epoch": 0.3, "grad_norm": 0.8711687922477722, "learning_rate": 8.203534136787473e-06, "loss": 0.5607, "step": 4740 }, { "epoch": 0.3, "grad_norm": 0.8796273469924927, "learning_rate": 8.202746326151015e-06, "loss": 0.5778, "step": 4741 }, { "epoch": 0.3, "grad_norm": 0.8987690806388855, "learning_rate": 8.20195838065805e-06, "loss": 0.6896, "step": 4742 }, { "epoch": 0.3, "grad_norm": 0.9169846177101135, "learning_rate": 8.201170300341757e-06, "loss": 0.6126, "step": 4743 }, { "epoch": 0.3, "grad_norm": 0.8620352149009705, "learning_rate": 8.20038208523532e-06, "loss": 0.6509, "step": 4744 }, { "epoch": 0.3, "grad_norm": 0.9679823517799377, "learning_rate": 8.199593735371924e-06, "loss": 0.6318, "step": 4745 }, { "epoch": 0.3, "grad_norm": 0.8961201310157776, "learning_rate": 8.198805250784769e-06, "loss": 0.6057, "step": 4746 }, { "epoch": 0.3, "grad_norm": 0.8943774700164795, "learning_rate": 8.198016631507053e-06, "loss": 0.5722, "step": 4747 }, { "epoch": 0.3, "grad_norm": 0.9439212679862976, "learning_rate": 8.19722787757198e-06, "loss": 0.5788, "step": 4748 }, { "epoch": 0.3, "grad_norm": 0.929137647151947, "learning_rate": 8.196438989012765e-06, "loss": 0.5959, "step": 4749 }, { "epoch": 0.3, "grad_norm": 0.8944662809371948, "learning_rate": 8.195649965862622e-06, "loss": 0.5926, "step": 4750 }, { "epoch": 0.3, "grad_norm": 0.910987138748169, "learning_rate": 8.194860808154778e-06, "loss": 0.5805, "step": 4751 }, { "epoch": 0.3, "grad_norm": 0.8900378346443176, "learning_rate": 8.194071515922456e-06, "loss": 0.5787, "step": 4752 }, { "epoch": 0.3, "grad_norm": 0.9534246325492859, "learning_rate": 8.193282089198897e-06, "loss": 0.6274, "step": 4753 }, { "epoch": 0.3, "grad_norm": 0.9117621779441833, "learning_rate": 8.192492528017337e-06, "loss": 0.6155, "step": 4754 }, { "epoch": 0.3, "grad_norm": 0.830488383769989, "learning_rate": 8.191702832411023e-06, "loss": 0.5826, "step": 4755 }, { "epoch": 0.3, "grad_norm": 0.9440089464187622, "learning_rate": 8.190913002413204e-06, "loss": 0.652, "step": 4756 }, { "epoch": 0.3, "grad_norm": 0.9013427495956421, "learning_rate": 8.19012303805714e-06, "loss": 0.6266, "step": 4757 }, { "epoch": 0.3, "grad_norm": 0.8950908184051514, "learning_rate": 8.189332939376092e-06, "loss": 0.6494, "step": 4758 }, { "epoch": 0.3, "grad_norm": 0.8194960951805115, "learning_rate": 8.188542706403331e-06, "loss": 0.5597, "step": 4759 }, { "epoch": 0.3, "grad_norm": 0.8966452479362488, "learning_rate": 8.187752339172126e-06, "loss": 0.5973, "step": 4760 }, { "epoch": 0.3, "grad_norm": 0.8810895681381226, "learning_rate": 8.18696183771576e-06, "loss": 0.6343, "step": 4761 }, { "epoch": 0.3, "grad_norm": 0.9110032916069031, "learning_rate": 8.18617120206752e-06, "loss": 0.6262, "step": 4762 }, { "epoch": 0.3, "grad_norm": 0.8125797510147095, "learning_rate": 8.185380432260693e-06, "loss": 0.5832, "step": 4763 }, { "epoch": 0.3, "grad_norm": 0.9191034436225891, "learning_rate": 8.184589528328576e-06, "loss": 0.6396, "step": 4764 }, { "epoch": 0.3, "grad_norm": 0.910497784614563, "learning_rate": 8.183798490304473e-06, "loss": 0.6085, "step": 4765 }, { "epoch": 0.3, "grad_norm": 0.9907393455505371, "learning_rate": 8.183007318221691e-06, "loss": 0.6692, "step": 4766 }, { "epoch": 0.3, "grad_norm": 0.897619903087616, "learning_rate": 8.182216012113543e-06, "loss": 0.5841, "step": 4767 }, { "epoch": 0.3, "grad_norm": 0.8304966688156128, "learning_rate": 8.181424572013352e-06, "loss": 0.5711, "step": 4768 }, { "epoch": 0.3, "grad_norm": 0.8721338510513306, "learning_rate": 8.180632997954437e-06, "loss": 0.5515, "step": 4769 }, { "epoch": 0.3, "grad_norm": 0.9142031073570251, "learning_rate": 8.179841289970132e-06, "loss": 0.6018, "step": 4770 }, { "epoch": 0.3, "grad_norm": 0.8813204765319824, "learning_rate": 8.179049448093771e-06, "loss": 0.6077, "step": 4771 }, { "epoch": 0.3, "grad_norm": 0.8773213624954224, "learning_rate": 8.178257472358697e-06, "loss": 0.6325, "step": 4772 }, { "epoch": 0.3, "grad_norm": 0.936450183391571, "learning_rate": 8.177465362798259e-06, "loss": 0.6321, "step": 4773 }, { "epoch": 0.3, "grad_norm": 0.898252010345459, "learning_rate": 8.176673119445807e-06, "loss": 0.6171, "step": 4774 }, { "epoch": 0.3, "grad_norm": 0.8829185366630554, "learning_rate": 8.1758807423347e-06, "loss": 0.6601, "step": 4775 }, { "epoch": 0.3, "grad_norm": 0.8611942529678345, "learning_rate": 8.175088231498304e-06, "loss": 0.6078, "step": 4776 }, { "epoch": 0.3, "grad_norm": 0.9188866019248962, "learning_rate": 8.174295586969987e-06, "loss": 0.5632, "step": 4777 }, { "epoch": 0.3, "grad_norm": 0.9207391142845154, "learning_rate": 8.173502808783127e-06, "loss": 0.6337, "step": 4778 }, { "epoch": 0.3, "grad_norm": 0.8784085512161255, "learning_rate": 8.172709896971103e-06, "loss": 0.5728, "step": 4779 }, { "epoch": 0.3, "grad_norm": 0.9227593541145325, "learning_rate": 8.1719168515673e-06, "loss": 0.6868, "step": 4780 }, { "epoch": 0.3, "grad_norm": 0.9259268045425415, "learning_rate": 8.171123672605116e-06, "loss": 0.6095, "step": 4781 }, { "epoch": 0.3, "grad_norm": 0.8867712020874023, "learning_rate": 8.170330360117944e-06, "loss": 0.6493, "step": 4782 }, { "epoch": 0.3, "grad_norm": 0.9128400087356567, "learning_rate": 8.169536914139189e-06, "loss": 0.5594, "step": 4783 }, { "epoch": 0.3, "grad_norm": 0.9003540873527527, "learning_rate": 8.168743334702262e-06, "loss": 0.5926, "step": 4784 }, { "epoch": 0.3, "grad_norm": 0.9833676218986511, "learning_rate": 8.167949621840576e-06, "loss": 0.621, "step": 4785 }, { "epoch": 0.3, "grad_norm": 0.865376889705658, "learning_rate": 8.16715577558755e-06, "loss": 0.5874, "step": 4786 }, { "epoch": 0.3, "grad_norm": 0.8381644487380981, "learning_rate": 8.166361795976614e-06, "loss": 0.6214, "step": 4787 }, { "epoch": 0.3, "grad_norm": 0.9137545228004456, "learning_rate": 8.165567683041197e-06, "loss": 0.6128, "step": 4788 }, { "epoch": 0.3, "grad_norm": 0.8159583210945129, "learning_rate": 8.164773436814736e-06, "loss": 0.5778, "step": 4789 }, { "epoch": 0.3, "grad_norm": 0.9791309833526611, "learning_rate": 8.163979057330677e-06, "loss": 0.598, "step": 4790 }, { "epoch": 0.3, "grad_norm": 0.8282786011695862, "learning_rate": 8.163184544622467e-06, "loss": 0.4893, "step": 4791 }, { "epoch": 0.3, "grad_norm": 0.9099088907241821, "learning_rate": 8.162389898723558e-06, "loss": 0.6035, "step": 4792 }, { "epoch": 0.3, "grad_norm": 0.8515049815177917, "learning_rate": 8.161595119667413e-06, "loss": 0.582, "step": 4793 }, { "epoch": 0.3, "grad_norm": 0.9349850416183472, "learning_rate": 8.160800207487495e-06, "loss": 0.6135, "step": 4794 }, { "epoch": 0.3, "grad_norm": 0.848458468914032, "learning_rate": 8.160005162217275e-06, "loss": 0.6004, "step": 4795 }, { "epoch": 0.3, "grad_norm": 0.8345276117324829, "learning_rate": 8.159209983890232e-06, "loss": 0.5956, "step": 4796 }, { "epoch": 0.3, "grad_norm": 0.8990775942802429, "learning_rate": 8.158414672539845e-06, "loss": 0.6247, "step": 4797 }, { "epoch": 0.3, "grad_norm": 0.8795309662818909, "learning_rate": 8.157619228199605e-06, "loss": 0.5965, "step": 4798 }, { "epoch": 0.3, "grad_norm": 0.9947782158851624, "learning_rate": 8.156823650903003e-06, "loss": 0.6742, "step": 4799 }, { "epoch": 0.3, "grad_norm": 0.8884429931640625, "learning_rate": 8.156027940683539e-06, "loss": 0.5757, "step": 4800 }, { "epoch": 0.3, "grad_norm": 0.888424277305603, "learning_rate": 8.15523209757472e-06, "loss": 0.6723, "step": 4801 }, { "epoch": 0.3, "grad_norm": 0.9217067360877991, "learning_rate": 8.15443612161005e-06, "loss": 0.6168, "step": 4802 }, { "epoch": 0.3, "grad_norm": 0.8976277709007263, "learning_rate": 8.15364001282305e-06, "loss": 0.6127, "step": 4803 }, { "epoch": 0.3, "grad_norm": 0.8980615735054016, "learning_rate": 8.15284377124724e-06, "loss": 0.5577, "step": 4804 }, { "epoch": 0.3, "grad_norm": 0.8790192008018494, "learning_rate": 8.152047396916145e-06, "loss": 0.5193, "step": 4805 }, { "epoch": 0.3, "grad_norm": 0.9707584977149963, "learning_rate": 8.1512508898633e-06, "loss": 0.6459, "step": 4806 }, { "epoch": 0.3, "grad_norm": 0.8137477040290833, "learning_rate": 8.150454250122245e-06, "loss": 0.585, "step": 4807 }, { "epoch": 0.3, "grad_norm": 0.8875191807746887, "learning_rate": 8.149657477726518e-06, "loss": 0.581, "step": 4808 }, { "epoch": 0.3, "grad_norm": 0.8823238611221313, "learning_rate": 8.148860572709674e-06, "loss": 0.6095, "step": 4809 }, { "epoch": 0.3, "grad_norm": 0.8552770018577576, "learning_rate": 8.148063535105261e-06, "loss": 0.596, "step": 4810 }, { "epoch": 0.3, "grad_norm": 0.9799924492835999, "learning_rate": 8.147266364946848e-06, "loss": 0.6214, "step": 4811 }, { "epoch": 0.3, "grad_norm": 0.9276837706565857, "learning_rate": 8.146469062267995e-06, "loss": 0.6281, "step": 4812 }, { "epoch": 0.3, "grad_norm": 0.9394620656967163, "learning_rate": 8.145671627102277e-06, "loss": 0.606, "step": 4813 }, { "epoch": 0.3, "grad_norm": 0.8922251462936401, "learning_rate": 8.14487405948327e-06, "loss": 0.5859, "step": 4814 }, { "epoch": 0.31, "grad_norm": 0.8474637866020203, "learning_rate": 8.144076359444555e-06, "loss": 0.5582, "step": 4815 }, { "epoch": 0.31, "grad_norm": 0.9072783589363098, "learning_rate": 8.143278527019722e-06, "loss": 0.6062, "step": 4816 }, { "epoch": 0.31, "grad_norm": 0.9250416159629822, "learning_rate": 8.142480562242365e-06, "loss": 0.6103, "step": 4817 }, { "epoch": 0.31, "grad_norm": 0.9013091325759888, "learning_rate": 8.141682465146084e-06, "loss": 0.6005, "step": 4818 }, { "epoch": 0.31, "grad_norm": 0.8904187083244324, "learning_rate": 8.140884235764484e-06, "loss": 0.6026, "step": 4819 }, { "epoch": 0.31, "grad_norm": 0.884691596031189, "learning_rate": 8.140085874131174e-06, "loss": 0.5996, "step": 4820 }, { "epoch": 0.31, "grad_norm": 0.8515493869781494, "learning_rate": 8.139287380279773e-06, "loss": 0.6056, "step": 4821 }, { "epoch": 0.31, "grad_norm": 0.9234597086906433, "learning_rate": 8.138488754243899e-06, "loss": 0.6553, "step": 4822 }, { "epoch": 0.31, "grad_norm": 0.9508641958236694, "learning_rate": 8.137689996057183e-06, "loss": 0.6402, "step": 4823 }, { "epoch": 0.31, "grad_norm": 0.8529036641120911, "learning_rate": 8.136891105753258e-06, "loss": 0.5396, "step": 4824 }, { "epoch": 0.31, "grad_norm": 0.888896107673645, "learning_rate": 8.136092083365758e-06, "loss": 0.6194, "step": 4825 }, { "epoch": 0.31, "grad_norm": 0.8955714106559753, "learning_rate": 8.13529292892833e-06, "loss": 0.6351, "step": 4826 }, { "epoch": 0.31, "grad_norm": 0.89292311668396, "learning_rate": 8.134493642474625e-06, "loss": 0.6317, "step": 4827 }, { "epoch": 0.31, "grad_norm": 0.8899608850479126, "learning_rate": 8.133694224038297e-06, "loss": 0.5935, "step": 4828 }, { "epoch": 0.31, "grad_norm": 0.8758254051208496, "learning_rate": 8.132894673653007e-06, "loss": 0.6203, "step": 4829 }, { "epoch": 0.31, "grad_norm": 0.8891094326972961, "learning_rate": 8.13209499135242e-06, "loss": 0.5985, "step": 4830 }, { "epoch": 0.31, "grad_norm": 0.9486083984375, "learning_rate": 8.131295177170208e-06, "loss": 0.6333, "step": 4831 }, { "epoch": 0.31, "grad_norm": 0.9144448041915894, "learning_rate": 8.13049523114005e-06, "loss": 0.6241, "step": 4832 }, { "epoch": 0.31, "grad_norm": 0.8380624651908875, "learning_rate": 8.129695153295627e-06, "loss": 0.5578, "step": 4833 }, { "epoch": 0.31, "grad_norm": 0.8630735278129578, "learning_rate": 8.128894943670631e-06, "loss": 0.5934, "step": 4834 }, { "epoch": 0.31, "grad_norm": 0.8654153943061829, "learning_rate": 8.12809460229875e-06, "loss": 0.5767, "step": 4835 }, { "epoch": 0.31, "grad_norm": 0.8959805369377136, "learning_rate": 8.127294129213691e-06, "loss": 0.6212, "step": 4836 }, { "epoch": 0.31, "grad_norm": 0.8940380215644836, "learning_rate": 8.126493524449153e-06, "loss": 0.6261, "step": 4837 }, { "epoch": 0.31, "grad_norm": 0.946277916431427, "learning_rate": 8.12569278803885e-06, "loss": 0.617, "step": 4838 }, { "epoch": 0.31, "grad_norm": 0.9186848998069763, "learning_rate": 8.124891920016495e-06, "loss": 0.6387, "step": 4839 }, { "epoch": 0.31, "grad_norm": 0.9961265325546265, "learning_rate": 8.124090920415814e-06, "loss": 0.6452, "step": 4840 }, { "epoch": 0.31, "grad_norm": 0.8689594268798828, "learning_rate": 8.123289789270532e-06, "loss": 0.6492, "step": 4841 }, { "epoch": 0.31, "grad_norm": 0.9431710243225098, "learning_rate": 8.12248852661438e-06, "loss": 0.639, "step": 4842 }, { "epoch": 0.31, "grad_norm": 0.8190953731536865, "learning_rate": 8.121687132481101e-06, "loss": 0.5752, "step": 4843 }, { "epoch": 0.31, "grad_norm": 0.828509509563446, "learning_rate": 8.120885606904436e-06, "loss": 0.5913, "step": 4844 }, { "epoch": 0.31, "grad_norm": 0.8336859345436096, "learning_rate": 8.120083949918137e-06, "loss": 0.5577, "step": 4845 }, { "epoch": 0.31, "grad_norm": 0.8060721158981323, "learning_rate": 8.119282161555952e-06, "loss": 0.5642, "step": 4846 }, { "epoch": 0.31, "grad_norm": 0.9661688208580017, "learning_rate": 8.11848024185165e-06, "loss": 0.6525, "step": 4847 }, { "epoch": 0.31, "grad_norm": 0.8893968462944031, "learning_rate": 8.117678190838991e-06, "loss": 0.6042, "step": 4848 }, { "epoch": 0.31, "grad_norm": 0.8722717761993408, "learning_rate": 8.116876008551751e-06, "loss": 0.5784, "step": 4849 }, { "epoch": 0.31, "grad_norm": 0.8579627275466919, "learning_rate": 8.116073695023704e-06, "loss": 0.6056, "step": 4850 }, { "epoch": 0.31, "grad_norm": 0.9025737643241882, "learning_rate": 8.115271250288635e-06, "loss": 0.5785, "step": 4851 }, { "epoch": 0.31, "grad_norm": 0.9787115454673767, "learning_rate": 8.114468674380328e-06, "loss": 0.6641, "step": 4852 }, { "epoch": 0.31, "grad_norm": 0.9400716423988342, "learning_rate": 8.113665967332582e-06, "loss": 0.604, "step": 4853 }, { "epoch": 0.31, "grad_norm": 0.8718861937522888, "learning_rate": 8.112863129179194e-06, "loss": 0.644, "step": 4854 }, { "epoch": 0.31, "grad_norm": 0.9167654514312744, "learning_rate": 8.112060159953966e-06, "loss": 0.6431, "step": 4855 }, { "epoch": 0.31, "grad_norm": 0.9498146176338196, "learning_rate": 8.111257059690714e-06, "loss": 0.6549, "step": 4856 }, { "epoch": 0.31, "grad_norm": 0.9236502051353455, "learning_rate": 8.110453828423248e-06, "loss": 0.6302, "step": 4857 }, { "epoch": 0.31, "grad_norm": 0.9004657864570618, "learning_rate": 8.109650466185394e-06, "loss": 0.6053, "step": 4858 }, { "epoch": 0.31, "grad_norm": 0.8638118505477905, "learning_rate": 8.108846973010975e-06, "loss": 0.558, "step": 4859 }, { "epoch": 0.31, "grad_norm": 0.8778232932090759, "learning_rate": 8.108043348933825e-06, "loss": 0.626, "step": 4860 }, { "epoch": 0.31, "grad_norm": 0.9175794720649719, "learning_rate": 8.107239593987781e-06, "loss": 0.5887, "step": 4861 }, { "epoch": 0.31, "grad_norm": 0.9042779207229614, "learning_rate": 8.10643570820669e-06, "loss": 0.6235, "step": 4862 }, { "epoch": 0.31, "grad_norm": 0.9080462455749512, "learning_rate": 8.105631691624394e-06, "loss": 0.6401, "step": 4863 }, { "epoch": 0.31, "grad_norm": 0.9129647016525269, "learning_rate": 8.104827544274754e-06, "loss": 0.7069, "step": 4864 }, { "epoch": 0.31, "grad_norm": 0.876524806022644, "learning_rate": 8.104023266191625e-06, "loss": 0.5799, "step": 4865 }, { "epoch": 0.31, "grad_norm": 0.9011818766593933, "learning_rate": 8.103218857408875e-06, "loss": 0.6213, "step": 4866 }, { "epoch": 0.31, "grad_norm": 0.9621694684028625, "learning_rate": 8.102414317960373e-06, "loss": 0.6267, "step": 4867 }, { "epoch": 0.31, "grad_norm": 0.883078932762146, "learning_rate": 8.10160964788e-06, "loss": 0.5822, "step": 4868 }, { "epoch": 0.31, "grad_norm": 0.9894744157791138, "learning_rate": 8.100804847201632e-06, "loss": 0.6117, "step": 4869 }, { "epoch": 0.31, "grad_norm": 0.9744158983230591, "learning_rate": 8.09999991595916e-06, "loss": 0.5913, "step": 4870 }, { "epoch": 0.31, "grad_norm": 0.9933215379714966, "learning_rate": 8.099194854186475e-06, "loss": 0.6645, "step": 4871 }, { "epoch": 0.31, "grad_norm": 0.8537378907203674, "learning_rate": 8.098389661917475e-06, "loss": 0.5375, "step": 4872 }, { "epoch": 0.31, "grad_norm": 0.9022486209869385, "learning_rate": 8.097584339186066e-06, "loss": 0.5917, "step": 4873 }, { "epoch": 0.31, "grad_norm": 0.8206536173820496, "learning_rate": 8.096778886026155e-06, "loss": 0.6281, "step": 4874 }, { "epoch": 0.31, "grad_norm": 0.9026719927787781, "learning_rate": 8.09597330247166e-06, "loss": 0.5709, "step": 4875 }, { "epoch": 0.31, "grad_norm": 0.8792065978050232, "learning_rate": 8.095167588556498e-06, "loss": 0.6622, "step": 4876 }, { "epoch": 0.31, "grad_norm": 0.938779890537262, "learning_rate": 8.094361744314597e-06, "loss": 0.6345, "step": 4877 }, { "epoch": 0.31, "grad_norm": 0.84425288438797, "learning_rate": 8.093555769779887e-06, "loss": 0.5552, "step": 4878 }, { "epoch": 0.31, "grad_norm": 0.9458581209182739, "learning_rate": 8.092749664986304e-06, "loss": 0.639, "step": 4879 }, { "epoch": 0.31, "grad_norm": 0.8601045608520508, "learning_rate": 8.091943429967792e-06, "loss": 0.5757, "step": 4880 }, { "epoch": 0.31, "grad_norm": 0.9777496457099915, "learning_rate": 8.0911370647583e-06, "loss": 0.6873, "step": 4881 }, { "epoch": 0.31, "grad_norm": 0.830226480960846, "learning_rate": 8.090330569391778e-06, "loss": 0.5941, "step": 4882 }, { "epoch": 0.31, "grad_norm": 0.9674537777900696, "learning_rate": 8.089523943902187e-06, "loss": 0.6356, "step": 4883 }, { "epoch": 0.31, "grad_norm": 0.9554563164710999, "learning_rate": 8.088717188323489e-06, "loss": 0.6701, "step": 4884 }, { "epoch": 0.31, "grad_norm": 0.8986421823501587, "learning_rate": 8.087910302689656e-06, "loss": 0.6009, "step": 4885 }, { "epoch": 0.31, "grad_norm": 0.8413382172584534, "learning_rate": 8.087103287034664e-06, "loss": 0.6183, "step": 4886 }, { "epoch": 0.31, "grad_norm": 0.8769293427467346, "learning_rate": 8.086296141392489e-06, "loss": 0.5598, "step": 4887 }, { "epoch": 0.31, "grad_norm": 0.8853359818458557, "learning_rate": 8.08548886579712e-06, "loss": 0.6184, "step": 4888 }, { "epoch": 0.31, "grad_norm": 0.9408413171768188, "learning_rate": 8.08468146028255e-06, "loss": 0.5866, "step": 4889 }, { "epoch": 0.31, "grad_norm": 0.8464492559432983, "learning_rate": 8.083873924882775e-06, "loss": 0.5744, "step": 4890 }, { "epoch": 0.31, "grad_norm": 0.9093937873840332, "learning_rate": 8.083066259631796e-06, "loss": 0.6079, "step": 4891 }, { "epoch": 0.31, "grad_norm": 0.9300260543823242, "learning_rate": 8.082258464563621e-06, "loss": 0.6214, "step": 4892 }, { "epoch": 0.31, "grad_norm": 0.8799288272857666, "learning_rate": 8.081450539712266e-06, "loss": 0.5775, "step": 4893 }, { "epoch": 0.31, "grad_norm": 0.9392613768577576, "learning_rate": 8.080642485111747e-06, "loss": 0.5812, "step": 4894 }, { "epoch": 0.31, "grad_norm": 0.8481096625328064, "learning_rate": 8.07983430079609e-06, "loss": 0.5847, "step": 4895 }, { "epoch": 0.31, "grad_norm": 0.8689022660255432, "learning_rate": 8.079025986799326e-06, "loss": 0.5681, "step": 4896 }, { "epoch": 0.31, "grad_norm": 0.8942854404449463, "learning_rate": 8.078217543155488e-06, "loss": 0.5555, "step": 4897 }, { "epoch": 0.31, "grad_norm": 0.9350181818008423, "learning_rate": 8.077408969898619e-06, "loss": 0.6732, "step": 4898 }, { "epoch": 0.31, "grad_norm": 0.8985404372215271, "learning_rate": 8.076600267062761e-06, "loss": 0.6312, "step": 4899 }, { "epoch": 0.31, "grad_norm": 0.9145780801773071, "learning_rate": 8.07579143468197e-06, "loss": 0.6166, "step": 4900 }, { "epoch": 0.31, "grad_norm": 0.8609732389450073, "learning_rate": 8.074982472790302e-06, "loss": 0.5519, "step": 4901 }, { "epoch": 0.31, "grad_norm": 0.9401060938835144, "learning_rate": 8.074173381421819e-06, "loss": 0.6135, "step": 4902 }, { "epoch": 0.31, "grad_norm": 0.8980786800384521, "learning_rate": 8.073364160610589e-06, "loss": 0.578, "step": 4903 }, { "epoch": 0.31, "grad_norm": 0.8506133556365967, "learning_rate": 8.072554810390685e-06, "loss": 0.5842, "step": 4904 }, { "epoch": 0.31, "grad_norm": 0.9556955099105835, "learning_rate": 8.071745330796187e-06, "loss": 0.6877, "step": 4905 }, { "epoch": 0.31, "grad_norm": 0.8503575921058655, "learning_rate": 8.070935721861178e-06, "loss": 0.5922, "step": 4906 }, { "epoch": 0.31, "grad_norm": 0.8888681530952454, "learning_rate": 8.07012598361975e-06, "loss": 0.6101, "step": 4907 }, { "epoch": 0.31, "grad_norm": 0.8827106952667236, "learning_rate": 8.069316116105996e-06, "loss": 0.6722, "step": 4908 }, { "epoch": 0.31, "grad_norm": 0.8604966998100281, "learning_rate": 8.068506119354019e-06, "loss": 0.566, "step": 4909 }, { "epoch": 0.31, "grad_norm": 0.9307197332382202, "learning_rate": 8.067695993397923e-06, "loss": 0.6324, "step": 4910 }, { "epoch": 0.31, "grad_norm": 0.8086503148078918, "learning_rate": 8.066885738271821e-06, "loss": 0.5555, "step": 4911 }, { "epoch": 0.31, "grad_norm": 0.8632538914680481, "learning_rate": 8.06607535400983e-06, "loss": 0.5949, "step": 4912 }, { "epoch": 0.31, "grad_norm": 0.893225908279419, "learning_rate": 8.06526484064607e-06, "loss": 0.5895, "step": 4913 }, { "epoch": 0.31, "grad_norm": 0.9265469908714294, "learning_rate": 8.064454198214673e-06, "loss": 0.6288, "step": 4914 }, { "epoch": 0.31, "grad_norm": 0.9373133778572083, "learning_rate": 8.063643426749769e-06, "loss": 0.6299, "step": 4915 }, { "epoch": 0.31, "grad_norm": 0.9107393622398376, "learning_rate": 8.062832526285498e-06, "loss": 0.634, "step": 4916 }, { "epoch": 0.31, "grad_norm": 0.9622877836227417, "learning_rate": 8.062021496856004e-06, "loss": 0.6507, "step": 4917 }, { "epoch": 0.31, "grad_norm": 0.9220041632652283, "learning_rate": 8.061210338495437e-06, "loss": 0.6477, "step": 4918 }, { "epoch": 0.31, "grad_norm": 0.8224441409111023, "learning_rate": 8.060399051237952e-06, "loss": 0.65, "step": 4919 }, { "epoch": 0.31, "grad_norm": 0.8881222605705261, "learning_rate": 8.059587635117709e-06, "loss": 0.5975, "step": 4920 }, { "epoch": 0.31, "grad_norm": 0.8618130683898926, "learning_rate": 8.058776090168874e-06, "loss": 0.5906, "step": 4921 }, { "epoch": 0.31, "grad_norm": 0.8412930369377136, "learning_rate": 8.057964416425618e-06, "loss": 0.5719, "step": 4922 }, { "epoch": 0.31, "grad_norm": 0.9025030136108398, "learning_rate": 8.05715261392212e-06, "loss": 0.5403, "step": 4923 }, { "epoch": 0.31, "grad_norm": 0.8365161418914795, "learning_rate": 8.05634068269256e-06, "loss": 0.5598, "step": 4924 }, { "epoch": 0.31, "grad_norm": 0.8508699536323547, "learning_rate": 8.055528622771124e-06, "loss": 0.6019, "step": 4925 }, { "epoch": 0.31, "grad_norm": 0.8714786171913147, "learning_rate": 8.05471643419201e-06, "loss": 0.5831, "step": 4926 }, { "epoch": 0.31, "grad_norm": 0.9067984819412231, "learning_rate": 8.053904116989413e-06, "loss": 0.6098, "step": 4927 }, { "epoch": 0.31, "grad_norm": 0.7827737927436829, "learning_rate": 8.053091671197537e-06, "loss": 0.5421, "step": 4928 }, { "epoch": 0.31, "grad_norm": 0.8685954809188843, "learning_rate": 8.052279096850591e-06, "loss": 0.6027, "step": 4929 }, { "epoch": 0.31, "grad_norm": 0.8910870552062988, "learning_rate": 8.051466393982792e-06, "loss": 0.6068, "step": 4930 }, { "epoch": 0.31, "grad_norm": 0.8768137693405151, "learning_rate": 8.050653562628356e-06, "loss": 0.6151, "step": 4931 }, { "epoch": 0.31, "grad_norm": 0.9031566977500916, "learning_rate": 8.049840602821512e-06, "loss": 0.6637, "step": 4932 }, { "epoch": 0.31, "grad_norm": 1.0350744724273682, "learning_rate": 8.04902751459649e-06, "loss": 0.606, "step": 4933 }, { "epoch": 0.31, "grad_norm": 0.848858118057251, "learning_rate": 8.048214297987526e-06, "loss": 0.5559, "step": 4934 }, { "epoch": 0.31, "grad_norm": 0.8456379771232605, "learning_rate": 8.047400953028863e-06, "loss": 0.569, "step": 4935 }, { "epoch": 0.31, "grad_norm": 0.8597366213798523, "learning_rate": 8.046587479754746e-06, "loss": 0.5696, "step": 4936 }, { "epoch": 0.31, "grad_norm": 0.9166806936264038, "learning_rate": 8.04577387819943e-06, "loss": 0.659, "step": 4937 }, { "epoch": 0.31, "grad_norm": 0.8950727581977844, "learning_rate": 8.044960148397168e-06, "loss": 0.6182, "step": 4938 }, { "epoch": 0.31, "grad_norm": 0.9122840166091919, "learning_rate": 8.04414629038223e-06, "loss": 0.6245, "step": 4939 }, { "epoch": 0.31, "grad_norm": 0.8276764750480652, "learning_rate": 8.04333230418888e-06, "loss": 0.5669, "step": 4940 }, { "epoch": 0.31, "grad_norm": 0.9038193821907043, "learning_rate": 8.042518189851394e-06, "loss": 0.5997, "step": 4941 }, { "epoch": 0.31, "grad_norm": 0.894939124584198, "learning_rate": 8.04170394740405e-06, "loss": 0.6326, "step": 4942 }, { "epoch": 0.31, "grad_norm": 0.8787042498588562, "learning_rate": 8.040889576881136e-06, "loss": 0.6027, "step": 4943 }, { "epoch": 0.31, "grad_norm": 0.8947983980178833, "learning_rate": 8.04007507831694e-06, "loss": 0.6459, "step": 4944 }, { "epoch": 0.31, "grad_norm": 0.8807556629180908, "learning_rate": 8.039260451745758e-06, "loss": 0.5848, "step": 4945 }, { "epoch": 0.31, "grad_norm": 0.8896877765655518, "learning_rate": 8.03844569720189e-06, "loss": 0.6028, "step": 4946 }, { "epoch": 0.31, "grad_norm": 0.872142493724823, "learning_rate": 8.037630814719644e-06, "loss": 0.5868, "step": 4947 }, { "epoch": 0.31, "grad_norm": 0.808314859867096, "learning_rate": 8.036815804333334e-06, "loss": 0.5896, "step": 4948 }, { "epoch": 0.31, "grad_norm": 0.8907493948936462, "learning_rate": 8.036000666077273e-06, "loss": 0.606, "step": 4949 }, { "epoch": 0.31, "grad_norm": 0.8417367339134216, "learning_rate": 8.035185399985784e-06, "loss": 0.5841, "step": 4950 }, { "epoch": 0.31, "grad_norm": 0.8830011487007141, "learning_rate": 8.034370006093198e-06, "loss": 0.6737, "step": 4951 }, { "epoch": 0.31, "grad_norm": 1.0002917051315308, "learning_rate": 8.033554484433848e-06, "loss": 0.587, "step": 4952 }, { "epoch": 0.31, "grad_norm": 0.9197138547897339, "learning_rate": 8.032738835042068e-06, "loss": 0.6633, "step": 4953 }, { "epoch": 0.31, "grad_norm": 0.889056384563446, "learning_rate": 8.031923057952208e-06, "loss": 0.5941, "step": 4954 }, { "epoch": 0.31, "grad_norm": 0.8778578042984009, "learning_rate": 8.031107153198617e-06, "loss": 0.6132, "step": 4955 }, { "epoch": 0.31, "grad_norm": 0.925252377986908, "learning_rate": 8.030291120815647e-06, "loss": 0.6115, "step": 4956 }, { "epoch": 0.31, "grad_norm": 0.9050502777099609, "learning_rate": 8.029474960837657e-06, "loss": 0.605, "step": 4957 }, { "epoch": 0.31, "grad_norm": 0.8806825280189514, "learning_rate": 8.028658673299019e-06, "loss": 0.6294, "step": 4958 }, { "epoch": 0.31, "grad_norm": 0.8301826119422913, "learning_rate": 8.027842258234097e-06, "loss": 0.5667, "step": 4959 }, { "epoch": 0.31, "grad_norm": 0.9355791211128235, "learning_rate": 8.027025715677273e-06, "loss": 0.6805, "step": 4960 }, { "epoch": 0.31, "grad_norm": 0.9568033814430237, "learning_rate": 8.026209045662925e-06, "loss": 0.6433, "step": 4961 }, { "epoch": 0.31, "grad_norm": 0.8783117532730103, "learning_rate": 8.025392248225444e-06, "loss": 0.581, "step": 4962 }, { "epoch": 0.31, "grad_norm": 0.8595120906829834, "learning_rate": 8.024575323399217e-06, "loss": 0.5877, "step": 4963 }, { "epoch": 0.31, "grad_norm": 0.9189950823783875, "learning_rate": 8.023758271218646e-06, "loss": 0.6188, "step": 4964 }, { "epoch": 0.31, "grad_norm": 0.8918676376342773, "learning_rate": 8.022941091718133e-06, "loss": 0.6064, "step": 4965 }, { "epoch": 0.31, "grad_norm": 0.853366494178772, "learning_rate": 8.022123784932085e-06, "loss": 0.6376, "step": 4966 }, { "epoch": 0.31, "grad_norm": 0.9495976567268372, "learning_rate": 8.02130635089492e-06, "loss": 0.6117, "step": 4967 }, { "epoch": 0.31, "grad_norm": 0.9444292187690735, "learning_rate": 8.020488789641054e-06, "loss": 0.6688, "step": 4968 }, { "epoch": 0.31, "grad_norm": 0.8615371584892273, "learning_rate": 8.019671101204914e-06, "loss": 0.5812, "step": 4969 }, { "epoch": 0.31, "grad_norm": 0.8364808559417725, "learning_rate": 8.018853285620926e-06, "loss": 0.5657, "step": 4970 }, { "epoch": 0.31, "grad_norm": 0.9400182962417603, "learning_rate": 8.018035342923529e-06, "loss": 0.6043, "step": 4971 }, { "epoch": 0.32, "grad_norm": 0.8639470338821411, "learning_rate": 8.017217273147165e-06, "loss": 0.5418, "step": 4972 }, { "epoch": 0.32, "grad_norm": 0.8635435104370117, "learning_rate": 8.016399076326275e-06, "loss": 0.6752, "step": 4973 }, { "epoch": 0.32, "grad_norm": 0.974575400352478, "learning_rate": 8.015580752495314e-06, "loss": 0.6424, "step": 4974 }, { "epoch": 0.32, "grad_norm": 0.9337494969367981, "learning_rate": 8.014762301688737e-06, "loss": 0.6497, "step": 4975 }, { "epoch": 0.32, "grad_norm": 0.9488426446914673, "learning_rate": 8.013943723941009e-06, "loss": 0.5005, "step": 4976 }, { "epoch": 0.32, "grad_norm": 0.8811922073364258, "learning_rate": 8.013125019286594e-06, "loss": 0.6492, "step": 4977 }, { "epoch": 0.32, "grad_norm": 0.8622782230377197, "learning_rate": 8.012306187759966e-06, "loss": 0.6216, "step": 4978 }, { "epoch": 0.32, "grad_norm": 0.9642921686172485, "learning_rate": 8.011487229395605e-06, "loss": 0.6454, "step": 4979 }, { "epoch": 0.32, "grad_norm": 0.8489444851875305, "learning_rate": 8.010668144227991e-06, "loss": 0.6024, "step": 4980 }, { "epoch": 0.32, "grad_norm": 0.9179771542549133, "learning_rate": 8.009848932291617e-06, "loss": 0.5687, "step": 4981 }, { "epoch": 0.32, "grad_norm": 0.8957446813583374, "learning_rate": 8.009029593620974e-06, "loss": 0.5745, "step": 4982 }, { "epoch": 0.32, "grad_norm": 0.9170886874198914, "learning_rate": 8.008210128250563e-06, "loss": 0.6323, "step": 4983 }, { "epoch": 0.32, "grad_norm": 0.8754706382751465, "learning_rate": 8.007390536214888e-06, "loss": 0.6169, "step": 4984 }, { "epoch": 0.32, "grad_norm": 0.8331484794616699, "learning_rate": 8.006570817548457e-06, "loss": 0.5864, "step": 4985 }, { "epoch": 0.32, "grad_norm": 0.9085079431533813, "learning_rate": 8.005750972285793e-06, "loss": 0.6391, "step": 4986 }, { "epoch": 0.32, "grad_norm": 0.8748310208320618, "learning_rate": 8.004931000461408e-06, "loss": 0.5741, "step": 4987 }, { "epoch": 0.32, "grad_norm": 0.8841165900230408, "learning_rate": 8.004110902109832e-06, "loss": 0.6129, "step": 4988 }, { "epoch": 0.32, "grad_norm": 0.88663649559021, "learning_rate": 8.003290677265599e-06, "loss": 0.6556, "step": 4989 }, { "epoch": 0.32, "grad_norm": 0.8917930126190186, "learning_rate": 8.002470325963241e-06, "loss": 0.606, "step": 4990 }, { "epoch": 0.32, "grad_norm": 0.7967976331710815, "learning_rate": 8.001649848237303e-06, "loss": 0.5492, "step": 4991 }, { "epoch": 0.32, "grad_norm": 0.8872556090354919, "learning_rate": 8.000829244122333e-06, "loss": 0.6114, "step": 4992 }, { "epoch": 0.32, "grad_norm": 1.745118498802185, "learning_rate": 8.00000851365288e-06, "loss": 0.6433, "step": 4993 }, { "epoch": 0.32, "grad_norm": 0.8213765621185303, "learning_rate": 7.999187656863507e-06, "loss": 0.5746, "step": 4994 }, { "epoch": 0.32, "grad_norm": 0.8834403157234192, "learning_rate": 7.998366673788775e-06, "loss": 0.5745, "step": 4995 }, { "epoch": 0.32, "grad_norm": 0.9179670214653015, "learning_rate": 7.997545564463251e-06, "loss": 0.641, "step": 4996 }, { "epoch": 0.32, "grad_norm": 0.9359582662582397, "learning_rate": 7.996724328921514e-06, "loss": 0.5567, "step": 4997 }, { "epoch": 0.32, "grad_norm": 0.9402004480361938, "learning_rate": 7.99590296719814e-06, "loss": 0.638, "step": 4998 }, { "epoch": 0.32, "grad_norm": 0.8939769268035889, "learning_rate": 7.995081479327712e-06, "loss": 0.6216, "step": 4999 }, { "epoch": 0.32, "grad_norm": 0.9075430035591125, "learning_rate": 7.994259865344822e-06, "loss": 0.6213, "step": 5000 }, { "epoch": 0.32, "grad_norm": 0.9165618419647217, "learning_rate": 7.993438125284068e-06, "loss": 0.6449, "step": 5001 }, { "epoch": 0.32, "grad_norm": 0.9765704870223999, "learning_rate": 7.992616259180045e-06, "loss": 0.615, "step": 5002 }, { "epoch": 0.32, "grad_norm": 0.9137974977493286, "learning_rate": 7.991794267067363e-06, "loss": 0.6025, "step": 5003 }, { "epoch": 0.32, "grad_norm": 0.8844775557518005, "learning_rate": 7.99097214898063e-06, "loss": 0.6017, "step": 5004 }, { "epoch": 0.32, "grad_norm": 0.9296790361404419, "learning_rate": 7.99014990495447e-06, "loss": 0.5938, "step": 5005 }, { "epoch": 0.32, "grad_norm": 0.8395243287086487, "learning_rate": 7.989327535023495e-06, "loss": 0.6087, "step": 5006 }, { "epoch": 0.32, "grad_norm": 0.9407158493995667, "learning_rate": 7.988505039222339e-06, "loss": 0.6039, "step": 5007 }, { "epoch": 0.32, "grad_norm": 0.9456518292427063, "learning_rate": 7.987682417585629e-06, "loss": 0.6272, "step": 5008 }, { "epoch": 0.32, "grad_norm": 0.9036068916320801, "learning_rate": 7.98685967014801e-06, "loss": 0.6433, "step": 5009 }, { "epoch": 0.32, "grad_norm": 0.9327660202980042, "learning_rate": 7.986036796944116e-06, "loss": 0.6196, "step": 5010 }, { "epoch": 0.32, "grad_norm": 0.9042969346046448, "learning_rate": 7.985213798008605e-06, "loss": 0.6259, "step": 5011 }, { "epoch": 0.32, "grad_norm": 0.8693029880523682, "learning_rate": 7.984390673376123e-06, "loss": 0.6165, "step": 5012 }, { "epoch": 0.32, "grad_norm": 0.8812036514282227, "learning_rate": 7.983567423081331e-06, "loss": 0.6037, "step": 5013 }, { "epoch": 0.32, "grad_norm": 0.8530508279800415, "learning_rate": 7.982744047158897e-06, "loss": 0.6234, "step": 5014 }, { "epoch": 0.32, "grad_norm": 0.9156954884529114, "learning_rate": 7.981920545643485e-06, "loss": 0.5921, "step": 5015 }, { "epoch": 0.32, "grad_norm": 0.849946141242981, "learning_rate": 7.981096918569773e-06, "loss": 0.5624, "step": 5016 }, { "epoch": 0.32, "grad_norm": 0.9375457763671875, "learning_rate": 7.980273165972438e-06, "loss": 0.587, "step": 5017 }, { "epoch": 0.32, "grad_norm": 0.8364583253860474, "learning_rate": 7.979449287886171e-06, "loss": 0.5506, "step": 5018 }, { "epoch": 0.32, "grad_norm": 0.8714501857757568, "learning_rate": 7.978625284345657e-06, "loss": 0.6281, "step": 5019 }, { "epoch": 0.32, "grad_norm": 0.8554301857948303, "learning_rate": 7.977801155385595e-06, "loss": 0.5985, "step": 5020 }, { "epoch": 0.32, "grad_norm": 0.9513722658157349, "learning_rate": 7.976976901040686e-06, "loss": 0.6487, "step": 5021 }, { "epoch": 0.32, "grad_norm": 0.9231401085853577, "learning_rate": 7.976152521345635e-06, "loss": 0.6764, "step": 5022 }, { "epoch": 0.32, "grad_norm": 0.9738418459892273, "learning_rate": 7.975328016335154e-06, "loss": 0.6647, "step": 5023 }, { "epoch": 0.32, "grad_norm": 0.8537105917930603, "learning_rate": 7.974503386043961e-06, "loss": 0.6126, "step": 5024 }, { "epoch": 0.32, "grad_norm": 0.9720308780670166, "learning_rate": 7.973678630506778e-06, "loss": 0.6771, "step": 5025 }, { "epoch": 0.32, "grad_norm": 0.8640322089195251, "learning_rate": 7.972853749758334e-06, "loss": 0.6203, "step": 5026 }, { "epoch": 0.32, "grad_norm": 0.9116325378417969, "learning_rate": 7.972028743833357e-06, "loss": 0.6164, "step": 5027 }, { "epoch": 0.32, "grad_norm": 0.8856568336486816, "learning_rate": 7.971203612766591e-06, "loss": 0.5796, "step": 5028 }, { "epoch": 0.32, "grad_norm": 0.8518129587173462, "learning_rate": 7.970378356592779e-06, "loss": 0.6571, "step": 5029 }, { "epoch": 0.32, "grad_norm": 0.8910609483718872, "learning_rate": 7.969552975346664e-06, "loss": 0.6005, "step": 5030 }, { "epoch": 0.32, "grad_norm": 0.9186645746231079, "learning_rate": 7.968727469063005e-06, "loss": 0.565, "step": 5031 }, { "epoch": 0.32, "grad_norm": 0.8877920508384705, "learning_rate": 7.967901837776559e-06, "loss": 0.6059, "step": 5032 }, { "epoch": 0.32, "grad_norm": 0.8737941384315491, "learning_rate": 7.967076081522091e-06, "loss": 0.6355, "step": 5033 }, { "epoch": 0.32, "grad_norm": 0.9942765831947327, "learning_rate": 7.966250200334373e-06, "loss": 0.7055, "step": 5034 }, { "epoch": 0.32, "grad_norm": 0.8824638724327087, "learning_rate": 7.965424194248176e-06, "loss": 0.5895, "step": 5035 }, { "epoch": 0.32, "grad_norm": 0.9163713455200195, "learning_rate": 7.964598063298282e-06, "loss": 0.6511, "step": 5036 }, { "epoch": 0.32, "grad_norm": 0.8366975784301758, "learning_rate": 7.963771807519477e-06, "loss": 0.5519, "step": 5037 }, { "epoch": 0.32, "grad_norm": 0.8883844614028931, "learning_rate": 7.962945426946552e-06, "loss": 0.5771, "step": 5038 }, { "epoch": 0.32, "grad_norm": 0.862853467464447, "learning_rate": 7.962118921614302e-06, "loss": 0.625, "step": 5039 }, { "epoch": 0.32, "grad_norm": 0.8657647967338562, "learning_rate": 7.961292291557529e-06, "loss": 0.5691, "step": 5040 }, { "epoch": 0.32, "grad_norm": 0.9227752089500427, "learning_rate": 7.960465536811039e-06, "loss": 0.602, "step": 5041 }, { "epoch": 0.32, "grad_norm": 0.8963826298713684, "learning_rate": 7.959638657409643e-06, "loss": 0.5922, "step": 5042 }, { "epoch": 0.32, "grad_norm": 0.8346092104911804, "learning_rate": 7.95881165338816e-06, "loss": 0.5559, "step": 5043 }, { "epoch": 0.32, "grad_norm": 0.9060384631156921, "learning_rate": 7.957984524781413e-06, "loss": 0.6217, "step": 5044 }, { "epoch": 0.32, "grad_norm": 0.9173614978790283, "learning_rate": 7.957157271624225e-06, "loss": 0.5922, "step": 5045 }, { "epoch": 0.32, "grad_norm": 0.9467434883117676, "learning_rate": 7.956329893951432e-06, "loss": 0.6047, "step": 5046 }, { "epoch": 0.32, "grad_norm": 1.0126010179519653, "learning_rate": 7.95550239179787e-06, "loss": 0.6554, "step": 5047 }, { "epoch": 0.32, "grad_norm": 0.780703604221344, "learning_rate": 7.954674765198386e-06, "loss": 0.5616, "step": 5048 }, { "epoch": 0.32, "grad_norm": 0.9652750492095947, "learning_rate": 7.953847014187826e-06, "loss": 0.6468, "step": 5049 }, { "epoch": 0.32, "grad_norm": 0.9707852005958557, "learning_rate": 7.953019138801045e-06, "loss": 0.6298, "step": 5050 }, { "epoch": 0.32, "grad_norm": 0.9064115881919861, "learning_rate": 7.952191139072898e-06, "loss": 0.6399, "step": 5051 }, { "epoch": 0.32, "grad_norm": 0.9037145972251892, "learning_rate": 7.951363015038254e-06, "loss": 0.5806, "step": 5052 }, { "epoch": 0.32, "grad_norm": 0.9301207065582275, "learning_rate": 7.950534766731982e-06, "loss": 0.6627, "step": 5053 }, { "epoch": 0.32, "grad_norm": 0.8569024205207825, "learning_rate": 7.949706394188951e-06, "loss": 0.603, "step": 5054 }, { "epoch": 0.32, "grad_norm": 0.9237979054450989, "learning_rate": 7.948877897444047e-06, "loss": 0.6087, "step": 5055 }, { "epoch": 0.32, "grad_norm": 0.9367351531982422, "learning_rate": 7.948049276532156e-06, "loss": 0.6403, "step": 5056 }, { "epoch": 0.32, "grad_norm": 0.8342140913009644, "learning_rate": 7.94722053148816e-06, "loss": 0.5697, "step": 5057 }, { "epoch": 0.32, "grad_norm": 0.8935142755508423, "learning_rate": 7.946391662346964e-06, "loss": 0.6579, "step": 5058 }, { "epoch": 0.32, "grad_norm": 0.9436396360397339, "learning_rate": 7.945562669143463e-06, "loss": 0.6328, "step": 5059 }, { "epoch": 0.32, "grad_norm": 0.8714977502822876, "learning_rate": 7.944733551912566e-06, "loss": 0.5887, "step": 5060 }, { "epoch": 0.32, "grad_norm": 0.8785292506217957, "learning_rate": 7.943904310689184e-06, "loss": 0.5927, "step": 5061 }, { "epoch": 0.32, "grad_norm": 0.8961544036865234, "learning_rate": 7.94307494550823e-06, "loss": 0.6451, "step": 5062 }, { "epoch": 0.32, "grad_norm": 0.9154882431030273, "learning_rate": 7.94224545640463e-06, "loss": 0.6019, "step": 5063 }, { "epoch": 0.32, "grad_norm": 0.8385921716690063, "learning_rate": 7.941415843413309e-06, "loss": 0.5952, "step": 5064 }, { "epoch": 0.32, "grad_norm": 0.8181779980659485, "learning_rate": 7.940586106569198e-06, "loss": 0.5941, "step": 5065 }, { "epoch": 0.32, "grad_norm": 0.8897058367729187, "learning_rate": 7.939756245907237e-06, "loss": 0.6189, "step": 5066 }, { "epoch": 0.32, "grad_norm": 0.9226515293121338, "learning_rate": 7.938926261462366e-06, "loss": 0.6463, "step": 5067 }, { "epoch": 0.32, "grad_norm": 0.9354571104049683, "learning_rate": 7.938096153269535e-06, "loss": 0.6086, "step": 5068 }, { "epoch": 0.32, "grad_norm": 0.8967651128768921, "learning_rate": 7.937265921363695e-06, "loss": 0.5475, "step": 5069 }, { "epoch": 0.32, "grad_norm": 0.9139410853385925, "learning_rate": 7.936435565779806e-06, "loss": 0.5889, "step": 5070 }, { "epoch": 0.32, "grad_norm": 0.894964337348938, "learning_rate": 7.93560508655283e-06, "loss": 0.59, "step": 5071 }, { "epoch": 0.32, "grad_norm": 0.8929742574691772, "learning_rate": 7.934774483717736e-06, "loss": 0.5761, "step": 5072 }, { "epoch": 0.32, "grad_norm": 0.8965078592300415, "learning_rate": 7.933943757309498e-06, "loss": 0.6356, "step": 5073 }, { "epoch": 0.32, "grad_norm": 0.9059800505638123, "learning_rate": 7.933112907363096e-06, "loss": 0.5718, "step": 5074 }, { "epoch": 0.32, "grad_norm": 0.8989181518554688, "learning_rate": 7.93228193391351e-06, "loss": 0.6147, "step": 5075 }, { "epoch": 0.32, "grad_norm": 0.8238041400909424, "learning_rate": 7.931450836995736e-06, "loss": 0.5621, "step": 5076 }, { "epoch": 0.32, "grad_norm": 0.8373918533325195, "learning_rate": 7.930619616644761e-06, "loss": 0.6033, "step": 5077 }, { "epoch": 0.32, "grad_norm": 0.8767797946929932, "learning_rate": 7.929788272895591e-06, "loss": 0.6104, "step": 5078 }, { "epoch": 0.32, "grad_norm": 0.9680573344230652, "learning_rate": 7.928956805783228e-06, "loss": 0.6186, "step": 5079 }, { "epoch": 0.32, "grad_norm": 0.9051882028579712, "learning_rate": 7.928125215342685e-06, "loss": 0.6336, "step": 5080 }, { "epoch": 0.32, "grad_norm": 0.9240115284919739, "learning_rate": 7.927293501608975e-06, "loss": 0.6207, "step": 5081 }, { "epoch": 0.32, "grad_norm": 0.8769848346710205, "learning_rate": 7.926461664617117e-06, "loss": 0.6018, "step": 5082 }, { "epoch": 0.32, "grad_norm": 0.7785282135009766, "learning_rate": 7.92562970440214e-06, "loss": 0.5859, "step": 5083 }, { "epoch": 0.32, "grad_norm": 0.851161003112793, "learning_rate": 7.924797620999074e-06, "loss": 0.5716, "step": 5084 }, { "epoch": 0.32, "grad_norm": 0.9216321706771851, "learning_rate": 7.923965414442953e-06, "loss": 0.6521, "step": 5085 }, { "epoch": 0.32, "grad_norm": 0.9329628944396973, "learning_rate": 7.923133084768822e-06, "loss": 0.6118, "step": 5086 }, { "epoch": 0.32, "grad_norm": 0.9301400184631348, "learning_rate": 7.922300632011726e-06, "loss": 0.6287, "step": 5087 }, { "epoch": 0.32, "grad_norm": 0.8702458739280701, "learning_rate": 7.921468056206715e-06, "loss": 0.6279, "step": 5088 }, { "epoch": 0.32, "grad_norm": 0.9146727323532104, "learning_rate": 7.920635357388848e-06, "loss": 0.5391, "step": 5089 }, { "epoch": 0.32, "grad_norm": 0.8490307927131653, "learning_rate": 7.919802535593185e-06, "loss": 0.6225, "step": 5090 }, { "epoch": 0.32, "grad_norm": 0.8508750796318054, "learning_rate": 7.918969590854797e-06, "loss": 0.6332, "step": 5091 }, { "epoch": 0.32, "grad_norm": 0.8569998145103455, "learning_rate": 7.91813652320875e-06, "loss": 0.5871, "step": 5092 }, { "epoch": 0.32, "grad_norm": 0.8754677176475525, "learning_rate": 7.91730333269013e-06, "loss": 0.6172, "step": 5093 }, { "epoch": 0.32, "grad_norm": 0.9354038834571838, "learning_rate": 7.916470019334012e-06, "loss": 0.6477, "step": 5094 }, { "epoch": 0.32, "grad_norm": 0.8642258048057556, "learning_rate": 7.915636583175489e-06, "loss": 0.5596, "step": 5095 }, { "epoch": 0.32, "grad_norm": 0.9272780418395996, "learning_rate": 7.91480302424965e-06, "loss": 0.6006, "step": 5096 }, { "epoch": 0.32, "grad_norm": 0.8797223567962646, "learning_rate": 7.913969342591597e-06, "loss": 0.5884, "step": 5097 }, { "epoch": 0.32, "grad_norm": 0.8903371691703796, "learning_rate": 7.913135538236432e-06, "loss": 0.6636, "step": 5098 }, { "epoch": 0.32, "grad_norm": 0.9428971409797668, "learning_rate": 7.912301611219264e-06, "loss": 0.5976, "step": 5099 }, { "epoch": 0.32, "grad_norm": 0.8819142580032349, "learning_rate": 7.911467561575204e-06, "loss": 0.6289, "step": 5100 }, { "epoch": 0.32, "grad_norm": 0.9539601802825928, "learning_rate": 7.910633389339376e-06, "loss": 0.6471, "step": 5101 }, { "epoch": 0.32, "grad_norm": 0.816605806350708, "learning_rate": 7.909799094546899e-06, "loss": 0.5497, "step": 5102 }, { "epoch": 0.32, "grad_norm": 0.8792059421539307, "learning_rate": 7.908964677232906e-06, "loss": 0.5623, "step": 5103 }, { "epoch": 0.32, "grad_norm": 0.9570422172546387, "learning_rate": 7.90813013743253e-06, "loss": 0.628, "step": 5104 }, { "epoch": 0.32, "grad_norm": 0.8918935060501099, "learning_rate": 7.90729547518091e-06, "loss": 0.6153, "step": 5105 }, { "epoch": 0.32, "grad_norm": 0.9165834784507751, "learning_rate": 7.906460690513192e-06, "loss": 0.5937, "step": 5106 }, { "epoch": 0.32, "grad_norm": 0.9291167259216309, "learning_rate": 7.905625783464525e-06, "loss": 0.6248, "step": 5107 }, { "epoch": 0.32, "grad_norm": 0.8594471216201782, "learning_rate": 7.904790754070063e-06, "loss": 0.5916, "step": 5108 }, { "epoch": 0.32, "grad_norm": 1.1345970630645752, "learning_rate": 7.90395560236497e-06, "loss": 0.5552, "step": 5109 }, { "epoch": 0.32, "grad_norm": 0.8997986912727356, "learning_rate": 7.903120328384406e-06, "loss": 0.5698, "step": 5110 }, { "epoch": 0.32, "grad_norm": 0.8802492618560791, "learning_rate": 7.902284932163545e-06, "loss": 0.5791, "step": 5111 }, { "epoch": 0.32, "grad_norm": 0.8879519701004028, "learning_rate": 7.901449413737562e-06, "loss": 0.6044, "step": 5112 }, { "epoch": 0.32, "grad_norm": 0.8550997972488403, "learning_rate": 7.90061377314164e-06, "loss": 0.6232, "step": 5113 }, { "epoch": 0.32, "grad_norm": 0.8434523940086365, "learning_rate": 7.899778010410958e-06, "loss": 0.608, "step": 5114 }, { "epoch": 0.32, "grad_norm": 0.8279407024383545, "learning_rate": 7.898942125580715e-06, "loss": 0.5741, "step": 5115 }, { "epoch": 0.32, "grad_norm": 0.8244683742523193, "learning_rate": 7.898106118686102e-06, "loss": 0.5697, "step": 5116 }, { "epoch": 0.32, "grad_norm": 0.9236017465591431, "learning_rate": 7.897269989762322e-06, "loss": 0.6289, "step": 5117 }, { "epoch": 0.32, "grad_norm": 0.921940803527832, "learning_rate": 7.896433738844583e-06, "loss": 0.5958, "step": 5118 }, { "epoch": 0.32, "grad_norm": 0.8436870574951172, "learning_rate": 7.895597365968093e-06, "loss": 0.6159, "step": 5119 }, { "epoch": 0.32, "grad_norm": 0.8956601619720459, "learning_rate": 7.894760871168074e-06, "loss": 0.6182, "step": 5120 }, { "epoch": 0.32, "grad_norm": 0.9135102033615112, "learning_rate": 7.893924254479744e-06, "loss": 0.6239, "step": 5121 }, { "epoch": 0.32, "grad_norm": 0.9187552332878113, "learning_rate": 7.893087515938329e-06, "loss": 0.6376, "step": 5122 }, { "epoch": 0.32, "grad_norm": 0.9064997434616089, "learning_rate": 7.892250655579063e-06, "loss": 0.6092, "step": 5123 }, { "epoch": 0.32, "grad_norm": 0.9091038107872009, "learning_rate": 7.891413673437185e-06, "loss": 0.5968, "step": 5124 }, { "epoch": 0.32, "grad_norm": 0.9700572490692139, "learning_rate": 7.890576569547937e-06, "loss": 0.6382, "step": 5125 }, { "epoch": 0.32, "grad_norm": 0.8647416830062866, "learning_rate": 7.889739343946561e-06, "loss": 0.646, "step": 5126 }, { "epoch": 0.32, "grad_norm": 0.8427348732948303, "learning_rate": 7.888901996668317e-06, "loss": 0.5889, "step": 5127 }, { "epoch": 0.32, "grad_norm": 0.8934534192085266, "learning_rate": 7.888064527748458e-06, "loss": 0.6227, "step": 5128 }, { "epoch": 0.32, "grad_norm": 0.9173450469970703, "learning_rate": 7.887226937222252e-06, "loss": 0.6438, "step": 5129 }, { "epoch": 0.33, "grad_norm": 0.8801364898681641, "learning_rate": 7.88638922512496e-06, "loss": 0.6402, "step": 5130 }, { "epoch": 0.33, "grad_norm": 0.9062999486923218, "learning_rate": 7.88555139149186e-06, "loss": 0.6434, "step": 5131 }, { "epoch": 0.33, "grad_norm": 0.9464401602745056, "learning_rate": 7.884713436358228e-06, "loss": 0.5997, "step": 5132 }, { "epoch": 0.33, "grad_norm": 0.889105498790741, "learning_rate": 7.883875359759349e-06, "loss": 0.6423, "step": 5133 }, { "epoch": 0.33, "grad_norm": 0.9038829803466797, "learning_rate": 7.883037161730511e-06, "loss": 0.6008, "step": 5134 }, { "epoch": 0.33, "grad_norm": 0.8674249649047852, "learning_rate": 7.882198842307008e-06, "loss": 0.605, "step": 5135 }, { "epoch": 0.33, "grad_norm": 0.9415613412857056, "learning_rate": 7.881360401524138e-06, "loss": 0.6118, "step": 5136 }, { "epoch": 0.33, "grad_norm": 0.9558926820755005, "learning_rate": 7.880521839417206e-06, "loss": 0.5798, "step": 5137 }, { "epoch": 0.33, "grad_norm": 0.8905767798423767, "learning_rate": 7.879683156021518e-06, "loss": 0.611, "step": 5138 }, { "epoch": 0.33, "grad_norm": 0.9160702228546143, "learning_rate": 7.87884435137239e-06, "loss": 0.6068, "step": 5139 }, { "epoch": 0.33, "grad_norm": 0.8442513942718506, "learning_rate": 7.878005425505143e-06, "loss": 0.5846, "step": 5140 }, { "epoch": 0.33, "grad_norm": 0.8762052059173584, "learning_rate": 7.877166378455098e-06, "loss": 0.6208, "step": 5141 }, { "epoch": 0.33, "grad_norm": 0.934578537940979, "learning_rate": 7.876327210257586e-06, "loss": 0.6083, "step": 5142 }, { "epoch": 0.33, "grad_norm": 0.899614155292511, "learning_rate": 7.875487920947941e-06, "loss": 0.6371, "step": 5143 }, { "epoch": 0.33, "grad_norm": 0.8895543217658997, "learning_rate": 7.874648510561503e-06, "loss": 0.6393, "step": 5144 }, { "epoch": 0.33, "grad_norm": 0.8901795744895935, "learning_rate": 7.873808979133616e-06, "loss": 0.6394, "step": 5145 }, { "epoch": 0.33, "grad_norm": 0.8974289894104004, "learning_rate": 7.872969326699631e-06, "loss": 0.5565, "step": 5146 }, { "epoch": 0.33, "grad_norm": 0.9629907608032227, "learning_rate": 7.8721295532949e-06, "loss": 0.6353, "step": 5147 }, { "epoch": 0.33, "grad_norm": 0.91104656457901, "learning_rate": 7.871289658954789e-06, "loss": 0.6165, "step": 5148 }, { "epoch": 0.33, "grad_norm": 0.9044172763824463, "learning_rate": 7.870449643714654e-06, "loss": 0.6053, "step": 5149 }, { "epoch": 0.33, "grad_norm": 0.823835015296936, "learning_rate": 7.869609507609874e-06, "loss": 0.5482, "step": 5150 }, { "epoch": 0.33, "grad_norm": 0.8716912865638733, "learning_rate": 7.868769250675818e-06, "loss": 0.6004, "step": 5151 }, { "epoch": 0.33, "grad_norm": 0.8245472311973572, "learning_rate": 7.867928872947869e-06, "loss": 0.5591, "step": 5152 }, { "epoch": 0.33, "grad_norm": 0.8959210515022278, "learning_rate": 7.867088374461413e-06, "loss": 0.6253, "step": 5153 }, { "epoch": 0.33, "grad_norm": 0.8492079377174377, "learning_rate": 7.866247755251838e-06, "loss": 0.6169, "step": 5154 }, { "epoch": 0.33, "grad_norm": 0.8312681317329407, "learning_rate": 7.865407015354542e-06, "loss": 0.5774, "step": 5155 }, { "epoch": 0.33, "grad_norm": 0.9149585962295532, "learning_rate": 7.864566154804925e-06, "loss": 0.6262, "step": 5156 }, { "epoch": 0.33, "grad_norm": 0.8946517705917358, "learning_rate": 7.86372517363839e-06, "loss": 0.563, "step": 5157 }, { "epoch": 0.33, "grad_norm": 0.8930898904800415, "learning_rate": 7.862884071890353e-06, "loss": 0.6389, "step": 5158 }, { "epoch": 0.33, "grad_norm": 0.8389832973480225, "learning_rate": 7.862042849596225e-06, "loss": 0.5617, "step": 5159 }, { "epoch": 0.33, "grad_norm": 0.8541855216026306, "learning_rate": 7.86120150679143e-06, "loss": 0.6236, "step": 5160 }, { "epoch": 0.33, "grad_norm": 0.8012550473213196, "learning_rate": 7.860360043511392e-06, "loss": 0.5796, "step": 5161 }, { "epoch": 0.33, "grad_norm": 0.8516356348991394, "learning_rate": 7.859518459791543e-06, "loss": 0.6257, "step": 5162 }, { "epoch": 0.33, "grad_norm": 0.8946587443351746, "learning_rate": 7.85867675566732e-06, "loss": 0.5812, "step": 5163 }, { "epoch": 0.33, "grad_norm": 0.8651425838470459, "learning_rate": 7.857834931174164e-06, "loss": 0.6141, "step": 5164 }, { "epoch": 0.33, "grad_norm": 0.8876100182533264, "learning_rate": 7.85699298634752e-06, "loss": 0.6099, "step": 5165 }, { "epoch": 0.33, "grad_norm": 0.8882783055305481, "learning_rate": 7.856150921222838e-06, "loss": 0.5971, "step": 5166 }, { "epoch": 0.33, "grad_norm": 0.862937867641449, "learning_rate": 7.85530873583558e-06, "loss": 0.6422, "step": 5167 }, { "epoch": 0.33, "grad_norm": 0.8829284906387329, "learning_rate": 7.854466430221203e-06, "loss": 0.5815, "step": 5168 }, { "epoch": 0.33, "grad_norm": 0.8932998776435852, "learning_rate": 7.853624004415172e-06, "loss": 0.6657, "step": 5169 }, { "epoch": 0.33, "grad_norm": 0.8481628894805908, "learning_rate": 7.852781458452964e-06, "loss": 0.6036, "step": 5170 }, { "epoch": 0.33, "grad_norm": 0.958634614944458, "learning_rate": 7.851938792370053e-06, "loss": 0.6527, "step": 5171 }, { "epoch": 0.33, "grad_norm": 0.8003389239311218, "learning_rate": 7.85109600620192e-06, "loss": 0.5318, "step": 5172 }, { "epoch": 0.33, "grad_norm": 0.922940731048584, "learning_rate": 7.85025309998405e-06, "loss": 0.5826, "step": 5173 }, { "epoch": 0.33, "grad_norm": 0.8557353019714355, "learning_rate": 7.849410073751942e-06, "loss": 0.5537, "step": 5174 }, { "epoch": 0.33, "grad_norm": 0.9081326723098755, "learning_rate": 7.848566927541084e-06, "loss": 0.5954, "step": 5175 }, { "epoch": 0.33, "grad_norm": 0.8481424450874329, "learning_rate": 7.847723661386985e-06, "loss": 0.582, "step": 5176 }, { "epoch": 0.33, "grad_norm": 0.9431670308113098, "learning_rate": 7.846880275325149e-06, "loss": 0.6132, "step": 5177 }, { "epoch": 0.33, "grad_norm": 0.827930748462677, "learning_rate": 7.846036769391086e-06, "loss": 0.612, "step": 5178 }, { "epoch": 0.33, "grad_norm": 0.8801954984664917, "learning_rate": 7.845193143620316e-06, "loss": 0.6171, "step": 5179 }, { "epoch": 0.33, "grad_norm": 0.9372230768203735, "learning_rate": 7.84434939804836e-06, "loss": 0.636, "step": 5180 }, { "epoch": 0.33, "grad_norm": 0.9458149075508118, "learning_rate": 7.843505532710748e-06, "loss": 0.6446, "step": 5181 }, { "epoch": 0.33, "grad_norm": 0.8717585802078247, "learning_rate": 7.84266154764301e-06, "loss": 0.6349, "step": 5182 }, { "epoch": 0.33, "grad_norm": 0.8793720602989197, "learning_rate": 7.84181744288068e-06, "loss": 0.6381, "step": 5183 }, { "epoch": 0.33, "grad_norm": 0.8223835229873657, "learning_rate": 7.840973218459305e-06, "loss": 0.5489, "step": 5184 }, { "epoch": 0.33, "grad_norm": 0.9283150434494019, "learning_rate": 7.84012887441443e-06, "loss": 0.6059, "step": 5185 }, { "epoch": 0.33, "grad_norm": 0.9703242778778076, "learning_rate": 7.839284410781609e-06, "loss": 0.5753, "step": 5186 }, { "epoch": 0.33, "grad_norm": 0.8721915483474731, "learning_rate": 7.838439827596398e-06, "loss": 0.624, "step": 5187 }, { "epoch": 0.33, "grad_norm": 0.9139184355735779, "learning_rate": 7.83759512489436e-06, "loss": 0.6585, "step": 5188 }, { "epoch": 0.33, "grad_norm": 0.8563583493232727, "learning_rate": 7.836750302711065e-06, "loss": 0.5913, "step": 5189 }, { "epoch": 0.33, "grad_norm": 0.8751399517059326, "learning_rate": 7.83590536108208e-06, "loss": 0.5832, "step": 5190 }, { "epoch": 0.33, "grad_norm": 0.8799748420715332, "learning_rate": 7.835060300042986e-06, "loss": 0.6191, "step": 5191 }, { "epoch": 0.33, "grad_norm": 0.8769707679748535, "learning_rate": 7.834215119629366e-06, "loss": 0.5834, "step": 5192 }, { "epoch": 0.33, "grad_norm": 0.8527321219444275, "learning_rate": 7.833369819876809e-06, "loss": 0.6034, "step": 5193 }, { "epoch": 0.33, "grad_norm": 0.9315845370292664, "learning_rate": 7.832524400820902e-06, "loss": 0.5899, "step": 5194 }, { "epoch": 0.33, "grad_norm": 0.9396250247955322, "learning_rate": 7.831678862497248e-06, "loss": 0.6207, "step": 5195 }, { "epoch": 0.33, "grad_norm": 0.9503593444824219, "learning_rate": 7.830833204941446e-06, "loss": 0.655, "step": 5196 }, { "epoch": 0.33, "grad_norm": 0.8898603916168213, "learning_rate": 7.829987428189108e-06, "loss": 0.6509, "step": 5197 }, { "epoch": 0.33, "grad_norm": 0.85368812084198, "learning_rate": 7.829141532275843e-06, "loss": 0.5851, "step": 5198 }, { "epoch": 0.33, "grad_norm": 0.9276217222213745, "learning_rate": 7.82829551723727e-06, "loss": 0.6307, "step": 5199 }, { "epoch": 0.33, "grad_norm": 0.8713779449462891, "learning_rate": 7.827449383109012e-06, "loss": 0.5739, "step": 5200 }, { "epoch": 0.33, "grad_norm": 0.9299573302268982, "learning_rate": 7.826603129926696e-06, "loss": 0.6233, "step": 5201 }, { "epoch": 0.33, "grad_norm": 0.8474642634391785, "learning_rate": 7.825756757725956e-06, "loss": 0.6163, "step": 5202 }, { "epoch": 0.33, "grad_norm": 0.9293124079704285, "learning_rate": 7.824910266542426e-06, "loss": 0.6704, "step": 5203 }, { "epoch": 0.33, "grad_norm": 0.8167198896408081, "learning_rate": 7.824063656411756e-06, "loss": 0.5296, "step": 5204 }, { "epoch": 0.33, "grad_norm": 0.8058587908744812, "learning_rate": 7.823216927369588e-06, "loss": 0.5909, "step": 5205 }, { "epoch": 0.33, "grad_norm": 0.9068382382392883, "learning_rate": 7.822370079451576e-06, "loss": 0.6536, "step": 5206 }, { "epoch": 0.33, "grad_norm": 0.8225257396697998, "learning_rate": 7.821523112693377e-06, "loss": 0.5772, "step": 5207 }, { "epoch": 0.33, "grad_norm": 0.8437464833259583, "learning_rate": 7.820676027130657e-06, "loss": 0.5401, "step": 5208 }, { "epoch": 0.33, "grad_norm": 0.8533555865287781, "learning_rate": 7.81982882279908e-06, "loss": 0.5964, "step": 5209 }, { "epoch": 0.33, "grad_norm": 0.8721039295196533, "learning_rate": 7.818981499734323e-06, "loss": 0.5896, "step": 5210 }, { "epoch": 0.33, "grad_norm": 0.8488752841949463, "learning_rate": 7.818134057972062e-06, "loss": 0.5717, "step": 5211 }, { "epoch": 0.33, "grad_norm": 0.8960286378860474, "learning_rate": 7.817286497547977e-06, "loss": 0.5905, "step": 5212 }, { "epoch": 0.33, "grad_norm": 0.9279623627662659, "learning_rate": 7.81643881849776e-06, "loss": 0.6313, "step": 5213 }, { "epoch": 0.33, "grad_norm": 0.8772743940353394, "learning_rate": 7.815591020857101e-06, "loss": 0.6305, "step": 5214 }, { "epoch": 0.33, "grad_norm": 0.9260540008544922, "learning_rate": 7.8147431046617e-06, "loss": 0.5967, "step": 5215 }, { "epoch": 0.33, "grad_norm": 0.9077113270759583, "learning_rate": 7.813895069947257e-06, "loss": 0.6454, "step": 5216 }, { "epoch": 0.33, "grad_norm": 0.8971432447433472, "learning_rate": 7.813046916749483e-06, "loss": 0.6458, "step": 5217 }, { "epoch": 0.33, "grad_norm": 0.9562937617301941, "learning_rate": 7.812198645104088e-06, "loss": 0.6051, "step": 5218 }, { "epoch": 0.33, "grad_norm": 0.9105967879295349, "learning_rate": 7.811350255046792e-06, "loss": 0.5909, "step": 5219 }, { "epoch": 0.33, "grad_norm": 0.890044629573822, "learning_rate": 7.810501746613316e-06, "loss": 0.5771, "step": 5220 }, { "epoch": 0.33, "grad_norm": 0.9129796028137207, "learning_rate": 7.809653119839389e-06, "loss": 0.5991, "step": 5221 }, { "epoch": 0.33, "grad_norm": 0.9497199654579163, "learning_rate": 7.808804374760742e-06, "loss": 0.6003, "step": 5222 }, { "epoch": 0.33, "grad_norm": 0.9804506301879883, "learning_rate": 7.807955511413114e-06, "loss": 0.6147, "step": 5223 }, { "epoch": 0.33, "grad_norm": 0.8824604749679565, "learning_rate": 7.80710652983225e-06, "loss": 0.5427, "step": 5224 }, { "epoch": 0.33, "grad_norm": 0.9190927743911743, "learning_rate": 7.806257430053893e-06, "loss": 0.5981, "step": 5225 }, { "epoch": 0.33, "grad_norm": 0.9122849702835083, "learning_rate": 7.8054082121138e-06, "loss": 0.6007, "step": 5226 }, { "epoch": 0.33, "grad_norm": 0.8687419295310974, "learning_rate": 7.804558876047724e-06, "loss": 0.6064, "step": 5227 }, { "epoch": 0.33, "grad_norm": 0.8574259281158447, "learning_rate": 7.80370942189143e-06, "loss": 0.5869, "step": 5228 }, { "epoch": 0.33, "grad_norm": 0.9501886367797852, "learning_rate": 7.802859849680686e-06, "loss": 0.6068, "step": 5229 }, { "epoch": 0.33, "grad_norm": 0.8507223725318909, "learning_rate": 7.802010159451267e-06, "loss": 0.6234, "step": 5230 }, { "epoch": 0.33, "grad_norm": 0.9953079223632812, "learning_rate": 7.801160351238945e-06, "loss": 0.6017, "step": 5231 }, { "epoch": 0.33, "grad_norm": 0.8714452385902405, "learning_rate": 7.800310425079505e-06, "loss": 0.5615, "step": 5232 }, { "epoch": 0.33, "grad_norm": 0.9719001054763794, "learning_rate": 7.799460381008736e-06, "loss": 0.5711, "step": 5233 }, { "epoch": 0.33, "grad_norm": 0.889895498752594, "learning_rate": 7.798610219062428e-06, "loss": 0.6251, "step": 5234 }, { "epoch": 0.33, "grad_norm": 0.8774588108062744, "learning_rate": 7.79775993927638e-06, "loss": 0.5806, "step": 5235 }, { "epoch": 0.33, "grad_norm": 0.8740803599357605, "learning_rate": 7.796909541686392e-06, "loss": 0.5344, "step": 5236 }, { "epoch": 0.33, "grad_norm": 0.8627974987030029, "learning_rate": 7.796059026328274e-06, "loss": 0.6076, "step": 5237 }, { "epoch": 0.33, "grad_norm": 0.858439564704895, "learning_rate": 7.795208393237839e-06, "loss": 0.5604, "step": 5238 }, { "epoch": 0.33, "grad_norm": 0.8907666802406311, "learning_rate": 7.794357642450899e-06, "loss": 0.6255, "step": 5239 }, { "epoch": 0.33, "grad_norm": 0.8581748008728027, "learning_rate": 7.793506774003282e-06, "loss": 0.6136, "step": 5240 }, { "epoch": 0.33, "grad_norm": 0.9139533638954163, "learning_rate": 7.792655787930811e-06, "loss": 0.6128, "step": 5241 }, { "epoch": 0.33, "grad_norm": 0.8762749433517456, "learning_rate": 7.791804684269322e-06, "loss": 0.6169, "step": 5242 }, { "epoch": 0.33, "grad_norm": 0.8736821413040161, "learning_rate": 7.790953463054647e-06, "loss": 0.6287, "step": 5243 }, { "epoch": 0.33, "grad_norm": 0.8526340126991272, "learning_rate": 7.790102124322633e-06, "loss": 0.5913, "step": 5244 }, { "epoch": 0.33, "grad_norm": 0.9196691513061523, "learning_rate": 7.789250668109124e-06, "loss": 0.6574, "step": 5245 }, { "epoch": 0.33, "grad_norm": 0.8942427039146423, "learning_rate": 7.788399094449971e-06, "loss": 0.6133, "step": 5246 }, { "epoch": 0.33, "grad_norm": 0.8590309023857117, "learning_rate": 7.787547403381033e-06, "loss": 0.5746, "step": 5247 }, { "epoch": 0.33, "grad_norm": 0.9016396403312683, "learning_rate": 7.786695594938172e-06, "loss": 0.5917, "step": 5248 }, { "epoch": 0.33, "grad_norm": 0.9520177245140076, "learning_rate": 7.785843669157253e-06, "loss": 0.657, "step": 5249 }, { "epoch": 0.33, "grad_norm": 0.9555111527442932, "learning_rate": 7.784991626074148e-06, "loss": 0.6724, "step": 5250 }, { "epoch": 0.33, "grad_norm": 0.9693423509597778, "learning_rate": 7.784139465724734e-06, "loss": 0.6453, "step": 5251 }, { "epoch": 0.33, "grad_norm": 0.8132855296134949, "learning_rate": 7.783287188144893e-06, "loss": 0.5865, "step": 5252 }, { "epoch": 0.33, "grad_norm": 0.8263188004493713, "learning_rate": 7.78243479337051e-06, "loss": 0.6248, "step": 5253 }, { "epoch": 0.33, "grad_norm": 0.8053151369094849, "learning_rate": 7.781582281437479e-06, "loss": 0.5827, "step": 5254 }, { "epoch": 0.33, "grad_norm": 0.9075903296470642, "learning_rate": 7.780729652381694e-06, "loss": 0.6344, "step": 5255 }, { "epoch": 0.33, "grad_norm": 0.8380961418151855, "learning_rate": 7.779876906239055e-06, "loss": 0.607, "step": 5256 }, { "epoch": 0.33, "grad_norm": 0.892805814743042, "learning_rate": 7.779024043045471e-06, "loss": 0.6279, "step": 5257 }, { "epoch": 0.33, "grad_norm": 0.9007843136787415, "learning_rate": 7.778171062836853e-06, "loss": 0.6653, "step": 5258 }, { "epoch": 0.33, "grad_norm": 0.9166417717933655, "learning_rate": 7.777317965649114e-06, "loss": 0.632, "step": 5259 }, { "epoch": 0.33, "grad_norm": 0.9331604838371277, "learning_rate": 7.776464751518177e-06, "loss": 0.6262, "step": 5260 }, { "epoch": 0.33, "grad_norm": 0.8771944642066956, "learning_rate": 7.775611420479971e-06, "loss": 0.5521, "step": 5261 }, { "epoch": 0.33, "grad_norm": 0.8667744398117065, "learning_rate": 7.774757972570423e-06, "loss": 0.5917, "step": 5262 }, { "epoch": 0.33, "grad_norm": 0.9010536074638367, "learning_rate": 7.773904407825467e-06, "loss": 0.6459, "step": 5263 }, { "epoch": 0.33, "grad_norm": 0.8958863615989685, "learning_rate": 7.773050726281048e-06, "loss": 0.5939, "step": 5264 }, { "epoch": 0.33, "grad_norm": 0.9226192831993103, "learning_rate": 7.772196927973109e-06, "loss": 0.6127, "step": 5265 }, { "epoch": 0.33, "grad_norm": 0.8647396564483643, "learning_rate": 7.771343012937602e-06, "loss": 0.6057, "step": 5266 }, { "epoch": 0.33, "grad_norm": 0.9021638631820679, "learning_rate": 7.77048898121048e-06, "loss": 0.6024, "step": 5267 }, { "epoch": 0.33, "grad_norm": 0.9035550355911255, "learning_rate": 7.769634832827706e-06, "loss": 0.577, "step": 5268 }, { "epoch": 0.33, "grad_norm": 0.8818480968475342, "learning_rate": 7.768780567825243e-06, "loss": 0.5895, "step": 5269 }, { "epoch": 0.33, "grad_norm": 0.8871473670005798, "learning_rate": 7.767926186239064e-06, "loss": 0.6386, "step": 5270 }, { "epoch": 0.33, "grad_norm": 0.9286932945251465, "learning_rate": 7.76707168810514e-06, "loss": 0.6352, "step": 5271 }, { "epoch": 0.33, "grad_norm": 0.8643122315406799, "learning_rate": 7.766217073459454e-06, "loss": 0.5854, "step": 5272 }, { "epoch": 0.33, "grad_norm": 0.8689426183700562, "learning_rate": 7.765362342337991e-06, "loss": 0.6032, "step": 5273 }, { "epoch": 0.33, "grad_norm": 0.8007031679153442, "learning_rate": 7.76450749477674e-06, "loss": 0.5664, "step": 5274 }, { "epoch": 0.33, "grad_norm": 0.8409014940261841, "learning_rate": 7.763652530811692e-06, "loss": 0.5953, "step": 5275 }, { "epoch": 0.33, "grad_norm": 0.8317943215370178, "learning_rate": 7.762797450478853e-06, "loss": 0.6057, "step": 5276 }, { "epoch": 0.33, "grad_norm": 0.8628614544868469, "learning_rate": 7.761942253814225e-06, "loss": 0.6164, "step": 5277 }, { "epoch": 0.33, "grad_norm": 0.87236487865448, "learning_rate": 7.761086940853814e-06, "loss": 0.5065, "step": 5278 }, { "epoch": 0.33, "grad_norm": 0.9762303233146667, "learning_rate": 7.76023151163364e-06, "loss": 0.5775, "step": 5279 }, { "epoch": 0.33, "grad_norm": 0.907646119594574, "learning_rate": 7.759375966189718e-06, "loss": 0.601, "step": 5280 }, { "epoch": 0.33, "grad_norm": 0.9219939112663269, "learning_rate": 7.758520304558072e-06, "loss": 0.5912, "step": 5281 }, { "epoch": 0.33, "grad_norm": 0.9645958542823792, "learning_rate": 7.757664526774733e-06, "loss": 0.6087, "step": 5282 }, { "epoch": 0.33, "grad_norm": 0.9233863353729248, "learning_rate": 7.756808632875737e-06, "loss": 0.6331, "step": 5283 }, { "epoch": 0.33, "grad_norm": 0.8966994285583496, "learning_rate": 7.755952622897117e-06, "loss": 0.5706, "step": 5284 }, { "epoch": 0.33, "grad_norm": 0.9332131743431091, "learning_rate": 7.755096496874918e-06, "loss": 0.5962, "step": 5285 }, { "epoch": 0.33, "grad_norm": 0.8440611958503723, "learning_rate": 7.75424025484519e-06, "loss": 0.6056, "step": 5286 }, { "epoch": 0.33, "grad_norm": 0.9401943683624268, "learning_rate": 7.753383896843988e-06, "loss": 0.6501, "step": 5287 }, { "epoch": 0.34, "grad_norm": 0.8421300053596497, "learning_rate": 7.752527422907368e-06, "loss": 0.5683, "step": 5288 }, { "epoch": 0.34, "grad_norm": 0.8217456340789795, "learning_rate": 7.751670833071393e-06, "loss": 0.5881, "step": 5289 }, { "epoch": 0.34, "grad_norm": 0.9123767018318176, "learning_rate": 7.750814127372131e-06, "loss": 0.5491, "step": 5290 }, { "epoch": 0.34, "grad_norm": 0.875048816204071, "learning_rate": 7.749957305845656e-06, "loss": 0.5582, "step": 5291 }, { "epoch": 0.34, "grad_norm": 0.9074432253837585, "learning_rate": 7.749100368528047e-06, "loss": 0.6511, "step": 5292 }, { "epoch": 0.34, "grad_norm": 0.8981906771659851, "learning_rate": 7.748243315455382e-06, "loss": 0.624, "step": 5293 }, { "epoch": 0.34, "grad_norm": 0.8196624517440796, "learning_rate": 7.747386146663753e-06, "loss": 0.5937, "step": 5294 }, { "epoch": 0.34, "grad_norm": 0.88856440782547, "learning_rate": 7.746528862189251e-06, "loss": 0.6291, "step": 5295 }, { "epoch": 0.34, "grad_norm": 0.8899400234222412, "learning_rate": 7.745671462067974e-06, "loss": 0.6181, "step": 5296 }, { "epoch": 0.34, "grad_norm": 0.910403847694397, "learning_rate": 7.74481394633602e-06, "loss": 0.628, "step": 5297 }, { "epoch": 0.34, "grad_norm": 0.9819753170013428, "learning_rate": 7.743956315029502e-06, "loss": 0.6307, "step": 5298 }, { "epoch": 0.34, "grad_norm": 0.9036092758178711, "learning_rate": 7.743098568184529e-06, "loss": 0.594, "step": 5299 }, { "epoch": 0.34, "grad_norm": 0.9001262784004211, "learning_rate": 7.742240705837217e-06, "loss": 0.5737, "step": 5300 }, { "epoch": 0.34, "grad_norm": 0.8720340132713318, "learning_rate": 7.741382728023687e-06, "loss": 0.6166, "step": 5301 }, { "epoch": 0.34, "grad_norm": 0.8694612383842468, "learning_rate": 7.74052463478007e-06, "loss": 0.5872, "step": 5302 }, { "epoch": 0.34, "grad_norm": 0.9097409844398499, "learning_rate": 7.739666426142493e-06, "loss": 0.5977, "step": 5303 }, { "epoch": 0.34, "grad_norm": 0.9092093706130981, "learning_rate": 7.738808102147093e-06, "loss": 0.5701, "step": 5304 }, { "epoch": 0.34, "grad_norm": 0.9413781762123108, "learning_rate": 7.737949662830012e-06, "loss": 0.6675, "step": 5305 }, { "epoch": 0.34, "grad_norm": 0.9250045418739319, "learning_rate": 7.737091108227395e-06, "loss": 0.6216, "step": 5306 }, { "epoch": 0.34, "grad_norm": 0.9547144174575806, "learning_rate": 7.736232438375391e-06, "loss": 0.6255, "step": 5307 }, { "epoch": 0.34, "grad_norm": 0.8688421845436096, "learning_rate": 7.735373653310161e-06, "loss": 0.5789, "step": 5308 }, { "epoch": 0.34, "grad_norm": 0.890227198600769, "learning_rate": 7.73451475306786e-06, "loss": 0.6464, "step": 5309 }, { "epoch": 0.34, "grad_norm": 0.922257661819458, "learning_rate": 7.733655737684657e-06, "loss": 0.5882, "step": 5310 }, { "epoch": 0.34, "grad_norm": 0.8927624821662903, "learning_rate": 7.732796607196719e-06, "loss": 0.6208, "step": 5311 }, { "epoch": 0.34, "grad_norm": 0.9111786484718323, "learning_rate": 7.731937361640223e-06, "loss": 0.6283, "step": 5312 }, { "epoch": 0.34, "grad_norm": 0.8802262544631958, "learning_rate": 7.73107800105135e-06, "loss": 0.6081, "step": 5313 }, { "epoch": 0.34, "grad_norm": 0.9143234491348267, "learning_rate": 7.730218525466283e-06, "loss": 0.6289, "step": 5314 }, { "epoch": 0.34, "grad_norm": 0.8903287649154663, "learning_rate": 7.729358934921209e-06, "loss": 0.6062, "step": 5315 }, { "epoch": 0.34, "grad_norm": 0.8877756595611572, "learning_rate": 7.728499229452326e-06, "loss": 0.5689, "step": 5316 }, { "epoch": 0.34, "grad_norm": 0.9442094564437866, "learning_rate": 7.727639409095833e-06, "loss": 0.6616, "step": 5317 }, { "epoch": 0.34, "grad_norm": 0.9445149302482605, "learning_rate": 7.726779473887933e-06, "loss": 0.6455, "step": 5318 }, { "epoch": 0.34, "grad_norm": 0.8811274766921997, "learning_rate": 7.725919423864837e-06, "loss": 0.6276, "step": 5319 }, { "epoch": 0.34, "grad_norm": 0.8983349800109863, "learning_rate": 7.725059259062753e-06, "loss": 0.6362, "step": 5320 }, { "epoch": 0.34, "grad_norm": 0.8891294002532959, "learning_rate": 7.724198979517905e-06, "loss": 0.59, "step": 5321 }, { "epoch": 0.34, "grad_norm": 0.9269400238990784, "learning_rate": 7.723338585266515e-06, "loss": 0.6242, "step": 5322 }, { "epoch": 0.34, "grad_norm": 0.8992114067077637, "learning_rate": 7.722478076344812e-06, "loss": 0.5932, "step": 5323 }, { "epoch": 0.34, "grad_norm": 0.9274572134017944, "learning_rate": 7.721617452789028e-06, "loss": 0.6501, "step": 5324 }, { "epoch": 0.34, "grad_norm": 0.8954104781150818, "learning_rate": 7.7207567146354e-06, "loss": 0.6378, "step": 5325 }, { "epoch": 0.34, "grad_norm": 0.9126365780830383, "learning_rate": 7.71989586192017e-06, "loss": 0.5861, "step": 5326 }, { "epoch": 0.34, "grad_norm": 0.9049072265625, "learning_rate": 7.719034894679589e-06, "loss": 0.6177, "step": 5327 }, { "epoch": 0.34, "grad_norm": 0.8507171273231506, "learning_rate": 7.718173812949908e-06, "loss": 0.5861, "step": 5328 }, { "epoch": 0.34, "grad_norm": 0.9381729960441589, "learning_rate": 7.717312616767382e-06, "loss": 0.6176, "step": 5329 }, { "epoch": 0.34, "grad_norm": 0.8493825793266296, "learning_rate": 7.716451306168276e-06, "loss": 0.6133, "step": 5330 }, { "epoch": 0.34, "grad_norm": 0.8845789432525635, "learning_rate": 7.715589881188852e-06, "loss": 0.5937, "step": 5331 }, { "epoch": 0.34, "grad_norm": 0.9143087863922119, "learning_rate": 7.71472834186539e-06, "loss": 0.5789, "step": 5332 }, { "epoch": 0.34, "grad_norm": 0.9418982863426208, "learning_rate": 7.713866688234157e-06, "loss": 0.6042, "step": 5333 }, { "epoch": 0.34, "grad_norm": 0.8240166902542114, "learning_rate": 7.713004920331441e-06, "loss": 0.6097, "step": 5334 }, { "epoch": 0.34, "grad_norm": 0.8444035649299622, "learning_rate": 7.712143038193525e-06, "loss": 0.5586, "step": 5335 }, { "epoch": 0.34, "grad_norm": 0.8501242995262146, "learning_rate": 7.7112810418567e-06, "loss": 0.5876, "step": 5336 }, { "epoch": 0.34, "grad_norm": 0.8870479464530945, "learning_rate": 7.710418931357263e-06, "loss": 0.6137, "step": 5337 }, { "epoch": 0.34, "grad_norm": 0.8917999267578125, "learning_rate": 7.709556706731514e-06, "loss": 0.6208, "step": 5338 }, { "epoch": 0.34, "grad_norm": 0.8900894522666931, "learning_rate": 7.708694368015758e-06, "loss": 0.5654, "step": 5339 }, { "epoch": 0.34, "grad_norm": 0.9891944527626038, "learning_rate": 7.707831915246304e-06, "loss": 0.5949, "step": 5340 }, { "epoch": 0.34, "grad_norm": 0.9004802703857422, "learning_rate": 7.706969348459469e-06, "loss": 0.6573, "step": 5341 }, { "epoch": 0.34, "grad_norm": 0.9725054502487183, "learning_rate": 7.70610666769157e-06, "loss": 0.6963, "step": 5342 }, { "epoch": 0.34, "grad_norm": 0.895476758480072, "learning_rate": 7.705243872978935e-06, "loss": 0.5893, "step": 5343 }, { "epoch": 0.34, "grad_norm": 0.898909866809845, "learning_rate": 7.704380964357889e-06, "loss": 0.5709, "step": 5344 }, { "epoch": 0.34, "grad_norm": 0.8911014795303345, "learning_rate": 7.70351794186477e-06, "loss": 0.631, "step": 5345 }, { "epoch": 0.34, "grad_norm": 0.8815633654594421, "learning_rate": 7.702654805535915e-06, "loss": 0.5953, "step": 5346 }, { "epoch": 0.34, "grad_norm": 0.8706081509590149, "learning_rate": 7.701791555407669e-06, "loss": 0.5798, "step": 5347 }, { "epoch": 0.34, "grad_norm": 0.839159369468689, "learning_rate": 7.700928191516378e-06, "loss": 0.6532, "step": 5348 }, { "epoch": 0.34, "grad_norm": 0.8313089609146118, "learning_rate": 7.700064713898398e-06, "loss": 0.6238, "step": 5349 }, { "epoch": 0.34, "grad_norm": 0.9056754112243652, "learning_rate": 7.699201122590086e-06, "loss": 0.6051, "step": 5350 }, { "epoch": 0.34, "grad_norm": 0.8567859530448914, "learning_rate": 7.6983374176278e-06, "loss": 0.6282, "step": 5351 }, { "epoch": 0.34, "grad_norm": 0.8340045809745789, "learning_rate": 7.697473599047918e-06, "loss": 0.605, "step": 5352 }, { "epoch": 0.34, "grad_norm": 0.8645469546318054, "learning_rate": 7.696609666886805e-06, "loss": 0.6075, "step": 5353 }, { "epoch": 0.34, "grad_norm": 0.8319426774978638, "learning_rate": 7.695745621180839e-06, "loss": 0.5254, "step": 5354 }, { "epoch": 0.34, "grad_norm": 0.824740469455719, "learning_rate": 7.694881461966402e-06, "loss": 0.636, "step": 5355 }, { "epoch": 0.34, "grad_norm": 0.8777102828025818, "learning_rate": 7.694017189279882e-06, "loss": 0.6096, "step": 5356 }, { "epoch": 0.34, "grad_norm": 0.8239105939865112, "learning_rate": 7.69315280315767e-06, "loss": 0.5593, "step": 5357 }, { "epoch": 0.34, "grad_norm": 0.9346814751625061, "learning_rate": 7.692288303636163e-06, "loss": 0.6136, "step": 5358 }, { "epoch": 0.34, "grad_norm": 0.8404369950294495, "learning_rate": 7.69142369075176e-06, "loss": 0.5977, "step": 5359 }, { "epoch": 0.34, "grad_norm": 0.8172876834869385, "learning_rate": 7.690558964540872e-06, "loss": 0.6095, "step": 5360 }, { "epoch": 0.34, "grad_norm": 0.913045346736908, "learning_rate": 7.6896941250399e-06, "loss": 0.5932, "step": 5361 }, { "epoch": 0.34, "grad_norm": 0.8815491795539856, "learning_rate": 7.688829172285267e-06, "loss": 0.6035, "step": 5362 }, { "epoch": 0.34, "grad_norm": 0.8652727007865906, "learning_rate": 7.687964106313392e-06, "loss": 0.5792, "step": 5363 }, { "epoch": 0.34, "grad_norm": 0.8789160847663879, "learning_rate": 7.687098927160701e-06, "loss": 0.6358, "step": 5364 }, { "epoch": 0.34, "grad_norm": 0.8862786889076233, "learning_rate": 7.68623363486362e-06, "loss": 0.6316, "step": 5365 }, { "epoch": 0.34, "grad_norm": 0.9177654981613159, "learning_rate": 7.685368229458584e-06, "loss": 0.5892, "step": 5366 }, { "epoch": 0.34, "grad_norm": 0.9102894067764282, "learning_rate": 7.684502710982035e-06, "loss": 0.6003, "step": 5367 }, { "epoch": 0.34, "grad_norm": 0.8236129283905029, "learning_rate": 7.683637079470418e-06, "loss": 0.6175, "step": 5368 }, { "epoch": 0.34, "grad_norm": 0.886927604675293, "learning_rate": 7.682771334960178e-06, "loss": 0.5757, "step": 5369 }, { "epoch": 0.34, "grad_norm": 0.8804916739463806, "learning_rate": 7.681905477487769e-06, "loss": 0.6581, "step": 5370 }, { "epoch": 0.34, "grad_norm": 0.8064201474189758, "learning_rate": 7.68103950708965e-06, "loss": 0.5431, "step": 5371 }, { "epoch": 0.34, "grad_norm": 0.8837984204292297, "learning_rate": 7.680173423802282e-06, "loss": 0.6277, "step": 5372 }, { "epoch": 0.34, "grad_norm": 0.9009150266647339, "learning_rate": 7.679307227662136e-06, "loss": 0.6023, "step": 5373 }, { "epoch": 0.34, "grad_norm": 0.9290765523910522, "learning_rate": 7.678440918705686e-06, "loss": 0.6381, "step": 5374 }, { "epoch": 0.34, "grad_norm": 0.9562059640884399, "learning_rate": 7.677574496969404e-06, "loss": 0.6542, "step": 5375 }, { "epoch": 0.34, "grad_norm": 0.9217070937156677, "learning_rate": 7.676707962489775e-06, "loss": 0.6375, "step": 5376 }, { "epoch": 0.34, "grad_norm": 0.8787111043930054, "learning_rate": 7.675841315303284e-06, "loss": 0.6749, "step": 5377 }, { "epoch": 0.34, "grad_norm": 0.9050287008285522, "learning_rate": 7.674974555446425e-06, "loss": 0.6174, "step": 5378 }, { "epoch": 0.34, "grad_norm": 0.8558552861213684, "learning_rate": 7.674107682955693e-06, "loss": 0.5902, "step": 5379 }, { "epoch": 0.34, "grad_norm": 0.8936824202537537, "learning_rate": 7.67324069786759e-06, "loss": 0.6121, "step": 5380 }, { "epoch": 0.34, "grad_norm": 0.9137732982635498, "learning_rate": 7.67237360021862e-06, "loss": 0.6546, "step": 5381 }, { "epoch": 0.34, "grad_norm": 0.9589877128601074, "learning_rate": 7.671506390045293e-06, "loss": 0.6522, "step": 5382 }, { "epoch": 0.34, "grad_norm": 0.9142245054244995, "learning_rate": 7.670639067384126e-06, "loss": 0.6284, "step": 5383 }, { "epoch": 0.34, "grad_norm": 0.8741958141326904, "learning_rate": 7.66977163227164e-06, "loss": 0.5957, "step": 5384 }, { "epoch": 0.34, "grad_norm": 0.9198216795921326, "learning_rate": 7.668904084744357e-06, "loss": 0.5629, "step": 5385 }, { "epoch": 0.34, "grad_norm": 0.8666446805000305, "learning_rate": 7.668036424838808e-06, "loss": 0.5829, "step": 5386 }, { "epoch": 0.34, "grad_norm": 0.8472068309783936, "learning_rate": 7.667168652591524e-06, "loss": 0.6183, "step": 5387 }, { "epoch": 0.34, "grad_norm": 0.960817277431488, "learning_rate": 7.66630076803905e-06, "loss": 0.6487, "step": 5388 }, { "epoch": 0.34, "grad_norm": 0.8368389010429382, "learning_rate": 7.665432771217922e-06, "loss": 0.5899, "step": 5389 }, { "epoch": 0.34, "grad_norm": 0.8463855385780334, "learning_rate": 7.664564662164696e-06, "loss": 0.6046, "step": 5390 }, { "epoch": 0.34, "grad_norm": 0.9495236277580261, "learning_rate": 7.66369644091592e-06, "loss": 0.6498, "step": 5391 }, { "epoch": 0.34, "grad_norm": 0.8692662119865417, "learning_rate": 7.662828107508153e-06, "loss": 0.6034, "step": 5392 }, { "epoch": 0.34, "grad_norm": 0.8595423698425293, "learning_rate": 7.661959661977958e-06, "loss": 0.5903, "step": 5393 }, { "epoch": 0.34, "grad_norm": 0.9107503890991211, "learning_rate": 7.661091104361902e-06, "loss": 0.6285, "step": 5394 }, { "epoch": 0.34, "grad_norm": 0.8617141842842102, "learning_rate": 7.660222434696556e-06, "loss": 0.6372, "step": 5395 }, { "epoch": 0.34, "grad_norm": 0.8542279005050659, "learning_rate": 7.6593536530185e-06, "loss": 0.6045, "step": 5396 }, { "epoch": 0.34, "grad_norm": 0.9125630855560303, "learning_rate": 7.658484759364308e-06, "loss": 0.6111, "step": 5397 }, { "epoch": 0.34, "grad_norm": 0.9282498359680176, "learning_rate": 7.657615753770575e-06, "loss": 0.6401, "step": 5398 }, { "epoch": 0.34, "grad_norm": 0.760006844997406, "learning_rate": 7.656746636273889e-06, "loss": 0.5258, "step": 5399 }, { "epoch": 0.34, "grad_norm": 0.8629961013793945, "learning_rate": 7.655877406910841e-06, "loss": 0.5787, "step": 5400 }, { "epoch": 0.34, "grad_norm": 0.9403144121170044, "learning_rate": 7.655008065718036e-06, "loss": 0.6448, "step": 5401 }, { "epoch": 0.34, "grad_norm": 0.8610935211181641, "learning_rate": 7.654138612732078e-06, "loss": 0.6125, "step": 5402 }, { "epoch": 0.34, "grad_norm": 0.9002783298492432, "learning_rate": 7.653269047989575e-06, "loss": 0.5981, "step": 5403 }, { "epoch": 0.34, "grad_norm": 0.8649095892906189, "learning_rate": 7.652399371527142e-06, "loss": 0.6343, "step": 5404 }, { "epoch": 0.34, "grad_norm": 0.9302815794944763, "learning_rate": 7.651529583381398e-06, "loss": 0.6527, "step": 5405 }, { "epoch": 0.34, "grad_norm": 0.9225360751152039, "learning_rate": 7.65065968358897e-06, "loss": 0.6909, "step": 5406 }, { "epoch": 0.34, "grad_norm": 0.9352942109107971, "learning_rate": 7.649789672186483e-06, "loss": 0.6563, "step": 5407 }, { "epoch": 0.34, "grad_norm": 0.9485490918159485, "learning_rate": 7.648919549210567e-06, "loss": 0.6339, "step": 5408 }, { "epoch": 0.34, "grad_norm": 0.8463318347930908, "learning_rate": 7.648049314697869e-06, "loss": 0.5555, "step": 5409 }, { "epoch": 0.34, "grad_norm": 0.8683443069458008, "learning_rate": 7.647178968685024e-06, "loss": 0.5861, "step": 5410 }, { "epoch": 0.34, "grad_norm": 0.95866459608078, "learning_rate": 7.646308511208682e-06, "loss": 0.6818, "step": 5411 }, { "epoch": 0.34, "grad_norm": 0.856253981590271, "learning_rate": 7.645437942305491e-06, "loss": 0.5561, "step": 5412 }, { "epoch": 0.34, "grad_norm": 0.9046028852462769, "learning_rate": 7.644567262012115e-06, "loss": 0.6405, "step": 5413 }, { "epoch": 0.34, "grad_norm": 0.8811362981796265, "learning_rate": 7.643696470365209e-06, "loss": 0.6266, "step": 5414 }, { "epoch": 0.34, "grad_norm": 0.8369075059890747, "learning_rate": 7.642825567401444e-06, "loss": 0.5538, "step": 5415 }, { "epoch": 0.34, "grad_norm": 0.9165283441543579, "learning_rate": 7.641954553157487e-06, "loss": 0.5952, "step": 5416 }, { "epoch": 0.34, "grad_norm": 0.8416288495063782, "learning_rate": 7.641083427670014e-06, "loss": 0.6023, "step": 5417 }, { "epoch": 0.34, "grad_norm": 0.8895038962364197, "learning_rate": 7.640212190975707e-06, "loss": 0.5969, "step": 5418 }, { "epoch": 0.34, "grad_norm": 0.8565618991851807, "learning_rate": 7.639340843111247e-06, "loss": 0.5769, "step": 5419 }, { "epoch": 0.34, "grad_norm": 0.825664222240448, "learning_rate": 7.638469384113328e-06, "loss": 0.5199, "step": 5420 }, { "epoch": 0.34, "grad_norm": 0.8779264092445374, "learning_rate": 7.637597814018638e-06, "loss": 0.5795, "step": 5421 }, { "epoch": 0.34, "grad_norm": 0.8773237466812134, "learning_rate": 7.636726132863883e-06, "loss": 0.6019, "step": 5422 }, { "epoch": 0.34, "grad_norm": 0.9273678660392761, "learning_rate": 7.635854340685762e-06, "loss": 0.6015, "step": 5423 }, { "epoch": 0.34, "grad_norm": 0.9024190902709961, "learning_rate": 7.634982437520984e-06, "loss": 0.5763, "step": 5424 }, { "epoch": 0.34, "grad_norm": 0.8656637668609619, "learning_rate": 7.634110423406262e-06, "loss": 0.5785, "step": 5425 }, { "epoch": 0.34, "grad_norm": 0.8862728476524353, "learning_rate": 7.633238298378315e-06, "loss": 0.6294, "step": 5426 }, { "epoch": 0.34, "grad_norm": 0.8349065184593201, "learning_rate": 7.632366062473862e-06, "loss": 0.5862, "step": 5427 }, { "epoch": 0.34, "grad_norm": 0.8949868083000183, "learning_rate": 7.631493715729632e-06, "loss": 0.5676, "step": 5428 }, { "epoch": 0.34, "grad_norm": 0.897675633430481, "learning_rate": 7.630621258182354e-06, "loss": 0.5963, "step": 5429 }, { "epoch": 0.34, "grad_norm": 0.8373680114746094, "learning_rate": 7.62974868986877e-06, "loss": 0.5706, "step": 5430 }, { "epoch": 0.34, "grad_norm": 0.9069997072219849, "learning_rate": 7.628876010825614e-06, "loss": 0.6501, "step": 5431 }, { "epoch": 0.34, "grad_norm": 0.8189912438392639, "learning_rate": 7.628003221089635e-06, "loss": 0.5475, "step": 5432 }, { "epoch": 0.34, "grad_norm": 0.9497076869010925, "learning_rate": 7.6271303206975825e-06, "loss": 0.6459, "step": 5433 }, { "epoch": 0.34, "grad_norm": 0.8492891788482666, "learning_rate": 7.626257309686211e-06, "loss": 0.5883, "step": 5434 }, { "epoch": 0.34, "grad_norm": 0.8823180198669434, "learning_rate": 7.6253841880922805e-06, "loss": 0.5968, "step": 5435 }, { "epoch": 0.34, "grad_norm": 0.8924271464347839, "learning_rate": 7.624510955952555e-06, "loss": 0.5706, "step": 5436 }, { "epoch": 0.34, "grad_norm": 0.8900327682495117, "learning_rate": 7.623637613303805e-06, "loss": 0.5903, "step": 5437 }, { "epoch": 0.34, "grad_norm": 0.8470126986503601, "learning_rate": 7.6227641601827996e-06, "loss": 0.553, "step": 5438 }, { "epoch": 0.34, "grad_norm": 0.8747822642326355, "learning_rate": 7.62189059662632e-06, "loss": 0.6095, "step": 5439 }, { "epoch": 0.34, "grad_norm": 0.8955729603767395, "learning_rate": 7.621016922671147e-06, "loss": 0.5983, "step": 5440 }, { "epoch": 0.34, "grad_norm": 0.8502835631370544, "learning_rate": 7.620143138354072e-06, "loss": 0.5978, "step": 5441 }, { "epoch": 0.34, "grad_norm": 0.8627199530601501, "learning_rate": 7.6192692437118825e-06, "loss": 0.6227, "step": 5442 }, { "epoch": 0.34, "grad_norm": 0.930798351764679, "learning_rate": 7.618395238781377e-06, "loss": 0.6769, "step": 5443 }, { "epoch": 0.34, "grad_norm": 0.889930009841919, "learning_rate": 7.617521123599356e-06, "loss": 0.6135, "step": 5444 }, { "epoch": 0.34, "grad_norm": 0.910830557346344, "learning_rate": 7.616646898202629e-06, "loss": 0.6337, "step": 5445 }, { "epoch": 0.35, "grad_norm": 0.867741048336029, "learning_rate": 7.6157725626280014e-06, "loss": 0.5566, "step": 5446 }, { "epoch": 0.35, "grad_norm": 0.8112003207206726, "learning_rate": 7.61489811691229e-06, "loss": 0.57, "step": 5447 }, { "epoch": 0.35, "grad_norm": 0.9317660927772522, "learning_rate": 7.614023561092319e-06, "loss": 0.6141, "step": 5448 }, { "epoch": 0.35, "grad_norm": 0.8938388228416443, "learning_rate": 7.613148895204906e-06, "loss": 0.6114, "step": 5449 }, { "epoch": 0.35, "grad_norm": 0.8985342979431152, "learning_rate": 7.612274119286884e-06, "loss": 0.626, "step": 5450 }, { "epoch": 0.35, "grad_norm": 0.9427514672279358, "learning_rate": 7.611399233375087e-06, "loss": 0.6303, "step": 5451 }, { "epoch": 0.35, "grad_norm": 0.9037792682647705, "learning_rate": 7.610524237506354e-06, "loss": 0.6456, "step": 5452 }, { "epoch": 0.35, "grad_norm": 0.8891815543174744, "learning_rate": 7.6096491317175246e-06, "loss": 0.6235, "step": 5453 }, { "epoch": 0.35, "grad_norm": 0.8519503474235535, "learning_rate": 7.608773916045449e-06, "loss": 0.5835, "step": 5454 }, { "epoch": 0.35, "grad_norm": 0.8248928785324097, "learning_rate": 7.607898590526979e-06, "loss": 0.5891, "step": 5455 }, { "epoch": 0.35, "grad_norm": 0.8195099234580994, "learning_rate": 7.607023155198973e-06, "loss": 0.5548, "step": 5456 }, { "epoch": 0.35, "grad_norm": 0.8967714309692383, "learning_rate": 7.606147610098289e-06, "loss": 0.6207, "step": 5457 }, { "epoch": 0.35, "grad_norm": 0.8687184453010559, "learning_rate": 7.605271955261796e-06, "loss": 0.6149, "step": 5458 }, { "epoch": 0.35, "grad_norm": 0.9524543285369873, "learning_rate": 7.604396190726364e-06, "loss": 0.5933, "step": 5459 }, { "epoch": 0.35, "grad_norm": 0.9514956474304199, "learning_rate": 7.603520316528869e-06, "loss": 0.6466, "step": 5460 }, { "epoch": 0.35, "grad_norm": 0.89705491065979, "learning_rate": 7.60264433270619e-06, "loss": 0.608, "step": 5461 }, { "epoch": 0.35, "grad_norm": 0.803554117679596, "learning_rate": 7.601768239295213e-06, "loss": 0.5203, "step": 5462 }, { "epoch": 0.35, "grad_norm": 0.9711521863937378, "learning_rate": 7.600892036332825e-06, "loss": 0.6572, "step": 5463 }, { "epoch": 0.35, "grad_norm": 0.8963906168937683, "learning_rate": 7.600015723855922e-06, "loss": 0.6329, "step": 5464 }, { "epoch": 0.35, "grad_norm": 0.9670395255088806, "learning_rate": 7.599139301901401e-06, "loss": 0.6172, "step": 5465 }, { "epoch": 0.35, "grad_norm": 0.9355558156967163, "learning_rate": 7.5982627705061666e-06, "loss": 0.6574, "step": 5466 }, { "epoch": 0.35, "grad_norm": 0.8632118105888367, "learning_rate": 7.597386129707126e-06, "loss": 0.6021, "step": 5467 }, { "epoch": 0.35, "grad_norm": 0.8859368562698364, "learning_rate": 7.596509379541191e-06, "loss": 0.5763, "step": 5468 }, { "epoch": 0.35, "grad_norm": 0.8613402843475342, "learning_rate": 7.595632520045277e-06, "loss": 0.6077, "step": 5469 }, { "epoch": 0.35, "grad_norm": 0.8863072395324707, "learning_rate": 7.594755551256308e-06, "loss": 0.5881, "step": 5470 }, { "epoch": 0.35, "grad_norm": 0.9150487780570984, "learning_rate": 7.593878473211209e-06, "loss": 0.6351, "step": 5471 }, { "epoch": 0.35, "grad_norm": 0.8424960970878601, "learning_rate": 7.593001285946913e-06, "loss": 0.5416, "step": 5472 }, { "epoch": 0.35, "grad_norm": 0.8403632640838623, "learning_rate": 7.592123989500351e-06, "loss": 0.6015, "step": 5473 }, { "epoch": 0.35, "grad_norm": 0.9487394094467163, "learning_rate": 7.591246583908465e-06, "loss": 0.6518, "step": 5474 }, { "epoch": 0.35, "grad_norm": 0.915139377117157, "learning_rate": 7.590369069208201e-06, "loss": 0.6258, "step": 5475 }, { "epoch": 0.35, "grad_norm": 0.8754032254219055, "learning_rate": 7.589491445436505e-06, "loss": 0.6127, "step": 5476 }, { "epoch": 0.35, "grad_norm": 0.8421617746353149, "learning_rate": 7.588613712630334e-06, "loss": 0.5761, "step": 5477 }, { "epoch": 0.35, "grad_norm": 0.8702454566955566, "learning_rate": 7.587735870826643e-06, "loss": 0.5819, "step": 5478 }, { "epoch": 0.35, "grad_norm": 0.8792976140975952, "learning_rate": 7.586857920062399e-06, "loss": 0.6374, "step": 5479 }, { "epoch": 0.35, "grad_norm": 0.9013099074363708, "learning_rate": 7.585979860374566e-06, "loss": 0.6053, "step": 5480 }, { "epoch": 0.35, "grad_norm": 0.8370474576950073, "learning_rate": 7.5851016918001165e-06, "loss": 0.5803, "step": 5481 }, { "epoch": 0.35, "grad_norm": 0.8300336003303528, "learning_rate": 7.584223414376028e-06, "loss": 0.5983, "step": 5482 }, { "epoch": 0.35, "grad_norm": 0.9231306910514832, "learning_rate": 7.583345028139282e-06, "loss": 0.6231, "step": 5483 }, { "epoch": 0.35, "grad_norm": 0.8919202089309692, "learning_rate": 7.582466533126863e-06, "loss": 0.6033, "step": 5484 }, { "epoch": 0.35, "grad_norm": 0.8878291845321655, "learning_rate": 7.581587929375761e-06, "loss": 0.6483, "step": 5485 }, { "epoch": 0.35, "grad_norm": 0.7955220341682434, "learning_rate": 7.580709216922973e-06, "loss": 0.6065, "step": 5486 }, { "epoch": 0.35, "grad_norm": 0.9067592620849609, "learning_rate": 7.579830395805499e-06, "loss": 0.6261, "step": 5487 }, { "epoch": 0.35, "grad_norm": 0.9961644411087036, "learning_rate": 7.578951466060341e-06, "loss": 0.6041, "step": 5488 }, { "epoch": 0.35, "grad_norm": 0.8630528450012207, "learning_rate": 7.578072427724506e-06, "loss": 0.5756, "step": 5489 }, { "epoch": 0.35, "grad_norm": 0.8708525896072388, "learning_rate": 7.577193280835011e-06, "loss": 0.6126, "step": 5490 }, { "epoch": 0.35, "grad_norm": 0.8305570483207703, "learning_rate": 7.5763140254288716e-06, "loss": 0.5874, "step": 5491 }, { "epoch": 0.35, "grad_norm": 0.9040376543998718, "learning_rate": 7.575434661543113e-06, "loss": 0.6401, "step": 5492 }, { "epoch": 0.35, "grad_norm": 0.9144179224967957, "learning_rate": 7.574555189214756e-06, "loss": 0.6298, "step": 5493 }, { "epoch": 0.35, "grad_norm": 0.9132001399993896, "learning_rate": 7.573675608480841e-06, "loss": 0.5974, "step": 5494 }, { "epoch": 0.35, "grad_norm": 0.8850140571594238, "learning_rate": 7.5727959193783974e-06, "loss": 0.6335, "step": 5495 }, { "epoch": 0.35, "grad_norm": 0.9785036444664001, "learning_rate": 7.571916121944467e-06, "loss": 0.5492, "step": 5496 }, { "epoch": 0.35, "grad_norm": 0.8609431385993958, "learning_rate": 7.571036216216097e-06, "loss": 0.5885, "step": 5497 }, { "epoch": 0.35, "grad_norm": 0.9320406317710876, "learning_rate": 7.570156202230335e-06, "loss": 0.6477, "step": 5498 }, { "epoch": 0.35, "grad_norm": 0.8788042664527893, "learning_rate": 7.569276080024237e-06, "loss": 0.6574, "step": 5499 }, { "epoch": 0.35, "grad_norm": 0.8510634899139404, "learning_rate": 7.5683958496348596e-06, "loss": 0.6256, "step": 5500 }, { "epoch": 0.35, "grad_norm": 0.8864413499832153, "learning_rate": 7.567515511099268e-06, "loss": 0.5793, "step": 5501 }, { "epoch": 0.35, "grad_norm": 0.860865592956543, "learning_rate": 7.56663506445453e-06, "loss": 0.6229, "step": 5502 }, { "epoch": 0.35, "grad_norm": 0.913250744342804, "learning_rate": 7.5657545097377205e-06, "loss": 0.5914, "step": 5503 }, { "epoch": 0.35, "grad_norm": 0.8102872967720032, "learning_rate": 7.564873846985912e-06, "loss": 0.5833, "step": 5504 }, { "epoch": 0.35, "grad_norm": 0.8643232583999634, "learning_rate": 7.563993076236189e-06, "loss": 0.6061, "step": 5505 }, { "epoch": 0.35, "grad_norm": 0.9757564067840576, "learning_rate": 7.563112197525637e-06, "loss": 0.6515, "step": 5506 }, { "epoch": 0.35, "grad_norm": 0.8703305721282959, "learning_rate": 7.562231210891347e-06, "loss": 0.5819, "step": 5507 }, { "epoch": 0.35, "grad_norm": 0.8819752931594849, "learning_rate": 7.561350116370413e-06, "loss": 0.5966, "step": 5508 }, { "epoch": 0.35, "grad_norm": 0.8967403173446655, "learning_rate": 7.560468913999937e-06, "loss": 0.6338, "step": 5509 }, { "epoch": 0.35, "grad_norm": 0.8586651682853699, "learning_rate": 7.559587603817022e-06, "loss": 0.6135, "step": 5510 }, { "epoch": 0.35, "grad_norm": 0.8888817429542542, "learning_rate": 7.558706185858777e-06, "loss": 0.6236, "step": 5511 }, { "epoch": 0.35, "grad_norm": 0.8927393555641174, "learning_rate": 7.557824660162316e-06, "loss": 0.6012, "step": 5512 }, { "epoch": 0.35, "grad_norm": 0.9518846273422241, "learning_rate": 7.556943026764756e-06, "loss": 0.5581, "step": 5513 }, { "epoch": 0.35, "grad_norm": 0.8957030773162842, "learning_rate": 7.55606128570322e-06, "loss": 0.5849, "step": 5514 }, { "epoch": 0.35, "grad_norm": 0.9107878804206848, "learning_rate": 7.5551794370148366e-06, "loss": 0.6504, "step": 5515 }, { "epoch": 0.35, "grad_norm": 0.8559346795082092, "learning_rate": 7.554297480736734e-06, "loss": 0.5891, "step": 5516 }, { "epoch": 0.35, "grad_norm": 0.8798370361328125, "learning_rate": 7.553415416906051e-06, "loss": 0.6028, "step": 5517 }, { "epoch": 0.35, "grad_norm": 0.9414769411087036, "learning_rate": 7.552533245559927e-06, "loss": 0.6174, "step": 5518 }, { "epoch": 0.35, "grad_norm": 0.8583175539970398, "learning_rate": 7.551650966735509e-06, "loss": 0.5641, "step": 5519 }, { "epoch": 0.35, "grad_norm": 0.8779864311218262, "learning_rate": 7.550768580469945e-06, "loss": 0.6283, "step": 5520 }, { "epoch": 0.35, "grad_norm": 0.8857389092445374, "learning_rate": 7.549886086800389e-06, "loss": 0.5855, "step": 5521 }, { "epoch": 0.35, "grad_norm": 0.8128264546394348, "learning_rate": 7.549003485763999e-06, "loss": 0.4986, "step": 5522 }, { "epoch": 0.35, "grad_norm": 0.9185560941696167, "learning_rate": 7.548120777397941e-06, "loss": 0.6204, "step": 5523 }, { "epoch": 0.35, "grad_norm": 0.9126561284065247, "learning_rate": 7.547237961739382e-06, "loss": 0.6516, "step": 5524 }, { "epoch": 0.35, "grad_norm": 0.8364182114601135, "learning_rate": 7.546355038825492e-06, "loss": 0.573, "step": 5525 }, { "epoch": 0.35, "grad_norm": 0.8545491099357605, "learning_rate": 7.545472008693451e-06, "loss": 0.6251, "step": 5526 }, { "epoch": 0.35, "grad_norm": 0.9368882775306702, "learning_rate": 7.544588871380439e-06, "loss": 0.6421, "step": 5527 }, { "epoch": 0.35, "grad_norm": 0.8525586128234863, "learning_rate": 7.54370562692364e-06, "loss": 0.6311, "step": 5528 }, { "epoch": 0.35, "grad_norm": 0.8583645820617676, "learning_rate": 7.542822275360246e-06, "loss": 0.6295, "step": 5529 }, { "epoch": 0.35, "grad_norm": 0.9096074104309082, "learning_rate": 7.541938816727453e-06, "loss": 0.6628, "step": 5530 }, { "epoch": 0.35, "grad_norm": 0.8697735071182251, "learning_rate": 7.5410552510624594e-06, "loss": 0.5986, "step": 5531 }, { "epoch": 0.35, "grad_norm": 0.869107186794281, "learning_rate": 7.540171578402466e-06, "loss": 0.6293, "step": 5532 }, { "epoch": 0.35, "grad_norm": 0.8785176873207092, "learning_rate": 7.539287798784688e-06, "loss": 0.5971, "step": 5533 }, { "epoch": 0.35, "grad_norm": 0.9223856329917908, "learning_rate": 7.538403912246333e-06, "loss": 0.5812, "step": 5534 }, { "epoch": 0.35, "grad_norm": 0.8824152946472168, "learning_rate": 7.537519918824619e-06, "loss": 0.5718, "step": 5535 }, { "epoch": 0.35, "grad_norm": 0.8068228960037231, "learning_rate": 7.5366358185567676e-06, "loss": 0.5295, "step": 5536 }, { "epoch": 0.35, "grad_norm": 0.8570433259010315, "learning_rate": 7.5357516114800075e-06, "loss": 0.6182, "step": 5537 }, { "epoch": 0.35, "grad_norm": 0.8204308748245239, "learning_rate": 7.534867297631569e-06, "loss": 0.5698, "step": 5538 }, { "epoch": 0.35, "grad_norm": 0.9290466904640198, "learning_rate": 7.533982877048685e-06, "loss": 0.6612, "step": 5539 }, { "epoch": 0.35, "grad_norm": 0.925410807132721, "learning_rate": 7.5330983497685975e-06, "loss": 0.6015, "step": 5540 }, { "epoch": 0.35, "grad_norm": 0.8890109658241272, "learning_rate": 7.532213715828551e-06, "loss": 0.5958, "step": 5541 }, { "epoch": 0.35, "grad_norm": 0.8188264966011047, "learning_rate": 7.531328975265795e-06, "loss": 0.6184, "step": 5542 }, { "epoch": 0.35, "grad_norm": 0.8602173328399658, "learning_rate": 7.53044412811758e-06, "loss": 0.5672, "step": 5543 }, { "epoch": 0.35, "grad_norm": 0.8795886039733887, "learning_rate": 7.529559174421167e-06, "loss": 0.6153, "step": 5544 }, { "epoch": 0.35, "grad_norm": 0.8424326777458191, "learning_rate": 7.528674114213816e-06, "loss": 0.6177, "step": 5545 }, { "epoch": 0.35, "grad_norm": 0.9181726574897766, "learning_rate": 7.527788947532795e-06, "loss": 0.6457, "step": 5546 }, { "epoch": 0.35, "grad_norm": 0.980117678642273, "learning_rate": 7.526903674415373e-06, "loss": 0.6007, "step": 5547 }, { "epoch": 0.35, "grad_norm": 0.9220601916313171, "learning_rate": 7.526018294898832e-06, "loss": 0.6301, "step": 5548 }, { "epoch": 0.35, "grad_norm": 0.9367707371711731, "learning_rate": 7.525132809020443e-06, "loss": 0.5758, "step": 5549 }, { "epoch": 0.35, "grad_norm": 0.8081425428390503, "learning_rate": 7.524247216817499e-06, "loss": 0.5754, "step": 5550 }, { "epoch": 0.35, "grad_norm": 0.8742004632949829, "learning_rate": 7.5233615183272836e-06, "loss": 0.5852, "step": 5551 }, { "epoch": 0.35, "grad_norm": 0.9098623394966125, "learning_rate": 7.522475713587095e-06, "loss": 0.6307, "step": 5552 }, { "epoch": 0.35, "grad_norm": 0.8602703809738159, "learning_rate": 7.521589802634228e-06, "loss": 0.633, "step": 5553 }, { "epoch": 0.35, "grad_norm": 0.8878544569015503, "learning_rate": 7.520703785505987e-06, "loss": 0.6116, "step": 5554 }, { "epoch": 0.35, "grad_norm": 0.8622645735740662, "learning_rate": 7.519817662239678e-06, "loss": 0.5874, "step": 5555 }, { "epoch": 0.35, "grad_norm": 0.8816009759902954, "learning_rate": 7.518931432872614e-06, "loss": 0.6309, "step": 5556 }, { "epoch": 0.35, "grad_norm": 0.8535116910934448, "learning_rate": 7.518045097442111e-06, "loss": 0.6286, "step": 5557 }, { "epoch": 0.35, "grad_norm": 0.9062272310256958, "learning_rate": 7.517158655985483e-06, "loss": 0.6187, "step": 5558 }, { "epoch": 0.35, "grad_norm": 0.9796926975250244, "learning_rate": 7.516272108540066e-06, "loss": 0.6543, "step": 5559 }, { "epoch": 0.35, "grad_norm": 0.9051242470741272, "learning_rate": 7.515385455143183e-06, "loss": 0.6491, "step": 5560 }, { "epoch": 0.35, "grad_norm": 0.935102105140686, "learning_rate": 7.514498695832169e-06, "loss": 0.6305, "step": 5561 }, { "epoch": 0.35, "grad_norm": 0.8482328653335571, "learning_rate": 7.51361183064436e-06, "loss": 0.6013, "step": 5562 }, { "epoch": 0.35, "grad_norm": 0.9039483070373535, "learning_rate": 7.512724859617103e-06, "loss": 0.591, "step": 5563 }, { "epoch": 0.35, "grad_norm": 0.924065113067627, "learning_rate": 7.511837782787743e-06, "loss": 0.5901, "step": 5564 }, { "epoch": 0.35, "grad_norm": 0.8983739614486694, "learning_rate": 7.510950600193632e-06, "loss": 0.5847, "step": 5565 }, { "epoch": 0.35, "grad_norm": 0.8916130065917969, "learning_rate": 7.510063311872125e-06, "loss": 0.5815, "step": 5566 }, { "epoch": 0.35, "grad_norm": 0.9395748972892761, "learning_rate": 7.509175917860586e-06, "loss": 0.6353, "step": 5567 }, { "epoch": 0.35, "grad_norm": 0.8860333561897278, "learning_rate": 7.508288418196377e-06, "loss": 0.6206, "step": 5568 }, { "epoch": 0.35, "grad_norm": 0.8644207715988159, "learning_rate": 7.507400812916868e-06, "loss": 0.5608, "step": 5569 }, { "epoch": 0.35, "grad_norm": 0.8900479674339294, "learning_rate": 7.5065131020594316e-06, "loss": 0.6308, "step": 5570 }, { "epoch": 0.35, "grad_norm": 0.862021267414093, "learning_rate": 7.5056252856614505e-06, "loss": 0.5858, "step": 5571 }, { "epoch": 0.35, "grad_norm": 0.90825355052948, "learning_rate": 7.504737363760306e-06, "loss": 0.6993, "step": 5572 }, { "epoch": 0.35, "grad_norm": 0.9253191351890564, "learning_rate": 7.503849336393382e-06, "loss": 0.6081, "step": 5573 }, { "epoch": 0.35, "grad_norm": 0.9334720969200134, "learning_rate": 7.502961203598074e-06, "loss": 0.6203, "step": 5574 }, { "epoch": 0.35, "grad_norm": 0.861369252204895, "learning_rate": 7.502072965411776e-06, "loss": 0.5873, "step": 5575 }, { "epoch": 0.35, "grad_norm": 0.941525399684906, "learning_rate": 7.501184621871891e-06, "loss": 0.5849, "step": 5576 }, { "epoch": 0.35, "grad_norm": 0.9132643342018127, "learning_rate": 7.5002961730158204e-06, "loss": 0.5786, "step": 5577 }, { "epoch": 0.35, "grad_norm": 0.8970744013786316, "learning_rate": 7.499407618880979e-06, "loss": 0.6126, "step": 5578 }, { "epoch": 0.35, "grad_norm": 0.8514313697814941, "learning_rate": 7.498518959504775e-06, "loss": 0.6322, "step": 5579 }, { "epoch": 0.35, "grad_norm": 0.8997253775596619, "learning_rate": 7.49763019492463e-06, "loss": 0.6461, "step": 5580 }, { "epoch": 0.35, "grad_norm": 0.8769670128822327, "learning_rate": 7.4967413251779655e-06, "loss": 0.6362, "step": 5581 }, { "epoch": 0.35, "grad_norm": 0.928396999835968, "learning_rate": 7.495852350302209e-06, "loss": 0.6229, "step": 5582 }, { "epoch": 0.35, "grad_norm": 0.8975219130516052, "learning_rate": 7.494963270334794e-06, "loss": 0.6457, "step": 5583 }, { "epoch": 0.35, "grad_norm": 0.8608077764511108, "learning_rate": 7.494074085313155e-06, "loss": 0.5369, "step": 5584 }, { "epoch": 0.35, "grad_norm": 0.917822003364563, "learning_rate": 7.493184795274731e-06, "loss": 0.6064, "step": 5585 }, { "epoch": 0.35, "grad_norm": 0.9204185605049133, "learning_rate": 7.49229540025697e-06, "loss": 0.7078, "step": 5586 }, { "epoch": 0.35, "grad_norm": 0.8705748915672302, "learning_rate": 7.4914059002973185e-06, "loss": 0.6384, "step": 5587 }, { "epoch": 0.35, "grad_norm": 0.8483352661132812, "learning_rate": 7.490516295433232e-06, "loss": 0.5437, "step": 5588 }, { "epoch": 0.35, "grad_norm": 0.8893619179725647, "learning_rate": 7.489626585702169e-06, "loss": 0.5999, "step": 5589 }, { "epoch": 0.35, "grad_norm": 0.8645599484443665, "learning_rate": 7.4887367711415905e-06, "loss": 0.6121, "step": 5590 }, { "epoch": 0.35, "grad_norm": 0.8719490766525269, "learning_rate": 7.487846851788965e-06, "loss": 0.6051, "step": 5591 }, { "epoch": 0.35, "grad_norm": 0.9109401702880859, "learning_rate": 7.486956827681761e-06, "loss": 0.616, "step": 5592 }, { "epoch": 0.35, "grad_norm": 0.9400895237922668, "learning_rate": 7.4860666988574585e-06, "loss": 0.6035, "step": 5593 }, { "epoch": 0.35, "grad_norm": 0.8858636021614075, "learning_rate": 7.485176465353534e-06, "loss": 0.5885, "step": 5594 }, { "epoch": 0.35, "grad_norm": 0.7887114882469177, "learning_rate": 7.484286127207476e-06, "loss": 0.5434, "step": 5595 }, { "epoch": 0.35, "grad_norm": 0.8736209869384766, "learning_rate": 7.48339568445677e-06, "loss": 0.6051, "step": 5596 }, { "epoch": 0.35, "grad_norm": 0.8536117672920227, "learning_rate": 7.482505137138911e-06, "loss": 0.6083, "step": 5597 }, { "epoch": 0.35, "grad_norm": 0.9169653654098511, "learning_rate": 7.4816144852913975e-06, "loss": 0.6361, "step": 5598 }, { "epoch": 0.35, "grad_norm": 0.9062714576721191, "learning_rate": 7.480723728951731e-06, "loss": 0.6284, "step": 5599 }, { "epoch": 0.35, "grad_norm": 0.8766511678695679, "learning_rate": 7.479832868157416e-06, "loss": 0.6035, "step": 5600 }, { "epoch": 0.35, "grad_norm": 0.8506543636322021, "learning_rate": 7.4789419029459675e-06, "loss": 0.5387, "step": 5601 }, { "epoch": 0.35, "grad_norm": 0.8686463236808777, "learning_rate": 7.478050833354897e-06, "loss": 0.6335, "step": 5602 }, { "epoch": 0.35, "grad_norm": 0.8849419951438904, "learning_rate": 7.47715965942173e-06, "loss": 0.5983, "step": 5603 }, { "epoch": 0.36, "grad_norm": 0.8478937745094299, "learning_rate": 7.476268381183984e-06, "loss": 0.5266, "step": 5604 }, { "epoch": 0.36, "grad_norm": 0.9055560827255249, "learning_rate": 7.475376998679193e-06, "loss": 0.6483, "step": 5605 }, { "epoch": 0.36, "grad_norm": 0.9069551825523376, "learning_rate": 7.474485511944887e-06, "loss": 0.6147, "step": 5606 }, { "epoch": 0.36, "grad_norm": 0.8674218058586121, "learning_rate": 7.4735939210186036e-06, "loss": 0.5723, "step": 5607 }, { "epoch": 0.36, "grad_norm": 0.8519677519798279, "learning_rate": 7.472702225937884e-06, "loss": 0.5941, "step": 5608 }, { "epoch": 0.36, "grad_norm": 0.9071281552314758, "learning_rate": 7.471810426740278e-06, "loss": 0.5995, "step": 5609 }, { "epoch": 0.36, "grad_norm": 0.8679485321044922, "learning_rate": 7.470918523463333e-06, "loss": 0.5833, "step": 5610 }, { "epoch": 0.36, "grad_norm": 0.8723646402359009, "learning_rate": 7.470026516144604e-06, "loss": 0.6437, "step": 5611 }, { "epoch": 0.36, "grad_norm": 0.8494699001312256, "learning_rate": 7.469134404821652e-06, "loss": 0.5599, "step": 5612 }, { "epoch": 0.36, "grad_norm": 0.9291670322418213, "learning_rate": 7.468242189532039e-06, "loss": 0.5898, "step": 5613 }, { "epoch": 0.36, "grad_norm": 0.9132770895957947, "learning_rate": 7.467349870313334e-06, "loss": 0.5415, "step": 5614 }, { "epoch": 0.36, "grad_norm": 0.8860681653022766, "learning_rate": 7.466457447203109e-06, "loss": 0.5958, "step": 5615 }, { "epoch": 0.36, "grad_norm": 0.8410258293151855, "learning_rate": 7.465564920238941e-06, "loss": 0.5857, "step": 5616 }, { "epoch": 0.36, "grad_norm": 0.8339051008224487, "learning_rate": 7.464672289458411e-06, "loss": 0.5913, "step": 5617 }, { "epoch": 0.36, "grad_norm": 0.9605398774147034, "learning_rate": 7.463779554899107e-06, "loss": 0.6138, "step": 5618 }, { "epoch": 0.36, "grad_norm": 0.8939738273620605, "learning_rate": 7.462886716598614e-06, "loss": 0.6042, "step": 5619 }, { "epoch": 0.36, "grad_norm": 0.842354953289032, "learning_rate": 7.46199377459453e-06, "loss": 0.6018, "step": 5620 }, { "epoch": 0.36, "grad_norm": 0.9019548892974854, "learning_rate": 7.461100728924455e-06, "loss": 0.556, "step": 5621 }, { "epoch": 0.36, "grad_norm": 0.8871820569038391, "learning_rate": 7.460207579625988e-06, "loss": 0.5807, "step": 5622 }, { "epoch": 0.36, "grad_norm": 0.8736592531204224, "learning_rate": 7.459314326736738e-06, "loss": 0.5672, "step": 5623 }, { "epoch": 0.36, "grad_norm": 0.9413090348243713, "learning_rate": 7.458420970294317e-06, "loss": 0.6163, "step": 5624 }, { "epoch": 0.36, "grad_norm": 0.8506051301956177, "learning_rate": 7.457527510336342e-06, "loss": 0.5363, "step": 5625 }, { "epoch": 0.36, "grad_norm": 0.869382381439209, "learning_rate": 7.456633946900432e-06, "loss": 0.6099, "step": 5626 }, { "epoch": 0.36, "grad_norm": 0.8384730219841003, "learning_rate": 7.455740280024212e-06, "loss": 0.5834, "step": 5627 }, { "epoch": 0.36, "grad_norm": 0.8252652883529663, "learning_rate": 7.454846509745311e-06, "loss": 0.5918, "step": 5628 }, { "epoch": 0.36, "grad_norm": 0.9574599862098694, "learning_rate": 7.453952636101366e-06, "loss": 0.6747, "step": 5629 }, { "epoch": 0.36, "grad_norm": 0.8753709197044373, "learning_rate": 7.45305865913001e-06, "loss": 0.6559, "step": 5630 }, { "epoch": 0.36, "grad_norm": 0.9628907442092896, "learning_rate": 7.452164578868889e-06, "loss": 0.6476, "step": 5631 }, { "epoch": 0.36, "grad_norm": 0.9094507098197937, "learning_rate": 7.451270395355647e-06, "loss": 0.6579, "step": 5632 }, { "epoch": 0.36, "grad_norm": 0.7960030436515808, "learning_rate": 7.450376108627937e-06, "loss": 0.5376, "step": 5633 }, { "epoch": 0.36, "grad_norm": 0.8576752543449402, "learning_rate": 7.449481718723411e-06, "loss": 0.616, "step": 5634 }, { "epoch": 0.36, "grad_norm": 0.8710610866546631, "learning_rate": 7.448587225679733e-06, "loss": 0.6292, "step": 5635 }, { "epoch": 0.36, "grad_norm": 0.9258856177330017, "learning_rate": 7.447692629534565e-06, "loss": 0.5753, "step": 5636 }, { "epoch": 0.36, "grad_norm": 0.911663830280304, "learning_rate": 7.446797930325574e-06, "loss": 0.6113, "step": 5637 }, { "epoch": 0.36, "grad_norm": 0.8927462697029114, "learning_rate": 7.445903128090435e-06, "loss": 0.5843, "step": 5638 }, { "epoch": 0.36, "grad_norm": 0.9059770703315735, "learning_rate": 7.445008222866823e-06, "loss": 0.5649, "step": 5639 }, { "epoch": 0.36, "grad_norm": 0.8566960692405701, "learning_rate": 7.444113214692422e-06, "loss": 0.5713, "step": 5640 }, { "epoch": 0.36, "grad_norm": 0.9214499592781067, "learning_rate": 7.443218103604915e-06, "loss": 0.5919, "step": 5641 }, { "epoch": 0.36, "grad_norm": 0.9560672640800476, "learning_rate": 7.442322889641992e-06, "loss": 0.6563, "step": 5642 }, { "epoch": 0.36, "grad_norm": 0.9713243246078491, "learning_rate": 7.441427572841349e-06, "loss": 0.6801, "step": 5643 }, { "epoch": 0.36, "grad_norm": 0.8504186272621155, "learning_rate": 7.440532153240685e-06, "loss": 0.5809, "step": 5644 }, { "epoch": 0.36, "grad_norm": 0.8800424337387085, "learning_rate": 7.4396366308777015e-06, "loss": 0.6323, "step": 5645 }, { "epoch": 0.36, "grad_norm": 0.8435956239700317, "learning_rate": 7.4387410057901056e-06, "loss": 0.5616, "step": 5646 }, { "epoch": 0.36, "grad_norm": 0.8319722414016724, "learning_rate": 7.4378452780156094e-06, "loss": 0.5398, "step": 5647 }, { "epoch": 0.36, "grad_norm": 0.9279896020889282, "learning_rate": 7.436949447591931e-06, "loss": 0.5779, "step": 5648 }, { "epoch": 0.36, "grad_norm": 0.8527793884277344, "learning_rate": 7.4360535145567865e-06, "loss": 0.5943, "step": 5649 }, { "epoch": 0.36, "grad_norm": 0.8525310754776001, "learning_rate": 7.435157478947905e-06, "loss": 0.5427, "step": 5650 }, { "epoch": 0.36, "grad_norm": 0.8320702910423279, "learning_rate": 7.434261340803013e-06, "loss": 0.5816, "step": 5651 }, { "epoch": 0.36, "grad_norm": 0.8606296181678772, "learning_rate": 7.433365100159844e-06, "loss": 0.582, "step": 5652 }, { "epoch": 0.36, "grad_norm": 0.9004180431365967, "learning_rate": 7.432468757056136e-06, "loss": 0.581, "step": 5653 }, { "epoch": 0.36, "grad_norm": 0.858650267124176, "learning_rate": 7.431572311529629e-06, "loss": 0.6042, "step": 5654 }, { "epoch": 0.36, "grad_norm": 0.9310391545295715, "learning_rate": 7.4306757636180725e-06, "loss": 0.5514, "step": 5655 }, { "epoch": 0.36, "grad_norm": 0.8439887762069702, "learning_rate": 7.429779113359214e-06, "loss": 0.5636, "step": 5656 }, { "epoch": 0.36, "grad_norm": 0.890603244304657, "learning_rate": 7.428882360790811e-06, "loss": 0.572, "step": 5657 }, { "epoch": 0.36, "grad_norm": 0.9427062273025513, "learning_rate": 7.427985505950619e-06, "loss": 0.5997, "step": 5658 }, { "epoch": 0.36, "grad_norm": 0.9088672399520874, "learning_rate": 7.427088548876406e-06, "loss": 0.6665, "step": 5659 }, { "epoch": 0.36, "grad_norm": 0.9224042892456055, "learning_rate": 7.426191489605936e-06, "loss": 0.596, "step": 5660 }, { "epoch": 0.36, "grad_norm": 0.8789502382278442, "learning_rate": 7.425294328176984e-06, "loss": 0.567, "step": 5661 }, { "epoch": 0.36, "grad_norm": 0.8930298686027527, "learning_rate": 7.4243970646273236e-06, "loss": 0.5902, "step": 5662 }, { "epoch": 0.36, "grad_norm": 0.8686020970344543, "learning_rate": 7.423499698994737e-06, "loss": 0.5841, "step": 5663 }, { "epoch": 0.36, "grad_norm": 0.8149586319923401, "learning_rate": 7.422602231317009e-06, "loss": 0.5857, "step": 5664 }, { "epoch": 0.36, "grad_norm": 0.9393472075462341, "learning_rate": 7.421704661631929e-06, "loss": 0.6012, "step": 5665 }, { "epoch": 0.36, "grad_norm": 0.8844897150993347, "learning_rate": 7.42080698997729e-06, "loss": 0.6458, "step": 5666 }, { "epoch": 0.36, "grad_norm": 0.8492723107337952, "learning_rate": 7.419909216390889e-06, "loss": 0.6077, "step": 5667 }, { "epoch": 0.36, "grad_norm": 0.8630576133728027, "learning_rate": 7.4190113409105304e-06, "loss": 0.5597, "step": 5668 }, { "epoch": 0.36, "grad_norm": 0.8691625595092773, "learning_rate": 7.418113363574018e-06, "loss": 0.5845, "step": 5669 }, { "epoch": 0.36, "grad_norm": 0.9205952286720276, "learning_rate": 7.417215284419165e-06, "loss": 0.6316, "step": 5670 }, { "epoch": 0.36, "grad_norm": 0.9179142713546753, "learning_rate": 7.416317103483784e-06, "loss": 0.6545, "step": 5671 }, { "epoch": 0.36, "grad_norm": 0.9906255602836609, "learning_rate": 7.415418820805698e-06, "loss": 0.5923, "step": 5672 }, { "epoch": 0.36, "grad_norm": 0.8854468464851379, "learning_rate": 7.414520436422725e-06, "loss": 0.6767, "step": 5673 }, { "epoch": 0.36, "grad_norm": 0.8236328959465027, "learning_rate": 7.413621950372698e-06, "loss": 0.5705, "step": 5674 }, { "epoch": 0.36, "grad_norm": 0.8900964856147766, "learning_rate": 7.4127233626934456e-06, "loss": 0.615, "step": 5675 }, { "epoch": 0.36, "grad_norm": 0.9543713927268982, "learning_rate": 7.411824673422808e-06, "loss": 0.6227, "step": 5676 }, { "epoch": 0.36, "grad_norm": 0.8835585713386536, "learning_rate": 7.410925882598621e-06, "loss": 0.6486, "step": 5677 }, { "epoch": 0.36, "grad_norm": 0.8607789874076843, "learning_rate": 7.410026990258734e-06, "loss": 0.595, "step": 5678 }, { "epoch": 0.36, "grad_norm": 0.8899136781692505, "learning_rate": 7.409127996440993e-06, "loss": 0.5775, "step": 5679 }, { "epoch": 0.36, "grad_norm": 0.9142584204673767, "learning_rate": 7.408228901183254e-06, "loss": 0.6192, "step": 5680 }, { "epoch": 0.36, "grad_norm": 0.9016997218132019, "learning_rate": 7.407329704523372e-06, "loss": 0.6105, "step": 5681 }, { "epoch": 0.36, "grad_norm": 0.9058455228805542, "learning_rate": 7.406430406499212e-06, "loss": 0.6091, "step": 5682 }, { "epoch": 0.36, "grad_norm": 0.8867766261100769, "learning_rate": 7.405531007148638e-06, "loss": 0.6034, "step": 5683 }, { "epoch": 0.36, "grad_norm": 0.8825892210006714, "learning_rate": 7.404631506509523e-06, "loss": 0.5718, "step": 5684 }, { "epoch": 0.36, "grad_norm": 0.9403483867645264, "learning_rate": 7.403731904619739e-06, "loss": 0.6664, "step": 5685 }, { "epoch": 0.36, "grad_norm": 0.8549020886421204, "learning_rate": 7.402832201517166e-06, "loss": 0.6042, "step": 5686 }, { "epoch": 0.36, "grad_norm": 0.9207231402397156, "learning_rate": 7.40193239723969e-06, "loss": 0.6183, "step": 5687 }, { "epoch": 0.36, "grad_norm": 0.8826068639755249, "learning_rate": 7.401032491825194e-06, "loss": 0.6157, "step": 5688 }, { "epoch": 0.36, "grad_norm": 0.9273738265037537, "learning_rate": 7.400132485311573e-06, "loss": 0.6135, "step": 5689 }, { "epoch": 0.36, "grad_norm": 0.9432485699653625, "learning_rate": 7.399232377736722e-06, "loss": 0.641, "step": 5690 }, { "epoch": 0.36, "grad_norm": 0.957802951335907, "learning_rate": 7.398332169138544e-06, "loss": 0.614, "step": 5691 }, { "epoch": 0.36, "grad_norm": 0.9042625427246094, "learning_rate": 7.397431859554941e-06, "loss": 0.6075, "step": 5692 }, { "epoch": 0.36, "grad_norm": 0.8862330317497253, "learning_rate": 7.396531449023821e-06, "loss": 0.5823, "step": 5693 }, { "epoch": 0.36, "grad_norm": 0.8898954391479492, "learning_rate": 7.395630937583099e-06, "loss": 0.6319, "step": 5694 }, { "epoch": 0.36, "grad_norm": 0.8645822405815125, "learning_rate": 7.394730325270693e-06, "loss": 0.6029, "step": 5695 }, { "epoch": 0.36, "grad_norm": 0.9035110473632812, "learning_rate": 7.393829612124524e-06, "loss": 0.6147, "step": 5696 }, { "epoch": 0.36, "grad_norm": 0.8781278133392334, "learning_rate": 7.392928798182516e-06, "loss": 0.5922, "step": 5697 }, { "epoch": 0.36, "grad_norm": 0.8469416499137878, "learning_rate": 7.392027883482602e-06, "loss": 0.6564, "step": 5698 }, { "epoch": 0.36, "grad_norm": 0.8922897577285767, "learning_rate": 7.391126868062714e-06, "loss": 0.5904, "step": 5699 }, { "epoch": 0.36, "grad_norm": 0.8153558969497681, "learning_rate": 7.390225751960792e-06, "loss": 0.5945, "step": 5700 }, { "epoch": 0.36, "grad_norm": 0.8306947350502014, "learning_rate": 7.389324535214779e-06, "loss": 0.623, "step": 5701 }, { "epoch": 0.36, "grad_norm": 0.9121823906898499, "learning_rate": 7.388423217862621e-06, "loss": 0.6231, "step": 5702 }, { "epoch": 0.36, "grad_norm": 0.9361130595207214, "learning_rate": 7.387521799942271e-06, "loss": 0.6028, "step": 5703 }, { "epoch": 0.36, "grad_norm": 0.8886232972145081, "learning_rate": 7.386620281491683e-06, "loss": 0.6612, "step": 5704 }, { "epoch": 0.36, "grad_norm": 0.8650026917457581, "learning_rate": 7.385718662548817e-06, "loss": 0.6552, "step": 5705 }, { "epoch": 0.36, "grad_norm": 0.9328054785728455, "learning_rate": 7.384816943151638e-06, "loss": 0.604, "step": 5706 }, { "epoch": 0.36, "grad_norm": 0.8479319214820862, "learning_rate": 7.383915123338113e-06, "loss": 0.5936, "step": 5707 }, { "epoch": 0.36, "grad_norm": 0.8170728087425232, "learning_rate": 7.3830132031462165e-06, "loss": 0.6188, "step": 5708 }, { "epoch": 0.36, "grad_norm": 0.8698776364326477, "learning_rate": 7.382111182613923e-06, "loss": 0.5548, "step": 5709 }, { "epoch": 0.36, "grad_norm": 0.8348639607429504, "learning_rate": 7.381209061779214e-06, "loss": 0.635, "step": 5710 }, { "epoch": 0.36, "grad_norm": 0.9122574329376221, "learning_rate": 7.380306840680076e-06, "loss": 0.6264, "step": 5711 }, { "epoch": 0.36, "grad_norm": 0.8634544014930725, "learning_rate": 7.379404519354496e-06, "loss": 0.6421, "step": 5712 }, { "epoch": 0.36, "grad_norm": 0.830940306186676, "learning_rate": 7.378502097840471e-06, "loss": 0.599, "step": 5713 }, { "epoch": 0.36, "grad_norm": 0.9275731444358826, "learning_rate": 7.377599576175995e-06, "loss": 0.6653, "step": 5714 }, { "epoch": 0.36, "grad_norm": 0.9509021639823914, "learning_rate": 7.376696954399073e-06, "loss": 0.6107, "step": 5715 }, { "epoch": 0.36, "grad_norm": 0.8291517496109009, "learning_rate": 7.37579423254771e-06, "loss": 0.6103, "step": 5716 }, { "epoch": 0.36, "grad_norm": 0.864422082901001, "learning_rate": 7.374891410659917e-06, "loss": 0.6241, "step": 5717 }, { "epoch": 0.36, "grad_norm": 0.8923708200454712, "learning_rate": 7.373988488773708e-06, "loss": 0.6212, "step": 5718 }, { "epoch": 0.36, "grad_norm": 0.8855364918708801, "learning_rate": 7.3730854669271015e-06, "loss": 0.5703, "step": 5719 }, { "epoch": 0.36, "grad_norm": 0.8736538290977478, "learning_rate": 7.372182345158122e-06, "loss": 0.6469, "step": 5720 }, { "epoch": 0.36, "grad_norm": 0.9270285964012146, "learning_rate": 7.3712791235047976e-06, "loss": 0.6194, "step": 5721 }, { "epoch": 0.36, "grad_norm": 0.9518702030181885, "learning_rate": 7.370375802005157e-06, "loss": 0.6458, "step": 5722 }, { "epoch": 0.36, "grad_norm": 0.948585569858551, "learning_rate": 7.369472380697236e-06, "loss": 0.5592, "step": 5723 }, { "epoch": 0.36, "grad_norm": 0.8145323991775513, "learning_rate": 7.368568859619078e-06, "loss": 0.5643, "step": 5724 }, { "epoch": 0.36, "grad_norm": 0.88991379737854, "learning_rate": 7.3676652388087234e-06, "loss": 0.6035, "step": 5725 }, { "epoch": 0.36, "grad_norm": 0.9013904333114624, "learning_rate": 7.366761518304223e-06, "loss": 0.598, "step": 5726 }, { "epoch": 0.36, "grad_norm": 0.9081125259399414, "learning_rate": 7.365857698143628e-06, "loss": 0.5988, "step": 5727 }, { "epoch": 0.36, "grad_norm": 0.9681587219238281, "learning_rate": 7.364953778364996e-06, "loss": 0.6415, "step": 5728 }, { "epoch": 0.36, "grad_norm": 0.8465878367424011, "learning_rate": 7.364049759006387e-06, "loss": 0.6199, "step": 5729 }, { "epoch": 0.36, "grad_norm": 0.8657549619674683, "learning_rate": 7.363145640105867e-06, "loss": 0.5782, "step": 5730 }, { "epoch": 0.36, "grad_norm": 0.8750969171524048, "learning_rate": 7.362241421701505e-06, "loss": 0.5967, "step": 5731 }, { "epoch": 0.36, "grad_norm": 0.9876574277877808, "learning_rate": 7.3613371038313744e-06, "loss": 0.6121, "step": 5732 }, { "epoch": 0.36, "grad_norm": 0.906506359577179, "learning_rate": 7.360432686533552e-06, "loss": 0.6573, "step": 5733 }, { "epoch": 0.36, "grad_norm": 0.8797792196273804, "learning_rate": 7.359528169846121e-06, "loss": 0.6132, "step": 5734 }, { "epoch": 0.36, "grad_norm": 0.7947115898132324, "learning_rate": 7.358623553807167e-06, "loss": 0.5339, "step": 5735 }, { "epoch": 0.36, "grad_norm": 0.8234474658966064, "learning_rate": 7.35771883845478e-06, "loss": 0.5606, "step": 5736 }, { "epoch": 0.36, "grad_norm": 0.827809751033783, "learning_rate": 7.356814023827055e-06, "loss": 0.5549, "step": 5737 }, { "epoch": 0.36, "grad_norm": 0.9173133373260498, "learning_rate": 7.35590910996209e-06, "loss": 0.608, "step": 5738 }, { "epoch": 0.36, "grad_norm": 0.8398633599281311, "learning_rate": 7.355004096897987e-06, "loss": 0.5656, "step": 5739 }, { "epoch": 0.36, "grad_norm": 0.8507029414176941, "learning_rate": 7.354098984672856e-06, "loss": 0.5315, "step": 5740 }, { "epoch": 0.36, "grad_norm": 0.9494758248329163, "learning_rate": 7.353193773324805e-06, "loss": 0.6437, "step": 5741 }, { "epoch": 0.36, "grad_norm": 0.8865925669670105, "learning_rate": 7.35228846289195e-06, "loss": 0.5627, "step": 5742 }, { "epoch": 0.36, "grad_norm": 0.9043111205101013, "learning_rate": 7.351383053412411e-06, "loss": 0.6526, "step": 5743 }, { "epoch": 0.36, "grad_norm": 0.8272423148155212, "learning_rate": 7.350477544924313e-06, "loss": 0.6375, "step": 5744 }, { "epoch": 0.36, "grad_norm": 0.8952882885932922, "learning_rate": 7.349571937465782e-06, "loss": 0.6383, "step": 5745 }, { "epoch": 0.36, "grad_norm": 0.9154927730560303, "learning_rate": 7.348666231074948e-06, "loss": 0.5916, "step": 5746 }, { "epoch": 0.36, "grad_norm": 0.8953961730003357, "learning_rate": 7.3477604257899515e-06, "loss": 0.6092, "step": 5747 }, { "epoch": 0.36, "grad_norm": 0.9233314990997314, "learning_rate": 7.346854521648929e-06, "loss": 0.6414, "step": 5748 }, { "epoch": 0.36, "grad_norm": 0.8458792567253113, "learning_rate": 7.345948518690029e-06, "loss": 0.556, "step": 5749 }, { "epoch": 0.36, "grad_norm": 0.9279628396034241, "learning_rate": 7.345042416951395e-06, "loss": 0.6147, "step": 5750 }, { "epoch": 0.36, "grad_norm": 0.8384361267089844, "learning_rate": 7.344136216471185e-06, "loss": 0.5691, "step": 5751 }, { "epoch": 0.36, "grad_norm": 0.8720436096191406, "learning_rate": 7.343229917287552e-06, "loss": 0.5882, "step": 5752 }, { "epoch": 0.36, "grad_norm": 0.9607126712799072, "learning_rate": 7.34232351943866e-06, "loss": 0.5573, "step": 5753 }, { "epoch": 0.36, "grad_norm": 0.8432719707489014, "learning_rate": 7.341417022962671e-06, "loss": 0.54, "step": 5754 }, { "epoch": 0.36, "grad_norm": 0.9096271395683289, "learning_rate": 7.340510427897759e-06, "loss": 0.5467, "step": 5755 }, { "epoch": 0.36, "grad_norm": 0.9777395725250244, "learning_rate": 7.339603734282093e-06, "loss": 0.6271, "step": 5756 }, { "epoch": 0.36, "grad_norm": 0.8779467344284058, "learning_rate": 7.338696942153855e-06, "loss": 0.6058, "step": 5757 }, { "epoch": 0.36, "grad_norm": 0.8599120378494263, "learning_rate": 7.337790051551221e-06, "loss": 0.5776, "step": 5758 }, { "epoch": 0.36, "grad_norm": 0.8779652118682861, "learning_rate": 7.3368830625123835e-06, "loss": 0.6134, "step": 5759 }, { "epoch": 0.36, "grad_norm": 0.903643012046814, "learning_rate": 7.335975975075529e-06, "loss": 0.6908, "step": 5760 }, { "epoch": 0.36, "grad_norm": 0.895206093788147, "learning_rate": 7.3350687892788505e-06, "loss": 0.5559, "step": 5761 }, { "epoch": 0.37, "grad_norm": 0.907238781452179, "learning_rate": 7.33416150516055e-06, "loss": 0.5807, "step": 5762 }, { "epoch": 0.37, "grad_norm": 0.877465546131134, "learning_rate": 7.333254122758828e-06, "loss": 0.6332, "step": 5763 }, { "epoch": 0.37, "grad_norm": 0.8640191555023193, "learning_rate": 7.332346642111893e-06, "loss": 0.6103, "step": 5764 }, { "epoch": 0.37, "grad_norm": 0.8886452317237854, "learning_rate": 7.331439063257953e-06, "loss": 0.6206, "step": 5765 }, { "epoch": 0.37, "grad_norm": 0.8364898562431335, "learning_rate": 7.330531386235225e-06, "loss": 0.5864, "step": 5766 }, { "epoch": 0.37, "grad_norm": 0.9376548528671265, "learning_rate": 7.329623611081927e-06, "loss": 0.6066, "step": 5767 }, { "epoch": 0.37, "grad_norm": 0.8801112174987793, "learning_rate": 7.3287157378362846e-06, "loss": 0.5709, "step": 5768 }, { "epoch": 0.37, "grad_norm": 0.8653738498687744, "learning_rate": 7.327807766536521e-06, "loss": 0.6407, "step": 5769 }, { "epoch": 0.37, "grad_norm": 0.8694636821746826, "learning_rate": 7.3268996972208725e-06, "loss": 0.6672, "step": 5770 }, { "epoch": 0.37, "grad_norm": 0.8904354572296143, "learning_rate": 7.325991529927572e-06, "loss": 0.6163, "step": 5771 }, { "epoch": 0.37, "grad_norm": 0.9113852977752686, "learning_rate": 7.325083264694859e-06, "loss": 0.5539, "step": 5772 }, { "epoch": 0.37, "grad_norm": 0.8761439919471741, "learning_rate": 7.324174901560978e-06, "loss": 0.6205, "step": 5773 }, { "epoch": 0.37, "grad_norm": 0.8492023944854736, "learning_rate": 7.323266440564177e-06, "loss": 0.5814, "step": 5774 }, { "epoch": 0.37, "grad_norm": 0.8769062757492065, "learning_rate": 7.32235788174271e-06, "loss": 0.5551, "step": 5775 }, { "epoch": 0.37, "grad_norm": 0.8555404543876648, "learning_rate": 7.32144922513483e-06, "loss": 0.5793, "step": 5776 }, { "epoch": 0.37, "grad_norm": 0.874083936214447, "learning_rate": 7.320540470778799e-06, "loss": 0.6028, "step": 5777 }, { "epoch": 0.37, "grad_norm": 0.9265373945236206, "learning_rate": 7.319631618712881e-06, "loss": 0.6117, "step": 5778 }, { "epoch": 0.37, "grad_norm": 0.8393657803535461, "learning_rate": 7.318722668975347e-06, "loss": 0.5443, "step": 5779 }, { "epoch": 0.37, "grad_norm": 0.844636857509613, "learning_rate": 7.317813621604466e-06, "loss": 0.6026, "step": 5780 }, { "epoch": 0.37, "grad_norm": 0.853661060333252, "learning_rate": 7.316904476638515e-06, "loss": 0.565, "step": 5781 }, { "epoch": 0.37, "grad_norm": 0.871853768825531, "learning_rate": 7.315995234115778e-06, "loss": 0.5588, "step": 5782 }, { "epoch": 0.37, "grad_norm": 0.814250111579895, "learning_rate": 7.315085894074539e-06, "loss": 0.5909, "step": 5783 }, { "epoch": 0.37, "grad_norm": 0.904152512550354, "learning_rate": 7.314176456553086e-06, "loss": 0.5795, "step": 5784 }, { "epoch": 0.37, "grad_norm": 0.8134939074516296, "learning_rate": 7.3132669215897125e-06, "loss": 0.6044, "step": 5785 }, { "epoch": 0.37, "grad_norm": 0.8810901641845703, "learning_rate": 7.312357289222717e-06, "loss": 0.6512, "step": 5786 }, { "epoch": 0.37, "grad_norm": 0.8648774027824402, "learning_rate": 7.3114475594904e-06, "loss": 0.5882, "step": 5787 }, { "epoch": 0.37, "grad_norm": 0.8309141993522644, "learning_rate": 7.310537732431067e-06, "loss": 0.6525, "step": 5788 }, { "epoch": 0.37, "grad_norm": 0.9296196699142456, "learning_rate": 7.309627808083027e-06, "loss": 0.642, "step": 5789 }, { "epoch": 0.37, "grad_norm": 0.9500271081924438, "learning_rate": 7.308717786484596e-06, "loss": 0.5761, "step": 5790 }, { "epoch": 0.37, "grad_norm": 0.9415786266326904, "learning_rate": 7.30780766767409e-06, "loss": 0.7613, "step": 5791 }, { "epoch": 0.37, "grad_norm": 0.771344780921936, "learning_rate": 7.306897451689832e-06, "loss": 0.5429, "step": 5792 }, { "epoch": 0.37, "grad_norm": 0.8383582830429077, "learning_rate": 7.305987138570145e-06, "loss": 0.5749, "step": 5793 }, { "epoch": 0.37, "grad_norm": 0.9000876545906067, "learning_rate": 7.305076728353364e-06, "loss": 0.592, "step": 5794 }, { "epoch": 0.37, "grad_norm": 0.841670572757721, "learning_rate": 7.30416622107782e-06, "loss": 0.5762, "step": 5795 }, { "epoch": 0.37, "grad_norm": 0.8553557395935059, "learning_rate": 7.303255616781853e-06, "loss": 0.6068, "step": 5796 }, { "epoch": 0.37, "grad_norm": 0.9532732367515564, "learning_rate": 7.3023449155038016e-06, "loss": 0.6489, "step": 5797 }, { "epoch": 0.37, "grad_norm": 0.8923346996307373, "learning_rate": 7.301434117282018e-06, "loss": 0.6421, "step": 5798 }, { "epoch": 0.37, "grad_norm": 0.8571204543113708, "learning_rate": 7.300523222154848e-06, "loss": 0.5909, "step": 5799 }, { "epoch": 0.37, "grad_norm": 0.9138479232788086, "learning_rate": 7.299612230160648e-06, "loss": 0.6169, "step": 5800 }, { "epoch": 0.37, "grad_norm": 0.9442511796951294, "learning_rate": 7.298701141337778e-06, "loss": 0.5826, "step": 5801 }, { "epoch": 0.37, "grad_norm": 0.8205499053001404, "learning_rate": 7.2977899557246e-06, "loss": 0.6157, "step": 5802 }, { "epoch": 0.37, "grad_norm": 0.8094413876533508, "learning_rate": 7.2968786733594795e-06, "loss": 0.5982, "step": 5803 }, { "epoch": 0.37, "grad_norm": 0.8342402577400208, "learning_rate": 7.295967294280788e-06, "loss": 0.6123, "step": 5804 }, { "epoch": 0.37, "grad_norm": 0.883686363697052, "learning_rate": 7.2950558185269005e-06, "loss": 0.5801, "step": 5805 }, { "epoch": 0.37, "grad_norm": 0.9100261330604553, "learning_rate": 7.294144246136198e-06, "loss": 0.5688, "step": 5806 }, { "epoch": 0.37, "grad_norm": 0.8626593351364136, "learning_rate": 7.29323257714706e-06, "loss": 0.6418, "step": 5807 }, { "epoch": 0.37, "grad_norm": 0.8720927834510803, "learning_rate": 7.292320811597877e-06, "loss": 0.6389, "step": 5808 }, { "epoch": 0.37, "grad_norm": 0.8386964797973633, "learning_rate": 7.291408949527039e-06, "loss": 0.5383, "step": 5809 }, { "epoch": 0.37, "grad_norm": 0.921635091304779, "learning_rate": 7.290496990972942e-06, "loss": 0.5874, "step": 5810 }, { "epoch": 0.37, "grad_norm": 0.8803329467773438, "learning_rate": 7.2895849359739834e-06, "loss": 0.5582, "step": 5811 }, { "epoch": 0.37, "grad_norm": 0.9119853973388672, "learning_rate": 7.288672784568568e-06, "loss": 0.6075, "step": 5812 }, { "epoch": 0.37, "grad_norm": 0.8550745844841003, "learning_rate": 7.2877605367951055e-06, "loss": 0.5818, "step": 5813 }, { "epoch": 0.37, "grad_norm": 0.8705887794494629, "learning_rate": 7.286848192692003e-06, "loss": 0.5768, "step": 5814 }, { "epoch": 0.37, "grad_norm": 0.8665969371795654, "learning_rate": 7.28593575229768e-06, "loss": 0.6108, "step": 5815 }, { "epoch": 0.37, "grad_norm": 0.8779606819152832, "learning_rate": 7.285023215650553e-06, "loss": 0.5621, "step": 5816 }, { "epoch": 0.37, "grad_norm": 0.8697792291641235, "learning_rate": 7.2841105827890475e-06, "loss": 0.6248, "step": 5817 }, { "epoch": 0.37, "grad_norm": 0.9603003859519958, "learning_rate": 7.283197853751593e-06, "loss": 0.6527, "step": 5818 }, { "epoch": 0.37, "grad_norm": 0.9190054535865784, "learning_rate": 7.282285028576618e-06, "loss": 0.6703, "step": 5819 }, { "epoch": 0.37, "grad_norm": 0.9047878980636597, "learning_rate": 7.28137210730256e-06, "loss": 0.6406, "step": 5820 }, { "epoch": 0.37, "grad_norm": 0.8862581849098206, "learning_rate": 7.280459089967861e-06, "loss": 0.5556, "step": 5821 }, { "epoch": 0.37, "grad_norm": 0.8609002232551575, "learning_rate": 7.279545976610961e-06, "loss": 0.5763, "step": 5822 }, { "epoch": 0.37, "grad_norm": 0.9380242824554443, "learning_rate": 7.278632767270309e-06, "loss": 0.617, "step": 5823 }, { "epoch": 0.37, "grad_norm": 0.827458381652832, "learning_rate": 7.277719461984361e-06, "loss": 0.5788, "step": 5824 }, { "epoch": 0.37, "grad_norm": 0.8551861047744751, "learning_rate": 7.276806060791567e-06, "loss": 0.6054, "step": 5825 }, { "epoch": 0.37, "grad_norm": 0.8845090270042419, "learning_rate": 7.275892563730393e-06, "loss": 0.6049, "step": 5826 }, { "epoch": 0.37, "grad_norm": 0.8537983894348145, "learning_rate": 7.274978970839297e-06, "loss": 0.5715, "step": 5827 }, { "epoch": 0.37, "grad_norm": 0.8627631068229675, "learning_rate": 7.274065282156752e-06, "loss": 0.5343, "step": 5828 }, { "epoch": 0.37, "grad_norm": 0.9428598284721375, "learning_rate": 7.273151497721229e-06, "loss": 0.6423, "step": 5829 }, { "epoch": 0.37, "grad_norm": 0.8636415600776672, "learning_rate": 7.272237617571205e-06, "loss": 0.5829, "step": 5830 }, { "epoch": 0.37, "grad_norm": 0.9982849359512329, "learning_rate": 7.2713236417451584e-06, "loss": 0.6376, "step": 5831 }, { "epoch": 0.37, "grad_norm": 0.8668151497840881, "learning_rate": 7.2704095702815754e-06, "loss": 0.5882, "step": 5832 }, { "epoch": 0.37, "grad_norm": 0.9315029382705688, "learning_rate": 7.269495403218943e-06, "loss": 0.5898, "step": 5833 }, { "epoch": 0.37, "grad_norm": 0.8428326845169067, "learning_rate": 7.268581140595754e-06, "loss": 0.5528, "step": 5834 }, { "epoch": 0.37, "grad_norm": 0.8342899084091187, "learning_rate": 7.267666782450505e-06, "loss": 0.5497, "step": 5835 }, { "epoch": 0.37, "grad_norm": 0.9424355030059814, "learning_rate": 7.266752328821698e-06, "loss": 0.6838, "step": 5836 }, { "epoch": 0.37, "grad_norm": 0.8566783666610718, "learning_rate": 7.265837779747834e-06, "loss": 0.5478, "step": 5837 }, { "epoch": 0.37, "grad_norm": 0.982837975025177, "learning_rate": 7.264923135267425e-06, "loss": 0.6028, "step": 5838 }, { "epoch": 0.37, "grad_norm": 0.9721706509590149, "learning_rate": 7.264008395418981e-06, "loss": 0.6461, "step": 5839 }, { "epoch": 0.37, "grad_norm": 0.8464512825012207, "learning_rate": 7.263093560241019e-06, "loss": 0.5897, "step": 5840 }, { "epoch": 0.37, "grad_norm": 0.8291548490524292, "learning_rate": 7.262178629772061e-06, "loss": 0.5641, "step": 5841 }, { "epoch": 0.37, "grad_norm": 0.9384708404541016, "learning_rate": 7.261263604050628e-06, "loss": 0.5823, "step": 5842 }, { "epoch": 0.37, "grad_norm": 0.8504778146743774, "learning_rate": 7.260348483115254e-06, "loss": 0.5374, "step": 5843 }, { "epoch": 0.37, "grad_norm": 0.848728358745575, "learning_rate": 7.259433267004466e-06, "loss": 0.5656, "step": 5844 }, { "epoch": 0.37, "grad_norm": 0.8592720031738281, "learning_rate": 7.258517955756805e-06, "loss": 0.6059, "step": 5845 }, { "epoch": 0.37, "grad_norm": 0.8951132297515869, "learning_rate": 7.257602549410808e-06, "loss": 0.5942, "step": 5846 }, { "epoch": 0.37, "grad_norm": 0.9378473162651062, "learning_rate": 7.256687048005024e-06, "loss": 0.6411, "step": 5847 }, { "epoch": 0.37, "grad_norm": 0.9194514751434326, "learning_rate": 7.255771451577996e-06, "loss": 0.6337, "step": 5848 }, { "epoch": 0.37, "grad_norm": 0.8979505300521851, "learning_rate": 7.254855760168281e-06, "loss": 0.5663, "step": 5849 }, { "epoch": 0.37, "grad_norm": 0.8199179768562317, "learning_rate": 7.2539399738144325e-06, "loss": 0.5586, "step": 5850 }, { "epoch": 0.37, "grad_norm": 0.9413596391677856, "learning_rate": 7.2530240925550145e-06, "loss": 0.589, "step": 5851 }, { "epoch": 0.37, "grad_norm": 0.9242424964904785, "learning_rate": 7.252108116428589e-06, "loss": 0.5799, "step": 5852 }, { "epoch": 0.37, "grad_norm": 0.9168336391448975, "learning_rate": 7.251192045473725e-06, "loss": 0.5839, "step": 5853 }, { "epoch": 0.37, "grad_norm": 0.9121633768081665, "learning_rate": 7.250275879728995e-06, "loss": 0.5788, "step": 5854 }, { "epoch": 0.37, "grad_norm": 0.8337844014167786, "learning_rate": 7.249359619232976e-06, "loss": 0.5781, "step": 5855 }, { "epoch": 0.37, "grad_norm": 0.9401801228523254, "learning_rate": 7.24844326402425e-06, "loss": 0.6175, "step": 5856 }, { "epoch": 0.37, "grad_norm": 0.8701263666152954, "learning_rate": 7.247526814141398e-06, "loss": 0.5735, "step": 5857 }, { "epoch": 0.37, "grad_norm": 0.8712503910064697, "learning_rate": 7.2466102696230115e-06, "loss": 0.6065, "step": 5858 }, { "epoch": 0.37, "grad_norm": 0.8291772603988647, "learning_rate": 7.24569363050768e-06, "loss": 0.6063, "step": 5859 }, { "epoch": 0.37, "grad_norm": 0.843247652053833, "learning_rate": 7.244776896834004e-06, "loss": 0.5898, "step": 5860 }, { "epoch": 0.37, "grad_norm": 0.8736797571182251, "learning_rate": 7.243860068640581e-06, "loss": 0.6175, "step": 5861 }, { "epoch": 0.37, "grad_norm": 0.9105240106582642, "learning_rate": 7.242943145966016e-06, "loss": 0.5534, "step": 5862 }, { "epoch": 0.37, "grad_norm": 0.901670515537262, "learning_rate": 7.242026128848918e-06, "loss": 0.5567, "step": 5863 }, { "epoch": 0.37, "grad_norm": 0.8726474642753601, "learning_rate": 7.241109017327901e-06, "loss": 0.5742, "step": 5864 }, { "epoch": 0.37, "grad_norm": 0.8959450125694275, "learning_rate": 7.240191811441577e-06, "loss": 0.6154, "step": 5865 }, { "epoch": 0.37, "grad_norm": 0.9082683324813843, "learning_rate": 7.239274511228569e-06, "loss": 0.6233, "step": 5866 }, { "epoch": 0.37, "grad_norm": 0.8369854092597961, "learning_rate": 7.238357116727502e-06, "loss": 0.634, "step": 5867 }, { "epoch": 0.37, "grad_norm": 0.9661149978637695, "learning_rate": 7.2374396279770044e-06, "loss": 0.5991, "step": 5868 }, { "epoch": 0.37, "grad_norm": 0.8639382719993591, "learning_rate": 7.236522045015706e-06, "loss": 0.5692, "step": 5869 }, { "epoch": 0.37, "grad_norm": 0.8862959742546082, "learning_rate": 7.235604367882245e-06, "loss": 0.6189, "step": 5870 }, { "epoch": 0.37, "grad_norm": 0.8773701190948486, "learning_rate": 7.234686596615262e-06, "loss": 0.56, "step": 5871 }, { "epoch": 0.37, "grad_norm": 0.8855640292167664, "learning_rate": 7.2337687312534e-06, "loss": 0.618, "step": 5872 }, { "epoch": 0.37, "grad_norm": 0.8535584807395935, "learning_rate": 7.232850771835307e-06, "loss": 0.6135, "step": 5873 }, { "epoch": 0.37, "grad_norm": 0.9095380306243896, "learning_rate": 7.231932718399635e-06, "loss": 0.6002, "step": 5874 }, { "epoch": 0.37, "grad_norm": 0.9614174962043762, "learning_rate": 7.231014570985042e-06, "loss": 0.6079, "step": 5875 }, { "epoch": 0.37, "grad_norm": 0.8840222358703613, "learning_rate": 7.230096329630185e-06, "loss": 0.5708, "step": 5876 }, { "epoch": 0.37, "grad_norm": 0.8881139755249023, "learning_rate": 7.22917799437373e-06, "loss": 0.5693, "step": 5877 }, { "epoch": 0.37, "grad_norm": 0.8951361775398254, "learning_rate": 7.228259565254345e-06, "loss": 0.6344, "step": 5878 }, { "epoch": 0.37, "grad_norm": 0.9418209791183472, "learning_rate": 7.227341042310702e-06, "loss": 0.595, "step": 5879 }, { "epoch": 0.37, "grad_norm": 0.964740514755249, "learning_rate": 7.226422425581474e-06, "loss": 0.6433, "step": 5880 }, { "epoch": 0.37, "grad_norm": 0.8945766687393188, "learning_rate": 7.225503715105344e-06, "loss": 0.5805, "step": 5881 }, { "epoch": 0.37, "grad_norm": 0.8209680914878845, "learning_rate": 7.224584910920994e-06, "loss": 0.6353, "step": 5882 }, { "epoch": 0.37, "grad_norm": 0.9142740368843079, "learning_rate": 7.223666013067113e-06, "loss": 0.5583, "step": 5883 }, { "epoch": 0.37, "grad_norm": 0.9378098845481873, "learning_rate": 7.222747021582392e-06, "loss": 0.5952, "step": 5884 }, { "epoch": 0.37, "grad_norm": 0.9350360035896301, "learning_rate": 7.221827936505524e-06, "loss": 0.6235, "step": 5885 }, { "epoch": 0.37, "grad_norm": 0.8425854444503784, "learning_rate": 7.220908757875214e-06, "loss": 0.5706, "step": 5886 }, { "epoch": 0.37, "grad_norm": 0.8196877837181091, "learning_rate": 7.21998948573016e-06, "loss": 0.6116, "step": 5887 }, { "epoch": 0.37, "grad_norm": 0.8354714512825012, "learning_rate": 7.219070120109072e-06, "loss": 0.545, "step": 5888 }, { "epoch": 0.37, "grad_norm": 0.9335945248603821, "learning_rate": 7.2181506610506605e-06, "loss": 0.5873, "step": 5889 }, { "epoch": 0.37, "grad_norm": 0.9078087210655212, "learning_rate": 7.217231108593642e-06, "loss": 0.6323, "step": 5890 }, { "epoch": 0.37, "grad_norm": 0.8889597058296204, "learning_rate": 7.2163114627767336e-06, "loss": 0.5855, "step": 5891 }, { "epoch": 0.37, "grad_norm": 0.9393039345741272, "learning_rate": 7.21539172363866e-06, "loss": 0.6515, "step": 5892 }, { "epoch": 0.37, "grad_norm": 0.8929221034049988, "learning_rate": 7.214471891218147e-06, "loss": 0.5601, "step": 5893 }, { "epoch": 0.37, "grad_norm": 0.8714567422866821, "learning_rate": 7.213551965553927e-06, "loss": 0.5709, "step": 5894 }, { "epoch": 0.37, "grad_norm": 0.8751015067100525, "learning_rate": 7.212631946684735e-06, "loss": 0.5834, "step": 5895 }, { "epoch": 0.37, "grad_norm": 0.8570420742034912, "learning_rate": 7.211711834649308e-06, "loss": 0.6357, "step": 5896 }, { "epoch": 0.37, "grad_norm": 0.8587523102760315, "learning_rate": 7.210791629486389e-06, "loss": 0.6232, "step": 5897 }, { "epoch": 0.37, "grad_norm": 0.9013690948486328, "learning_rate": 7.209871331234727e-06, "loss": 0.5748, "step": 5898 }, { "epoch": 0.37, "grad_norm": 0.9406622052192688, "learning_rate": 7.208950939933069e-06, "loss": 0.6136, "step": 5899 }, { "epoch": 0.37, "grad_norm": 0.8297491073608398, "learning_rate": 7.208030455620172e-06, "loss": 0.6091, "step": 5900 }, { "epoch": 0.37, "grad_norm": 0.8118994235992432, "learning_rate": 7.207109878334794e-06, "loss": 0.5538, "step": 5901 }, { "epoch": 0.37, "grad_norm": 0.8709977865219116, "learning_rate": 7.206189208115697e-06, "loss": 0.6218, "step": 5902 }, { "epoch": 0.37, "grad_norm": 0.7942225337028503, "learning_rate": 7.205268445001647e-06, "loss": 0.5634, "step": 5903 }, { "epoch": 0.37, "grad_norm": 0.9106520414352417, "learning_rate": 7.204347589031413e-06, "loss": 0.6096, "step": 5904 }, { "epoch": 0.37, "grad_norm": 0.8729263544082642, "learning_rate": 7.203426640243772e-06, "loss": 0.5695, "step": 5905 }, { "epoch": 0.37, "grad_norm": 0.8718299865722656, "learning_rate": 7.2025055986775e-06, "loss": 0.584, "step": 5906 }, { "epoch": 0.37, "grad_norm": 0.877406895160675, "learning_rate": 7.201584464371378e-06, "loss": 0.6209, "step": 5907 }, { "epoch": 0.37, "grad_norm": 0.8972481489181519, "learning_rate": 7.200663237364195e-06, "loss": 0.6161, "step": 5908 }, { "epoch": 0.37, "grad_norm": 0.8868620991706848, "learning_rate": 7.199741917694738e-06, "loss": 0.6095, "step": 5909 }, { "epoch": 0.37, "grad_norm": 0.9140734672546387, "learning_rate": 7.198820505401801e-06, "loss": 0.6437, "step": 5910 }, { "epoch": 0.37, "grad_norm": 0.9390980005264282, "learning_rate": 7.197899000524181e-06, "loss": 0.6443, "step": 5911 }, { "epoch": 0.37, "grad_norm": 0.8791154623031616, "learning_rate": 7.196977403100681e-06, "loss": 0.6049, "step": 5912 }, { "epoch": 0.37, "grad_norm": 0.8596461415290833, "learning_rate": 7.196055713170105e-06, "loss": 0.577, "step": 5913 }, { "epoch": 0.37, "grad_norm": 0.8921295404434204, "learning_rate": 7.195133930771263e-06, "loss": 0.6139, "step": 5914 }, { "epoch": 0.37, "grad_norm": 0.8871878981590271, "learning_rate": 7.194212055942966e-06, "loss": 0.6127, "step": 5915 }, { "epoch": 0.37, "grad_norm": 0.8868473172187805, "learning_rate": 7.193290088724034e-06, "loss": 0.5821, "step": 5916 }, { "epoch": 0.37, "grad_norm": 0.8867928981781006, "learning_rate": 7.192368029153285e-06, "loss": 0.6544, "step": 5917 }, { "epoch": 0.37, "grad_norm": 0.8952857851982117, "learning_rate": 7.191445877269548e-06, "loss": 0.5514, "step": 5918 }, { "epoch": 0.38, "grad_norm": 0.9083967804908752, "learning_rate": 7.190523633111644e-06, "loss": 0.6256, "step": 5919 }, { "epoch": 0.38, "grad_norm": 0.8887345194816589, "learning_rate": 7.189601296718413e-06, "loss": 0.6002, "step": 5920 }, { "epoch": 0.38, "grad_norm": 0.8916110992431641, "learning_rate": 7.188678868128687e-06, "loss": 0.6277, "step": 5921 }, { "epoch": 0.38, "grad_norm": 0.8981056809425354, "learning_rate": 7.18775634738131e-06, "loss": 0.6223, "step": 5922 }, { "epoch": 0.38, "grad_norm": 0.9082187414169312, "learning_rate": 7.18683373451512e-06, "loss": 0.6221, "step": 5923 }, { "epoch": 0.38, "grad_norm": 0.8695595860481262, "learning_rate": 7.185911029568972e-06, "loss": 0.607, "step": 5924 }, { "epoch": 0.38, "grad_norm": 0.8874411582946777, "learning_rate": 7.184988232581713e-06, "loss": 0.6072, "step": 5925 }, { "epoch": 0.38, "grad_norm": 0.8543808460235596, "learning_rate": 7.184065343592203e-06, "loss": 0.6432, "step": 5926 }, { "epoch": 0.38, "grad_norm": 0.8796266317367554, "learning_rate": 7.183142362639296e-06, "loss": 0.6275, "step": 5927 }, { "epoch": 0.38, "grad_norm": 0.8801624178886414, "learning_rate": 7.18221928976186e-06, "loss": 0.5814, "step": 5928 }, { "epoch": 0.38, "grad_norm": 0.8554267287254333, "learning_rate": 7.181296124998762e-06, "loss": 0.6135, "step": 5929 }, { "epoch": 0.38, "grad_norm": 0.9125354290008545, "learning_rate": 7.180372868388873e-06, "loss": 0.6057, "step": 5930 }, { "epoch": 0.38, "grad_norm": 0.8697827458381653, "learning_rate": 7.179449519971066e-06, "loss": 0.5761, "step": 5931 }, { "epoch": 0.38, "grad_norm": 0.8693752288818359, "learning_rate": 7.178526079784221e-06, "loss": 0.5969, "step": 5932 }, { "epoch": 0.38, "grad_norm": 0.9136356711387634, "learning_rate": 7.1776025478672225e-06, "loss": 0.6007, "step": 5933 }, { "epoch": 0.38, "grad_norm": 0.9643456339836121, "learning_rate": 7.176678924258955e-06, "loss": 0.6225, "step": 5934 }, { "epoch": 0.38, "grad_norm": 0.8101844191551208, "learning_rate": 7.175755208998311e-06, "loss": 0.5552, "step": 5935 }, { "epoch": 0.38, "grad_norm": 0.8744382262229919, "learning_rate": 7.174831402124184e-06, "loss": 0.6181, "step": 5936 }, { "epoch": 0.38, "grad_norm": 0.9439733624458313, "learning_rate": 7.173907503675472e-06, "loss": 0.6245, "step": 5937 }, { "epoch": 0.38, "grad_norm": 0.9993674159049988, "learning_rate": 7.172983513691076e-06, "loss": 0.633, "step": 5938 }, { "epoch": 0.38, "grad_norm": 0.9159564971923828, "learning_rate": 7.172059432209907e-06, "loss": 0.5969, "step": 5939 }, { "epoch": 0.38, "grad_norm": 0.9775694608688354, "learning_rate": 7.171135259270868e-06, "loss": 0.6291, "step": 5940 }, { "epoch": 0.38, "grad_norm": 0.8840250968933105, "learning_rate": 7.170210994912878e-06, "loss": 0.5855, "step": 5941 }, { "epoch": 0.38, "grad_norm": 0.8848263025283813, "learning_rate": 7.169286639174852e-06, "loss": 0.604, "step": 5942 }, { "epoch": 0.38, "grad_norm": 0.943367063999176, "learning_rate": 7.168362192095712e-06, "loss": 0.6189, "step": 5943 }, { "epoch": 0.38, "grad_norm": 1.0210529565811157, "learning_rate": 7.1674376537143845e-06, "loss": 0.6232, "step": 5944 }, { "epoch": 0.38, "grad_norm": 0.9326754212379456, "learning_rate": 7.166513024069797e-06, "loss": 0.6188, "step": 5945 }, { "epoch": 0.38, "grad_norm": 0.8790732622146606, "learning_rate": 7.16558830320088e-06, "loss": 0.628, "step": 5946 }, { "epoch": 0.38, "grad_norm": 0.8562813401222229, "learning_rate": 7.1646634911465765e-06, "loss": 0.5557, "step": 5947 }, { "epoch": 0.38, "grad_norm": 0.8628082871437073, "learning_rate": 7.163738587945822e-06, "loss": 0.5901, "step": 5948 }, { "epoch": 0.38, "grad_norm": 0.9249915480613708, "learning_rate": 7.162813593637563e-06, "loss": 0.5991, "step": 5949 }, { "epoch": 0.38, "grad_norm": 0.8744149208068848, "learning_rate": 7.161888508260748e-06, "loss": 0.6241, "step": 5950 }, { "epoch": 0.38, "grad_norm": 0.8531312942504883, "learning_rate": 7.160963331854327e-06, "loss": 0.5488, "step": 5951 }, { "epoch": 0.38, "grad_norm": 0.8790968060493469, "learning_rate": 7.16003806445726e-06, "loss": 0.5869, "step": 5952 }, { "epoch": 0.38, "grad_norm": 0.8855732679367065, "learning_rate": 7.159112706108502e-06, "loss": 0.5524, "step": 5953 }, { "epoch": 0.38, "grad_norm": 0.8487377166748047, "learning_rate": 7.15818725684702e-06, "loss": 0.6133, "step": 5954 }, { "epoch": 0.38, "grad_norm": 0.9325571060180664, "learning_rate": 7.15726171671178e-06, "loss": 0.6002, "step": 5955 }, { "epoch": 0.38, "grad_norm": 0.9158957600593567, "learning_rate": 7.156336085741755e-06, "loss": 0.6271, "step": 5956 }, { "epoch": 0.38, "grad_norm": 0.8471969962120056, "learning_rate": 7.155410363975916e-06, "loss": 0.5651, "step": 5957 }, { "epoch": 0.38, "grad_norm": 0.8656317591667175, "learning_rate": 7.154484551453247e-06, "loss": 0.6275, "step": 5958 }, { "epoch": 0.38, "grad_norm": 0.8509047627449036, "learning_rate": 7.1535586482127284e-06, "loss": 0.6528, "step": 5959 }, { "epoch": 0.38, "grad_norm": 0.8533027768135071, "learning_rate": 7.152632654293347e-06, "loss": 0.6032, "step": 5960 }, { "epoch": 0.38, "grad_norm": 0.9203348755836487, "learning_rate": 7.151706569734091e-06, "loss": 0.5851, "step": 5961 }, { "epoch": 0.38, "grad_norm": 0.7722728848457336, "learning_rate": 7.150780394573957e-06, "loss": 0.5788, "step": 5962 }, { "epoch": 0.38, "grad_norm": 0.873199462890625, "learning_rate": 7.149854128851945e-06, "loss": 0.5754, "step": 5963 }, { "epoch": 0.38, "grad_norm": 0.8286789059638977, "learning_rate": 7.148927772607053e-06, "loss": 0.5779, "step": 5964 }, { "epoch": 0.38, "grad_norm": 0.8518579602241516, "learning_rate": 7.148001325878287e-06, "loss": 0.5629, "step": 5965 }, { "epoch": 0.38, "grad_norm": 0.9076201319694519, "learning_rate": 7.147074788704659e-06, "loss": 0.608, "step": 5966 }, { "epoch": 0.38, "grad_norm": 0.9196124076843262, "learning_rate": 7.14614816112518e-06, "loss": 0.5983, "step": 5967 }, { "epoch": 0.38, "grad_norm": 0.8748944997787476, "learning_rate": 7.145221443178868e-06, "loss": 0.6094, "step": 5968 }, { "epoch": 0.38, "grad_norm": 0.9023792147636414, "learning_rate": 7.144294634904744e-06, "loss": 0.6188, "step": 5969 }, { "epoch": 0.38, "grad_norm": 0.9357802271842957, "learning_rate": 7.143367736341832e-06, "loss": 0.6842, "step": 5970 }, { "epoch": 0.38, "grad_norm": 0.9043236970901489, "learning_rate": 7.142440747529161e-06, "loss": 0.6354, "step": 5971 }, { "epoch": 0.38, "grad_norm": 0.9322927594184875, "learning_rate": 7.141513668505764e-06, "loss": 0.5922, "step": 5972 }, { "epoch": 0.38, "grad_norm": 0.8984158635139465, "learning_rate": 7.140586499310674e-06, "loss": 0.5912, "step": 5973 }, { "epoch": 0.38, "grad_norm": 0.8156484961509705, "learning_rate": 7.139659239982935e-06, "loss": 0.5413, "step": 5974 }, { "epoch": 0.38, "grad_norm": 0.8405022621154785, "learning_rate": 7.138731890561589e-06, "loss": 0.586, "step": 5975 }, { "epoch": 0.38, "grad_norm": 0.8600237965583801, "learning_rate": 7.1378044510856814e-06, "loss": 0.5976, "step": 5976 }, { "epoch": 0.38, "grad_norm": 0.8850138783454895, "learning_rate": 7.136876921594267e-06, "loss": 0.6245, "step": 5977 }, { "epoch": 0.38, "grad_norm": 0.9403291344642639, "learning_rate": 7.1359493021263986e-06, "loss": 0.6494, "step": 5978 }, { "epoch": 0.38, "grad_norm": 0.8556556701660156, "learning_rate": 7.135021592721134e-06, "loss": 0.5771, "step": 5979 }, { "epoch": 0.38, "grad_norm": 0.8727120757102966, "learning_rate": 7.134093793417539e-06, "loss": 0.6104, "step": 5980 }, { "epoch": 0.38, "grad_norm": 0.8781840205192566, "learning_rate": 7.133165904254677e-06, "loss": 0.5915, "step": 5981 }, { "epoch": 0.38, "grad_norm": 0.9176463484764099, "learning_rate": 7.132237925271621e-06, "loss": 0.5915, "step": 5982 }, { "epoch": 0.38, "grad_norm": 0.8665004968643188, "learning_rate": 7.131309856507444e-06, "loss": 0.6643, "step": 5983 }, { "epoch": 0.38, "grad_norm": 0.9312930107116699, "learning_rate": 7.13038169800122e-06, "loss": 0.6334, "step": 5984 }, { "epoch": 0.38, "grad_norm": 0.9924306869506836, "learning_rate": 7.129453449792036e-06, "loss": 0.6339, "step": 5985 }, { "epoch": 0.38, "grad_norm": 0.8924956917762756, "learning_rate": 7.1285251119189754e-06, "loss": 0.5738, "step": 5986 }, { "epoch": 0.38, "grad_norm": 0.997128963470459, "learning_rate": 7.127596684421127e-06, "loss": 0.6045, "step": 5987 }, { "epoch": 0.38, "grad_norm": 0.8882451057434082, "learning_rate": 7.126668167337583e-06, "loss": 0.589, "step": 5988 }, { "epoch": 0.38, "grad_norm": 0.855974018573761, "learning_rate": 7.12573956070744e-06, "loss": 0.6437, "step": 5989 }, { "epoch": 0.38, "grad_norm": 0.885186493396759, "learning_rate": 7.1248108645698e-06, "loss": 0.6057, "step": 5990 }, { "epoch": 0.38, "grad_norm": 0.8319755792617798, "learning_rate": 7.123882078963766e-06, "loss": 0.5789, "step": 5991 }, { "epoch": 0.38, "grad_norm": 0.8926076292991638, "learning_rate": 7.1229532039284455e-06, "loss": 0.637, "step": 5992 }, { "epoch": 0.38, "grad_norm": 0.9193412661552429, "learning_rate": 7.122024239502951e-06, "loss": 0.5881, "step": 5993 }, { "epoch": 0.38, "grad_norm": 0.9050919413566589, "learning_rate": 7.121095185726399e-06, "loss": 0.6494, "step": 5994 }, { "epoch": 0.38, "grad_norm": 0.8967909812927246, "learning_rate": 7.120166042637906e-06, "loss": 0.6335, "step": 5995 }, { "epoch": 0.38, "grad_norm": 0.8294476866722107, "learning_rate": 7.119236810276598e-06, "loss": 0.5503, "step": 5996 }, { "epoch": 0.38, "grad_norm": 0.8650161027908325, "learning_rate": 7.118307488681598e-06, "loss": 0.6328, "step": 5997 }, { "epoch": 0.38, "grad_norm": 0.8785965442657471, "learning_rate": 7.11737807789204e-06, "loss": 0.58, "step": 5998 }, { "epoch": 0.38, "grad_norm": 0.9463037252426147, "learning_rate": 7.116448577947057e-06, "loss": 0.5731, "step": 5999 }, { "epoch": 0.38, "grad_norm": 0.8291397094726562, "learning_rate": 7.115518988885785e-06, "loss": 0.5948, "step": 6000 }, { "epoch": 0.38, "grad_norm": 0.9187091588973999, "learning_rate": 7.114589310747371e-06, "loss": 0.6384, "step": 6001 }, { "epoch": 0.38, "grad_norm": 0.8593400716781616, "learning_rate": 7.113659543570956e-06, "loss": 0.625, "step": 6002 }, { "epoch": 0.38, "grad_norm": 0.8747579455375671, "learning_rate": 7.11272968739569e-06, "loss": 0.5569, "step": 6003 }, { "epoch": 0.38, "grad_norm": 0.8783309459686279, "learning_rate": 7.1117997422607264e-06, "loss": 0.5986, "step": 6004 }, { "epoch": 0.38, "grad_norm": 0.8772686123847961, "learning_rate": 7.110869708205224e-06, "loss": 0.5752, "step": 6005 }, { "epoch": 0.38, "grad_norm": 0.8766029477119446, "learning_rate": 7.109939585268339e-06, "loss": 0.6299, "step": 6006 }, { "epoch": 0.38, "grad_norm": 0.8981195688247681, "learning_rate": 7.109009373489239e-06, "loss": 0.6076, "step": 6007 }, { "epoch": 0.38, "grad_norm": 0.8908311128616333, "learning_rate": 7.10807907290709e-06, "loss": 0.651, "step": 6008 }, { "epoch": 0.38, "grad_norm": 0.9420418739318848, "learning_rate": 7.107148683561066e-06, "loss": 0.5797, "step": 6009 }, { "epoch": 0.38, "grad_norm": 0.9113646149635315, "learning_rate": 7.106218205490342e-06, "loss": 0.6277, "step": 6010 }, { "epoch": 0.38, "grad_norm": 0.9516562223434448, "learning_rate": 7.105287638734093e-06, "loss": 0.6429, "step": 6011 }, { "epoch": 0.38, "grad_norm": 0.9115347862243652, "learning_rate": 7.104356983331509e-06, "loss": 0.6106, "step": 6012 }, { "epoch": 0.38, "grad_norm": 0.9286765456199646, "learning_rate": 7.1034262393217705e-06, "loss": 0.5787, "step": 6013 }, { "epoch": 0.38, "grad_norm": 0.9642840027809143, "learning_rate": 7.1024954067440725e-06, "loss": 0.6729, "step": 6014 }, { "epoch": 0.38, "grad_norm": 0.9277244806289673, "learning_rate": 7.101564485637603e-06, "loss": 0.601, "step": 6015 }, { "epoch": 0.38, "grad_norm": 0.8856588006019592, "learning_rate": 7.1006334760415674e-06, "loss": 0.6001, "step": 6016 }, { "epoch": 0.38, "grad_norm": 0.916569709777832, "learning_rate": 7.0997023779951625e-06, "loss": 0.6309, "step": 6017 }, { "epoch": 0.38, "grad_norm": 0.9436630606651306, "learning_rate": 7.098771191537596e-06, "loss": 0.6716, "step": 6018 }, { "epoch": 0.38, "grad_norm": 0.897139847278595, "learning_rate": 7.097839916708073e-06, "loss": 0.6179, "step": 6019 }, { "epoch": 0.38, "grad_norm": 1.0071852207183838, "learning_rate": 7.096908553545812e-06, "loss": 0.627, "step": 6020 }, { "epoch": 0.38, "grad_norm": 0.830710232257843, "learning_rate": 7.095977102090025e-06, "loss": 0.6087, "step": 6021 }, { "epoch": 0.38, "grad_norm": 0.9118586182594299, "learning_rate": 7.095045562379934e-06, "loss": 0.5829, "step": 6022 }, { "epoch": 0.38, "grad_norm": 0.8319807052612305, "learning_rate": 7.0941139344547605e-06, "loss": 0.5733, "step": 6023 }, { "epoch": 0.38, "grad_norm": 0.8906463980674744, "learning_rate": 7.093182218353737e-06, "loss": 0.6338, "step": 6024 }, { "epoch": 0.38, "grad_norm": 0.8869120478630066, "learning_rate": 7.092250414116091e-06, "loss": 0.5613, "step": 6025 }, { "epoch": 0.38, "grad_norm": 0.8718534111976624, "learning_rate": 7.091318521781058e-06, "loss": 0.5957, "step": 6026 }, { "epoch": 0.38, "grad_norm": 0.8886241912841797, "learning_rate": 7.090386541387878e-06, "loss": 0.6346, "step": 6027 }, { "epoch": 0.38, "grad_norm": 0.8198200464248657, "learning_rate": 7.089454472975792e-06, "loss": 0.5945, "step": 6028 }, { "epoch": 0.38, "grad_norm": 0.8481683135032654, "learning_rate": 7.088522316584048e-06, "loss": 0.5899, "step": 6029 }, { "epoch": 0.38, "grad_norm": 0.8683075904846191, "learning_rate": 7.087590072251893e-06, "loss": 0.5804, "step": 6030 }, { "epoch": 0.38, "grad_norm": 0.8363116383552551, "learning_rate": 7.086657740018582e-06, "loss": 0.6085, "step": 6031 }, { "epoch": 0.38, "grad_norm": 0.8278794288635254, "learning_rate": 7.085725319923373e-06, "loss": 0.5597, "step": 6032 }, { "epoch": 0.38, "grad_norm": 0.9533769488334656, "learning_rate": 7.084792812005528e-06, "loss": 0.6417, "step": 6033 }, { "epoch": 0.38, "grad_norm": 0.9329741597175598, "learning_rate": 7.083860216304309e-06, "loss": 0.6205, "step": 6034 }, { "epoch": 0.38, "grad_norm": 0.9326625466346741, "learning_rate": 7.082927532858985e-06, "loss": 0.5771, "step": 6035 }, { "epoch": 0.38, "grad_norm": 0.9433557987213135, "learning_rate": 7.0819947617088294e-06, "loss": 0.5943, "step": 6036 }, { "epoch": 0.38, "grad_norm": 0.9084176421165466, "learning_rate": 7.081061902893117e-06, "loss": 0.6308, "step": 6037 }, { "epoch": 0.38, "grad_norm": 1.0079909563064575, "learning_rate": 7.080128956451125e-06, "loss": 0.5854, "step": 6038 }, { "epoch": 0.38, "grad_norm": 0.9684156179428101, "learning_rate": 7.079195922422143e-06, "loss": 0.6058, "step": 6039 }, { "epoch": 0.38, "grad_norm": 0.8209320902824402, "learning_rate": 7.078262800845453e-06, "loss": 0.5948, "step": 6040 }, { "epoch": 0.38, "grad_norm": 0.8796716928482056, "learning_rate": 7.0773295917603445e-06, "loss": 0.5924, "step": 6041 }, { "epoch": 0.38, "grad_norm": 0.8752491474151611, "learning_rate": 7.076396295206113e-06, "loss": 0.5695, "step": 6042 }, { "epoch": 0.38, "grad_norm": 0.9148269891738892, "learning_rate": 7.075462911222057e-06, "loss": 0.5703, "step": 6043 }, { "epoch": 0.38, "grad_norm": 0.8726043701171875, "learning_rate": 7.07452943984748e-06, "loss": 0.5915, "step": 6044 }, { "epoch": 0.38, "grad_norm": 0.8048043847084045, "learning_rate": 7.073595881121683e-06, "loss": 0.5756, "step": 6045 }, { "epoch": 0.38, "grad_norm": 0.9457216262817383, "learning_rate": 7.072662235083977e-06, "loss": 0.6594, "step": 6046 }, { "epoch": 0.38, "grad_norm": 0.9144176840782166, "learning_rate": 7.071728501773675e-06, "loss": 0.6171, "step": 6047 }, { "epoch": 0.38, "grad_norm": 0.9629214406013489, "learning_rate": 7.070794681230093e-06, "loss": 0.6228, "step": 6048 }, { "epoch": 0.38, "grad_norm": 0.9110321402549744, "learning_rate": 7.06986077349255e-06, "loss": 0.6194, "step": 6049 }, { "epoch": 0.38, "grad_norm": 0.8530512452125549, "learning_rate": 7.068926778600372e-06, "loss": 0.6007, "step": 6050 }, { "epoch": 0.38, "grad_norm": 0.8579297661781311, "learning_rate": 7.067992696592882e-06, "loss": 0.6179, "step": 6051 }, { "epoch": 0.38, "grad_norm": 0.9101974368095398, "learning_rate": 7.067058527509416e-06, "loss": 0.5758, "step": 6052 }, { "epoch": 0.38, "grad_norm": 0.92631995677948, "learning_rate": 7.066124271389305e-06, "loss": 0.5397, "step": 6053 }, { "epoch": 0.38, "grad_norm": 0.8497442603111267, "learning_rate": 7.0651899282718896e-06, "loss": 0.64, "step": 6054 }, { "epoch": 0.38, "grad_norm": 0.9552360773086548, "learning_rate": 7.064255498196509e-06, "loss": 0.6108, "step": 6055 }, { "epoch": 0.38, "grad_norm": 0.9272350668907166, "learning_rate": 7.0633209812025116e-06, "loss": 0.6146, "step": 6056 }, { "epoch": 0.38, "grad_norm": 0.9053919315338135, "learning_rate": 7.062386377329245e-06, "loss": 0.5826, "step": 6057 }, { "epoch": 0.38, "grad_norm": 0.9134330749511719, "learning_rate": 7.061451686616062e-06, "loss": 0.5864, "step": 6058 }, { "epoch": 0.38, "grad_norm": 1.0126466751098633, "learning_rate": 7.0605169091023205e-06, "loss": 0.6486, "step": 6059 }, { "epoch": 0.38, "grad_norm": 0.9160744547843933, "learning_rate": 7.05958204482738e-06, "loss": 0.5698, "step": 6060 }, { "epoch": 0.38, "grad_norm": 0.924263060092926, "learning_rate": 7.058647093830604e-06, "loss": 0.6247, "step": 6061 }, { "epoch": 0.38, "grad_norm": 0.8898268342018127, "learning_rate": 7.0577120561513604e-06, "loss": 0.6066, "step": 6062 }, { "epoch": 0.38, "grad_norm": 0.887617826461792, "learning_rate": 7.056776931829021e-06, "loss": 0.5524, "step": 6063 }, { "epoch": 0.38, "grad_norm": 0.899122416973114, "learning_rate": 7.055841720902959e-06, "loss": 0.5709, "step": 6064 }, { "epoch": 0.38, "grad_norm": 0.9129178524017334, "learning_rate": 7.054906423412554e-06, "loss": 0.6503, "step": 6065 }, { "epoch": 0.38, "grad_norm": 0.9189284443855286, "learning_rate": 7.053971039397188e-06, "loss": 0.645, "step": 6066 }, { "epoch": 0.38, "grad_norm": 0.9444376230239868, "learning_rate": 7.0530355688962484e-06, "loss": 0.6305, "step": 6067 }, { "epoch": 0.38, "grad_norm": 0.8827232122421265, "learning_rate": 7.0521000119491215e-06, "loss": 0.5677, "step": 6068 }, { "epoch": 0.38, "grad_norm": 0.918749213218689, "learning_rate": 7.0511643685952014e-06, "loss": 0.6198, "step": 6069 }, { "epoch": 0.38, "grad_norm": 0.8609430193901062, "learning_rate": 7.050228638873886e-06, "loss": 0.5669, "step": 6070 }, { "epoch": 0.38, "grad_norm": 0.8873887658119202, "learning_rate": 7.049292822824575e-06, "loss": 0.6614, "step": 6071 }, { "epoch": 0.38, "grad_norm": 0.8995460271835327, "learning_rate": 7.048356920486672e-06, "loss": 0.6257, "step": 6072 }, { "epoch": 0.38, "grad_norm": 0.8707825541496277, "learning_rate": 7.047420931899585e-06, "loss": 0.5955, "step": 6073 }, { "epoch": 0.38, "grad_norm": 0.8987425565719604, "learning_rate": 7.0464848571027246e-06, "loss": 0.612, "step": 6074 }, { "epoch": 0.38, "grad_norm": 0.9181625247001648, "learning_rate": 7.045548696135506e-06, "loss": 0.5886, "step": 6075 }, { "epoch": 0.38, "grad_norm": 0.9375488758087158, "learning_rate": 7.044612449037348e-06, "loss": 0.6653, "step": 6076 }, { "epoch": 0.39, "grad_norm": 0.8560453057289124, "learning_rate": 7.0436761158476715e-06, "loss": 0.601, "step": 6077 }, { "epoch": 0.39, "grad_norm": 0.8806048035621643, "learning_rate": 7.042739696605905e-06, "loss": 0.588, "step": 6078 }, { "epoch": 0.39, "grad_norm": 0.8374508619308472, "learning_rate": 7.041803191351475e-06, "loss": 0.5957, "step": 6079 }, { "epoch": 0.39, "grad_norm": 0.888600766658783, "learning_rate": 7.040866600123816e-06, "loss": 0.6332, "step": 6080 }, { "epoch": 0.39, "grad_norm": 0.9433616399765015, "learning_rate": 7.039929922962363e-06, "loss": 0.6253, "step": 6081 }, { "epoch": 0.39, "grad_norm": 0.920203447341919, "learning_rate": 7.038993159906558e-06, "loss": 0.6191, "step": 6082 }, { "epoch": 0.39, "grad_norm": 0.8820478916168213, "learning_rate": 7.0380563109958445e-06, "loss": 0.5925, "step": 6083 }, { "epoch": 0.39, "grad_norm": 0.8885151147842407, "learning_rate": 7.03711937626967e-06, "loss": 0.6106, "step": 6084 }, { "epoch": 0.39, "grad_norm": 0.8442419767379761, "learning_rate": 7.036182355767485e-06, "loss": 0.5689, "step": 6085 }, { "epoch": 0.39, "grad_norm": 0.8483255505561829, "learning_rate": 7.0352452495287435e-06, "loss": 0.594, "step": 6086 }, { "epoch": 0.39, "grad_norm": 0.8738897442817688, "learning_rate": 7.034308057592907e-06, "loss": 0.5607, "step": 6087 }, { "epoch": 0.39, "grad_norm": 0.8586130738258362, "learning_rate": 7.033370779999431e-06, "loss": 0.641, "step": 6088 }, { "epoch": 0.39, "grad_norm": 0.8719096183776855, "learning_rate": 7.032433416787788e-06, "loss": 0.6118, "step": 6089 }, { "epoch": 0.39, "grad_norm": 0.8584408164024353, "learning_rate": 7.031495967997444e-06, "loss": 0.5459, "step": 6090 }, { "epoch": 0.39, "grad_norm": 0.8801223635673523, "learning_rate": 7.0305584336678715e-06, "loss": 0.5927, "step": 6091 }, { "epoch": 0.39, "grad_norm": 0.7907819151878357, "learning_rate": 7.029620813838544e-06, "loss": 0.5669, "step": 6092 }, { "epoch": 0.39, "grad_norm": 0.8615099191665649, "learning_rate": 7.02868310854895e-06, "loss": 0.5749, "step": 6093 }, { "epoch": 0.39, "grad_norm": 0.8567502498626709, "learning_rate": 7.027745317838564e-06, "loss": 0.6005, "step": 6094 }, { "epoch": 0.39, "grad_norm": 0.8757819533348083, "learning_rate": 7.026807441746879e-06, "loss": 0.6121, "step": 6095 }, { "epoch": 0.39, "grad_norm": 0.8814988732337952, "learning_rate": 7.025869480313381e-06, "loss": 0.6107, "step": 6096 }, { "epoch": 0.39, "grad_norm": 0.8909090757369995, "learning_rate": 7.0249314335775675e-06, "loss": 0.5844, "step": 6097 }, { "epoch": 0.39, "grad_norm": 0.8157296776771545, "learning_rate": 7.023993301578935e-06, "loss": 0.5491, "step": 6098 }, { "epoch": 0.39, "grad_norm": 0.8694610595703125, "learning_rate": 7.023055084356987e-06, "loss": 0.5522, "step": 6099 }, { "epoch": 0.39, "grad_norm": 0.863211989402771, "learning_rate": 7.022116781951226e-06, "loss": 0.6082, "step": 6100 }, { "epoch": 0.39, "grad_norm": 0.8649691939353943, "learning_rate": 7.021178394401162e-06, "loss": 0.6182, "step": 6101 }, { "epoch": 0.39, "grad_norm": 0.8408727049827576, "learning_rate": 7.020239921746308e-06, "loss": 0.6067, "step": 6102 }, { "epoch": 0.39, "grad_norm": 0.920093297958374, "learning_rate": 7.019301364026178e-06, "loss": 0.6312, "step": 6103 }, { "epoch": 0.39, "grad_norm": 0.9169816374778748, "learning_rate": 7.018362721280292e-06, "loss": 0.5826, "step": 6104 }, { "epoch": 0.39, "grad_norm": 0.9006035327911377, "learning_rate": 7.0174239935481735e-06, "loss": 0.6304, "step": 6105 }, { "epoch": 0.39, "grad_norm": 0.8806290626525879, "learning_rate": 7.016485180869349e-06, "loss": 0.582, "step": 6106 }, { "epoch": 0.39, "grad_norm": 0.9303503036499023, "learning_rate": 7.015546283283346e-06, "loss": 0.5657, "step": 6107 }, { "epoch": 0.39, "grad_norm": 0.9579445123672485, "learning_rate": 7.014607300829703e-06, "loss": 0.6414, "step": 6108 }, { "epoch": 0.39, "grad_norm": 0.8906927108764648, "learning_rate": 7.013668233547955e-06, "loss": 0.5922, "step": 6109 }, { "epoch": 0.39, "grad_norm": 0.866254448890686, "learning_rate": 7.0127290814776424e-06, "loss": 0.5812, "step": 6110 }, { "epoch": 0.39, "grad_norm": 0.9502847194671631, "learning_rate": 7.0117898446583084e-06, "loss": 0.6458, "step": 6111 }, { "epoch": 0.39, "grad_norm": 0.8791959285736084, "learning_rate": 7.010850523129504e-06, "loss": 0.5782, "step": 6112 }, { "epoch": 0.39, "grad_norm": 0.8798953294754028, "learning_rate": 7.009911116930779e-06, "loss": 0.6134, "step": 6113 }, { "epoch": 0.39, "grad_norm": 0.9204188585281372, "learning_rate": 7.00897162610169e-06, "loss": 0.6289, "step": 6114 }, { "epoch": 0.39, "grad_norm": 0.9081289172172546, "learning_rate": 7.0080320506817926e-06, "loss": 0.5684, "step": 6115 }, { "epoch": 0.39, "grad_norm": 0.8632351160049438, "learning_rate": 7.007092390710652e-06, "loss": 0.5876, "step": 6116 }, { "epoch": 0.39, "grad_norm": 0.8665913939476013, "learning_rate": 7.006152646227833e-06, "loss": 0.5125, "step": 6117 }, { "epoch": 0.39, "grad_norm": 0.8278458118438721, "learning_rate": 7.005212817272905e-06, "loss": 0.6409, "step": 6118 }, { "epoch": 0.39, "grad_norm": 0.9356765151023865, "learning_rate": 7.0042729038854405e-06, "loss": 0.6375, "step": 6119 }, { "epoch": 0.39, "grad_norm": 0.8514903783798218, "learning_rate": 7.003332906105016e-06, "loss": 0.6365, "step": 6120 }, { "epoch": 0.39, "grad_norm": 0.9448802471160889, "learning_rate": 7.002392823971214e-06, "loss": 0.6518, "step": 6121 }, { "epoch": 0.39, "grad_norm": 0.8504220247268677, "learning_rate": 7.001452657523614e-06, "loss": 0.503, "step": 6122 }, { "epoch": 0.39, "grad_norm": 0.9173238277435303, "learning_rate": 7.000512406801805e-06, "loss": 0.6124, "step": 6123 }, { "epoch": 0.39, "grad_norm": 0.8627074956893921, "learning_rate": 6.9995720718453786e-06, "loss": 0.5751, "step": 6124 }, { "epoch": 0.39, "grad_norm": 0.997188925743103, "learning_rate": 6.998631652693928e-06, "loss": 0.6455, "step": 6125 }, { "epoch": 0.39, "grad_norm": 0.8653777837753296, "learning_rate": 6.997691149387052e-06, "loss": 0.5966, "step": 6126 }, { "epoch": 0.39, "grad_norm": 0.8478190302848816, "learning_rate": 6.99675056196435e-06, "loss": 0.5918, "step": 6127 }, { "epoch": 0.39, "grad_norm": 0.8888818025588989, "learning_rate": 6.995809890465428e-06, "loss": 0.5978, "step": 6128 }, { "epoch": 0.39, "grad_norm": 0.8966024518013, "learning_rate": 6.994869134929895e-06, "loss": 0.6194, "step": 6129 }, { "epoch": 0.39, "grad_norm": 0.8759685158729553, "learning_rate": 6.993928295397363e-06, "loss": 0.6068, "step": 6130 }, { "epoch": 0.39, "grad_norm": 0.8871753215789795, "learning_rate": 6.992987371907446e-06, "loss": 0.6187, "step": 6131 }, { "epoch": 0.39, "grad_norm": 0.8602596521377563, "learning_rate": 6.992046364499764e-06, "loss": 0.5815, "step": 6132 }, { "epoch": 0.39, "grad_norm": 0.8757937550544739, "learning_rate": 6.991105273213939e-06, "loss": 0.5496, "step": 6133 }, { "epoch": 0.39, "grad_norm": 0.8693877458572388, "learning_rate": 6.990164098089598e-06, "loss": 0.6058, "step": 6134 }, { "epoch": 0.39, "grad_norm": 0.8464959263801575, "learning_rate": 6.9892228391663694e-06, "loss": 0.5767, "step": 6135 }, { "epoch": 0.39, "grad_norm": 0.8602965474128723, "learning_rate": 6.988281496483888e-06, "loss": 0.6125, "step": 6136 }, { "epoch": 0.39, "grad_norm": 0.9073672294616699, "learning_rate": 6.987340070081789e-06, "loss": 0.6005, "step": 6137 }, { "epoch": 0.39, "grad_norm": 0.9364018440246582, "learning_rate": 6.986398559999714e-06, "loss": 0.5963, "step": 6138 }, { "epoch": 0.39, "grad_norm": 0.875133216381073, "learning_rate": 6.9854569662773044e-06, "loss": 0.5463, "step": 6139 }, { "epoch": 0.39, "grad_norm": 0.934817373752594, "learning_rate": 6.984515288954211e-06, "loss": 0.6034, "step": 6140 }, { "epoch": 0.39, "grad_norm": 0.9065064191818237, "learning_rate": 6.98357352807008e-06, "loss": 0.5662, "step": 6141 }, { "epoch": 0.39, "grad_norm": 0.813168466091156, "learning_rate": 6.982631683664569e-06, "loss": 0.5632, "step": 6142 }, { "epoch": 0.39, "grad_norm": 0.8873375654220581, "learning_rate": 6.981689755777335e-06, "loss": 0.5367, "step": 6143 }, { "epoch": 0.39, "grad_norm": 0.8773168325424194, "learning_rate": 6.98074774444804e-06, "loss": 0.5601, "step": 6144 }, { "epoch": 0.39, "grad_norm": 0.8021374344825745, "learning_rate": 6.979805649716347e-06, "loss": 0.5076, "step": 6145 }, { "epoch": 0.39, "grad_norm": 0.8933539986610413, "learning_rate": 6.978863471621925e-06, "loss": 0.6894, "step": 6146 }, { "epoch": 0.39, "grad_norm": 0.8887168169021606, "learning_rate": 6.977921210204446e-06, "loss": 0.647, "step": 6147 }, { "epoch": 0.39, "grad_norm": 0.8803666234016418, "learning_rate": 6.9769788655035875e-06, "loss": 0.5892, "step": 6148 }, { "epoch": 0.39, "grad_norm": 0.9113365411758423, "learning_rate": 6.976036437559024e-06, "loss": 0.6732, "step": 6149 }, { "epoch": 0.39, "grad_norm": 0.8204461932182312, "learning_rate": 6.975093926410441e-06, "loss": 0.5916, "step": 6150 }, { "epoch": 0.39, "grad_norm": 0.934197187423706, "learning_rate": 6.974151332097525e-06, "loss": 0.6305, "step": 6151 }, { "epoch": 0.39, "grad_norm": 0.9386470913887024, "learning_rate": 6.973208654659962e-06, "loss": 0.6485, "step": 6152 }, { "epoch": 0.39, "grad_norm": 0.9400019645690918, "learning_rate": 6.9722658941374475e-06, "loss": 0.5726, "step": 6153 }, { "epoch": 0.39, "grad_norm": 0.8022521734237671, "learning_rate": 6.971323050569677e-06, "loss": 0.593, "step": 6154 }, { "epoch": 0.39, "grad_norm": 0.8721299171447754, "learning_rate": 6.970380123996352e-06, "loss": 0.5738, "step": 6155 }, { "epoch": 0.39, "grad_norm": 0.9494243264198303, "learning_rate": 6.969437114457174e-06, "loss": 0.6282, "step": 6156 }, { "epoch": 0.39, "grad_norm": 0.8277761936187744, "learning_rate": 6.968494021991848e-06, "loss": 0.5913, "step": 6157 }, { "epoch": 0.39, "grad_norm": 0.854987621307373, "learning_rate": 6.967550846640089e-06, "loss": 0.5491, "step": 6158 }, { "epoch": 0.39, "grad_norm": 0.9130845665931702, "learning_rate": 6.966607588441609e-06, "loss": 0.6274, "step": 6159 }, { "epoch": 0.39, "grad_norm": 0.8112385869026184, "learning_rate": 6.9656642474361225e-06, "loss": 0.5309, "step": 6160 }, { "epoch": 0.39, "grad_norm": 0.8674074411392212, "learning_rate": 6.964720823663353e-06, "loss": 0.6072, "step": 6161 }, { "epoch": 0.39, "grad_norm": 0.9010210633277893, "learning_rate": 6.963777317163025e-06, "loss": 0.604, "step": 6162 }, { "epoch": 0.39, "grad_norm": 0.8281999230384827, "learning_rate": 6.962833727974867e-06, "loss": 0.5805, "step": 6163 }, { "epoch": 0.39, "grad_norm": 0.879539966583252, "learning_rate": 6.961890056138607e-06, "loss": 0.5993, "step": 6164 }, { "epoch": 0.39, "grad_norm": 0.9275795221328735, "learning_rate": 6.9609463016939816e-06, "loss": 0.6101, "step": 6165 }, { "epoch": 0.39, "grad_norm": 0.8362293839454651, "learning_rate": 6.960002464680731e-06, "loss": 0.5565, "step": 6166 }, { "epoch": 0.39, "grad_norm": 0.8443682193756104, "learning_rate": 6.959058545138593e-06, "loss": 0.5736, "step": 6167 }, { "epoch": 0.39, "grad_norm": 0.9468548893928528, "learning_rate": 6.958114543107315e-06, "loss": 0.6321, "step": 6168 }, { "epoch": 0.39, "grad_norm": 0.8098998069763184, "learning_rate": 6.957170458626645e-06, "loss": 0.552, "step": 6169 }, { "epoch": 0.39, "grad_norm": 0.9221862554550171, "learning_rate": 6.956226291736338e-06, "loss": 0.6174, "step": 6170 }, { "epoch": 0.39, "grad_norm": 0.8823233246803284, "learning_rate": 6.955282042476144e-06, "loss": 0.5788, "step": 6171 }, { "epoch": 0.39, "grad_norm": 0.8700152039527893, "learning_rate": 6.9543377108858265e-06, "loss": 0.6143, "step": 6172 }, { "epoch": 0.39, "grad_norm": 0.866326093673706, "learning_rate": 6.9533932970051465e-06, "loss": 0.586, "step": 6173 }, { "epoch": 0.39, "grad_norm": 0.9445212483406067, "learning_rate": 6.952448800873871e-06, "loss": 0.6754, "step": 6174 }, { "epoch": 0.39, "grad_norm": 0.9050667881965637, "learning_rate": 6.951504222531768e-06, "loss": 0.6266, "step": 6175 }, { "epoch": 0.39, "grad_norm": 0.8842514157295227, "learning_rate": 6.950559562018611e-06, "loss": 0.6103, "step": 6176 }, { "epoch": 0.39, "grad_norm": 0.8354772329330444, "learning_rate": 6.949614819374175e-06, "loss": 0.5891, "step": 6177 }, { "epoch": 0.39, "grad_norm": 0.8761371970176697, "learning_rate": 6.948669994638243e-06, "loss": 0.6099, "step": 6178 }, { "epoch": 0.39, "grad_norm": 0.827156126499176, "learning_rate": 6.947725087850595e-06, "loss": 0.5347, "step": 6179 }, { "epoch": 0.39, "grad_norm": 0.8923287987709045, "learning_rate": 6.94678009905102e-06, "loss": 0.5873, "step": 6180 }, { "epoch": 0.39, "grad_norm": 0.865619421005249, "learning_rate": 6.945835028279308e-06, "loss": 0.6504, "step": 6181 }, { "epoch": 0.39, "grad_norm": 0.8588405251502991, "learning_rate": 6.944889875575251e-06, "loss": 0.5939, "step": 6182 }, { "epoch": 0.39, "grad_norm": 0.8965503573417664, "learning_rate": 6.943944640978648e-06, "loss": 0.6188, "step": 6183 }, { "epoch": 0.39, "grad_norm": 0.8754391670227051, "learning_rate": 6.942999324529297e-06, "loss": 0.5729, "step": 6184 }, { "epoch": 0.39, "grad_norm": 0.873710036277771, "learning_rate": 6.942053926267005e-06, "loss": 0.5963, "step": 6185 }, { "epoch": 0.39, "grad_norm": 0.8937984704971313, "learning_rate": 6.941108446231578e-06, "loss": 0.5968, "step": 6186 }, { "epoch": 0.39, "grad_norm": 0.8646506071090698, "learning_rate": 6.940162884462828e-06, "loss": 0.5911, "step": 6187 }, { "epoch": 0.39, "grad_norm": 0.8940115571022034, "learning_rate": 6.9392172410005656e-06, "loss": 0.6188, "step": 6188 }, { "epoch": 0.39, "grad_norm": 0.8401895761489868, "learning_rate": 6.9382715158846135e-06, "loss": 0.5936, "step": 6189 }, { "epoch": 0.39, "grad_norm": 0.8863813281059265, "learning_rate": 6.93732570915479e-06, "loss": 0.5897, "step": 6190 }, { "epoch": 0.39, "grad_norm": 0.9222760796546936, "learning_rate": 6.93637982085092e-06, "loss": 0.6047, "step": 6191 }, { "epoch": 0.39, "grad_norm": 0.8968461751937866, "learning_rate": 6.9354338510128315e-06, "loss": 0.5943, "step": 6192 }, { "epoch": 0.39, "grad_norm": 0.9590244293212891, "learning_rate": 6.934487799680357e-06, "loss": 0.6274, "step": 6193 }, { "epoch": 0.39, "grad_norm": 0.8756579756736755, "learning_rate": 6.933541666893331e-06, "loss": 0.6139, "step": 6194 }, { "epoch": 0.39, "grad_norm": 0.921607494354248, "learning_rate": 6.932595452691592e-06, "loss": 0.64, "step": 6195 }, { "epoch": 0.39, "grad_norm": 0.8667705059051514, "learning_rate": 6.9316491571149815e-06, "loss": 0.6098, "step": 6196 }, { "epoch": 0.39, "grad_norm": 0.8910043835639954, "learning_rate": 6.930702780203344e-06, "loss": 0.6432, "step": 6197 }, { "epoch": 0.39, "grad_norm": 0.9581403732299805, "learning_rate": 6.929756321996529e-06, "loss": 0.6453, "step": 6198 }, { "epoch": 0.39, "grad_norm": 0.8930731415748596, "learning_rate": 6.928809782534388e-06, "loss": 0.6059, "step": 6199 }, { "epoch": 0.39, "grad_norm": 0.9078335762023926, "learning_rate": 6.927863161856778e-06, "loss": 0.5956, "step": 6200 }, { "epoch": 0.39, "grad_norm": 0.8804222345352173, "learning_rate": 6.9269164600035555e-06, "loss": 0.5862, "step": 6201 }, { "epoch": 0.39, "grad_norm": 0.8888744711875916, "learning_rate": 6.925969677014585e-06, "loss": 0.6367, "step": 6202 }, { "epoch": 0.39, "grad_norm": 0.9415931105613708, "learning_rate": 6.92502281292973e-06, "loss": 0.5966, "step": 6203 }, { "epoch": 0.39, "grad_norm": 0.8707212209701538, "learning_rate": 6.924075867788863e-06, "loss": 0.6106, "step": 6204 }, { "epoch": 0.39, "grad_norm": 0.866563081741333, "learning_rate": 6.923128841631854e-06, "loss": 0.5493, "step": 6205 }, { "epoch": 0.39, "grad_norm": 0.9359866976737976, "learning_rate": 6.92218173449858e-06, "loss": 0.5749, "step": 6206 }, { "epoch": 0.39, "grad_norm": 0.9220528602600098, "learning_rate": 6.921234546428918e-06, "loss": 0.5909, "step": 6207 }, { "epoch": 0.39, "grad_norm": 0.8486345410346985, "learning_rate": 6.920287277462755e-06, "loss": 0.5765, "step": 6208 }, { "epoch": 0.39, "grad_norm": 0.8374233245849609, "learning_rate": 6.9193399276399745e-06, "loss": 0.5556, "step": 6209 }, { "epoch": 0.39, "grad_norm": 0.8650535345077515, "learning_rate": 6.918392497000466e-06, "loss": 0.6162, "step": 6210 }, { "epoch": 0.39, "grad_norm": 0.8010015487670898, "learning_rate": 6.917444985584122e-06, "loss": 0.5534, "step": 6211 }, { "epoch": 0.39, "grad_norm": 0.888006865978241, "learning_rate": 6.916497393430841e-06, "loss": 0.6161, "step": 6212 }, { "epoch": 0.39, "grad_norm": 0.8319229483604431, "learning_rate": 6.915549720580523e-06, "loss": 0.5842, "step": 6213 }, { "epoch": 0.39, "grad_norm": 0.8947864174842834, "learning_rate": 6.914601967073068e-06, "loss": 0.5607, "step": 6214 }, { "epoch": 0.39, "grad_norm": 0.9286026358604431, "learning_rate": 6.913654132948385e-06, "loss": 0.6001, "step": 6215 }, { "epoch": 0.39, "grad_norm": 0.8386892676353455, "learning_rate": 6.912706218246384e-06, "loss": 0.5296, "step": 6216 }, { "epoch": 0.39, "grad_norm": 0.8397946357727051, "learning_rate": 6.911758223006979e-06, "loss": 0.5952, "step": 6217 }, { "epoch": 0.39, "grad_norm": 0.8822040557861328, "learning_rate": 6.910810147270084e-06, "loss": 0.5506, "step": 6218 }, { "epoch": 0.39, "grad_norm": 0.9485325217247009, "learning_rate": 6.909861991075622e-06, "loss": 0.6302, "step": 6219 }, { "epoch": 0.39, "grad_norm": 0.9046191573143005, "learning_rate": 6.908913754463514e-06, "loss": 0.6251, "step": 6220 }, { "epoch": 0.39, "grad_norm": 0.9548308849334717, "learning_rate": 6.90796543747369e-06, "loss": 0.6542, "step": 6221 }, { "epoch": 0.39, "grad_norm": 0.9304654002189636, "learning_rate": 6.907017040146078e-06, "loss": 0.6334, "step": 6222 }, { "epoch": 0.39, "grad_norm": 0.9015122652053833, "learning_rate": 6.906068562520613e-06, "loss": 0.6062, "step": 6223 }, { "epoch": 0.39, "grad_norm": 0.8413129448890686, "learning_rate": 6.905120004637232e-06, "loss": 0.5425, "step": 6224 }, { "epoch": 0.39, "grad_norm": 0.9578669667243958, "learning_rate": 6.904171366535873e-06, "loss": 0.607, "step": 6225 }, { "epoch": 0.39, "grad_norm": 0.895363450050354, "learning_rate": 6.9032226482564835e-06, "loss": 0.5703, "step": 6226 }, { "epoch": 0.39, "grad_norm": 0.9190669059753418, "learning_rate": 6.9022738498390084e-06, "loss": 0.6413, "step": 6227 }, { "epoch": 0.39, "grad_norm": 0.8880024552345276, "learning_rate": 6.9013249713234e-06, "loss": 0.6153, "step": 6228 }, { "epoch": 0.39, "grad_norm": 0.8834933042526245, "learning_rate": 6.900376012749611e-06, "loss": 0.5887, "step": 6229 }, { "epoch": 0.39, "grad_norm": 0.9798893928527832, "learning_rate": 6.899426974157598e-06, "loss": 0.6217, "step": 6230 }, { "epoch": 0.39, "grad_norm": 0.8374887704849243, "learning_rate": 6.898477855587323e-06, "loss": 0.6106, "step": 6231 }, { "epoch": 0.39, "grad_norm": 0.8667147159576416, "learning_rate": 6.897528657078752e-06, "loss": 0.5879, "step": 6232 }, { "epoch": 0.39, "grad_norm": 0.928011417388916, "learning_rate": 6.8965793786718484e-06, "loss": 0.591, "step": 6233 }, { "epoch": 0.39, "grad_norm": 0.8557186126708984, "learning_rate": 6.895630020406584e-06, "loss": 0.5891, "step": 6234 }, { "epoch": 0.4, "grad_norm": 0.9000698328018188, "learning_rate": 6.894680582322934e-06, "loss": 0.6082, "step": 6235 }, { "epoch": 0.4, "grad_norm": 0.8863718509674072, "learning_rate": 6.893731064460878e-06, "loss": 0.6171, "step": 6236 }, { "epoch": 0.4, "grad_norm": 0.9076705574989319, "learning_rate": 6.892781466860393e-06, "loss": 0.5794, "step": 6237 }, { "epoch": 0.4, "grad_norm": 0.8823980689048767, "learning_rate": 6.891831789561465e-06, "loss": 0.6175, "step": 6238 }, { "epoch": 0.4, "grad_norm": 0.9114968776702881, "learning_rate": 6.8908820326040815e-06, "loss": 0.6038, "step": 6239 }, { "epoch": 0.4, "grad_norm": 0.8561393618583679, "learning_rate": 6.889932196028235e-06, "loss": 0.6196, "step": 6240 }, { "epoch": 0.4, "grad_norm": 0.9283210635185242, "learning_rate": 6.888982279873917e-06, "loss": 0.582, "step": 6241 }, { "epoch": 0.4, "grad_norm": 0.8675887584686279, "learning_rate": 6.888032284181127e-06, "loss": 0.583, "step": 6242 }, { "epoch": 0.4, "grad_norm": 0.9557647109031677, "learning_rate": 6.887082208989865e-06, "loss": 0.6167, "step": 6243 }, { "epoch": 0.4, "grad_norm": 0.9393128156661987, "learning_rate": 6.886132054340136e-06, "loss": 0.6255, "step": 6244 }, { "epoch": 0.4, "grad_norm": 0.8403303027153015, "learning_rate": 6.885181820271947e-06, "loss": 0.6011, "step": 6245 }, { "epoch": 0.4, "grad_norm": 0.8862718343734741, "learning_rate": 6.88423150682531e-06, "loss": 0.6226, "step": 6246 }, { "epoch": 0.4, "grad_norm": 0.9034367799758911, "learning_rate": 6.88328111404024e-06, "loss": 0.5662, "step": 6247 }, { "epoch": 0.4, "grad_norm": 0.8718511462211609, "learning_rate": 6.882330641956752e-06, "loss": 0.6259, "step": 6248 }, { "epoch": 0.4, "grad_norm": 0.834060549736023, "learning_rate": 6.881380090614871e-06, "loss": 0.5645, "step": 6249 }, { "epoch": 0.4, "grad_norm": 0.9293310046195984, "learning_rate": 6.8804294600546175e-06, "loss": 0.6016, "step": 6250 }, { "epoch": 0.4, "grad_norm": 0.9908111095428467, "learning_rate": 6.879478750316022e-06, "loss": 0.6271, "step": 6251 }, { "epoch": 0.4, "grad_norm": 0.9571794867515564, "learning_rate": 6.878527961439113e-06, "loss": 0.6243, "step": 6252 }, { "epoch": 0.4, "grad_norm": 0.9029168486595154, "learning_rate": 6.877577093463927e-06, "loss": 0.6002, "step": 6253 }, { "epoch": 0.4, "grad_norm": 0.9042819738388062, "learning_rate": 6.876626146430502e-06, "loss": 0.5916, "step": 6254 }, { "epoch": 0.4, "grad_norm": 0.9775123000144958, "learning_rate": 6.875675120378878e-06, "loss": 0.6199, "step": 6255 }, { "epoch": 0.4, "grad_norm": 0.909796416759491, "learning_rate": 6.8747240153491e-06, "loss": 0.5858, "step": 6256 }, { "epoch": 0.4, "grad_norm": 0.847358763217926, "learning_rate": 6.873772831381214e-06, "loss": 0.6043, "step": 6257 }, { "epoch": 0.4, "grad_norm": 0.9297115206718445, "learning_rate": 6.872821568515275e-06, "loss": 0.6586, "step": 6258 }, { "epoch": 0.4, "grad_norm": 0.8991652727127075, "learning_rate": 6.8718702267913325e-06, "loss": 0.6056, "step": 6259 }, { "epoch": 0.4, "grad_norm": 0.8950271010398865, "learning_rate": 6.870918806249449e-06, "loss": 0.6192, "step": 6260 }, { "epoch": 0.4, "grad_norm": 0.8827762007713318, "learning_rate": 6.8699673069296806e-06, "loss": 0.588, "step": 6261 }, { "epoch": 0.4, "grad_norm": 0.8640381693840027, "learning_rate": 6.869015728872095e-06, "loss": 0.6255, "step": 6262 }, { "epoch": 0.4, "grad_norm": 0.8890305757522583, "learning_rate": 6.868064072116758e-06, "loss": 0.6502, "step": 6263 }, { "epoch": 0.4, "grad_norm": 0.854560911655426, "learning_rate": 6.867112336703743e-06, "loss": 0.5748, "step": 6264 }, { "epoch": 0.4, "grad_norm": 0.872962236404419, "learning_rate": 6.866160522673121e-06, "loss": 0.6329, "step": 6265 }, { "epoch": 0.4, "grad_norm": 0.8564478754997253, "learning_rate": 6.865208630064973e-06, "loss": 0.6265, "step": 6266 }, { "epoch": 0.4, "grad_norm": 0.8863121271133423, "learning_rate": 6.864256658919377e-06, "loss": 0.5473, "step": 6267 }, { "epoch": 0.4, "grad_norm": 0.8578380942344666, "learning_rate": 6.8633046092764174e-06, "loss": 0.6347, "step": 6268 }, { "epoch": 0.4, "grad_norm": 0.8845486640930176, "learning_rate": 6.862352481176184e-06, "loss": 0.6456, "step": 6269 }, { "epoch": 0.4, "grad_norm": 0.9655935168266296, "learning_rate": 6.861400274658767e-06, "loss": 0.5902, "step": 6270 }, { "epoch": 0.4, "grad_norm": 0.8958570957183838, "learning_rate": 6.860447989764259e-06, "loss": 0.5804, "step": 6271 }, { "epoch": 0.4, "grad_norm": 0.8562657237052917, "learning_rate": 6.8594956265327585e-06, "loss": 0.574, "step": 6272 }, { "epoch": 0.4, "grad_norm": 0.9815998077392578, "learning_rate": 6.858543185004365e-06, "loss": 0.6155, "step": 6273 }, { "epoch": 0.4, "grad_norm": 0.8810309171676636, "learning_rate": 6.857590665219185e-06, "loss": 0.6283, "step": 6274 }, { "epoch": 0.4, "grad_norm": 0.8395465016365051, "learning_rate": 6.856638067217324e-06, "loss": 0.5414, "step": 6275 }, { "epoch": 0.4, "grad_norm": 0.9288424253463745, "learning_rate": 6.85568539103889e-06, "loss": 0.5853, "step": 6276 }, { "epoch": 0.4, "grad_norm": 0.9081584215164185, "learning_rate": 6.854732636724002e-06, "loss": 0.6545, "step": 6277 }, { "epoch": 0.4, "grad_norm": 0.8159523606300354, "learning_rate": 6.853779804312775e-06, "loss": 0.5649, "step": 6278 }, { "epoch": 0.4, "grad_norm": 0.92462158203125, "learning_rate": 6.8528268938453295e-06, "loss": 0.5591, "step": 6279 }, { "epoch": 0.4, "grad_norm": 0.9456450939178467, "learning_rate": 6.851873905361786e-06, "loss": 0.6015, "step": 6280 }, { "epoch": 0.4, "grad_norm": 0.9764153957366943, "learning_rate": 6.850920838902278e-06, "loss": 0.6429, "step": 6281 }, { "epoch": 0.4, "grad_norm": 0.893409252166748, "learning_rate": 6.84996769450693e-06, "loss": 0.5466, "step": 6282 }, { "epoch": 0.4, "grad_norm": 0.9273908734321594, "learning_rate": 6.84901447221588e-06, "loss": 0.655, "step": 6283 }, { "epoch": 0.4, "grad_norm": 0.8750333189964294, "learning_rate": 6.84806117206926e-06, "loss": 0.6498, "step": 6284 }, { "epoch": 0.4, "grad_norm": 0.8754233121871948, "learning_rate": 6.847107794107216e-06, "loss": 0.5554, "step": 6285 }, { "epoch": 0.4, "grad_norm": 0.93915194272995, "learning_rate": 6.846154338369887e-06, "loss": 0.6434, "step": 6286 }, { "epoch": 0.4, "grad_norm": 0.835665225982666, "learning_rate": 6.845200804897421e-06, "loss": 0.6035, "step": 6287 }, { "epoch": 0.4, "grad_norm": 0.8906847834587097, "learning_rate": 6.844247193729968e-06, "loss": 0.636, "step": 6288 }, { "epoch": 0.4, "grad_norm": 0.8233811855316162, "learning_rate": 6.843293504907682e-06, "loss": 0.5461, "step": 6289 }, { "epoch": 0.4, "grad_norm": 0.9119184613227844, "learning_rate": 6.84233973847072e-06, "loss": 0.5923, "step": 6290 }, { "epoch": 0.4, "grad_norm": 0.9312586784362793, "learning_rate": 6.8413858944592385e-06, "loss": 0.66, "step": 6291 }, { "epoch": 0.4, "grad_norm": 0.8756263256072998, "learning_rate": 6.840431972913404e-06, "loss": 0.6262, "step": 6292 }, { "epoch": 0.4, "grad_norm": 0.8882813453674316, "learning_rate": 6.83947797387338e-06, "loss": 0.5798, "step": 6293 }, { "epoch": 0.4, "grad_norm": 0.8455925583839417, "learning_rate": 6.838523897379339e-06, "loss": 0.618, "step": 6294 }, { "epoch": 0.4, "grad_norm": 0.8319289684295654, "learning_rate": 6.837569743471451e-06, "loss": 0.6029, "step": 6295 }, { "epoch": 0.4, "grad_norm": 0.8721569180488586, "learning_rate": 6.836615512189895e-06, "loss": 0.5526, "step": 6296 }, { "epoch": 0.4, "grad_norm": 0.8549659848213196, "learning_rate": 6.835661203574848e-06, "loss": 0.5947, "step": 6297 }, { "epoch": 0.4, "grad_norm": 0.8474895358085632, "learning_rate": 6.834706817666495e-06, "loss": 0.6118, "step": 6298 }, { "epoch": 0.4, "grad_norm": 0.8855010271072388, "learning_rate": 6.833752354505019e-06, "loss": 0.5868, "step": 6299 }, { "epoch": 0.4, "grad_norm": 0.8940566182136536, "learning_rate": 6.832797814130611e-06, "loss": 0.5843, "step": 6300 }, { "epoch": 0.4, "grad_norm": 0.842008650302887, "learning_rate": 6.831843196583462e-06, "loss": 0.6042, "step": 6301 }, { "epoch": 0.4, "grad_norm": 0.8390910029411316, "learning_rate": 6.8308885019037695e-06, "loss": 0.5937, "step": 6302 }, { "epoch": 0.4, "grad_norm": 0.8749220967292786, "learning_rate": 6.82993373013173e-06, "loss": 0.6125, "step": 6303 }, { "epoch": 0.4, "grad_norm": 0.9013246297836304, "learning_rate": 6.8289788813075485e-06, "loss": 0.5911, "step": 6304 }, { "epoch": 0.4, "grad_norm": 0.9145839810371399, "learning_rate": 6.82802395547143e-06, "loss": 0.5748, "step": 6305 }, { "epoch": 0.4, "grad_norm": 0.8356090188026428, "learning_rate": 6.82706895266358e-06, "loss": 0.4983, "step": 6306 }, { "epoch": 0.4, "grad_norm": 0.8637154698371887, "learning_rate": 6.826113872924213e-06, "loss": 0.6237, "step": 6307 }, { "epoch": 0.4, "grad_norm": 0.8588926792144775, "learning_rate": 6.825158716293543e-06, "loss": 0.6215, "step": 6308 }, { "epoch": 0.4, "grad_norm": 0.8768167495727539, "learning_rate": 6.824203482811788e-06, "loss": 0.5862, "step": 6309 }, { "epoch": 0.4, "grad_norm": 0.8740860819816589, "learning_rate": 6.823248172519173e-06, "loss": 0.5699, "step": 6310 }, { "epoch": 0.4, "grad_norm": 0.937689483165741, "learning_rate": 6.8222927854559175e-06, "loss": 0.6146, "step": 6311 }, { "epoch": 0.4, "grad_norm": 0.8367653489112854, "learning_rate": 6.8213373216622514e-06, "loss": 0.5808, "step": 6312 }, { "epoch": 0.4, "grad_norm": 0.9312880635261536, "learning_rate": 6.820381781178409e-06, "loss": 0.6059, "step": 6313 }, { "epoch": 0.4, "grad_norm": 0.9240770936012268, "learning_rate": 6.819426164044622e-06, "loss": 0.6084, "step": 6314 }, { "epoch": 0.4, "grad_norm": 0.8993687629699707, "learning_rate": 6.818470470301128e-06, "loss": 0.5742, "step": 6315 }, { "epoch": 0.4, "grad_norm": 0.8884747624397278, "learning_rate": 6.817514699988168e-06, "loss": 0.5959, "step": 6316 }, { "epoch": 0.4, "grad_norm": 0.919092059135437, "learning_rate": 6.8165588531459885e-06, "loss": 0.612, "step": 6317 }, { "epoch": 0.4, "grad_norm": 0.8630106449127197, "learning_rate": 6.815602929814833e-06, "loss": 0.5945, "step": 6318 }, { "epoch": 0.4, "grad_norm": 0.8956739902496338, "learning_rate": 6.814646930034954e-06, "loss": 0.6494, "step": 6319 }, { "epoch": 0.4, "grad_norm": 0.8530880808830261, "learning_rate": 6.813690853846606e-06, "loss": 0.5881, "step": 6320 }, { "epoch": 0.4, "grad_norm": 0.9456024765968323, "learning_rate": 6.8127347012900465e-06, "loss": 0.592, "step": 6321 }, { "epoch": 0.4, "grad_norm": 0.959709882736206, "learning_rate": 6.811778472405534e-06, "loss": 0.6175, "step": 6322 }, { "epoch": 0.4, "grad_norm": 0.8467543721199036, "learning_rate": 6.810822167233333e-06, "loss": 0.5823, "step": 6323 }, { "epoch": 0.4, "grad_norm": 0.9109113216400146, "learning_rate": 6.80986578581371e-06, "loss": 0.618, "step": 6324 }, { "epoch": 0.4, "grad_norm": 0.9041874408721924, "learning_rate": 6.808909328186934e-06, "loss": 0.6321, "step": 6325 }, { "epoch": 0.4, "grad_norm": 0.81452476978302, "learning_rate": 6.80795279439328e-06, "loss": 0.5553, "step": 6326 }, { "epoch": 0.4, "grad_norm": 0.8997363448143005, "learning_rate": 6.806996184473023e-06, "loss": 0.623, "step": 6327 }, { "epoch": 0.4, "grad_norm": 0.9020070433616638, "learning_rate": 6.806039498466444e-06, "loss": 0.5917, "step": 6328 }, { "epoch": 0.4, "grad_norm": 0.8951176404953003, "learning_rate": 6.805082736413822e-06, "loss": 0.6224, "step": 6329 }, { "epoch": 0.4, "grad_norm": 0.9783088564872742, "learning_rate": 6.804125898355447e-06, "loss": 0.5973, "step": 6330 }, { "epoch": 0.4, "grad_norm": 0.8250484466552734, "learning_rate": 6.8031689843316054e-06, "loss": 0.5555, "step": 6331 }, { "epoch": 0.4, "grad_norm": 0.8294229507446289, "learning_rate": 6.802211994382591e-06, "loss": 0.5801, "step": 6332 }, { "epoch": 0.4, "grad_norm": 0.8849250078201294, "learning_rate": 6.8012549285487e-06, "loss": 0.6152, "step": 6333 }, { "epoch": 0.4, "grad_norm": 0.8871194124221802, "learning_rate": 6.800297786870228e-06, "loss": 0.5972, "step": 6334 }, { "epoch": 0.4, "grad_norm": 0.8779382705688477, "learning_rate": 6.799340569387481e-06, "loss": 0.548, "step": 6335 }, { "epoch": 0.4, "grad_norm": 0.8883922100067139, "learning_rate": 6.798383276140761e-06, "loss": 0.611, "step": 6336 }, { "epoch": 0.4, "grad_norm": 0.9105244874954224, "learning_rate": 6.797425907170378e-06, "loss": 0.5728, "step": 6337 }, { "epoch": 0.4, "grad_norm": 0.8537696003913879, "learning_rate": 6.796468462516642e-06, "loss": 0.5878, "step": 6338 }, { "epoch": 0.4, "grad_norm": 0.9340306520462036, "learning_rate": 6.79551094221987e-06, "loss": 0.6079, "step": 6339 }, { "epoch": 0.4, "grad_norm": 0.8353374600410461, "learning_rate": 6.794553346320376e-06, "loss": 0.5569, "step": 6340 }, { "epoch": 0.4, "grad_norm": 0.9598260521888733, "learning_rate": 6.7935956748584855e-06, "loss": 0.5305, "step": 6341 }, { "epoch": 0.4, "grad_norm": 0.8836723566055298, "learning_rate": 6.792637927874519e-06, "loss": 0.6038, "step": 6342 }, { "epoch": 0.4, "grad_norm": 0.930091142654419, "learning_rate": 6.791680105408807e-06, "loss": 0.6583, "step": 6343 }, { "epoch": 0.4, "grad_norm": 0.9237890839576721, "learning_rate": 6.790722207501678e-06, "loss": 0.6554, "step": 6344 }, { "epoch": 0.4, "grad_norm": 0.8894320726394653, "learning_rate": 6.789764234193465e-06, "loss": 0.5665, "step": 6345 }, { "epoch": 0.4, "grad_norm": 0.9483606815338135, "learning_rate": 6.788806185524508e-06, "loss": 0.6341, "step": 6346 }, { "epoch": 0.4, "grad_norm": 0.948627769947052, "learning_rate": 6.787848061535145e-06, "loss": 0.6711, "step": 6347 }, { "epoch": 0.4, "grad_norm": 0.8971147537231445, "learning_rate": 6.786889862265719e-06, "loss": 0.5643, "step": 6348 }, { "epoch": 0.4, "grad_norm": 0.8906237483024597, "learning_rate": 6.7859315877565775e-06, "loss": 0.5608, "step": 6349 }, { "epoch": 0.4, "grad_norm": 0.9154103398323059, "learning_rate": 6.784973238048069e-06, "loss": 0.6419, "step": 6350 }, { "epoch": 0.4, "grad_norm": 0.8681836128234863, "learning_rate": 6.7840148131805485e-06, "loss": 0.6173, "step": 6351 }, { "epoch": 0.4, "grad_norm": 0.8321382403373718, "learning_rate": 6.783056313194369e-06, "loss": 0.5718, "step": 6352 }, { "epoch": 0.4, "grad_norm": 0.8255459666252136, "learning_rate": 6.7820977381298915e-06, "loss": 0.6267, "step": 6353 }, { "epoch": 0.4, "grad_norm": 0.8768226504325867, "learning_rate": 6.781139088027477e-06, "loss": 0.6143, "step": 6354 }, { "epoch": 0.4, "grad_norm": 0.9021497368812561, "learning_rate": 6.780180362927492e-06, "loss": 0.5718, "step": 6355 }, { "epoch": 0.4, "grad_norm": 0.8666380643844604, "learning_rate": 6.779221562870306e-06, "loss": 0.5783, "step": 6356 }, { "epoch": 0.4, "grad_norm": 0.9858885407447815, "learning_rate": 6.778262687896287e-06, "loss": 0.6102, "step": 6357 }, { "epoch": 0.4, "grad_norm": 0.973626434803009, "learning_rate": 6.777303738045814e-06, "loss": 0.6679, "step": 6358 }, { "epoch": 0.4, "grad_norm": 0.8242490291595459, "learning_rate": 6.776344713359263e-06, "loss": 0.593, "step": 6359 }, { "epoch": 0.4, "grad_norm": 0.8745444416999817, "learning_rate": 6.775385613877016e-06, "loss": 0.6231, "step": 6360 }, { "epoch": 0.4, "grad_norm": 0.8920515775680542, "learning_rate": 6.774426439639455e-06, "loss": 0.5821, "step": 6361 }, { "epoch": 0.4, "grad_norm": 0.9180237650871277, "learning_rate": 6.773467190686972e-06, "loss": 0.625, "step": 6362 }, { "epoch": 0.4, "grad_norm": 0.9676087498664856, "learning_rate": 6.772507867059953e-06, "loss": 0.6289, "step": 6363 }, { "epoch": 0.4, "grad_norm": 0.9148452281951904, "learning_rate": 6.771548468798796e-06, "loss": 0.5209, "step": 6364 }, { "epoch": 0.4, "grad_norm": 0.9174354076385498, "learning_rate": 6.770588995943893e-06, "loss": 0.5529, "step": 6365 }, { "epoch": 0.4, "grad_norm": 0.9325718879699707, "learning_rate": 6.769629448535648e-06, "loss": 0.6186, "step": 6366 }, { "epoch": 0.4, "grad_norm": 0.9010034799575806, "learning_rate": 6.768669826614464e-06, "loss": 0.5967, "step": 6367 }, { "epoch": 0.4, "grad_norm": 0.8638269901275635, "learning_rate": 6.767710130220745e-06, "loss": 0.6489, "step": 6368 }, { "epoch": 0.4, "grad_norm": 0.8227560520172119, "learning_rate": 6.766750359394904e-06, "loss": 0.5682, "step": 6369 }, { "epoch": 0.4, "grad_norm": 0.9004592895507812, "learning_rate": 6.76579051417735e-06, "loss": 0.6632, "step": 6370 }, { "epoch": 0.4, "grad_norm": 0.8845899701118469, "learning_rate": 6.7648305946085e-06, "loss": 0.5959, "step": 6371 }, { "epoch": 0.4, "grad_norm": 0.9487060904502869, "learning_rate": 6.763870600728772e-06, "loss": 0.6677, "step": 6372 }, { "epoch": 0.4, "grad_norm": 0.8828071355819702, "learning_rate": 6.76291053257859e-06, "loss": 0.5542, "step": 6373 }, { "epoch": 0.4, "grad_norm": 0.9216554164886475, "learning_rate": 6.761950390198378e-06, "loss": 0.6145, "step": 6374 }, { "epoch": 0.4, "grad_norm": 0.8994758725166321, "learning_rate": 6.760990173628566e-06, "loss": 0.5999, "step": 6375 }, { "epoch": 0.4, "grad_norm": 0.8886323571205139, "learning_rate": 6.760029882909582e-06, "loss": 0.5941, "step": 6376 }, { "epoch": 0.4, "grad_norm": 0.8494300842285156, "learning_rate": 6.759069518081863e-06, "loss": 0.5829, "step": 6377 }, { "epoch": 0.4, "grad_norm": 0.9388317465782166, "learning_rate": 6.758109079185846e-06, "loss": 0.593, "step": 6378 }, { "epoch": 0.4, "grad_norm": 0.9075881838798523, "learning_rate": 6.757148566261973e-06, "loss": 0.5656, "step": 6379 }, { "epoch": 0.4, "grad_norm": 0.9015637040138245, "learning_rate": 6.756187979350684e-06, "loss": 0.6046, "step": 6380 }, { "epoch": 0.4, "grad_norm": 0.9175539016723633, "learning_rate": 6.75522731849243e-06, "loss": 0.5636, "step": 6381 }, { "epoch": 0.4, "grad_norm": 0.8536416292190552, "learning_rate": 6.754266583727659e-06, "loss": 0.6258, "step": 6382 }, { "epoch": 0.4, "grad_norm": 0.84648197889328, "learning_rate": 6.753305775096826e-06, "loss": 0.5732, "step": 6383 }, { "epoch": 0.4, "grad_norm": 0.8069581389427185, "learning_rate": 6.752344892640384e-06, "loss": 0.5564, "step": 6384 }, { "epoch": 0.4, "grad_norm": 0.8196657299995422, "learning_rate": 6.751383936398796e-06, "loss": 0.5909, "step": 6385 }, { "epoch": 0.4, "grad_norm": 0.9850438237190247, "learning_rate": 6.750422906412523e-06, "loss": 0.6585, "step": 6386 }, { "epoch": 0.4, "grad_norm": 0.9552303552627563, "learning_rate": 6.749461802722032e-06, "loss": 0.6039, "step": 6387 }, { "epoch": 0.4, "grad_norm": 0.955740213394165, "learning_rate": 6.7485006253677875e-06, "loss": 0.6084, "step": 6388 }, { "epoch": 0.4, "grad_norm": 1.0010960102081299, "learning_rate": 6.747539374390266e-06, "loss": 0.6799, "step": 6389 }, { "epoch": 0.4, "grad_norm": 0.909136950969696, "learning_rate": 6.746578049829942e-06, "loss": 0.5671, "step": 6390 }, { "epoch": 0.4, "grad_norm": 0.8283319473266602, "learning_rate": 6.745616651727289e-06, "loss": 0.5801, "step": 6391 }, { "epoch": 0.4, "grad_norm": 0.8886178135871887, "learning_rate": 6.744655180122793e-06, "loss": 0.5932, "step": 6392 }, { "epoch": 0.41, "grad_norm": 0.9179041385650635, "learning_rate": 6.743693635056936e-06, "loss": 0.6413, "step": 6393 }, { "epoch": 0.41, "grad_norm": 0.9177907109260559, "learning_rate": 6.742732016570207e-06, "loss": 0.6361, "step": 6394 }, { "epoch": 0.41, "grad_norm": 0.8754076361656189, "learning_rate": 6.741770324703095e-06, "loss": 0.6194, "step": 6395 }, { "epoch": 0.41, "grad_norm": 0.8487926721572876, "learning_rate": 6.740808559496093e-06, "loss": 0.5599, "step": 6396 }, { "epoch": 0.41, "grad_norm": 0.9245063066482544, "learning_rate": 6.739846720989699e-06, "loss": 0.6556, "step": 6397 }, { "epoch": 0.41, "grad_norm": 0.9024572968482971, "learning_rate": 6.738884809224413e-06, "loss": 0.5621, "step": 6398 }, { "epoch": 0.41, "grad_norm": 0.9168578386306763, "learning_rate": 6.7379228242407345e-06, "loss": 0.6098, "step": 6399 }, { "epoch": 0.41, "grad_norm": 0.8839691281318665, "learning_rate": 6.736960766079173e-06, "loss": 0.5978, "step": 6400 }, { "epoch": 0.41, "grad_norm": 0.9675304293632507, "learning_rate": 6.735998634780238e-06, "loss": 0.5849, "step": 6401 }, { "epoch": 0.41, "grad_norm": 0.8997515439987183, "learning_rate": 6.735036430384436e-06, "loss": 0.5645, "step": 6402 }, { "epoch": 0.41, "grad_norm": 0.8792773485183716, "learning_rate": 6.7340741529322875e-06, "loss": 0.6105, "step": 6403 }, { "epoch": 0.41, "grad_norm": 0.9032172560691833, "learning_rate": 6.733111802464308e-06, "loss": 0.5789, "step": 6404 }, { "epoch": 0.41, "grad_norm": 0.8126611113548279, "learning_rate": 6.732149379021022e-06, "loss": 0.5711, "step": 6405 }, { "epoch": 0.41, "grad_norm": 0.8911159038543701, "learning_rate": 6.7311868826429485e-06, "loss": 0.6068, "step": 6406 }, { "epoch": 0.41, "grad_norm": 0.9121822714805603, "learning_rate": 6.730224313370619e-06, "loss": 0.6165, "step": 6407 }, { "epoch": 0.41, "grad_norm": 0.8678528070449829, "learning_rate": 6.729261671244563e-06, "loss": 0.5745, "step": 6408 }, { "epoch": 0.41, "grad_norm": 0.9104927182197571, "learning_rate": 6.728298956305313e-06, "loss": 0.5595, "step": 6409 }, { "epoch": 0.41, "grad_norm": 0.9237872958183289, "learning_rate": 6.727336168593406e-06, "loss": 0.582, "step": 6410 }, { "epoch": 0.41, "grad_norm": 0.9053632020950317, "learning_rate": 6.726373308149382e-06, "loss": 0.5984, "step": 6411 }, { "epoch": 0.41, "grad_norm": 0.85235995054245, "learning_rate": 6.725410375013783e-06, "loss": 0.5581, "step": 6412 }, { "epoch": 0.41, "grad_norm": 0.8615298271179199, "learning_rate": 6.724447369227159e-06, "loss": 0.5921, "step": 6413 }, { "epoch": 0.41, "grad_norm": 0.9467587471008301, "learning_rate": 6.723484290830051e-06, "loss": 0.5917, "step": 6414 }, { "epoch": 0.41, "grad_norm": 0.9265984892845154, "learning_rate": 6.722521139863017e-06, "loss": 0.6216, "step": 6415 }, { "epoch": 0.41, "grad_norm": 0.8947895169258118, "learning_rate": 6.72155791636661e-06, "loss": 0.6052, "step": 6416 }, { "epoch": 0.41, "grad_norm": 0.8797786235809326, "learning_rate": 6.720594620381387e-06, "loss": 0.5621, "step": 6417 }, { "epoch": 0.41, "grad_norm": 0.9423597455024719, "learning_rate": 6.71963125194791e-06, "loss": 0.6233, "step": 6418 }, { "epoch": 0.41, "grad_norm": 0.9435870051383972, "learning_rate": 6.718667811106744e-06, "loss": 0.5961, "step": 6419 }, { "epoch": 0.41, "grad_norm": 0.9278707504272461, "learning_rate": 6.717704297898455e-06, "loss": 0.5821, "step": 6420 }, { "epoch": 0.41, "grad_norm": 0.8902246952056885, "learning_rate": 6.716740712363614e-06, "loss": 0.5672, "step": 6421 }, { "epoch": 0.41, "grad_norm": 0.9437769651412964, "learning_rate": 6.715777054542793e-06, "loss": 0.6031, "step": 6422 }, { "epoch": 0.41, "grad_norm": 0.9507419466972351, "learning_rate": 6.714813324476569e-06, "loss": 0.5812, "step": 6423 }, { "epoch": 0.41, "grad_norm": 0.9317444562911987, "learning_rate": 6.713849522205522e-06, "loss": 0.5611, "step": 6424 }, { "epoch": 0.41, "grad_norm": 0.8754682540893555, "learning_rate": 6.712885647770233e-06, "loss": 0.6031, "step": 6425 }, { "epoch": 0.41, "grad_norm": 0.9129989743232727, "learning_rate": 6.711921701211288e-06, "loss": 0.5967, "step": 6426 }, { "epoch": 0.41, "grad_norm": 0.9079276919364929, "learning_rate": 6.710957682569276e-06, "loss": 0.6603, "step": 6427 }, { "epoch": 0.41, "grad_norm": 0.8813990950584412, "learning_rate": 6.709993591884788e-06, "loss": 0.5873, "step": 6428 }, { "epoch": 0.41, "grad_norm": 0.8813159465789795, "learning_rate": 6.709029429198418e-06, "loss": 0.5746, "step": 6429 }, { "epoch": 0.41, "grad_norm": 0.9071645140647888, "learning_rate": 6.7080651945507645e-06, "loss": 0.5743, "step": 6430 }, { "epoch": 0.41, "grad_norm": 0.8338029384613037, "learning_rate": 6.707100887982427e-06, "loss": 0.5769, "step": 6431 }, { "epoch": 0.41, "grad_norm": 0.8543631434440613, "learning_rate": 6.7061365095340105e-06, "loss": 0.592, "step": 6432 }, { "epoch": 0.41, "grad_norm": 0.9253416061401367, "learning_rate": 6.70517205924612e-06, "loss": 0.6099, "step": 6433 }, { "epoch": 0.41, "grad_norm": 0.846316933631897, "learning_rate": 6.7042075371593665e-06, "loss": 0.5734, "step": 6434 }, { "epoch": 0.41, "grad_norm": 0.9376114010810852, "learning_rate": 6.703242943314362e-06, "loss": 0.6333, "step": 6435 }, { "epoch": 0.41, "grad_norm": 0.970414400100708, "learning_rate": 6.702278277751722e-06, "loss": 0.5598, "step": 6436 }, { "epoch": 0.41, "grad_norm": 0.957120418548584, "learning_rate": 6.701313540512065e-06, "loss": 0.6345, "step": 6437 }, { "epoch": 0.41, "grad_norm": 0.932551920413971, "learning_rate": 6.700348731636014e-06, "loss": 0.5905, "step": 6438 }, { "epoch": 0.41, "grad_norm": 0.9044030904769897, "learning_rate": 6.699383851164194e-06, "loss": 0.6525, "step": 6439 }, { "epoch": 0.41, "grad_norm": 0.8771166205406189, "learning_rate": 6.6984188991372305e-06, "loss": 0.599, "step": 6440 }, { "epoch": 0.41, "grad_norm": 0.8178818821907043, "learning_rate": 6.697453875595755e-06, "loss": 0.5609, "step": 6441 }, { "epoch": 0.41, "grad_norm": 0.8368890881538391, "learning_rate": 6.696488780580403e-06, "loss": 0.543, "step": 6442 }, { "epoch": 0.41, "grad_norm": 0.8307216763496399, "learning_rate": 6.69552361413181e-06, "loss": 0.5784, "step": 6443 }, { "epoch": 0.41, "grad_norm": 0.8592568039894104, "learning_rate": 6.694558376290615e-06, "loss": 0.603, "step": 6444 }, { "epoch": 0.41, "grad_norm": 0.8686701655387878, "learning_rate": 6.693593067097462e-06, "loss": 0.6026, "step": 6445 }, { "epoch": 0.41, "grad_norm": 0.9390038251876831, "learning_rate": 6.692627686592998e-06, "loss": 0.6531, "step": 6446 }, { "epoch": 0.41, "grad_norm": 0.9398483633995056, "learning_rate": 6.691662234817869e-06, "loss": 0.6016, "step": 6447 }, { "epoch": 0.41, "grad_norm": 0.8840192556381226, "learning_rate": 6.690696711812729e-06, "loss": 0.5461, "step": 6448 }, { "epoch": 0.41, "grad_norm": 0.8928658366203308, "learning_rate": 6.68973111761823e-06, "loss": 0.5753, "step": 6449 }, { "epoch": 0.41, "grad_norm": 0.9356186985969543, "learning_rate": 6.688765452275033e-06, "loss": 0.6636, "step": 6450 }, { "epoch": 0.41, "grad_norm": 0.8654458522796631, "learning_rate": 6.687799715823798e-06, "loss": 0.5351, "step": 6451 }, { "epoch": 0.41, "grad_norm": 0.867955207824707, "learning_rate": 6.686833908305188e-06, "loss": 0.6091, "step": 6452 }, { "epoch": 0.41, "grad_norm": 0.8342301845550537, "learning_rate": 6.68586802975987e-06, "loss": 0.591, "step": 6453 }, { "epoch": 0.41, "grad_norm": 0.9121977090835571, "learning_rate": 6.684902080228514e-06, "loss": 0.5892, "step": 6454 }, { "epoch": 0.41, "grad_norm": 0.9055156111717224, "learning_rate": 6.6839360597517935e-06, "loss": 0.5665, "step": 6455 }, { "epoch": 0.41, "grad_norm": 0.8971875905990601, "learning_rate": 6.682969968370383e-06, "loss": 0.6021, "step": 6456 }, { "epoch": 0.41, "grad_norm": 0.9293539524078369, "learning_rate": 6.68200380612496e-06, "loss": 0.5815, "step": 6457 }, { "epoch": 0.41, "grad_norm": 0.9090824127197266, "learning_rate": 6.681037573056211e-06, "loss": 0.5778, "step": 6458 }, { "epoch": 0.41, "grad_norm": 0.9384252429008484, "learning_rate": 6.6800712692048164e-06, "loss": 0.5974, "step": 6459 }, { "epoch": 0.41, "grad_norm": 0.9273927211761475, "learning_rate": 6.679104894611466e-06, "loss": 0.6242, "step": 6460 }, { "epoch": 0.41, "grad_norm": 0.9325118660926819, "learning_rate": 6.678138449316848e-06, "loss": 0.6443, "step": 6461 }, { "epoch": 0.41, "grad_norm": 0.8972262740135193, "learning_rate": 6.6771719333616584e-06, "loss": 0.568, "step": 6462 }, { "epoch": 0.41, "grad_norm": 0.828413724899292, "learning_rate": 6.676205346786594e-06, "loss": 0.5929, "step": 6463 }, { "epoch": 0.41, "grad_norm": 0.8351660966873169, "learning_rate": 6.6752386896323526e-06, "loss": 0.6104, "step": 6464 }, { "epoch": 0.41, "grad_norm": 0.9743680953979492, "learning_rate": 6.674271961939638e-06, "loss": 0.6608, "step": 6465 }, { "epoch": 0.41, "grad_norm": 0.8384668231010437, "learning_rate": 6.673305163749155e-06, "loss": 0.5683, "step": 6466 }, { "epoch": 0.41, "grad_norm": 0.8962710499763489, "learning_rate": 6.672338295101614e-06, "loss": 0.5661, "step": 6467 }, { "epoch": 0.41, "grad_norm": 0.8527003526687622, "learning_rate": 6.671371356037723e-06, "loss": 0.6172, "step": 6468 }, { "epoch": 0.41, "grad_norm": 0.967922568321228, "learning_rate": 6.670404346598199e-06, "loss": 0.605, "step": 6469 }, { "epoch": 0.41, "grad_norm": 0.8897997736930847, "learning_rate": 6.669437266823759e-06, "loss": 0.6087, "step": 6470 }, { "epoch": 0.41, "grad_norm": 0.9014569520950317, "learning_rate": 6.668470116755125e-06, "loss": 0.589, "step": 6471 }, { "epoch": 0.41, "grad_norm": 0.8684948086738586, "learning_rate": 6.6675028964330156e-06, "loss": 0.5962, "step": 6472 }, { "epoch": 0.41, "grad_norm": 0.8721036911010742, "learning_rate": 6.666535605898162e-06, "loss": 0.666, "step": 6473 }, { "epoch": 0.41, "grad_norm": 0.8894490599632263, "learning_rate": 6.6655682451912915e-06, "loss": 0.5945, "step": 6474 }, { "epoch": 0.41, "grad_norm": 0.8807538747787476, "learning_rate": 6.664600814353137e-06, "loss": 0.6073, "step": 6475 }, { "epoch": 0.41, "grad_norm": 0.9010364413261414, "learning_rate": 6.6636333134244305e-06, "loss": 0.5884, "step": 6476 }, { "epoch": 0.41, "grad_norm": 0.8854992985725403, "learning_rate": 6.662665742445914e-06, "loss": 0.5852, "step": 6477 }, { "epoch": 0.41, "grad_norm": 0.8660020232200623, "learning_rate": 6.661698101458327e-06, "loss": 0.5954, "step": 6478 }, { "epoch": 0.41, "grad_norm": 0.8577721118927002, "learning_rate": 6.660730390502414e-06, "loss": 0.5837, "step": 6479 }, { "epoch": 0.41, "grad_norm": 0.8442829251289368, "learning_rate": 6.6597626096189206e-06, "loss": 0.619, "step": 6480 }, { "epoch": 0.41, "grad_norm": 0.877422571182251, "learning_rate": 6.658794758848598e-06, "loss": 0.6028, "step": 6481 }, { "epoch": 0.41, "grad_norm": 0.880001425743103, "learning_rate": 6.6578268382322e-06, "loss": 0.5807, "step": 6482 }, { "epoch": 0.41, "grad_norm": 0.8305491209030151, "learning_rate": 6.656858847810479e-06, "loss": 0.5786, "step": 6483 }, { "epoch": 0.41, "grad_norm": 0.8943942785263062, "learning_rate": 6.655890787624195e-06, "loss": 0.5586, "step": 6484 }, { "epoch": 0.41, "grad_norm": 0.893250584602356, "learning_rate": 6.654922657714112e-06, "loss": 0.5612, "step": 6485 }, { "epoch": 0.41, "grad_norm": 0.9150073528289795, "learning_rate": 6.6539544581209935e-06, "loss": 0.6442, "step": 6486 }, { "epoch": 0.41, "grad_norm": 0.8710561394691467, "learning_rate": 6.652986188885605e-06, "loss": 0.6025, "step": 6487 }, { "epoch": 0.41, "grad_norm": 0.8758864402770996, "learning_rate": 6.652017850048719e-06, "loss": 0.5879, "step": 6488 }, { "epoch": 0.41, "grad_norm": 0.925520658493042, "learning_rate": 6.651049441651107e-06, "loss": 0.6254, "step": 6489 }, { "epoch": 0.41, "grad_norm": 0.8633304834365845, "learning_rate": 6.65008096373355e-06, "loss": 0.5776, "step": 6490 }, { "epoch": 0.41, "grad_norm": 0.9169586300849915, "learning_rate": 6.6491124163368215e-06, "loss": 0.587, "step": 6491 }, { "epoch": 0.41, "grad_norm": 0.815740168094635, "learning_rate": 6.648143799501705e-06, "loss": 0.5786, "step": 6492 }, { "epoch": 0.41, "grad_norm": 0.9177011847496033, "learning_rate": 6.647175113268989e-06, "loss": 0.5998, "step": 6493 }, { "epoch": 0.41, "grad_norm": 0.9129186868667603, "learning_rate": 6.646206357679458e-06, "loss": 0.6242, "step": 6494 }, { "epoch": 0.41, "grad_norm": 0.8686244487762451, "learning_rate": 6.645237532773902e-06, "loss": 0.6423, "step": 6495 }, { "epoch": 0.41, "grad_norm": 0.9115392565727234, "learning_rate": 6.64426863859312e-06, "loss": 0.6303, "step": 6496 }, { "epoch": 0.41, "grad_norm": 0.9037183523178101, "learning_rate": 6.643299675177906e-06, "loss": 0.6147, "step": 6497 }, { "epoch": 0.41, "grad_norm": 0.9037627577781677, "learning_rate": 6.642330642569056e-06, "loss": 0.5879, "step": 6498 }, { "epoch": 0.41, "grad_norm": 0.8843808770179749, "learning_rate": 6.641361540807377e-06, "loss": 0.5843, "step": 6499 }, { "epoch": 0.41, "grad_norm": 0.9096183180809021, "learning_rate": 6.640392369933675e-06, "loss": 0.5984, "step": 6500 }, { "epoch": 0.41, "grad_norm": 0.9090222120285034, "learning_rate": 6.639423129988756e-06, "loss": 0.5941, "step": 6501 }, { "epoch": 0.41, "grad_norm": 0.9428609609603882, "learning_rate": 6.638453821013431e-06, "loss": 0.6811, "step": 6502 }, { "epoch": 0.41, "grad_norm": 0.8241065144538879, "learning_rate": 6.637484443048516e-06, "loss": 0.59, "step": 6503 }, { "epoch": 0.41, "grad_norm": 0.8859769701957703, "learning_rate": 6.636514996134828e-06, "loss": 0.555, "step": 6504 }, { "epoch": 0.41, "grad_norm": 0.9394935965538025, "learning_rate": 6.635545480313187e-06, "loss": 0.6377, "step": 6505 }, { "epoch": 0.41, "grad_norm": 0.8997877240180969, "learning_rate": 6.634575895624414e-06, "loss": 0.66, "step": 6506 }, { "epoch": 0.41, "grad_norm": 0.9390882253646851, "learning_rate": 6.6336062421093374e-06, "loss": 0.6316, "step": 6507 }, { "epoch": 0.41, "grad_norm": 0.8696218729019165, "learning_rate": 6.632636519808785e-06, "loss": 0.6006, "step": 6508 }, { "epoch": 0.41, "grad_norm": 0.9233937859535217, "learning_rate": 6.6316667287635875e-06, "loss": 0.6195, "step": 6509 }, { "epoch": 0.41, "grad_norm": 0.9679466485977173, "learning_rate": 6.63069686901458e-06, "loss": 0.6293, "step": 6510 }, { "epoch": 0.41, "grad_norm": 0.8663052320480347, "learning_rate": 6.629726940602601e-06, "loss": 0.6051, "step": 6511 }, { "epoch": 0.41, "grad_norm": 0.8438270092010498, "learning_rate": 6.62875694356849e-06, "loss": 0.6315, "step": 6512 }, { "epoch": 0.41, "grad_norm": 0.9236611723899841, "learning_rate": 6.62778687795309e-06, "loss": 0.564, "step": 6513 }, { "epoch": 0.41, "grad_norm": 0.8759667873382568, "learning_rate": 6.626816743797246e-06, "loss": 0.5565, "step": 6514 }, { "epoch": 0.41, "grad_norm": 0.8147273659706116, "learning_rate": 6.62584654114181e-06, "loss": 0.5434, "step": 6515 }, { "epoch": 0.41, "grad_norm": 0.8937092423439026, "learning_rate": 6.6248762700276315e-06, "loss": 0.6153, "step": 6516 }, { "epoch": 0.41, "grad_norm": 0.8718124032020569, "learning_rate": 6.623905930495565e-06, "loss": 0.5902, "step": 6517 }, { "epoch": 0.41, "grad_norm": 1.0065981149673462, "learning_rate": 6.622935522586469e-06, "loss": 0.5592, "step": 6518 }, { "epoch": 0.41, "grad_norm": 0.8148283362388611, "learning_rate": 6.6219650463412034e-06, "loss": 0.5861, "step": 6519 }, { "epoch": 0.41, "grad_norm": 0.884833574295044, "learning_rate": 6.620994501800634e-06, "loss": 0.5903, "step": 6520 }, { "epoch": 0.41, "grad_norm": 0.873306393623352, "learning_rate": 6.620023889005624e-06, "loss": 0.6419, "step": 6521 }, { "epoch": 0.41, "grad_norm": 0.9364440441131592, "learning_rate": 6.619053207997043e-06, "loss": 0.6001, "step": 6522 }, { "epoch": 0.41, "grad_norm": 0.9023630619049072, "learning_rate": 6.618082458815765e-06, "loss": 0.5793, "step": 6523 }, { "epoch": 0.41, "grad_norm": 0.8948296904563904, "learning_rate": 6.617111641502664e-06, "loss": 0.58, "step": 6524 }, { "epoch": 0.41, "grad_norm": 0.8921267986297607, "learning_rate": 6.616140756098617e-06, "loss": 0.5626, "step": 6525 }, { "epoch": 0.41, "grad_norm": 0.8996078968048096, "learning_rate": 6.615169802644503e-06, "loss": 0.6441, "step": 6526 }, { "epoch": 0.41, "grad_norm": 0.992561936378479, "learning_rate": 6.614198781181209e-06, "loss": 0.6163, "step": 6527 }, { "epoch": 0.41, "grad_norm": 0.8944520354270935, "learning_rate": 6.613227691749619e-06, "loss": 0.6454, "step": 6528 }, { "epoch": 0.41, "grad_norm": 0.9010036587715149, "learning_rate": 6.612256534390624e-06, "loss": 0.6159, "step": 6529 }, { "epoch": 0.41, "grad_norm": 0.893017053604126, "learning_rate": 6.611285309145113e-06, "loss": 0.5674, "step": 6530 }, { "epoch": 0.41, "grad_norm": 0.849725067615509, "learning_rate": 6.610314016053986e-06, "loss": 0.5657, "step": 6531 }, { "epoch": 0.41, "grad_norm": 0.8771043419837952, "learning_rate": 6.609342655158135e-06, "loss": 0.6204, "step": 6532 }, { "epoch": 0.41, "grad_norm": 0.9552651643753052, "learning_rate": 6.608371226498464e-06, "loss": 0.5664, "step": 6533 }, { "epoch": 0.41, "grad_norm": 0.9059584140777588, "learning_rate": 6.607399730115875e-06, "loss": 0.612, "step": 6534 }, { "epoch": 0.41, "grad_norm": 0.8679310083389282, "learning_rate": 6.6064281660512775e-06, "loss": 0.5264, "step": 6535 }, { "epoch": 0.41, "grad_norm": 0.9331749081611633, "learning_rate": 6.6054565343455765e-06, "loss": 0.6195, "step": 6536 }, { "epoch": 0.41, "grad_norm": 0.8616225719451904, "learning_rate": 6.604484835039686e-06, "loss": 0.5867, "step": 6537 }, { "epoch": 0.41, "grad_norm": 0.8593981862068176, "learning_rate": 6.603513068174521e-06, "loss": 0.5371, "step": 6538 }, { "epoch": 0.41, "grad_norm": 0.8332374095916748, "learning_rate": 6.602541233790999e-06, "loss": 0.5885, "step": 6539 }, { "epoch": 0.41, "grad_norm": 0.9694592356681824, "learning_rate": 6.601569331930041e-06, "loss": 0.6602, "step": 6540 }, { "epoch": 0.41, "grad_norm": 0.8334494829177856, "learning_rate": 6.600597362632568e-06, "loss": 0.5729, "step": 6541 }, { "epoch": 0.41, "grad_norm": 0.8962088823318481, "learning_rate": 6.599625325939509e-06, "loss": 0.6357, "step": 6542 }, { "epoch": 0.41, "grad_norm": 0.9451315402984619, "learning_rate": 6.598653221891793e-06, "loss": 0.619, "step": 6543 }, { "epoch": 0.41, "grad_norm": 0.9206660389900208, "learning_rate": 6.597681050530351e-06, "loss": 0.6387, "step": 6544 }, { "epoch": 0.41, "grad_norm": 0.8884252905845642, "learning_rate": 6.596708811896116e-06, "loss": 0.61, "step": 6545 }, { "epoch": 0.41, "grad_norm": 0.8867802619934082, "learning_rate": 6.595736506030029e-06, "loss": 0.6128, "step": 6546 }, { "epoch": 0.41, "grad_norm": 0.8310429453849792, "learning_rate": 6.59476413297303e-06, "loss": 0.5547, "step": 6547 }, { "epoch": 0.41, "grad_norm": 0.9180542230606079, "learning_rate": 6.59379169276606e-06, "loss": 0.5834, "step": 6548 }, { "epoch": 0.41, "grad_norm": 0.8704774379730225, "learning_rate": 6.5928191854500644e-06, "loss": 0.6258, "step": 6549 }, { "epoch": 0.41, "grad_norm": 0.8992159962654114, "learning_rate": 6.591846611065997e-06, "loss": 0.5977, "step": 6550 }, { "epoch": 0.42, "grad_norm": 0.8730959892272949, "learning_rate": 6.590873969654805e-06, "loss": 0.6295, "step": 6551 }, { "epoch": 0.42, "grad_norm": 0.9002555012702942, "learning_rate": 6.589901261257445e-06, "loss": 0.6144, "step": 6552 }, { "epoch": 0.42, "grad_norm": 0.9010111093521118, "learning_rate": 6.588928485914871e-06, "loss": 0.6662, "step": 6553 }, { "epoch": 0.42, "grad_norm": 0.8698523044586182, "learning_rate": 6.587955643668049e-06, "loss": 0.6023, "step": 6554 }, { "epoch": 0.42, "grad_norm": 0.9001327753067017, "learning_rate": 6.58698273455794e-06, "loss": 0.5513, "step": 6555 }, { "epoch": 0.42, "grad_norm": 0.9791713953018188, "learning_rate": 6.586009758625507e-06, "loss": 0.6439, "step": 6556 }, { "epoch": 0.42, "grad_norm": 0.9007930159568787, "learning_rate": 6.585036715911719e-06, "loss": 0.5951, "step": 6557 }, { "epoch": 0.42, "grad_norm": 0.8678936958312988, "learning_rate": 6.58406360645755e-06, "loss": 0.6341, "step": 6558 }, { "epoch": 0.42, "grad_norm": 0.8232488036155701, "learning_rate": 6.583090430303975e-06, "loss": 0.5596, "step": 6559 }, { "epoch": 0.42, "grad_norm": 0.8638771772384644, "learning_rate": 6.582117187491967e-06, "loss": 0.6657, "step": 6560 }, { "epoch": 0.42, "grad_norm": 0.9044223427772522, "learning_rate": 6.581143878062507e-06, "loss": 0.6091, "step": 6561 }, { "epoch": 0.42, "grad_norm": 0.9236576557159424, "learning_rate": 6.58017050205658e-06, "loss": 0.5625, "step": 6562 }, { "epoch": 0.42, "grad_norm": 0.9195695519447327, "learning_rate": 6.5791970595151714e-06, "loss": 0.6354, "step": 6563 }, { "epoch": 0.42, "grad_norm": 0.8628154993057251, "learning_rate": 6.578223550479266e-06, "loss": 0.6041, "step": 6564 }, { "epoch": 0.42, "grad_norm": 0.779208779335022, "learning_rate": 6.5772499749898585e-06, "loss": 0.555, "step": 6565 }, { "epoch": 0.42, "grad_norm": 1.0027345418930054, "learning_rate": 6.576276333087941e-06, "loss": 0.6062, "step": 6566 }, { "epoch": 0.42, "grad_norm": 0.8687159419059753, "learning_rate": 6.575302624814512e-06, "loss": 0.5796, "step": 6567 }, { "epoch": 0.42, "grad_norm": 0.9297102093696594, "learning_rate": 6.5743288502105675e-06, "loss": 0.5697, "step": 6568 }, { "epoch": 0.42, "grad_norm": 0.9116488099098206, "learning_rate": 6.5733550093171115e-06, "loss": 0.5924, "step": 6569 }, { "epoch": 0.42, "grad_norm": 0.9103240370750427, "learning_rate": 6.572381102175151e-06, "loss": 0.5937, "step": 6570 }, { "epoch": 0.42, "grad_norm": 0.8994322419166565, "learning_rate": 6.571407128825692e-06, "loss": 0.6327, "step": 6571 }, { "epoch": 0.42, "grad_norm": 0.9073256850242615, "learning_rate": 6.570433089309745e-06, "loss": 0.5543, "step": 6572 }, { "epoch": 0.42, "grad_norm": 0.902117133140564, "learning_rate": 6.569458983668323e-06, "loss": 0.5465, "step": 6573 }, { "epoch": 0.42, "grad_norm": 0.8766512870788574, "learning_rate": 6.5684848119424435e-06, "loss": 0.6265, "step": 6574 }, { "epoch": 0.42, "grad_norm": 0.8914992213249207, "learning_rate": 6.567510574173126e-06, "loss": 0.6145, "step": 6575 }, { "epoch": 0.42, "grad_norm": 0.9038758277893066, "learning_rate": 6.566536270401389e-06, "loss": 0.6472, "step": 6576 }, { "epoch": 0.42, "grad_norm": 0.9073125123977661, "learning_rate": 6.5655619006682604e-06, "loss": 0.6354, "step": 6577 }, { "epoch": 0.42, "grad_norm": 0.8908482193946838, "learning_rate": 6.5645874650147676e-06, "loss": 0.603, "step": 6578 }, { "epoch": 0.42, "grad_norm": 0.9067582488059998, "learning_rate": 6.563612963481938e-06, "loss": 0.5947, "step": 6579 }, { "epoch": 0.42, "grad_norm": 0.926496148109436, "learning_rate": 6.562638396110805e-06, "loss": 0.5645, "step": 6580 }, { "epoch": 0.42, "grad_norm": 1.002769112586975, "learning_rate": 6.561663762942407e-06, "loss": 0.6122, "step": 6581 }, { "epoch": 0.42, "grad_norm": 1.0203826427459717, "learning_rate": 6.560689064017781e-06, "loss": 0.5591, "step": 6582 }, { "epoch": 0.42, "grad_norm": 0.9004783034324646, "learning_rate": 6.559714299377966e-06, "loss": 0.6435, "step": 6583 }, { "epoch": 0.42, "grad_norm": 0.8488182425498962, "learning_rate": 6.558739469064008e-06, "loss": 0.6716, "step": 6584 }, { "epoch": 0.42, "grad_norm": 0.9152944087982178, "learning_rate": 6.5577645731169535e-06, "loss": 0.6506, "step": 6585 }, { "epoch": 0.42, "grad_norm": 1.0391813516616821, "learning_rate": 6.556789611577854e-06, "loss": 0.6031, "step": 6586 }, { "epoch": 0.42, "grad_norm": 0.8500710129737854, "learning_rate": 6.555814584487757e-06, "loss": 0.604, "step": 6587 }, { "epoch": 0.42, "grad_norm": 0.9001255631446838, "learning_rate": 6.5548394918877216e-06, "loss": 0.6199, "step": 6588 }, { "epoch": 0.42, "grad_norm": 0.8187232613563538, "learning_rate": 6.553864333818803e-06, "loss": 0.5698, "step": 6589 }, { "epoch": 0.42, "grad_norm": 0.8661369681358337, "learning_rate": 6.552889110322062e-06, "loss": 0.568, "step": 6590 }, { "epoch": 0.42, "grad_norm": 0.9107438325881958, "learning_rate": 6.551913821438565e-06, "loss": 0.5363, "step": 6591 }, { "epoch": 0.42, "grad_norm": 0.8906565308570862, "learning_rate": 6.550938467209375e-06, "loss": 0.5933, "step": 6592 }, { "epoch": 0.42, "grad_norm": 0.865942656993866, "learning_rate": 6.5499630476755616e-06, "loss": 0.5402, "step": 6593 }, { "epoch": 0.42, "grad_norm": 0.862751305103302, "learning_rate": 6.548987562878195e-06, "loss": 0.5899, "step": 6594 }, { "epoch": 0.42, "grad_norm": 0.8719815015792847, "learning_rate": 6.548012012858352e-06, "loss": 0.5804, "step": 6595 }, { "epoch": 0.42, "grad_norm": 0.9156465530395508, "learning_rate": 6.547036397657106e-06, "loss": 0.5877, "step": 6596 }, { "epoch": 0.42, "grad_norm": 0.9153836965560913, "learning_rate": 6.546060717315542e-06, "loss": 0.6035, "step": 6597 }, { "epoch": 0.42, "grad_norm": 0.9377827644348145, "learning_rate": 6.545084971874738e-06, "loss": 0.6147, "step": 6598 }, { "epoch": 0.42, "grad_norm": 0.9083762168884277, "learning_rate": 6.5441091613757805e-06, "loss": 0.5889, "step": 6599 }, { "epoch": 0.42, "grad_norm": 0.870985746383667, "learning_rate": 6.543133285859758e-06, "loss": 0.5776, "step": 6600 }, { "epoch": 0.42, "grad_norm": 0.8286287188529968, "learning_rate": 6.542157345367763e-06, "loss": 0.5341, "step": 6601 }, { "epoch": 0.42, "grad_norm": 0.9526362419128418, "learning_rate": 6.5411813399408845e-06, "loss": 0.6605, "step": 6602 }, { "epoch": 0.42, "grad_norm": 0.8954978585243225, "learning_rate": 6.540205269620221e-06, "loss": 0.6206, "step": 6603 }, { "epoch": 0.42, "grad_norm": 0.8499834537506104, "learning_rate": 6.539229134446874e-06, "loss": 0.5997, "step": 6604 }, { "epoch": 0.42, "grad_norm": 0.9309713840484619, "learning_rate": 6.538252934461941e-06, "loss": 0.5951, "step": 6605 }, { "epoch": 0.42, "grad_norm": 0.9219299554824829, "learning_rate": 6.537276669706527e-06, "loss": 0.6351, "step": 6606 }, { "epoch": 0.42, "grad_norm": 0.8586229085922241, "learning_rate": 6.536300340221742e-06, "loss": 0.6374, "step": 6607 }, { "epoch": 0.42, "grad_norm": 0.8765944242477417, "learning_rate": 6.535323946048695e-06, "loss": 0.5754, "step": 6608 }, { "epoch": 0.42, "grad_norm": 0.8662661910057068, "learning_rate": 6.534347487228495e-06, "loss": 0.5382, "step": 6609 }, { "epoch": 0.42, "grad_norm": 0.8375385999679565, "learning_rate": 6.533370963802261e-06, "loss": 0.5487, "step": 6610 }, { "epoch": 0.42, "grad_norm": 0.9295637011528015, "learning_rate": 6.532394375811111e-06, "loss": 0.6287, "step": 6611 }, { "epoch": 0.42, "grad_norm": 0.8826519250869751, "learning_rate": 6.531417723296164e-06, "loss": 0.5982, "step": 6612 }, { "epoch": 0.42, "grad_norm": 0.9090942740440369, "learning_rate": 6.530441006298544e-06, "loss": 0.6041, "step": 6613 }, { "epoch": 0.42, "grad_norm": 0.8962193131446838, "learning_rate": 6.5294642248593765e-06, "loss": 0.6588, "step": 6614 }, { "epoch": 0.42, "grad_norm": 0.9374716281890869, "learning_rate": 6.528487379019791e-06, "loss": 0.5957, "step": 6615 }, { "epoch": 0.42, "grad_norm": 0.8533490300178528, "learning_rate": 6.5275104688209215e-06, "loss": 0.567, "step": 6616 }, { "epoch": 0.42, "grad_norm": 0.9722062349319458, "learning_rate": 6.526533494303898e-06, "loss": 0.65, "step": 6617 }, { "epoch": 0.42, "grad_norm": 0.9122631549835205, "learning_rate": 6.525556455509858e-06, "loss": 0.6388, "step": 6618 }, { "epoch": 0.42, "grad_norm": 0.8122672438621521, "learning_rate": 6.5245793524799465e-06, "loss": 0.5713, "step": 6619 }, { "epoch": 0.42, "grad_norm": 0.8519312739372253, "learning_rate": 6.5236021852553e-06, "loss": 0.633, "step": 6620 }, { "epoch": 0.42, "grad_norm": 0.9361836910247803, "learning_rate": 6.522624953877066e-06, "loss": 0.6151, "step": 6621 }, { "epoch": 0.42, "grad_norm": 0.832336962223053, "learning_rate": 6.52164765838639e-06, "loss": 0.545, "step": 6622 }, { "epoch": 0.42, "grad_norm": 0.8808085322380066, "learning_rate": 6.520670298824428e-06, "loss": 0.5869, "step": 6623 }, { "epoch": 0.42, "grad_norm": 0.9161610007286072, "learning_rate": 6.519692875232328e-06, "loss": 0.5429, "step": 6624 }, { "epoch": 0.42, "grad_norm": 0.9123284220695496, "learning_rate": 6.518715387651249e-06, "loss": 0.6242, "step": 6625 }, { "epoch": 0.42, "grad_norm": 0.8876476883888245, "learning_rate": 6.517737836122345e-06, "loss": 0.6074, "step": 6626 }, { "epoch": 0.42, "grad_norm": 0.8674134016036987, "learning_rate": 6.516760220686783e-06, "loss": 0.6166, "step": 6627 }, { "epoch": 0.42, "grad_norm": 0.9016671776771545, "learning_rate": 6.515782541385725e-06, "loss": 0.6589, "step": 6628 }, { "epoch": 0.42, "grad_norm": 0.9042580127716064, "learning_rate": 6.514804798260337e-06, "loss": 0.6446, "step": 6629 }, { "epoch": 0.42, "grad_norm": 0.8501339554786682, "learning_rate": 6.513826991351786e-06, "loss": 0.6305, "step": 6630 }, { "epoch": 0.42, "grad_norm": 0.8709739446640015, "learning_rate": 6.512849120701249e-06, "loss": 0.5833, "step": 6631 }, { "epoch": 0.42, "grad_norm": 0.8446660041809082, "learning_rate": 6.511871186349897e-06, "loss": 0.5731, "step": 6632 }, { "epoch": 0.42, "grad_norm": 0.8911131620407104, "learning_rate": 6.510893188338911e-06, "loss": 0.6193, "step": 6633 }, { "epoch": 0.42, "grad_norm": 0.8830543756484985, "learning_rate": 6.509915126709467e-06, "loss": 0.5538, "step": 6634 }, { "epoch": 0.42, "grad_norm": 0.9163636565208435, "learning_rate": 6.50893700150275e-06, "loss": 0.5837, "step": 6635 }, { "epoch": 0.42, "grad_norm": 0.8984601497650146, "learning_rate": 6.5079588127599455e-06, "loss": 0.5719, "step": 6636 }, { "epoch": 0.42, "grad_norm": 0.8621581196784973, "learning_rate": 6.50698056052224e-06, "loss": 0.5852, "step": 6637 }, { "epoch": 0.42, "grad_norm": 0.837208092212677, "learning_rate": 6.506002244830827e-06, "loss": 0.5823, "step": 6638 }, { "epoch": 0.42, "grad_norm": 0.8576619029045105, "learning_rate": 6.505023865726898e-06, "loss": 0.5895, "step": 6639 }, { "epoch": 0.42, "grad_norm": 0.8191852569580078, "learning_rate": 6.50404542325165e-06, "loss": 0.5928, "step": 6640 }, { "epoch": 0.42, "grad_norm": 0.9108313322067261, "learning_rate": 6.503066917446279e-06, "loss": 0.5934, "step": 6641 }, { "epoch": 0.42, "grad_norm": 0.867838442325592, "learning_rate": 6.502088348351992e-06, "loss": 0.5703, "step": 6642 }, { "epoch": 0.42, "grad_norm": 0.8657822608947754, "learning_rate": 6.501109716009988e-06, "loss": 0.5505, "step": 6643 }, { "epoch": 0.42, "grad_norm": 0.8648907542228699, "learning_rate": 6.500131020461477e-06, "loss": 0.5656, "step": 6644 }, { "epoch": 0.42, "grad_norm": 0.8719663619995117, "learning_rate": 6.4991522617476666e-06, "loss": 0.5562, "step": 6645 }, { "epoch": 0.42, "grad_norm": 0.8621832728385925, "learning_rate": 6.498173439909771e-06, "loss": 0.6282, "step": 6646 }, { "epoch": 0.42, "grad_norm": 0.8455116748809814, "learning_rate": 6.497194554989001e-06, "loss": 0.5634, "step": 6647 }, { "epoch": 0.42, "grad_norm": 0.8869051933288574, "learning_rate": 6.496215607026579e-06, "loss": 0.5434, "step": 6648 }, { "epoch": 0.42, "grad_norm": 0.8986787796020508, "learning_rate": 6.495236596063722e-06, "loss": 0.5891, "step": 6649 }, { "epoch": 0.42, "grad_norm": 0.8959378004074097, "learning_rate": 6.494257522141654e-06, "loss": 0.5889, "step": 6650 }, { "epoch": 0.42, "grad_norm": 1.0358039140701294, "learning_rate": 6.4932783853016005e-06, "loss": 0.6392, "step": 6651 }, { "epoch": 0.42, "grad_norm": 0.8593500256538391, "learning_rate": 6.492299185584787e-06, "loss": 0.5139, "step": 6652 }, { "epoch": 0.42, "grad_norm": 0.9131850600242615, "learning_rate": 6.491319923032446e-06, "loss": 0.5909, "step": 6653 }, { "epoch": 0.42, "grad_norm": 0.8696557283401489, "learning_rate": 6.490340597685811e-06, "loss": 0.5991, "step": 6654 }, { "epoch": 0.42, "grad_norm": 0.8535944819450378, "learning_rate": 6.48936120958612e-06, "loss": 0.5772, "step": 6655 }, { "epoch": 0.42, "grad_norm": 0.8401122689247131, "learning_rate": 6.488381758774609e-06, "loss": 0.6119, "step": 6656 }, { "epoch": 0.42, "grad_norm": 0.8727520704269409, "learning_rate": 6.487402245292518e-06, "loss": 0.5869, "step": 6657 }, { "epoch": 0.42, "grad_norm": 0.9223040342330933, "learning_rate": 6.486422669181094e-06, "loss": 0.6218, "step": 6658 }, { "epoch": 0.42, "grad_norm": 0.868571937084198, "learning_rate": 6.485443030481583e-06, "loss": 0.607, "step": 6659 }, { "epoch": 0.42, "grad_norm": 0.9342830777168274, "learning_rate": 6.4844633292352335e-06, "loss": 0.6237, "step": 6660 }, { "epoch": 0.42, "grad_norm": 0.8326634764671326, "learning_rate": 6.483483565483295e-06, "loss": 0.5574, "step": 6661 }, { "epoch": 0.42, "grad_norm": 0.8713539242744446, "learning_rate": 6.482503739267026e-06, "loss": 0.5629, "step": 6662 }, { "epoch": 0.42, "grad_norm": 0.8934717178344727, "learning_rate": 6.481523850627682e-06, "loss": 0.63, "step": 6663 }, { "epoch": 0.42, "grad_norm": 0.9452871084213257, "learning_rate": 6.4805438996065215e-06, "loss": 0.6842, "step": 6664 }, { "epoch": 0.42, "grad_norm": 0.8760863542556763, "learning_rate": 6.479563886244809e-06, "loss": 0.6127, "step": 6665 }, { "epoch": 0.42, "grad_norm": 0.901567280292511, "learning_rate": 6.478583810583807e-06, "loss": 0.5937, "step": 6666 }, { "epoch": 0.42, "grad_norm": 0.9208518266677856, "learning_rate": 6.477603672664785e-06, "loss": 0.5968, "step": 6667 }, { "epoch": 0.42, "grad_norm": 0.8600721955299377, "learning_rate": 6.476623472529012e-06, "loss": 0.5893, "step": 6668 }, { "epoch": 0.42, "grad_norm": 0.8686032295227051, "learning_rate": 6.475643210217762e-06, "loss": 0.6386, "step": 6669 }, { "epoch": 0.42, "grad_norm": 0.9031897187232971, "learning_rate": 6.47466288577231e-06, "loss": 0.6378, "step": 6670 }, { "epoch": 0.42, "grad_norm": 0.82213294506073, "learning_rate": 6.473682499233934e-06, "loss": 0.5587, "step": 6671 }, { "epoch": 0.42, "grad_norm": 0.83423912525177, "learning_rate": 6.472702050643913e-06, "loss": 0.5684, "step": 6672 }, { "epoch": 0.42, "grad_norm": 0.9469904899597168, "learning_rate": 6.471721540043533e-06, "loss": 0.5984, "step": 6673 }, { "epoch": 0.42, "grad_norm": 0.9193524718284607, "learning_rate": 6.47074096747408e-06, "loss": 0.6047, "step": 6674 }, { "epoch": 0.42, "grad_norm": 0.8525941967964172, "learning_rate": 6.469760332976839e-06, "loss": 0.5274, "step": 6675 }, { "epoch": 0.42, "grad_norm": 0.9107722043991089, "learning_rate": 6.4687796365931035e-06, "loss": 0.5995, "step": 6676 }, { "epoch": 0.42, "grad_norm": 0.8921878933906555, "learning_rate": 6.467798878364168e-06, "loss": 0.589, "step": 6677 }, { "epoch": 0.42, "grad_norm": 0.8674684166908264, "learning_rate": 6.466818058331328e-06, "loss": 0.5953, "step": 6678 }, { "epoch": 0.42, "grad_norm": 0.9107003211975098, "learning_rate": 6.465837176535881e-06, "loss": 0.5684, "step": 6679 }, { "epoch": 0.42, "grad_norm": 0.8787494897842407, "learning_rate": 6.46485623301913e-06, "loss": 0.5651, "step": 6680 }, { "epoch": 0.42, "grad_norm": 0.8581385016441345, "learning_rate": 6.46387522782238e-06, "loss": 0.6103, "step": 6681 }, { "epoch": 0.42, "grad_norm": 0.8561550378799438, "learning_rate": 6.462894160986937e-06, "loss": 0.6005, "step": 6682 }, { "epoch": 0.42, "grad_norm": 0.912476122379303, "learning_rate": 6.461913032554108e-06, "loss": 0.5908, "step": 6683 }, { "epoch": 0.42, "grad_norm": 0.8894026875495911, "learning_rate": 6.460931842565207e-06, "loss": 0.59, "step": 6684 }, { "epoch": 0.42, "grad_norm": 0.8487771153450012, "learning_rate": 6.4599505910615505e-06, "loss": 0.5819, "step": 6685 }, { "epoch": 0.42, "grad_norm": 0.8792235255241394, "learning_rate": 6.45896927808445e-06, "loss": 0.5918, "step": 6686 }, { "epoch": 0.42, "grad_norm": 0.8825286626815796, "learning_rate": 6.4579879036752315e-06, "loss": 0.6375, "step": 6687 }, { "epoch": 0.42, "grad_norm": 0.857980489730835, "learning_rate": 6.457006467875213e-06, "loss": 0.5757, "step": 6688 }, { "epoch": 0.42, "grad_norm": 0.9466935396194458, "learning_rate": 6.456024970725722e-06, "loss": 0.5619, "step": 6689 }, { "epoch": 0.42, "grad_norm": 0.9351262450218201, "learning_rate": 6.455043412268083e-06, "loss": 0.5754, "step": 6690 }, { "epoch": 0.42, "grad_norm": 0.9212837815284729, "learning_rate": 6.4540617925436275e-06, "loss": 0.5439, "step": 6691 }, { "epoch": 0.42, "grad_norm": 0.8872720003128052, "learning_rate": 6.453080111593689e-06, "loss": 0.6074, "step": 6692 }, { "epoch": 0.42, "grad_norm": 0.8125371336936951, "learning_rate": 6.4520983694596025e-06, "loss": 0.546, "step": 6693 }, { "epoch": 0.42, "grad_norm": 0.9116303324699402, "learning_rate": 6.451116566182704e-06, "loss": 0.5957, "step": 6694 }, { "epoch": 0.42, "grad_norm": 0.8791963458061218, "learning_rate": 6.4501347018043356e-06, "loss": 0.5968, "step": 6695 }, { "epoch": 0.42, "grad_norm": 0.9115133285522461, "learning_rate": 6.44915277636584e-06, "loss": 0.6176, "step": 6696 }, { "epoch": 0.42, "grad_norm": 0.9027078151702881, "learning_rate": 6.4481707899085624e-06, "loss": 0.5877, "step": 6697 }, { "epoch": 0.42, "grad_norm": 0.9268640279769897, "learning_rate": 6.447188742473849e-06, "loss": 0.6045, "step": 6698 }, { "epoch": 0.42, "grad_norm": 0.9698523879051208, "learning_rate": 6.446206634103053e-06, "loss": 0.6128, "step": 6699 }, { "epoch": 0.42, "grad_norm": 0.8391386866569519, "learning_rate": 6.445224464837527e-06, "loss": 0.632, "step": 6700 }, { "epoch": 0.42, "grad_norm": 0.8128573894500732, "learning_rate": 6.444242234718626e-06, "loss": 0.5394, "step": 6701 }, { "epoch": 0.42, "grad_norm": 0.8981994986534119, "learning_rate": 6.443259943787708e-06, "loss": 0.613, "step": 6702 }, { "epoch": 0.42, "grad_norm": 0.8788542747497559, "learning_rate": 6.442277592086135e-06, "loss": 0.6071, "step": 6703 }, { "epoch": 0.42, "grad_norm": 0.8909327387809753, "learning_rate": 6.4412951796552715e-06, "loss": 0.643, "step": 6704 }, { "epoch": 0.42, "grad_norm": 0.8961040377616882, "learning_rate": 6.44031270653648e-06, "loss": 0.6128, "step": 6705 }, { "epoch": 0.42, "grad_norm": 0.9458435773849487, "learning_rate": 6.4393301727711296e-06, "loss": 0.6847, "step": 6706 }, { "epoch": 0.42, "grad_norm": 0.9194031357765198, "learning_rate": 6.438347578400595e-06, "loss": 0.6484, "step": 6707 }, { "epoch": 0.42, "grad_norm": 0.9502468705177307, "learning_rate": 6.437364923466247e-06, "loss": 0.6456, "step": 6708 }, { "epoch": 0.43, "grad_norm": 0.8829242587089539, "learning_rate": 6.436382208009463e-06, "loss": 0.64, "step": 6709 }, { "epoch": 0.43, "grad_norm": 0.9908746480941772, "learning_rate": 6.43539943207162e-06, "loss": 0.6066, "step": 6710 }, { "epoch": 0.43, "grad_norm": 0.8264694213867188, "learning_rate": 6.434416595694102e-06, "loss": 0.5476, "step": 6711 }, { "epoch": 0.43, "grad_norm": 0.8517434000968933, "learning_rate": 6.433433698918289e-06, "loss": 0.5964, "step": 6712 }, { "epoch": 0.43, "grad_norm": 0.8980498909950256, "learning_rate": 6.432450741785571e-06, "loss": 0.5834, "step": 6713 }, { "epoch": 0.43, "grad_norm": 0.9315409660339355, "learning_rate": 6.431467724337332e-06, "loss": 0.5854, "step": 6714 }, { "epoch": 0.43, "grad_norm": 0.9002824425697327, "learning_rate": 6.430484646614971e-06, "loss": 0.7002, "step": 6715 }, { "epoch": 0.43, "grad_norm": 0.8017958998680115, "learning_rate": 6.429501508659877e-06, "loss": 0.555, "step": 6716 }, { "epoch": 0.43, "grad_norm": 0.87758469581604, "learning_rate": 6.428518310513446e-06, "loss": 0.6322, "step": 6717 }, { "epoch": 0.43, "grad_norm": 0.9012987613677979, "learning_rate": 6.427535052217078e-06, "loss": 0.6406, "step": 6718 }, { "epoch": 0.43, "grad_norm": 0.8559851050376892, "learning_rate": 6.4265517338121764e-06, "loss": 0.6203, "step": 6719 }, { "epoch": 0.43, "grad_norm": 0.8328604102134705, "learning_rate": 6.4255683553401435e-06, "loss": 0.6212, "step": 6720 }, { "epoch": 0.43, "grad_norm": 0.8184729814529419, "learning_rate": 6.424584916842387e-06, "loss": 0.5646, "step": 6721 }, { "epoch": 0.43, "grad_norm": 0.8883371353149414, "learning_rate": 6.423601418360314e-06, "loss": 0.585, "step": 6722 }, { "epoch": 0.43, "grad_norm": 0.9067890644073486, "learning_rate": 6.4226178599353385e-06, "loss": 0.6143, "step": 6723 }, { "epoch": 0.43, "grad_norm": 0.9497420787811279, "learning_rate": 6.421634241608874e-06, "loss": 0.6008, "step": 6724 }, { "epoch": 0.43, "grad_norm": 0.8506911396980286, "learning_rate": 6.420650563422337e-06, "loss": 0.5696, "step": 6725 }, { "epoch": 0.43, "grad_norm": 0.8900071978569031, "learning_rate": 6.419666825417147e-06, "loss": 0.6127, "step": 6726 }, { "epoch": 0.43, "grad_norm": 0.8769127726554871, "learning_rate": 6.4186830276347246e-06, "loss": 0.582, "step": 6727 }, { "epoch": 0.43, "grad_norm": 0.9167845249176025, "learning_rate": 6.417699170116497e-06, "loss": 0.6236, "step": 6728 }, { "epoch": 0.43, "grad_norm": 0.8981141448020935, "learning_rate": 6.416715252903888e-06, "loss": 0.6296, "step": 6729 }, { "epoch": 0.43, "grad_norm": 0.8629951477050781, "learning_rate": 6.415731276038327e-06, "loss": 0.5923, "step": 6730 }, { "epoch": 0.43, "grad_norm": 0.8666777014732361, "learning_rate": 6.414747239561249e-06, "loss": 0.616, "step": 6731 }, { "epoch": 0.43, "grad_norm": 0.9089431762695312, "learning_rate": 6.413763143514086e-06, "loss": 0.6325, "step": 6732 }, { "epoch": 0.43, "grad_norm": 0.9420037865638733, "learning_rate": 6.412778987938273e-06, "loss": 0.6228, "step": 6733 }, { "epoch": 0.43, "grad_norm": 0.9398850202560425, "learning_rate": 6.411794772875253e-06, "loss": 0.6237, "step": 6734 }, { "epoch": 0.43, "grad_norm": 0.842210054397583, "learning_rate": 6.4108104983664665e-06, "loss": 0.5431, "step": 6735 }, { "epoch": 0.43, "grad_norm": 0.8711762428283691, "learning_rate": 6.409826164453359e-06, "loss": 0.5273, "step": 6736 }, { "epoch": 0.43, "grad_norm": 0.9114711284637451, "learning_rate": 6.408841771177373e-06, "loss": 0.6513, "step": 6737 }, { "epoch": 0.43, "grad_norm": 0.86746746301651, "learning_rate": 6.407857318579963e-06, "loss": 0.5329, "step": 6738 }, { "epoch": 0.43, "grad_norm": 0.8684642314910889, "learning_rate": 6.4068728067025785e-06, "loss": 0.6305, "step": 6739 }, { "epoch": 0.43, "grad_norm": 0.8810404539108276, "learning_rate": 6.405888235586676e-06, "loss": 0.5681, "step": 6740 }, { "epoch": 0.43, "grad_norm": 0.9271003603935242, "learning_rate": 6.4049036052737065e-06, "loss": 0.6079, "step": 6741 }, { "epoch": 0.43, "grad_norm": 0.9591821432113647, "learning_rate": 6.403918915805138e-06, "loss": 0.5997, "step": 6742 }, { "epoch": 0.43, "grad_norm": 0.939398467540741, "learning_rate": 6.402934167222427e-06, "loss": 0.6271, "step": 6743 }, { "epoch": 0.43, "grad_norm": 0.8856723308563232, "learning_rate": 6.4019493595670365e-06, "loss": 0.5538, "step": 6744 }, { "epoch": 0.43, "grad_norm": 0.8420456051826477, "learning_rate": 6.400964492880437e-06, "loss": 0.6144, "step": 6745 }, { "epoch": 0.43, "grad_norm": 0.9057135581970215, "learning_rate": 6.399979567204096e-06, "loss": 0.6294, "step": 6746 }, { "epoch": 0.43, "grad_norm": 0.8914698958396912, "learning_rate": 6.398994582579485e-06, "loss": 0.6168, "step": 6747 }, { "epoch": 0.43, "grad_norm": 0.7790830135345459, "learning_rate": 6.39800953904808e-06, "loss": 0.563, "step": 6748 }, { "epoch": 0.43, "grad_norm": 0.8107707500457764, "learning_rate": 6.397024436651356e-06, "loss": 0.5594, "step": 6749 }, { "epoch": 0.43, "grad_norm": 0.9206598401069641, "learning_rate": 6.396039275430792e-06, "loss": 0.6187, "step": 6750 }, { "epoch": 0.43, "grad_norm": 0.973175048828125, "learning_rate": 6.395054055427872e-06, "loss": 0.6636, "step": 6751 }, { "epoch": 0.43, "grad_norm": 0.8376911878585815, "learning_rate": 6.394068776684078e-06, "loss": 0.5941, "step": 6752 }, { "epoch": 0.43, "grad_norm": 0.8403307795524597, "learning_rate": 6.393083439240897e-06, "loss": 0.5494, "step": 6753 }, { "epoch": 0.43, "grad_norm": 0.8593806028366089, "learning_rate": 6.39209804313982e-06, "loss": 0.5894, "step": 6754 }, { "epoch": 0.43, "grad_norm": 0.81999671459198, "learning_rate": 6.391112588422337e-06, "loss": 0.5574, "step": 6755 }, { "epoch": 0.43, "grad_norm": 0.917597770690918, "learning_rate": 6.390127075129941e-06, "loss": 0.556, "step": 6756 }, { "epoch": 0.43, "grad_norm": 0.9527899026870728, "learning_rate": 6.38914150330413e-06, "loss": 0.6124, "step": 6757 }, { "epoch": 0.43, "grad_norm": 0.9341422319412231, "learning_rate": 6.388155872986404e-06, "loss": 0.5947, "step": 6758 }, { "epoch": 0.43, "grad_norm": 0.8585883378982544, "learning_rate": 6.3871701842182625e-06, "loss": 0.579, "step": 6759 }, { "epoch": 0.43, "grad_norm": 0.8847423195838928, "learning_rate": 6.386184437041208e-06, "loss": 0.6301, "step": 6760 }, { "epoch": 0.43, "grad_norm": 0.9007404446601868, "learning_rate": 6.385198631496752e-06, "loss": 0.6414, "step": 6761 }, { "epoch": 0.43, "grad_norm": 0.8374068140983582, "learning_rate": 6.3842127676263995e-06, "loss": 0.5746, "step": 6762 }, { "epoch": 0.43, "grad_norm": 0.8941364884376526, "learning_rate": 6.383226845471663e-06, "loss": 0.6523, "step": 6763 }, { "epoch": 0.43, "grad_norm": 0.8259331583976746, "learning_rate": 6.382240865074055e-06, "loss": 0.5926, "step": 6764 }, { "epoch": 0.43, "grad_norm": 0.8802745342254639, "learning_rate": 6.381254826475093e-06, "loss": 0.5734, "step": 6765 }, { "epoch": 0.43, "grad_norm": 0.8371772766113281, "learning_rate": 6.380268729716296e-06, "loss": 0.634, "step": 6766 }, { "epoch": 0.43, "grad_norm": 0.9037656188011169, "learning_rate": 6.379282574839184e-06, "loss": 0.6121, "step": 6767 }, { "epoch": 0.43, "grad_norm": 0.8730578422546387, "learning_rate": 6.37829636188528e-06, "loss": 0.5665, "step": 6768 }, { "epoch": 0.43, "grad_norm": 0.9292997717857361, "learning_rate": 6.377310090896112e-06, "loss": 0.5958, "step": 6769 }, { "epoch": 0.43, "grad_norm": 0.9569960236549377, "learning_rate": 6.376323761913208e-06, "loss": 0.5732, "step": 6770 }, { "epoch": 0.43, "grad_norm": 0.8929063081741333, "learning_rate": 6.375337374978097e-06, "loss": 0.6509, "step": 6771 }, { "epoch": 0.43, "grad_norm": 0.9145770072937012, "learning_rate": 6.374350930132313e-06, "loss": 0.6502, "step": 6772 }, { "epoch": 0.43, "grad_norm": 0.8785668015480042, "learning_rate": 6.373364427417395e-06, "loss": 0.6297, "step": 6773 }, { "epoch": 0.43, "grad_norm": 0.8315816521644592, "learning_rate": 6.372377866874876e-06, "loss": 0.5375, "step": 6774 }, { "epoch": 0.43, "grad_norm": 0.8660714030265808, "learning_rate": 6.371391248546299e-06, "loss": 0.5814, "step": 6775 }, { "epoch": 0.43, "grad_norm": 0.801703691482544, "learning_rate": 6.370404572473209e-06, "loss": 0.6316, "step": 6776 }, { "epoch": 0.43, "grad_norm": 0.9656221270561218, "learning_rate": 6.36941783869715e-06, "loss": 0.6798, "step": 6777 }, { "epoch": 0.43, "grad_norm": 0.8311265707015991, "learning_rate": 6.368431047259668e-06, "loss": 0.5343, "step": 6778 }, { "epoch": 0.43, "grad_norm": 0.9228345155715942, "learning_rate": 6.367444198202315e-06, "loss": 0.6175, "step": 6779 }, { "epoch": 0.43, "grad_norm": 0.9040692448616028, "learning_rate": 6.366457291566645e-06, "loss": 0.6427, "step": 6780 }, { "epoch": 0.43, "grad_norm": 0.8820178508758545, "learning_rate": 6.365470327394212e-06, "loss": 0.6016, "step": 6781 }, { "epoch": 0.43, "grad_norm": 0.8262830376625061, "learning_rate": 6.3644833057265735e-06, "loss": 0.615, "step": 6782 }, { "epoch": 0.43, "grad_norm": 0.8388856053352356, "learning_rate": 6.363496226605289e-06, "loss": 0.5929, "step": 6783 }, { "epoch": 0.43, "grad_norm": 0.9199455380439758, "learning_rate": 6.362509090071922e-06, "loss": 0.5725, "step": 6784 }, { "epoch": 0.43, "grad_norm": 0.9267382621765137, "learning_rate": 6.361521896168037e-06, "loss": 0.6032, "step": 6785 }, { "epoch": 0.43, "grad_norm": 0.858314573764801, "learning_rate": 6.360534644935201e-06, "loss": 0.6036, "step": 6786 }, { "epoch": 0.43, "grad_norm": 0.8850862979888916, "learning_rate": 6.359547336414985e-06, "loss": 0.5966, "step": 6787 }, { "epoch": 0.43, "grad_norm": 0.910456657409668, "learning_rate": 6.358559970648958e-06, "loss": 0.6172, "step": 6788 }, { "epoch": 0.43, "grad_norm": 0.8374682068824768, "learning_rate": 6.357572547678701e-06, "loss": 0.5973, "step": 6789 }, { "epoch": 0.43, "grad_norm": 0.8853792548179626, "learning_rate": 6.356585067545784e-06, "loss": 0.5811, "step": 6790 }, { "epoch": 0.43, "grad_norm": 0.8989521861076355, "learning_rate": 6.355597530291788e-06, "loss": 0.6074, "step": 6791 }, { "epoch": 0.43, "grad_norm": 0.9015896916389465, "learning_rate": 6.354609935958298e-06, "loss": 0.5856, "step": 6792 }, { "epoch": 0.43, "grad_norm": 0.937824010848999, "learning_rate": 6.3536222845868934e-06, "loss": 0.6377, "step": 6793 }, { "epoch": 0.43, "grad_norm": 0.9121703505516052, "learning_rate": 6.3526345762191656e-06, "loss": 0.6304, "step": 6794 }, { "epoch": 0.43, "grad_norm": 0.8476263284683228, "learning_rate": 6.351646810896699e-06, "loss": 0.5777, "step": 6795 }, { "epoch": 0.43, "grad_norm": 0.8620879650115967, "learning_rate": 6.350658988661089e-06, "loss": 0.5876, "step": 6796 }, { "epoch": 0.43, "grad_norm": 0.8748513460159302, "learning_rate": 6.349671109553928e-06, "loss": 0.5557, "step": 6797 }, { "epoch": 0.43, "grad_norm": 0.9378863573074341, "learning_rate": 6.348683173616811e-06, "loss": 0.5726, "step": 6798 }, { "epoch": 0.43, "grad_norm": 0.8460593223571777, "learning_rate": 6.347695180891337e-06, "loss": 0.6111, "step": 6799 }, { "epoch": 0.43, "grad_norm": 0.8482157588005066, "learning_rate": 6.346707131419108e-06, "loss": 0.5726, "step": 6800 }, { "epoch": 0.43, "grad_norm": 0.935832142829895, "learning_rate": 6.345719025241725e-06, "loss": 0.5852, "step": 6801 }, { "epoch": 0.43, "grad_norm": 0.8839829564094543, "learning_rate": 6.3447308624007964e-06, "loss": 0.6138, "step": 6802 }, { "epoch": 0.43, "grad_norm": 0.9015828967094421, "learning_rate": 6.343742642937929e-06, "loss": 0.5436, "step": 6803 }, { "epoch": 0.43, "grad_norm": 0.925391674041748, "learning_rate": 6.342754366894735e-06, "loss": 0.6357, "step": 6804 }, { "epoch": 0.43, "grad_norm": 0.8897901177406311, "learning_rate": 6.341766034312824e-06, "loss": 0.6055, "step": 6805 }, { "epoch": 0.43, "grad_norm": 0.9219132661819458, "learning_rate": 6.340777645233811e-06, "loss": 0.6218, "step": 6806 }, { "epoch": 0.43, "grad_norm": 0.8790163993835449, "learning_rate": 6.339789199699319e-06, "loss": 0.5795, "step": 6807 }, { "epoch": 0.43, "grad_norm": 0.860368549823761, "learning_rate": 6.338800697750963e-06, "loss": 0.5757, "step": 6808 }, { "epoch": 0.43, "grad_norm": 0.8733096718788147, "learning_rate": 6.337812139430368e-06, "loss": 0.5503, "step": 6809 }, { "epoch": 0.43, "grad_norm": 0.9531643986701965, "learning_rate": 6.336823524779155e-06, "loss": 0.6141, "step": 6810 }, { "epoch": 0.43, "grad_norm": 0.8316904902458191, "learning_rate": 6.335834853838957e-06, "loss": 0.5442, "step": 6811 }, { "epoch": 0.43, "grad_norm": 0.9344193339347839, "learning_rate": 6.334846126651399e-06, "loss": 0.6328, "step": 6812 }, { "epoch": 0.43, "grad_norm": 0.9671064615249634, "learning_rate": 6.333857343258115e-06, "loss": 0.6196, "step": 6813 }, { "epoch": 0.43, "grad_norm": 0.9183486104011536, "learning_rate": 6.3328685037007365e-06, "loss": 0.5904, "step": 6814 }, { "epoch": 0.43, "grad_norm": 0.9257077574729919, "learning_rate": 6.331879608020905e-06, "loss": 0.5997, "step": 6815 }, { "epoch": 0.43, "grad_norm": 0.9391463398933411, "learning_rate": 6.330890656260253e-06, "loss": 0.6602, "step": 6816 }, { "epoch": 0.43, "grad_norm": 0.9092316627502441, "learning_rate": 6.329901648460428e-06, "loss": 0.6427, "step": 6817 }, { "epoch": 0.43, "grad_norm": 0.9237379431724548, "learning_rate": 6.32891258466307e-06, "loss": 0.6052, "step": 6818 }, { "epoch": 0.43, "grad_norm": 0.8574221134185791, "learning_rate": 6.3279234649098265e-06, "loss": 0.5748, "step": 6819 }, { "epoch": 0.43, "grad_norm": 0.9020368456840515, "learning_rate": 6.326934289242346e-06, "loss": 0.5697, "step": 6820 }, { "epoch": 0.43, "grad_norm": 0.9599592685699463, "learning_rate": 6.325945057702276e-06, "loss": 0.6449, "step": 6821 }, { "epoch": 0.43, "grad_norm": 0.8588045239448547, "learning_rate": 6.324955770331274e-06, "loss": 0.6282, "step": 6822 }, { "epoch": 0.43, "grad_norm": 0.8139827847480774, "learning_rate": 6.323966427170993e-06, "loss": 0.5639, "step": 6823 }, { "epoch": 0.43, "grad_norm": 0.9147988557815552, "learning_rate": 6.322977028263093e-06, "loss": 0.6103, "step": 6824 }, { "epoch": 0.43, "grad_norm": 0.9550712704658508, "learning_rate": 6.321987573649232e-06, "loss": 0.5802, "step": 6825 }, { "epoch": 0.43, "grad_norm": 0.8623383045196533, "learning_rate": 6.320998063371072e-06, "loss": 0.5587, "step": 6826 }, { "epoch": 0.43, "grad_norm": 0.8975523710250854, "learning_rate": 6.320008497470281e-06, "loss": 0.6382, "step": 6827 }, { "epoch": 0.43, "grad_norm": 0.9629261493682861, "learning_rate": 6.319018875988523e-06, "loss": 0.6272, "step": 6828 }, { "epoch": 0.43, "grad_norm": 0.9650130271911621, "learning_rate": 6.318029198967468e-06, "loss": 0.6143, "step": 6829 }, { "epoch": 0.43, "grad_norm": 0.9213373064994812, "learning_rate": 6.317039466448789e-06, "loss": 0.6329, "step": 6830 }, { "epoch": 0.43, "grad_norm": 0.8667360544204712, "learning_rate": 6.316049678474159e-06, "loss": 0.5997, "step": 6831 }, { "epoch": 0.43, "grad_norm": 0.9180268049240112, "learning_rate": 6.315059835085257e-06, "loss": 0.5874, "step": 6832 }, { "epoch": 0.43, "grad_norm": 0.9233614802360535, "learning_rate": 6.314069936323759e-06, "loss": 0.5789, "step": 6833 }, { "epoch": 0.43, "grad_norm": 0.8247601985931396, "learning_rate": 6.313079982231347e-06, "loss": 0.5741, "step": 6834 }, { "epoch": 0.43, "grad_norm": 0.893379807472229, "learning_rate": 6.312089972849707e-06, "loss": 0.615, "step": 6835 }, { "epoch": 0.43, "grad_norm": 0.8536086082458496, "learning_rate": 6.31109990822052e-06, "loss": 0.6018, "step": 6836 }, { "epoch": 0.43, "grad_norm": 0.8153089284896851, "learning_rate": 6.3101097883854765e-06, "loss": 0.6091, "step": 6837 }, { "epoch": 0.43, "grad_norm": 0.9722812175750732, "learning_rate": 6.30911961338627e-06, "loss": 0.5822, "step": 6838 }, { "epoch": 0.43, "grad_norm": 0.8883670568466187, "learning_rate": 6.3081293832645896e-06, "loss": 0.5889, "step": 6839 }, { "epoch": 0.43, "grad_norm": 0.9067282676696777, "learning_rate": 6.30713909806213e-06, "loss": 0.538, "step": 6840 }, { "epoch": 0.43, "grad_norm": 0.9098742008209229, "learning_rate": 6.306148757820591e-06, "loss": 0.5896, "step": 6841 }, { "epoch": 0.43, "grad_norm": 0.8773499131202698, "learning_rate": 6.3051583625816725e-06, "loss": 0.5662, "step": 6842 }, { "epoch": 0.43, "grad_norm": 0.81912761926651, "learning_rate": 6.304167912387076e-06, "loss": 0.5311, "step": 6843 }, { "epoch": 0.43, "grad_norm": 0.8784845471382141, "learning_rate": 6.303177407278504e-06, "loss": 0.6069, "step": 6844 }, { "epoch": 0.43, "grad_norm": 0.885051965713501, "learning_rate": 6.302186847297666e-06, "loss": 0.5553, "step": 6845 }, { "epoch": 0.43, "grad_norm": 0.879306972026825, "learning_rate": 6.301196232486269e-06, "loss": 0.5763, "step": 6846 }, { "epoch": 0.43, "grad_norm": 0.9128481149673462, "learning_rate": 6.300205562886026e-06, "loss": 0.5423, "step": 6847 }, { "epoch": 0.43, "grad_norm": 0.9183526635169983, "learning_rate": 6.29921483853865e-06, "loss": 0.6028, "step": 6848 }, { "epoch": 0.43, "grad_norm": 0.8842886090278625, "learning_rate": 6.298224059485856e-06, "loss": 0.5602, "step": 6849 }, { "epoch": 0.43, "grad_norm": 0.87552809715271, "learning_rate": 6.297233225769363e-06, "loss": 0.6139, "step": 6850 }, { "epoch": 0.43, "grad_norm": 0.8830863237380981, "learning_rate": 6.296242337430892e-06, "loss": 0.5313, "step": 6851 }, { "epoch": 0.43, "grad_norm": 0.8993502259254456, "learning_rate": 6.2952513945121654e-06, "loss": 0.6239, "step": 6852 }, { "epoch": 0.43, "grad_norm": 0.8739321827888489, "learning_rate": 6.2942603970549075e-06, "loss": 0.5823, "step": 6853 }, { "epoch": 0.43, "grad_norm": 0.872380793094635, "learning_rate": 6.293269345100849e-06, "loss": 0.5773, "step": 6854 }, { "epoch": 0.43, "grad_norm": 0.8558187484741211, "learning_rate": 6.292278238691715e-06, "loss": 0.6118, "step": 6855 }, { "epoch": 0.43, "grad_norm": 0.8276113271713257, "learning_rate": 6.29128707786924e-06, "loss": 0.5871, "step": 6856 }, { "epoch": 0.43, "grad_norm": 0.9095969200134277, "learning_rate": 6.29029586267516e-06, "loss": 0.5682, "step": 6857 }, { "epoch": 0.43, "grad_norm": 0.9365728497505188, "learning_rate": 6.289304593151209e-06, "loss": 0.6114, "step": 6858 }, { "epoch": 0.43, "grad_norm": 0.849093496799469, "learning_rate": 6.288313269339126e-06, "loss": 0.587, "step": 6859 }, { "epoch": 0.43, "grad_norm": 0.871545672416687, "learning_rate": 6.287321891280653e-06, "loss": 0.5703, "step": 6860 }, { "epoch": 0.43, "grad_norm": 0.8848944306373596, "learning_rate": 6.2863304590175335e-06, "loss": 0.5847, "step": 6861 }, { "epoch": 0.43, "grad_norm": 0.8347170948982239, "learning_rate": 6.2853389725915146e-06, "loss": 0.5983, "step": 6862 }, { "epoch": 0.43, "grad_norm": 0.8573687672615051, "learning_rate": 6.284347432044342e-06, "loss": 0.6016, "step": 6863 }, { "epoch": 0.43, "grad_norm": 0.9318529963493347, "learning_rate": 6.2833558374177664e-06, "loss": 0.5946, "step": 6864 }, { "epoch": 0.43, "grad_norm": 0.8856549859046936, "learning_rate": 6.282364188753541e-06, "loss": 0.5791, "step": 6865 }, { "epoch": 0.43, "grad_norm": 0.8939905762672424, "learning_rate": 6.281372486093422e-06, "loss": 0.6297, "step": 6866 }, { "epoch": 0.44, "grad_norm": 0.9018425941467285, "learning_rate": 6.280380729479164e-06, "loss": 0.5627, "step": 6867 }, { "epoch": 0.44, "grad_norm": 0.9689738750457764, "learning_rate": 6.279388918952527e-06, "loss": 0.6036, "step": 6868 }, { "epoch": 0.44, "grad_norm": 0.9120928049087524, "learning_rate": 6.278397054555275e-06, "loss": 0.6214, "step": 6869 }, { "epoch": 0.44, "grad_norm": 0.9339777827262878, "learning_rate": 6.277405136329169e-06, "loss": 0.6575, "step": 6870 }, { "epoch": 0.44, "grad_norm": 0.8833754658699036, "learning_rate": 6.276413164315978e-06, "loss": 0.5584, "step": 6871 }, { "epoch": 0.44, "grad_norm": 0.8568885326385498, "learning_rate": 6.2754211385574674e-06, "loss": 0.5942, "step": 6872 }, { "epoch": 0.44, "grad_norm": 0.9012584686279297, "learning_rate": 6.274429059095411e-06, "loss": 0.6192, "step": 6873 }, { "epoch": 0.44, "grad_norm": 0.8778733611106873, "learning_rate": 6.273436925971578e-06, "loss": 0.5976, "step": 6874 }, { "epoch": 0.44, "grad_norm": 0.8796364068984985, "learning_rate": 6.272444739227748e-06, "loss": 0.5434, "step": 6875 }, { "epoch": 0.44, "grad_norm": 0.870852530002594, "learning_rate": 6.2714524989056945e-06, "loss": 0.6127, "step": 6876 }, { "epoch": 0.44, "grad_norm": 0.9310038685798645, "learning_rate": 6.270460205047202e-06, "loss": 0.6447, "step": 6877 }, { "epoch": 0.44, "grad_norm": 0.8664422035217285, "learning_rate": 6.269467857694047e-06, "loss": 0.5595, "step": 6878 }, { "epoch": 0.44, "grad_norm": 0.9125611186027527, "learning_rate": 6.268475456888019e-06, "loss": 0.6122, "step": 6879 }, { "epoch": 0.44, "grad_norm": 0.8741683959960938, "learning_rate": 6.2674830026709014e-06, "loss": 0.6289, "step": 6880 }, { "epoch": 0.44, "grad_norm": 0.8851672410964966, "learning_rate": 6.266490495084484e-06, "loss": 0.5838, "step": 6881 }, { "epoch": 0.44, "grad_norm": 0.9018517732620239, "learning_rate": 6.265497934170559e-06, "loss": 0.602, "step": 6882 }, { "epoch": 0.44, "grad_norm": 0.8665854930877686, "learning_rate": 6.264505319970915e-06, "loss": 0.5744, "step": 6883 }, { "epoch": 0.44, "grad_norm": 0.8087853789329529, "learning_rate": 6.263512652527353e-06, "loss": 0.5761, "step": 6884 }, { "epoch": 0.44, "grad_norm": 0.8873346447944641, "learning_rate": 6.262519931881669e-06, "loss": 0.626, "step": 6885 }, { "epoch": 0.44, "grad_norm": 0.9511377811431885, "learning_rate": 6.261527158075662e-06, "loss": 0.6155, "step": 6886 }, { "epoch": 0.44, "grad_norm": 0.8355633616447449, "learning_rate": 6.260534331151133e-06, "loss": 0.5037, "step": 6887 }, { "epoch": 0.44, "grad_norm": 0.8886730670928955, "learning_rate": 6.259541451149892e-06, "loss": 0.566, "step": 6888 }, { "epoch": 0.44, "grad_norm": 0.8618388175964355, "learning_rate": 6.258548518113741e-06, "loss": 0.603, "step": 6889 }, { "epoch": 0.44, "grad_norm": 0.8546575307846069, "learning_rate": 6.257555532084489e-06, "loss": 0.5327, "step": 6890 }, { "epoch": 0.44, "grad_norm": 0.8894109725952148, "learning_rate": 6.2565624931039485e-06, "loss": 0.6208, "step": 6891 }, { "epoch": 0.44, "grad_norm": 0.8885670900344849, "learning_rate": 6.255569401213933e-06, "loss": 0.6106, "step": 6892 }, { "epoch": 0.44, "grad_norm": 0.8722066283226013, "learning_rate": 6.254576256456257e-06, "loss": 0.5616, "step": 6893 }, { "epoch": 0.44, "grad_norm": 0.8073423504829407, "learning_rate": 6.253583058872741e-06, "loss": 0.5666, "step": 6894 }, { "epoch": 0.44, "grad_norm": 0.8527199625968933, "learning_rate": 6.2525898085052005e-06, "loss": 0.5878, "step": 6895 }, { "epoch": 0.44, "grad_norm": 0.8704131841659546, "learning_rate": 6.251596505395463e-06, "loss": 0.587, "step": 6896 }, { "epoch": 0.44, "grad_norm": 0.9160160422325134, "learning_rate": 6.25060314958535e-06, "loss": 0.6092, "step": 6897 }, { "epoch": 0.44, "grad_norm": 0.9297466278076172, "learning_rate": 6.249609741116689e-06, "loss": 0.573, "step": 6898 }, { "epoch": 0.44, "grad_norm": 0.8892688155174255, "learning_rate": 6.24861628003131e-06, "loss": 0.633, "step": 6899 }, { "epoch": 0.44, "grad_norm": 0.8495330214500427, "learning_rate": 6.247622766371041e-06, "loss": 0.5785, "step": 6900 }, { "epoch": 0.44, "grad_norm": 0.8945955634117126, "learning_rate": 6.246629200177718e-06, "loss": 0.6395, "step": 6901 }, { "epoch": 0.44, "grad_norm": 0.8665342330932617, "learning_rate": 6.245635581493176e-06, "loss": 0.5672, "step": 6902 }, { "epoch": 0.44, "grad_norm": 0.9849283695220947, "learning_rate": 6.244641910359254e-06, "loss": 0.687, "step": 6903 }, { "epoch": 0.44, "grad_norm": 0.8854192495346069, "learning_rate": 6.24364818681779e-06, "loss": 0.5928, "step": 6904 }, { "epoch": 0.44, "grad_norm": 0.9012208580970764, "learning_rate": 6.242654410910628e-06, "loss": 0.5718, "step": 6905 }, { "epoch": 0.44, "grad_norm": 0.9132283329963684, "learning_rate": 6.2416605826796095e-06, "loss": 0.647, "step": 6906 }, { "epoch": 0.44, "grad_norm": 0.8352293968200684, "learning_rate": 6.240666702166587e-06, "loss": 0.6094, "step": 6907 }, { "epoch": 0.44, "grad_norm": 0.8616271615028381, "learning_rate": 6.239672769413403e-06, "loss": 0.604, "step": 6908 }, { "epoch": 0.44, "grad_norm": 0.8567937612533569, "learning_rate": 6.238678784461913e-06, "loss": 0.5418, "step": 6909 }, { "epoch": 0.44, "grad_norm": 0.8747637867927551, "learning_rate": 6.237684747353965e-06, "loss": 0.5794, "step": 6910 }, { "epoch": 0.44, "grad_norm": 0.8634200096130371, "learning_rate": 6.23669065813142e-06, "loss": 0.5864, "step": 6911 }, { "epoch": 0.44, "grad_norm": 0.8566752076148987, "learning_rate": 6.235696516836134e-06, "loss": 0.601, "step": 6912 }, { "epoch": 0.44, "grad_norm": 0.8930138945579529, "learning_rate": 6.234702323509967e-06, "loss": 0.6216, "step": 6913 }, { "epoch": 0.44, "grad_norm": 0.9066216945648193, "learning_rate": 6.233708078194778e-06, "loss": 0.6281, "step": 6914 }, { "epoch": 0.44, "grad_norm": 0.9339972138404846, "learning_rate": 6.232713780932434e-06, "loss": 0.6423, "step": 6915 }, { "epoch": 0.44, "grad_norm": 0.9028674364089966, "learning_rate": 6.231719431764804e-06, "loss": 0.593, "step": 6916 }, { "epoch": 0.44, "grad_norm": 0.8986188769340515, "learning_rate": 6.230725030733751e-06, "loss": 0.6325, "step": 6917 }, { "epoch": 0.44, "grad_norm": 0.9198395013809204, "learning_rate": 6.229730577881148e-06, "loss": 0.6486, "step": 6918 }, { "epoch": 0.44, "grad_norm": 0.8530875444412231, "learning_rate": 6.2287360732488685e-06, "loss": 0.5365, "step": 6919 }, { "epoch": 0.44, "grad_norm": 0.9224251508712769, "learning_rate": 6.227741516878789e-06, "loss": 0.6252, "step": 6920 }, { "epoch": 0.44, "grad_norm": 0.9238904118537903, "learning_rate": 6.226746908812784e-06, "loss": 0.6171, "step": 6921 }, { "epoch": 0.44, "grad_norm": 0.9478338956832886, "learning_rate": 6.225752249092734e-06, "loss": 0.6544, "step": 6922 }, { "epoch": 0.44, "grad_norm": 0.9255354404449463, "learning_rate": 6.224757537760521e-06, "loss": 0.6176, "step": 6923 }, { "epoch": 0.44, "grad_norm": 0.8838732838630676, "learning_rate": 6.2237627748580294e-06, "loss": 0.633, "step": 6924 }, { "epoch": 0.44, "grad_norm": 0.8878322243690491, "learning_rate": 6.222767960427144e-06, "loss": 0.6434, "step": 6925 }, { "epoch": 0.44, "grad_norm": 0.8358001112937927, "learning_rate": 6.221773094509753e-06, "loss": 0.6189, "step": 6926 }, { "epoch": 0.44, "grad_norm": 0.8636587858200073, "learning_rate": 6.220778177147747e-06, "loss": 0.5856, "step": 6927 }, { "epoch": 0.44, "grad_norm": 0.859220027923584, "learning_rate": 6.219783208383021e-06, "loss": 0.568, "step": 6928 }, { "epoch": 0.44, "grad_norm": 0.9622043967247009, "learning_rate": 6.218788188257465e-06, "loss": 0.5914, "step": 6929 }, { "epoch": 0.44, "grad_norm": 0.8458547592163086, "learning_rate": 6.217793116812979e-06, "loss": 0.554, "step": 6930 }, { "epoch": 0.44, "grad_norm": 0.8938260078430176, "learning_rate": 6.216797994091462e-06, "loss": 0.6286, "step": 6931 }, { "epoch": 0.44, "grad_norm": 0.8333603739738464, "learning_rate": 6.215802820134814e-06, "loss": 0.5297, "step": 6932 }, { "epoch": 0.44, "grad_norm": 0.8489833474159241, "learning_rate": 6.214807594984939e-06, "loss": 0.6139, "step": 6933 }, { "epoch": 0.44, "grad_norm": 0.9169575572013855, "learning_rate": 6.213812318683741e-06, "loss": 0.6339, "step": 6934 }, { "epoch": 0.44, "grad_norm": 0.8761480450630188, "learning_rate": 6.2128169912731295e-06, "loss": 0.6299, "step": 6935 }, { "epoch": 0.44, "grad_norm": 0.948622465133667, "learning_rate": 6.211821612795014e-06, "loss": 0.5962, "step": 6936 }, { "epoch": 0.44, "grad_norm": 0.874839186668396, "learning_rate": 6.210826183291305e-06, "loss": 0.5761, "step": 6937 }, { "epoch": 0.44, "grad_norm": 0.8137356638908386, "learning_rate": 6.209830702803918e-06, "loss": 0.5615, "step": 6938 }, { "epoch": 0.44, "grad_norm": 0.8627551794052124, "learning_rate": 6.208835171374769e-06, "loss": 0.6144, "step": 6939 }, { "epoch": 0.44, "grad_norm": 0.9175261855125427, "learning_rate": 6.207839589045777e-06, "loss": 0.578, "step": 6940 }, { "epoch": 0.44, "grad_norm": 0.8478714823722839, "learning_rate": 6.20684395585886e-06, "loss": 0.5606, "step": 6941 }, { "epoch": 0.44, "grad_norm": 0.8721091747283936, "learning_rate": 6.205848271855943e-06, "loss": 0.5986, "step": 6942 }, { "epoch": 0.44, "grad_norm": 0.9747815132141113, "learning_rate": 6.204852537078952e-06, "loss": 0.5936, "step": 6943 }, { "epoch": 0.44, "grad_norm": 0.9325621724128723, "learning_rate": 6.203856751569809e-06, "loss": 0.6673, "step": 6944 }, { "epoch": 0.44, "grad_norm": 0.8681188821792603, "learning_rate": 6.202860915370447e-06, "loss": 0.6053, "step": 6945 }, { "epoch": 0.44, "grad_norm": 0.8858415484428406, "learning_rate": 6.201865028522798e-06, "loss": 0.5277, "step": 6946 }, { "epoch": 0.44, "grad_norm": 0.9327991604804993, "learning_rate": 6.200869091068791e-06, "loss": 0.5919, "step": 6947 }, { "epoch": 0.44, "grad_norm": 0.8470887541770935, "learning_rate": 6.1998731030503655e-06, "loss": 0.5295, "step": 6948 }, { "epoch": 0.44, "grad_norm": 0.8615082502365112, "learning_rate": 6.198877064509458e-06, "loss": 0.5833, "step": 6949 }, { "epoch": 0.44, "grad_norm": 0.9464468359947205, "learning_rate": 6.1978809754880076e-06, "loss": 0.6293, "step": 6950 }, { "epoch": 0.44, "grad_norm": 0.8637322783470154, "learning_rate": 6.196884836027957e-06, "loss": 0.5632, "step": 6951 }, { "epoch": 0.44, "grad_norm": 0.8578369617462158, "learning_rate": 6.195888646171247e-06, "loss": 0.5286, "step": 6952 }, { "epoch": 0.44, "grad_norm": 0.8746157288551331, "learning_rate": 6.194892405959829e-06, "loss": 0.597, "step": 6953 }, { "epoch": 0.44, "grad_norm": 0.8710793852806091, "learning_rate": 6.193896115435648e-06, "loss": 0.603, "step": 6954 }, { "epoch": 0.44, "grad_norm": 0.8585920929908752, "learning_rate": 6.192899774640655e-06, "loss": 0.6162, "step": 6955 }, { "epoch": 0.44, "grad_norm": 0.8971749544143677, "learning_rate": 6.191903383616801e-06, "loss": 0.5941, "step": 6956 }, { "epoch": 0.44, "grad_norm": 0.8642129898071289, "learning_rate": 6.190906942406043e-06, "loss": 0.5589, "step": 6957 }, { "epoch": 0.44, "grad_norm": 0.9450658559799194, "learning_rate": 6.189910451050336e-06, "loss": 0.5831, "step": 6958 }, { "epoch": 0.44, "grad_norm": 0.9805313348770142, "learning_rate": 6.1889139095916395e-06, "loss": 0.6607, "step": 6959 }, { "epoch": 0.44, "grad_norm": 0.8744614124298096, "learning_rate": 6.187917318071914e-06, "loss": 0.6163, "step": 6960 }, { "epoch": 0.44, "grad_norm": 0.8556416034698486, "learning_rate": 6.1869206765331234e-06, "loss": 0.5893, "step": 6961 }, { "epoch": 0.44, "grad_norm": 0.8845242261886597, "learning_rate": 6.1859239850172325e-06, "loss": 0.5842, "step": 6962 }, { "epoch": 0.44, "grad_norm": 0.8819428086280823, "learning_rate": 6.1849272435662065e-06, "loss": 0.5875, "step": 6963 }, { "epoch": 0.44, "grad_norm": 0.9113361239433289, "learning_rate": 6.183930452222017e-06, "loss": 0.5681, "step": 6964 }, { "epoch": 0.44, "grad_norm": 0.8745653629302979, "learning_rate": 6.1829336110266356e-06, "loss": 0.5826, "step": 6965 }, { "epoch": 0.44, "grad_norm": 0.8720842599868774, "learning_rate": 6.181936720022033e-06, "loss": 0.6105, "step": 6966 }, { "epoch": 0.44, "grad_norm": 0.9576596617698669, "learning_rate": 6.180939779250188e-06, "loss": 0.6231, "step": 6967 }, { "epoch": 0.44, "grad_norm": 0.8861308097839355, "learning_rate": 6.179942788753077e-06, "loss": 0.6204, "step": 6968 }, { "epoch": 0.44, "grad_norm": 0.9912355542182922, "learning_rate": 6.178945748572681e-06, "loss": 0.6036, "step": 6969 }, { "epoch": 0.44, "grad_norm": 0.9015969634056091, "learning_rate": 6.177948658750979e-06, "loss": 0.5217, "step": 6970 }, { "epoch": 0.44, "grad_norm": 0.8962372541427612, "learning_rate": 6.176951519329958e-06, "loss": 0.5973, "step": 6971 }, { "epoch": 0.44, "grad_norm": 0.9305719137191772, "learning_rate": 6.1759543303516025e-06, "loss": 0.5845, "step": 6972 }, { "epoch": 0.44, "grad_norm": 0.9429194331169128, "learning_rate": 6.174957091857901e-06, "loss": 0.6336, "step": 6973 }, { "epoch": 0.44, "grad_norm": 0.8551360368728638, "learning_rate": 6.173959803890843e-06, "loss": 0.5911, "step": 6974 }, { "epoch": 0.44, "grad_norm": 0.9430440068244934, "learning_rate": 6.172962466492423e-06, "loss": 0.612, "step": 6975 }, { "epoch": 0.44, "grad_norm": 0.8664399981498718, "learning_rate": 6.171965079704634e-06, "loss": 0.6186, "step": 6976 }, { "epoch": 0.44, "grad_norm": 0.9083961248397827, "learning_rate": 6.17096764356947e-06, "loss": 0.5966, "step": 6977 }, { "epoch": 0.44, "grad_norm": 0.8894690275192261, "learning_rate": 6.169970158128935e-06, "loss": 0.6315, "step": 6978 }, { "epoch": 0.44, "grad_norm": 0.9232130646705627, "learning_rate": 6.168972623425023e-06, "loss": 0.5961, "step": 6979 }, { "epoch": 0.44, "grad_norm": 0.8410488963127136, "learning_rate": 6.167975039499744e-06, "loss": 0.5795, "step": 6980 }, { "epoch": 0.44, "grad_norm": 0.8683662414550781, "learning_rate": 6.1669774063950985e-06, "loss": 0.6774, "step": 6981 }, { "epoch": 0.44, "grad_norm": 1.0402193069458008, "learning_rate": 6.165979724153094e-06, "loss": 0.636, "step": 6982 }, { "epoch": 0.44, "grad_norm": 0.8895815014839172, "learning_rate": 6.164981992815737e-06, "loss": 0.5795, "step": 6983 }, { "epoch": 0.44, "grad_norm": 0.8773569464683533, "learning_rate": 6.163984212425043e-06, "loss": 0.5905, "step": 6984 }, { "epoch": 0.44, "grad_norm": 0.8979213833808899, "learning_rate": 6.162986383023023e-06, "loss": 0.6089, "step": 6985 }, { "epoch": 0.44, "grad_norm": 0.953054666519165, "learning_rate": 6.161988504651692e-06, "loss": 0.6058, "step": 6986 }, { "epoch": 0.44, "grad_norm": 0.8674301505088806, "learning_rate": 6.160990577353066e-06, "loss": 0.6171, "step": 6987 }, { "epoch": 0.44, "grad_norm": 0.8784220814704895, "learning_rate": 6.1599926011691695e-06, "loss": 0.5925, "step": 6988 }, { "epoch": 0.44, "grad_norm": 0.9530996084213257, "learning_rate": 6.1589945761420166e-06, "loss": 0.6118, "step": 6989 }, { "epoch": 0.44, "grad_norm": 0.892207145690918, "learning_rate": 6.157996502313635e-06, "loss": 0.5923, "step": 6990 }, { "epoch": 0.44, "grad_norm": 0.803701639175415, "learning_rate": 6.156998379726048e-06, "loss": 0.549, "step": 6991 }, { "epoch": 0.44, "grad_norm": 0.9421709775924683, "learning_rate": 6.1560002084212845e-06, "loss": 0.5709, "step": 6992 }, { "epoch": 0.44, "grad_norm": 0.8066360354423523, "learning_rate": 6.155001988441375e-06, "loss": 0.5746, "step": 6993 }, { "epoch": 0.44, "grad_norm": 0.8635882139205933, "learning_rate": 6.154003719828349e-06, "loss": 0.5782, "step": 6994 }, { "epoch": 0.44, "grad_norm": 0.8454831838607788, "learning_rate": 6.1530054026242405e-06, "loss": 0.5256, "step": 6995 }, { "epoch": 0.44, "grad_norm": 0.890565037727356, "learning_rate": 6.152007036871085e-06, "loss": 0.5331, "step": 6996 }, { "epoch": 0.44, "grad_norm": 0.8612550497055054, "learning_rate": 6.151008622610921e-06, "loss": 0.6093, "step": 6997 }, { "epoch": 0.44, "grad_norm": 0.8857212066650391, "learning_rate": 6.150010159885789e-06, "loss": 0.5336, "step": 6998 }, { "epoch": 0.44, "grad_norm": 0.9954962730407715, "learning_rate": 6.149011648737728e-06, "loss": 0.6686, "step": 6999 }, { "epoch": 0.44, "grad_norm": 0.9537789225578308, "learning_rate": 6.148013089208784e-06, "loss": 0.6394, "step": 7000 }, { "epoch": 0.44, "grad_norm": 0.872518002986908, "learning_rate": 6.1470144813410045e-06, "loss": 0.6226, "step": 7001 }, { "epoch": 0.44, "grad_norm": 0.8990280032157898, "learning_rate": 6.146015825176432e-06, "loss": 0.5554, "step": 7002 }, { "epoch": 0.44, "grad_norm": 0.8978776931762695, "learning_rate": 6.145017120757123e-06, "loss": 0.6101, "step": 7003 }, { "epoch": 0.44, "grad_norm": 0.9086971879005432, "learning_rate": 6.144018368125124e-06, "loss": 0.5648, "step": 7004 }, { "epoch": 0.44, "grad_norm": 0.8578811287879944, "learning_rate": 6.143019567322493e-06, "loss": 0.5522, "step": 7005 }, { "epoch": 0.44, "grad_norm": 0.9005651473999023, "learning_rate": 6.1420207183912824e-06, "loss": 0.6659, "step": 7006 }, { "epoch": 0.44, "grad_norm": 0.8784024715423584, "learning_rate": 6.141021821373555e-06, "loss": 0.5978, "step": 7007 }, { "epoch": 0.44, "grad_norm": 0.9198904633522034, "learning_rate": 6.140022876311367e-06, "loss": 0.5903, "step": 7008 }, { "epoch": 0.44, "grad_norm": 0.8121350407600403, "learning_rate": 6.139023883246781e-06, "loss": 0.5551, "step": 7009 }, { "epoch": 0.44, "grad_norm": 0.8870401382446289, "learning_rate": 6.1380248422218604e-06, "loss": 0.521, "step": 7010 }, { "epoch": 0.44, "grad_norm": 0.9249464869499207, "learning_rate": 6.137025753278673e-06, "loss": 0.6247, "step": 7011 }, { "epoch": 0.44, "grad_norm": 0.8742251992225647, "learning_rate": 6.1360266164592886e-06, "loss": 0.6279, "step": 7012 }, { "epoch": 0.44, "grad_norm": 0.9462286829948425, "learning_rate": 6.135027431805774e-06, "loss": 0.6258, "step": 7013 }, { "epoch": 0.44, "grad_norm": 0.8585238456726074, "learning_rate": 6.134028199360203e-06, "loss": 0.5567, "step": 7014 }, { "epoch": 0.44, "grad_norm": 0.8556974530220032, "learning_rate": 6.133028919164647e-06, "loss": 0.6523, "step": 7015 }, { "epoch": 0.44, "grad_norm": 0.8407923579216003, "learning_rate": 6.132029591261188e-06, "loss": 0.5777, "step": 7016 }, { "epoch": 0.44, "grad_norm": 0.9544344544410706, "learning_rate": 6.1310302156919e-06, "loss": 0.6063, "step": 7017 }, { "epoch": 0.44, "grad_norm": 0.9588916301727295, "learning_rate": 6.130030792498865e-06, "loss": 0.6007, "step": 7018 }, { "epoch": 0.44, "grad_norm": 0.8659964203834534, "learning_rate": 6.129031321724163e-06, "loss": 0.5848, "step": 7019 }, { "epoch": 0.44, "grad_norm": 0.9121047854423523, "learning_rate": 6.128031803409881e-06, "loss": 0.5774, "step": 7020 }, { "epoch": 0.44, "grad_norm": 0.9202248454093933, "learning_rate": 6.127032237598102e-06, "loss": 0.6225, "step": 7021 }, { "epoch": 0.44, "grad_norm": 0.8752833604812622, "learning_rate": 6.126032624330917e-06, "loss": 0.5862, "step": 7022 }, { "epoch": 0.44, "grad_norm": 0.8969300389289856, "learning_rate": 6.125032963650417e-06, "loss": 0.6297, "step": 7023 }, { "epoch": 0.45, "grad_norm": 0.880032479763031, "learning_rate": 6.124033255598691e-06, "loss": 0.5669, "step": 7024 }, { "epoch": 0.45, "grad_norm": 0.8935046195983887, "learning_rate": 6.1230335002178345e-06, "loss": 0.5929, "step": 7025 }, { "epoch": 0.45, "grad_norm": 0.9103530645370483, "learning_rate": 6.1220336975499435e-06, "loss": 0.5772, "step": 7026 }, { "epoch": 0.45, "grad_norm": 0.8994046449661255, "learning_rate": 6.121033847637119e-06, "loss": 0.63, "step": 7027 }, { "epoch": 0.45, "grad_norm": 0.8760805726051331, "learning_rate": 6.120033950521458e-06, "loss": 0.568, "step": 7028 }, { "epoch": 0.45, "grad_norm": 0.8472411632537842, "learning_rate": 6.119034006245063e-06, "loss": 0.5667, "step": 7029 }, { "epoch": 0.45, "grad_norm": 0.9165576100349426, "learning_rate": 6.118034014850039e-06, "loss": 0.6196, "step": 7030 }, { "epoch": 0.45, "grad_norm": 0.8201605677604675, "learning_rate": 6.117033976378493e-06, "loss": 0.5162, "step": 7031 }, { "epoch": 0.45, "grad_norm": 0.9203435182571411, "learning_rate": 6.116033890872531e-06, "loss": 0.558, "step": 7032 }, { "epoch": 0.45, "grad_norm": 0.8711187839508057, "learning_rate": 6.115033758374265e-06, "loss": 0.634, "step": 7033 }, { "epoch": 0.45, "grad_norm": 0.8825559616088867, "learning_rate": 6.114033578925805e-06, "loss": 0.599, "step": 7034 }, { "epoch": 0.45, "grad_norm": 0.8482293486595154, "learning_rate": 6.1130333525692684e-06, "loss": 0.583, "step": 7035 }, { "epoch": 0.45, "grad_norm": 0.9043130278587341, "learning_rate": 6.112033079346767e-06, "loss": 0.575, "step": 7036 }, { "epoch": 0.45, "grad_norm": 0.8233504295349121, "learning_rate": 6.111032759300423e-06, "loss": 0.6182, "step": 7037 }, { "epoch": 0.45, "grad_norm": 0.8660386204719543, "learning_rate": 6.110032392472354e-06, "loss": 0.5851, "step": 7038 }, { "epoch": 0.45, "grad_norm": 0.8297396302223206, "learning_rate": 6.109031978904683e-06, "loss": 0.5715, "step": 7039 }, { "epoch": 0.45, "grad_norm": 0.8824520707130432, "learning_rate": 6.108031518639532e-06, "loss": 0.6218, "step": 7040 }, { "epoch": 0.45, "grad_norm": 0.941839873790741, "learning_rate": 6.107031011719029e-06, "loss": 0.6708, "step": 7041 }, { "epoch": 0.45, "grad_norm": 0.8961352705955505, "learning_rate": 6.106030458185303e-06, "loss": 0.5851, "step": 7042 }, { "epoch": 0.45, "grad_norm": 0.9293150305747986, "learning_rate": 6.105029858080479e-06, "loss": 0.5899, "step": 7043 }, { "epoch": 0.45, "grad_norm": 0.84063720703125, "learning_rate": 6.1040292114466935e-06, "loss": 0.5756, "step": 7044 }, { "epoch": 0.45, "grad_norm": 0.8923290371894836, "learning_rate": 6.103028518326077e-06, "loss": 0.5649, "step": 7045 }, { "epoch": 0.45, "grad_norm": 0.9170678853988647, "learning_rate": 6.102027778760769e-06, "loss": 0.6111, "step": 7046 }, { "epoch": 0.45, "grad_norm": 0.9100625514984131, "learning_rate": 6.101026992792904e-06, "loss": 0.6542, "step": 7047 }, { "epoch": 0.45, "grad_norm": 0.842936635017395, "learning_rate": 6.100026160464621e-06, "loss": 0.5677, "step": 7048 }, { "epoch": 0.45, "grad_norm": 0.8089660406112671, "learning_rate": 6.099025281818065e-06, "loss": 0.5623, "step": 7049 }, { "epoch": 0.45, "grad_norm": 0.8938851952552795, "learning_rate": 6.098024356895378e-06, "loss": 0.5826, "step": 7050 }, { "epoch": 0.45, "grad_norm": 0.8716176152229309, "learning_rate": 6.097023385738704e-06, "loss": 0.5478, "step": 7051 }, { "epoch": 0.45, "grad_norm": 0.9070749878883362, "learning_rate": 6.096022368390191e-06, "loss": 0.6217, "step": 7052 }, { "epoch": 0.45, "grad_norm": 0.8996195197105408, "learning_rate": 6.0950213048919895e-06, "loss": 0.5936, "step": 7053 }, { "epoch": 0.45, "grad_norm": 0.8879945278167725, "learning_rate": 6.094020195286251e-06, "loss": 0.57, "step": 7054 }, { "epoch": 0.45, "grad_norm": 0.9025259017944336, "learning_rate": 6.093019039615128e-06, "loss": 0.6607, "step": 7055 }, { "epoch": 0.45, "grad_norm": 0.8679327368736267, "learning_rate": 6.092017837920773e-06, "loss": 0.5673, "step": 7056 }, { "epoch": 0.45, "grad_norm": 0.8866552114486694, "learning_rate": 6.091016590245347e-06, "loss": 0.5824, "step": 7057 }, { "epoch": 0.45, "grad_norm": 0.885572075843811, "learning_rate": 6.090015296631009e-06, "loss": 0.5513, "step": 7058 }, { "epoch": 0.45, "grad_norm": 0.9012414813041687, "learning_rate": 6.089013957119918e-06, "loss": 0.6531, "step": 7059 }, { "epoch": 0.45, "grad_norm": 0.833052396774292, "learning_rate": 6.088012571754236e-06, "loss": 0.5201, "step": 7060 }, { "epoch": 0.45, "grad_norm": 0.952321469783783, "learning_rate": 6.087011140576132e-06, "loss": 0.6119, "step": 7061 }, { "epoch": 0.45, "grad_norm": 0.8854734301567078, "learning_rate": 6.086009663627769e-06, "loss": 0.5879, "step": 7062 }, { "epoch": 0.45, "grad_norm": 0.899401068687439, "learning_rate": 6.085008140951318e-06, "loss": 0.5806, "step": 7063 }, { "epoch": 0.45, "grad_norm": 0.945583164691925, "learning_rate": 6.084006572588947e-06, "loss": 0.6169, "step": 7064 }, { "epoch": 0.45, "grad_norm": 0.8657901287078857, "learning_rate": 6.083004958582832e-06, "loss": 0.5392, "step": 7065 }, { "epoch": 0.45, "grad_norm": 0.8929893374443054, "learning_rate": 6.082003298975144e-06, "loss": 0.6197, "step": 7066 }, { "epoch": 0.45, "grad_norm": 0.8724581599235535, "learning_rate": 6.081001593808063e-06, "loss": 0.5692, "step": 7067 }, { "epoch": 0.45, "grad_norm": 0.9062278866767883, "learning_rate": 6.079999843123763e-06, "loss": 0.6532, "step": 7068 }, { "epoch": 0.45, "grad_norm": 0.8632785081863403, "learning_rate": 6.07899804696443e-06, "loss": 0.6221, "step": 7069 }, { "epoch": 0.45, "grad_norm": 0.9054343104362488, "learning_rate": 6.077996205372241e-06, "loss": 0.5598, "step": 7070 }, { "epoch": 0.45, "grad_norm": 0.8777742385864258, "learning_rate": 6.07699431838938e-06, "loss": 0.5767, "step": 7071 }, { "epoch": 0.45, "grad_norm": 0.8978030681610107, "learning_rate": 6.075992386058037e-06, "loss": 0.5976, "step": 7072 }, { "epoch": 0.45, "grad_norm": 0.9662826061248779, "learning_rate": 6.074990408420397e-06, "loss": 0.5744, "step": 7073 }, { "epoch": 0.45, "grad_norm": 0.8414073586463928, "learning_rate": 6.073988385518652e-06, "loss": 0.543, "step": 7074 }, { "epoch": 0.45, "grad_norm": 0.863689124584198, "learning_rate": 6.07298631739499e-06, "loss": 0.6149, "step": 7075 }, { "epoch": 0.45, "grad_norm": 0.8683016896247864, "learning_rate": 6.071984204091608e-06, "loss": 0.6363, "step": 7076 }, { "epoch": 0.45, "grad_norm": 0.9165253639221191, "learning_rate": 6.0709820456507e-06, "loss": 0.6226, "step": 7077 }, { "epoch": 0.45, "grad_norm": 0.8738973140716553, "learning_rate": 6.069979842114465e-06, "loss": 0.5733, "step": 7078 }, { "epoch": 0.45, "grad_norm": 0.954142153263092, "learning_rate": 6.068977593525098e-06, "loss": 0.625, "step": 7079 }, { "epoch": 0.45, "grad_norm": 0.8756037354469299, "learning_rate": 6.067975299924806e-06, "loss": 0.6264, "step": 7080 }, { "epoch": 0.45, "grad_norm": 0.8994114398956299, "learning_rate": 6.066972961355788e-06, "loss": 0.6367, "step": 7081 }, { "epoch": 0.45, "grad_norm": 0.8711373805999756, "learning_rate": 6.065970577860252e-06, "loss": 0.5993, "step": 7082 }, { "epoch": 0.45, "grad_norm": 0.9261775016784668, "learning_rate": 6.0649681494804014e-06, "loss": 0.619, "step": 7083 }, { "epoch": 0.45, "grad_norm": 0.8910576701164246, "learning_rate": 6.063965676258448e-06, "loss": 0.594, "step": 7084 }, { "epoch": 0.45, "grad_norm": 0.8395871520042419, "learning_rate": 6.0629631582366015e-06, "loss": 0.6077, "step": 7085 }, { "epoch": 0.45, "grad_norm": 0.8894856572151184, "learning_rate": 6.0619605954570726e-06, "loss": 0.5605, "step": 7086 }, { "epoch": 0.45, "grad_norm": 0.8762527704238892, "learning_rate": 6.060957987962077e-06, "loss": 0.5896, "step": 7087 }, { "epoch": 0.45, "grad_norm": 0.8831724524497986, "learning_rate": 6.059955335793832e-06, "loss": 0.6159, "step": 7088 }, { "epoch": 0.45, "grad_norm": 0.9416611790657043, "learning_rate": 6.0589526389945576e-06, "loss": 0.5841, "step": 7089 }, { "epoch": 0.45, "grad_norm": 0.8676833510398865, "learning_rate": 6.057949897606469e-06, "loss": 0.5974, "step": 7090 }, { "epoch": 0.45, "grad_norm": 0.8935969471931458, "learning_rate": 6.05694711167179e-06, "loss": 0.5916, "step": 7091 }, { "epoch": 0.45, "grad_norm": 0.8953608870506287, "learning_rate": 6.055944281232746e-06, "loss": 0.6164, "step": 7092 }, { "epoch": 0.45, "grad_norm": 0.9108656048774719, "learning_rate": 6.0549414063315625e-06, "loss": 0.6615, "step": 7093 }, { "epoch": 0.45, "grad_norm": 0.8927263617515564, "learning_rate": 6.053938487010464e-06, "loss": 0.5843, "step": 7094 }, { "epoch": 0.45, "grad_norm": 0.8488055467605591, "learning_rate": 6.052935523311684e-06, "loss": 0.5774, "step": 7095 }, { "epoch": 0.45, "grad_norm": 0.861768901348114, "learning_rate": 6.0519325152774515e-06, "loss": 0.5815, "step": 7096 }, { "epoch": 0.45, "grad_norm": 0.8488924503326416, "learning_rate": 6.05092946295e-06, "loss": 0.6129, "step": 7097 }, { "epoch": 0.45, "grad_norm": 0.8515621423721313, "learning_rate": 6.049926366371565e-06, "loss": 0.6088, "step": 7098 }, { "epoch": 0.45, "grad_norm": 0.8117412328720093, "learning_rate": 6.048923225584383e-06, "loss": 0.5814, "step": 7099 }, { "epoch": 0.45, "grad_norm": 0.9428633451461792, "learning_rate": 6.047920040630692e-06, "loss": 0.5821, "step": 7100 }, { "epoch": 0.45, "grad_norm": 0.8841315507888794, "learning_rate": 6.046916811552735e-06, "loss": 0.5685, "step": 7101 }, { "epoch": 0.45, "grad_norm": 0.8266769051551819, "learning_rate": 6.045913538392754e-06, "loss": 0.5903, "step": 7102 }, { "epoch": 0.45, "grad_norm": 0.9347844123840332, "learning_rate": 6.04491022119299e-06, "loss": 0.6212, "step": 7103 }, { "epoch": 0.45, "grad_norm": 0.8492763042449951, "learning_rate": 6.043906859995693e-06, "loss": 0.5411, "step": 7104 }, { "epoch": 0.45, "grad_norm": 0.8961597084999084, "learning_rate": 6.042903454843109e-06, "loss": 0.5772, "step": 7105 }, { "epoch": 0.45, "grad_norm": 0.9451767802238464, "learning_rate": 6.041900005777488e-06, "loss": 0.6845, "step": 7106 }, { "epoch": 0.45, "grad_norm": 0.8957589268684387, "learning_rate": 6.040896512841083e-06, "loss": 0.607, "step": 7107 }, { "epoch": 0.45, "grad_norm": 0.9263405203819275, "learning_rate": 6.039892976076147e-06, "loss": 0.5677, "step": 7108 }, { "epoch": 0.45, "grad_norm": 0.842929482460022, "learning_rate": 6.038889395524935e-06, "loss": 0.6016, "step": 7109 }, { "epoch": 0.45, "grad_norm": 0.9199305772781372, "learning_rate": 6.037885771229703e-06, "loss": 0.6241, "step": 7110 }, { "epoch": 0.45, "grad_norm": 0.8669660091400146, "learning_rate": 6.036882103232714e-06, "loss": 0.6024, "step": 7111 }, { "epoch": 0.45, "grad_norm": 0.8775947690010071, "learning_rate": 6.0358783915762265e-06, "loss": 0.5895, "step": 7112 }, { "epoch": 0.45, "grad_norm": 0.8761069178581238, "learning_rate": 6.034874636302502e-06, "loss": 0.5858, "step": 7113 }, { "epoch": 0.45, "grad_norm": 0.9764485955238342, "learning_rate": 6.033870837453808e-06, "loss": 0.6528, "step": 7114 }, { "epoch": 0.45, "grad_norm": 0.8723066449165344, "learning_rate": 6.0328669950724096e-06, "loss": 0.5737, "step": 7115 }, { "epoch": 0.45, "grad_norm": 0.8759384751319885, "learning_rate": 6.031863109200575e-06, "loss": 0.6642, "step": 7116 }, { "epoch": 0.45, "grad_norm": 0.8828076124191284, "learning_rate": 6.030859179880574e-06, "loss": 0.6082, "step": 7117 }, { "epoch": 0.45, "grad_norm": 0.9559153318405151, "learning_rate": 6.029855207154679e-06, "loss": 0.599, "step": 7118 }, { "epoch": 0.45, "grad_norm": 0.8825298547744751, "learning_rate": 6.0288511910651644e-06, "loss": 0.5871, "step": 7119 }, { "epoch": 0.45, "grad_norm": 0.8728659152984619, "learning_rate": 6.027847131654305e-06, "loss": 0.5783, "step": 7120 }, { "epoch": 0.45, "grad_norm": 0.9349566102027893, "learning_rate": 6.026843028964378e-06, "loss": 0.6797, "step": 7121 }, { "epoch": 0.45, "grad_norm": 0.9346398711204529, "learning_rate": 6.025838883037664e-06, "loss": 0.5802, "step": 7122 }, { "epoch": 0.45, "grad_norm": 1.0061968564987183, "learning_rate": 6.024834693916443e-06, "loss": 0.6027, "step": 7123 }, { "epoch": 0.45, "grad_norm": 0.8729983568191528, "learning_rate": 6.023830461642998e-06, "loss": 0.5871, "step": 7124 }, { "epoch": 0.45, "grad_norm": 0.8659200072288513, "learning_rate": 6.022826186259614e-06, "loss": 0.575, "step": 7125 }, { "epoch": 0.45, "grad_norm": 0.8419411182403564, "learning_rate": 6.021821867808576e-06, "loss": 0.5713, "step": 7126 }, { "epoch": 0.45, "grad_norm": 0.9160114526748657, "learning_rate": 6.0208175063321765e-06, "loss": 0.5911, "step": 7127 }, { "epoch": 0.45, "grad_norm": 0.869117259979248, "learning_rate": 6.019813101872701e-06, "loss": 0.5719, "step": 7128 }, { "epoch": 0.45, "grad_norm": 0.9373559355735779, "learning_rate": 6.018808654472445e-06, "loss": 0.5958, "step": 7129 }, { "epoch": 0.45, "grad_norm": 0.9139472246170044, "learning_rate": 6.017804164173698e-06, "loss": 0.6223, "step": 7130 }, { "epoch": 0.45, "grad_norm": 0.9792049527168274, "learning_rate": 6.0167996310187615e-06, "loss": 0.6056, "step": 7131 }, { "epoch": 0.45, "grad_norm": 0.8391651511192322, "learning_rate": 6.015795055049929e-06, "loss": 0.5411, "step": 7132 }, { "epoch": 0.45, "grad_norm": 0.9273377060890198, "learning_rate": 6.014790436309499e-06, "loss": 0.6351, "step": 7133 }, { "epoch": 0.45, "grad_norm": 0.8438581228256226, "learning_rate": 6.013785774839776e-06, "loss": 0.5491, "step": 7134 }, { "epoch": 0.45, "grad_norm": 0.858647882938385, "learning_rate": 6.012781070683058e-06, "loss": 0.6346, "step": 7135 }, { "epoch": 0.45, "grad_norm": 0.8910332918167114, "learning_rate": 6.011776323881654e-06, "loss": 0.6019, "step": 7136 }, { "epoch": 0.45, "grad_norm": 0.8092496991157532, "learning_rate": 6.0107715344778684e-06, "loss": 0.5526, "step": 7137 }, { "epoch": 0.45, "grad_norm": 0.8764297962188721, "learning_rate": 6.00976670251401e-06, "loss": 0.6015, "step": 7138 }, { "epoch": 0.45, "grad_norm": 0.8800103664398193, "learning_rate": 6.008761828032389e-06, "loss": 0.5809, "step": 7139 }, { "epoch": 0.45, "grad_norm": 0.9022515416145325, "learning_rate": 6.007756911075315e-06, "loss": 0.5433, "step": 7140 }, { "epoch": 0.45, "grad_norm": 0.8949940800666809, "learning_rate": 6.006751951685104e-06, "loss": 0.5678, "step": 7141 }, { "epoch": 0.45, "grad_norm": 0.9681234955787659, "learning_rate": 6.005746949904072e-06, "loss": 0.6141, "step": 7142 }, { "epoch": 0.45, "grad_norm": 0.863433301448822, "learning_rate": 6.004741905774533e-06, "loss": 0.6122, "step": 7143 }, { "epoch": 0.45, "grad_norm": 0.9015092253684998, "learning_rate": 6.003736819338808e-06, "loss": 0.6173, "step": 7144 }, { "epoch": 0.45, "grad_norm": 0.9383165836334229, "learning_rate": 6.0027316906392165e-06, "loss": 0.5596, "step": 7145 }, { "epoch": 0.45, "grad_norm": 0.8968831896781921, "learning_rate": 6.001726519718083e-06, "loss": 0.5773, "step": 7146 }, { "epoch": 0.45, "grad_norm": 0.9414469003677368, "learning_rate": 6.000721306617731e-06, "loss": 0.613, "step": 7147 }, { "epoch": 0.45, "grad_norm": 0.8644320368766785, "learning_rate": 5.999716051380484e-06, "loss": 0.5843, "step": 7148 }, { "epoch": 0.45, "grad_norm": 0.8745971322059631, "learning_rate": 5.998710754048674e-06, "loss": 0.6112, "step": 7149 }, { "epoch": 0.45, "grad_norm": 0.9257605075836182, "learning_rate": 5.997705414664627e-06, "loss": 0.5828, "step": 7150 }, { "epoch": 0.45, "grad_norm": 0.8571212887763977, "learning_rate": 5.996700033270676e-06, "loss": 0.5362, "step": 7151 }, { "epoch": 0.45, "grad_norm": 0.895595908164978, "learning_rate": 5.995694609909153e-06, "loss": 0.5773, "step": 7152 }, { "epoch": 0.45, "grad_norm": 0.8279756307601929, "learning_rate": 5.9946891446223955e-06, "loss": 0.5738, "step": 7153 }, { "epoch": 0.45, "grad_norm": 0.8749321699142456, "learning_rate": 5.993683637452736e-06, "loss": 0.5683, "step": 7154 }, { "epoch": 0.45, "grad_norm": 0.8859013319015503, "learning_rate": 5.992678088442518e-06, "loss": 0.6348, "step": 7155 }, { "epoch": 0.45, "grad_norm": 0.8397629857063293, "learning_rate": 5.991672497634076e-06, "loss": 0.5788, "step": 7156 }, { "epoch": 0.45, "grad_norm": 0.8651146292686462, "learning_rate": 5.990666865069759e-06, "loss": 0.5909, "step": 7157 }, { "epoch": 0.45, "grad_norm": 0.8625426888465881, "learning_rate": 5.9896611907919034e-06, "loss": 0.6283, "step": 7158 }, { "epoch": 0.45, "grad_norm": 0.8170305490493774, "learning_rate": 5.98865547484286e-06, "loss": 0.5283, "step": 7159 }, { "epoch": 0.45, "grad_norm": 0.8646724820137024, "learning_rate": 5.9876497172649704e-06, "loss": 0.5889, "step": 7160 }, { "epoch": 0.45, "grad_norm": 0.9071366786956787, "learning_rate": 5.986643918100591e-06, "loss": 0.6299, "step": 7161 }, { "epoch": 0.45, "grad_norm": 0.8904988169670105, "learning_rate": 5.985638077392066e-06, "loss": 0.5889, "step": 7162 }, { "epoch": 0.45, "grad_norm": 0.8769485950469971, "learning_rate": 5.984632195181752e-06, "loss": 0.5965, "step": 7163 }, { "epoch": 0.45, "grad_norm": 0.8972844481468201, "learning_rate": 5.983626271512e-06, "loss": 0.6024, "step": 7164 }, { "epoch": 0.45, "grad_norm": 0.92257159948349, "learning_rate": 5.982620306425167e-06, "loss": 0.6158, "step": 7165 }, { "epoch": 0.45, "grad_norm": 0.8915507793426514, "learning_rate": 5.981614299963614e-06, "loss": 0.6001, "step": 7166 }, { "epoch": 0.45, "grad_norm": 0.9429782032966614, "learning_rate": 5.9806082521696936e-06, "loss": 0.6177, "step": 7167 }, { "epoch": 0.45, "grad_norm": 0.8852347135543823, "learning_rate": 5.979602163085775e-06, "loss": 0.5969, "step": 7168 }, { "epoch": 0.45, "grad_norm": 0.880511999130249, "learning_rate": 5.978596032754215e-06, "loss": 0.5388, "step": 7169 }, { "epoch": 0.45, "grad_norm": 0.8679192066192627, "learning_rate": 5.977589861217381e-06, "loss": 0.5925, "step": 7170 }, { "epoch": 0.45, "grad_norm": 0.8661013245582581, "learning_rate": 5.9765836485176376e-06, "loss": 0.5717, "step": 7171 }, { "epoch": 0.45, "grad_norm": 0.8547639846801758, "learning_rate": 5.9755773946973546e-06, "loss": 0.6167, "step": 7172 }, { "epoch": 0.45, "grad_norm": 0.8579798936843872, "learning_rate": 5.974571099798902e-06, "loss": 0.584, "step": 7173 }, { "epoch": 0.45, "grad_norm": 0.8616194128990173, "learning_rate": 5.973564763864651e-06, "loss": 0.6371, "step": 7174 }, { "epoch": 0.45, "grad_norm": 0.8729208111763, "learning_rate": 5.972558386936973e-06, "loss": 0.6199, "step": 7175 }, { "epoch": 0.45, "grad_norm": 0.8993321657180786, "learning_rate": 5.971551969058246e-06, "loss": 0.615, "step": 7176 }, { "epoch": 0.45, "grad_norm": 0.8909555673599243, "learning_rate": 5.970545510270845e-06, "loss": 0.5902, "step": 7177 }, { "epoch": 0.45, "grad_norm": 0.8814198970794678, "learning_rate": 5.969539010617149e-06, "loss": 0.5594, "step": 7178 }, { "epoch": 0.45, "grad_norm": 0.8692460060119629, "learning_rate": 5.968532470139537e-06, "loss": 0.5863, "step": 7179 }, { "epoch": 0.45, "grad_norm": 0.8972712755203247, "learning_rate": 5.967525888880392e-06, "loss": 0.5691, "step": 7180 }, { "epoch": 0.45, "grad_norm": 0.8847329020500183, "learning_rate": 5.966519266882099e-06, "loss": 0.58, "step": 7181 }, { "epoch": 0.46, "grad_norm": 0.9059928059577942, "learning_rate": 5.965512604187041e-06, "loss": 0.5482, "step": 7182 }, { "epoch": 0.46, "grad_norm": 0.9167791604995728, "learning_rate": 5.964505900837606e-06, "loss": 0.5814, "step": 7183 }, { "epoch": 0.46, "grad_norm": 0.9062039852142334, "learning_rate": 5.963499156876182e-06, "loss": 0.5696, "step": 7184 }, { "epoch": 0.46, "grad_norm": 0.9285433888435364, "learning_rate": 5.962492372345163e-06, "loss": 0.5891, "step": 7185 }, { "epoch": 0.46, "grad_norm": 0.8977758288383484, "learning_rate": 5.961485547286936e-06, "loss": 0.634, "step": 7186 }, { "epoch": 0.46, "grad_norm": 0.8944379091262817, "learning_rate": 5.960478681743897e-06, "loss": 0.5478, "step": 7187 }, { "epoch": 0.46, "grad_norm": 0.8805668950080872, "learning_rate": 5.959471775758444e-06, "loss": 0.6422, "step": 7188 }, { "epoch": 0.46, "grad_norm": 0.8319005370140076, "learning_rate": 5.9584648293729715e-06, "loss": 0.5771, "step": 7189 }, { "epoch": 0.46, "grad_norm": 0.834413468837738, "learning_rate": 5.957457842629879e-06, "loss": 0.5732, "step": 7190 }, { "epoch": 0.46, "grad_norm": 0.8137858510017395, "learning_rate": 5.956450815571567e-06, "loss": 0.545, "step": 7191 }, { "epoch": 0.46, "grad_norm": 0.8825639486312866, "learning_rate": 5.955443748240439e-06, "loss": 0.5388, "step": 7192 }, { "epoch": 0.46, "grad_norm": 0.8979218006134033, "learning_rate": 5.9544366406789e-06, "loss": 0.6082, "step": 7193 }, { "epoch": 0.46, "grad_norm": 0.8679836392402649, "learning_rate": 5.953429492929352e-06, "loss": 0.557, "step": 7194 }, { "epoch": 0.46, "grad_norm": 0.9074422121047974, "learning_rate": 5.952422305034206e-06, "loss": 0.5523, "step": 7195 }, { "epoch": 0.46, "grad_norm": 0.8395237326622009, "learning_rate": 5.95141507703587e-06, "loss": 0.5881, "step": 7196 }, { "epoch": 0.46, "grad_norm": 0.8752167820930481, "learning_rate": 5.9504078089767545e-06, "loss": 0.6212, "step": 7197 }, { "epoch": 0.46, "grad_norm": 0.9009909629821777, "learning_rate": 5.949400500899272e-06, "loss": 0.6038, "step": 7198 }, { "epoch": 0.46, "grad_norm": 0.8272262215614319, "learning_rate": 5.948393152845837e-06, "loss": 0.492, "step": 7199 }, { "epoch": 0.46, "grad_norm": 0.9322216510772705, "learning_rate": 5.9473857648588665e-06, "loss": 0.6591, "step": 7200 }, { "epoch": 0.46, "grad_norm": 0.831906795501709, "learning_rate": 5.9463783369807775e-06, "loss": 0.601, "step": 7201 }, { "epoch": 0.46, "grad_norm": 0.844941258430481, "learning_rate": 5.945370869253987e-06, "loss": 0.5914, "step": 7202 }, { "epoch": 0.46, "grad_norm": 0.8600195050239563, "learning_rate": 5.944363361720919e-06, "loss": 0.6095, "step": 7203 }, { "epoch": 0.46, "grad_norm": 0.8606268167495728, "learning_rate": 5.943355814423996e-06, "loss": 0.5522, "step": 7204 }, { "epoch": 0.46, "grad_norm": 0.8842875361442566, "learning_rate": 5.94234822740564e-06, "loss": 0.6165, "step": 7205 }, { "epoch": 0.46, "grad_norm": 0.8753437995910645, "learning_rate": 5.941340600708279e-06, "loss": 0.5917, "step": 7206 }, { "epoch": 0.46, "grad_norm": 0.9226531386375427, "learning_rate": 5.9403329343743385e-06, "loss": 0.6226, "step": 7207 }, { "epoch": 0.46, "grad_norm": 0.9380946755409241, "learning_rate": 5.939325228446251e-06, "loss": 0.6713, "step": 7208 }, { "epoch": 0.46, "grad_norm": 0.8104608058929443, "learning_rate": 5.938317482966446e-06, "loss": 0.5834, "step": 7209 }, { "epoch": 0.46, "grad_norm": 0.8702726364135742, "learning_rate": 5.937309697977355e-06, "loss": 0.5383, "step": 7210 }, { "epoch": 0.46, "grad_norm": 0.8711553812026978, "learning_rate": 5.936301873521414e-06, "loss": 0.5848, "step": 7211 }, { "epoch": 0.46, "grad_norm": 0.9385100603103638, "learning_rate": 5.935294009641057e-06, "loss": 0.6047, "step": 7212 }, { "epoch": 0.46, "grad_norm": 0.8969570994377136, "learning_rate": 5.934286106378724e-06, "loss": 0.6154, "step": 7213 }, { "epoch": 0.46, "grad_norm": 0.9219831824302673, "learning_rate": 5.933278163776852e-06, "loss": 0.62, "step": 7214 }, { "epoch": 0.46, "grad_norm": 0.9561776518821716, "learning_rate": 5.932270181877886e-06, "loss": 0.6364, "step": 7215 }, { "epoch": 0.46, "grad_norm": 0.8352993130683899, "learning_rate": 5.9312621607242625e-06, "loss": 0.5626, "step": 7216 }, { "epoch": 0.46, "grad_norm": 0.8720530271530151, "learning_rate": 5.93025410035843e-06, "loss": 0.569, "step": 7217 }, { "epoch": 0.46, "grad_norm": 0.8734807372093201, "learning_rate": 5.929246000822835e-06, "loss": 0.6127, "step": 7218 }, { "epoch": 0.46, "grad_norm": 0.9370132088661194, "learning_rate": 5.928237862159922e-06, "loss": 0.6122, "step": 7219 }, { "epoch": 0.46, "grad_norm": 0.918322741985321, "learning_rate": 5.927229684412143e-06, "loss": 0.6148, "step": 7220 }, { "epoch": 0.46, "grad_norm": 0.8534547090530396, "learning_rate": 5.926221467621945e-06, "loss": 0.5618, "step": 7221 }, { "epoch": 0.46, "grad_norm": 0.8477325439453125, "learning_rate": 5.925213211831785e-06, "loss": 0.5562, "step": 7222 }, { "epoch": 0.46, "grad_norm": 0.881864070892334, "learning_rate": 5.924204917084116e-06, "loss": 0.5994, "step": 7223 }, { "epoch": 0.46, "grad_norm": 0.8880230784416199, "learning_rate": 5.923196583421392e-06, "loss": 0.5846, "step": 7224 }, { "epoch": 0.46, "grad_norm": 0.8969435691833496, "learning_rate": 5.922188210886071e-06, "loss": 0.57, "step": 7225 }, { "epoch": 0.46, "grad_norm": 0.8515308499336243, "learning_rate": 5.921179799520613e-06, "loss": 0.6143, "step": 7226 }, { "epoch": 0.46, "grad_norm": 0.9298543334007263, "learning_rate": 5.920171349367478e-06, "loss": 0.6082, "step": 7227 }, { "epoch": 0.46, "grad_norm": 0.8796992301940918, "learning_rate": 5.919162860469129e-06, "loss": 0.5899, "step": 7228 }, { "epoch": 0.46, "grad_norm": 0.8747338056564331, "learning_rate": 5.9181543328680295e-06, "loss": 0.6593, "step": 7229 }, { "epoch": 0.46, "grad_norm": 0.9241954684257507, "learning_rate": 5.917145766606645e-06, "loss": 0.6421, "step": 7230 }, { "epoch": 0.46, "grad_norm": 0.8967227935791016, "learning_rate": 5.9161371617274425e-06, "loss": 0.5775, "step": 7231 }, { "epoch": 0.46, "grad_norm": 0.8867378830909729, "learning_rate": 5.91512851827289e-06, "loss": 0.5834, "step": 7232 }, { "epoch": 0.46, "grad_norm": 0.8239137530326843, "learning_rate": 5.914119836285461e-06, "loss": 0.5708, "step": 7233 }, { "epoch": 0.46, "grad_norm": 0.8044520020484924, "learning_rate": 5.913111115807626e-06, "loss": 0.6005, "step": 7234 }, { "epoch": 0.46, "grad_norm": 0.8815402388572693, "learning_rate": 5.912102356881857e-06, "loss": 0.5627, "step": 7235 }, { "epoch": 0.46, "grad_norm": 0.9280535578727722, "learning_rate": 5.91109355955063e-06, "loss": 0.5705, "step": 7236 }, { "epoch": 0.46, "grad_norm": 0.8546304702758789, "learning_rate": 5.910084723856424e-06, "loss": 0.5808, "step": 7237 }, { "epoch": 0.46, "grad_norm": 0.8888003826141357, "learning_rate": 5.909075849841717e-06, "loss": 0.596, "step": 7238 }, { "epoch": 0.46, "grad_norm": 0.8333981037139893, "learning_rate": 5.908066937548987e-06, "loss": 0.5282, "step": 7239 }, { "epoch": 0.46, "grad_norm": 0.8448134660720825, "learning_rate": 5.907057987020717e-06, "loss": 0.5851, "step": 7240 }, { "epoch": 0.46, "grad_norm": 0.8403011560440063, "learning_rate": 5.906048998299392e-06, "loss": 0.5617, "step": 7241 }, { "epoch": 0.46, "grad_norm": 0.8628389835357666, "learning_rate": 5.905039971427494e-06, "loss": 0.621, "step": 7242 }, { "epoch": 0.46, "grad_norm": 0.9215841889381409, "learning_rate": 5.9040309064475136e-06, "loss": 0.6134, "step": 7243 }, { "epoch": 0.46, "grad_norm": 0.858690083026886, "learning_rate": 5.903021803401933e-06, "loss": 0.5893, "step": 7244 }, { "epoch": 0.46, "grad_norm": 0.8196396231651306, "learning_rate": 5.902012662333248e-06, "loss": 0.5492, "step": 7245 }, { "epoch": 0.46, "grad_norm": 0.8949219584465027, "learning_rate": 5.9010034832839466e-06, "loss": 0.6423, "step": 7246 }, { "epoch": 0.46, "grad_norm": 0.9517080187797546, "learning_rate": 5.899994266296525e-06, "loss": 0.6048, "step": 7247 }, { "epoch": 0.46, "grad_norm": 0.8856121897697449, "learning_rate": 5.898985011413473e-06, "loss": 0.549, "step": 7248 }, { "epoch": 0.46, "grad_norm": 0.8450676798820496, "learning_rate": 5.897975718677291e-06, "loss": 0.5636, "step": 7249 }, { "epoch": 0.46, "grad_norm": 0.8568273782730103, "learning_rate": 5.896966388130475e-06, "loss": 0.5788, "step": 7250 }, { "epoch": 0.46, "grad_norm": 0.8017422556877136, "learning_rate": 5.895957019815526e-06, "loss": 0.5543, "step": 7251 }, { "epoch": 0.46, "grad_norm": 0.9004830718040466, "learning_rate": 5.894947613774942e-06, "loss": 0.5613, "step": 7252 }, { "epoch": 0.46, "grad_norm": 0.7895128726959229, "learning_rate": 5.8939381700512275e-06, "loss": 0.5361, "step": 7253 }, { "epoch": 0.46, "grad_norm": 0.8576557040214539, "learning_rate": 5.892928688686887e-06, "loss": 0.6323, "step": 7254 }, { "epoch": 0.46, "grad_norm": 0.8445666432380676, "learning_rate": 5.891919169724426e-06, "loss": 0.5944, "step": 7255 }, { "epoch": 0.46, "grad_norm": 0.8773793578147888, "learning_rate": 5.890909613206351e-06, "loss": 0.6197, "step": 7256 }, { "epoch": 0.46, "grad_norm": 0.853339672088623, "learning_rate": 5.889900019175171e-06, "loss": 0.5519, "step": 7257 }, { "epoch": 0.46, "grad_norm": 0.9311347007751465, "learning_rate": 5.888890387673398e-06, "loss": 0.5779, "step": 7258 }, { "epoch": 0.46, "grad_norm": 0.823020339012146, "learning_rate": 5.887880718743541e-06, "loss": 0.5509, "step": 7259 }, { "epoch": 0.46, "grad_norm": 0.8491629362106323, "learning_rate": 5.886871012428117e-06, "loss": 0.5738, "step": 7260 }, { "epoch": 0.46, "grad_norm": 0.8535467982292175, "learning_rate": 5.885861268769641e-06, "loss": 0.5945, "step": 7261 }, { "epoch": 0.46, "grad_norm": 0.8396848440170288, "learning_rate": 5.8848514878106275e-06, "loss": 0.5535, "step": 7262 }, { "epoch": 0.46, "grad_norm": 0.9186087250709534, "learning_rate": 5.883841669593595e-06, "loss": 0.6172, "step": 7263 }, { "epoch": 0.46, "grad_norm": 0.877123236656189, "learning_rate": 5.882831814161065e-06, "loss": 0.6176, "step": 7264 }, { "epoch": 0.46, "grad_norm": 0.8525793552398682, "learning_rate": 5.881821921555559e-06, "loss": 0.5688, "step": 7265 }, { "epoch": 0.46, "grad_norm": 0.8865832090377808, "learning_rate": 5.880811991819601e-06, "loss": 0.6026, "step": 7266 }, { "epoch": 0.46, "grad_norm": 0.9039906859397888, "learning_rate": 5.879802024995712e-06, "loss": 0.6023, "step": 7267 }, { "epoch": 0.46, "grad_norm": 0.9067119359970093, "learning_rate": 5.878792021126421e-06, "loss": 0.6153, "step": 7268 }, { "epoch": 0.46, "grad_norm": 0.8938568830490112, "learning_rate": 5.877781980254255e-06, "loss": 0.6366, "step": 7269 }, { "epoch": 0.46, "grad_norm": 0.8254221081733704, "learning_rate": 5.876771902421743e-06, "loss": 0.5323, "step": 7270 }, { "epoch": 0.46, "grad_norm": 0.9549217820167542, "learning_rate": 5.875761787671416e-06, "loss": 0.6151, "step": 7271 }, { "epoch": 0.46, "grad_norm": 0.8977713584899902, "learning_rate": 5.874751636045808e-06, "loss": 0.5451, "step": 7272 }, { "epoch": 0.46, "grad_norm": 0.8796578645706177, "learning_rate": 5.873741447587451e-06, "loss": 0.5895, "step": 7273 }, { "epoch": 0.46, "grad_norm": 0.8962649703025818, "learning_rate": 5.8727312223388814e-06, "loss": 0.632, "step": 7274 }, { "epoch": 0.46, "grad_norm": 0.8637465238571167, "learning_rate": 5.871720960342635e-06, "loss": 0.6002, "step": 7275 }, { "epoch": 0.46, "grad_norm": 0.8970744013786316, "learning_rate": 5.870710661641252e-06, "loss": 0.551, "step": 7276 }, { "epoch": 0.46, "grad_norm": 0.8728744387626648, "learning_rate": 5.869700326277273e-06, "loss": 0.6214, "step": 7277 }, { "epoch": 0.46, "grad_norm": 0.8638222217559814, "learning_rate": 5.868689954293239e-06, "loss": 0.583, "step": 7278 }, { "epoch": 0.46, "grad_norm": 0.8397230505943298, "learning_rate": 5.86767954573169e-06, "loss": 0.5571, "step": 7279 }, { "epoch": 0.46, "grad_norm": 0.921284556388855, "learning_rate": 5.866669100635176e-06, "loss": 0.6216, "step": 7280 }, { "epoch": 0.46, "grad_norm": 0.9188320636749268, "learning_rate": 5.865658619046242e-06, "loss": 0.5926, "step": 7281 }, { "epoch": 0.46, "grad_norm": 0.831984281539917, "learning_rate": 5.864648101007433e-06, "loss": 0.5531, "step": 7282 }, { "epoch": 0.46, "grad_norm": 0.8987353444099426, "learning_rate": 5.863637546561301e-06, "loss": 0.5643, "step": 7283 }, { "epoch": 0.46, "grad_norm": 0.9372497200965881, "learning_rate": 5.862626955750397e-06, "loss": 0.6315, "step": 7284 }, { "epoch": 0.46, "grad_norm": 0.8938281536102295, "learning_rate": 5.8616163286172726e-06, "loss": 0.6466, "step": 7285 }, { "epoch": 0.46, "grad_norm": 0.8691145777702332, "learning_rate": 5.8606056652044805e-06, "loss": 0.5714, "step": 7286 }, { "epoch": 0.46, "grad_norm": 0.9212241172790527, "learning_rate": 5.859594965554579e-06, "loss": 0.6383, "step": 7287 }, { "epoch": 0.46, "grad_norm": 0.8839470148086548, "learning_rate": 5.858584229710124e-06, "loss": 0.6086, "step": 7288 }, { "epoch": 0.46, "grad_norm": 0.8715324401855469, "learning_rate": 5.857573457713674e-06, "loss": 0.5746, "step": 7289 }, { "epoch": 0.46, "grad_norm": 0.8384736776351929, "learning_rate": 5.856562649607788e-06, "loss": 0.6008, "step": 7290 }, { "epoch": 0.46, "grad_norm": 0.9453056454658508, "learning_rate": 5.855551805435028e-06, "loss": 0.589, "step": 7291 }, { "epoch": 0.46, "grad_norm": 0.8720511198043823, "learning_rate": 5.854540925237959e-06, "loss": 0.622, "step": 7292 }, { "epoch": 0.46, "grad_norm": 0.950019896030426, "learning_rate": 5.853530009059144e-06, "loss": 0.6021, "step": 7293 }, { "epoch": 0.46, "grad_norm": 0.9160835146903992, "learning_rate": 5.852519056941149e-06, "loss": 0.5935, "step": 7294 }, { "epoch": 0.46, "grad_norm": 0.8492597937583923, "learning_rate": 5.851508068926542e-06, "loss": 0.5688, "step": 7295 }, { "epoch": 0.46, "grad_norm": 0.947134792804718, "learning_rate": 5.850497045057895e-06, "loss": 0.6288, "step": 7296 }, { "epoch": 0.46, "grad_norm": 0.8388863205909729, "learning_rate": 5.849485985377774e-06, "loss": 0.6021, "step": 7297 }, { "epoch": 0.46, "grad_norm": 0.886326014995575, "learning_rate": 5.848474889928753e-06, "loss": 0.5793, "step": 7298 }, { "epoch": 0.46, "grad_norm": 0.9255046248435974, "learning_rate": 5.8474637587534065e-06, "loss": 0.5786, "step": 7299 }, { "epoch": 0.46, "grad_norm": 0.9360898733139038, "learning_rate": 5.84645259189431e-06, "loss": 0.6297, "step": 7300 }, { "epoch": 0.46, "grad_norm": 0.9143325686454773, "learning_rate": 5.845441389394039e-06, "loss": 0.631, "step": 7301 }, { "epoch": 0.46, "grad_norm": 0.8604922294616699, "learning_rate": 5.844430151295171e-06, "loss": 0.5886, "step": 7302 }, { "epoch": 0.46, "grad_norm": 0.8875581622123718, "learning_rate": 5.843418877640289e-06, "loss": 0.6584, "step": 7303 }, { "epoch": 0.46, "grad_norm": 0.8978346586227417, "learning_rate": 5.842407568471971e-06, "loss": 0.6007, "step": 7304 }, { "epoch": 0.46, "grad_norm": 0.8847493529319763, "learning_rate": 5.8413962238328e-06, "loss": 0.5763, "step": 7305 }, { "epoch": 0.46, "grad_norm": 0.8711661696434021, "learning_rate": 5.840384843765361e-06, "loss": 0.5876, "step": 7306 }, { "epoch": 0.46, "grad_norm": 1.0056418180465698, "learning_rate": 5.839373428312242e-06, "loss": 0.5685, "step": 7307 }, { "epoch": 0.46, "grad_norm": 0.9755017161369324, "learning_rate": 5.838361977516026e-06, "loss": 0.6263, "step": 7308 }, { "epoch": 0.46, "grad_norm": 0.9591142535209656, "learning_rate": 5.837350491419304e-06, "loss": 0.6817, "step": 7309 }, { "epoch": 0.46, "grad_norm": 0.8851566314697266, "learning_rate": 5.836338970064664e-06, "loss": 0.5556, "step": 7310 }, { "epoch": 0.46, "grad_norm": 0.9328012466430664, "learning_rate": 5.835327413494702e-06, "loss": 0.634, "step": 7311 }, { "epoch": 0.46, "grad_norm": 0.8392686247825623, "learning_rate": 5.834315821752008e-06, "loss": 0.609, "step": 7312 }, { "epoch": 0.46, "grad_norm": 0.9183607697486877, "learning_rate": 5.833304194879176e-06, "loss": 0.6487, "step": 7313 }, { "epoch": 0.46, "grad_norm": 0.8311749696731567, "learning_rate": 5.832292532918804e-06, "loss": 0.5665, "step": 7314 }, { "epoch": 0.46, "grad_norm": 0.7938551902770996, "learning_rate": 5.831280835913489e-06, "loss": 0.5891, "step": 7315 }, { "epoch": 0.46, "grad_norm": 0.8665035963058472, "learning_rate": 5.83026910390583e-06, "loss": 0.6339, "step": 7316 }, { "epoch": 0.46, "grad_norm": 0.8929190039634705, "learning_rate": 5.829257336938427e-06, "loss": 0.6195, "step": 7317 }, { "epoch": 0.46, "grad_norm": 0.948819637298584, "learning_rate": 5.8282455350538815e-06, "loss": 0.6272, "step": 7318 }, { "epoch": 0.46, "grad_norm": 0.8194432854652405, "learning_rate": 5.827233698294799e-06, "loss": 0.6038, "step": 7319 }, { "epoch": 0.46, "grad_norm": 0.8802596926689148, "learning_rate": 5.826221826703783e-06, "loss": 0.6913, "step": 7320 }, { "epoch": 0.46, "grad_norm": 0.8479204177856445, "learning_rate": 5.825209920323438e-06, "loss": 0.5725, "step": 7321 }, { "epoch": 0.46, "grad_norm": 0.8597609400749207, "learning_rate": 5.824197979196377e-06, "loss": 0.595, "step": 7322 }, { "epoch": 0.46, "grad_norm": 0.874144434928894, "learning_rate": 5.823186003365205e-06, "loss": 0.6125, "step": 7323 }, { "epoch": 0.46, "grad_norm": 0.904408872127533, "learning_rate": 5.822173992872534e-06, "loss": 0.5542, "step": 7324 }, { "epoch": 0.46, "grad_norm": 0.8976813554763794, "learning_rate": 5.821161947760975e-06, "loss": 0.5595, "step": 7325 }, { "epoch": 0.46, "grad_norm": 0.9583491683006287, "learning_rate": 5.820149868073145e-06, "loss": 0.7058, "step": 7326 }, { "epoch": 0.46, "grad_norm": 0.8859381675720215, "learning_rate": 5.819137753851656e-06, "loss": 0.6169, "step": 7327 }, { "epoch": 0.46, "grad_norm": 0.8623588681221008, "learning_rate": 5.8181256051391276e-06, "loss": 0.5796, "step": 7328 }, { "epoch": 0.46, "grad_norm": 0.9704835414886475, "learning_rate": 5.817113421978173e-06, "loss": 0.6314, "step": 7329 }, { "epoch": 0.46, "grad_norm": 0.8964600563049316, "learning_rate": 5.816101204411417e-06, "loss": 0.5712, "step": 7330 }, { "epoch": 0.46, "grad_norm": 0.8039814233779907, "learning_rate": 5.815088952481478e-06, "loss": 0.5073, "step": 7331 }, { "epoch": 0.46, "grad_norm": 0.8570845723152161, "learning_rate": 5.814076666230978e-06, "loss": 0.613, "step": 7332 }, { "epoch": 0.46, "grad_norm": 0.8780226707458496, "learning_rate": 5.813064345702542e-06, "loss": 0.5941, "step": 7333 }, { "epoch": 0.46, "grad_norm": 0.9203137159347534, "learning_rate": 5.812051990938794e-06, "loss": 0.5627, "step": 7334 }, { "epoch": 0.46, "grad_norm": 0.9094358682632446, "learning_rate": 5.811039601982363e-06, "loss": 0.6046, "step": 7335 }, { "epoch": 0.46, "grad_norm": 0.8867512345314026, "learning_rate": 5.810027178875875e-06, "loss": 0.5973, "step": 7336 }, { "epoch": 0.46, "grad_norm": 0.8854659795761108, "learning_rate": 5.809014721661961e-06, "loss": 0.6152, "step": 7337 }, { "epoch": 0.46, "grad_norm": 0.8991286158561707, "learning_rate": 5.808002230383249e-06, "loss": 0.6124, "step": 7338 }, { "epoch": 0.46, "grad_norm": 0.8840140104293823, "learning_rate": 5.806989705082377e-06, "loss": 0.5494, "step": 7339 }, { "epoch": 0.47, "grad_norm": 0.8739617466926575, "learning_rate": 5.805977145801975e-06, "loss": 0.6322, "step": 7340 }, { "epoch": 0.47, "grad_norm": 0.8306631445884705, "learning_rate": 5.8049645525846785e-06, "loss": 0.526, "step": 7341 }, { "epoch": 0.47, "grad_norm": 0.8292911648750305, "learning_rate": 5.8039519254731245e-06, "loss": 0.5709, "step": 7342 }, { "epoch": 0.47, "grad_norm": 0.8418349623680115, "learning_rate": 5.802939264509954e-06, "loss": 0.6032, "step": 7343 }, { "epoch": 0.47, "grad_norm": 0.906843364238739, "learning_rate": 5.801926569737802e-06, "loss": 0.5835, "step": 7344 }, { "epoch": 0.47, "grad_norm": 0.8574205040931702, "learning_rate": 5.800913841199312e-06, "loss": 0.612, "step": 7345 }, { "epoch": 0.47, "grad_norm": 0.9067574739456177, "learning_rate": 5.799901078937127e-06, "loss": 0.5773, "step": 7346 }, { "epoch": 0.47, "grad_norm": 0.894777238368988, "learning_rate": 5.798888282993891e-06, "loss": 0.5373, "step": 7347 }, { "epoch": 0.47, "grad_norm": 0.9085848331451416, "learning_rate": 5.7978754534122465e-06, "loss": 0.5839, "step": 7348 }, { "epoch": 0.47, "grad_norm": 0.8634418249130249, "learning_rate": 5.7968625902348445e-06, "loss": 0.5919, "step": 7349 }, { "epoch": 0.47, "grad_norm": 0.8625919818878174, "learning_rate": 5.7958496935043296e-06, "loss": 0.5809, "step": 7350 }, { "epoch": 0.47, "grad_norm": 0.8647257089614868, "learning_rate": 5.794836763263353e-06, "loss": 0.6084, "step": 7351 }, { "epoch": 0.47, "grad_norm": 0.877373456954956, "learning_rate": 5.793823799554564e-06, "loss": 0.5515, "step": 7352 }, { "epoch": 0.47, "grad_norm": 0.8372183442115784, "learning_rate": 5.792810802420618e-06, "loss": 0.6313, "step": 7353 }, { "epoch": 0.47, "grad_norm": 0.9380940198898315, "learning_rate": 5.791797771904168e-06, "loss": 0.5946, "step": 7354 }, { "epoch": 0.47, "grad_norm": 0.9767922759056091, "learning_rate": 5.790784708047866e-06, "loss": 0.6452, "step": 7355 }, { "epoch": 0.47, "grad_norm": 0.8654528856277466, "learning_rate": 5.789771610894371e-06, "loss": 0.5671, "step": 7356 }, { "epoch": 0.47, "grad_norm": 0.8537338972091675, "learning_rate": 5.7887584804863414e-06, "loss": 0.5607, "step": 7357 }, { "epoch": 0.47, "grad_norm": 0.879467785358429, "learning_rate": 5.787745316866438e-06, "loss": 0.5736, "step": 7358 }, { "epoch": 0.47, "grad_norm": 0.8996354341506958, "learning_rate": 5.786732120077318e-06, "loss": 0.5326, "step": 7359 }, { "epoch": 0.47, "grad_norm": 0.9491006135940552, "learning_rate": 5.7857188901616444e-06, "loss": 0.617, "step": 7360 }, { "epoch": 0.47, "grad_norm": 0.9167372584342957, "learning_rate": 5.7847056271620815e-06, "loss": 0.5909, "step": 7361 }, { "epoch": 0.47, "grad_norm": 0.88127201795578, "learning_rate": 5.783692331121296e-06, "loss": 0.6109, "step": 7362 }, { "epoch": 0.47, "grad_norm": 0.9358324408531189, "learning_rate": 5.7826790020819525e-06, "loss": 0.6228, "step": 7363 }, { "epoch": 0.47, "grad_norm": 0.8748879432678223, "learning_rate": 5.781665640086719e-06, "loss": 0.5388, "step": 7364 }, { "epoch": 0.47, "grad_norm": 0.9955645203590393, "learning_rate": 5.780652245178263e-06, "loss": 0.5945, "step": 7365 }, { "epoch": 0.47, "grad_norm": 1.0095912218093872, "learning_rate": 5.779638817399259e-06, "loss": 0.5464, "step": 7366 }, { "epoch": 0.47, "grad_norm": 0.8746544718742371, "learning_rate": 5.778625356792376e-06, "loss": 0.5783, "step": 7367 }, { "epoch": 0.47, "grad_norm": 0.8243803381919861, "learning_rate": 5.7776118634002865e-06, "loss": 0.4783, "step": 7368 }, { "epoch": 0.47, "grad_norm": 0.9888680577278137, "learning_rate": 5.776598337265668e-06, "loss": 0.5734, "step": 7369 }, { "epoch": 0.47, "grad_norm": 0.8647724986076355, "learning_rate": 5.775584778431194e-06, "loss": 0.6255, "step": 7370 }, { "epoch": 0.47, "grad_norm": 0.9031562805175781, "learning_rate": 5.774571186939543e-06, "loss": 0.5906, "step": 7371 }, { "epoch": 0.47, "grad_norm": 0.9837010502815247, "learning_rate": 5.773557562833394e-06, "loss": 0.6282, "step": 7372 }, { "epoch": 0.47, "grad_norm": 0.9058972597122192, "learning_rate": 5.772543906155429e-06, "loss": 0.6202, "step": 7373 }, { "epoch": 0.47, "grad_norm": 0.8994501233100891, "learning_rate": 5.7715302169483254e-06, "loss": 0.5639, "step": 7374 }, { "epoch": 0.47, "grad_norm": 0.8851544260978699, "learning_rate": 5.770516495254769e-06, "loss": 0.6659, "step": 7375 }, { "epoch": 0.47, "grad_norm": 0.93473881483078, "learning_rate": 5.769502741117443e-06, "loss": 0.6339, "step": 7376 }, { "epoch": 0.47, "grad_norm": 0.8682790398597717, "learning_rate": 5.7684889545790346e-06, "loss": 0.5733, "step": 7377 }, { "epoch": 0.47, "grad_norm": 0.9060890078544617, "learning_rate": 5.767475135682228e-06, "loss": 0.604, "step": 7378 }, { "epoch": 0.47, "grad_norm": 0.862415075302124, "learning_rate": 5.766461284469714e-06, "loss": 0.6114, "step": 7379 }, { "epoch": 0.47, "grad_norm": 0.9211068153381348, "learning_rate": 5.765447400984182e-06, "loss": 0.6212, "step": 7380 }, { "epoch": 0.47, "grad_norm": 0.9584636092185974, "learning_rate": 5.7644334852683236e-06, "loss": 0.6299, "step": 7381 }, { "epoch": 0.47, "grad_norm": 0.8537229299545288, "learning_rate": 5.763419537364828e-06, "loss": 0.5601, "step": 7382 }, { "epoch": 0.47, "grad_norm": 0.9321445822715759, "learning_rate": 5.762405557316393e-06, "loss": 0.598, "step": 7383 }, { "epoch": 0.47, "grad_norm": 0.9108582735061646, "learning_rate": 5.761391545165713e-06, "loss": 0.5775, "step": 7384 }, { "epoch": 0.47, "grad_norm": 0.903030514717102, "learning_rate": 5.760377500955483e-06, "loss": 0.622, "step": 7385 }, { "epoch": 0.47, "grad_norm": 0.88475102186203, "learning_rate": 5.759363424728401e-06, "loss": 0.6028, "step": 7386 }, { "epoch": 0.47, "grad_norm": 0.952921986579895, "learning_rate": 5.758349316527166e-06, "loss": 0.6262, "step": 7387 }, { "epoch": 0.47, "grad_norm": 0.867470383644104, "learning_rate": 5.7573351763944815e-06, "loss": 0.5876, "step": 7388 }, { "epoch": 0.47, "grad_norm": 0.8601709604263306, "learning_rate": 5.756321004373047e-06, "loss": 0.5715, "step": 7389 }, { "epoch": 0.47, "grad_norm": 0.910184919834137, "learning_rate": 5.755306800505564e-06, "loss": 0.6218, "step": 7390 }, { "epoch": 0.47, "grad_norm": 0.9151275157928467, "learning_rate": 5.754292564834741e-06, "loss": 0.6137, "step": 7391 }, { "epoch": 0.47, "grad_norm": 0.9013386964797974, "learning_rate": 5.753278297403282e-06, "loss": 0.6175, "step": 7392 }, { "epoch": 0.47, "grad_norm": 0.8471426367759705, "learning_rate": 5.752263998253893e-06, "loss": 0.5546, "step": 7393 }, { "epoch": 0.47, "grad_norm": 0.9509868025779724, "learning_rate": 5.751249667429285e-06, "loss": 0.6276, "step": 7394 }, { "epoch": 0.47, "grad_norm": 0.9001954793930054, "learning_rate": 5.7502353049721674e-06, "loss": 0.595, "step": 7395 }, { "epoch": 0.47, "grad_norm": 0.8812659382820129, "learning_rate": 5.74922091092525e-06, "loss": 0.5687, "step": 7396 }, { "epoch": 0.47, "grad_norm": 0.8915801644325256, "learning_rate": 5.748206485331247e-06, "loss": 0.6137, "step": 7397 }, { "epoch": 0.47, "grad_norm": 0.8607172966003418, "learning_rate": 5.747192028232872e-06, "loss": 0.5964, "step": 7398 }, { "epoch": 0.47, "grad_norm": 0.9319098591804504, "learning_rate": 5.746177539672841e-06, "loss": 0.5956, "step": 7399 }, { "epoch": 0.47, "grad_norm": 0.9447979927062988, "learning_rate": 5.745163019693867e-06, "loss": 0.5742, "step": 7400 }, { "epoch": 0.47, "grad_norm": 0.9158990979194641, "learning_rate": 5.744148468338671e-06, "loss": 0.6162, "step": 7401 }, { "epoch": 0.47, "grad_norm": 0.9429414868354797, "learning_rate": 5.743133885649972e-06, "loss": 0.6195, "step": 7402 }, { "epoch": 0.47, "grad_norm": 0.8871979117393494, "learning_rate": 5.742119271670491e-06, "loss": 0.5716, "step": 7403 }, { "epoch": 0.47, "grad_norm": 0.9028074741363525, "learning_rate": 5.741104626442948e-06, "loss": 0.6184, "step": 7404 }, { "epoch": 0.47, "grad_norm": 0.8605913519859314, "learning_rate": 5.740089950010068e-06, "loss": 0.5238, "step": 7405 }, { "epoch": 0.47, "grad_norm": 0.8414510488510132, "learning_rate": 5.739075242414575e-06, "loss": 0.5995, "step": 7406 }, { "epoch": 0.47, "grad_norm": 0.851617157459259, "learning_rate": 5.738060503699194e-06, "loss": 0.5505, "step": 7407 }, { "epoch": 0.47, "grad_norm": 0.9584022164344788, "learning_rate": 5.737045733906653e-06, "loss": 0.5901, "step": 7408 }, { "epoch": 0.47, "grad_norm": 0.9968786239624023, "learning_rate": 5.7360309330796805e-06, "loss": 0.6653, "step": 7409 }, { "epoch": 0.47, "grad_norm": 0.9742656350135803, "learning_rate": 5.735016101261005e-06, "loss": 0.6075, "step": 7410 }, { "epoch": 0.47, "grad_norm": 0.8474623560905457, "learning_rate": 5.7340012384933595e-06, "loss": 0.6144, "step": 7411 }, { "epoch": 0.47, "grad_norm": 0.9446218609809875, "learning_rate": 5.732986344819475e-06, "loss": 0.6079, "step": 7412 }, { "epoch": 0.47, "grad_norm": 0.842605710029602, "learning_rate": 5.731971420282085e-06, "loss": 0.5098, "step": 7413 }, { "epoch": 0.47, "grad_norm": 0.9913969039916992, "learning_rate": 5.730956464923926e-06, "loss": 0.594, "step": 7414 }, { "epoch": 0.47, "grad_norm": 0.8629537224769592, "learning_rate": 5.729941478787732e-06, "loss": 0.5961, "step": 7415 }, { "epoch": 0.47, "grad_norm": 0.897515058517456, "learning_rate": 5.728926461916242e-06, "loss": 0.5481, "step": 7416 }, { "epoch": 0.47, "grad_norm": 0.8936898708343506, "learning_rate": 5.727911414352192e-06, "loss": 0.5766, "step": 7417 }, { "epoch": 0.47, "grad_norm": 0.936795711517334, "learning_rate": 5.726896336138328e-06, "loss": 0.6159, "step": 7418 }, { "epoch": 0.47, "grad_norm": 0.83265620470047, "learning_rate": 5.725881227317386e-06, "loss": 0.5623, "step": 7419 }, { "epoch": 0.47, "grad_norm": 0.8322146534919739, "learning_rate": 5.724866087932113e-06, "loss": 0.5664, "step": 7420 }, { "epoch": 0.47, "grad_norm": 0.8756260871887207, "learning_rate": 5.723850918025246e-06, "loss": 0.5767, "step": 7421 }, { "epoch": 0.47, "grad_norm": 0.9313575029373169, "learning_rate": 5.722835717639539e-06, "loss": 0.5878, "step": 7422 }, { "epoch": 0.47, "grad_norm": 0.9568567872047424, "learning_rate": 5.721820486817733e-06, "loss": 0.5955, "step": 7423 }, { "epoch": 0.47, "grad_norm": 0.8222607970237732, "learning_rate": 5.720805225602579e-06, "loss": 0.5706, "step": 7424 }, { "epoch": 0.47, "grad_norm": 0.8587454557418823, "learning_rate": 5.719789934036821e-06, "loss": 0.5963, "step": 7425 }, { "epoch": 0.47, "grad_norm": 0.8979743123054504, "learning_rate": 5.718774612163216e-06, "loss": 0.5787, "step": 7426 }, { "epoch": 0.47, "grad_norm": 0.8900598883628845, "learning_rate": 5.717759260024511e-06, "loss": 0.6332, "step": 7427 }, { "epoch": 0.47, "grad_norm": 0.8718476891517639, "learning_rate": 5.716743877663462e-06, "loss": 0.5851, "step": 7428 }, { "epoch": 0.47, "grad_norm": 0.9177529215812683, "learning_rate": 5.715728465122821e-06, "loss": 0.6083, "step": 7429 }, { "epoch": 0.47, "grad_norm": 0.9315070509910583, "learning_rate": 5.714713022445344e-06, "loss": 0.6009, "step": 7430 }, { "epoch": 0.47, "grad_norm": 0.8802310228347778, "learning_rate": 5.713697549673788e-06, "loss": 0.5769, "step": 7431 }, { "epoch": 0.47, "grad_norm": 0.9262186884880066, "learning_rate": 5.712682046850909e-06, "loss": 0.5989, "step": 7432 }, { "epoch": 0.47, "grad_norm": 0.939360499382019, "learning_rate": 5.711666514019472e-06, "loss": 0.6666, "step": 7433 }, { "epoch": 0.47, "grad_norm": 0.8618243336677551, "learning_rate": 5.710650951222231e-06, "loss": 0.5733, "step": 7434 }, { "epoch": 0.47, "grad_norm": 0.8601074814796448, "learning_rate": 5.709635358501952e-06, "loss": 0.5871, "step": 7435 }, { "epoch": 0.47, "grad_norm": 0.9362636804580688, "learning_rate": 5.708619735901394e-06, "loss": 0.573, "step": 7436 }, { "epoch": 0.47, "grad_norm": 0.8923870325088501, "learning_rate": 5.707604083463327e-06, "loss": 0.5884, "step": 7437 }, { "epoch": 0.47, "grad_norm": 0.9044986367225647, "learning_rate": 5.706588401230513e-06, "loss": 0.573, "step": 7438 }, { "epoch": 0.47, "grad_norm": 0.8761860728263855, "learning_rate": 5.70557268924572e-06, "loss": 0.6136, "step": 7439 }, { "epoch": 0.47, "grad_norm": 0.8322923183441162, "learning_rate": 5.7045569475517126e-06, "loss": 0.5395, "step": 7440 }, { "epoch": 0.47, "grad_norm": 0.9440213441848755, "learning_rate": 5.703541176191266e-06, "loss": 0.6287, "step": 7441 }, { "epoch": 0.47, "grad_norm": 0.815701425075531, "learning_rate": 5.702525375207147e-06, "loss": 0.536, "step": 7442 }, { "epoch": 0.47, "grad_norm": 0.9086024761199951, "learning_rate": 5.70150954464213e-06, "loss": 0.537, "step": 7443 }, { "epoch": 0.47, "grad_norm": 0.9420557618141174, "learning_rate": 5.700493684538984e-06, "loss": 0.5822, "step": 7444 }, { "epoch": 0.47, "grad_norm": 0.8448433876037598, "learning_rate": 5.699477794940487e-06, "loss": 0.599, "step": 7445 }, { "epoch": 0.47, "grad_norm": 0.8653044104576111, "learning_rate": 5.698461875889414e-06, "loss": 0.5989, "step": 7446 }, { "epoch": 0.47, "grad_norm": 0.8957589864730835, "learning_rate": 5.6974459274285395e-06, "loss": 0.6335, "step": 7447 }, { "epoch": 0.47, "grad_norm": 0.8396274447441101, "learning_rate": 5.696429949600643e-06, "loss": 0.5672, "step": 7448 }, { "epoch": 0.47, "grad_norm": 0.900188148021698, "learning_rate": 5.695413942448505e-06, "loss": 0.5975, "step": 7449 }, { "epoch": 0.47, "grad_norm": 0.8910940289497375, "learning_rate": 5.694397906014907e-06, "loss": 0.5868, "step": 7450 }, { "epoch": 0.47, "grad_norm": 0.8663356900215149, "learning_rate": 5.693381840342626e-06, "loss": 0.5789, "step": 7451 }, { "epoch": 0.47, "grad_norm": 0.8898122310638428, "learning_rate": 5.692365745474448e-06, "loss": 0.595, "step": 7452 }, { "epoch": 0.47, "grad_norm": 0.7971364259719849, "learning_rate": 5.691349621453158e-06, "loss": 0.5529, "step": 7453 }, { "epoch": 0.47, "grad_norm": 0.813381552696228, "learning_rate": 5.6903334683215416e-06, "loss": 0.5635, "step": 7454 }, { "epoch": 0.47, "grad_norm": 0.9315560460090637, "learning_rate": 5.689317286122382e-06, "loss": 0.6557, "step": 7455 }, { "epoch": 0.47, "grad_norm": 0.9134712815284729, "learning_rate": 5.68830107489847e-06, "loss": 0.5853, "step": 7456 }, { "epoch": 0.47, "grad_norm": 0.966414749622345, "learning_rate": 5.687284834692595e-06, "loss": 0.5889, "step": 7457 }, { "epoch": 0.47, "grad_norm": 0.9134582281112671, "learning_rate": 5.686268565547547e-06, "loss": 0.6798, "step": 7458 }, { "epoch": 0.47, "grad_norm": 0.8649297952651978, "learning_rate": 5.685252267506116e-06, "loss": 0.5932, "step": 7459 }, { "epoch": 0.47, "grad_norm": 0.94404536485672, "learning_rate": 5.6842359406110945e-06, "loss": 0.6098, "step": 7460 }, { "epoch": 0.47, "grad_norm": 0.8642643094062805, "learning_rate": 5.683219584905281e-06, "loss": 0.5948, "step": 7461 }, { "epoch": 0.47, "grad_norm": 0.9007630944252014, "learning_rate": 5.682203200431465e-06, "loss": 0.5938, "step": 7462 }, { "epoch": 0.47, "grad_norm": 0.901642918586731, "learning_rate": 5.6811867872324465e-06, "loss": 0.6043, "step": 7463 }, { "epoch": 0.47, "grad_norm": 0.9297770857810974, "learning_rate": 5.680170345351021e-06, "loss": 0.6118, "step": 7464 }, { "epoch": 0.47, "grad_norm": 0.9477009177207947, "learning_rate": 5.67915387482999e-06, "loss": 0.6211, "step": 7465 }, { "epoch": 0.47, "grad_norm": 0.9416295289993286, "learning_rate": 5.678137375712152e-06, "loss": 0.6327, "step": 7466 }, { "epoch": 0.47, "grad_norm": 0.8348994851112366, "learning_rate": 5.6771208480403065e-06, "loss": 0.5896, "step": 7467 }, { "epoch": 0.47, "grad_norm": 0.8662605285644531, "learning_rate": 5.6761042918572585e-06, "loss": 0.6347, "step": 7468 }, { "epoch": 0.47, "grad_norm": 0.9073401689529419, "learning_rate": 5.675087707205811e-06, "loss": 0.5953, "step": 7469 }, { "epoch": 0.47, "grad_norm": 0.8502627015113831, "learning_rate": 5.674071094128768e-06, "loss": 0.56, "step": 7470 }, { "epoch": 0.47, "grad_norm": 0.8814505934715271, "learning_rate": 5.673054452668936e-06, "loss": 0.5533, "step": 7471 }, { "epoch": 0.47, "grad_norm": 0.8160227537155151, "learning_rate": 5.672037782869123e-06, "loss": 0.5823, "step": 7472 }, { "epoch": 0.47, "grad_norm": 0.9062105417251587, "learning_rate": 5.671021084772137e-06, "loss": 0.629, "step": 7473 }, { "epoch": 0.47, "grad_norm": 0.8528299927711487, "learning_rate": 5.670004358420786e-06, "loss": 0.5908, "step": 7474 }, { "epoch": 0.47, "grad_norm": 0.8880297541618347, "learning_rate": 5.668987603857884e-06, "loss": 0.5729, "step": 7475 }, { "epoch": 0.47, "grad_norm": 0.8712710738182068, "learning_rate": 5.6679708211262415e-06, "loss": 0.5703, "step": 7476 }, { "epoch": 0.47, "grad_norm": 0.8730039596557617, "learning_rate": 5.66695401026867e-06, "loss": 0.5824, "step": 7477 }, { "epoch": 0.47, "grad_norm": 0.9115322232246399, "learning_rate": 5.665937171327985e-06, "loss": 0.6206, "step": 7478 }, { "epoch": 0.47, "grad_norm": 0.8727670311927795, "learning_rate": 5.664920304347004e-06, "loss": 0.5998, "step": 7479 }, { "epoch": 0.47, "grad_norm": 0.8780418634414673, "learning_rate": 5.6639034093685416e-06, "loss": 0.6111, "step": 7480 }, { "epoch": 0.47, "grad_norm": 0.902927577495575, "learning_rate": 5.662886486435415e-06, "loss": 0.5843, "step": 7481 }, { "epoch": 0.47, "grad_norm": 0.9045441746711731, "learning_rate": 5.6618695355904456e-06, "loss": 0.5906, "step": 7482 }, { "epoch": 0.47, "grad_norm": 0.8809554576873779, "learning_rate": 5.660852556876452e-06, "loss": 0.5882, "step": 7483 }, { "epoch": 0.47, "grad_norm": 0.8581720590591431, "learning_rate": 5.659835550336257e-06, "loss": 0.5565, "step": 7484 }, { "epoch": 0.47, "grad_norm": 0.836743175983429, "learning_rate": 5.658818516012681e-06, "loss": 0.6018, "step": 7485 }, { "epoch": 0.47, "grad_norm": 0.8696823716163635, "learning_rate": 5.65780145394855e-06, "loss": 0.5717, "step": 7486 }, { "epoch": 0.47, "grad_norm": 0.8720874786376953, "learning_rate": 5.656784364186687e-06, "loss": 0.6145, "step": 7487 }, { "epoch": 0.47, "grad_norm": 0.8161273002624512, "learning_rate": 5.655767246769921e-06, "loss": 0.5921, "step": 7488 }, { "epoch": 0.47, "grad_norm": 0.9067077040672302, "learning_rate": 5.6547501017410765e-06, "loss": 0.5551, "step": 7489 }, { "epoch": 0.47, "grad_norm": 0.8600305318832397, "learning_rate": 5.6537329291429835e-06, "loss": 0.5712, "step": 7490 }, { "epoch": 0.47, "grad_norm": 0.9493293762207031, "learning_rate": 5.652715729018471e-06, "loss": 0.5631, "step": 7491 }, { "epoch": 0.47, "grad_norm": 0.9080491065979004, "learning_rate": 5.65169850141037e-06, "loss": 0.6021, "step": 7492 }, { "epoch": 0.47, "grad_norm": 0.8665675520896912, "learning_rate": 5.650681246361511e-06, "loss": 0.5662, "step": 7493 }, { "epoch": 0.47, "grad_norm": 0.8644539713859558, "learning_rate": 5.649663963914729e-06, "loss": 0.5776, "step": 7494 }, { "epoch": 0.47, "grad_norm": 0.8949490785598755, "learning_rate": 5.6486466541128575e-06, "loss": 0.6025, "step": 7495 }, { "epoch": 0.47, "grad_norm": 0.8618309497833252, "learning_rate": 5.6476293169987314e-06, "loss": 0.5809, "step": 7496 }, { "epoch": 0.47, "grad_norm": 0.9121665954589844, "learning_rate": 5.646611952615188e-06, "loss": 0.5776, "step": 7497 }, { "epoch": 0.48, "grad_norm": 0.9601690173149109, "learning_rate": 5.645594561005064e-06, "loss": 0.6445, "step": 7498 }, { "epoch": 0.48, "grad_norm": 0.8770933151245117, "learning_rate": 5.6445771422112005e-06, "loss": 0.5561, "step": 7499 }, { "epoch": 0.48, "grad_norm": 0.9160272479057312, "learning_rate": 5.643559696276435e-06, "loss": 0.5923, "step": 7500 }, { "epoch": 0.48, "grad_norm": 0.8537315726280212, "learning_rate": 5.6425422232436085e-06, "loss": 0.5297, "step": 7501 }, { "epoch": 0.48, "grad_norm": 0.9248793125152588, "learning_rate": 5.641524723155566e-06, "loss": 0.6002, "step": 7502 }, { "epoch": 0.48, "grad_norm": 0.8903481960296631, "learning_rate": 5.6405071960551485e-06, "loss": 0.5629, "step": 7503 }, { "epoch": 0.48, "grad_norm": 0.8650072813034058, "learning_rate": 5.639489641985201e-06, "loss": 0.6217, "step": 7504 }, { "epoch": 0.48, "grad_norm": 0.9362791180610657, "learning_rate": 5.638472060988569e-06, "loss": 0.6735, "step": 7505 }, { "epoch": 0.48, "grad_norm": 0.8435890078544617, "learning_rate": 5.6374544531081e-06, "loss": 0.5357, "step": 7506 }, { "epoch": 0.48, "grad_norm": 0.8968993425369263, "learning_rate": 5.636436818386641e-06, "loss": 0.6284, "step": 7507 }, { "epoch": 0.48, "grad_norm": 0.8205461502075195, "learning_rate": 5.635419156867043e-06, "loss": 0.5537, "step": 7508 }, { "epoch": 0.48, "grad_norm": 0.8897349238395691, "learning_rate": 5.634401468592152e-06, "loss": 0.5937, "step": 7509 }, { "epoch": 0.48, "grad_norm": 0.8807753920555115, "learning_rate": 5.6333837536048255e-06, "loss": 0.5546, "step": 7510 }, { "epoch": 0.48, "grad_norm": 0.8418203592300415, "learning_rate": 5.63236601194791e-06, "loss": 0.5596, "step": 7511 }, { "epoch": 0.48, "grad_norm": 0.849711000919342, "learning_rate": 5.631348243664263e-06, "loss": 0.6252, "step": 7512 }, { "epoch": 0.48, "grad_norm": 0.9313555955886841, "learning_rate": 5.630330448796736e-06, "loss": 0.6325, "step": 7513 }, { "epoch": 0.48, "grad_norm": 0.9534391164779663, "learning_rate": 5.629312627388188e-06, "loss": 0.6615, "step": 7514 }, { "epoch": 0.48, "grad_norm": 0.8927303552627563, "learning_rate": 5.628294779481474e-06, "loss": 0.5842, "step": 7515 }, { "epoch": 0.48, "grad_norm": 0.8985401391983032, "learning_rate": 5.6272769051194535e-06, "loss": 0.6764, "step": 7516 }, { "epoch": 0.48, "grad_norm": 0.8968629240989685, "learning_rate": 5.626259004344983e-06, "loss": 0.5754, "step": 7517 }, { "epoch": 0.48, "grad_norm": 0.8578152656555176, "learning_rate": 5.625241077200926e-06, "loss": 0.6018, "step": 7518 }, { "epoch": 0.48, "grad_norm": 0.8742191791534424, "learning_rate": 5.624223123730141e-06, "loss": 0.5841, "step": 7519 }, { "epoch": 0.48, "grad_norm": 0.8463053703308105, "learning_rate": 5.6232051439754935e-06, "loss": 0.5883, "step": 7520 }, { "epoch": 0.48, "grad_norm": 0.8800484538078308, "learning_rate": 5.622187137979843e-06, "loss": 0.544, "step": 7521 }, { "epoch": 0.48, "grad_norm": 0.8594365119934082, "learning_rate": 5.621169105786057e-06, "loss": 0.5719, "step": 7522 }, { "epoch": 0.48, "grad_norm": 0.8680949807167053, "learning_rate": 5.620151047437004e-06, "loss": 0.6466, "step": 7523 }, { "epoch": 0.48, "grad_norm": 0.8772831559181213, "learning_rate": 5.619132962975544e-06, "loss": 0.6038, "step": 7524 }, { "epoch": 0.48, "grad_norm": 0.9212350845336914, "learning_rate": 5.6181148524445506e-06, "loss": 0.586, "step": 7525 }, { "epoch": 0.48, "grad_norm": 0.9081183671951294, "learning_rate": 5.617096715886889e-06, "loss": 0.6006, "step": 7526 }, { "epoch": 0.48, "grad_norm": 0.9265548586845398, "learning_rate": 5.616078553345434e-06, "loss": 0.6323, "step": 7527 }, { "epoch": 0.48, "grad_norm": 0.8656793236732483, "learning_rate": 5.615060364863053e-06, "loss": 0.5746, "step": 7528 }, { "epoch": 0.48, "grad_norm": 0.8917694091796875, "learning_rate": 5.6140421504826205e-06, "loss": 0.5804, "step": 7529 }, { "epoch": 0.48, "grad_norm": 0.9244688153266907, "learning_rate": 5.6130239102470075e-06, "loss": 0.6397, "step": 7530 }, { "epoch": 0.48, "grad_norm": 0.873084306716919, "learning_rate": 5.612005644199092e-06, "loss": 0.5719, "step": 7531 }, { "epoch": 0.48, "grad_norm": 0.8609447479248047, "learning_rate": 5.610987352381747e-06, "loss": 0.5613, "step": 7532 }, { "epoch": 0.48, "grad_norm": 0.9368327856063843, "learning_rate": 5.60996903483785e-06, "loss": 0.6308, "step": 7533 }, { "epoch": 0.48, "grad_norm": 0.8809864521026611, "learning_rate": 5.608950691610279e-06, "loss": 0.5802, "step": 7534 }, { "epoch": 0.48, "grad_norm": 0.926231324672699, "learning_rate": 5.607932322741912e-06, "loss": 0.6366, "step": 7535 }, { "epoch": 0.48, "grad_norm": 0.9103180766105652, "learning_rate": 5.60691392827563e-06, "loss": 0.6085, "step": 7536 }, { "epoch": 0.48, "grad_norm": 0.9746480584144592, "learning_rate": 5.605895508254315e-06, "loss": 0.5465, "step": 7537 }, { "epoch": 0.48, "grad_norm": 0.9505468606948853, "learning_rate": 5.604877062720848e-06, "loss": 0.6671, "step": 7538 }, { "epoch": 0.48, "grad_norm": 0.8591948747634888, "learning_rate": 5.603858591718111e-06, "loss": 0.5763, "step": 7539 }, { "epoch": 0.48, "grad_norm": 0.9409388303756714, "learning_rate": 5.602840095288989e-06, "loss": 0.6302, "step": 7540 }, { "epoch": 0.48, "grad_norm": 0.8928959965705872, "learning_rate": 5.6018215734763685e-06, "loss": 0.6324, "step": 7541 }, { "epoch": 0.48, "grad_norm": 0.8751816749572754, "learning_rate": 5.600803026323136e-06, "loss": 0.5362, "step": 7542 }, { "epoch": 0.48, "grad_norm": 0.9378029704093933, "learning_rate": 5.599784453872177e-06, "loss": 0.5904, "step": 7543 }, { "epoch": 0.48, "grad_norm": 0.926987886428833, "learning_rate": 5.5987658561663805e-06, "loss": 0.6353, "step": 7544 }, { "epoch": 0.48, "grad_norm": 0.8623868823051453, "learning_rate": 5.597747233248637e-06, "loss": 0.5966, "step": 7545 }, { "epoch": 0.48, "grad_norm": 0.8656692504882812, "learning_rate": 5.596728585161838e-06, "loss": 0.5803, "step": 7546 }, { "epoch": 0.48, "grad_norm": 0.8522694110870361, "learning_rate": 5.595709911948873e-06, "loss": 0.6405, "step": 7547 }, { "epoch": 0.48, "grad_norm": 0.8766559958457947, "learning_rate": 5.5946912136526365e-06, "loss": 0.6045, "step": 7548 }, { "epoch": 0.48, "grad_norm": 0.8855379223823547, "learning_rate": 5.593672490316022e-06, "loss": 0.6335, "step": 7549 }, { "epoch": 0.48, "grad_norm": 0.8828189969062805, "learning_rate": 5.5926537419819234e-06, "loss": 0.6326, "step": 7550 }, { "epoch": 0.48, "grad_norm": 0.8517670035362244, "learning_rate": 5.591634968693238e-06, "loss": 0.6034, "step": 7551 }, { "epoch": 0.48, "grad_norm": 0.8935458064079285, "learning_rate": 5.590616170492862e-06, "loss": 0.628, "step": 7552 }, { "epoch": 0.48, "grad_norm": 0.9340348839759827, "learning_rate": 5.589597347423696e-06, "loss": 0.6073, "step": 7553 }, { "epoch": 0.48, "grad_norm": 0.8754671812057495, "learning_rate": 5.588578499528633e-06, "loss": 0.584, "step": 7554 }, { "epoch": 0.48, "grad_norm": 0.8489634990692139, "learning_rate": 5.587559626850578e-06, "loss": 0.5701, "step": 7555 }, { "epoch": 0.48, "grad_norm": 0.871929407119751, "learning_rate": 5.586540729432431e-06, "loss": 0.5916, "step": 7556 }, { "epoch": 0.48, "grad_norm": 0.905117928981781, "learning_rate": 5.585521807317097e-06, "loss": 0.6647, "step": 7557 }, { "epoch": 0.48, "grad_norm": 0.9271255731582642, "learning_rate": 5.584502860547474e-06, "loss": 0.6214, "step": 7558 }, { "epoch": 0.48, "grad_norm": 0.8846172094345093, "learning_rate": 5.5834838891664685e-06, "loss": 0.6159, "step": 7559 }, { "epoch": 0.48, "grad_norm": 0.8657467365264893, "learning_rate": 5.582464893216987e-06, "loss": 0.5704, "step": 7560 }, { "epoch": 0.48, "grad_norm": 0.8502207398414612, "learning_rate": 5.5814458727419365e-06, "loss": 0.5878, "step": 7561 }, { "epoch": 0.48, "grad_norm": 0.8335548639297485, "learning_rate": 5.580426827784221e-06, "loss": 0.612, "step": 7562 }, { "epoch": 0.48, "grad_norm": 0.8756707310676575, "learning_rate": 5.579407758386751e-06, "loss": 0.6283, "step": 7563 }, { "epoch": 0.48, "grad_norm": 0.9669787883758545, "learning_rate": 5.578388664592435e-06, "loss": 0.5999, "step": 7564 }, { "epoch": 0.48, "grad_norm": 0.9120867848396301, "learning_rate": 5.577369546444188e-06, "loss": 0.6249, "step": 7565 }, { "epoch": 0.48, "grad_norm": 0.9110515117645264, "learning_rate": 5.576350403984915e-06, "loss": 0.6314, "step": 7566 }, { "epoch": 0.48, "grad_norm": 0.9408080577850342, "learning_rate": 5.575331237257532e-06, "loss": 0.5728, "step": 7567 }, { "epoch": 0.48, "grad_norm": 0.9744350910186768, "learning_rate": 5.574312046304954e-06, "loss": 0.6502, "step": 7568 }, { "epoch": 0.48, "grad_norm": 0.9024521112442017, "learning_rate": 5.5732928311700906e-06, "loss": 0.5861, "step": 7569 }, { "epoch": 0.48, "grad_norm": 0.8772839903831482, "learning_rate": 5.5722735918958614e-06, "loss": 0.5825, "step": 7570 }, { "epoch": 0.48, "grad_norm": 0.9152007699012756, "learning_rate": 5.571254328525183e-06, "loss": 0.5854, "step": 7571 }, { "epoch": 0.48, "grad_norm": 0.9742832183837891, "learning_rate": 5.570235041100972e-06, "loss": 0.6213, "step": 7572 }, { "epoch": 0.48, "grad_norm": 0.8699829578399658, "learning_rate": 5.569215729666146e-06, "loss": 0.5945, "step": 7573 }, { "epoch": 0.48, "grad_norm": 0.8996490240097046, "learning_rate": 5.568196394263626e-06, "loss": 0.6015, "step": 7574 }, { "epoch": 0.48, "grad_norm": 0.8309650421142578, "learning_rate": 5.567177034936333e-06, "loss": 0.5423, "step": 7575 }, { "epoch": 0.48, "grad_norm": 0.884103000164032, "learning_rate": 5.566157651727189e-06, "loss": 0.6507, "step": 7576 }, { "epoch": 0.48, "grad_norm": 0.8329902291297913, "learning_rate": 5.5651382446791134e-06, "loss": 0.5838, "step": 7577 }, { "epoch": 0.48, "grad_norm": 0.8965960741043091, "learning_rate": 5.564118813835033e-06, "loss": 0.5781, "step": 7578 }, { "epoch": 0.48, "grad_norm": 0.8552922010421753, "learning_rate": 5.563099359237872e-06, "loss": 0.5883, "step": 7579 }, { "epoch": 0.48, "grad_norm": 0.8796671628952026, "learning_rate": 5.5620798809305575e-06, "loss": 0.6016, "step": 7580 }, { "epoch": 0.48, "grad_norm": 0.9553985595703125, "learning_rate": 5.561060378956014e-06, "loss": 0.6185, "step": 7581 }, { "epoch": 0.48, "grad_norm": 0.836025059223175, "learning_rate": 5.560040853357168e-06, "loss": 0.5931, "step": 7582 }, { "epoch": 0.48, "grad_norm": 0.9648067355155945, "learning_rate": 5.55902130417695e-06, "loss": 0.6206, "step": 7583 }, { "epoch": 0.48, "grad_norm": 0.8633977174758911, "learning_rate": 5.558001731458293e-06, "loss": 0.6114, "step": 7584 }, { "epoch": 0.48, "grad_norm": 0.885905921459198, "learning_rate": 5.556982135244121e-06, "loss": 0.6113, "step": 7585 }, { "epoch": 0.48, "grad_norm": 0.8822622299194336, "learning_rate": 5.5559625155773685e-06, "loss": 0.5788, "step": 7586 }, { "epoch": 0.48, "grad_norm": 0.8463605046272278, "learning_rate": 5.554942872500971e-06, "loss": 0.5707, "step": 7587 }, { "epoch": 0.48, "grad_norm": 0.8903326392173767, "learning_rate": 5.5539232060578574e-06, "loss": 0.6062, "step": 7588 }, { "epoch": 0.48, "grad_norm": 0.8166199922561646, "learning_rate": 5.552903516290966e-06, "loss": 0.5806, "step": 7589 }, { "epoch": 0.48, "grad_norm": 0.8574814200401306, "learning_rate": 5.55188380324323e-06, "loss": 0.586, "step": 7590 }, { "epoch": 0.48, "grad_norm": 0.8767586946487427, "learning_rate": 5.550864066957587e-06, "loss": 0.6098, "step": 7591 }, { "epoch": 0.48, "grad_norm": 0.8981362581253052, "learning_rate": 5.549844307476975e-06, "loss": 0.6253, "step": 7592 }, { "epoch": 0.48, "grad_norm": 0.9011292457580566, "learning_rate": 5.548824524844333e-06, "loss": 0.6296, "step": 7593 }, { "epoch": 0.48, "grad_norm": 0.8577702045440674, "learning_rate": 5.547804719102596e-06, "loss": 0.5661, "step": 7594 }, { "epoch": 0.48, "grad_norm": 0.9309691190719604, "learning_rate": 5.546784890294712e-06, "loss": 0.646, "step": 7595 }, { "epoch": 0.48, "grad_norm": 0.9005495309829712, "learning_rate": 5.545765038463615e-06, "loss": 0.634, "step": 7596 }, { "epoch": 0.48, "grad_norm": 0.9504218697547913, "learning_rate": 5.544745163652253e-06, "loss": 0.5943, "step": 7597 }, { "epoch": 0.48, "grad_norm": 0.880858302116394, "learning_rate": 5.543725265903565e-06, "loss": 0.5743, "step": 7598 }, { "epoch": 0.48, "grad_norm": 0.9303281307220459, "learning_rate": 5.5427053452605004e-06, "loss": 0.6134, "step": 7599 }, { "epoch": 0.48, "grad_norm": 0.8957832455635071, "learning_rate": 5.541685401766001e-06, "loss": 0.6142, "step": 7600 }, { "epoch": 0.48, "grad_norm": 0.860815703868866, "learning_rate": 5.540665435463013e-06, "loss": 0.5398, "step": 7601 }, { "epoch": 0.48, "grad_norm": 0.8271889090538025, "learning_rate": 5.539645446394485e-06, "loss": 0.6002, "step": 7602 }, { "epoch": 0.48, "grad_norm": 0.8800649046897888, "learning_rate": 5.538625434603363e-06, "loss": 0.6247, "step": 7603 }, { "epoch": 0.48, "grad_norm": 0.8922380208969116, "learning_rate": 5.5376054001326e-06, "loss": 0.6069, "step": 7604 }, { "epoch": 0.48, "grad_norm": 0.8567295074462891, "learning_rate": 5.53658534302514e-06, "loss": 0.6585, "step": 7605 }, { "epoch": 0.48, "grad_norm": 0.9114384651184082, "learning_rate": 5.535565263323942e-06, "loss": 0.5325, "step": 7606 }, { "epoch": 0.48, "grad_norm": 0.8971738219261169, "learning_rate": 5.534545161071951e-06, "loss": 0.6266, "step": 7607 }, { "epoch": 0.48, "grad_norm": 0.8661735653877258, "learning_rate": 5.533525036312125e-06, "loss": 0.5861, "step": 7608 }, { "epoch": 0.48, "grad_norm": 0.9209964275360107, "learning_rate": 5.532504889087413e-06, "loss": 0.608, "step": 7609 }, { "epoch": 0.48, "grad_norm": 0.9354657530784607, "learning_rate": 5.531484719440776e-06, "loss": 0.6116, "step": 7610 }, { "epoch": 0.48, "grad_norm": 0.8302944898605347, "learning_rate": 5.530464527415164e-06, "loss": 0.5349, "step": 7611 }, { "epoch": 0.48, "grad_norm": 0.9335947632789612, "learning_rate": 5.529444313053538e-06, "loss": 0.5976, "step": 7612 }, { "epoch": 0.48, "grad_norm": 0.9276868104934692, "learning_rate": 5.528424076398851e-06, "loss": 0.6024, "step": 7613 }, { "epoch": 0.48, "grad_norm": 0.8259304165840149, "learning_rate": 5.527403817494067e-06, "loss": 0.6018, "step": 7614 }, { "epoch": 0.48, "grad_norm": 0.8607040643692017, "learning_rate": 5.526383536382142e-06, "loss": 0.5221, "step": 7615 }, { "epoch": 0.48, "grad_norm": 0.8541266918182373, "learning_rate": 5.525363233106037e-06, "loss": 0.5861, "step": 7616 }, { "epoch": 0.48, "grad_norm": 0.8392813801765442, "learning_rate": 5.524342907708714e-06, "loss": 0.6229, "step": 7617 }, { "epoch": 0.48, "grad_norm": 0.8519023060798645, "learning_rate": 5.5233225602331355e-06, "loss": 0.5533, "step": 7618 }, { "epoch": 0.48, "grad_norm": 0.9389131665229797, "learning_rate": 5.522302190722264e-06, "loss": 0.5698, "step": 7619 }, { "epoch": 0.48, "grad_norm": 0.8677113056182861, "learning_rate": 5.5212817992190644e-06, "loss": 0.6011, "step": 7620 }, { "epoch": 0.48, "grad_norm": 1.0327938795089722, "learning_rate": 5.5202613857665025e-06, "loss": 0.622, "step": 7621 }, { "epoch": 0.48, "grad_norm": 0.8638737797737122, "learning_rate": 5.5192409504075416e-06, "loss": 0.5677, "step": 7622 }, { "epoch": 0.48, "grad_norm": 0.9155073761940002, "learning_rate": 5.518220493185153e-06, "loss": 0.5872, "step": 7623 }, { "epoch": 0.48, "grad_norm": 0.8531370162963867, "learning_rate": 5.517200014142301e-06, "loss": 0.5778, "step": 7624 }, { "epoch": 0.48, "grad_norm": 0.865264356136322, "learning_rate": 5.516179513321955e-06, "loss": 0.5836, "step": 7625 }, { "epoch": 0.48, "grad_norm": 0.8751364946365356, "learning_rate": 5.5151589907670856e-06, "loss": 0.572, "step": 7626 }, { "epoch": 0.48, "grad_norm": 0.8969213962554932, "learning_rate": 5.514138446520664e-06, "loss": 0.6152, "step": 7627 }, { "epoch": 0.48, "grad_norm": 0.8782392740249634, "learning_rate": 5.51311788062566e-06, "loss": 0.5778, "step": 7628 }, { "epoch": 0.48, "grad_norm": 0.8104063868522644, "learning_rate": 5.512097293125047e-06, "loss": 0.5285, "step": 7629 }, { "epoch": 0.48, "grad_norm": 0.8759908676147461, "learning_rate": 5.511076684061799e-06, "loss": 0.5613, "step": 7630 }, { "epoch": 0.48, "grad_norm": 0.8901430368423462, "learning_rate": 5.51005605347889e-06, "loss": 0.6212, "step": 7631 }, { "epoch": 0.48, "grad_norm": 0.8541998863220215, "learning_rate": 5.509035401419296e-06, "loss": 0.5491, "step": 7632 }, { "epoch": 0.48, "grad_norm": 0.938401460647583, "learning_rate": 5.50801472792599e-06, "loss": 0.5846, "step": 7633 }, { "epoch": 0.48, "grad_norm": 0.8890235424041748, "learning_rate": 5.5069940330419525e-06, "loss": 0.5504, "step": 7634 }, { "epoch": 0.48, "grad_norm": 0.9128061532974243, "learning_rate": 5.5059733168101596e-06, "loss": 0.6344, "step": 7635 }, { "epoch": 0.48, "grad_norm": 0.9124163389205933, "learning_rate": 5.504952579273589e-06, "loss": 0.5604, "step": 7636 }, { "epoch": 0.48, "grad_norm": 0.9557987451553345, "learning_rate": 5.503931820475223e-06, "loss": 0.6476, "step": 7637 }, { "epoch": 0.48, "grad_norm": 0.8558072447776794, "learning_rate": 5.502911040458042e-06, "loss": 0.6132, "step": 7638 }, { "epoch": 0.48, "grad_norm": 0.8189815878868103, "learning_rate": 5.501890239265025e-06, "loss": 0.5687, "step": 7639 }, { "epoch": 0.48, "grad_norm": 0.8753035664558411, "learning_rate": 5.500869416939156e-06, "loss": 0.6288, "step": 7640 }, { "epoch": 0.48, "grad_norm": 0.8629709482192993, "learning_rate": 5.49984857352342e-06, "loss": 0.5766, "step": 7641 }, { "epoch": 0.48, "grad_norm": 0.9087205529212952, "learning_rate": 5.4988277090607986e-06, "loss": 0.5434, "step": 7642 }, { "epoch": 0.48, "grad_norm": 0.9986720085144043, "learning_rate": 5.4978068235942775e-06, "loss": 0.6495, "step": 7643 }, { "epoch": 0.48, "grad_norm": 0.957332193851471, "learning_rate": 5.496785917166843e-06, "loss": 0.6054, "step": 7644 }, { "epoch": 0.48, "grad_norm": 0.8836731910705566, "learning_rate": 5.49576498982148e-06, "loss": 0.5649, "step": 7645 }, { "epoch": 0.48, "grad_norm": 0.8434025049209595, "learning_rate": 5.49474404160118e-06, "loss": 0.5556, "step": 7646 }, { "epoch": 0.48, "grad_norm": 0.89605712890625, "learning_rate": 5.4937230725489285e-06, "loss": 0.6331, "step": 7647 }, { "epoch": 0.48, "grad_norm": 0.8855474591255188, "learning_rate": 5.492702082707716e-06, "loss": 0.5908, "step": 7648 }, { "epoch": 0.48, "grad_norm": 0.8697336316108704, "learning_rate": 5.491681072120534e-06, "loss": 0.6265, "step": 7649 }, { "epoch": 0.48, "grad_norm": 0.9648655652999878, "learning_rate": 5.4906600408303715e-06, "loss": 0.6178, "step": 7650 }, { "epoch": 0.48, "grad_norm": 0.901523768901825, "learning_rate": 5.489638988880222e-06, "loss": 0.562, "step": 7651 }, { "epoch": 0.48, "grad_norm": 0.9213078618049622, "learning_rate": 5.488617916313077e-06, "loss": 0.5578, "step": 7652 }, { "epoch": 0.48, "grad_norm": 0.8305292725563049, "learning_rate": 5.487596823171932e-06, "loss": 0.5244, "step": 7653 }, { "epoch": 0.48, "grad_norm": 0.8835660219192505, "learning_rate": 5.486575709499782e-06, "loss": 0.6254, "step": 7654 }, { "epoch": 0.48, "grad_norm": 0.9087215065956116, "learning_rate": 5.48555457533962e-06, "loss": 0.6071, "step": 7655 }, { "epoch": 0.49, "grad_norm": 0.8418556451797485, "learning_rate": 5.484533420734444e-06, "loss": 0.5622, "step": 7656 }, { "epoch": 0.49, "grad_norm": 0.8559536337852478, "learning_rate": 5.483512245727252e-06, "loss": 0.625, "step": 7657 }, { "epoch": 0.49, "grad_norm": 0.8496268391609192, "learning_rate": 5.482491050361041e-06, "loss": 0.5712, "step": 7658 }, { "epoch": 0.49, "grad_norm": 0.8907086253166199, "learning_rate": 5.48146983467881e-06, "loss": 0.618, "step": 7659 }, { "epoch": 0.49, "grad_norm": 0.9585722088813782, "learning_rate": 5.480448598723559e-06, "loss": 0.6301, "step": 7660 }, { "epoch": 0.49, "grad_norm": 0.9126084446907043, "learning_rate": 5.47942734253829e-06, "loss": 0.6009, "step": 7661 }, { "epoch": 0.49, "grad_norm": 0.8798913955688477, "learning_rate": 5.478406066166003e-06, "loss": 0.5604, "step": 7662 }, { "epoch": 0.49, "grad_norm": 0.8995177745819092, "learning_rate": 5.477384769649701e-06, "loss": 0.6143, "step": 7663 }, { "epoch": 0.49, "grad_norm": 0.9048047661781311, "learning_rate": 5.476363453032387e-06, "loss": 0.5813, "step": 7664 }, { "epoch": 0.49, "grad_norm": 0.808437168598175, "learning_rate": 5.475342116357064e-06, "loss": 0.6184, "step": 7665 }, { "epoch": 0.49, "grad_norm": 0.9141887426376343, "learning_rate": 5.474320759666739e-06, "loss": 0.5828, "step": 7666 }, { "epoch": 0.49, "grad_norm": 0.885321855545044, "learning_rate": 5.473299383004417e-06, "loss": 0.6431, "step": 7667 }, { "epoch": 0.49, "grad_norm": 0.828567385673523, "learning_rate": 5.472277986413104e-06, "loss": 0.5438, "step": 7668 }, { "epoch": 0.49, "grad_norm": 0.8383506536483765, "learning_rate": 5.471256569935809e-06, "loss": 0.5765, "step": 7669 }, { "epoch": 0.49, "grad_norm": 0.8781369924545288, "learning_rate": 5.470235133615538e-06, "loss": 0.5844, "step": 7670 }, { "epoch": 0.49, "grad_norm": 0.9070538282394409, "learning_rate": 5.4692136774953004e-06, "loss": 0.5773, "step": 7671 }, { "epoch": 0.49, "grad_norm": 0.8789056539535522, "learning_rate": 5.46819220161811e-06, "loss": 0.5749, "step": 7672 }, { "epoch": 0.49, "grad_norm": 0.8679722547531128, "learning_rate": 5.467170706026973e-06, "loss": 0.5518, "step": 7673 }, { "epoch": 0.49, "grad_norm": 0.8629012107849121, "learning_rate": 5.466149190764902e-06, "loss": 0.5732, "step": 7674 }, { "epoch": 0.49, "grad_norm": 0.8968449831008911, "learning_rate": 5.465127655874911e-06, "loss": 0.6644, "step": 7675 }, { "epoch": 0.49, "grad_norm": 0.9048300385475159, "learning_rate": 5.464106101400013e-06, "loss": 0.5802, "step": 7676 }, { "epoch": 0.49, "grad_norm": 0.8762057423591614, "learning_rate": 5.463084527383222e-06, "loss": 0.5703, "step": 7677 }, { "epoch": 0.49, "grad_norm": 0.8548493385314941, "learning_rate": 5.4620629338675505e-06, "loss": 0.5622, "step": 7678 }, { "epoch": 0.49, "grad_norm": 0.9109390377998352, "learning_rate": 5.461041320896019e-06, "loss": 0.6231, "step": 7679 }, { "epoch": 0.49, "grad_norm": 0.8898347616195679, "learning_rate": 5.460019688511639e-06, "loss": 0.5709, "step": 7680 }, { "epoch": 0.49, "grad_norm": 0.9044870138168335, "learning_rate": 5.458998036757431e-06, "loss": 0.6593, "step": 7681 }, { "epoch": 0.49, "grad_norm": 0.8833417296409607, "learning_rate": 5.4579763656764115e-06, "loss": 0.6153, "step": 7682 }, { "epoch": 0.49, "grad_norm": 0.9275777339935303, "learning_rate": 5.456954675311602e-06, "loss": 0.5869, "step": 7683 }, { "epoch": 0.49, "grad_norm": 0.9542369246482849, "learning_rate": 5.45593296570602e-06, "loss": 0.6183, "step": 7684 }, { "epoch": 0.49, "grad_norm": 0.9259552359580994, "learning_rate": 5.454911236902687e-06, "loss": 0.6372, "step": 7685 }, { "epoch": 0.49, "grad_norm": 0.9347798824310303, "learning_rate": 5.453889488944623e-06, "loss": 0.5539, "step": 7686 }, { "epoch": 0.49, "grad_norm": 0.9027972221374512, "learning_rate": 5.452867721874854e-06, "loss": 0.6572, "step": 7687 }, { "epoch": 0.49, "grad_norm": 0.904701828956604, "learning_rate": 5.4518459357364e-06, "loss": 0.6378, "step": 7688 }, { "epoch": 0.49, "grad_norm": 0.8021993637084961, "learning_rate": 5.4508241305722856e-06, "loss": 0.6006, "step": 7689 }, { "epoch": 0.49, "grad_norm": 0.8704695701599121, "learning_rate": 5.449802306425532e-06, "loss": 0.5669, "step": 7690 }, { "epoch": 0.49, "grad_norm": 0.8994425535202026, "learning_rate": 5.448780463339172e-06, "loss": 0.6242, "step": 7691 }, { "epoch": 0.49, "grad_norm": 0.8955239653587341, "learning_rate": 5.447758601356226e-06, "loss": 0.6172, "step": 7692 }, { "epoch": 0.49, "grad_norm": 0.8894906044006348, "learning_rate": 5.446736720519725e-06, "loss": 0.592, "step": 7693 }, { "epoch": 0.49, "grad_norm": 0.9087505340576172, "learning_rate": 5.445714820872693e-06, "loss": 0.5644, "step": 7694 }, { "epoch": 0.49, "grad_norm": 0.8230986595153809, "learning_rate": 5.4446929024581606e-06, "loss": 0.5527, "step": 7695 }, { "epoch": 0.49, "grad_norm": 0.9331679344177246, "learning_rate": 5.4436709653191575e-06, "loss": 0.5922, "step": 7696 }, { "epoch": 0.49, "grad_norm": 0.8441013097763062, "learning_rate": 5.442649009498713e-06, "loss": 0.5654, "step": 7697 }, { "epoch": 0.49, "grad_norm": 0.9754238128662109, "learning_rate": 5.441627035039859e-06, "loss": 0.6082, "step": 7698 }, { "epoch": 0.49, "grad_norm": 0.8756945133209229, "learning_rate": 5.440605041985626e-06, "loss": 0.5359, "step": 7699 }, { "epoch": 0.49, "grad_norm": 0.7946870923042297, "learning_rate": 5.439583030379049e-06, "loss": 0.5282, "step": 7700 }, { "epoch": 0.49, "grad_norm": 0.9179619550704956, "learning_rate": 5.438561000263157e-06, "loss": 0.5532, "step": 7701 }, { "epoch": 0.49, "grad_norm": 0.8969404697418213, "learning_rate": 5.4375389516809895e-06, "loss": 0.6416, "step": 7702 }, { "epoch": 0.49, "grad_norm": 0.9204484820365906, "learning_rate": 5.436516884675579e-06, "loss": 0.6281, "step": 7703 }, { "epoch": 0.49, "grad_norm": 0.9213035702705383, "learning_rate": 5.43549479928996e-06, "loss": 0.5883, "step": 7704 }, { "epoch": 0.49, "grad_norm": 0.8417788743972778, "learning_rate": 5.434472695567169e-06, "loss": 0.5565, "step": 7705 }, { "epoch": 0.49, "grad_norm": 0.8737784028053284, "learning_rate": 5.433450573550246e-06, "loss": 0.5677, "step": 7706 }, { "epoch": 0.49, "grad_norm": 0.8927813768386841, "learning_rate": 5.432428433282226e-06, "loss": 0.5841, "step": 7707 }, { "epoch": 0.49, "grad_norm": 0.9724695086479187, "learning_rate": 5.43140627480615e-06, "loss": 0.6503, "step": 7708 }, { "epoch": 0.49, "grad_norm": 0.854300856590271, "learning_rate": 5.4303840981650565e-06, "loss": 0.5347, "step": 7709 }, { "epoch": 0.49, "grad_norm": 0.8913581371307373, "learning_rate": 5.429361903401985e-06, "loss": 0.6083, "step": 7710 }, { "epoch": 0.49, "grad_norm": 0.8786452412605286, "learning_rate": 5.4283396905599785e-06, "loss": 0.5958, "step": 7711 }, { "epoch": 0.49, "grad_norm": 0.9056409001350403, "learning_rate": 5.427317459682076e-06, "loss": 0.6015, "step": 7712 }, { "epoch": 0.49, "grad_norm": 0.8648980855941772, "learning_rate": 5.426295210811323e-06, "loss": 0.5982, "step": 7713 }, { "epoch": 0.49, "grad_norm": 0.8439405560493469, "learning_rate": 5.425272943990761e-06, "loss": 0.5646, "step": 7714 }, { "epoch": 0.49, "grad_norm": 0.9676143527030945, "learning_rate": 5.4242506592634354e-06, "loss": 0.5852, "step": 7715 }, { "epoch": 0.49, "grad_norm": 0.8673433661460876, "learning_rate": 5.423228356672391e-06, "loss": 0.5583, "step": 7716 }, { "epoch": 0.49, "grad_norm": 0.878349244594574, "learning_rate": 5.422206036260671e-06, "loss": 0.5877, "step": 7717 }, { "epoch": 0.49, "grad_norm": 0.863304853439331, "learning_rate": 5.421183698071325e-06, "loss": 0.633, "step": 7718 }, { "epoch": 0.49, "grad_norm": 0.8977344036102295, "learning_rate": 5.420161342147399e-06, "loss": 0.5786, "step": 7719 }, { "epoch": 0.49, "grad_norm": 0.9075315594673157, "learning_rate": 5.4191389685319395e-06, "loss": 0.6107, "step": 7720 }, { "epoch": 0.49, "grad_norm": 0.8574069738388062, "learning_rate": 5.4181165772679955e-06, "loss": 0.5664, "step": 7721 }, { "epoch": 0.49, "grad_norm": 0.8526822328567505, "learning_rate": 5.417094168398618e-06, "loss": 0.5912, "step": 7722 }, { "epoch": 0.49, "grad_norm": 0.8845213055610657, "learning_rate": 5.416071741966856e-06, "loss": 0.6056, "step": 7723 }, { "epoch": 0.49, "grad_norm": 0.8884814977645874, "learning_rate": 5.41504929801576e-06, "loss": 0.6047, "step": 7724 }, { "epoch": 0.49, "grad_norm": 0.8512783050537109, "learning_rate": 5.414026836588382e-06, "loss": 0.5831, "step": 7725 }, { "epoch": 0.49, "grad_norm": 0.8551806807518005, "learning_rate": 5.413004357727775e-06, "loss": 0.5865, "step": 7726 }, { "epoch": 0.49, "grad_norm": 0.9021192789077759, "learning_rate": 5.411981861476991e-06, "loss": 0.5534, "step": 7727 }, { "epoch": 0.49, "grad_norm": 0.8763885498046875, "learning_rate": 5.4109593478790825e-06, "loss": 0.544, "step": 7728 }, { "epoch": 0.49, "grad_norm": 0.9107353687286377, "learning_rate": 5.409936816977106e-06, "loss": 0.654, "step": 7729 }, { "epoch": 0.49, "grad_norm": 0.8590700626373291, "learning_rate": 5.408914268814117e-06, "loss": 0.594, "step": 7730 }, { "epoch": 0.49, "grad_norm": 0.883228063583374, "learning_rate": 5.4078917034331705e-06, "loss": 0.5545, "step": 7731 }, { "epoch": 0.49, "grad_norm": 0.8624158501625061, "learning_rate": 5.4068691208773225e-06, "loss": 0.5184, "step": 7732 }, { "epoch": 0.49, "grad_norm": 0.8744218945503235, "learning_rate": 5.405846521189632e-06, "loss": 0.6158, "step": 7733 }, { "epoch": 0.49, "grad_norm": 0.9129903316497803, "learning_rate": 5.404823904413157e-06, "loss": 0.5886, "step": 7734 }, { "epoch": 0.49, "grad_norm": 0.8881204128265381, "learning_rate": 5.403801270590955e-06, "loss": 0.5861, "step": 7735 }, { "epoch": 0.49, "grad_norm": 0.938651442527771, "learning_rate": 5.402778619766086e-06, "loss": 0.6119, "step": 7736 }, { "epoch": 0.49, "grad_norm": 0.8968808650970459, "learning_rate": 5.40175595198161e-06, "loss": 0.6051, "step": 7737 }, { "epoch": 0.49, "grad_norm": 0.8762619495391846, "learning_rate": 5.400733267280589e-06, "loss": 0.5904, "step": 7738 }, { "epoch": 0.49, "grad_norm": 0.863160252571106, "learning_rate": 5.399710565706084e-06, "loss": 0.6209, "step": 7739 }, { "epoch": 0.49, "grad_norm": 0.8458674550056458, "learning_rate": 5.3986878473011585e-06, "loss": 0.5892, "step": 7740 }, { "epoch": 0.49, "grad_norm": 0.8740219473838806, "learning_rate": 5.397665112108874e-06, "loss": 0.5339, "step": 7741 }, { "epoch": 0.49, "grad_norm": 0.8719754815101624, "learning_rate": 5.3966423601722955e-06, "loss": 0.5679, "step": 7742 }, { "epoch": 0.49, "grad_norm": 1.0123850107192993, "learning_rate": 5.3956195915344855e-06, "loss": 0.6039, "step": 7743 }, { "epoch": 0.49, "grad_norm": 0.9461089372634888, "learning_rate": 5.394596806238511e-06, "loss": 0.6337, "step": 7744 }, { "epoch": 0.49, "grad_norm": 0.8724687695503235, "learning_rate": 5.39357400432744e-06, "loss": 0.524, "step": 7745 }, { "epoch": 0.49, "grad_norm": 0.8834648728370667, "learning_rate": 5.392551185844334e-06, "loss": 0.6, "step": 7746 }, { "epoch": 0.49, "grad_norm": 0.8506259322166443, "learning_rate": 5.391528350832265e-06, "loss": 0.5867, "step": 7747 }, { "epoch": 0.49, "grad_norm": 0.9510423541069031, "learning_rate": 5.3905054993342985e-06, "loss": 0.6107, "step": 7748 }, { "epoch": 0.49, "grad_norm": 0.8999704122543335, "learning_rate": 5.389482631393504e-06, "loss": 0.6091, "step": 7749 }, { "epoch": 0.49, "grad_norm": 0.8855018019676208, "learning_rate": 5.388459747052951e-06, "loss": 0.5748, "step": 7750 }, { "epoch": 0.49, "grad_norm": 0.8717223405838013, "learning_rate": 5.387436846355709e-06, "loss": 0.5826, "step": 7751 }, { "epoch": 0.49, "grad_norm": 0.8774482607841492, "learning_rate": 5.386413929344849e-06, "loss": 0.5812, "step": 7752 }, { "epoch": 0.49, "grad_norm": 0.96014004945755, "learning_rate": 5.3853909960634446e-06, "loss": 0.5428, "step": 7753 }, { "epoch": 0.49, "grad_norm": 0.8995871543884277, "learning_rate": 5.3843680465545635e-06, "loss": 0.6025, "step": 7754 }, { "epoch": 0.49, "grad_norm": 0.9755268096923828, "learning_rate": 5.3833450808612816e-06, "loss": 0.6194, "step": 7755 }, { "epoch": 0.49, "grad_norm": 0.8455917835235596, "learning_rate": 5.382322099026673e-06, "loss": 0.5979, "step": 7756 }, { "epoch": 0.49, "grad_norm": 0.85719895362854, "learning_rate": 5.38129910109381e-06, "loss": 0.5648, "step": 7757 }, { "epoch": 0.49, "grad_norm": 0.8735889196395874, "learning_rate": 5.380276087105769e-06, "loss": 0.5679, "step": 7758 }, { "epoch": 0.49, "grad_norm": 0.8770782351493835, "learning_rate": 5.379253057105623e-06, "loss": 0.5705, "step": 7759 }, { "epoch": 0.49, "grad_norm": 0.8773306012153625, "learning_rate": 5.378230011136453e-06, "loss": 0.6444, "step": 7760 }, { "epoch": 0.49, "grad_norm": 0.8813656568527222, "learning_rate": 5.37720694924133e-06, "loss": 0.5294, "step": 7761 }, { "epoch": 0.49, "grad_norm": 0.8682379126548767, "learning_rate": 5.376183871463336e-06, "loss": 0.5526, "step": 7762 }, { "epoch": 0.49, "grad_norm": 0.881206214427948, "learning_rate": 5.375160777845548e-06, "loss": 0.587, "step": 7763 }, { "epoch": 0.49, "grad_norm": 0.8982335329055786, "learning_rate": 5.3741376684310455e-06, "loss": 0.5905, "step": 7764 }, { "epoch": 0.49, "grad_norm": 0.9069334864616394, "learning_rate": 5.3731145432629065e-06, "loss": 0.6072, "step": 7765 }, { "epoch": 0.49, "grad_norm": 0.900351881980896, "learning_rate": 5.3720914023842105e-06, "loss": 0.5809, "step": 7766 }, { "epoch": 0.49, "grad_norm": 0.8970122337341309, "learning_rate": 5.371068245838042e-06, "loss": 0.6318, "step": 7767 }, { "epoch": 0.49, "grad_norm": 0.854917049407959, "learning_rate": 5.37004507366748e-06, "loss": 0.595, "step": 7768 }, { "epoch": 0.49, "grad_norm": 0.904750645160675, "learning_rate": 5.369021885915607e-06, "loss": 0.6008, "step": 7769 }, { "epoch": 0.49, "grad_norm": 0.9690344333648682, "learning_rate": 5.367998682625506e-06, "loss": 0.6083, "step": 7770 }, { "epoch": 0.49, "grad_norm": 0.829422116279602, "learning_rate": 5.366975463840262e-06, "loss": 0.5196, "step": 7771 }, { "epoch": 0.49, "grad_norm": 0.8954147696495056, "learning_rate": 5.365952229602956e-06, "loss": 0.5388, "step": 7772 }, { "epoch": 0.49, "grad_norm": 0.8637533187866211, "learning_rate": 5.3649289799566766e-06, "loss": 0.6015, "step": 7773 }, { "epoch": 0.49, "grad_norm": 0.9479194283485413, "learning_rate": 5.363905714944505e-06, "loss": 0.5729, "step": 7774 }, { "epoch": 0.49, "grad_norm": 0.9531112313270569, "learning_rate": 5.362882434609531e-06, "loss": 0.6434, "step": 7775 }, { "epoch": 0.49, "grad_norm": 0.8943924307823181, "learning_rate": 5.36185913899484e-06, "loss": 0.576, "step": 7776 }, { "epoch": 0.49, "grad_norm": 0.9110773801803589, "learning_rate": 5.36083582814352e-06, "loss": 0.6053, "step": 7777 }, { "epoch": 0.49, "grad_norm": 0.8563951849937439, "learning_rate": 5.359812502098657e-06, "loss": 0.5905, "step": 7778 }, { "epoch": 0.49, "grad_norm": 0.8632087707519531, "learning_rate": 5.358789160903343e-06, "loss": 0.5819, "step": 7779 }, { "epoch": 0.49, "grad_norm": 0.8864629864692688, "learning_rate": 5.357765804600664e-06, "loss": 0.5689, "step": 7780 }, { "epoch": 0.49, "grad_norm": 0.9528976678848267, "learning_rate": 5.3567424332337125e-06, "loss": 0.5989, "step": 7781 }, { "epoch": 0.49, "grad_norm": 0.92073655128479, "learning_rate": 5.355719046845577e-06, "loss": 0.6029, "step": 7782 }, { "epoch": 0.49, "grad_norm": 0.8833118677139282, "learning_rate": 5.354695645479352e-06, "loss": 0.5792, "step": 7783 }, { "epoch": 0.49, "grad_norm": 0.8913655877113342, "learning_rate": 5.353672229178125e-06, "loss": 0.564, "step": 7784 }, { "epoch": 0.49, "grad_norm": 0.8653914332389832, "learning_rate": 5.352648797984993e-06, "loss": 0.5835, "step": 7785 }, { "epoch": 0.49, "grad_norm": 0.8871545791625977, "learning_rate": 5.351625351943044e-06, "loss": 0.6209, "step": 7786 }, { "epoch": 0.49, "grad_norm": 0.888781726360321, "learning_rate": 5.350601891095377e-06, "loss": 0.5891, "step": 7787 }, { "epoch": 0.49, "grad_norm": 0.7901937961578369, "learning_rate": 5.349578415485085e-06, "loss": 0.4945, "step": 7788 }, { "epoch": 0.49, "grad_norm": 0.9174894094467163, "learning_rate": 5.34855492515526e-06, "loss": 0.5817, "step": 7789 }, { "epoch": 0.49, "grad_norm": 0.9078687429428101, "learning_rate": 5.347531420148999e-06, "loss": 0.6333, "step": 7790 }, { "epoch": 0.49, "grad_norm": 0.8857147097587585, "learning_rate": 5.3465079005094e-06, "loss": 0.5257, "step": 7791 }, { "epoch": 0.49, "grad_norm": 0.8582876920700073, "learning_rate": 5.34548436627956e-06, "loss": 0.6155, "step": 7792 }, { "epoch": 0.49, "grad_norm": 0.9454988837242126, "learning_rate": 5.344460817502573e-06, "loss": 0.6312, "step": 7793 }, { "epoch": 0.49, "grad_norm": 0.8494389057159424, "learning_rate": 5.34343725422154e-06, "loss": 0.5916, "step": 7794 }, { "epoch": 0.49, "grad_norm": 0.8725386261940002, "learning_rate": 5.342413676479559e-06, "loss": 0.5982, "step": 7795 }, { "epoch": 0.49, "grad_norm": 0.890536904335022, "learning_rate": 5.34139008431973e-06, "loss": 0.5657, "step": 7796 }, { "epoch": 0.49, "grad_norm": 0.8989502787590027, "learning_rate": 5.34036647778515e-06, "loss": 0.6311, "step": 7797 }, { "epoch": 0.49, "grad_norm": 0.8554948568344116, "learning_rate": 5.3393428569189235e-06, "loss": 0.5428, "step": 7798 }, { "epoch": 0.49, "grad_norm": 0.8485680818557739, "learning_rate": 5.338319221764149e-06, "loss": 0.5813, "step": 7799 }, { "epoch": 0.49, "grad_norm": 0.8714006543159485, "learning_rate": 5.33729557236393e-06, "loss": 0.6127, "step": 7800 }, { "epoch": 0.49, "grad_norm": 0.8680577874183655, "learning_rate": 5.336271908761367e-06, "loss": 0.6045, "step": 7801 }, { "epoch": 0.49, "grad_norm": 0.8042650818824768, "learning_rate": 5.335248230999565e-06, "loss": 0.5189, "step": 7802 }, { "epoch": 0.49, "grad_norm": 0.9379438757896423, "learning_rate": 5.334224539121625e-06, "loss": 0.5976, "step": 7803 }, { "epoch": 0.49, "grad_norm": 0.8921198844909668, "learning_rate": 5.333200833170652e-06, "loss": 0.5507, "step": 7804 }, { "epoch": 0.49, "grad_norm": 0.8879731893539429, "learning_rate": 5.332177113189751e-06, "loss": 0.6268, "step": 7805 }, { "epoch": 0.49, "grad_norm": 0.8605756759643555, "learning_rate": 5.331153379222028e-06, "loss": 0.6194, "step": 7806 }, { "epoch": 0.49, "grad_norm": 0.9379689693450928, "learning_rate": 5.330129631310589e-06, "loss": 0.6294, "step": 7807 }, { "epoch": 0.49, "grad_norm": 0.8884453177452087, "learning_rate": 5.3291058694985385e-06, "loss": 0.6167, "step": 7808 }, { "epoch": 0.49, "grad_norm": 0.8566985726356506, "learning_rate": 5.328082093828984e-06, "loss": 0.6185, "step": 7809 }, { "epoch": 0.49, "grad_norm": 0.8915068507194519, "learning_rate": 5.327058304345035e-06, "loss": 0.5716, "step": 7810 }, { "epoch": 0.49, "grad_norm": 0.8683719635009766, "learning_rate": 5.3260345010898e-06, "loss": 0.5944, "step": 7811 }, { "epoch": 0.49, "grad_norm": 0.9614280462265015, "learning_rate": 5.325010684106384e-06, "loss": 0.5791, "step": 7812 }, { "epoch": 0.49, "grad_norm": 0.8619272708892822, "learning_rate": 5.323986853437899e-06, "loss": 0.5457, "step": 7813 }, { "epoch": 0.5, "grad_norm": 0.9838071465492249, "learning_rate": 5.322963009127454e-06, "loss": 0.618, "step": 7814 }, { "epoch": 0.5, "grad_norm": 0.8671537637710571, "learning_rate": 5.321939151218163e-06, "loss": 0.5641, "step": 7815 }, { "epoch": 0.5, "grad_norm": 0.8439149856567383, "learning_rate": 5.320915279753132e-06, "loss": 0.592, "step": 7816 }, { "epoch": 0.5, "grad_norm": 0.8067424297332764, "learning_rate": 5.319891394775475e-06, "loss": 0.5262, "step": 7817 }, { "epoch": 0.5, "grad_norm": 0.979844331741333, "learning_rate": 5.3188674963283064e-06, "loss": 0.5845, "step": 7818 }, { "epoch": 0.5, "grad_norm": 0.8906669020652771, "learning_rate": 5.317843584454734e-06, "loss": 0.5639, "step": 7819 }, { "epoch": 0.5, "grad_norm": 0.9191656112670898, "learning_rate": 5.316819659197875e-06, "loss": 0.5816, "step": 7820 }, { "epoch": 0.5, "grad_norm": 0.917048990726471, "learning_rate": 5.315795720600842e-06, "loss": 0.5991, "step": 7821 }, { "epoch": 0.5, "grad_norm": 0.8632756471633911, "learning_rate": 5.314771768706751e-06, "loss": 0.5967, "step": 7822 }, { "epoch": 0.5, "grad_norm": 0.9136775732040405, "learning_rate": 5.313747803558714e-06, "loss": 0.5945, "step": 7823 }, { "epoch": 0.5, "grad_norm": 0.84110426902771, "learning_rate": 5.312723825199849e-06, "loss": 0.6218, "step": 7824 }, { "epoch": 0.5, "grad_norm": 0.9239146113395691, "learning_rate": 5.311699833673273e-06, "loss": 0.6421, "step": 7825 }, { "epoch": 0.5, "grad_norm": 0.9365952014923096, "learning_rate": 5.310675829022101e-06, "loss": 0.5668, "step": 7826 }, { "epoch": 0.5, "grad_norm": 0.9483537673950195, "learning_rate": 5.309651811289449e-06, "loss": 0.5903, "step": 7827 }, { "epoch": 0.5, "grad_norm": 0.9416823387145996, "learning_rate": 5.308627780518437e-06, "loss": 0.5688, "step": 7828 }, { "epoch": 0.5, "grad_norm": 0.9334666728973389, "learning_rate": 5.307603736752183e-06, "loss": 0.5561, "step": 7829 }, { "epoch": 0.5, "grad_norm": 0.8541433215141296, "learning_rate": 5.306579680033807e-06, "loss": 0.592, "step": 7830 }, { "epoch": 0.5, "grad_norm": 0.9449893832206726, "learning_rate": 5.305555610406425e-06, "loss": 0.6002, "step": 7831 }, { "epoch": 0.5, "grad_norm": 0.8886929154396057, "learning_rate": 5.30453152791316e-06, "loss": 0.6695, "step": 7832 }, { "epoch": 0.5, "grad_norm": 0.8141634464263916, "learning_rate": 5.303507432597134e-06, "loss": 0.5946, "step": 7833 }, { "epoch": 0.5, "grad_norm": 0.8616921901702881, "learning_rate": 5.302483324501463e-06, "loss": 0.6024, "step": 7834 }, { "epoch": 0.5, "grad_norm": 0.8720713257789612, "learning_rate": 5.3014592036692715e-06, "loss": 0.5934, "step": 7835 }, { "epoch": 0.5, "grad_norm": 0.954289436340332, "learning_rate": 5.300435070143683e-06, "loss": 0.5998, "step": 7836 }, { "epoch": 0.5, "grad_norm": 0.8757979273796082, "learning_rate": 5.2994109239678185e-06, "loss": 0.6295, "step": 7837 }, { "epoch": 0.5, "grad_norm": 0.9314550161361694, "learning_rate": 5.298386765184801e-06, "loss": 0.6031, "step": 7838 }, { "epoch": 0.5, "grad_norm": 0.8398404121398926, "learning_rate": 5.297362593837755e-06, "loss": 0.5565, "step": 7839 }, { "epoch": 0.5, "grad_norm": 0.8812541365623474, "learning_rate": 5.296338409969805e-06, "loss": 0.5657, "step": 7840 }, { "epoch": 0.5, "grad_norm": 0.8774970173835754, "learning_rate": 5.295314213624076e-06, "loss": 0.5786, "step": 7841 }, { "epoch": 0.5, "grad_norm": 0.8290955424308777, "learning_rate": 5.2942900048436914e-06, "loss": 0.5563, "step": 7842 }, { "epoch": 0.5, "grad_norm": 0.9258725047111511, "learning_rate": 5.293265783671778e-06, "loss": 0.5809, "step": 7843 }, { "epoch": 0.5, "grad_norm": 0.8245546817779541, "learning_rate": 5.292241550151465e-06, "loss": 0.6013, "step": 7844 }, { "epoch": 0.5, "grad_norm": 0.9611520767211914, "learning_rate": 5.291217304325875e-06, "loss": 0.6349, "step": 7845 }, { "epoch": 0.5, "grad_norm": 0.89043790102005, "learning_rate": 5.290193046238139e-06, "loss": 0.6121, "step": 7846 }, { "epoch": 0.5, "grad_norm": 0.9023299217224121, "learning_rate": 5.289168775931381e-06, "loss": 0.5966, "step": 7847 }, { "epoch": 0.5, "grad_norm": 0.881334125995636, "learning_rate": 5.288144493448733e-06, "loss": 0.5643, "step": 7848 }, { "epoch": 0.5, "grad_norm": 0.9082907438278198, "learning_rate": 5.287120198833324e-06, "loss": 0.5767, "step": 7849 }, { "epoch": 0.5, "grad_norm": 0.9419313073158264, "learning_rate": 5.286095892128282e-06, "loss": 0.6172, "step": 7850 }, { "epoch": 0.5, "grad_norm": 0.9367068409919739, "learning_rate": 5.285071573376735e-06, "loss": 0.6101, "step": 7851 }, { "epoch": 0.5, "grad_norm": 0.9079290628433228, "learning_rate": 5.2840472426218185e-06, "loss": 0.589, "step": 7852 }, { "epoch": 0.5, "grad_norm": 0.948851466178894, "learning_rate": 5.283022899906659e-06, "loss": 0.6335, "step": 7853 }, { "epoch": 0.5, "grad_norm": 0.921149492263794, "learning_rate": 5.28199854527439e-06, "loss": 0.5849, "step": 7854 }, { "epoch": 0.5, "grad_norm": 0.8445439338684082, "learning_rate": 5.280974178768144e-06, "loss": 0.54, "step": 7855 }, { "epoch": 0.5, "grad_norm": 0.9971843361854553, "learning_rate": 5.279949800431052e-06, "loss": 0.6041, "step": 7856 }, { "epoch": 0.5, "grad_norm": 0.8367643356323242, "learning_rate": 5.278925410306248e-06, "loss": 0.5955, "step": 7857 }, { "epoch": 0.5, "grad_norm": 0.8564116358757019, "learning_rate": 5.277901008436865e-06, "loss": 0.5757, "step": 7858 }, { "epoch": 0.5, "grad_norm": 0.8856030702590942, "learning_rate": 5.276876594866037e-06, "loss": 0.5963, "step": 7859 }, { "epoch": 0.5, "grad_norm": 0.8912267088890076, "learning_rate": 5.2758521696369e-06, "loss": 0.5597, "step": 7860 }, { "epoch": 0.5, "grad_norm": 0.8481583595275879, "learning_rate": 5.274827732792587e-06, "loss": 0.5526, "step": 7861 }, { "epoch": 0.5, "grad_norm": 0.9108606576919556, "learning_rate": 5.273803284376234e-06, "loss": 0.6079, "step": 7862 }, { "epoch": 0.5, "grad_norm": 0.9559755921363831, "learning_rate": 5.272778824430977e-06, "loss": 0.6008, "step": 7863 }, { "epoch": 0.5, "grad_norm": 0.8783113360404968, "learning_rate": 5.271754352999953e-06, "loss": 0.6102, "step": 7864 }, { "epoch": 0.5, "grad_norm": 0.8379794359207153, "learning_rate": 5.2707298701263e-06, "loss": 0.5744, "step": 7865 }, { "epoch": 0.5, "grad_norm": 0.8685166835784912, "learning_rate": 5.269705375853151e-06, "loss": 0.604, "step": 7866 }, { "epoch": 0.5, "grad_norm": 0.875748336315155, "learning_rate": 5.26868087022365e-06, "loss": 0.6116, "step": 7867 }, { "epoch": 0.5, "grad_norm": 0.890408992767334, "learning_rate": 5.26765635328093e-06, "loss": 0.567, "step": 7868 }, { "epoch": 0.5, "grad_norm": 0.9582130312919617, "learning_rate": 5.266631825068134e-06, "loss": 0.6553, "step": 7869 }, { "epoch": 0.5, "grad_norm": 0.888396143913269, "learning_rate": 5.265607285628397e-06, "loss": 0.611, "step": 7870 }, { "epoch": 0.5, "grad_norm": 0.869216799736023, "learning_rate": 5.264582735004863e-06, "loss": 0.5906, "step": 7871 }, { "epoch": 0.5, "grad_norm": 0.8864418864250183, "learning_rate": 5.26355817324067e-06, "loss": 0.6488, "step": 7872 }, { "epoch": 0.5, "grad_norm": 0.8367258906364441, "learning_rate": 5.26253360037896e-06, "loss": 0.6189, "step": 7873 }, { "epoch": 0.5, "grad_norm": 0.8717927932739258, "learning_rate": 5.2615090164628705e-06, "loss": 0.6071, "step": 7874 }, { "epoch": 0.5, "grad_norm": 0.8906144499778748, "learning_rate": 5.2604844215355484e-06, "loss": 0.5616, "step": 7875 }, { "epoch": 0.5, "grad_norm": 0.9154402017593384, "learning_rate": 5.259459815640133e-06, "loss": 0.6081, "step": 7876 }, { "epoch": 0.5, "grad_norm": 0.9059274792671204, "learning_rate": 5.258435198819768e-06, "loss": 0.6212, "step": 7877 }, { "epoch": 0.5, "grad_norm": 0.9382339715957642, "learning_rate": 5.257410571117594e-06, "loss": 0.6418, "step": 7878 }, { "epoch": 0.5, "grad_norm": 0.8434200882911682, "learning_rate": 5.256385932576759e-06, "loss": 0.5638, "step": 7879 }, { "epoch": 0.5, "grad_norm": 0.8744908571243286, "learning_rate": 5.255361283240402e-06, "loss": 0.5436, "step": 7880 }, { "epoch": 0.5, "grad_norm": 0.8957458138465881, "learning_rate": 5.254336623151672e-06, "loss": 0.6203, "step": 7881 }, { "epoch": 0.5, "grad_norm": 0.8486526608467102, "learning_rate": 5.253311952353708e-06, "loss": 0.5835, "step": 7882 }, { "epoch": 0.5, "grad_norm": 0.9576562643051147, "learning_rate": 5.252287270889661e-06, "loss": 0.6513, "step": 7883 }, { "epoch": 0.5, "grad_norm": 0.85997474193573, "learning_rate": 5.251262578802675e-06, "loss": 0.5634, "step": 7884 }, { "epoch": 0.5, "grad_norm": 0.87550950050354, "learning_rate": 5.250237876135895e-06, "loss": 0.6243, "step": 7885 }, { "epoch": 0.5, "grad_norm": 0.8734540343284607, "learning_rate": 5.2492131629324695e-06, "loss": 0.5659, "step": 7886 }, { "epoch": 0.5, "grad_norm": 0.8869773745536804, "learning_rate": 5.248188439235544e-06, "loss": 0.578, "step": 7887 }, { "epoch": 0.5, "grad_norm": 0.8749696016311646, "learning_rate": 5.247163705088267e-06, "loss": 0.564, "step": 7888 }, { "epoch": 0.5, "grad_norm": 0.8944323658943176, "learning_rate": 5.246138960533786e-06, "loss": 0.6297, "step": 7889 }, { "epoch": 0.5, "grad_norm": 0.9447425603866577, "learning_rate": 5.245114205615249e-06, "loss": 0.5535, "step": 7890 }, { "epoch": 0.5, "grad_norm": 0.8836696743965149, "learning_rate": 5.244089440375807e-06, "loss": 0.5838, "step": 7891 }, { "epoch": 0.5, "grad_norm": 0.8536423444747925, "learning_rate": 5.243064664858607e-06, "loss": 0.5484, "step": 7892 }, { "epoch": 0.5, "grad_norm": 0.9463775157928467, "learning_rate": 5.242039879106799e-06, "loss": 0.631, "step": 7893 }, { "epoch": 0.5, "grad_norm": 0.9138554334640503, "learning_rate": 5.241015083163534e-06, "loss": 0.6952, "step": 7894 }, { "epoch": 0.5, "grad_norm": 0.8552803993225098, "learning_rate": 5.239990277071962e-06, "loss": 0.625, "step": 7895 }, { "epoch": 0.5, "grad_norm": 0.894889235496521, "learning_rate": 5.238965460875236e-06, "loss": 0.5667, "step": 7896 }, { "epoch": 0.5, "grad_norm": 0.8741210699081421, "learning_rate": 5.237940634616504e-06, "loss": 0.5868, "step": 7897 }, { "epoch": 0.5, "grad_norm": 0.8499166965484619, "learning_rate": 5.2369157983389205e-06, "loss": 0.6187, "step": 7898 }, { "epoch": 0.5, "grad_norm": 0.9158671498298645, "learning_rate": 5.235890952085637e-06, "loss": 0.5634, "step": 7899 }, { "epoch": 0.5, "grad_norm": 0.8855353593826294, "learning_rate": 5.234866095899806e-06, "loss": 0.5651, "step": 7900 }, { "epoch": 0.5, "grad_norm": 0.9134857654571533, "learning_rate": 5.23384122982458e-06, "loss": 0.588, "step": 7901 }, { "epoch": 0.5, "grad_norm": 0.9252248406410217, "learning_rate": 5.232816353903113e-06, "loss": 0.6017, "step": 7902 }, { "epoch": 0.5, "grad_norm": 0.8008279800415039, "learning_rate": 5.231791468178561e-06, "loss": 0.5136, "step": 7903 }, { "epoch": 0.5, "grad_norm": 0.8626922965049744, "learning_rate": 5.230766572694075e-06, "loss": 0.5724, "step": 7904 }, { "epoch": 0.5, "grad_norm": 0.9324626326560974, "learning_rate": 5.229741667492811e-06, "loss": 0.6267, "step": 7905 }, { "epoch": 0.5, "grad_norm": 0.8620643615722656, "learning_rate": 5.228716752617926e-06, "loss": 0.5924, "step": 7906 }, { "epoch": 0.5, "grad_norm": 0.8927160501480103, "learning_rate": 5.2276918281125744e-06, "loss": 0.6103, "step": 7907 }, { "epoch": 0.5, "grad_norm": 0.8659266233444214, "learning_rate": 5.22666689401991e-06, "loss": 0.5934, "step": 7908 }, { "epoch": 0.5, "grad_norm": 0.8656795620918274, "learning_rate": 5.225641950383094e-06, "loss": 0.6328, "step": 7909 }, { "epoch": 0.5, "grad_norm": 0.881079375743866, "learning_rate": 5.2246169972452775e-06, "loss": 0.6129, "step": 7910 }, { "epoch": 0.5, "grad_norm": 0.8573868870735168, "learning_rate": 5.223592034649624e-06, "loss": 0.5608, "step": 7911 }, { "epoch": 0.5, "grad_norm": 0.8543702960014343, "learning_rate": 5.2225670626392845e-06, "loss": 0.5469, "step": 7912 }, { "epoch": 0.5, "grad_norm": 0.8963991403579712, "learning_rate": 5.221542081257421e-06, "loss": 0.6221, "step": 7913 }, { "epoch": 0.5, "grad_norm": 0.9009084105491638, "learning_rate": 5.220517090547194e-06, "loss": 0.5719, "step": 7914 }, { "epoch": 0.5, "grad_norm": 0.9578242301940918, "learning_rate": 5.219492090551757e-06, "loss": 0.6152, "step": 7915 }, { "epoch": 0.5, "grad_norm": 0.9097537398338318, "learning_rate": 5.21846708131427e-06, "loss": 0.6263, "step": 7916 }, { "epoch": 0.5, "grad_norm": 0.932669997215271, "learning_rate": 5.217442062877897e-06, "loss": 0.5925, "step": 7917 }, { "epoch": 0.5, "grad_norm": 0.8461833000183105, "learning_rate": 5.216417035285795e-06, "loss": 0.6158, "step": 7918 }, { "epoch": 0.5, "grad_norm": 0.9005031585693359, "learning_rate": 5.215391998581123e-06, "loss": 0.6002, "step": 7919 }, { "epoch": 0.5, "grad_norm": 0.8439646363258362, "learning_rate": 5.214366952807043e-06, "loss": 0.5732, "step": 7920 }, { "epoch": 0.5, "grad_norm": 0.839756190776825, "learning_rate": 5.213341898006718e-06, "loss": 0.5291, "step": 7921 }, { "epoch": 0.5, "grad_norm": 0.8541595935821533, "learning_rate": 5.212316834223307e-06, "loss": 0.6199, "step": 7922 }, { "epoch": 0.5, "grad_norm": 0.8544859886169434, "learning_rate": 5.211291761499973e-06, "loss": 0.558, "step": 7923 }, { "epoch": 0.5, "grad_norm": 0.8676169514656067, "learning_rate": 5.210266679879877e-06, "loss": 0.5531, "step": 7924 }, { "epoch": 0.5, "grad_norm": 0.9017534255981445, "learning_rate": 5.209241589406183e-06, "loss": 0.5912, "step": 7925 }, { "epoch": 0.5, "grad_norm": 0.8735457062721252, "learning_rate": 5.208216490122055e-06, "loss": 0.5727, "step": 7926 }, { "epoch": 0.5, "grad_norm": 0.9029328227043152, "learning_rate": 5.207191382070653e-06, "loss": 0.5819, "step": 7927 }, { "epoch": 0.5, "grad_norm": 0.9156153202056885, "learning_rate": 5.206166265295143e-06, "loss": 0.5943, "step": 7928 }, { "epoch": 0.5, "grad_norm": 0.8806928396224976, "learning_rate": 5.205141139838691e-06, "loss": 0.5618, "step": 7929 }, { "epoch": 0.5, "grad_norm": 0.903069257736206, "learning_rate": 5.204116005744456e-06, "loss": 0.5822, "step": 7930 }, { "epoch": 0.5, "grad_norm": 0.9287469983100891, "learning_rate": 5.2030908630556075e-06, "loss": 0.6082, "step": 7931 }, { "epoch": 0.5, "grad_norm": 0.8750594258308411, "learning_rate": 5.202065711815309e-06, "loss": 0.5648, "step": 7932 }, { "epoch": 0.5, "grad_norm": 0.8411305546760559, "learning_rate": 5.201040552066727e-06, "loss": 0.5076, "step": 7933 }, { "epoch": 0.5, "grad_norm": 0.9401187896728516, "learning_rate": 5.200015383853026e-06, "loss": 0.5915, "step": 7934 }, { "epoch": 0.5, "grad_norm": 0.8993878364562988, "learning_rate": 5.1989902072173735e-06, "loss": 0.6175, "step": 7935 }, { "epoch": 0.5, "grad_norm": 0.9325996041297913, "learning_rate": 5.197965022202935e-06, "loss": 0.5977, "step": 7936 }, { "epoch": 0.5, "grad_norm": 0.8501147627830505, "learning_rate": 5.196939828852879e-06, "loss": 0.5955, "step": 7937 }, { "epoch": 0.5, "grad_norm": 0.8839433789253235, "learning_rate": 5.195914627210372e-06, "loss": 0.5685, "step": 7938 }, { "epoch": 0.5, "grad_norm": 0.8878698945045471, "learning_rate": 5.19488941731858e-06, "loss": 0.6122, "step": 7939 }, { "epoch": 0.5, "grad_norm": 0.8705379366874695, "learning_rate": 5.193864199220674e-06, "loss": 0.5531, "step": 7940 }, { "epoch": 0.5, "grad_norm": 0.8977400064468384, "learning_rate": 5.192838972959821e-06, "loss": 0.6069, "step": 7941 }, { "epoch": 0.5, "grad_norm": 0.8894720673561096, "learning_rate": 5.19181373857919e-06, "loss": 0.5976, "step": 7942 }, { "epoch": 0.5, "grad_norm": 0.8529515862464905, "learning_rate": 5.190788496121948e-06, "loss": 0.5842, "step": 7943 }, { "epoch": 0.5, "grad_norm": 0.8185912370681763, "learning_rate": 5.189763245631268e-06, "loss": 0.5169, "step": 7944 }, { "epoch": 0.5, "grad_norm": 0.8798929452896118, "learning_rate": 5.188737987150316e-06, "loss": 0.6247, "step": 7945 }, { "epoch": 0.5, "grad_norm": 0.8841909766197205, "learning_rate": 5.1877127207222666e-06, "loss": 0.5642, "step": 7946 }, { "epoch": 0.5, "grad_norm": 0.8578714728355408, "learning_rate": 5.186687446390284e-06, "loss": 0.5656, "step": 7947 }, { "epoch": 0.5, "grad_norm": 0.8991813659667969, "learning_rate": 5.185662164197546e-06, "loss": 0.5925, "step": 7948 }, { "epoch": 0.5, "grad_norm": 0.8656896352767944, "learning_rate": 5.184636874187218e-06, "loss": 0.5762, "step": 7949 }, { "epoch": 0.5, "grad_norm": 0.9480549097061157, "learning_rate": 5.183611576402474e-06, "loss": 0.5916, "step": 7950 }, { "epoch": 0.5, "grad_norm": 0.8683533072471619, "learning_rate": 5.182586270886485e-06, "loss": 0.6007, "step": 7951 }, { "epoch": 0.5, "grad_norm": 0.8761510848999023, "learning_rate": 5.181560957682423e-06, "loss": 0.5939, "step": 7952 }, { "epoch": 0.5, "grad_norm": 0.8311535716056824, "learning_rate": 5.180535636833462e-06, "loss": 0.5621, "step": 7953 }, { "epoch": 0.5, "grad_norm": 0.859836995601654, "learning_rate": 5.179510308382773e-06, "loss": 0.5844, "step": 7954 }, { "epoch": 0.5, "grad_norm": 0.897769033908844, "learning_rate": 5.178484972373528e-06, "loss": 0.6163, "step": 7955 }, { "epoch": 0.5, "grad_norm": 0.8741475343704224, "learning_rate": 5.177459628848903e-06, "loss": 0.6248, "step": 7956 }, { "epoch": 0.5, "grad_norm": 0.8983214497566223, "learning_rate": 5.17643427785207e-06, "loss": 0.6236, "step": 7957 }, { "epoch": 0.5, "grad_norm": 0.8339930772781372, "learning_rate": 5.175408919426204e-06, "loss": 0.5713, "step": 7958 }, { "epoch": 0.5, "grad_norm": 0.890082061290741, "learning_rate": 5.174383553614478e-06, "loss": 0.5438, "step": 7959 }, { "epoch": 0.5, "grad_norm": 0.8514465689659119, "learning_rate": 5.1733581804600674e-06, "loss": 0.5949, "step": 7960 }, { "epoch": 0.5, "grad_norm": 0.9061854481697083, "learning_rate": 5.172332800006147e-06, "loss": 0.6432, "step": 7961 }, { "epoch": 0.5, "grad_norm": 0.8398959636688232, "learning_rate": 5.171307412295892e-06, "loss": 0.5946, "step": 7962 }, { "epoch": 0.5, "grad_norm": 0.8187358379364014, "learning_rate": 5.1702820173724766e-06, "loss": 0.5614, "step": 7963 }, { "epoch": 0.5, "grad_norm": 0.9887537360191345, "learning_rate": 5.169256615279078e-06, "loss": 0.6059, "step": 7964 }, { "epoch": 0.5, "grad_norm": 0.8645609617233276, "learning_rate": 5.168231206058874e-06, "loss": 0.5553, "step": 7965 }, { "epoch": 0.5, "grad_norm": 0.8241131901741028, "learning_rate": 5.167205789755037e-06, "loss": 0.5472, "step": 7966 }, { "epoch": 0.5, "grad_norm": 0.8981542587280273, "learning_rate": 5.1661803664107465e-06, "loss": 0.5675, "step": 7967 }, { "epoch": 0.5, "grad_norm": 0.8625651001930237, "learning_rate": 5.16515493606918e-06, "loss": 0.5916, "step": 7968 }, { "epoch": 0.5, "grad_norm": 0.8327503800392151, "learning_rate": 5.164129498773513e-06, "loss": 0.5837, "step": 7969 }, { "epoch": 0.5, "grad_norm": 0.8808488845825195, "learning_rate": 5.163104054566922e-06, "loss": 0.6029, "step": 7970 }, { "epoch": 0.51, "grad_norm": 0.8956292867660522, "learning_rate": 5.16207860349259e-06, "loss": 0.5893, "step": 7971 }, { "epoch": 0.51, "grad_norm": 0.8336197137832642, "learning_rate": 5.16105314559369e-06, "loss": 0.5671, "step": 7972 }, { "epoch": 0.51, "grad_norm": 0.8766692280769348, "learning_rate": 5.160027680913402e-06, "loss": 0.5806, "step": 7973 }, { "epoch": 0.51, "grad_norm": 0.8673431873321533, "learning_rate": 5.159002209494905e-06, "loss": 0.6534, "step": 7974 }, { "epoch": 0.51, "grad_norm": 0.8875123858451843, "learning_rate": 5.157976731381379e-06, "loss": 0.5969, "step": 7975 }, { "epoch": 0.51, "grad_norm": 0.9223279356956482, "learning_rate": 5.1569512466160025e-06, "loss": 0.6288, "step": 7976 }, { "epoch": 0.51, "grad_norm": 0.9694954752922058, "learning_rate": 5.155925755241954e-06, "loss": 0.6392, "step": 7977 }, { "epoch": 0.51, "grad_norm": 0.9013630151748657, "learning_rate": 5.1549002573024144e-06, "loss": 0.5671, "step": 7978 }, { "epoch": 0.51, "grad_norm": 0.8672821521759033, "learning_rate": 5.153874752840564e-06, "loss": 0.5583, "step": 7979 }, { "epoch": 0.51, "grad_norm": 1.076423168182373, "learning_rate": 5.152849241899585e-06, "loss": 0.5713, "step": 7980 }, { "epoch": 0.51, "grad_norm": 0.9117089509963989, "learning_rate": 5.151823724522653e-06, "loss": 0.5954, "step": 7981 }, { "epoch": 0.51, "grad_norm": 0.8285648226737976, "learning_rate": 5.150798200752953e-06, "loss": 0.5856, "step": 7982 }, { "epoch": 0.51, "grad_norm": 0.8754099607467651, "learning_rate": 5.149772670633666e-06, "loss": 0.5748, "step": 7983 }, { "epoch": 0.51, "grad_norm": 0.8837385177612305, "learning_rate": 5.148747134207974e-06, "loss": 0.5615, "step": 7984 }, { "epoch": 0.51, "grad_norm": 0.8902435302734375, "learning_rate": 5.147721591519056e-06, "loss": 0.5814, "step": 7985 }, { "epoch": 0.51, "grad_norm": 0.8963085412979126, "learning_rate": 5.146696042610095e-06, "loss": 0.5477, "step": 7986 }, { "epoch": 0.51, "grad_norm": 0.9228818416595459, "learning_rate": 5.145670487524276e-06, "loss": 0.6119, "step": 7987 }, { "epoch": 0.51, "grad_norm": 0.9034307599067688, "learning_rate": 5.144644926304778e-06, "loss": 0.6482, "step": 7988 }, { "epoch": 0.51, "grad_norm": 0.9602980017662048, "learning_rate": 5.1436193589947855e-06, "loss": 0.5889, "step": 7989 }, { "epoch": 0.51, "grad_norm": 0.9697549939155579, "learning_rate": 5.1425937856374816e-06, "loss": 0.6406, "step": 7990 }, { "epoch": 0.51, "grad_norm": 0.8972442746162415, "learning_rate": 5.141568206276051e-06, "loss": 0.6258, "step": 7991 }, { "epoch": 0.51, "grad_norm": 0.8347691297531128, "learning_rate": 5.140542620953675e-06, "loss": 0.5349, "step": 7992 }, { "epoch": 0.51, "grad_norm": 1.0211116075515747, "learning_rate": 5.139517029713537e-06, "loss": 0.665, "step": 7993 }, { "epoch": 0.51, "grad_norm": 0.8696901202201843, "learning_rate": 5.138491432598822e-06, "loss": 0.5532, "step": 7994 }, { "epoch": 0.51, "grad_norm": 0.9338617920875549, "learning_rate": 5.137465829652716e-06, "loss": 0.5866, "step": 7995 }, { "epoch": 0.51, "grad_norm": 0.9527667760848999, "learning_rate": 5.136440220918401e-06, "loss": 0.5829, "step": 7996 }, { "epoch": 0.51, "grad_norm": 0.9329034686088562, "learning_rate": 5.135414606439063e-06, "loss": 0.6293, "step": 7997 }, { "epoch": 0.51, "grad_norm": 0.924534797668457, "learning_rate": 5.134388986257887e-06, "loss": 0.5911, "step": 7998 }, { "epoch": 0.51, "grad_norm": 0.8370699286460876, "learning_rate": 5.133363360418059e-06, "loss": 0.6032, "step": 7999 }, { "epoch": 0.51, "grad_norm": 0.8892449736595154, "learning_rate": 5.132337728962763e-06, "loss": 0.6089, "step": 8000 }, { "epoch": 0.51, "grad_norm": 0.8967301249504089, "learning_rate": 5.131312091935186e-06, "loss": 0.5924, "step": 8001 }, { "epoch": 0.51, "grad_norm": 0.8810504674911499, "learning_rate": 5.130286449378513e-06, "loss": 0.6515, "step": 8002 }, { "epoch": 0.51, "grad_norm": 0.9304781556129456, "learning_rate": 5.129260801335932e-06, "loss": 0.6081, "step": 8003 }, { "epoch": 0.51, "grad_norm": 0.8867761492729187, "learning_rate": 5.128235147850629e-06, "loss": 0.6011, "step": 8004 }, { "epoch": 0.51, "grad_norm": 0.9013170003890991, "learning_rate": 5.127209488965787e-06, "loss": 0.5825, "step": 8005 }, { "epoch": 0.51, "grad_norm": 0.8430556654930115, "learning_rate": 5.1261838247246e-06, "loss": 0.5425, "step": 8006 }, { "epoch": 0.51, "grad_norm": 0.8869624733924866, "learning_rate": 5.125158155170248e-06, "loss": 0.5767, "step": 8007 }, { "epoch": 0.51, "grad_norm": 0.9233295321464539, "learning_rate": 5.124132480345922e-06, "loss": 0.5623, "step": 8008 }, { "epoch": 0.51, "grad_norm": 0.9272169470787048, "learning_rate": 5.123106800294809e-06, "loss": 0.596, "step": 8009 }, { "epoch": 0.51, "grad_norm": 0.8874875903129578, "learning_rate": 5.122081115060098e-06, "loss": 0.5913, "step": 8010 }, { "epoch": 0.51, "grad_norm": 0.8825517296791077, "learning_rate": 5.121055424684975e-06, "loss": 0.5532, "step": 8011 }, { "epoch": 0.51, "grad_norm": 0.8856724500656128, "learning_rate": 5.12002972921263e-06, "loss": 0.5788, "step": 8012 }, { "epoch": 0.51, "grad_norm": 0.9288915395736694, "learning_rate": 5.119004028686249e-06, "loss": 0.5705, "step": 8013 }, { "epoch": 0.51, "grad_norm": 0.897471010684967, "learning_rate": 5.117978323149025e-06, "loss": 0.6277, "step": 8014 }, { "epoch": 0.51, "grad_norm": 0.8995818495750427, "learning_rate": 5.116952612644141e-06, "loss": 0.5288, "step": 8015 }, { "epoch": 0.51, "grad_norm": 0.9045858979225159, "learning_rate": 5.1159268972147915e-06, "loss": 0.6051, "step": 8016 }, { "epoch": 0.51, "grad_norm": 0.913692057132721, "learning_rate": 5.114901176904164e-06, "loss": 0.5748, "step": 8017 }, { "epoch": 0.51, "grad_norm": 0.865149736404419, "learning_rate": 5.113875451755447e-06, "loss": 0.6055, "step": 8018 }, { "epoch": 0.51, "grad_norm": 0.828730046749115, "learning_rate": 5.11284972181183e-06, "loss": 0.5581, "step": 8019 }, { "epoch": 0.51, "grad_norm": 0.8808106184005737, "learning_rate": 5.111823987116504e-06, "loss": 0.5795, "step": 8020 }, { "epoch": 0.51, "grad_norm": 0.8963019847869873, "learning_rate": 5.110798247712661e-06, "loss": 0.5901, "step": 8021 }, { "epoch": 0.51, "grad_norm": 0.9240871667861938, "learning_rate": 5.109772503643486e-06, "loss": 0.6433, "step": 8022 }, { "epoch": 0.51, "grad_norm": 0.8749609589576721, "learning_rate": 5.108746754952177e-06, "loss": 0.5391, "step": 8023 }, { "epoch": 0.51, "grad_norm": 0.906970202922821, "learning_rate": 5.107721001681915e-06, "loss": 0.6189, "step": 8024 }, { "epoch": 0.51, "grad_norm": 0.8912851214408875, "learning_rate": 5.1066952438759e-06, "loss": 0.5633, "step": 8025 }, { "epoch": 0.51, "grad_norm": 0.8463259339332581, "learning_rate": 5.105669481577319e-06, "loss": 0.6057, "step": 8026 }, { "epoch": 0.51, "grad_norm": 0.8847749829292297, "learning_rate": 5.104643714829362e-06, "loss": 0.6348, "step": 8027 }, { "epoch": 0.51, "grad_norm": 0.8036050796508789, "learning_rate": 5.103617943675224e-06, "loss": 0.586, "step": 8028 }, { "epoch": 0.51, "grad_norm": 0.8839384913444519, "learning_rate": 5.102592168158095e-06, "loss": 0.5924, "step": 8029 }, { "epoch": 0.51, "grad_norm": 0.9251484870910645, "learning_rate": 5.101566388321165e-06, "loss": 0.572, "step": 8030 }, { "epoch": 0.51, "grad_norm": 0.8279865980148315, "learning_rate": 5.100540604207629e-06, "loss": 0.5797, "step": 8031 }, { "epoch": 0.51, "grad_norm": 0.9217899441719055, "learning_rate": 5.099514815860678e-06, "loss": 0.5839, "step": 8032 }, { "epoch": 0.51, "grad_norm": 0.903213381767273, "learning_rate": 5.098489023323504e-06, "loss": 0.5239, "step": 8033 }, { "epoch": 0.51, "grad_norm": 0.8775154948234558, "learning_rate": 5.0974632266393e-06, "loss": 0.591, "step": 8034 }, { "epoch": 0.51, "grad_norm": 0.8651240468025208, "learning_rate": 5.0964374258512585e-06, "loss": 0.5767, "step": 8035 }, { "epoch": 0.51, "grad_norm": 0.975160539150238, "learning_rate": 5.0954116210025725e-06, "loss": 0.6185, "step": 8036 }, { "epoch": 0.51, "grad_norm": 0.9247754812240601, "learning_rate": 5.094385812136435e-06, "loss": 0.5795, "step": 8037 }, { "epoch": 0.51, "grad_norm": 0.8965883255004883, "learning_rate": 5.09335999929604e-06, "loss": 0.577, "step": 8038 }, { "epoch": 0.51, "grad_norm": 0.8666002750396729, "learning_rate": 5.092334182524578e-06, "loss": 0.5766, "step": 8039 }, { "epoch": 0.51, "grad_norm": 0.94881272315979, "learning_rate": 5.091308361865247e-06, "loss": 0.6627, "step": 8040 }, { "epoch": 0.51, "grad_norm": 0.8409824371337891, "learning_rate": 5.090282537361237e-06, "loss": 0.5406, "step": 8041 }, { "epoch": 0.51, "grad_norm": 0.9426827430725098, "learning_rate": 5.089256709055745e-06, "loss": 0.6425, "step": 8042 }, { "epoch": 0.51, "grad_norm": 0.925849199295044, "learning_rate": 5.088230876991962e-06, "loss": 0.5865, "step": 8043 }, { "epoch": 0.51, "grad_norm": 0.8730261325836182, "learning_rate": 5.087205041213085e-06, "loss": 0.6125, "step": 8044 }, { "epoch": 0.51, "grad_norm": 0.9450942277908325, "learning_rate": 5.086179201762306e-06, "loss": 0.6118, "step": 8045 }, { "epoch": 0.51, "grad_norm": 0.9059416055679321, "learning_rate": 5.085153358682822e-06, "loss": 0.5707, "step": 8046 }, { "epoch": 0.51, "grad_norm": 0.867950975894928, "learning_rate": 5.084127512017823e-06, "loss": 0.5792, "step": 8047 }, { "epoch": 0.51, "grad_norm": 0.8605546951293945, "learning_rate": 5.083101661810511e-06, "loss": 0.5895, "step": 8048 }, { "epoch": 0.51, "grad_norm": 0.9312983155250549, "learning_rate": 5.082075808104075e-06, "loss": 0.5799, "step": 8049 }, { "epoch": 0.51, "grad_norm": 0.8603020310401917, "learning_rate": 5.081049950941713e-06, "loss": 0.6026, "step": 8050 }, { "epoch": 0.51, "grad_norm": 0.8669036626815796, "learning_rate": 5.080024090366618e-06, "loss": 0.6017, "step": 8051 }, { "epoch": 0.51, "grad_norm": 0.9047536253929138, "learning_rate": 5.078998226421989e-06, "loss": 0.6222, "step": 8052 }, { "epoch": 0.51, "grad_norm": 0.9225742816925049, "learning_rate": 5.07797235915102e-06, "loss": 0.6049, "step": 8053 }, { "epoch": 0.51, "grad_norm": 0.9004045724868774, "learning_rate": 5.076946488596905e-06, "loss": 0.6042, "step": 8054 }, { "epoch": 0.51, "grad_norm": 0.9334387183189392, "learning_rate": 5.07592061480284e-06, "loss": 0.6317, "step": 8055 }, { "epoch": 0.51, "grad_norm": 0.890455424785614, "learning_rate": 5.074894737812023e-06, "loss": 0.5758, "step": 8056 }, { "epoch": 0.51, "grad_norm": 0.8868134021759033, "learning_rate": 5.07386885766765e-06, "loss": 0.6435, "step": 8057 }, { "epoch": 0.51, "grad_norm": 0.8172594904899597, "learning_rate": 5.072842974412916e-06, "loss": 0.5172, "step": 8058 }, { "epoch": 0.51, "grad_norm": 0.8145936727523804, "learning_rate": 5.071817088091017e-06, "loss": 0.5327, "step": 8059 }, { "epoch": 0.51, "grad_norm": 0.8804033994674683, "learning_rate": 5.0707911987451496e-06, "loss": 0.6371, "step": 8060 }, { "epoch": 0.51, "grad_norm": 0.9473575353622437, "learning_rate": 5.0697653064185125e-06, "loss": 0.6165, "step": 8061 }, { "epoch": 0.51, "grad_norm": 0.894706130027771, "learning_rate": 5.068739411154301e-06, "loss": 0.5932, "step": 8062 }, { "epoch": 0.51, "grad_norm": 0.8926814198493958, "learning_rate": 5.0677135129957115e-06, "loss": 0.6044, "step": 8063 }, { "epoch": 0.51, "grad_norm": 0.8846773505210876, "learning_rate": 5.066687611985941e-06, "loss": 0.5754, "step": 8064 }, { "epoch": 0.51, "grad_norm": 0.8815335631370544, "learning_rate": 5.065661708168188e-06, "loss": 0.5586, "step": 8065 }, { "epoch": 0.51, "grad_norm": 0.916784942150116, "learning_rate": 5.064635801585649e-06, "loss": 0.5575, "step": 8066 }, { "epoch": 0.51, "grad_norm": 0.8539628982543945, "learning_rate": 5.06360989228152e-06, "loss": 0.5518, "step": 8067 }, { "epoch": 0.51, "grad_norm": 0.9269511103630066, "learning_rate": 5.062583980299002e-06, "loss": 0.5955, "step": 8068 }, { "epoch": 0.51, "grad_norm": 0.856561541557312, "learning_rate": 5.061558065681288e-06, "loss": 0.5672, "step": 8069 }, { "epoch": 0.51, "grad_norm": 0.8841691017150879, "learning_rate": 5.060532148471578e-06, "loss": 0.6283, "step": 8070 }, { "epoch": 0.51, "grad_norm": 0.832876980304718, "learning_rate": 5.059506228713071e-06, "loss": 0.5384, "step": 8071 }, { "epoch": 0.51, "grad_norm": 0.8955254554748535, "learning_rate": 5.058480306448965e-06, "loss": 0.6119, "step": 8072 }, { "epoch": 0.51, "grad_norm": 0.8828347325325012, "learning_rate": 5.057454381722455e-06, "loss": 0.5756, "step": 8073 }, { "epoch": 0.51, "grad_norm": 0.9125185012817383, "learning_rate": 5.056428454576741e-06, "loss": 0.5958, "step": 8074 }, { "epoch": 0.51, "grad_norm": 0.8723667860031128, "learning_rate": 5.0554025250550195e-06, "loss": 0.6055, "step": 8075 }, { "epoch": 0.51, "grad_norm": 0.8208953738212585, "learning_rate": 5.054376593200493e-06, "loss": 0.5141, "step": 8076 }, { "epoch": 0.51, "grad_norm": 0.8895772695541382, "learning_rate": 5.053350659056356e-06, "loss": 0.5693, "step": 8077 }, { "epoch": 0.51, "grad_norm": 0.9235116243362427, "learning_rate": 5.052324722665809e-06, "loss": 0.6041, "step": 8078 }, { "epoch": 0.51, "grad_norm": 0.9034695625305176, "learning_rate": 5.0512987840720495e-06, "loss": 0.5887, "step": 8079 }, { "epoch": 0.51, "grad_norm": 0.9228042960166931, "learning_rate": 5.0502728433182765e-06, "loss": 0.5828, "step": 8080 }, { "epoch": 0.51, "grad_norm": 0.927101731300354, "learning_rate": 5.049246900447689e-06, "loss": 0.6073, "step": 8081 }, { "epoch": 0.51, "grad_norm": 0.8888689279556274, "learning_rate": 5.048220955503487e-06, "loss": 0.581, "step": 8082 }, { "epoch": 0.51, "grad_norm": 0.8097081184387207, "learning_rate": 5.047195008528868e-06, "loss": 0.5485, "step": 8083 }, { "epoch": 0.51, "grad_norm": 0.8781763315200806, "learning_rate": 5.04616905956703e-06, "loss": 0.555, "step": 8084 }, { "epoch": 0.51, "grad_norm": 0.8688362836837769, "learning_rate": 5.045143108661174e-06, "loss": 0.5991, "step": 8085 }, { "epoch": 0.51, "grad_norm": 0.8414211869239807, "learning_rate": 5.044117155854499e-06, "loss": 0.6283, "step": 8086 }, { "epoch": 0.51, "grad_norm": 0.7982466816902161, "learning_rate": 5.043091201190204e-06, "loss": 0.5701, "step": 8087 }, { "epoch": 0.51, "grad_norm": 0.8860836029052734, "learning_rate": 5.042065244711488e-06, "loss": 0.5306, "step": 8088 }, { "epoch": 0.51, "grad_norm": 0.8865799903869629, "learning_rate": 5.041039286461552e-06, "loss": 0.5947, "step": 8089 }, { "epoch": 0.51, "grad_norm": 0.8962934017181396, "learning_rate": 5.040013326483593e-06, "loss": 0.596, "step": 8090 }, { "epoch": 0.51, "grad_norm": 1.0645703077316284, "learning_rate": 5.038987364820813e-06, "loss": 0.65, "step": 8091 }, { "epoch": 0.51, "grad_norm": 0.8976729512214661, "learning_rate": 5.037961401516411e-06, "loss": 0.5364, "step": 8092 }, { "epoch": 0.51, "grad_norm": 0.8850423097610474, "learning_rate": 5.036935436613586e-06, "loss": 0.5901, "step": 8093 }, { "epoch": 0.51, "grad_norm": 0.9318758845329285, "learning_rate": 5.0359094701555375e-06, "loss": 0.5786, "step": 8094 }, { "epoch": 0.51, "grad_norm": 0.8748635649681091, "learning_rate": 5.034883502185467e-06, "loss": 0.5917, "step": 8095 }, { "epoch": 0.51, "grad_norm": 0.894095242023468, "learning_rate": 5.033857532746573e-06, "loss": 0.5896, "step": 8096 }, { "epoch": 0.51, "grad_norm": 0.8149279952049255, "learning_rate": 5.032831561882057e-06, "loss": 0.5542, "step": 8097 }, { "epoch": 0.51, "grad_norm": 0.852733850479126, "learning_rate": 5.0318055896351185e-06, "loss": 0.5888, "step": 8098 }, { "epoch": 0.51, "grad_norm": 0.7920023202896118, "learning_rate": 5.030779616048955e-06, "loss": 0.513, "step": 8099 }, { "epoch": 0.51, "grad_norm": 0.9614823460578918, "learning_rate": 5.02975364116677e-06, "loss": 0.594, "step": 8100 }, { "epoch": 0.51, "grad_norm": 0.9171684980392456, "learning_rate": 5.0287276650317626e-06, "loss": 0.5822, "step": 8101 }, { "epoch": 0.51, "grad_norm": 0.8981472849845886, "learning_rate": 5.027701687687135e-06, "loss": 0.601, "step": 8102 }, { "epoch": 0.51, "grad_norm": 0.8312231302261353, "learning_rate": 5.026675709176084e-06, "loss": 0.5113, "step": 8103 }, { "epoch": 0.51, "grad_norm": 0.915739893913269, "learning_rate": 5.0256497295418115e-06, "loss": 0.5998, "step": 8104 }, { "epoch": 0.51, "grad_norm": 0.9062038660049438, "learning_rate": 5.0246237488275185e-06, "loss": 0.5541, "step": 8105 }, { "epoch": 0.51, "grad_norm": 0.8854556679725647, "learning_rate": 5.0235977670764055e-06, "loss": 0.5467, "step": 8106 }, { "epoch": 0.51, "grad_norm": 0.8781667947769165, "learning_rate": 5.022571784331672e-06, "loss": 0.6031, "step": 8107 }, { "epoch": 0.51, "grad_norm": 0.8494471311569214, "learning_rate": 5.021545800636519e-06, "loss": 0.5906, "step": 8108 }, { "epoch": 0.51, "grad_norm": 0.8764198422431946, "learning_rate": 5.020519816034148e-06, "loss": 0.5826, "step": 8109 }, { "epoch": 0.51, "grad_norm": 0.9023407101631165, "learning_rate": 5.019493830567758e-06, "loss": 0.605, "step": 8110 }, { "epoch": 0.51, "grad_norm": 0.8451856374740601, "learning_rate": 5.018467844280553e-06, "loss": 0.5689, "step": 8111 }, { "epoch": 0.51, "grad_norm": 0.8786736130714417, "learning_rate": 5.0174418572157276e-06, "loss": 0.5584, "step": 8112 }, { "epoch": 0.51, "grad_norm": 0.8404189348220825, "learning_rate": 5.0164158694164884e-06, "loss": 0.5621, "step": 8113 }, { "epoch": 0.51, "grad_norm": 0.9702364802360535, "learning_rate": 5.015389880926035e-06, "loss": 0.614, "step": 8114 }, { "epoch": 0.51, "grad_norm": 0.8589154481887817, "learning_rate": 5.014363891787567e-06, "loss": 0.5671, "step": 8115 }, { "epoch": 0.51, "grad_norm": 0.9409849047660828, "learning_rate": 5.013337902044283e-06, "loss": 0.5915, "step": 8116 }, { "epoch": 0.51, "grad_norm": 1.0001648664474487, "learning_rate": 5.0123119117393894e-06, "loss": 0.5942, "step": 8117 }, { "epoch": 0.51, "grad_norm": 0.86786288022995, "learning_rate": 5.011285920916082e-06, "loss": 0.6033, "step": 8118 }, { "epoch": 0.51, "grad_norm": 0.8556507229804993, "learning_rate": 5.010259929617565e-06, "loss": 0.6032, "step": 8119 }, { "epoch": 0.51, "grad_norm": 0.9855061769485474, "learning_rate": 5.009233937887036e-06, "loss": 0.5679, "step": 8120 }, { "epoch": 0.51, "grad_norm": 0.8764082789421082, "learning_rate": 5.0082079457677e-06, "loss": 0.558, "step": 8121 }, { "epoch": 0.51, "grad_norm": 0.9630783796310425, "learning_rate": 5.007181953302755e-06, "loss": 0.6147, "step": 8122 }, { "epoch": 0.51, "grad_norm": 0.882135808467865, "learning_rate": 5.006155960535405e-06, "loss": 0.606, "step": 8123 }, { "epoch": 0.51, "grad_norm": 0.8694536685943604, "learning_rate": 5.005129967508845e-06, "loss": 0.6031, "step": 8124 }, { "epoch": 0.51, "grad_norm": 0.8778092265129089, "learning_rate": 5.004103974266284e-06, "loss": 0.5793, "step": 8125 }, { "epoch": 0.51, "grad_norm": 0.869263768196106, "learning_rate": 5.0030779808509155e-06, "loss": 0.5959, "step": 8126 }, { "epoch": 0.51, "grad_norm": 0.8371315598487854, "learning_rate": 5.002051987305947e-06, "loss": 0.5804, "step": 8127 }, { "epoch": 0.51, "grad_norm": 0.8696556091308594, "learning_rate": 5.0010259936745735e-06, "loss": 0.544, "step": 8128 }, { "epoch": 0.52, "grad_norm": 0.8770456314086914, "learning_rate": 5e-06, "loss": 0.6189, "step": 8129 }, { "epoch": 0.52, "grad_norm": 0.8599352240562439, "learning_rate": 4.998974006325428e-06, "loss": 0.5789, "step": 8130 }, { "epoch": 0.52, "grad_norm": 0.9081400036811829, "learning_rate": 4.997948012694056e-06, "loss": 0.5858, "step": 8131 }, { "epoch": 0.52, "grad_norm": 0.9235000014305115, "learning_rate": 4.9969220191490845e-06, "loss": 0.6132, "step": 8132 }, { "epoch": 0.52, "grad_norm": 0.8584170341491699, "learning_rate": 4.995896025733719e-06, "loss": 0.6016, "step": 8133 }, { "epoch": 0.52, "grad_norm": 0.9502587914466858, "learning_rate": 4.994870032491156e-06, "loss": 0.5802, "step": 8134 }, { "epoch": 0.52, "grad_norm": 0.8924700021743774, "learning_rate": 4.993844039464598e-06, "loss": 0.6686, "step": 8135 }, { "epoch": 0.52, "grad_norm": 0.8873922824859619, "learning_rate": 4.992818046697245e-06, "loss": 0.5937, "step": 8136 }, { "epoch": 0.52, "grad_norm": 0.9198696613311768, "learning_rate": 4.991792054232301e-06, "loss": 0.5419, "step": 8137 }, { "epoch": 0.52, "grad_norm": 0.8334248661994934, "learning_rate": 4.990766062112966e-06, "loss": 0.5722, "step": 8138 }, { "epoch": 0.52, "grad_norm": 0.8983075022697449, "learning_rate": 4.989740070382438e-06, "loss": 0.588, "step": 8139 }, { "epoch": 0.52, "grad_norm": 0.8269035220146179, "learning_rate": 4.988714079083918e-06, "loss": 0.5973, "step": 8140 }, { "epoch": 0.52, "grad_norm": 1.0999228954315186, "learning_rate": 4.987688088260613e-06, "loss": 0.5795, "step": 8141 }, { "epoch": 0.52, "grad_norm": 0.9255691766738892, "learning_rate": 4.986662097955718e-06, "loss": 0.5924, "step": 8142 }, { "epoch": 0.52, "grad_norm": 0.8680478930473328, "learning_rate": 4.985636108212435e-06, "loss": 0.6024, "step": 8143 }, { "epoch": 0.52, "grad_norm": 0.844215452671051, "learning_rate": 4.984610119073965e-06, "loss": 0.5356, "step": 8144 }, { "epoch": 0.52, "grad_norm": 0.8514224886894226, "learning_rate": 4.9835841305835115e-06, "loss": 0.5734, "step": 8145 }, { "epoch": 0.52, "grad_norm": 0.8678837418556213, "learning_rate": 4.982558142784273e-06, "loss": 0.6142, "step": 8146 }, { "epoch": 0.52, "grad_norm": 0.8894163966178894, "learning_rate": 4.98153215571945e-06, "loss": 0.6196, "step": 8147 }, { "epoch": 0.52, "grad_norm": 0.9071709513664246, "learning_rate": 4.980506169432243e-06, "loss": 0.5878, "step": 8148 }, { "epoch": 0.52, "grad_norm": 0.9013687372207642, "learning_rate": 4.979480183965852e-06, "loss": 0.6077, "step": 8149 }, { "epoch": 0.52, "grad_norm": 0.8970010876655579, "learning_rate": 4.9784541993634824e-06, "loss": 0.5885, "step": 8150 }, { "epoch": 0.52, "grad_norm": 0.9553268551826477, "learning_rate": 4.977428215668329e-06, "loss": 0.6642, "step": 8151 }, { "epoch": 0.52, "grad_norm": 0.8925964832305908, "learning_rate": 4.976402232923597e-06, "loss": 0.6099, "step": 8152 }, { "epoch": 0.52, "grad_norm": 0.9235319495201111, "learning_rate": 4.9753762511724815e-06, "loss": 0.548, "step": 8153 }, { "epoch": 0.52, "grad_norm": 0.8916828036308289, "learning_rate": 4.974350270458189e-06, "loss": 0.6115, "step": 8154 }, { "epoch": 0.52, "grad_norm": 0.8752048015594482, "learning_rate": 4.9733242908239175e-06, "loss": 0.6241, "step": 8155 }, { "epoch": 0.52, "grad_norm": 0.9507616758346558, "learning_rate": 4.972298312312867e-06, "loss": 0.6082, "step": 8156 }, { "epoch": 0.52, "grad_norm": 0.9458578824996948, "learning_rate": 4.9712723349682365e-06, "loss": 0.5945, "step": 8157 }, { "epoch": 0.52, "grad_norm": 0.8483637571334839, "learning_rate": 4.970246358833231e-06, "loss": 0.5418, "step": 8158 }, { "epoch": 0.52, "grad_norm": 0.8706662654876709, "learning_rate": 4.969220383951046e-06, "loss": 0.5383, "step": 8159 }, { "epoch": 0.52, "grad_norm": 0.843956470489502, "learning_rate": 4.968194410364884e-06, "loss": 0.5799, "step": 8160 }, { "epoch": 0.52, "grad_norm": 0.887324333190918, "learning_rate": 4.967168438117945e-06, "loss": 0.5922, "step": 8161 }, { "epoch": 0.52, "grad_norm": 0.8479996919631958, "learning_rate": 4.966142467253428e-06, "loss": 0.5402, "step": 8162 }, { "epoch": 0.52, "grad_norm": 0.906588077545166, "learning_rate": 4.965116497814534e-06, "loss": 0.6009, "step": 8163 }, { "epoch": 0.52, "grad_norm": 0.8441720008850098, "learning_rate": 4.964090529844464e-06, "loss": 0.5834, "step": 8164 }, { "epoch": 0.52, "grad_norm": 0.8537503480911255, "learning_rate": 4.963064563386416e-06, "loss": 0.5948, "step": 8165 }, { "epoch": 0.52, "grad_norm": 0.9106093049049377, "learning_rate": 4.96203859848359e-06, "loss": 0.5898, "step": 8166 }, { "epoch": 0.52, "grad_norm": 0.9316169619560242, "learning_rate": 4.961012635179188e-06, "loss": 0.5515, "step": 8167 }, { "epoch": 0.52, "grad_norm": 0.8743208646774292, "learning_rate": 4.959986673516408e-06, "loss": 0.5791, "step": 8168 }, { "epoch": 0.52, "grad_norm": 0.878601610660553, "learning_rate": 4.95896071353845e-06, "loss": 0.5654, "step": 8169 }, { "epoch": 0.52, "grad_norm": 0.9046491384506226, "learning_rate": 4.9579347552885125e-06, "loss": 0.6205, "step": 8170 }, { "epoch": 0.52, "grad_norm": 0.9015951156616211, "learning_rate": 4.956908798809797e-06, "loss": 0.6079, "step": 8171 }, { "epoch": 0.52, "grad_norm": 0.9551298022270203, "learning_rate": 4.955882844145503e-06, "loss": 0.6354, "step": 8172 }, { "epoch": 0.52, "grad_norm": 0.9143627882003784, "learning_rate": 4.954856891338827e-06, "loss": 0.6224, "step": 8173 }, { "epoch": 0.52, "grad_norm": 0.9006348252296448, "learning_rate": 4.95383094043297e-06, "loss": 0.5597, "step": 8174 }, { "epoch": 0.52, "grad_norm": 0.8101087808609009, "learning_rate": 4.952804991471134e-06, "loss": 0.5693, "step": 8175 }, { "epoch": 0.52, "grad_norm": 0.847748339176178, "learning_rate": 4.951779044496515e-06, "loss": 0.5625, "step": 8176 }, { "epoch": 0.52, "grad_norm": 0.950564980506897, "learning_rate": 4.9507530995523115e-06, "loss": 0.5894, "step": 8177 }, { "epoch": 0.52, "grad_norm": 0.8164709806442261, "learning_rate": 4.949727156681726e-06, "loss": 0.6046, "step": 8178 }, { "epoch": 0.52, "grad_norm": 0.887380838394165, "learning_rate": 4.948701215927951e-06, "loss": 0.5241, "step": 8179 }, { "epoch": 0.52, "grad_norm": 0.8414967060089111, "learning_rate": 4.947675277334193e-06, "loss": 0.5771, "step": 8180 }, { "epoch": 0.52, "grad_norm": 0.9173058867454529, "learning_rate": 4.946649340943645e-06, "loss": 0.6376, "step": 8181 }, { "epoch": 0.52, "grad_norm": 0.9363717436790466, "learning_rate": 4.9456234067995094e-06, "loss": 0.6236, "step": 8182 }, { "epoch": 0.52, "grad_norm": 0.8463205099105835, "learning_rate": 4.9445974749449805e-06, "loss": 0.555, "step": 8183 }, { "epoch": 0.52, "grad_norm": 0.8751280307769775, "learning_rate": 4.9435715454232615e-06, "loss": 0.584, "step": 8184 }, { "epoch": 0.52, "grad_norm": 0.9037527441978455, "learning_rate": 4.942545618277547e-06, "loss": 0.614, "step": 8185 }, { "epoch": 0.52, "grad_norm": 0.8870174884796143, "learning_rate": 4.9415196935510375e-06, "loss": 0.5755, "step": 8186 }, { "epoch": 0.52, "grad_norm": 0.9150660037994385, "learning_rate": 4.940493771286929e-06, "loss": 0.5779, "step": 8187 }, { "epoch": 0.52, "grad_norm": 0.8672343492507935, "learning_rate": 4.939467851528423e-06, "loss": 0.5905, "step": 8188 }, { "epoch": 0.52, "grad_norm": 0.8112956881523132, "learning_rate": 4.938441934318713e-06, "loss": 0.5317, "step": 8189 }, { "epoch": 0.52, "grad_norm": 0.8447852730751038, "learning_rate": 4.937416019701e-06, "loss": 0.5753, "step": 8190 }, { "epoch": 0.52, "grad_norm": 0.8433228135108948, "learning_rate": 4.93639010771848e-06, "loss": 0.5283, "step": 8191 }, { "epoch": 0.52, "grad_norm": 0.8930540084838867, "learning_rate": 4.9353641984143526e-06, "loss": 0.5907, "step": 8192 }, { "epoch": 0.52, "grad_norm": 0.8250675201416016, "learning_rate": 4.934338291831813e-06, "loss": 0.5775, "step": 8193 }, { "epoch": 0.52, "grad_norm": 0.8587763905525208, "learning_rate": 4.93331238801406e-06, "loss": 0.5706, "step": 8194 }, { "epoch": 0.52, "grad_norm": 0.9937714338302612, "learning_rate": 4.932286487004291e-06, "loss": 0.6685, "step": 8195 }, { "epoch": 0.52, "grad_norm": 0.8941221833229065, "learning_rate": 4.931260588845701e-06, "loss": 0.5856, "step": 8196 }, { "epoch": 0.52, "grad_norm": 0.8236309885978699, "learning_rate": 4.930234693581489e-06, "loss": 0.595, "step": 8197 }, { "epoch": 0.52, "grad_norm": 0.8598278760910034, "learning_rate": 4.929208801254851e-06, "loss": 0.5957, "step": 8198 }, { "epoch": 0.52, "grad_norm": 0.9491175413131714, "learning_rate": 4.928182911908987e-06, "loss": 0.6515, "step": 8199 }, { "epoch": 0.52, "grad_norm": 0.847444474697113, "learning_rate": 4.927157025587086e-06, "loss": 0.5541, "step": 8200 }, { "epoch": 0.52, "grad_norm": 0.9040679335594177, "learning_rate": 4.926131142332351e-06, "loss": 0.6053, "step": 8201 }, { "epoch": 0.52, "grad_norm": 0.8832661509513855, "learning_rate": 4.925105262187978e-06, "loss": 0.6243, "step": 8202 }, { "epoch": 0.52, "grad_norm": 0.8717993497848511, "learning_rate": 4.924079385197162e-06, "loss": 0.563, "step": 8203 }, { "epoch": 0.52, "grad_norm": 0.8877679705619812, "learning_rate": 4.923053511403096e-06, "loss": 0.6599, "step": 8204 }, { "epoch": 0.52, "grad_norm": 0.8722405433654785, "learning_rate": 4.922027640848981e-06, "loss": 0.5793, "step": 8205 }, { "epoch": 0.52, "grad_norm": 0.9440850019454956, "learning_rate": 4.921001773578012e-06, "loss": 0.6429, "step": 8206 }, { "epoch": 0.52, "grad_norm": 0.9616214632987976, "learning_rate": 4.9199759096333825e-06, "loss": 0.6532, "step": 8207 }, { "epoch": 0.52, "grad_norm": 0.8866004943847656, "learning_rate": 4.918950049058289e-06, "loss": 0.5907, "step": 8208 }, { "epoch": 0.52, "grad_norm": 0.8617315888404846, "learning_rate": 4.9179241918959255e-06, "loss": 0.6039, "step": 8209 }, { "epoch": 0.52, "grad_norm": 0.8040612936019897, "learning_rate": 4.916898338189491e-06, "loss": 0.5269, "step": 8210 }, { "epoch": 0.52, "grad_norm": 0.8695709705352783, "learning_rate": 4.9158724879821775e-06, "loss": 0.5651, "step": 8211 }, { "epoch": 0.52, "grad_norm": 0.8399918675422668, "learning_rate": 4.914846641317181e-06, "loss": 0.5193, "step": 8212 }, { "epoch": 0.52, "grad_norm": 0.8823307752609253, "learning_rate": 4.913820798237695e-06, "loss": 0.5814, "step": 8213 }, { "epoch": 0.52, "grad_norm": 0.9517965912818909, "learning_rate": 4.912794958786917e-06, "loss": 0.5904, "step": 8214 }, { "epoch": 0.52, "grad_norm": 0.9135156273841858, "learning_rate": 4.91176912300804e-06, "loss": 0.5795, "step": 8215 }, { "epoch": 0.52, "grad_norm": 1.0179460048675537, "learning_rate": 4.9107432909442575e-06, "loss": 0.5925, "step": 8216 }, { "epoch": 0.52, "grad_norm": 0.91028892993927, "learning_rate": 4.909717462638763e-06, "loss": 0.625, "step": 8217 }, { "epoch": 0.52, "grad_norm": 0.9520250558853149, "learning_rate": 4.908691638134754e-06, "loss": 0.6201, "step": 8218 }, { "epoch": 0.52, "grad_norm": 0.897201418876648, "learning_rate": 4.907665817475424e-06, "loss": 0.5532, "step": 8219 }, { "epoch": 0.52, "grad_norm": 0.8576155304908752, "learning_rate": 4.906640000703963e-06, "loss": 0.5918, "step": 8220 }, { "epoch": 0.52, "grad_norm": 0.8770981431007385, "learning_rate": 4.905614187863565e-06, "loss": 0.6275, "step": 8221 }, { "epoch": 0.52, "grad_norm": 1.008365273475647, "learning_rate": 4.904588378997428e-06, "loss": 0.6307, "step": 8222 }, { "epoch": 0.52, "grad_norm": 0.8657634258270264, "learning_rate": 4.903562574148744e-06, "loss": 0.6345, "step": 8223 }, { "epoch": 0.52, "grad_norm": 0.9766127467155457, "learning_rate": 4.902536773360702e-06, "loss": 0.5598, "step": 8224 }, { "epoch": 0.52, "grad_norm": 0.8664228916168213, "learning_rate": 4.9015109766764985e-06, "loss": 0.6031, "step": 8225 }, { "epoch": 0.52, "grad_norm": 0.8865102529525757, "learning_rate": 4.900485184139323e-06, "loss": 0.5766, "step": 8226 }, { "epoch": 0.52, "grad_norm": 0.9038271307945251, "learning_rate": 4.899459395792373e-06, "loss": 0.6025, "step": 8227 }, { "epoch": 0.52, "grad_norm": 0.8609294295310974, "learning_rate": 4.8984336116788355e-06, "loss": 0.5279, "step": 8228 }, { "epoch": 0.52, "grad_norm": 0.846961259841919, "learning_rate": 4.897407831841908e-06, "loss": 0.5631, "step": 8229 }, { "epoch": 0.52, "grad_norm": 0.8961449861526489, "learning_rate": 4.8963820563247765e-06, "loss": 0.5892, "step": 8230 }, { "epoch": 0.52, "grad_norm": 0.9013886451721191, "learning_rate": 4.8953562851706385e-06, "loss": 0.5458, "step": 8231 }, { "epoch": 0.52, "grad_norm": 0.8823043704032898, "learning_rate": 4.894330518422683e-06, "loss": 0.5935, "step": 8232 }, { "epoch": 0.52, "grad_norm": 0.8829339742660522, "learning_rate": 4.893304756124102e-06, "loss": 0.5716, "step": 8233 }, { "epoch": 0.52, "grad_norm": 0.8946317434310913, "learning_rate": 4.8922789983180854e-06, "loss": 0.6174, "step": 8234 }, { "epoch": 0.52, "grad_norm": 0.8930938839912415, "learning_rate": 4.891253245047826e-06, "loss": 0.5584, "step": 8235 }, { "epoch": 0.52, "grad_norm": 0.8877846002578735, "learning_rate": 4.890227496356515e-06, "loss": 0.5851, "step": 8236 }, { "epoch": 0.52, "grad_norm": 0.8552438616752625, "learning_rate": 4.889201752287342e-06, "loss": 0.5844, "step": 8237 }, { "epoch": 0.52, "grad_norm": 0.9162623882293701, "learning_rate": 4.888176012883496e-06, "loss": 0.6057, "step": 8238 }, { "epoch": 0.52, "grad_norm": 0.8288585543632507, "learning_rate": 4.88715027818817e-06, "loss": 0.5871, "step": 8239 }, { "epoch": 0.52, "grad_norm": 0.8896382451057434, "learning_rate": 4.886124548244555e-06, "loss": 0.6483, "step": 8240 }, { "epoch": 0.52, "grad_norm": 0.9036986231803894, "learning_rate": 4.885098823095838e-06, "loss": 0.609, "step": 8241 }, { "epoch": 0.52, "grad_norm": 0.828501284122467, "learning_rate": 4.884073102785209e-06, "loss": 0.5929, "step": 8242 }, { "epoch": 0.52, "grad_norm": 0.8982778191566467, "learning_rate": 4.883047387355858e-06, "loss": 0.5726, "step": 8243 }, { "epoch": 0.52, "grad_norm": 0.9407196640968323, "learning_rate": 4.882021676850977e-06, "loss": 0.5888, "step": 8244 }, { "epoch": 0.52, "grad_norm": 0.9057026505470276, "learning_rate": 4.880995971313752e-06, "loss": 0.5436, "step": 8245 }, { "epoch": 0.52, "grad_norm": 0.8921209573745728, "learning_rate": 4.879970270787372e-06, "loss": 0.6365, "step": 8246 }, { "epoch": 0.52, "grad_norm": 0.9471856951713562, "learning_rate": 4.878944575315025e-06, "loss": 0.5888, "step": 8247 }, { "epoch": 0.52, "grad_norm": 0.8575695753097534, "learning_rate": 4.877918884939903e-06, "loss": 0.599, "step": 8248 }, { "epoch": 0.52, "grad_norm": 0.9358868598937988, "learning_rate": 4.8768931997051925e-06, "loss": 0.5986, "step": 8249 }, { "epoch": 0.52, "grad_norm": 0.8470869660377502, "learning_rate": 4.8758675196540795e-06, "loss": 0.5713, "step": 8250 }, { "epoch": 0.52, "grad_norm": 0.8792859315872192, "learning_rate": 4.874841844829753e-06, "loss": 0.5646, "step": 8251 }, { "epoch": 0.52, "grad_norm": 0.870421826839447, "learning_rate": 4.873816175275402e-06, "loss": 0.5701, "step": 8252 }, { "epoch": 0.52, "grad_norm": 0.882820188999176, "learning_rate": 4.8727905110342135e-06, "loss": 0.6186, "step": 8253 }, { "epoch": 0.52, "grad_norm": 0.8869359493255615, "learning_rate": 4.871764852149373e-06, "loss": 0.6131, "step": 8254 }, { "epoch": 0.52, "grad_norm": 0.870141327381134, "learning_rate": 4.87073919866407e-06, "loss": 0.5999, "step": 8255 }, { "epoch": 0.52, "grad_norm": 0.8610088229179382, "learning_rate": 4.869713550621487e-06, "loss": 0.5949, "step": 8256 }, { "epoch": 0.52, "grad_norm": 0.8822341561317444, "learning_rate": 4.868687908064815e-06, "loss": 0.5805, "step": 8257 }, { "epoch": 0.52, "grad_norm": 0.8881772756576538, "learning_rate": 4.867662271037238e-06, "loss": 0.5319, "step": 8258 }, { "epoch": 0.52, "grad_norm": 0.9677478075027466, "learning_rate": 4.866636639581943e-06, "loss": 0.5925, "step": 8259 }, { "epoch": 0.52, "grad_norm": 0.8670486211776733, "learning_rate": 4.865611013742114e-06, "loss": 0.5811, "step": 8260 }, { "epoch": 0.52, "grad_norm": 0.8827394247055054, "learning_rate": 4.864585393560939e-06, "loss": 0.5945, "step": 8261 }, { "epoch": 0.52, "grad_norm": 0.9279113411903381, "learning_rate": 4.863559779081601e-06, "loss": 0.5824, "step": 8262 }, { "epoch": 0.52, "grad_norm": 0.8230646848678589, "learning_rate": 4.862534170347287e-06, "loss": 0.5946, "step": 8263 }, { "epoch": 0.52, "grad_norm": 0.8288192749023438, "learning_rate": 4.861508567401179e-06, "loss": 0.5486, "step": 8264 }, { "epoch": 0.52, "grad_norm": 0.882305383682251, "learning_rate": 4.860482970286465e-06, "loss": 0.5531, "step": 8265 }, { "epoch": 0.52, "grad_norm": 0.881271481513977, "learning_rate": 4.859457379046327e-06, "loss": 0.577, "step": 8266 }, { "epoch": 0.52, "grad_norm": 0.8755255937576294, "learning_rate": 4.858431793723952e-06, "loss": 0.5614, "step": 8267 }, { "epoch": 0.52, "grad_norm": 0.8271751999855042, "learning_rate": 4.857406214362518e-06, "loss": 0.5615, "step": 8268 }, { "epoch": 0.52, "grad_norm": 0.9304192066192627, "learning_rate": 4.856380641005215e-06, "loss": 0.5808, "step": 8269 }, { "epoch": 0.52, "grad_norm": 0.8733910918235779, "learning_rate": 4.855355073695223e-06, "loss": 0.6571, "step": 8270 }, { "epoch": 0.52, "grad_norm": 0.944700300693512, "learning_rate": 4.8543295124757265e-06, "loss": 0.5915, "step": 8271 }, { "epoch": 0.52, "grad_norm": 0.9210183024406433, "learning_rate": 4.8533039573899075e-06, "loss": 0.6014, "step": 8272 }, { "epoch": 0.52, "grad_norm": 0.8870010375976562, "learning_rate": 4.852278408480946e-06, "loss": 0.5976, "step": 8273 }, { "epoch": 0.52, "grad_norm": 1.0010098218917847, "learning_rate": 4.8512528657920275e-06, "loss": 0.5804, "step": 8274 }, { "epoch": 0.52, "grad_norm": 0.9052338600158691, "learning_rate": 4.850227329366335e-06, "loss": 0.6216, "step": 8275 }, { "epoch": 0.52, "grad_norm": 0.8478895425796509, "learning_rate": 4.849201799247049e-06, "loss": 0.5468, "step": 8276 }, { "epoch": 0.52, "grad_norm": 0.8541980981826782, "learning_rate": 4.848176275477348e-06, "loss": 0.5529, "step": 8277 }, { "epoch": 0.52, "grad_norm": 0.881534218788147, "learning_rate": 4.847150758100418e-06, "loss": 0.581, "step": 8278 }, { "epoch": 0.52, "grad_norm": 0.8824727535247803, "learning_rate": 4.846125247159437e-06, "loss": 0.5844, "step": 8279 }, { "epoch": 0.52, "grad_norm": 0.861589252948761, "learning_rate": 4.845099742697588e-06, "loss": 0.5607, "step": 8280 }, { "epoch": 0.52, "grad_norm": 0.8586124777793884, "learning_rate": 4.844074244758047e-06, "loss": 0.5151, "step": 8281 }, { "epoch": 0.52, "grad_norm": 0.9040012955665588, "learning_rate": 4.843048753383998e-06, "loss": 0.586, "step": 8282 }, { "epoch": 0.52, "grad_norm": 0.8967165350914001, "learning_rate": 4.8420232686186226e-06, "loss": 0.5654, "step": 8283 }, { "epoch": 0.52, "grad_norm": 0.8572660684585571, "learning_rate": 4.840997790505097e-06, "loss": 0.5538, "step": 8284 }, { "epoch": 0.52, "grad_norm": 0.859514594078064, "learning_rate": 4.8399723190866e-06, "loss": 0.5347, "step": 8285 }, { "epoch": 0.52, "grad_norm": 0.8236177563667297, "learning_rate": 4.838946854406311e-06, "loss": 0.5735, "step": 8286 }, { "epoch": 0.53, "grad_norm": 0.8584608435630798, "learning_rate": 4.8379213965074125e-06, "loss": 0.5974, "step": 8287 }, { "epoch": 0.53, "grad_norm": 0.8580573797225952, "learning_rate": 4.83689594543308e-06, "loss": 0.5857, "step": 8288 }, { "epoch": 0.53, "grad_norm": 0.898115873336792, "learning_rate": 4.835870501226489e-06, "loss": 0.6063, "step": 8289 }, { "epoch": 0.53, "grad_norm": 0.8824769258499146, "learning_rate": 4.834845063930821e-06, "loss": 0.5794, "step": 8290 }, { "epoch": 0.53, "grad_norm": 0.7949787378311157, "learning_rate": 4.833819633589254e-06, "loss": 0.5864, "step": 8291 }, { "epoch": 0.53, "grad_norm": 0.8064171671867371, "learning_rate": 4.832794210244965e-06, "loss": 0.5185, "step": 8292 }, { "epoch": 0.53, "grad_norm": 0.9789409041404724, "learning_rate": 4.831768793941129e-06, "loss": 0.6399, "step": 8293 }, { "epoch": 0.53, "grad_norm": 0.8709642887115479, "learning_rate": 4.830743384720922e-06, "loss": 0.5817, "step": 8294 }, { "epoch": 0.53, "grad_norm": 0.9149221181869507, "learning_rate": 4.829717982627525e-06, "loss": 0.5949, "step": 8295 }, { "epoch": 0.53, "grad_norm": 0.8690757751464844, "learning_rate": 4.82869258770411e-06, "loss": 0.6369, "step": 8296 }, { "epoch": 0.53, "grad_norm": 0.8303024172782898, "learning_rate": 4.827667199993855e-06, "loss": 0.5615, "step": 8297 }, { "epoch": 0.53, "grad_norm": 0.8637316226959229, "learning_rate": 4.826641819539933e-06, "loss": 0.557, "step": 8298 }, { "epoch": 0.53, "grad_norm": 0.8349990844726562, "learning_rate": 4.825616446385523e-06, "loss": 0.5814, "step": 8299 }, { "epoch": 0.53, "grad_norm": 0.8609099388122559, "learning_rate": 4.824591080573797e-06, "loss": 0.5872, "step": 8300 }, { "epoch": 0.53, "grad_norm": 0.92775958776474, "learning_rate": 4.823565722147932e-06, "loss": 0.6211, "step": 8301 }, { "epoch": 0.53, "grad_norm": 0.8916222453117371, "learning_rate": 4.8225403711511e-06, "loss": 0.5705, "step": 8302 }, { "epoch": 0.53, "grad_norm": 0.8630041480064392, "learning_rate": 4.821515027626473e-06, "loss": 0.5799, "step": 8303 }, { "epoch": 0.53, "grad_norm": 0.8404906988143921, "learning_rate": 4.8204896916172285e-06, "loss": 0.5419, "step": 8304 }, { "epoch": 0.53, "grad_norm": 0.8835939168930054, "learning_rate": 4.819464363166539e-06, "loss": 0.5335, "step": 8305 }, { "epoch": 0.53, "grad_norm": 0.9106584191322327, "learning_rate": 4.818439042317578e-06, "loss": 0.5901, "step": 8306 }, { "epoch": 0.53, "grad_norm": 0.8627772331237793, "learning_rate": 4.817413729113516e-06, "loss": 0.5799, "step": 8307 }, { "epoch": 0.53, "grad_norm": 0.9338002800941467, "learning_rate": 4.816388423597527e-06, "loss": 0.5736, "step": 8308 }, { "epoch": 0.53, "grad_norm": 0.9331300258636475, "learning_rate": 4.815363125812784e-06, "loss": 0.6421, "step": 8309 }, { "epoch": 0.53, "grad_norm": 0.8660625219345093, "learning_rate": 4.814337835802457e-06, "loss": 0.614, "step": 8310 }, { "epoch": 0.53, "grad_norm": 0.8572609424591064, "learning_rate": 4.813312553609716e-06, "loss": 0.5237, "step": 8311 }, { "epoch": 0.53, "grad_norm": 0.8259177207946777, "learning_rate": 4.812287279277735e-06, "loss": 0.5701, "step": 8312 }, { "epoch": 0.53, "grad_norm": 0.853283703327179, "learning_rate": 4.811262012849685e-06, "loss": 0.5947, "step": 8313 }, { "epoch": 0.53, "grad_norm": 0.885016143321991, "learning_rate": 4.810236754368735e-06, "loss": 0.6032, "step": 8314 }, { "epoch": 0.53, "grad_norm": 0.8650339841842651, "learning_rate": 4.8092115038780525e-06, "loss": 0.6111, "step": 8315 }, { "epoch": 0.53, "grad_norm": 0.8747149109840393, "learning_rate": 4.808186261420811e-06, "loss": 0.5894, "step": 8316 }, { "epoch": 0.53, "grad_norm": 0.8412078619003296, "learning_rate": 4.80716102704018e-06, "loss": 0.581, "step": 8317 }, { "epoch": 0.53, "grad_norm": 0.917317271232605, "learning_rate": 4.806135800779328e-06, "loss": 0.5797, "step": 8318 }, { "epoch": 0.53, "grad_norm": 0.8281989693641663, "learning_rate": 4.805110582681421e-06, "loss": 0.5697, "step": 8319 }, { "epoch": 0.53, "grad_norm": 0.9350634217262268, "learning_rate": 4.804085372789629e-06, "loss": 0.6051, "step": 8320 }, { "epoch": 0.53, "grad_norm": 0.9457853436470032, "learning_rate": 4.803060171147122e-06, "loss": 0.6187, "step": 8321 }, { "epoch": 0.53, "grad_norm": 0.9334213733673096, "learning_rate": 4.802034977797066e-06, "loss": 0.6349, "step": 8322 }, { "epoch": 0.53, "grad_norm": 0.8959923982620239, "learning_rate": 4.801009792782627e-06, "loss": 0.5949, "step": 8323 }, { "epoch": 0.53, "grad_norm": 0.8187436461448669, "learning_rate": 4.799984616146974e-06, "loss": 0.5693, "step": 8324 }, { "epoch": 0.53, "grad_norm": 0.896421492099762, "learning_rate": 4.798959447933274e-06, "loss": 0.6583, "step": 8325 }, { "epoch": 0.53, "grad_norm": 0.9043596386909485, "learning_rate": 4.797934288184692e-06, "loss": 0.5758, "step": 8326 }, { "epoch": 0.53, "grad_norm": 0.959918200969696, "learning_rate": 4.796909136944394e-06, "loss": 0.6453, "step": 8327 }, { "epoch": 0.53, "grad_norm": 0.855787992477417, "learning_rate": 4.795883994255544e-06, "loss": 0.5633, "step": 8328 }, { "epoch": 0.53, "grad_norm": 0.9476739764213562, "learning_rate": 4.794858860161311e-06, "loss": 0.674, "step": 8329 }, { "epoch": 0.53, "grad_norm": 0.8766798973083496, "learning_rate": 4.793833734704858e-06, "loss": 0.6058, "step": 8330 }, { "epoch": 0.53, "grad_norm": 0.8943171501159668, "learning_rate": 4.792808617929348e-06, "loss": 0.59, "step": 8331 }, { "epoch": 0.53, "grad_norm": 0.8322863578796387, "learning_rate": 4.791783509877948e-06, "loss": 0.5921, "step": 8332 }, { "epoch": 0.53, "grad_norm": 0.9394057393074036, "learning_rate": 4.790758410593818e-06, "loss": 0.6143, "step": 8333 }, { "epoch": 0.53, "grad_norm": 0.9464383721351624, "learning_rate": 4.789733320120124e-06, "loss": 0.5695, "step": 8334 }, { "epoch": 0.53, "grad_norm": 0.8929427266120911, "learning_rate": 4.788708238500029e-06, "loss": 0.5768, "step": 8335 }, { "epoch": 0.53, "grad_norm": 0.8872730731964111, "learning_rate": 4.787683165776695e-06, "loss": 0.5809, "step": 8336 }, { "epoch": 0.53, "grad_norm": 0.8962015509605408, "learning_rate": 4.786658101993283e-06, "loss": 0.6007, "step": 8337 }, { "epoch": 0.53, "grad_norm": 0.8641744256019592, "learning_rate": 4.785633047192959e-06, "loss": 0.5726, "step": 8338 }, { "epoch": 0.53, "grad_norm": 0.9444864988327026, "learning_rate": 4.7846080014188786e-06, "loss": 0.6105, "step": 8339 }, { "epoch": 0.53, "grad_norm": 0.8568362593650818, "learning_rate": 4.783582964714209e-06, "loss": 0.6058, "step": 8340 }, { "epoch": 0.53, "grad_norm": 0.8523517847061157, "learning_rate": 4.782557937122104e-06, "loss": 0.5627, "step": 8341 }, { "epoch": 0.53, "grad_norm": 0.9169915914535522, "learning_rate": 4.781532918685731e-06, "loss": 0.556, "step": 8342 }, { "epoch": 0.53, "grad_norm": 0.9116235375404358, "learning_rate": 4.780507909448246e-06, "loss": 0.6041, "step": 8343 }, { "epoch": 0.53, "grad_norm": 0.9121682047843933, "learning_rate": 4.77948290945281e-06, "loss": 0.5696, "step": 8344 }, { "epoch": 0.53, "grad_norm": 0.9193983674049377, "learning_rate": 4.778457918742579e-06, "loss": 0.5995, "step": 8345 }, { "epoch": 0.53, "grad_norm": 0.8698511123657227, "learning_rate": 4.777432937360716e-06, "loss": 0.6134, "step": 8346 }, { "epoch": 0.53, "grad_norm": 0.9621423482894897, "learning_rate": 4.776407965350378e-06, "loss": 0.5889, "step": 8347 }, { "epoch": 0.53, "grad_norm": 0.9202246069908142, "learning_rate": 4.775383002754723e-06, "loss": 0.6282, "step": 8348 }, { "epoch": 0.53, "grad_norm": 0.8784829378128052, "learning_rate": 4.7743580496169095e-06, "loss": 0.6325, "step": 8349 }, { "epoch": 0.53, "grad_norm": 0.8858938813209534, "learning_rate": 4.773333105980091e-06, "loss": 0.5691, "step": 8350 }, { "epoch": 0.53, "grad_norm": 0.8641536831855774, "learning_rate": 4.772308171887427e-06, "loss": 0.5179, "step": 8351 }, { "epoch": 0.53, "grad_norm": 0.9512357115745544, "learning_rate": 4.771283247382076e-06, "loss": 0.6028, "step": 8352 }, { "epoch": 0.53, "grad_norm": 0.852192223072052, "learning_rate": 4.770258332507191e-06, "loss": 0.5482, "step": 8353 }, { "epoch": 0.53, "grad_norm": 0.8949208855628967, "learning_rate": 4.7692334273059265e-06, "loss": 0.6007, "step": 8354 }, { "epoch": 0.53, "grad_norm": 0.9022393822669983, "learning_rate": 4.768208531821441e-06, "loss": 0.5518, "step": 8355 }, { "epoch": 0.53, "grad_norm": 0.8701500296592712, "learning_rate": 4.767183646096889e-06, "loss": 0.5991, "step": 8356 }, { "epoch": 0.53, "grad_norm": 0.8898680806159973, "learning_rate": 4.766158770175422e-06, "loss": 0.6007, "step": 8357 }, { "epoch": 0.53, "grad_norm": 0.8867197036743164, "learning_rate": 4.765133904100196e-06, "loss": 0.5388, "step": 8358 }, { "epoch": 0.53, "grad_norm": 0.9473680257797241, "learning_rate": 4.764109047914365e-06, "loss": 0.582, "step": 8359 }, { "epoch": 0.53, "grad_norm": 0.9777132272720337, "learning_rate": 4.763084201661081e-06, "loss": 0.5981, "step": 8360 }, { "epoch": 0.53, "grad_norm": 0.9255326390266418, "learning_rate": 4.762059365383497e-06, "loss": 0.6236, "step": 8361 }, { "epoch": 0.53, "grad_norm": 0.834649920463562, "learning_rate": 4.761034539124765e-06, "loss": 0.5596, "step": 8362 }, { "epoch": 0.53, "grad_norm": 0.970477819442749, "learning_rate": 4.760009722928038e-06, "loss": 0.6285, "step": 8363 }, { "epoch": 0.53, "grad_norm": 0.9015950560569763, "learning_rate": 4.7589849168364675e-06, "loss": 0.5778, "step": 8364 }, { "epoch": 0.53, "grad_norm": 0.9307251572608948, "learning_rate": 4.7579601208932015e-06, "loss": 0.6193, "step": 8365 }, { "epoch": 0.53, "grad_norm": 0.8972157835960388, "learning_rate": 4.756935335141395e-06, "loss": 0.5971, "step": 8366 }, { "epoch": 0.53, "grad_norm": 0.8648176193237305, "learning_rate": 4.755910559624194e-06, "loss": 0.5711, "step": 8367 }, { "epoch": 0.53, "grad_norm": 0.8773466348648071, "learning_rate": 4.754885794384752e-06, "loss": 0.5989, "step": 8368 }, { "epoch": 0.53, "grad_norm": 0.9213095903396606, "learning_rate": 4.7538610394662156e-06, "loss": 0.5402, "step": 8369 }, { "epoch": 0.53, "grad_norm": 0.9090799689292908, "learning_rate": 4.7528362949117355e-06, "loss": 0.5578, "step": 8370 }, { "epoch": 0.53, "grad_norm": 0.8713887929916382, "learning_rate": 4.751811560764457e-06, "loss": 0.5654, "step": 8371 }, { "epoch": 0.53, "grad_norm": 0.8781121373176575, "learning_rate": 4.750786837067532e-06, "loss": 0.5971, "step": 8372 }, { "epoch": 0.53, "grad_norm": 0.955092191696167, "learning_rate": 4.7497621238641055e-06, "loss": 0.612, "step": 8373 }, { "epoch": 0.53, "grad_norm": 0.9178210496902466, "learning_rate": 4.7487374211973266e-06, "loss": 0.6232, "step": 8374 }, { "epoch": 0.53, "grad_norm": 0.8522612452507019, "learning_rate": 4.747712729110339e-06, "loss": 0.5371, "step": 8375 }, { "epoch": 0.53, "grad_norm": 0.8494625687599182, "learning_rate": 4.746688047646293e-06, "loss": 0.5617, "step": 8376 }, { "epoch": 0.53, "grad_norm": 0.8461270332336426, "learning_rate": 4.745663376848331e-06, "loss": 0.5576, "step": 8377 }, { "epoch": 0.53, "grad_norm": 0.9114232659339905, "learning_rate": 4.744638716759599e-06, "loss": 0.6225, "step": 8378 }, { "epoch": 0.53, "grad_norm": 0.8230855464935303, "learning_rate": 4.743614067423245e-06, "loss": 0.6225, "step": 8379 }, { "epoch": 0.53, "grad_norm": 0.8851600885391235, "learning_rate": 4.742589428882406e-06, "loss": 0.5544, "step": 8380 }, { "epoch": 0.53, "grad_norm": 0.8462688326835632, "learning_rate": 4.7415648011802335e-06, "loss": 0.6182, "step": 8381 }, { "epoch": 0.53, "grad_norm": 0.8912700414657593, "learning_rate": 4.7405401843598686e-06, "loss": 0.5913, "step": 8382 }, { "epoch": 0.53, "grad_norm": 0.8330943584442139, "learning_rate": 4.739515578464454e-06, "loss": 0.5526, "step": 8383 }, { "epoch": 0.53, "grad_norm": 0.9202174544334412, "learning_rate": 4.73849098353713e-06, "loss": 0.6407, "step": 8384 }, { "epoch": 0.53, "grad_norm": 0.8993576169013977, "learning_rate": 4.737466399621043e-06, "loss": 0.5996, "step": 8385 }, { "epoch": 0.53, "grad_norm": 0.9567261934280396, "learning_rate": 4.736441826759332e-06, "loss": 0.5523, "step": 8386 }, { "epoch": 0.53, "grad_norm": 0.8996643424034119, "learning_rate": 4.73541726499514e-06, "loss": 0.5853, "step": 8387 }, { "epoch": 0.53, "grad_norm": 0.8818598389625549, "learning_rate": 4.734392714371603e-06, "loss": 0.6365, "step": 8388 }, { "epoch": 0.53, "grad_norm": 0.8730207681655884, "learning_rate": 4.733368174931867e-06, "loss": 0.5728, "step": 8389 }, { "epoch": 0.53, "grad_norm": 0.870194673538208, "learning_rate": 4.7323436467190705e-06, "loss": 0.549, "step": 8390 }, { "epoch": 0.53, "grad_norm": 0.9448916912078857, "learning_rate": 4.7313191297763524e-06, "loss": 0.5897, "step": 8391 }, { "epoch": 0.53, "grad_norm": 0.859308123588562, "learning_rate": 4.730294624146849e-06, "loss": 0.5922, "step": 8392 }, { "epoch": 0.53, "grad_norm": 0.9392966628074646, "learning_rate": 4.729270129873701e-06, "loss": 0.5768, "step": 8393 }, { "epoch": 0.53, "grad_norm": 0.919874370098114, "learning_rate": 4.728245647000047e-06, "loss": 0.5809, "step": 8394 }, { "epoch": 0.53, "grad_norm": 0.8396472930908203, "learning_rate": 4.7272211755690245e-06, "loss": 0.6086, "step": 8395 }, { "epoch": 0.53, "grad_norm": 0.8304100632667542, "learning_rate": 4.7261967156237676e-06, "loss": 0.5668, "step": 8396 }, { "epoch": 0.53, "grad_norm": 0.856200098991394, "learning_rate": 4.725172267207413e-06, "loss": 0.5228, "step": 8397 }, { "epoch": 0.53, "grad_norm": 0.8551792502403259, "learning_rate": 4.724147830363101e-06, "loss": 0.5902, "step": 8398 }, { "epoch": 0.53, "grad_norm": 0.8715303540229797, "learning_rate": 4.723123405133965e-06, "loss": 0.6097, "step": 8399 }, { "epoch": 0.53, "grad_norm": 0.8818318843841553, "learning_rate": 4.722098991563137e-06, "loss": 0.584, "step": 8400 }, { "epoch": 0.53, "grad_norm": 0.8724188804626465, "learning_rate": 4.721074589693753e-06, "loss": 0.5802, "step": 8401 }, { "epoch": 0.53, "grad_norm": 0.8455575108528137, "learning_rate": 4.72005019956895e-06, "loss": 0.5751, "step": 8402 }, { "epoch": 0.53, "grad_norm": 0.8873419165611267, "learning_rate": 4.719025821231859e-06, "loss": 0.5904, "step": 8403 }, { "epoch": 0.53, "grad_norm": 0.9218294620513916, "learning_rate": 4.718001454725612e-06, "loss": 0.5189, "step": 8404 }, { "epoch": 0.53, "grad_norm": 0.9355472326278687, "learning_rate": 4.716977100093342e-06, "loss": 0.6187, "step": 8405 }, { "epoch": 0.53, "grad_norm": 0.881987988948822, "learning_rate": 4.715952757378183e-06, "loss": 0.5762, "step": 8406 }, { "epoch": 0.53, "grad_norm": 0.9212351441383362, "learning_rate": 4.714928426623266e-06, "loss": 0.5961, "step": 8407 }, { "epoch": 0.53, "grad_norm": 0.890076220035553, "learning_rate": 4.71390410787172e-06, "loss": 0.6016, "step": 8408 }, { "epoch": 0.53, "grad_norm": 0.9456012845039368, "learning_rate": 4.712879801166676e-06, "loss": 0.5956, "step": 8409 }, { "epoch": 0.53, "grad_norm": 0.9153468012809753, "learning_rate": 4.711855506551267e-06, "loss": 0.6155, "step": 8410 }, { "epoch": 0.53, "grad_norm": 0.9316279292106628, "learning_rate": 4.71083122406862e-06, "loss": 0.5859, "step": 8411 }, { "epoch": 0.53, "grad_norm": 0.879622220993042, "learning_rate": 4.709806953761863e-06, "loss": 0.6248, "step": 8412 }, { "epoch": 0.53, "grad_norm": 0.8345575928688049, "learning_rate": 4.7087826956741266e-06, "loss": 0.6002, "step": 8413 }, { "epoch": 0.53, "grad_norm": 0.8680174946784973, "learning_rate": 4.707758449848536e-06, "loss": 0.5105, "step": 8414 }, { "epoch": 0.53, "grad_norm": 0.9455978870391846, "learning_rate": 4.7067342163282225e-06, "loss": 0.571, "step": 8415 }, { "epoch": 0.53, "grad_norm": 0.828173816204071, "learning_rate": 4.70570999515631e-06, "loss": 0.5763, "step": 8416 }, { "epoch": 0.53, "grad_norm": 0.9333354234695435, "learning_rate": 4.704685786375927e-06, "loss": 0.5924, "step": 8417 }, { "epoch": 0.53, "grad_norm": 0.9066340923309326, "learning_rate": 4.703661590030196e-06, "loss": 0.5833, "step": 8418 }, { "epoch": 0.53, "grad_norm": 0.8967267274856567, "learning_rate": 4.702637406162247e-06, "loss": 0.6445, "step": 8419 }, { "epoch": 0.53, "grad_norm": 0.8336849808692932, "learning_rate": 4.7016132348152e-06, "loss": 0.5238, "step": 8420 }, { "epoch": 0.53, "grad_norm": 1.0905916690826416, "learning_rate": 4.700589076032184e-06, "loss": 0.5929, "step": 8421 }, { "epoch": 0.53, "grad_norm": 0.8906887173652649, "learning_rate": 4.699564929856318e-06, "loss": 0.6375, "step": 8422 }, { "epoch": 0.53, "grad_norm": 0.8552356362342834, "learning_rate": 4.698540796330729e-06, "loss": 0.5987, "step": 8423 }, { "epoch": 0.53, "grad_norm": 0.8900651931762695, "learning_rate": 4.697516675498538e-06, "loss": 0.5935, "step": 8424 }, { "epoch": 0.53, "grad_norm": 0.9135156869888306, "learning_rate": 4.69649256740287e-06, "loss": 0.5729, "step": 8425 }, { "epoch": 0.53, "grad_norm": 0.9399777054786682, "learning_rate": 4.695468472086841e-06, "loss": 0.6642, "step": 8426 }, { "epoch": 0.53, "grad_norm": 0.9039340019226074, "learning_rate": 4.694444389593576e-06, "loss": 0.5794, "step": 8427 }, { "epoch": 0.53, "grad_norm": 0.8976691961288452, "learning_rate": 4.693420319966195e-06, "loss": 0.6221, "step": 8428 }, { "epoch": 0.53, "grad_norm": 0.8583334684371948, "learning_rate": 4.692396263247818e-06, "loss": 0.6189, "step": 8429 }, { "epoch": 0.53, "grad_norm": 0.8833329677581787, "learning_rate": 4.691372219481564e-06, "loss": 0.5687, "step": 8430 }, { "epoch": 0.53, "grad_norm": 0.8784264326095581, "learning_rate": 4.690348188710552e-06, "loss": 0.6358, "step": 8431 }, { "epoch": 0.53, "grad_norm": 0.8706404566764832, "learning_rate": 4.689324170977901e-06, "loss": 0.5894, "step": 8432 }, { "epoch": 0.53, "grad_norm": 0.82457035779953, "learning_rate": 4.688300166326729e-06, "loss": 0.5753, "step": 8433 }, { "epoch": 0.53, "grad_norm": 0.8955191969871521, "learning_rate": 4.6872761748001515e-06, "loss": 0.5895, "step": 8434 }, { "epoch": 0.53, "grad_norm": 0.8949235081672668, "learning_rate": 4.6862521964412865e-06, "loss": 0.6284, "step": 8435 }, { "epoch": 0.53, "grad_norm": 0.9302405714988708, "learning_rate": 4.6852282312932505e-06, "loss": 0.6205, "step": 8436 }, { "epoch": 0.53, "grad_norm": 0.9242597818374634, "learning_rate": 4.684204279399159e-06, "loss": 0.6397, "step": 8437 }, { "epoch": 0.53, "grad_norm": 0.907974898815155, "learning_rate": 4.683180340802126e-06, "loss": 0.6082, "step": 8438 }, { "epoch": 0.53, "grad_norm": 0.8790863752365112, "learning_rate": 4.682156415545266e-06, "loss": 0.5588, "step": 8439 }, { "epoch": 0.53, "grad_norm": 0.8933101892471313, "learning_rate": 4.681132503671696e-06, "loss": 0.5786, "step": 8440 }, { "epoch": 0.53, "grad_norm": 0.9507586359977722, "learning_rate": 4.680108605224526e-06, "loss": 0.5959, "step": 8441 }, { "epoch": 0.53, "grad_norm": 0.8982723355293274, "learning_rate": 4.679084720246869e-06, "loss": 0.587, "step": 8442 }, { "epoch": 0.53, "grad_norm": 0.8750470280647278, "learning_rate": 4.67806084878184e-06, "loss": 0.5799, "step": 8443 }, { "epoch": 0.53, "grad_norm": 0.859678328037262, "learning_rate": 4.677036990872546e-06, "loss": 0.565, "step": 8444 }, { "epoch": 0.54, "grad_norm": 0.8528252243995667, "learning_rate": 4.676013146562103e-06, "loss": 0.5786, "step": 8445 }, { "epoch": 0.54, "grad_norm": 1.003036379814148, "learning_rate": 4.674989315893618e-06, "loss": 0.6294, "step": 8446 }, { "epoch": 0.54, "grad_norm": 0.849640429019928, "learning_rate": 4.6739654989102034e-06, "loss": 0.5834, "step": 8447 }, { "epoch": 0.54, "grad_norm": 0.9035535454750061, "learning_rate": 4.672941695654965e-06, "loss": 0.5989, "step": 8448 }, { "epoch": 0.54, "grad_norm": 0.8059716820716858, "learning_rate": 4.6719179061710164e-06, "loss": 0.5307, "step": 8449 }, { "epoch": 0.54, "grad_norm": 0.8504626154899597, "learning_rate": 4.670894130501462e-06, "loss": 0.5043, "step": 8450 }, { "epoch": 0.54, "grad_norm": 0.9729040861129761, "learning_rate": 4.669870368689414e-06, "loss": 0.5991, "step": 8451 }, { "epoch": 0.54, "grad_norm": 0.9525192975997925, "learning_rate": 4.668846620777972e-06, "loss": 0.6014, "step": 8452 }, { "epoch": 0.54, "grad_norm": 0.8673512935638428, "learning_rate": 4.6678228868102495e-06, "loss": 0.6118, "step": 8453 }, { "epoch": 0.54, "grad_norm": 0.8499407172203064, "learning_rate": 4.666799166829349e-06, "loss": 0.5924, "step": 8454 }, { "epoch": 0.54, "grad_norm": 0.930546224117279, "learning_rate": 4.665775460878377e-06, "loss": 0.5947, "step": 8455 }, { "epoch": 0.54, "grad_norm": 0.8475348949432373, "learning_rate": 4.664751769000436e-06, "loss": 0.5222, "step": 8456 }, { "epoch": 0.54, "grad_norm": 0.9042877554893494, "learning_rate": 4.663728091238634e-06, "loss": 0.58, "step": 8457 }, { "epoch": 0.54, "grad_norm": 0.8196518421173096, "learning_rate": 4.662704427636071e-06, "loss": 0.5491, "step": 8458 }, { "epoch": 0.54, "grad_norm": 0.9162909984588623, "learning_rate": 4.661680778235852e-06, "loss": 0.5964, "step": 8459 }, { "epoch": 0.54, "grad_norm": 0.907317042350769, "learning_rate": 4.660657143081079e-06, "loss": 0.6112, "step": 8460 }, { "epoch": 0.54, "grad_norm": 0.9425126314163208, "learning_rate": 4.65963352221485e-06, "loss": 0.591, "step": 8461 }, { "epoch": 0.54, "grad_norm": 0.8683360815048218, "learning_rate": 4.658609915680272e-06, "loss": 0.5176, "step": 8462 }, { "epoch": 0.54, "grad_norm": 0.8283640742301941, "learning_rate": 4.657586323520443e-06, "loss": 0.5724, "step": 8463 }, { "epoch": 0.54, "grad_norm": 0.873866617679596, "learning_rate": 4.6565627457784625e-06, "loss": 0.5928, "step": 8464 }, { "epoch": 0.54, "grad_norm": 0.8793148398399353, "learning_rate": 4.655539182497428e-06, "loss": 0.5796, "step": 8465 }, { "epoch": 0.54, "grad_norm": 0.8088488578796387, "learning_rate": 4.654515633720442e-06, "loss": 0.4934, "step": 8466 }, { "epoch": 0.54, "grad_norm": 0.8243443369865417, "learning_rate": 4.653492099490601e-06, "loss": 0.5183, "step": 8467 }, { "epoch": 0.54, "grad_norm": 0.9187846779823303, "learning_rate": 4.6524685798510025e-06, "loss": 0.6225, "step": 8468 }, { "epoch": 0.54, "grad_norm": 0.8890441060066223, "learning_rate": 4.651445074844742e-06, "loss": 0.5769, "step": 8469 }, { "epoch": 0.54, "grad_norm": 0.9298631548881531, "learning_rate": 4.650421584514917e-06, "loss": 0.5943, "step": 8470 }, { "epoch": 0.54, "grad_norm": 0.8094522356987, "learning_rate": 4.649398108904624e-06, "loss": 0.5371, "step": 8471 }, { "epoch": 0.54, "grad_norm": 0.8985278606414795, "learning_rate": 4.648374648056957e-06, "loss": 0.5962, "step": 8472 }, { "epoch": 0.54, "grad_norm": 0.870664656162262, "learning_rate": 4.64735120201501e-06, "loss": 0.5929, "step": 8473 }, { "epoch": 0.54, "grad_norm": 0.8985655307769775, "learning_rate": 4.646327770821875e-06, "loss": 0.6354, "step": 8474 }, { "epoch": 0.54, "grad_norm": 0.8748487830162048, "learning_rate": 4.64530435452065e-06, "loss": 0.5795, "step": 8475 }, { "epoch": 0.54, "grad_norm": 0.8997553586959839, "learning_rate": 4.644280953154424e-06, "loss": 0.6348, "step": 8476 }, { "epoch": 0.54, "grad_norm": 0.9536176323890686, "learning_rate": 4.643257566766289e-06, "loss": 0.5866, "step": 8477 }, { "epoch": 0.54, "grad_norm": 0.8656853437423706, "learning_rate": 4.642234195399336e-06, "loss": 0.5839, "step": 8478 }, { "epoch": 0.54, "grad_norm": 0.8885663151741028, "learning_rate": 4.641210839096659e-06, "loss": 0.5734, "step": 8479 }, { "epoch": 0.54, "grad_norm": 0.9137561917304993, "learning_rate": 4.6401874979013455e-06, "loss": 0.5855, "step": 8480 }, { "epoch": 0.54, "grad_norm": 0.8827475905418396, "learning_rate": 4.639164171856483e-06, "loss": 0.6211, "step": 8481 }, { "epoch": 0.54, "grad_norm": 0.9084077477455139, "learning_rate": 4.6381408610051605e-06, "loss": 0.5965, "step": 8482 }, { "epoch": 0.54, "grad_norm": 0.9235100746154785, "learning_rate": 4.63711756539047e-06, "loss": 0.6093, "step": 8483 }, { "epoch": 0.54, "grad_norm": 0.8328654170036316, "learning_rate": 4.636094285055497e-06, "loss": 0.5547, "step": 8484 }, { "epoch": 0.54, "grad_norm": 0.8300716280937195, "learning_rate": 4.635071020043326e-06, "loss": 0.534, "step": 8485 }, { "epoch": 0.54, "grad_norm": 0.914543628692627, "learning_rate": 4.634047770397044e-06, "loss": 0.5686, "step": 8486 }, { "epoch": 0.54, "grad_norm": 0.9101009964942932, "learning_rate": 4.633024536159739e-06, "loss": 0.5694, "step": 8487 }, { "epoch": 0.54, "grad_norm": 0.8731689453125, "learning_rate": 4.632001317374495e-06, "loss": 0.5888, "step": 8488 }, { "epoch": 0.54, "grad_norm": 0.8360764384269714, "learning_rate": 4.630978114084394e-06, "loss": 0.5559, "step": 8489 }, { "epoch": 0.54, "grad_norm": 0.956150233745575, "learning_rate": 4.629954926332522e-06, "loss": 0.6186, "step": 8490 }, { "epoch": 0.54, "grad_norm": 0.9069817066192627, "learning_rate": 4.628931754161959e-06, "loss": 0.5812, "step": 8491 }, { "epoch": 0.54, "grad_norm": 0.8588123917579651, "learning_rate": 4.62790859761579e-06, "loss": 0.5806, "step": 8492 }, { "epoch": 0.54, "grad_norm": 0.9087151288986206, "learning_rate": 4.626885456737095e-06, "loss": 0.6061, "step": 8493 }, { "epoch": 0.54, "grad_norm": 0.9100707173347473, "learning_rate": 4.625862331568957e-06, "loss": 0.5807, "step": 8494 }, { "epoch": 0.54, "grad_norm": 0.9260814785957336, "learning_rate": 4.624839222154453e-06, "loss": 0.5917, "step": 8495 }, { "epoch": 0.54, "grad_norm": 0.9124268293380737, "learning_rate": 4.623816128536665e-06, "loss": 0.5771, "step": 8496 }, { "epoch": 0.54, "grad_norm": 0.9149200320243835, "learning_rate": 4.6227930507586705e-06, "loss": 0.5622, "step": 8497 }, { "epoch": 0.54, "grad_norm": 0.9365261197090149, "learning_rate": 4.62176998886355e-06, "loss": 0.6093, "step": 8498 }, { "epoch": 0.54, "grad_norm": 0.8503932952880859, "learning_rate": 4.620746942894377e-06, "loss": 0.6117, "step": 8499 }, { "epoch": 0.54, "grad_norm": 0.8979615569114685, "learning_rate": 4.619723912894232e-06, "loss": 0.5852, "step": 8500 }, { "epoch": 0.54, "grad_norm": 0.8997284770011902, "learning_rate": 4.618700898906191e-06, "loss": 0.5506, "step": 8501 }, { "epoch": 0.54, "grad_norm": 0.8395345211029053, "learning_rate": 4.6176779009733295e-06, "loss": 0.5371, "step": 8502 }, { "epoch": 0.54, "grad_norm": 0.8006191253662109, "learning_rate": 4.616654919138719e-06, "loss": 0.5285, "step": 8503 }, { "epoch": 0.54, "grad_norm": 0.9190979599952698, "learning_rate": 4.6156319534454365e-06, "loss": 0.6529, "step": 8504 }, { "epoch": 0.54, "grad_norm": 0.875033438205719, "learning_rate": 4.614609003936558e-06, "loss": 0.5774, "step": 8505 }, { "epoch": 0.54, "grad_norm": 0.8436771035194397, "learning_rate": 4.613586070655152e-06, "loss": 0.5751, "step": 8506 }, { "epoch": 0.54, "grad_norm": 0.8874161243438721, "learning_rate": 4.612563153644292e-06, "loss": 0.581, "step": 8507 }, { "epoch": 0.54, "grad_norm": 0.8197293281555176, "learning_rate": 4.6115402529470495e-06, "loss": 0.6048, "step": 8508 }, { "epoch": 0.54, "grad_norm": 0.901355504989624, "learning_rate": 4.610517368606497e-06, "loss": 0.5648, "step": 8509 }, { "epoch": 0.54, "grad_norm": 0.8736656308174133, "learning_rate": 4.609494500665703e-06, "loss": 0.5775, "step": 8510 }, { "epoch": 0.54, "grad_norm": 0.9123381972312927, "learning_rate": 4.608471649167737e-06, "loss": 0.5824, "step": 8511 }, { "epoch": 0.54, "grad_norm": 0.895682156085968, "learning_rate": 4.6074488141556656e-06, "loss": 0.6338, "step": 8512 }, { "epoch": 0.54, "grad_norm": 0.8595967292785645, "learning_rate": 4.606425995672562e-06, "loss": 0.5794, "step": 8513 }, { "epoch": 0.54, "grad_norm": 0.8449206948280334, "learning_rate": 4.605403193761489e-06, "loss": 0.5957, "step": 8514 }, { "epoch": 0.54, "grad_norm": 0.8243349194526672, "learning_rate": 4.604380408465516e-06, "loss": 0.55, "step": 8515 }, { "epoch": 0.54, "grad_norm": 0.8854864239692688, "learning_rate": 4.603357639827705e-06, "loss": 0.6184, "step": 8516 }, { "epoch": 0.54, "grad_norm": 0.9017980098724365, "learning_rate": 4.602334887891127e-06, "loss": 0.6258, "step": 8517 }, { "epoch": 0.54, "grad_norm": 0.9609394669532776, "learning_rate": 4.601312152698843e-06, "loss": 0.6221, "step": 8518 }, { "epoch": 0.54, "grad_norm": 0.9184016585350037, "learning_rate": 4.600289434293917e-06, "loss": 0.5955, "step": 8519 }, { "epoch": 0.54, "grad_norm": 0.9105634093284607, "learning_rate": 4.599266732719413e-06, "loss": 0.5936, "step": 8520 }, { "epoch": 0.54, "grad_norm": 0.8601149320602417, "learning_rate": 4.598244048018391e-06, "loss": 0.5765, "step": 8521 }, { "epoch": 0.54, "grad_norm": 0.8680559396743774, "learning_rate": 4.5972213802339165e-06, "loss": 0.6048, "step": 8522 }, { "epoch": 0.54, "grad_norm": 0.8764021396636963, "learning_rate": 4.596198729409047e-06, "loss": 0.6259, "step": 8523 }, { "epoch": 0.54, "grad_norm": 1.0313016176223755, "learning_rate": 4.5951760955868455e-06, "loss": 0.5857, "step": 8524 }, { "epoch": 0.54, "grad_norm": 0.9298897385597229, "learning_rate": 4.594153478810368e-06, "loss": 0.603, "step": 8525 }, { "epoch": 0.54, "grad_norm": 0.904453456401825, "learning_rate": 4.593130879122678e-06, "loss": 0.5608, "step": 8526 }, { "epoch": 0.54, "grad_norm": 0.9432054162025452, "learning_rate": 4.59210829656683e-06, "loss": 0.5982, "step": 8527 }, { "epoch": 0.54, "grad_norm": 0.8348836302757263, "learning_rate": 4.591085731185885e-06, "loss": 0.5458, "step": 8528 }, { "epoch": 0.54, "grad_norm": 0.9127042293548584, "learning_rate": 4.590063183022894e-06, "loss": 0.5765, "step": 8529 }, { "epoch": 0.54, "grad_norm": 0.8295519351959229, "learning_rate": 4.589040652120919e-06, "loss": 0.5914, "step": 8530 }, { "epoch": 0.54, "grad_norm": 0.9889672994613647, "learning_rate": 4.588018138523011e-06, "loss": 0.5926, "step": 8531 }, { "epoch": 0.54, "grad_norm": 0.8858618140220642, "learning_rate": 4.5869956422722274e-06, "loss": 0.5637, "step": 8532 }, { "epoch": 0.54, "grad_norm": 0.8900063633918762, "learning_rate": 4.585973163411618e-06, "loss": 0.5885, "step": 8533 }, { "epoch": 0.54, "grad_norm": 0.8373422026634216, "learning_rate": 4.584950701984241e-06, "loss": 0.56, "step": 8534 }, { "epoch": 0.54, "grad_norm": 0.8420644998550415, "learning_rate": 4.583928258033145e-06, "loss": 0.5761, "step": 8535 }, { "epoch": 0.54, "grad_norm": 0.8812116980552673, "learning_rate": 4.5829058316013835e-06, "loss": 0.5584, "step": 8536 }, { "epoch": 0.54, "grad_norm": 0.967736542224884, "learning_rate": 4.581883422732007e-06, "loss": 0.615, "step": 8537 }, { "epoch": 0.54, "grad_norm": 0.8752156496047974, "learning_rate": 4.580861031468062e-06, "loss": 0.5622, "step": 8538 }, { "epoch": 0.54, "grad_norm": 0.845308780670166, "learning_rate": 4.579838657852603e-06, "loss": 0.5925, "step": 8539 }, { "epoch": 0.54, "grad_norm": 0.8537322282791138, "learning_rate": 4.578816301928677e-06, "loss": 0.5804, "step": 8540 }, { "epoch": 0.54, "grad_norm": 0.8931176066398621, "learning_rate": 4.577793963739331e-06, "loss": 0.5816, "step": 8541 }, { "epoch": 0.54, "grad_norm": 0.855497419834137, "learning_rate": 4.576771643327611e-06, "loss": 0.5514, "step": 8542 }, { "epoch": 0.54, "grad_norm": 0.8563072681427002, "learning_rate": 4.575749340736565e-06, "loss": 0.5706, "step": 8543 }, { "epoch": 0.54, "grad_norm": 0.8625338673591614, "learning_rate": 4.57472705600924e-06, "loss": 0.5584, "step": 8544 }, { "epoch": 0.54, "grad_norm": 0.9388693571090698, "learning_rate": 4.573704789188679e-06, "loss": 0.6424, "step": 8545 }, { "epoch": 0.54, "grad_norm": 0.8577854633331299, "learning_rate": 4.5726825403179245e-06, "loss": 0.6327, "step": 8546 }, { "epoch": 0.54, "grad_norm": 0.8097984194755554, "learning_rate": 4.571660309440022e-06, "loss": 0.5456, "step": 8547 }, { "epoch": 0.54, "grad_norm": 0.9322377443313599, "learning_rate": 4.570638096598016e-06, "loss": 0.6238, "step": 8548 }, { "epoch": 0.54, "grad_norm": 0.9196782112121582, "learning_rate": 4.569615901834946e-06, "loss": 0.578, "step": 8549 }, { "epoch": 0.54, "grad_norm": 0.9435470700263977, "learning_rate": 4.568593725193852e-06, "loss": 0.5887, "step": 8550 }, { "epoch": 0.54, "grad_norm": 0.8405277132987976, "learning_rate": 4.567571566717774e-06, "loss": 0.5792, "step": 8551 }, { "epoch": 0.54, "grad_norm": 0.8100456595420837, "learning_rate": 4.566549426449755e-06, "loss": 0.5389, "step": 8552 }, { "epoch": 0.54, "grad_norm": 0.8953537940979004, "learning_rate": 4.565527304432833e-06, "loss": 0.5842, "step": 8553 }, { "epoch": 0.54, "grad_norm": 0.8631918430328369, "learning_rate": 4.564505200710042e-06, "loss": 0.5341, "step": 8554 }, { "epoch": 0.54, "grad_norm": 0.8625524640083313, "learning_rate": 4.5634831153244215e-06, "loss": 0.5662, "step": 8555 }, { "epoch": 0.54, "grad_norm": 0.8663583993911743, "learning_rate": 4.562461048319011e-06, "loss": 0.563, "step": 8556 }, { "epoch": 0.54, "grad_norm": 0.9523765444755554, "learning_rate": 4.561438999736844e-06, "loss": 0.6671, "step": 8557 }, { "epoch": 0.54, "grad_norm": 0.9273942708969116, "learning_rate": 4.5604169696209535e-06, "loss": 0.6043, "step": 8558 }, { "epoch": 0.54, "grad_norm": 0.8723426461219788, "learning_rate": 4.559394958014375e-06, "loss": 0.6176, "step": 8559 }, { "epoch": 0.54, "grad_norm": 0.9040724635124207, "learning_rate": 4.558372964960142e-06, "loss": 0.594, "step": 8560 }, { "epoch": 0.54, "grad_norm": 0.8431350588798523, "learning_rate": 4.557350990501288e-06, "loss": 0.5618, "step": 8561 }, { "epoch": 0.54, "grad_norm": 0.8898385763168335, "learning_rate": 4.556329034680845e-06, "loss": 0.5719, "step": 8562 }, { "epoch": 0.54, "grad_norm": 0.9010729789733887, "learning_rate": 4.55530709754184e-06, "loss": 0.6032, "step": 8563 }, { "epoch": 0.54, "grad_norm": 0.9148017764091492, "learning_rate": 4.5542851791273085e-06, "loss": 0.6184, "step": 8564 }, { "epoch": 0.54, "grad_norm": 0.8323689103126526, "learning_rate": 4.5532632794802766e-06, "loss": 0.5297, "step": 8565 }, { "epoch": 0.54, "grad_norm": 0.8298326134681702, "learning_rate": 4.5522413986437745e-06, "loss": 0.5933, "step": 8566 }, { "epoch": 0.54, "grad_norm": 0.9563860893249512, "learning_rate": 4.55121953666083e-06, "loss": 0.5663, "step": 8567 }, { "epoch": 0.54, "grad_norm": 0.8822575211524963, "learning_rate": 4.550197693574468e-06, "loss": 0.5713, "step": 8568 }, { "epoch": 0.54, "grad_norm": 0.9293971657752991, "learning_rate": 4.549175869427717e-06, "loss": 0.5852, "step": 8569 }, { "epoch": 0.54, "grad_norm": 0.846694827079773, "learning_rate": 4.548154064263603e-06, "loss": 0.558, "step": 8570 }, { "epoch": 0.54, "grad_norm": 1.0178662538528442, "learning_rate": 4.547132278125149e-06, "loss": 0.5941, "step": 8571 }, { "epoch": 0.54, "grad_norm": 0.9423683881759644, "learning_rate": 4.546110511055377e-06, "loss": 0.5525, "step": 8572 }, { "epoch": 0.54, "grad_norm": 0.8660984039306641, "learning_rate": 4.545088763097314e-06, "loss": 0.5657, "step": 8573 }, { "epoch": 0.54, "grad_norm": 0.8998304605484009, "learning_rate": 4.544067034293982e-06, "loss": 0.5886, "step": 8574 }, { "epoch": 0.54, "grad_norm": 0.8911488056182861, "learning_rate": 4.543045324688401e-06, "loss": 0.55, "step": 8575 }, { "epoch": 0.54, "grad_norm": 0.8904201984405518, "learning_rate": 4.542023634323589e-06, "loss": 0.5812, "step": 8576 }, { "epoch": 0.54, "grad_norm": 0.8831244111061096, "learning_rate": 4.54100196324257e-06, "loss": 0.5895, "step": 8577 }, { "epoch": 0.54, "grad_norm": 0.877057671546936, "learning_rate": 4.539980311488363e-06, "loss": 0.5412, "step": 8578 }, { "epoch": 0.54, "grad_norm": 0.8660837411880493, "learning_rate": 4.538958679103984e-06, "loss": 0.5673, "step": 8579 }, { "epoch": 0.54, "grad_norm": 0.8540948033332825, "learning_rate": 4.5379370661324495e-06, "loss": 0.5545, "step": 8580 }, { "epoch": 0.54, "grad_norm": 0.8788366317749023, "learning_rate": 4.536915472616779e-06, "loss": 0.577, "step": 8581 }, { "epoch": 0.54, "grad_norm": 0.8697229623794556, "learning_rate": 4.535893898599988e-06, "loss": 0.5517, "step": 8582 }, { "epoch": 0.54, "grad_norm": 0.9252592921257019, "learning_rate": 4.53487234412509e-06, "loss": 0.5577, "step": 8583 }, { "epoch": 0.54, "grad_norm": 0.8896933197975159, "learning_rate": 4.533850809235099e-06, "loss": 0.6227, "step": 8584 }, { "epoch": 0.54, "grad_norm": 0.9034548401832581, "learning_rate": 4.532829293973028e-06, "loss": 0.6235, "step": 8585 }, { "epoch": 0.54, "grad_norm": 0.9039227962493896, "learning_rate": 4.531807798381892e-06, "loss": 0.5804, "step": 8586 }, { "epoch": 0.54, "grad_norm": 0.8506115674972534, "learning_rate": 4.5307863225047e-06, "loss": 0.5546, "step": 8587 }, { "epoch": 0.54, "grad_norm": 0.9655522108078003, "learning_rate": 4.529764866384464e-06, "loss": 0.6156, "step": 8588 }, { "epoch": 0.54, "grad_norm": 0.8476807475090027, "learning_rate": 4.528743430064192e-06, "loss": 0.5422, "step": 8589 }, { "epoch": 0.54, "grad_norm": 0.9019517302513123, "learning_rate": 4.527722013586897e-06, "loss": 0.5895, "step": 8590 }, { "epoch": 0.54, "grad_norm": 0.9369930624961853, "learning_rate": 4.5267006169955855e-06, "loss": 0.6462, "step": 8591 }, { "epoch": 0.54, "grad_norm": 0.9092143774032593, "learning_rate": 4.525679240333262e-06, "loss": 0.5999, "step": 8592 }, { "epoch": 0.54, "grad_norm": 0.8883408904075623, "learning_rate": 4.524657883642936e-06, "loss": 0.629, "step": 8593 }, { "epoch": 0.54, "grad_norm": 0.8866313099861145, "learning_rate": 4.5236365469676144e-06, "loss": 0.5621, "step": 8594 }, { "epoch": 0.54, "grad_norm": 0.8671311736106873, "learning_rate": 4.522615230350302e-06, "loss": 0.633, "step": 8595 }, { "epoch": 0.54, "grad_norm": 0.8295920491218567, "learning_rate": 4.521593933833998e-06, "loss": 0.5545, "step": 8596 }, { "epoch": 0.54, "grad_norm": 0.8872106671333313, "learning_rate": 4.520572657461712e-06, "loss": 0.5561, "step": 8597 }, { "epoch": 0.54, "grad_norm": 0.9319486021995544, "learning_rate": 4.519551401276441e-06, "loss": 0.5819, "step": 8598 }, { "epoch": 0.54, "grad_norm": 0.8339968323707581, "learning_rate": 4.518530165321192e-06, "loss": 0.5802, "step": 8599 }, { "epoch": 0.54, "grad_norm": 0.910203218460083, "learning_rate": 4.517508949638961e-06, "loss": 0.5946, "step": 8600 }, { "epoch": 0.54, "grad_norm": 0.8642169833183289, "learning_rate": 4.516487754272751e-06, "loss": 0.6044, "step": 8601 }, { "epoch": 0.54, "grad_norm": 0.8620786070823669, "learning_rate": 4.515466579265557e-06, "loss": 0.5943, "step": 8602 }, { "epoch": 0.55, "grad_norm": 0.865822970867157, "learning_rate": 4.5144454246603816e-06, "loss": 0.5797, "step": 8603 }, { "epoch": 0.55, "grad_norm": 0.9090112447738647, "learning_rate": 4.51342429050022e-06, "loss": 0.5848, "step": 8604 }, { "epoch": 0.55, "grad_norm": 0.8540524244308472, "learning_rate": 4.51240317682807e-06, "loss": 0.6534, "step": 8605 }, { "epoch": 0.55, "grad_norm": 0.8696991801261902, "learning_rate": 4.5113820836869234e-06, "loss": 0.5707, "step": 8606 }, { "epoch": 0.55, "grad_norm": 0.9889391660690308, "learning_rate": 4.51036101111978e-06, "loss": 0.6631, "step": 8607 }, { "epoch": 0.55, "grad_norm": 0.8236830234527588, "learning_rate": 4.509339959169629e-06, "loss": 0.55, "step": 8608 }, { "epoch": 0.55, "grad_norm": 0.9895977973937988, "learning_rate": 4.508318927879468e-06, "loss": 0.6829, "step": 8609 }, { "epoch": 0.55, "grad_norm": 0.8496975302696228, "learning_rate": 4.507297917292284e-06, "loss": 0.537, "step": 8610 }, { "epoch": 0.55, "grad_norm": 0.8107864260673523, "learning_rate": 4.506276927451072e-06, "loss": 0.5522, "step": 8611 }, { "epoch": 0.55, "grad_norm": 0.8916783928871155, "learning_rate": 4.505255958398821e-06, "loss": 0.6091, "step": 8612 }, { "epoch": 0.55, "grad_norm": 0.9526239037513733, "learning_rate": 4.504235010178521e-06, "loss": 0.5811, "step": 8613 }, { "epoch": 0.55, "grad_norm": 0.9013256430625916, "learning_rate": 4.503214082833161e-06, "loss": 0.5874, "step": 8614 }, { "epoch": 0.55, "grad_norm": 0.8835528492927551, "learning_rate": 4.502193176405724e-06, "loss": 0.5748, "step": 8615 }, { "epoch": 0.55, "grad_norm": 0.8880060315132141, "learning_rate": 4.501172290939203e-06, "loss": 0.559, "step": 8616 }, { "epoch": 0.55, "grad_norm": 0.8688036799430847, "learning_rate": 4.5001514264765826e-06, "loss": 0.5764, "step": 8617 }, { "epoch": 0.55, "grad_norm": 0.8470838069915771, "learning_rate": 4.499130583060845e-06, "loss": 0.558, "step": 8618 }, { "epoch": 0.55, "grad_norm": 0.8374934196472168, "learning_rate": 4.4981097607349764e-06, "loss": 0.5753, "step": 8619 }, { "epoch": 0.55, "grad_norm": 0.8633030652999878, "learning_rate": 4.49708895954196e-06, "loss": 0.5417, "step": 8620 }, { "epoch": 0.55, "grad_norm": 0.9553747177124023, "learning_rate": 4.496068179524778e-06, "loss": 0.6072, "step": 8621 }, { "epoch": 0.55, "grad_norm": 0.9089109301567078, "learning_rate": 4.495047420726412e-06, "loss": 0.6185, "step": 8622 }, { "epoch": 0.55, "grad_norm": 0.881161630153656, "learning_rate": 4.494026683189843e-06, "loss": 0.6265, "step": 8623 }, { "epoch": 0.55, "grad_norm": 0.943405032157898, "learning_rate": 4.493005966958049e-06, "loss": 0.5904, "step": 8624 }, { "epoch": 0.55, "grad_norm": 0.8863667845726013, "learning_rate": 4.4919852720740115e-06, "loss": 0.5585, "step": 8625 }, { "epoch": 0.55, "grad_norm": 0.9154953956604004, "learning_rate": 4.490964598580706e-06, "loss": 0.6232, "step": 8626 }, { "epoch": 0.55, "grad_norm": 0.885014533996582, "learning_rate": 4.489943946521111e-06, "loss": 0.5734, "step": 8627 }, { "epoch": 0.55, "grad_norm": 0.9125478267669678, "learning_rate": 4.4889233159382e-06, "loss": 0.6038, "step": 8628 }, { "epoch": 0.55, "grad_norm": 0.9338325262069702, "learning_rate": 4.487902706874954e-06, "loss": 0.6203, "step": 8629 }, { "epoch": 0.55, "grad_norm": 0.98078852891922, "learning_rate": 4.486882119374341e-06, "loss": 0.6177, "step": 8630 }, { "epoch": 0.55, "grad_norm": 0.9384462833404541, "learning_rate": 4.485861553479338e-06, "loss": 0.6567, "step": 8631 }, { "epoch": 0.55, "grad_norm": 0.9040749669075012, "learning_rate": 4.484841009232914e-06, "loss": 0.6106, "step": 8632 }, { "epoch": 0.55, "grad_norm": 0.8856215476989746, "learning_rate": 4.483820486678047e-06, "loss": 0.5848, "step": 8633 }, { "epoch": 0.55, "grad_norm": 0.9175398349761963, "learning_rate": 4.482799985857701e-06, "loss": 0.6391, "step": 8634 }, { "epoch": 0.55, "grad_norm": 0.8846474885940552, "learning_rate": 4.48177950681485e-06, "loss": 0.568, "step": 8635 }, { "epoch": 0.55, "grad_norm": 0.8936927318572998, "learning_rate": 4.480759049592458e-06, "loss": 0.6202, "step": 8636 }, { "epoch": 0.55, "grad_norm": 0.8895360231399536, "learning_rate": 4.4797386142335e-06, "loss": 0.5881, "step": 8637 }, { "epoch": 0.55, "grad_norm": 0.8970397710800171, "learning_rate": 4.478718200780936e-06, "loss": 0.622, "step": 8638 }, { "epoch": 0.55, "grad_norm": 0.8260961771011353, "learning_rate": 4.477697809277738e-06, "loss": 0.5248, "step": 8639 }, { "epoch": 0.55, "grad_norm": 0.8553221821784973, "learning_rate": 4.476677439766865e-06, "loss": 0.5772, "step": 8640 }, { "epoch": 0.55, "grad_norm": 0.9180177450180054, "learning_rate": 4.475657092291287e-06, "loss": 0.6119, "step": 8641 }, { "epoch": 0.55, "grad_norm": 0.9326549172401428, "learning_rate": 4.4746367668939646e-06, "loss": 0.6103, "step": 8642 }, { "epoch": 0.55, "grad_norm": 0.9045451879501343, "learning_rate": 4.4736164636178605e-06, "loss": 0.5519, "step": 8643 }, { "epoch": 0.55, "grad_norm": 0.8994148969650269, "learning_rate": 4.472596182505936e-06, "loss": 0.5811, "step": 8644 }, { "epoch": 0.55, "grad_norm": 0.9174748659133911, "learning_rate": 4.47157592360115e-06, "loss": 0.5954, "step": 8645 }, { "epoch": 0.55, "grad_norm": 0.881083607673645, "learning_rate": 4.470555686946464e-06, "loss": 0.5622, "step": 8646 }, { "epoch": 0.55, "grad_norm": 0.9171141982078552, "learning_rate": 4.469535472584837e-06, "loss": 0.5954, "step": 8647 }, { "epoch": 0.55, "grad_norm": 0.9388046264648438, "learning_rate": 4.468515280559227e-06, "loss": 0.598, "step": 8648 }, { "epoch": 0.55, "grad_norm": 0.8907988667488098, "learning_rate": 4.467495110912587e-06, "loss": 0.6237, "step": 8649 }, { "epoch": 0.55, "grad_norm": 0.8721764087677002, "learning_rate": 4.466474963687876e-06, "loss": 0.6328, "step": 8650 }, { "epoch": 0.55, "grad_norm": 0.8680887222290039, "learning_rate": 4.46545483892805e-06, "loss": 0.6172, "step": 8651 }, { "epoch": 0.55, "grad_norm": 1.012911081314087, "learning_rate": 4.464434736676061e-06, "loss": 0.5685, "step": 8652 }, { "epoch": 0.55, "grad_norm": 0.9347690343856812, "learning_rate": 4.46341465697486e-06, "loss": 0.6096, "step": 8653 }, { "epoch": 0.55, "grad_norm": 0.7885945439338684, "learning_rate": 4.462394599867402e-06, "loss": 0.5374, "step": 8654 }, { "epoch": 0.55, "grad_norm": 0.8694291114807129, "learning_rate": 4.461374565396638e-06, "loss": 0.561, "step": 8655 }, { "epoch": 0.55, "grad_norm": 0.8917961716651917, "learning_rate": 4.460354553605518e-06, "loss": 0.6115, "step": 8656 }, { "epoch": 0.55, "grad_norm": 0.8918887376785278, "learning_rate": 4.459334564536988e-06, "loss": 0.6392, "step": 8657 }, { "epoch": 0.55, "grad_norm": 0.8710853457450867, "learning_rate": 4.458314598234e-06, "loss": 0.579, "step": 8658 }, { "epoch": 0.55, "grad_norm": 0.8684004545211792, "learning_rate": 4.4572946547395e-06, "loss": 0.5717, "step": 8659 }, { "epoch": 0.55, "grad_norm": 0.8856059908866882, "learning_rate": 4.456274734096436e-06, "loss": 0.5882, "step": 8660 }, { "epoch": 0.55, "grad_norm": 0.9372711181640625, "learning_rate": 4.455254836347749e-06, "loss": 0.6284, "step": 8661 }, { "epoch": 0.55, "grad_norm": 0.8738864064216614, "learning_rate": 4.454234961536384e-06, "loss": 0.5937, "step": 8662 }, { "epoch": 0.55, "grad_norm": 0.8427755832672119, "learning_rate": 4.45321510970529e-06, "loss": 0.5822, "step": 8663 }, { "epoch": 0.55, "grad_norm": 0.8868844509124756, "learning_rate": 4.452195280897405e-06, "loss": 0.5819, "step": 8664 }, { "epoch": 0.55, "grad_norm": 0.8627145290374756, "learning_rate": 4.451175475155669e-06, "loss": 0.5566, "step": 8665 }, { "epoch": 0.55, "grad_norm": 0.9276217222213745, "learning_rate": 4.450155692523025e-06, "loss": 0.5756, "step": 8666 }, { "epoch": 0.55, "grad_norm": 0.8811351656913757, "learning_rate": 4.449135933042414e-06, "loss": 0.5945, "step": 8667 }, { "epoch": 0.55, "grad_norm": 0.9855297803878784, "learning_rate": 4.448116196756771e-06, "loss": 0.6194, "step": 8668 }, { "epoch": 0.55, "grad_norm": 0.813127338886261, "learning_rate": 4.447096483709035e-06, "loss": 0.5267, "step": 8669 }, { "epoch": 0.55, "grad_norm": 0.8596179485321045, "learning_rate": 4.4460767939421425e-06, "loss": 0.5878, "step": 8670 }, { "epoch": 0.55, "grad_norm": 0.9059436917304993, "learning_rate": 4.44505712749903e-06, "loss": 0.5663, "step": 8671 }, { "epoch": 0.55, "grad_norm": 0.9185728430747986, "learning_rate": 4.444037484422632e-06, "loss": 0.6195, "step": 8672 }, { "epoch": 0.55, "grad_norm": 0.8514521718025208, "learning_rate": 4.44301786475588e-06, "loss": 0.5572, "step": 8673 }, { "epoch": 0.55, "grad_norm": 0.8482089042663574, "learning_rate": 4.441998268541708e-06, "loss": 0.5652, "step": 8674 }, { "epoch": 0.55, "grad_norm": 0.8797301054000854, "learning_rate": 4.440978695823049e-06, "loss": 0.5766, "step": 8675 }, { "epoch": 0.55, "grad_norm": 0.8916350603103638, "learning_rate": 4.439959146642833e-06, "loss": 0.5874, "step": 8676 }, { "epoch": 0.55, "grad_norm": 0.9491859078407288, "learning_rate": 4.4389396210439886e-06, "loss": 0.5796, "step": 8677 }, { "epoch": 0.55, "grad_norm": 0.8709646463394165, "learning_rate": 4.437920119069445e-06, "loss": 0.6025, "step": 8678 }, { "epoch": 0.55, "grad_norm": 0.9544169306755066, "learning_rate": 4.436900640762128e-06, "loss": 0.6039, "step": 8679 }, { "epoch": 0.55, "grad_norm": 0.9094875454902649, "learning_rate": 4.435881186164968e-06, "loss": 0.595, "step": 8680 }, { "epoch": 0.55, "grad_norm": 0.9330776333808899, "learning_rate": 4.434861755320888e-06, "loss": 0.5634, "step": 8681 }, { "epoch": 0.55, "grad_norm": 0.8398060202598572, "learning_rate": 4.433842348272815e-06, "loss": 0.573, "step": 8682 }, { "epoch": 0.55, "grad_norm": 0.8635173439979553, "learning_rate": 4.4328229650636676e-06, "loss": 0.6207, "step": 8683 }, { "epoch": 0.55, "grad_norm": 0.8932275176048279, "learning_rate": 4.431803605736376e-06, "loss": 0.5626, "step": 8684 }, { "epoch": 0.55, "grad_norm": 0.8580974340438843, "learning_rate": 4.430784270333855e-06, "loss": 0.6231, "step": 8685 }, { "epoch": 0.55, "grad_norm": 0.8090934753417969, "learning_rate": 4.429764958899031e-06, "loss": 0.524, "step": 8686 }, { "epoch": 0.55, "grad_norm": 0.8802515864372253, "learning_rate": 4.428745671474818e-06, "loss": 0.5884, "step": 8687 }, { "epoch": 0.55, "grad_norm": 0.9674070477485657, "learning_rate": 4.427726408104139e-06, "loss": 0.6131, "step": 8688 }, { "epoch": 0.55, "grad_norm": 0.8582731485366821, "learning_rate": 4.42670716882991e-06, "loss": 0.6055, "step": 8689 }, { "epoch": 0.55, "grad_norm": 0.87099689245224, "learning_rate": 4.4256879536950495e-06, "loss": 0.5777, "step": 8690 }, { "epoch": 0.55, "grad_norm": 0.8034923076629639, "learning_rate": 4.4246687627424686e-06, "loss": 0.5599, "step": 8691 }, { "epoch": 0.55, "grad_norm": 0.9542539119720459, "learning_rate": 4.423649596015086e-06, "loss": 0.6035, "step": 8692 }, { "epoch": 0.55, "grad_norm": 0.8825756907463074, "learning_rate": 4.422630453555814e-06, "loss": 0.5617, "step": 8693 }, { "epoch": 0.55, "grad_norm": 0.9769675135612488, "learning_rate": 4.4216113354075654e-06, "loss": 0.5966, "step": 8694 }, { "epoch": 0.55, "grad_norm": 0.8933631777763367, "learning_rate": 4.420592241613251e-06, "loss": 0.6053, "step": 8695 }, { "epoch": 0.55, "grad_norm": 0.9478098154067993, "learning_rate": 4.4195731722157805e-06, "loss": 0.5765, "step": 8696 }, { "epoch": 0.55, "grad_norm": 0.8842805027961731, "learning_rate": 4.418554127258066e-06, "loss": 0.6167, "step": 8697 }, { "epoch": 0.55, "grad_norm": 0.9180606007575989, "learning_rate": 4.417535106783015e-06, "loss": 0.6466, "step": 8698 }, { "epoch": 0.55, "grad_norm": 0.8405973315238953, "learning_rate": 4.416516110833533e-06, "loss": 0.5296, "step": 8699 }, { "epoch": 0.55, "grad_norm": 0.8814043998718262, "learning_rate": 4.415497139452528e-06, "loss": 0.6272, "step": 8700 }, { "epoch": 0.55, "grad_norm": 0.9587448835372925, "learning_rate": 4.414478192682905e-06, "loss": 0.6149, "step": 8701 }, { "epoch": 0.55, "grad_norm": 0.8646758198738098, "learning_rate": 4.41345927056757e-06, "loss": 0.53, "step": 8702 }, { "epoch": 0.55, "grad_norm": 0.917334794998169, "learning_rate": 4.4124403731494235e-06, "loss": 0.6441, "step": 8703 }, { "epoch": 0.55, "grad_norm": 0.8852252960205078, "learning_rate": 4.4114215004713665e-06, "loss": 0.5874, "step": 8704 }, { "epoch": 0.55, "grad_norm": 0.8766316771507263, "learning_rate": 4.410402652576307e-06, "loss": 0.5342, "step": 8705 }, { "epoch": 0.55, "grad_norm": 0.904898464679718, "learning_rate": 4.409383829507139e-06, "loss": 0.5368, "step": 8706 }, { "epoch": 0.55, "grad_norm": 0.877981960773468, "learning_rate": 4.408365031306763e-06, "loss": 0.5601, "step": 8707 }, { "epoch": 0.55, "grad_norm": 0.9853324890136719, "learning_rate": 4.407346258018078e-06, "loss": 0.6007, "step": 8708 }, { "epoch": 0.55, "grad_norm": 0.8749459981918335, "learning_rate": 4.4063275096839785e-06, "loss": 0.5521, "step": 8709 }, { "epoch": 0.55, "grad_norm": 0.9380457997322083, "learning_rate": 4.405308786347365e-06, "loss": 0.5872, "step": 8710 }, { "epoch": 0.55, "grad_norm": 0.9295644164085388, "learning_rate": 4.404290088051128e-06, "loss": 0.5885, "step": 8711 }, { "epoch": 0.55, "grad_norm": 0.8839128613471985, "learning_rate": 4.403271414838164e-06, "loss": 0.5699, "step": 8712 }, { "epoch": 0.55, "grad_norm": 0.9140700101852417, "learning_rate": 4.402252766751363e-06, "loss": 0.5752, "step": 8713 }, { "epoch": 0.55, "grad_norm": 0.8850396871566772, "learning_rate": 4.401234143833621e-06, "loss": 0.5811, "step": 8714 }, { "epoch": 0.55, "grad_norm": 0.8859695196151733, "learning_rate": 4.400215546127825e-06, "loss": 0.5739, "step": 8715 }, { "epoch": 0.55, "grad_norm": 0.8375210762023926, "learning_rate": 4.399196973676867e-06, "loss": 0.5541, "step": 8716 }, { "epoch": 0.55, "grad_norm": 0.9590082168579102, "learning_rate": 4.398178426523632e-06, "loss": 0.6474, "step": 8717 }, { "epoch": 0.55, "grad_norm": 0.9406694769859314, "learning_rate": 4.3971599047110116e-06, "loss": 0.5898, "step": 8718 }, { "epoch": 0.55, "grad_norm": 0.8761373162269592, "learning_rate": 4.3961414082818904e-06, "loss": 0.5739, "step": 8719 }, { "epoch": 0.55, "grad_norm": 0.9185157418251038, "learning_rate": 4.395122937279154e-06, "loss": 0.6258, "step": 8720 }, { "epoch": 0.55, "grad_norm": 0.8924233317375183, "learning_rate": 4.394104491745686e-06, "loss": 0.5819, "step": 8721 }, { "epoch": 0.55, "grad_norm": 0.7826727032661438, "learning_rate": 4.393086071724371e-06, "loss": 0.5175, "step": 8722 }, { "epoch": 0.55, "grad_norm": 0.9112175107002258, "learning_rate": 4.392067677258089e-06, "loss": 0.644, "step": 8723 }, { "epoch": 0.55, "grad_norm": 0.8790960907936096, "learning_rate": 4.391049308389723e-06, "loss": 0.5537, "step": 8724 }, { "epoch": 0.55, "grad_norm": 0.8678017854690552, "learning_rate": 4.390030965162153e-06, "loss": 0.5639, "step": 8725 }, { "epoch": 0.55, "grad_norm": 0.8150790929794312, "learning_rate": 4.389012647618255e-06, "loss": 0.5596, "step": 8726 }, { "epoch": 0.55, "grad_norm": 0.9205260872840881, "learning_rate": 4.387994355800909e-06, "loss": 0.6169, "step": 8727 }, { "epoch": 0.55, "grad_norm": 0.8948895931243896, "learning_rate": 4.386976089752994e-06, "loss": 0.5627, "step": 8728 }, { "epoch": 0.55, "grad_norm": 0.9792115688323975, "learning_rate": 4.385957849517383e-06, "loss": 0.6722, "step": 8729 }, { "epoch": 0.55, "grad_norm": 0.8200992941856384, "learning_rate": 4.384939635136948e-06, "loss": 0.5813, "step": 8730 }, { "epoch": 0.55, "grad_norm": 0.8706687092781067, "learning_rate": 4.383921446654567e-06, "loss": 0.5613, "step": 8731 }, { "epoch": 0.55, "grad_norm": 0.9445881247520447, "learning_rate": 4.3829032841131116e-06, "loss": 0.5922, "step": 8732 }, { "epoch": 0.55, "grad_norm": 0.9358285665512085, "learning_rate": 4.381885147555453e-06, "loss": 0.6582, "step": 8733 }, { "epoch": 0.55, "grad_norm": 0.9091733694076538, "learning_rate": 4.380867037024457e-06, "loss": 0.5389, "step": 8734 }, { "epoch": 0.55, "grad_norm": 0.8815348744392395, "learning_rate": 4.379848952562999e-06, "loss": 0.6216, "step": 8735 }, { "epoch": 0.55, "grad_norm": 0.859954833984375, "learning_rate": 4.3788308942139435e-06, "loss": 0.568, "step": 8736 }, { "epoch": 0.55, "grad_norm": 0.838782548904419, "learning_rate": 4.3778128620201595e-06, "loss": 0.546, "step": 8737 }, { "epoch": 0.55, "grad_norm": 0.8781520128250122, "learning_rate": 4.376794856024509e-06, "loss": 0.6458, "step": 8738 }, { "epoch": 0.55, "grad_norm": 0.908060610294342, "learning_rate": 4.37577687626986e-06, "loss": 0.5873, "step": 8739 }, { "epoch": 0.55, "grad_norm": 0.8939605355262756, "learning_rate": 4.374758922799076e-06, "loss": 0.5933, "step": 8740 }, { "epoch": 0.55, "grad_norm": 0.9307653307914734, "learning_rate": 4.373740995655019e-06, "loss": 0.6065, "step": 8741 }, { "epoch": 0.55, "grad_norm": 0.9014899134635925, "learning_rate": 4.372723094880549e-06, "loss": 0.667, "step": 8742 }, { "epoch": 0.55, "grad_norm": 0.9142792820930481, "learning_rate": 4.371705220518526e-06, "loss": 0.5868, "step": 8743 }, { "epoch": 0.55, "grad_norm": 0.8495569229125977, "learning_rate": 4.3706873726118135e-06, "loss": 0.5737, "step": 8744 }, { "epoch": 0.55, "grad_norm": 0.8663256168365479, "learning_rate": 4.369669551203266e-06, "loss": 0.5857, "step": 8745 }, { "epoch": 0.55, "grad_norm": 0.8795154690742493, "learning_rate": 4.368651756335739e-06, "loss": 0.601, "step": 8746 }, { "epoch": 0.55, "grad_norm": 0.9166411757469177, "learning_rate": 4.36763398805209e-06, "loss": 0.5774, "step": 8747 }, { "epoch": 0.55, "grad_norm": 0.9235051870346069, "learning_rate": 4.366616246395177e-06, "loss": 0.5795, "step": 8748 }, { "epoch": 0.55, "grad_norm": 0.9201914072036743, "learning_rate": 4.365598531407849e-06, "loss": 0.6128, "step": 8749 }, { "epoch": 0.55, "grad_norm": 0.9426406025886536, "learning_rate": 4.364580843132959e-06, "loss": 0.5561, "step": 8750 }, { "epoch": 0.55, "grad_norm": 0.8633618354797363, "learning_rate": 4.363563181613359e-06, "loss": 0.5643, "step": 8751 }, { "epoch": 0.55, "grad_norm": 0.8648019433021545, "learning_rate": 4.362545546891901e-06, "loss": 0.5829, "step": 8752 }, { "epoch": 0.55, "grad_norm": 0.9268175363540649, "learning_rate": 4.361527939011433e-06, "loss": 0.5497, "step": 8753 }, { "epoch": 0.55, "grad_norm": 0.9331282377243042, "learning_rate": 4.360510358014801e-06, "loss": 0.6365, "step": 8754 }, { "epoch": 0.55, "grad_norm": 0.9760857820510864, "learning_rate": 4.359492803944854e-06, "loss": 0.617, "step": 8755 }, { "epoch": 0.55, "grad_norm": 0.8864888548851013, "learning_rate": 4.358475276844435e-06, "loss": 0.5794, "step": 8756 }, { "epoch": 0.55, "grad_norm": 0.8550977110862732, "learning_rate": 4.357457776756392e-06, "loss": 0.6164, "step": 8757 }, { "epoch": 0.55, "grad_norm": 0.8886324167251587, "learning_rate": 4.3564403037235666e-06, "loss": 0.5582, "step": 8758 }, { "epoch": 0.55, "grad_norm": 0.8365561962127686, "learning_rate": 4.355422857788802e-06, "loss": 0.5514, "step": 8759 }, { "epoch": 0.55, "grad_norm": 0.8547555208206177, "learning_rate": 4.3544054389949366e-06, "loss": 0.579, "step": 8760 }, { "epoch": 0.56, "grad_norm": 0.9086821675300598, "learning_rate": 4.353388047384813e-06, "loss": 0.5918, "step": 8761 }, { "epoch": 0.56, "grad_norm": 0.8336657881736755, "learning_rate": 4.35237068300127e-06, "loss": 0.5914, "step": 8762 }, { "epoch": 0.56, "grad_norm": 1.0008983612060547, "learning_rate": 4.351353345887145e-06, "loss": 0.6075, "step": 8763 }, { "epoch": 0.56, "grad_norm": 0.9368928074836731, "learning_rate": 4.350336036085272e-06, "loss": 0.6175, "step": 8764 }, { "epoch": 0.56, "grad_norm": 0.9230781197547913, "learning_rate": 4.349318753638491e-06, "loss": 0.6663, "step": 8765 }, { "epoch": 0.56, "grad_norm": 0.8669142127037048, "learning_rate": 4.348301498589632e-06, "loss": 0.5702, "step": 8766 }, { "epoch": 0.56, "grad_norm": 0.908332347869873, "learning_rate": 4.347284270981531e-06, "loss": 0.6157, "step": 8767 }, { "epoch": 0.56, "grad_norm": 0.8867782950401306, "learning_rate": 4.346267070857017e-06, "loss": 0.5932, "step": 8768 }, { "epoch": 0.56, "grad_norm": 0.8559575080871582, "learning_rate": 4.3452498982589234e-06, "loss": 0.5792, "step": 8769 }, { "epoch": 0.56, "grad_norm": 0.8476456999778748, "learning_rate": 4.34423275323008e-06, "loss": 0.5813, "step": 8770 }, { "epoch": 0.56, "grad_norm": 0.8500044941902161, "learning_rate": 4.343215635813314e-06, "loss": 0.5623, "step": 8771 }, { "epoch": 0.56, "grad_norm": 0.8590050935745239, "learning_rate": 4.3421985460514515e-06, "loss": 0.5822, "step": 8772 }, { "epoch": 0.56, "grad_norm": 0.882090151309967, "learning_rate": 4.341181483987319e-06, "loss": 0.5638, "step": 8773 }, { "epoch": 0.56, "grad_norm": 0.8285457491874695, "learning_rate": 4.340164449663745e-06, "loss": 0.5572, "step": 8774 }, { "epoch": 0.56, "grad_norm": 0.8915181159973145, "learning_rate": 4.33914744312355e-06, "loss": 0.5627, "step": 8775 }, { "epoch": 0.56, "grad_norm": 0.9251353740692139, "learning_rate": 4.338130464409556e-06, "loss": 0.5431, "step": 8776 }, { "epoch": 0.56, "grad_norm": 0.9160726070404053, "learning_rate": 4.3371135135645845e-06, "loss": 0.6369, "step": 8777 }, { "epoch": 0.56, "grad_norm": 0.9499028325080872, "learning_rate": 4.33609659063146e-06, "loss": 0.5955, "step": 8778 }, { "epoch": 0.56, "grad_norm": 0.8448708653450012, "learning_rate": 4.335079695652998e-06, "loss": 0.6101, "step": 8779 }, { "epoch": 0.56, "grad_norm": 1.0195928812026978, "learning_rate": 4.334062828672016e-06, "loss": 0.6204, "step": 8780 }, { "epoch": 0.56, "grad_norm": 0.9017850756645203, "learning_rate": 4.3330459897313305e-06, "loss": 0.5725, "step": 8781 }, { "epoch": 0.56, "grad_norm": 0.8847092390060425, "learning_rate": 4.33202917887376e-06, "loss": 0.5784, "step": 8782 }, { "epoch": 0.56, "grad_norm": 0.864553689956665, "learning_rate": 4.331012396142117e-06, "loss": 0.5691, "step": 8783 }, { "epoch": 0.56, "grad_norm": 0.8894702792167664, "learning_rate": 4.3299956415792145e-06, "loss": 0.6365, "step": 8784 }, { "epoch": 0.56, "grad_norm": 0.8423247337341309, "learning_rate": 4.328978915227866e-06, "loss": 0.5971, "step": 8785 }, { "epoch": 0.56, "grad_norm": 0.9544634819030762, "learning_rate": 4.327962217130878e-06, "loss": 0.6287, "step": 8786 }, { "epoch": 0.56, "grad_norm": 0.9328646659851074, "learning_rate": 4.326945547331065e-06, "loss": 0.5529, "step": 8787 }, { "epoch": 0.56, "grad_norm": 0.921759843826294, "learning_rate": 4.325928905871233e-06, "loss": 0.6143, "step": 8788 }, { "epoch": 0.56, "grad_norm": 0.8561935424804688, "learning_rate": 4.324912292794192e-06, "loss": 0.6107, "step": 8789 }, { "epoch": 0.56, "grad_norm": 0.8923735618591309, "learning_rate": 4.323895708142742e-06, "loss": 0.5331, "step": 8790 }, { "epoch": 0.56, "grad_norm": 0.8794368505477905, "learning_rate": 4.322879151959695e-06, "loss": 0.5809, "step": 8791 }, { "epoch": 0.56, "grad_norm": 0.8946419358253479, "learning_rate": 4.321862624287851e-06, "loss": 0.5801, "step": 8792 }, { "epoch": 0.56, "grad_norm": 0.9291636943817139, "learning_rate": 4.320846125170012e-06, "loss": 0.6148, "step": 8793 }, { "epoch": 0.56, "grad_norm": 0.8626858592033386, "learning_rate": 4.31982965464898e-06, "loss": 0.5753, "step": 8794 }, { "epoch": 0.56, "grad_norm": 0.9002351760864258, "learning_rate": 4.318813212767555e-06, "loss": 0.5691, "step": 8795 }, { "epoch": 0.56, "grad_norm": 0.8788061141967773, "learning_rate": 4.317796799568536e-06, "loss": 0.6002, "step": 8796 }, { "epoch": 0.56, "grad_norm": 0.8354102373123169, "learning_rate": 4.316780415094722e-06, "loss": 0.5693, "step": 8797 }, { "epoch": 0.56, "grad_norm": 0.89030921459198, "learning_rate": 4.315764059388905e-06, "loss": 0.5916, "step": 8798 }, { "epoch": 0.56, "grad_norm": 0.856412947177887, "learning_rate": 4.314747732493886e-06, "loss": 0.563, "step": 8799 }, { "epoch": 0.56, "grad_norm": 0.832213282585144, "learning_rate": 4.313731434452455e-06, "loss": 0.5612, "step": 8800 }, { "epoch": 0.56, "grad_norm": 0.8731396794319153, "learning_rate": 4.312715165307407e-06, "loss": 0.6631, "step": 8801 }, { "epoch": 0.56, "grad_norm": 0.9026145935058594, "learning_rate": 4.311698925101532e-06, "loss": 0.5776, "step": 8802 }, { "epoch": 0.56, "grad_norm": 0.8666503429412842, "learning_rate": 4.310682713877619e-06, "loss": 0.5579, "step": 8803 }, { "epoch": 0.56, "grad_norm": 0.9560415744781494, "learning_rate": 4.30966653167846e-06, "loss": 0.6415, "step": 8804 }, { "epoch": 0.56, "grad_norm": 0.8633235096931458, "learning_rate": 4.308650378546843e-06, "loss": 0.5844, "step": 8805 }, { "epoch": 0.56, "grad_norm": 0.8731099367141724, "learning_rate": 4.3076342545255535e-06, "loss": 0.5678, "step": 8806 }, { "epoch": 0.56, "grad_norm": 0.8647497296333313, "learning_rate": 4.306618159657375e-06, "loss": 0.5964, "step": 8807 }, { "epoch": 0.56, "grad_norm": 0.867325484752655, "learning_rate": 4.305602093985095e-06, "loss": 0.597, "step": 8808 }, { "epoch": 0.56, "grad_norm": 0.8929917812347412, "learning_rate": 4.3045860575514955e-06, "loss": 0.5933, "step": 8809 }, { "epoch": 0.56, "grad_norm": 0.8812966346740723, "learning_rate": 4.303570050399358e-06, "loss": 0.6035, "step": 8810 }, { "epoch": 0.56, "grad_norm": 0.8197950124740601, "learning_rate": 4.302554072571461e-06, "loss": 0.5531, "step": 8811 }, { "epoch": 0.56, "grad_norm": 0.8902185559272766, "learning_rate": 4.301538124110588e-06, "loss": 0.6078, "step": 8812 }, { "epoch": 0.56, "grad_norm": 0.8458168506622314, "learning_rate": 4.300522205059515e-06, "loss": 0.5865, "step": 8813 }, { "epoch": 0.56, "grad_norm": 0.9090011119842529, "learning_rate": 4.299506315461018e-06, "loss": 0.5848, "step": 8814 }, { "epoch": 0.56, "grad_norm": 0.8977993726730347, "learning_rate": 4.2984904553578725e-06, "loss": 0.607, "step": 8815 }, { "epoch": 0.56, "grad_norm": 0.888264000415802, "learning_rate": 4.297474624792853e-06, "loss": 0.5694, "step": 8816 }, { "epoch": 0.56, "grad_norm": 0.8837360739707947, "learning_rate": 4.296458823808735e-06, "loss": 0.5943, "step": 8817 }, { "epoch": 0.56, "grad_norm": 0.9838821887969971, "learning_rate": 4.295443052448288e-06, "loss": 0.6391, "step": 8818 }, { "epoch": 0.56, "grad_norm": 0.8406442999839783, "learning_rate": 4.294427310754283e-06, "loss": 0.5576, "step": 8819 }, { "epoch": 0.56, "grad_norm": 0.9092972278594971, "learning_rate": 4.293411598769487e-06, "loss": 0.6397, "step": 8820 }, { "epoch": 0.56, "grad_norm": 0.8684690594673157, "learning_rate": 4.292395916536674e-06, "loss": 0.5996, "step": 8821 }, { "epoch": 0.56, "grad_norm": 0.938960075378418, "learning_rate": 4.291380264098607e-06, "loss": 0.6217, "step": 8822 }, { "epoch": 0.56, "grad_norm": 0.9801902174949646, "learning_rate": 4.290364641498051e-06, "loss": 0.5536, "step": 8823 }, { "epoch": 0.56, "grad_norm": 0.8995389938354492, "learning_rate": 4.28934904877777e-06, "loss": 0.5696, "step": 8824 }, { "epoch": 0.56, "grad_norm": 0.9216705560684204, "learning_rate": 4.288333485980531e-06, "loss": 0.5857, "step": 8825 }, { "epoch": 0.56, "grad_norm": 0.8806384205818176, "learning_rate": 4.287317953149092e-06, "loss": 0.592, "step": 8826 }, { "epoch": 0.56, "grad_norm": 0.8204985857009888, "learning_rate": 4.2863024503262146e-06, "loss": 0.4938, "step": 8827 }, { "epoch": 0.56, "grad_norm": 0.9380052089691162, "learning_rate": 4.285286977554657e-06, "loss": 0.6195, "step": 8828 }, { "epoch": 0.56, "grad_norm": 0.9966148734092712, "learning_rate": 4.284271534877181e-06, "loss": 0.6133, "step": 8829 }, { "epoch": 0.56, "grad_norm": 0.9887740015983582, "learning_rate": 4.283256122336539e-06, "loss": 0.6519, "step": 8830 }, { "epoch": 0.56, "grad_norm": 0.8869448900222778, "learning_rate": 4.28224073997549e-06, "loss": 0.6057, "step": 8831 }, { "epoch": 0.56, "grad_norm": 0.8631427884101868, "learning_rate": 4.281225387836786e-06, "loss": 0.552, "step": 8832 }, { "epoch": 0.56, "grad_norm": 0.8739815354347229, "learning_rate": 4.280210065963179e-06, "loss": 0.5631, "step": 8833 }, { "epoch": 0.56, "grad_norm": 0.8833276629447937, "learning_rate": 4.279194774397422e-06, "loss": 0.5637, "step": 8834 }, { "epoch": 0.56, "grad_norm": 0.8888053894042969, "learning_rate": 4.278179513182268e-06, "loss": 0.5978, "step": 8835 }, { "epoch": 0.56, "grad_norm": 0.9251044392585754, "learning_rate": 4.2771642823604635e-06, "loss": 0.5844, "step": 8836 }, { "epoch": 0.56, "grad_norm": 0.9090611934661865, "learning_rate": 4.276149081974754e-06, "loss": 0.5963, "step": 8837 }, { "epoch": 0.56, "grad_norm": 0.8514662384986877, "learning_rate": 4.275133912067889e-06, "loss": 0.5199, "step": 8838 }, { "epoch": 0.56, "grad_norm": 0.9209812879562378, "learning_rate": 4.274118772682615e-06, "loss": 0.5519, "step": 8839 }, { "epoch": 0.56, "grad_norm": 0.9110792279243469, "learning_rate": 4.273103663861675e-06, "loss": 0.582, "step": 8840 }, { "epoch": 0.56, "grad_norm": 0.8700342774391174, "learning_rate": 4.272088585647808e-06, "loss": 0.5325, "step": 8841 }, { "epoch": 0.56, "grad_norm": 0.8676977157592773, "learning_rate": 4.27107353808376e-06, "loss": 0.5971, "step": 8842 }, { "epoch": 0.56, "grad_norm": 0.8846719861030579, "learning_rate": 4.2700585212122705e-06, "loss": 0.5739, "step": 8843 }, { "epoch": 0.56, "grad_norm": 0.9149625301361084, "learning_rate": 4.269043535076077e-06, "loss": 0.5704, "step": 8844 }, { "epoch": 0.56, "grad_norm": 0.8597497344017029, "learning_rate": 4.2680285797179155e-06, "loss": 0.5798, "step": 8845 }, { "epoch": 0.56, "grad_norm": 0.8914947509765625, "learning_rate": 4.267013655180526e-06, "loss": 0.6291, "step": 8846 }, { "epoch": 0.56, "grad_norm": 0.8733804821968079, "learning_rate": 4.265998761506641e-06, "loss": 0.62, "step": 8847 }, { "epoch": 0.56, "grad_norm": 0.8868311047554016, "learning_rate": 4.264983898738996e-06, "loss": 0.5277, "step": 8848 }, { "epoch": 0.56, "grad_norm": 0.9130145907402039, "learning_rate": 4.263969066920321e-06, "loss": 0.5653, "step": 8849 }, { "epoch": 0.56, "grad_norm": 0.9107689261436462, "learning_rate": 4.262954266093347e-06, "loss": 0.603, "step": 8850 }, { "epoch": 0.56, "grad_norm": 0.8246250748634338, "learning_rate": 4.261939496300807e-06, "loss": 0.545, "step": 8851 }, { "epoch": 0.56, "grad_norm": 0.8571567535400391, "learning_rate": 4.260924757585427e-06, "loss": 0.5724, "step": 8852 }, { "epoch": 0.56, "grad_norm": 0.8799802660942078, "learning_rate": 4.259910049989933e-06, "loss": 0.5336, "step": 8853 }, { "epoch": 0.56, "grad_norm": 0.8496525287628174, "learning_rate": 4.258895373557051e-06, "loss": 0.5659, "step": 8854 }, { "epoch": 0.56, "grad_norm": 0.8902441263198853, "learning_rate": 4.25788072832951e-06, "loss": 0.6043, "step": 8855 }, { "epoch": 0.56, "grad_norm": 0.8644466400146484, "learning_rate": 4.256866114350029e-06, "loss": 0.5516, "step": 8856 }, { "epoch": 0.56, "grad_norm": 0.9049948453903198, "learning_rate": 4.25585153166133e-06, "loss": 0.669, "step": 8857 }, { "epoch": 0.56, "grad_norm": 0.9145426750183105, "learning_rate": 4.254836980306134e-06, "loss": 0.5955, "step": 8858 }, { "epoch": 0.56, "grad_norm": 0.8813319206237793, "learning_rate": 4.253822460327162e-06, "loss": 0.5738, "step": 8859 }, { "epoch": 0.56, "grad_norm": 0.994666576385498, "learning_rate": 4.25280797176713e-06, "loss": 0.5967, "step": 8860 }, { "epoch": 0.56, "grad_norm": 0.9201557636260986, "learning_rate": 4.251793514668754e-06, "loss": 0.5901, "step": 8861 }, { "epoch": 0.56, "grad_norm": 0.8030241131782532, "learning_rate": 4.250779089074752e-06, "loss": 0.5547, "step": 8862 }, { "epoch": 0.56, "grad_norm": 0.8798213601112366, "learning_rate": 4.249764695027833e-06, "loss": 0.6044, "step": 8863 }, { "epoch": 0.56, "grad_norm": 0.8339963555335999, "learning_rate": 4.248750332570716e-06, "loss": 0.6645, "step": 8864 }, { "epoch": 0.56, "grad_norm": 0.9063261151313782, "learning_rate": 4.247736001746108e-06, "loss": 0.5697, "step": 8865 }, { "epoch": 0.56, "grad_norm": 0.9362940192222595, "learning_rate": 4.246721702596721e-06, "loss": 0.5943, "step": 8866 }, { "epoch": 0.56, "grad_norm": 0.904593288898468, "learning_rate": 4.24570743516526e-06, "loss": 0.626, "step": 8867 }, { "epoch": 0.56, "grad_norm": 0.9311546683311462, "learning_rate": 4.2446931994944375e-06, "loss": 0.5865, "step": 8868 }, { "epoch": 0.56, "grad_norm": 0.9218093156814575, "learning_rate": 4.243678995626955e-06, "loss": 0.5746, "step": 8869 }, { "epoch": 0.56, "grad_norm": 0.8506073355674744, "learning_rate": 4.242664823605521e-06, "loss": 0.5585, "step": 8870 }, { "epoch": 0.56, "grad_norm": 0.9345227479934692, "learning_rate": 4.241650683472834e-06, "loss": 0.5323, "step": 8871 }, { "epoch": 0.56, "grad_norm": 0.8558427095413208, "learning_rate": 4.240636575271601e-06, "loss": 0.4936, "step": 8872 }, { "epoch": 0.56, "grad_norm": 0.8252081871032715, "learning_rate": 4.239622499044519e-06, "loss": 0.55, "step": 8873 }, { "epoch": 0.56, "grad_norm": 0.8779731392860413, "learning_rate": 4.23860845483429e-06, "loss": 0.5766, "step": 8874 }, { "epoch": 0.56, "grad_norm": 0.9093831181526184, "learning_rate": 4.237594442683607e-06, "loss": 0.6104, "step": 8875 }, { "epoch": 0.56, "grad_norm": 0.8918717503547668, "learning_rate": 4.236580462635173e-06, "loss": 0.5824, "step": 8876 }, { "epoch": 0.56, "grad_norm": 0.8895564675331116, "learning_rate": 4.235566514731678e-06, "loss": 0.6093, "step": 8877 }, { "epoch": 0.56, "grad_norm": 0.9440225958824158, "learning_rate": 4.23455259901582e-06, "loss": 0.617, "step": 8878 }, { "epoch": 0.56, "grad_norm": 0.8962016105651855, "learning_rate": 4.2335387155302885e-06, "loss": 0.5789, "step": 8879 }, { "epoch": 0.56, "grad_norm": 0.8680998682975769, "learning_rate": 4.232524864317773e-06, "loss": 0.549, "step": 8880 }, { "epoch": 0.56, "grad_norm": 0.8944227695465088, "learning_rate": 4.231511045420967e-06, "loss": 0.6008, "step": 8881 }, { "epoch": 0.56, "grad_norm": 0.8181406259536743, "learning_rate": 4.230497258882559e-06, "loss": 0.5423, "step": 8882 }, { "epoch": 0.56, "grad_norm": 0.9459832906723022, "learning_rate": 4.229483504745233e-06, "loss": 0.6108, "step": 8883 }, { "epoch": 0.56, "grad_norm": 0.8519952297210693, "learning_rate": 4.228469783051676e-06, "loss": 0.5936, "step": 8884 }, { "epoch": 0.56, "grad_norm": 0.8907895088195801, "learning_rate": 4.227456093844573e-06, "loss": 0.5805, "step": 8885 }, { "epoch": 0.56, "grad_norm": 1.0412497520446777, "learning_rate": 4.226442437166607e-06, "loss": 0.5508, "step": 8886 }, { "epoch": 0.56, "grad_norm": 0.8901419639587402, "learning_rate": 4.225428813060459e-06, "loss": 0.5822, "step": 8887 }, { "epoch": 0.56, "grad_norm": 0.8553881049156189, "learning_rate": 4.224415221568807e-06, "loss": 0.5854, "step": 8888 }, { "epoch": 0.56, "grad_norm": 0.8875113129615784, "learning_rate": 4.223401662734333e-06, "loss": 0.5348, "step": 8889 }, { "epoch": 0.56, "grad_norm": 0.8207681179046631, "learning_rate": 4.222388136599715e-06, "loss": 0.5878, "step": 8890 }, { "epoch": 0.56, "grad_norm": 0.8918472528457642, "learning_rate": 4.221374643207626e-06, "loss": 0.5744, "step": 8891 }, { "epoch": 0.56, "grad_norm": 0.8446689248085022, "learning_rate": 4.220361182600742e-06, "loss": 0.6045, "step": 8892 }, { "epoch": 0.56, "grad_norm": 0.883139967918396, "learning_rate": 4.219347754821737e-06, "loss": 0.5616, "step": 8893 }, { "epoch": 0.56, "grad_norm": 0.9404736161231995, "learning_rate": 4.218334359913283e-06, "loss": 0.565, "step": 8894 }, { "epoch": 0.56, "grad_norm": 0.8452960848808289, "learning_rate": 4.217320997918048e-06, "loss": 0.5913, "step": 8895 }, { "epoch": 0.56, "grad_norm": 0.9132777452468872, "learning_rate": 4.216307668878706e-06, "loss": 0.6401, "step": 8896 }, { "epoch": 0.56, "grad_norm": 0.8934757113456726, "learning_rate": 4.2152943728379185e-06, "loss": 0.618, "step": 8897 }, { "epoch": 0.56, "grad_norm": 0.9089536070823669, "learning_rate": 4.214281109838357e-06, "loss": 0.545, "step": 8898 }, { "epoch": 0.56, "grad_norm": 0.8987053036689758, "learning_rate": 4.213267879922685e-06, "loss": 0.6345, "step": 8899 }, { "epoch": 0.56, "grad_norm": 0.8274092078208923, "learning_rate": 4.212254683133565e-06, "loss": 0.5152, "step": 8900 }, { "epoch": 0.56, "grad_norm": 0.8774511814117432, "learning_rate": 4.2112415195136585e-06, "loss": 0.5542, "step": 8901 }, { "epoch": 0.56, "grad_norm": 0.9276379942893982, "learning_rate": 4.21022838910563e-06, "loss": 0.5648, "step": 8902 }, { "epoch": 0.56, "grad_norm": 0.8499544858932495, "learning_rate": 4.209215291952135e-06, "loss": 0.5782, "step": 8903 }, { "epoch": 0.56, "grad_norm": 0.8849813938140869, "learning_rate": 4.208202228095835e-06, "loss": 0.5964, "step": 8904 }, { "epoch": 0.56, "grad_norm": 0.9096781611442566, "learning_rate": 4.207189197579382e-06, "loss": 0.5476, "step": 8905 }, { "epoch": 0.56, "grad_norm": 0.7918185591697693, "learning_rate": 4.2061762004454365e-06, "loss": 0.5343, "step": 8906 }, { "epoch": 0.56, "grad_norm": 0.9129202365875244, "learning_rate": 4.2051632367366485e-06, "loss": 0.6456, "step": 8907 }, { "epoch": 0.56, "grad_norm": 0.8676325082778931, "learning_rate": 4.204150306495672e-06, "loss": 0.6489, "step": 8908 }, { "epoch": 0.56, "grad_norm": 0.8340794444084167, "learning_rate": 4.203137409765159e-06, "loss": 0.5279, "step": 8909 }, { "epoch": 0.56, "grad_norm": 0.8473523855209351, "learning_rate": 4.202124546587754e-06, "loss": 0.5894, "step": 8910 }, { "epoch": 0.56, "grad_norm": 0.8485411405563354, "learning_rate": 4.201111717006111e-06, "loss": 0.6015, "step": 8911 }, { "epoch": 0.56, "grad_norm": 1.0016659498214722, "learning_rate": 4.200098921062875e-06, "loss": 0.5994, "step": 8912 }, { "epoch": 0.56, "grad_norm": 0.8975883722305298, "learning_rate": 4.19908615880069e-06, "loss": 0.6251, "step": 8913 }, { "epoch": 0.56, "grad_norm": 0.9073837399482727, "learning_rate": 4.198073430262199e-06, "loss": 0.617, "step": 8914 }, { "epoch": 0.56, "grad_norm": 0.8681656122207642, "learning_rate": 4.197060735490048e-06, "loss": 0.5584, "step": 8915 }, { "epoch": 0.56, "grad_norm": 0.8720282912254333, "learning_rate": 4.196048074526876e-06, "loss": 0.6311, "step": 8916 }, { "epoch": 0.56, "grad_norm": 0.9659051299095154, "learning_rate": 4.195035447415324e-06, "loss": 0.625, "step": 8917 }, { "epoch": 0.57, "grad_norm": 0.8648727536201477, "learning_rate": 4.194022854198026e-06, "loss": 0.6112, "step": 8918 }, { "epoch": 0.57, "grad_norm": 0.8482996225357056, "learning_rate": 4.193010294917624e-06, "loss": 0.5382, "step": 8919 }, { "epoch": 0.57, "grad_norm": 0.877569317817688, "learning_rate": 4.1919977696167515e-06, "loss": 0.5412, "step": 8920 }, { "epoch": 0.57, "grad_norm": 0.8534306287765503, "learning_rate": 4.190985278338042e-06, "loss": 0.5792, "step": 8921 }, { "epoch": 0.57, "grad_norm": 0.9521181583404541, "learning_rate": 4.189972821124126e-06, "loss": 0.6252, "step": 8922 }, { "epoch": 0.57, "grad_norm": 0.8283462524414062, "learning_rate": 4.188960398017638e-06, "loss": 0.5668, "step": 8923 }, { "epoch": 0.57, "grad_norm": 0.8727411031723022, "learning_rate": 4.187948009061207e-06, "loss": 0.5511, "step": 8924 }, { "epoch": 0.57, "grad_norm": 0.8867582678794861, "learning_rate": 4.186935654297461e-06, "loss": 0.59, "step": 8925 }, { "epoch": 0.57, "grad_norm": 0.859950602054596, "learning_rate": 4.1859233337690245e-06, "loss": 0.5337, "step": 8926 }, { "epoch": 0.57, "grad_norm": 0.9106714129447937, "learning_rate": 4.1849110475185225e-06, "loss": 0.5833, "step": 8927 }, { "epoch": 0.57, "grad_norm": 0.9669057130813599, "learning_rate": 4.183898795588584e-06, "loss": 0.5751, "step": 8928 }, { "epoch": 0.57, "grad_norm": 0.8599669933319092, "learning_rate": 4.1828865780218285e-06, "loss": 0.5511, "step": 8929 }, { "epoch": 0.57, "grad_norm": 0.8547632694244385, "learning_rate": 4.181874394860875e-06, "loss": 0.5715, "step": 8930 }, { "epoch": 0.57, "grad_norm": 0.8837994337081909, "learning_rate": 4.180862246148344e-06, "loss": 0.5981, "step": 8931 }, { "epoch": 0.57, "grad_norm": 0.8469040989875793, "learning_rate": 4.1798501319268565e-06, "loss": 0.5338, "step": 8932 }, { "epoch": 0.57, "grad_norm": 0.9077805876731873, "learning_rate": 4.178838052239027e-06, "loss": 0.5999, "step": 8933 }, { "epoch": 0.57, "grad_norm": 0.8736510276794434, "learning_rate": 4.177826007127468e-06, "loss": 0.561, "step": 8934 }, { "epoch": 0.57, "grad_norm": 0.8860734701156616, "learning_rate": 4.176813996634796e-06, "loss": 0.5776, "step": 8935 }, { "epoch": 0.57, "grad_norm": 0.8876895904541016, "learning_rate": 4.175802020803624e-06, "loss": 0.5732, "step": 8936 }, { "epoch": 0.57, "grad_norm": 0.9345043301582336, "learning_rate": 4.174790079676563e-06, "loss": 0.5576, "step": 8937 }, { "epoch": 0.57, "grad_norm": 0.8652613162994385, "learning_rate": 4.173778173296219e-06, "loss": 0.5698, "step": 8938 }, { "epoch": 0.57, "grad_norm": 0.990037739276886, "learning_rate": 4.172766301705202e-06, "loss": 0.6013, "step": 8939 }, { "epoch": 0.57, "grad_norm": 0.9194901585578918, "learning_rate": 4.171754464946119e-06, "loss": 0.6357, "step": 8940 }, { "epoch": 0.57, "grad_norm": 0.8597732782363892, "learning_rate": 4.170742663061575e-06, "loss": 0.5582, "step": 8941 }, { "epoch": 0.57, "grad_norm": 0.9255541563034058, "learning_rate": 4.169730896094172e-06, "loss": 0.5464, "step": 8942 }, { "epoch": 0.57, "grad_norm": 0.9251505732536316, "learning_rate": 4.1687191640865135e-06, "loss": 0.5523, "step": 8943 }, { "epoch": 0.57, "grad_norm": 0.8354572653770447, "learning_rate": 4.167707467081197e-06, "loss": 0.5892, "step": 8944 }, { "epoch": 0.57, "grad_norm": 0.9447482228279114, "learning_rate": 4.166695805120825e-06, "loss": 0.7007, "step": 8945 }, { "epoch": 0.57, "grad_norm": 0.8327589631080627, "learning_rate": 4.165684178247993e-06, "loss": 0.5409, "step": 8946 }, { "epoch": 0.57, "grad_norm": 0.9203556180000305, "learning_rate": 4.1646725865053005e-06, "loss": 0.6141, "step": 8947 }, { "epoch": 0.57, "grad_norm": 0.9170238375663757, "learning_rate": 4.163661029935336e-06, "loss": 0.545, "step": 8948 }, { "epoch": 0.57, "grad_norm": 0.8779581785202026, "learning_rate": 4.162649508580698e-06, "loss": 0.5778, "step": 8949 }, { "epoch": 0.57, "grad_norm": 0.8849088549613953, "learning_rate": 4.161638022483976e-06, "loss": 0.5548, "step": 8950 }, { "epoch": 0.57, "grad_norm": 0.8839111924171448, "learning_rate": 4.160626571687761e-06, "loss": 0.5711, "step": 8951 }, { "epoch": 0.57, "grad_norm": 0.8382406234741211, "learning_rate": 4.159615156234639e-06, "loss": 0.5591, "step": 8952 }, { "epoch": 0.57, "grad_norm": 0.8632530570030212, "learning_rate": 4.158603776167201e-06, "loss": 0.5473, "step": 8953 }, { "epoch": 0.57, "grad_norm": 0.8566288352012634, "learning_rate": 4.157592431528031e-06, "loss": 0.5317, "step": 8954 }, { "epoch": 0.57, "grad_norm": 0.8643941283226013, "learning_rate": 4.156581122359714e-06, "loss": 0.6105, "step": 8955 }, { "epoch": 0.57, "grad_norm": 0.8673588633537292, "learning_rate": 4.15556984870483e-06, "loss": 0.5787, "step": 8956 }, { "epoch": 0.57, "grad_norm": 0.8547856211662292, "learning_rate": 4.1545586106059636e-06, "loss": 0.5776, "step": 8957 }, { "epoch": 0.57, "grad_norm": 0.8713629841804504, "learning_rate": 4.153547408105691e-06, "loss": 0.605, "step": 8958 }, { "epoch": 0.57, "grad_norm": 0.8869353532791138, "learning_rate": 4.152536241246595e-06, "loss": 0.5782, "step": 8959 }, { "epoch": 0.57, "grad_norm": 0.9460669755935669, "learning_rate": 4.151525110071248e-06, "loss": 0.5934, "step": 8960 }, { "epoch": 0.57, "grad_norm": 0.8945161700248718, "learning_rate": 4.1505140146222276e-06, "loss": 0.6079, "step": 8961 }, { "epoch": 0.57, "grad_norm": 0.8501721024513245, "learning_rate": 4.149502954942107e-06, "loss": 0.58, "step": 8962 }, { "epoch": 0.57, "grad_norm": 0.8856709599494934, "learning_rate": 4.148491931073459e-06, "loss": 0.5962, "step": 8963 }, { "epoch": 0.57, "grad_norm": 0.8691068887710571, "learning_rate": 4.147480943058852e-06, "loss": 0.6176, "step": 8964 }, { "epoch": 0.57, "grad_norm": 0.9234523177146912, "learning_rate": 4.146469990940858e-06, "loss": 0.5897, "step": 8965 }, { "epoch": 0.57, "grad_norm": 0.8816432356834412, "learning_rate": 4.1454590747620424e-06, "loss": 0.6297, "step": 8966 }, { "epoch": 0.57, "grad_norm": 0.8956805467605591, "learning_rate": 4.144448194564973e-06, "loss": 0.5716, "step": 8967 }, { "epoch": 0.57, "grad_norm": 0.9496785998344421, "learning_rate": 4.1434373503922145e-06, "loss": 0.5964, "step": 8968 }, { "epoch": 0.57, "grad_norm": 0.8747205138206482, "learning_rate": 4.142426542286329e-06, "loss": 0.6098, "step": 8969 }, { "epoch": 0.57, "grad_norm": 0.9270417094230652, "learning_rate": 4.141415770289877e-06, "loss": 0.5876, "step": 8970 }, { "epoch": 0.57, "grad_norm": 0.8411609530448914, "learning_rate": 4.140405034445423e-06, "loss": 0.5882, "step": 8971 }, { "epoch": 0.57, "grad_norm": 0.926416277885437, "learning_rate": 4.13939433479552e-06, "loss": 0.5833, "step": 8972 }, { "epoch": 0.57, "grad_norm": 0.8865971565246582, "learning_rate": 4.13838367138273e-06, "loss": 0.5632, "step": 8973 }, { "epoch": 0.57, "grad_norm": 0.9375487565994263, "learning_rate": 4.137373044249604e-06, "loss": 0.5751, "step": 8974 }, { "epoch": 0.57, "grad_norm": 0.9372237324714661, "learning_rate": 4.1363624534387e-06, "loss": 0.5975, "step": 8975 }, { "epoch": 0.57, "grad_norm": 0.8482964634895325, "learning_rate": 4.135351898992568e-06, "loss": 0.5627, "step": 8976 }, { "epoch": 0.57, "grad_norm": 0.9388363361358643, "learning_rate": 4.134341380953761e-06, "loss": 0.5835, "step": 8977 }, { "epoch": 0.57, "grad_norm": 0.8574067950248718, "learning_rate": 4.133330899364824e-06, "loss": 0.5772, "step": 8978 }, { "epoch": 0.57, "grad_norm": 0.907927393913269, "learning_rate": 4.1323204542683105e-06, "loss": 0.6602, "step": 8979 }, { "epoch": 0.57, "grad_norm": 0.926572322845459, "learning_rate": 4.131310045706763e-06, "loss": 0.6016, "step": 8980 }, { "epoch": 0.57, "grad_norm": 0.9103202223777771, "learning_rate": 4.130299673722729e-06, "loss": 0.6473, "step": 8981 }, { "epoch": 0.57, "grad_norm": 0.9199764728546143, "learning_rate": 4.129289338358748e-06, "loss": 0.5697, "step": 8982 }, { "epoch": 0.57, "grad_norm": 0.904728889465332, "learning_rate": 4.128279039657366e-06, "loss": 0.627, "step": 8983 }, { "epoch": 0.57, "grad_norm": 0.8930543065071106, "learning_rate": 4.127268777661119e-06, "loss": 0.6144, "step": 8984 }, { "epoch": 0.57, "grad_norm": 0.8956807255744934, "learning_rate": 4.126258552412551e-06, "loss": 0.5895, "step": 8985 }, { "epoch": 0.57, "grad_norm": 0.8710659742355347, "learning_rate": 4.125248363954192e-06, "loss": 0.5893, "step": 8986 }, { "epoch": 0.57, "grad_norm": 0.852942943572998, "learning_rate": 4.124238212328585e-06, "loss": 0.5658, "step": 8987 }, { "epoch": 0.57, "grad_norm": 0.9231775403022766, "learning_rate": 4.123228097578258e-06, "loss": 0.6293, "step": 8988 }, { "epoch": 0.57, "grad_norm": 0.9329462647438049, "learning_rate": 4.122218019745748e-06, "loss": 0.6025, "step": 8989 }, { "epoch": 0.57, "grad_norm": 0.9070497751235962, "learning_rate": 4.121207978873582e-06, "loss": 0.537, "step": 8990 }, { "epoch": 0.57, "grad_norm": 0.8512255549430847, "learning_rate": 4.12019797500429e-06, "loss": 0.5603, "step": 8991 }, { "epoch": 0.57, "grad_norm": 0.9133707880973816, "learning_rate": 4.119188008180401e-06, "loss": 0.5676, "step": 8992 }, { "epoch": 0.57, "grad_norm": 0.9240803122520447, "learning_rate": 4.118178078444442e-06, "loss": 0.6035, "step": 8993 }, { "epoch": 0.57, "grad_norm": 0.951643705368042, "learning_rate": 4.117168185838936e-06, "loss": 0.5913, "step": 8994 }, { "epoch": 0.57, "grad_norm": 0.8662564754486084, "learning_rate": 4.1161583304064055e-06, "loss": 0.5592, "step": 8995 }, { "epoch": 0.57, "grad_norm": 0.8806678056716919, "learning_rate": 4.115148512189374e-06, "loss": 0.621, "step": 8996 }, { "epoch": 0.57, "grad_norm": 0.9231857657432556, "learning_rate": 4.114138731230362e-06, "loss": 0.6269, "step": 8997 }, { "epoch": 0.57, "grad_norm": 0.8965012431144714, "learning_rate": 4.113128987571885e-06, "loss": 0.6336, "step": 8998 }, { "epoch": 0.57, "grad_norm": 0.8867535591125488, "learning_rate": 4.1121192812564595e-06, "loss": 0.5398, "step": 8999 }, { "epoch": 0.57, "grad_norm": 0.9486203789710999, "learning_rate": 4.111109612326603e-06, "loss": 0.6183, "step": 9000 }, { "epoch": 0.57, "grad_norm": 0.9139353632926941, "learning_rate": 4.110099980824831e-06, "loss": 0.5937, "step": 9001 }, { "epoch": 0.57, "grad_norm": 0.8802381753921509, "learning_rate": 4.109090386793652e-06, "loss": 0.56, "step": 9002 }, { "epoch": 0.57, "grad_norm": 0.880913496017456, "learning_rate": 4.108080830275576e-06, "loss": 0.6136, "step": 9003 }, { "epoch": 0.57, "grad_norm": 0.9269407987594604, "learning_rate": 4.107071311313113e-06, "loss": 0.5653, "step": 9004 }, { "epoch": 0.57, "grad_norm": 0.9228689670562744, "learning_rate": 4.106061829948773e-06, "loss": 0.6108, "step": 9005 }, { "epoch": 0.57, "grad_norm": 0.921231746673584, "learning_rate": 4.10505238622506e-06, "loss": 0.5842, "step": 9006 }, { "epoch": 0.57, "grad_norm": 0.8868432641029358, "learning_rate": 4.104042980184476e-06, "loss": 0.6736, "step": 9007 }, { "epoch": 0.57, "grad_norm": 0.8439784646034241, "learning_rate": 4.103033611869525e-06, "loss": 0.5566, "step": 9008 }, { "epoch": 0.57, "grad_norm": 0.8885878920555115, "learning_rate": 4.1020242813227096e-06, "loss": 0.5977, "step": 9009 }, { "epoch": 0.57, "grad_norm": 0.8744617700576782, "learning_rate": 4.101014988586528e-06, "loss": 0.5894, "step": 9010 }, { "epoch": 0.57, "grad_norm": 0.8461993932723999, "learning_rate": 4.100005733703477e-06, "loss": 0.5497, "step": 9011 }, { "epoch": 0.57, "grad_norm": 0.8714662790298462, "learning_rate": 4.0989965167160526e-06, "loss": 0.5959, "step": 9012 }, { "epoch": 0.57, "grad_norm": 0.883986234664917, "learning_rate": 4.097987337666753e-06, "loss": 0.5854, "step": 9013 }, { "epoch": 0.57, "grad_norm": 0.8724504113197327, "learning_rate": 4.096978196598068e-06, "loss": 0.5916, "step": 9014 }, { "epoch": 0.57, "grad_norm": 0.9002840518951416, "learning_rate": 4.09596909355249e-06, "loss": 0.5975, "step": 9015 }, { "epoch": 0.57, "grad_norm": 0.8268336057662964, "learning_rate": 4.094960028572506e-06, "loss": 0.5894, "step": 9016 }, { "epoch": 0.57, "grad_norm": 0.8918128609657288, "learning_rate": 4.0939510017006095e-06, "loss": 0.5321, "step": 9017 }, { "epoch": 0.57, "grad_norm": 0.8402930498123169, "learning_rate": 4.092942012979285e-06, "loss": 0.5594, "step": 9018 }, { "epoch": 0.57, "grad_norm": 0.9471001029014587, "learning_rate": 4.091933062451015e-06, "loss": 0.5805, "step": 9019 }, { "epoch": 0.57, "grad_norm": 0.9170734286308289, "learning_rate": 4.0909241501582865e-06, "loss": 0.6064, "step": 9020 }, { "epoch": 0.57, "grad_norm": 0.9813190698623657, "learning_rate": 4.089915276143577e-06, "loss": 0.6328, "step": 9021 }, { "epoch": 0.57, "grad_norm": 0.9079948663711548, "learning_rate": 4.088906440449371e-06, "loss": 0.5542, "step": 9022 }, { "epoch": 0.57, "grad_norm": 0.855984091758728, "learning_rate": 4.087897643118145e-06, "loss": 0.5719, "step": 9023 }, { "epoch": 0.57, "grad_norm": 0.8991562724113464, "learning_rate": 4.086888884192377e-06, "loss": 0.5942, "step": 9024 }, { "epoch": 0.57, "grad_norm": 0.9502757787704468, "learning_rate": 4.0858801637145395e-06, "loss": 0.5597, "step": 9025 }, { "epoch": 0.57, "grad_norm": 0.910291314125061, "learning_rate": 4.084871481727111e-06, "loss": 0.5413, "step": 9026 }, { "epoch": 0.57, "grad_norm": 0.8761973977088928, "learning_rate": 4.083862838272559e-06, "loss": 0.588, "step": 9027 }, { "epoch": 0.57, "grad_norm": 0.935142993927002, "learning_rate": 4.082854233393358e-06, "loss": 0.5596, "step": 9028 }, { "epoch": 0.57, "grad_norm": 0.9134296178817749, "learning_rate": 4.081845667131971e-06, "loss": 0.6049, "step": 9029 }, { "epoch": 0.57, "grad_norm": 0.9424194693565369, "learning_rate": 4.080837139530872e-06, "loss": 0.5892, "step": 9030 }, { "epoch": 0.57, "grad_norm": 0.8782743215560913, "learning_rate": 4.0798286506325225e-06, "loss": 0.5253, "step": 9031 }, { "epoch": 0.57, "grad_norm": 0.8680989742279053, "learning_rate": 4.078820200479389e-06, "loss": 0.588, "step": 9032 }, { "epoch": 0.57, "grad_norm": 0.905407726764679, "learning_rate": 4.077811789113929e-06, "loss": 0.6169, "step": 9033 }, { "epoch": 0.57, "grad_norm": 0.9795319437980652, "learning_rate": 4.076803416578608e-06, "loss": 0.627, "step": 9034 }, { "epoch": 0.57, "grad_norm": 0.8296229243278503, "learning_rate": 4.0757950829158855e-06, "loss": 0.5553, "step": 9035 }, { "epoch": 0.57, "grad_norm": 0.8955538272857666, "learning_rate": 4.074786788168216e-06, "loss": 0.6238, "step": 9036 }, { "epoch": 0.57, "grad_norm": 0.8968479037284851, "learning_rate": 4.073778532378056e-06, "loss": 0.619, "step": 9037 }, { "epoch": 0.57, "grad_norm": 0.9429267644882202, "learning_rate": 4.072770315587858e-06, "loss": 0.5866, "step": 9038 }, { "epoch": 0.57, "grad_norm": 0.8396599292755127, "learning_rate": 4.071762137840079e-06, "loss": 0.5358, "step": 9039 }, { "epoch": 0.57, "grad_norm": 0.8964661955833435, "learning_rate": 4.070753999177167e-06, "loss": 0.5945, "step": 9040 }, { "epoch": 0.57, "grad_norm": 0.8709607720375061, "learning_rate": 4.069745899641571e-06, "loss": 0.5417, "step": 9041 }, { "epoch": 0.57, "grad_norm": 0.9348841309547424, "learning_rate": 4.0687378392757374e-06, "loss": 0.6048, "step": 9042 }, { "epoch": 0.57, "grad_norm": 0.8811603784561157, "learning_rate": 4.0677298181221155e-06, "loss": 0.5896, "step": 9043 }, { "epoch": 0.57, "grad_norm": 0.8758918046951294, "learning_rate": 4.066721836223149e-06, "loss": 0.5902, "step": 9044 }, { "epoch": 0.57, "grad_norm": 0.9369110465049744, "learning_rate": 4.065713893621278e-06, "loss": 0.6596, "step": 9045 }, { "epoch": 0.57, "grad_norm": 0.8938471674919128, "learning_rate": 4.064705990358943e-06, "loss": 0.5437, "step": 9046 }, { "epoch": 0.57, "grad_norm": 0.8944480419158936, "learning_rate": 4.063698126478587e-06, "loss": 0.5737, "step": 9047 }, { "epoch": 0.57, "grad_norm": 0.9232917428016663, "learning_rate": 4.062690302022647e-06, "loss": 0.5618, "step": 9048 }, { "epoch": 0.57, "grad_norm": 0.8441494107246399, "learning_rate": 4.0616825170335565e-06, "loss": 0.5753, "step": 9049 }, { "epoch": 0.57, "grad_norm": 0.8715497255325317, "learning_rate": 4.060674771553751e-06, "loss": 0.5928, "step": 9050 }, { "epoch": 0.57, "grad_norm": 0.863179087638855, "learning_rate": 4.059667065625662e-06, "loss": 0.5379, "step": 9051 }, { "epoch": 0.57, "grad_norm": 0.8319960236549377, "learning_rate": 4.058659399291724e-06, "loss": 0.5393, "step": 9052 }, { "epoch": 0.57, "grad_norm": 0.9125126004219055, "learning_rate": 4.057651772594362e-06, "loss": 0.6624, "step": 9053 }, { "epoch": 0.57, "grad_norm": 0.8907890915870667, "learning_rate": 4.056644185576007e-06, "loss": 0.6439, "step": 9054 }, { "epoch": 0.57, "grad_norm": 0.8446599841117859, "learning_rate": 4.055636638279082e-06, "loss": 0.537, "step": 9055 }, { "epoch": 0.57, "grad_norm": 0.9353048801422119, "learning_rate": 4.054629130746015e-06, "loss": 0.6079, "step": 9056 }, { "epoch": 0.57, "grad_norm": 0.9255784749984741, "learning_rate": 4.053621663019225e-06, "loss": 0.5508, "step": 9057 }, { "epoch": 0.57, "grad_norm": 0.8582807183265686, "learning_rate": 4.052614235141136e-06, "loss": 0.5567, "step": 9058 }, { "epoch": 0.57, "grad_norm": 0.8390825390815735, "learning_rate": 4.051606847154164e-06, "loss": 0.5467, "step": 9059 }, { "epoch": 0.57, "grad_norm": 0.8816949129104614, "learning_rate": 4.05059949910073e-06, "loss": 0.6016, "step": 9060 }, { "epoch": 0.57, "grad_norm": 0.875059187412262, "learning_rate": 4.049592191023247e-06, "loss": 0.5172, "step": 9061 }, { "epoch": 0.57, "grad_norm": 0.9037113189697266, "learning_rate": 4.0485849229641325e-06, "loss": 0.5985, "step": 9062 }, { "epoch": 0.57, "grad_norm": 0.8470078110694885, "learning_rate": 4.047577694965794e-06, "loss": 0.5989, "step": 9063 }, { "epoch": 0.57, "grad_norm": 0.9105969071388245, "learning_rate": 4.046570507070649e-06, "loss": 0.6467, "step": 9064 }, { "epoch": 0.57, "grad_norm": 0.8537124395370483, "learning_rate": 4.045563359321102e-06, "loss": 0.553, "step": 9065 }, { "epoch": 0.57, "grad_norm": 0.917719304561615, "learning_rate": 4.044556251759562e-06, "loss": 0.6008, "step": 9066 }, { "epoch": 0.57, "grad_norm": 0.8611663579940796, "learning_rate": 4.043549184428434e-06, "loss": 0.582, "step": 9067 }, { "epoch": 0.57, "grad_norm": 0.9147241115570068, "learning_rate": 4.042542157370122e-06, "loss": 0.6259, "step": 9068 }, { "epoch": 0.57, "grad_norm": 0.9152358770370483, "learning_rate": 4.041535170627029e-06, "loss": 0.5821, "step": 9069 }, { "epoch": 0.57, "grad_norm": 0.8824336528778076, "learning_rate": 4.040528224241558e-06, "loss": 0.5594, "step": 9070 }, { "epoch": 0.57, "grad_norm": 0.805570662021637, "learning_rate": 4.039521318256104e-06, "loss": 0.4743, "step": 9071 }, { "epoch": 0.57, "grad_norm": 0.9145142436027527, "learning_rate": 4.038514452713065e-06, "loss": 0.569, "step": 9072 }, { "epoch": 0.57, "grad_norm": 0.8494529128074646, "learning_rate": 4.037507627654838e-06, "loss": 0.5572, "step": 9073 }, { "epoch": 0.57, "grad_norm": 0.897002637386322, "learning_rate": 4.0365008431238184e-06, "loss": 0.6247, "step": 9074 }, { "epoch": 0.57, "grad_norm": 0.9091158509254456, "learning_rate": 4.035494099162396e-06, "loss": 0.6193, "step": 9075 }, { "epoch": 0.58, "grad_norm": 0.8396137952804565, "learning_rate": 4.03448739581296e-06, "loss": 0.5578, "step": 9076 }, { "epoch": 0.58, "grad_norm": 0.8554840683937073, "learning_rate": 4.033480733117902e-06, "loss": 0.5752, "step": 9077 }, { "epoch": 0.58, "grad_norm": 0.8953068852424622, "learning_rate": 4.032474111119609e-06, "loss": 0.6019, "step": 9078 }, { "epoch": 0.58, "grad_norm": 0.9367779493331909, "learning_rate": 4.031467529860466e-06, "loss": 0.6226, "step": 9079 }, { "epoch": 0.58, "grad_norm": 0.8574473857879639, "learning_rate": 4.030460989382853e-06, "loss": 0.5676, "step": 9080 }, { "epoch": 0.58, "grad_norm": 0.8605020046234131, "learning_rate": 4.029454489729156e-06, "loss": 0.5517, "step": 9081 }, { "epoch": 0.58, "grad_norm": 0.890446126461029, "learning_rate": 4.028448030941756e-06, "loss": 0.5869, "step": 9082 }, { "epoch": 0.58, "grad_norm": 0.8330382704734802, "learning_rate": 4.027441613063029e-06, "loss": 0.582, "step": 9083 }, { "epoch": 0.58, "grad_norm": 0.8821123242378235, "learning_rate": 4.026435236135351e-06, "loss": 0.6191, "step": 9084 }, { "epoch": 0.58, "grad_norm": 0.9941993355751038, "learning_rate": 4.025428900201098e-06, "loss": 0.6336, "step": 9085 }, { "epoch": 0.58, "grad_norm": 0.9467496275901794, "learning_rate": 4.024422605302646e-06, "loss": 0.629, "step": 9086 }, { "epoch": 0.58, "grad_norm": 0.8554012179374695, "learning_rate": 4.023416351482364e-06, "loss": 0.533, "step": 9087 }, { "epoch": 0.58, "grad_norm": 0.8936252593994141, "learning_rate": 4.022410138782621e-06, "loss": 0.6261, "step": 9088 }, { "epoch": 0.58, "grad_norm": 0.8527365922927856, "learning_rate": 4.021403967245786e-06, "loss": 0.54, "step": 9089 }, { "epoch": 0.58, "grad_norm": 0.8804008364677429, "learning_rate": 4.020397836914227e-06, "loss": 0.5582, "step": 9090 }, { "epoch": 0.58, "grad_norm": 0.8681939840316772, "learning_rate": 4.019391747830307e-06, "loss": 0.567, "step": 9091 }, { "epoch": 0.58, "grad_norm": 0.9508828520774841, "learning_rate": 4.018385700036389e-06, "loss": 0.611, "step": 9092 }, { "epoch": 0.58, "grad_norm": 0.8974758982658386, "learning_rate": 4.017379693574833e-06, "loss": 0.5596, "step": 9093 }, { "epoch": 0.58, "grad_norm": 0.8851727247238159, "learning_rate": 4.016373728488002e-06, "loss": 0.6184, "step": 9094 }, { "epoch": 0.58, "grad_norm": 0.8081380724906921, "learning_rate": 4.01536780481825e-06, "loss": 0.5798, "step": 9095 }, { "epoch": 0.58, "grad_norm": 0.8617830276489258, "learning_rate": 4.014361922607936e-06, "loss": 0.6021, "step": 9096 }, { "epoch": 0.58, "grad_norm": 0.895240306854248, "learning_rate": 4.013356081899412e-06, "loss": 0.5756, "step": 9097 }, { "epoch": 0.58, "grad_norm": 0.8988040089607239, "learning_rate": 4.0123502827350295e-06, "loss": 0.5726, "step": 9098 }, { "epoch": 0.58, "grad_norm": 0.932150661945343, "learning_rate": 4.011344525157141e-06, "loss": 0.6415, "step": 9099 }, { "epoch": 0.58, "grad_norm": 0.9083916544914246, "learning_rate": 4.010338809208098e-06, "loss": 0.6164, "step": 9100 }, { "epoch": 0.58, "grad_norm": 0.8748500943183899, "learning_rate": 4.009333134930244e-06, "loss": 0.5558, "step": 9101 }, { "epoch": 0.58, "grad_norm": 0.905542254447937, "learning_rate": 4.0083275023659236e-06, "loss": 0.6417, "step": 9102 }, { "epoch": 0.58, "grad_norm": 0.8471083045005798, "learning_rate": 4.007321911557483e-06, "loss": 0.6012, "step": 9103 }, { "epoch": 0.58, "grad_norm": 0.8830850124359131, "learning_rate": 4.0063163625472645e-06, "loss": 0.5628, "step": 9104 }, { "epoch": 0.58, "grad_norm": 0.8656706213951111, "learning_rate": 4.005310855377608e-06, "loss": 0.6062, "step": 9105 }, { "epoch": 0.58, "grad_norm": 0.8246329426765442, "learning_rate": 4.004305390090848e-06, "loss": 0.5715, "step": 9106 }, { "epoch": 0.58, "grad_norm": 0.9124095439910889, "learning_rate": 4.003299966729325e-06, "loss": 0.5901, "step": 9107 }, { "epoch": 0.58, "grad_norm": 0.9204335808753967, "learning_rate": 4.002294585335375e-06, "loss": 0.5478, "step": 9108 }, { "epoch": 0.58, "grad_norm": 0.9215397834777832, "learning_rate": 4.001289245951329e-06, "loss": 0.5763, "step": 9109 }, { "epoch": 0.58, "grad_norm": 0.9767603278160095, "learning_rate": 4.000283948619517e-06, "loss": 0.5892, "step": 9110 }, { "epoch": 0.58, "grad_norm": 0.8919650316238403, "learning_rate": 3.99927869338227e-06, "loss": 0.551, "step": 9111 }, { "epoch": 0.58, "grad_norm": 0.9372937679290771, "learning_rate": 3.998273480281919e-06, "loss": 0.5895, "step": 9112 }, { "epoch": 0.58, "grad_norm": 1.0142635107040405, "learning_rate": 3.997268309360785e-06, "loss": 0.5965, "step": 9113 }, { "epoch": 0.58, "grad_norm": 0.8675452470779419, "learning_rate": 3.996263180661194e-06, "loss": 0.5961, "step": 9114 }, { "epoch": 0.58, "grad_norm": 0.847707211971283, "learning_rate": 3.995258094225468e-06, "loss": 0.5375, "step": 9115 }, { "epoch": 0.58, "grad_norm": 0.8482071161270142, "learning_rate": 3.99425305009593e-06, "loss": 0.5372, "step": 9116 }, { "epoch": 0.58, "grad_norm": 0.9109798669815063, "learning_rate": 3.993248048314897e-06, "loss": 0.5797, "step": 9117 }, { "epoch": 0.58, "grad_norm": 0.9148001670837402, "learning_rate": 3.992243088924686e-06, "loss": 0.575, "step": 9118 }, { "epoch": 0.58, "grad_norm": 0.8979611396789551, "learning_rate": 3.991238171967612e-06, "loss": 0.5755, "step": 9119 }, { "epoch": 0.58, "grad_norm": 0.9315516352653503, "learning_rate": 3.9902332974859906e-06, "loss": 0.6237, "step": 9120 }, { "epoch": 0.58, "grad_norm": 0.8877137899398804, "learning_rate": 3.989228465522133e-06, "loss": 0.6188, "step": 9121 }, { "epoch": 0.58, "grad_norm": 0.9431552886962891, "learning_rate": 3.9882236761183476e-06, "loss": 0.6138, "step": 9122 }, { "epoch": 0.58, "grad_norm": 0.877837061882019, "learning_rate": 3.987218929316942e-06, "loss": 0.6009, "step": 9123 }, { "epoch": 0.58, "grad_norm": 0.8428844809532166, "learning_rate": 3.986214225160226e-06, "loss": 0.565, "step": 9124 }, { "epoch": 0.58, "grad_norm": 0.9297831654548645, "learning_rate": 3.9852095636905026e-06, "loss": 0.6212, "step": 9125 }, { "epoch": 0.58, "grad_norm": 0.9488120079040527, "learning_rate": 3.984204944950073e-06, "loss": 0.63, "step": 9126 }, { "epoch": 0.58, "grad_norm": 0.9093875288963318, "learning_rate": 3.983200368981241e-06, "loss": 0.6243, "step": 9127 }, { "epoch": 0.58, "grad_norm": 0.8893300294876099, "learning_rate": 3.982195835826302e-06, "loss": 0.5697, "step": 9128 }, { "epoch": 0.58, "grad_norm": 0.8863883018493652, "learning_rate": 3.981191345527558e-06, "loss": 0.6158, "step": 9129 }, { "epoch": 0.58, "grad_norm": 0.98292475938797, "learning_rate": 3.9801868981273e-06, "loss": 0.6121, "step": 9130 }, { "epoch": 0.58, "grad_norm": 0.8693172335624695, "learning_rate": 3.979182493667826e-06, "loss": 0.5718, "step": 9131 }, { "epoch": 0.58, "grad_norm": 0.9281517863273621, "learning_rate": 3.978178132191424e-06, "loss": 0.5888, "step": 9132 }, { "epoch": 0.58, "grad_norm": 0.9215491414070129, "learning_rate": 3.9771738137403885e-06, "loss": 0.5927, "step": 9133 }, { "epoch": 0.58, "grad_norm": 0.9008755683898926, "learning_rate": 3.976169538357004e-06, "loss": 0.5982, "step": 9134 }, { "epoch": 0.58, "grad_norm": 0.9302978515625, "learning_rate": 3.97516530608356e-06, "loss": 0.6214, "step": 9135 }, { "epoch": 0.58, "grad_norm": 0.8978670239448547, "learning_rate": 3.974161116962337e-06, "loss": 0.6003, "step": 9136 }, { "epoch": 0.58, "grad_norm": 0.851223886013031, "learning_rate": 3.973156971035623e-06, "loss": 0.5936, "step": 9137 }, { "epoch": 0.58, "grad_norm": 0.8626120686531067, "learning_rate": 3.9721528683456966e-06, "loss": 0.5732, "step": 9138 }, { "epoch": 0.58, "grad_norm": 0.9312442541122437, "learning_rate": 3.971148808934838e-06, "loss": 0.6243, "step": 9139 }, { "epoch": 0.58, "grad_norm": 0.8899321556091309, "learning_rate": 3.970144792845322e-06, "loss": 0.5334, "step": 9140 }, { "epoch": 0.58, "grad_norm": 0.8440714478492737, "learning_rate": 3.9691408201194275e-06, "loss": 0.6048, "step": 9141 }, { "epoch": 0.58, "grad_norm": 0.8218972086906433, "learning_rate": 3.968136890799426e-06, "loss": 0.5506, "step": 9142 }, { "epoch": 0.58, "grad_norm": 0.8766604661941528, "learning_rate": 3.967133004927592e-06, "loss": 0.5832, "step": 9143 }, { "epoch": 0.58, "grad_norm": 0.9405858516693115, "learning_rate": 3.9661291625461945e-06, "loss": 0.6241, "step": 9144 }, { "epoch": 0.58, "grad_norm": 0.8780211806297302, "learning_rate": 3.965125363697499e-06, "loss": 0.599, "step": 9145 }, { "epoch": 0.58, "grad_norm": 0.8795492649078369, "learning_rate": 3.964121608423775e-06, "loss": 0.6152, "step": 9146 }, { "epoch": 0.58, "grad_norm": 0.8778110146522522, "learning_rate": 3.963117896767288e-06, "loss": 0.6185, "step": 9147 }, { "epoch": 0.58, "grad_norm": 0.8248224258422852, "learning_rate": 3.962114228770299e-06, "loss": 0.5828, "step": 9148 }, { "epoch": 0.58, "grad_norm": 0.8475858569145203, "learning_rate": 3.961110604475067e-06, "loss": 0.5455, "step": 9149 }, { "epoch": 0.58, "grad_norm": 0.8509166240692139, "learning_rate": 3.960107023923855e-06, "loss": 0.6004, "step": 9150 }, { "epoch": 0.58, "grad_norm": 0.8684119582176208, "learning_rate": 3.959103487158919e-06, "loss": 0.5847, "step": 9151 }, { "epoch": 0.58, "grad_norm": 0.799341082572937, "learning_rate": 3.958099994222515e-06, "loss": 0.5715, "step": 9152 }, { "epoch": 0.58, "grad_norm": 0.8934925198554993, "learning_rate": 3.957096545156893e-06, "loss": 0.6094, "step": 9153 }, { "epoch": 0.58, "grad_norm": 0.9568246603012085, "learning_rate": 3.956093140004308e-06, "loss": 0.5888, "step": 9154 }, { "epoch": 0.58, "grad_norm": 0.9072986245155334, "learning_rate": 3.955089778807012e-06, "loss": 0.597, "step": 9155 }, { "epoch": 0.58, "grad_norm": 0.8377887010574341, "learning_rate": 3.954086461607248e-06, "loss": 0.5394, "step": 9156 }, { "epoch": 0.58, "grad_norm": 0.8361679315567017, "learning_rate": 3.9530831884472655e-06, "loss": 0.5454, "step": 9157 }, { "epoch": 0.58, "grad_norm": 0.9167816638946533, "learning_rate": 3.952079959369308e-06, "loss": 0.5748, "step": 9158 }, { "epoch": 0.58, "grad_norm": 1.0050288438796997, "learning_rate": 3.951076774415619e-06, "loss": 0.603, "step": 9159 }, { "epoch": 0.58, "grad_norm": 0.8782682418823242, "learning_rate": 3.950073633628436e-06, "loss": 0.6133, "step": 9160 }, { "epoch": 0.58, "grad_norm": 0.8905148506164551, "learning_rate": 3.949070537050002e-06, "loss": 0.6327, "step": 9161 }, { "epoch": 0.58, "grad_norm": 0.9059675335884094, "learning_rate": 3.948067484722549e-06, "loss": 0.638, "step": 9162 }, { "epoch": 0.58, "grad_norm": 0.8948028087615967, "learning_rate": 3.947064476688318e-06, "loss": 0.5723, "step": 9163 }, { "epoch": 0.58, "grad_norm": 0.9257702231407166, "learning_rate": 3.946061512989537e-06, "loss": 0.6301, "step": 9164 }, { "epoch": 0.58, "grad_norm": 0.8525533676147461, "learning_rate": 3.94505859366844e-06, "loss": 0.5937, "step": 9165 }, { "epoch": 0.58, "grad_norm": 0.8956154584884644, "learning_rate": 3.944055718767255e-06, "loss": 0.5908, "step": 9166 }, { "epoch": 0.58, "grad_norm": 0.8894206881523132, "learning_rate": 3.943052888328211e-06, "loss": 0.5536, "step": 9167 }, { "epoch": 0.58, "grad_norm": 0.8910138010978699, "learning_rate": 3.942050102393533e-06, "loss": 0.6453, "step": 9168 }, { "epoch": 0.58, "grad_norm": 0.8522058129310608, "learning_rate": 3.941047361005445e-06, "loss": 0.5562, "step": 9169 }, { "epoch": 0.58, "grad_norm": 0.9396253228187561, "learning_rate": 3.940044664206168e-06, "loss": 0.6234, "step": 9170 }, { "epoch": 0.58, "grad_norm": 0.914997398853302, "learning_rate": 3.939042012037924e-06, "loss": 0.5733, "step": 9171 }, { "epoch": 0.58, "grad_norm": 0.8962453603744507, "learning_rate": 3.938039404542929e-06, "loss": 0.5559, "step": 9172 }, { "epoch": 0.58, "grad_norm": 0.9689484238624573, "learning_rate": 3.937036841763401e-06, "loss": 0.5797, "step": 9173 }, { "epoch": 0.58, "grad_norm": 0.8645898103713989, "learning_rate": 3.936034323741555e-06, "loss": 0.5752, "step": 9174 }, { "epoch": 0.58, "grad_norm": 0.9438555240631104, "learning_rate": 3.935031850519599e-06, "loss": 0.5716, "step": 9175 }, { "epoch": 0.58, "grad_norm": 1.1834338903427124, "learning_rate": 3.934029422139749e-06, "loss": 0.5701, "step": 9176 }, { "epoch": 0.58, "grad_norm": 0.954289972782135, "learning_rate": 3.933027038644213e-06, "loss": 0.5488, "step": 9177 }, { "epoch": 0.58, "grad_norm": 0.9095122218132019, "learning_rate": 3.932024700075196e-06, "loss": 0.5646, "step": 9178 }, { "epoch": 0.58, "grad_norm": 0.8213743567466736, "learning_rate": 3.931022406474902e-06, "loss": 0.5665, "step": 9179 }, { "epoch": 0.58, "grad_norm": 0.9303811192512512, "learning_rate": 3.930020157885537e-06, "loss": 0.5638, "step": 9180 }, { "epoch": 0.58, "grad_norm": 0.8715723156929016, "learning_rate": 3.929017954349301e-06, "loss": 0.5983, "step": 9181 }, { "epoch": 0.58, "grad_norm": 0.9098017811775208, "learning_rate": 3.928015795908394e-06, "loss": 0.566, "step": 9182 }, { "epoch": 0.58, "grad_norm": 0.8678735494613647, "learning_rate": 3.927013682605011e-06, "loss": 0.5558, "step": 9183 }, { "epoch": 0.58, "grad_norm": 0.870380699634552, "learning_rate": 3.9260116144813495e-06, "loss": 0.6038, "step": 9184 }, { "epoch": 0.58, "grad_norm": 0.8809983730316162, "learning_rate": 3.925009591579604e-06, "loss": 0.5607, "step": 9185 }, { "epoch": 0.58, "grad_norm": 0.8992043137550354, "learning_rate": 3.9240076139419655e-06, "loss": 0.5617, "step": 9186 }, { "epoch": 0.58, "grad_norm": 0.884871244430542, "learning_rate": 3.92300568161062e-06, "loss": 0.5958, "step": 9187 }, { "epoch": 0.58, "grad_norm": 0.9241304993629456, "learning_rate": 3.9220037946277606e-06, "loss": 0.5833, "step": 9188 }, { "epoch": 0.58, "grad_norm": 0.837876558303833, "learning_rate": 3.921001953035573e-06, "loss": 0.5648, "step": 9189 }, { "epoch": 0.58, "grad_norm": 0.8822311162948608, "learning_rate": 3.920000156876238e-06, "loss": 0.5887, "step": 9190 }, { "epoch": 0.58, "grad_norm": 0.8584680557250977, "learning_rate": 3.91899840619194e-06, "loss": 0.5667, "step": 9191 }, { "epoch": 0.58, "grad_norm": 0.9705455303192139, "learning_rate": 3.9179967010248556e-06, "loss": 0.6367, "step": 9192 }, { "epoch": 0.58, "grad_norm": 0.8703861236572266, "learning_rate": 3.91699504141717e-06, "loss": 0.5526, "step": 9193 }, { "epoch": 0.58, "grad_norm": 0.8837298154830933, "learning_rate": 3.915993427411054e-06, "loss": 0.6264, "step": 9194 }, { "epoch": 0.58, "grad_norm": 0.9295274615287781, "learning_rate": 3.914991859048684e-06, "loss": 0.5771, "step": 9195 }, { "epoch": 0.58, "grad_norm": 0.8776589035987854, "learning_rate": 3.913990336372231e-06, "loss": 0.562, "step": 9196 }, { "epoch": 0.58, "grad_norm": 0.9219998717308044, "learning_rate": 3.912988859423869e-06, "loss": 0.5872, "step": 9197 }, { "epoch": 0.58, "grad_norm": 0.9244682788848877, "learning_rate": 3.911987428245765e-06, "loss": 0.5853, "step": 9198 }, { "epoch": 0.58, "grad_norm": 0.908510148525238, "learning_rate": 3.9109860428800845e-06, "loss": 0.5842, "step": 9199 }, { "epoch": 0.58, "grad_norm": 0.8179001212120056, "learning_rate": 3.909984703368992e-06, "loss": 0.5537, "step": 9200 }, { "epoch": 0.58, "grad_norm": 0.8670381903648376, "learning_rate": 3.9089834097546534e-06, "loss": 0.5738, "step": 9201 }, { "epoch": 0.58, "grad_norm": 0.9407733678817749, "learning_rate": 3.907982162079229e-06, "loss": 0.5843, "step": 9202 }, { "epoch": 0.58, "grad_norm": 0.8975993394851685, "learning_rate": 3.906980960384875e-06, "loss": 0.592, "step": 9203 }, { "epoch": 0.58, "grad_norm": 0.9089202880859375, "learning_rate": 3.90597980471375e-06, "loss": 0.5626, "step": 9204 }, { "epoch": 0.58, "grad_norm": 0.9043983221054077, "learning_rate": 3.904978695108011e-06, "loss": 0.6234, "step": 9205 }, { "epoch": 0.58, "grad_norm": 0.8751869201660156, "learning_rate": 3.9039776316098104e-06, "loss": 0.584, "step": 9206 }, { "epoch": 0.58, "grad_norm": 0.9038695693016052, "learning_rate": 3.902976614261298e-06, "loss": 0.5378, "step": 9207 }, { "epoch": 0.58, "grad_norm": 0.920074462890625, "learning_rate": 3.901975643104625e-06, "loss": 0.5855, "step": 9208 }, { "epoch": 0.58, "grad_norm": 0.8642706871032715, "learning_rate": 3.9009747181819355e-06, "loss": 0.5913, "step": 9209 }, { "epoch": 0.58, "grad_norm": 0.9230958223342896, "learning_rate": 3.8999738395353795e-06, "loss": 0.6275, "step": 9210 }, { "epoch": 0.58, "grad_norm": 0.8580319285392761, "learning_rate": 3.898973007207097e-06, "loss": 0.5848, "step": 9211 }, { "epoch": 0.58, "grad_norm": 0.9039139747619629, "learning_rate": 3.897972221239233e-06, "loss": 0.6364, "step": 9212 }, { "epoch": 0.58, "grad_norm": 0.8444435596466064, "learning_rate": 3.896971481673923e-06, "loss": 0.5956, "step": 9213 }, { "epoch": 0.58, "grad_norm": 0.832820475101471, "learning_rate": 3.895970788553308e-06, "loss": 0.5442, "step": 9214 }, { "epoch": 0.58, "grad_norm": 0.9238991141319275, "learning_rate": 3.894970141919522e-06, "loss": 0.5961, "step": 9215 }, { "epoch": 0.58, "grad_norm": 0.8753307461738586, "learning_rate": 3.8939695418147e-06, "loss": 0.5827, "step": 9216 }, { "epoch": 0.58, "grad_norm": 0.8677202463150024, "learning_rate": 3.892968988280971e-06, "loss": 0.6043, "step": 9217 }, { "epoch": 0.58, "grad_norm": 0.8751778602600098, "learning_rate": 3.891968481360469e-06, "loss": 0.6046, "step": 9218 }, { "epoch": 0.58, "grad_norm": 0.8408955931663513, "learning_rate": 3.890968021095318e-06, "loss": 0.5691, "step": 9219 }, { "epoch": 0.58, "grad_norm": 0.8872222900390625, "learning_rate": 3.889967607527648e-06, "loss": 0.5943, "step": 9220 }, { "epoch": 0.58, "grad_norm": 0.8292039036750793, "learning_rate": 3.888967240699578e-06, "loss": 0.5651, "step": 9221 }, { "epoch": 0.58, "grad_norm": 0.8514560461044312, "learning_rate": 3.887966920653234e-06, "loss": 0.5667, "step": 9222 }, { "epoch": 0.58, "grad_norm": 0.8210045695304871, "learning_rate": 3.886966647430733e-06, "loss": 0.4967, "step": 9223 }, { "epoch": 0.58, "grad_norm": 0.9208805561065674, "learning_rate": 3.8859664210741965e-06, "loss": 0.537, "step": 9224 }, { "epoch": 0.58, "grad_norm": 1.0000219345092773, "learning_rate": 3.884966241625737e-06, "loss": 0.6296, "step": 9225 }, { "epoch": 0.58, "grad_norm": 0.9203490018844604, "learning_rate": 3.88396610912747e-06, "loss": 0.5744, "step": 9226 }, { "epoch": 0.58, "grad_norm": 0.8076592087745667, "learning_rate": 3.882966023621509e-06, "loss": 0.5488, "step": 9227 }, { "epoch": 0.58, "grad_norm": 0.8537278771400452, "learning_rate": 3.881965985149962e-06, "loss": 0.5721, "step": 9228 }, { "epoch": 0.58, "grad_norm": 0.9415518641471863, "learning_rate": 3.880965993754939e-06, "loss": 0.6237, "step": 9229 }, { "epoch": 0.58, "grad_norm": 0.8622970581054688, "learning_rate": 3.879966049478544e-06, "loss": 0.5502, "step": 9230 }, { "epoch": 0.58, "grad_norm": 0.9145261645317078, "learning_rate": 3.878966152362882e-06, "loss": 0.5996, "step": 9231 }, { "epoch": 0.58, "grad_norm": 0.8953229784965515, "learning_rate": 3.877966302450057e-06, "loss": 0.5741, "step": 9232 }, { "epoch": 0.58, "grad_norm": 0.8525993824005127, "learning_rate": 3.876966499782168e-06, "loss": 0.5531, "step": 9233 }, { "epoch": 0.59, "grad_norm": 0.8899672627449036, "learning_rate": 3.875966744401311e-06, "loss": 0.6231, "step": 9234 }, { "epoch": 0.59, "grad_norm": 0.9033567905426025, "learning_rate": 3.874967036349585e-06, "loss": 0.5987, "step": 9235 }, { "epoch": 0.59, "grad_norm": 0.8257200717926025, "learning_rate": 3.8739673756690845e-06, "loss": 0.5333, "step": 9236 }, { "epoch": 0.59, "grad_norm": 0.9355111718177795, "learning_rate": 3.872967762401899e-06, "loss": 0.5712, "step": 9237 }, { "epoch": 0.59, "grad_norm": 0.9281190037727356, "learning_rate": 3.8719681965901225e-06, "loss": 0.6097, "step": 9238 }, { "epoch": 0.59, "grad_norm": 0.8676934242248535, "learning_rate": 3.870968678275838e-06, "loss": 0.6332, "step": 9239 }, { "epoch": 0.59, "grad_norm": 0.8591299057006836, "learning_rate": 3.869969207501138e-06, "loss": 0.616, "step": 9240 }, { "epoch": 0.59, "grad_norm": 0.9023558497428894, "learning_rate": 3.868969784308101e-06, "loss": 0.6094, "step": 9241 }, { "epoch": 0.59, "grad_norm": 0.8794646859169006, "learning_rate": 3.867970408738814e-06, "loss": 0.566, "step": 9242 }, { "epoch": 0.59, "grad_norm": 0.8649892210960388, "learning_rate": 3.866971080835352e-06, "loss": 0.5934, "step": 9243 }, { "epoch": 0.59, "grad_norm": 0.9679709672927856, "learning_rate": 3.8659718006398e-06, "loss": 0.6249, "step": 9244 }, { "epoch": 0.59, "grad_norm": 0.8926246166229248, "learning_rate": 3.864972568194227e-06, "loss": 0.5787, "step": 9245 }, { "epoch": 0.59, "grad_norm": 0.8640733957290649, "learning_rate": 3.863973383540714e-06, "loss": 0.6095, "step": 9246 }, { "epoch": 0.59, "grad_norm": 0.8310282230377197, "learning_rate": 3.8629742467213266e-06, "loss": 0.5379, "step": 9247 }, { "epoch": 0.59, "grad_norm": 0.8928051590919495, "learning_rate": 3.86197515777814e-06, "loss": 0.538, "step": 9248 }, { "epoch": 0.59, "grad_norm": 0.9271872639656067, "learning_rate": 3.860976116753221e-06, "loss": 0.5781, "step": 9249 }, { "epoch": 0.59, "grad_norm": 0.8660386204719543, "learning_rate": 3.859977123688636e-06, "loss": 0.6074, "step": 9250 }, { "epoch": 0.59, "grad_norm": 0.8669256567955017, "learning_rate": 3.858978178626446e-06, "loss": 0.59, "step": 9251 }, { "epoch": 0.59, "grad_norm": 0.8511551022529602, "learning_rate": 3.8579792816087175e-06, "loss": 0.5496, "step": 9252 }, { "epoch": 0.59, "grad_norm": 0.8670158386230469, "learning_rate": 3.856980432677508e-06, "loss": 0.5846, "step": 9253 }, { "epoch": 0.59, "grad_norm": 0.9106800556182861, "learning_rate": 3.855981631874877e-06, "loss": 0.6221, "step": 9254 }, { "epoch": 0.59, "grad_norm": 0.9327592253684998, "learning_rate": 3.85498287924288e-06, "loss": 0.6351, "step": 9255 }, { "epoch": 0.59, "grad_norm": 0.8513845801353455, "learning_rate": 3.853984174823568e-06, "loss": 0.5642, "step": 9256 }, { "epoch": 0.59, "grad_norm": 0.9598379135131836, "learning_rate": 3.852985518658997e-06, "loss": 0.5821, "step": 9257 }, { "epoch": 0.59, "grad_norm": 0.9493588209152222, "learning_rate": 3.851986910791217e-06, "loss": 0.575, "step": 9258 }, { "epoch": 0.59, "grad_norm": 0.8645276427268982, "learning_rate": 3.850988351262274e-06, "loss": 0.5513, "step": 9259 }, { "epoch": 0.59, "grad_norm": 0.8836858868598938, "learning_rate": 3.849989840114213e-06, "loss": 0.6298, "step": 9260 }, { "epoch": 0.59, "grad_norm": 0.8826265931129456, "learning_rate": 3.84899137738908e-06, "loss": 0.5903, "step": 9261 }, { "epoch": 0.59, "grad_norm": 0.8552426695823669, "learning_rate": 3.847992963128917e-06, "loss": 0.578, "step": 9262 }, { "epoch": 0.59, "grad_norm": 0.8744699954986572, "learning_rate": 3.846994597375763e-06, "loss": 0.5525, "step": 9263 }, { "epoch": 0.59, "grad_norm": 0.9690203070640564, "learning_rate": 3.845996280171653e-06, "loss": 0.633, "step": 9264 }, { "epoch": 0.59, "grad_norm": 0.9128517508506775, "learning_rate": 3.844998011558626e-06, "loss": 0.6275, "step": 9265 }, { "epoch": 0.59, "grad_norm": 0.8809550404548645, "learning_rate": 3.843999791578716e-06, "loss": 0.5585, "step": 9266 }, { "epoch": 0.59, "grad_norm": 0.8937491178512573, "learning_rate": 3.843001620273954e-06, "loss": 0.5942, "step": 9267 }, { "epoch": 0.59, "grad_norm": 0.8887850046157837, "learning_rate": 3.842003497686367e-06, "loss": 0.5823, "step": 9268 }, { "epoch": 0.59, "grad_norm": 0.8240920305252075, "learning_rate": 3.841005423857984e-06, "loss": 0.561, "step": 9269 }, { "epoch": 0.59, "grad_norm": 0.889115035533905, "learning_rate": 3.840007398830833e-06, "loss": 0.5673, "step": 9270 }, { "epoch": 0.59, "grad_norm": 0.9033503532409668, "learning_rate": 3.839009422646935e-06, "loss": 0.6556, "step": 9271 }, { "epoch": 0.59, "grad_norm": 0.8702270984649658, "learning_rate": 3.8380114953483095e-06, "loss": 0.6081, "step": 9272 }, { "epoch": 0.59, "grad_norm": 0.8264375329017639, "learning_rate": 3.837013616976977e-06, "loss": 0.5227, "step": 9273 }, { "epoch": 0.59, "grad_norm": 0.9013060927391052, "learning_rate": 3.8360157875749575e-06, "loss": 0.6422, "step": 9274 }, { "epoch": 0.59, "grad_norm": 0.8687025904655457, "learning_rate": 3.835018007184265e-06, "loss": 0.6144, "step": 9275 }, { "epoch": 0.59, "grad_norm": 0.8397945761680603, "learning_rate": 3.834020275846909e-06, "loss": 0.5611, "step": 9276 }, { "epoch": 0.59, "grad_norm": 0.8652381896972656, "learning_rate": 3.833022593604902e-06, "loss": 0.5984, "step": 9277 }, { "epoch": 0.59, "grad_norm": 0.8577977418899536, "learning_rate": 3.832024960500257e-06, "loss": 0.5211, "step": 9278 }, { "epoch": 0.59, "grad_norm": 0.9175687432289124, "learning_rate": 3.8310273765749774e-06, "loss": 0.6194, "step": 9279 }, { "epoch": 0.59, "grad_norm": 0.8669849038124084, "learning_rate": 3.830029841871067e-06, "loss": 0.5919, "step": 9280 }, { "epoch": 0.59, "grad_norm": 0.9313320517539978, "learning_rate": 3.82903235643053e-06, "loss": 0.6049, "step": 9281 }, { "epoch": 0.59, "grad_norm": 0.8776915669441223, "learning_rate": 3.828034920295368e-06, "loss": 0.5875, "step": 9282 }, { "epoch": 0.59, "grad_norm": 0.9610856175422668, "learning_rate": 3.827037533507579e-06, "loss": 0.5978, "step": 9283 }, { "epoch": 0.59, "grad_norm": 0.8849360942840576, "learning_rate": 3.826040196109158e-06, "loss": 0.5807, "step": 9284 }, { "epoch": 0.59, "grad_norm": 0.9191281795501709, "learning_rate": 3.825042908142102e-06, "loss": 0.5998, "step": 9285 }, { "epoch": 0.59, "grad_norm": 0.8342413306236267, "learning_rate": 3.824045669648398e-06, "loss": 0.5753, "step": 9286 }, { "epoch": 0.59, "grad_norm": 0.8650674819946289, "learning_rate": 3.823048480670044e-06, "loss": 0.5979, "step": 9287 }, { "epoch": 0.59, "grad_norm": 0.8660332560539246, "learning_rate": 3.8220513412490215e-06, "loss": 0.5876, "step": 9288 }, { "epoch": 0.59, "grad_norm": 0.9197229743003845, "learning_rate": 3.821054251427321e-06, "loss": 0.622, "step": 9289 }, { "epoch": 0.59, "grad_norm": 0.8805333375930786, "learning_rate": 3.820057211246923e-06, "loss": 0.5349, "step": 9290 }, { "epoch": 0.59, "grad_norm": 1.0064138174057007, "learning_rate": 3.819060220749813e-06, "loss": 0.6236, "step": 9291 }, { "epoch": 0.59, "grad_norm": 0.9283258318901062, "learning_rate": 3.8180632799779675e-06, "loss": 0.56, "step": 9292 }, { "epoch": 0.59, "grad_norm": 0.9393151998519897, "learning_rate": 3.817066388973367e-06, "loss": 0.5407, "step": 9293 }, { "epoch": 0.59, "grad_norm": 0.87945157289505, "learning_rate": 3.816069547777983e-06, "loss": 0.6007, "step": 9294 }, { "epoch": 0.59, "grad_norm": 0.9038872718811035, "learning_rate": 3.815072756433794e-06, "loss": 0.6051, "step": 9295 }, { "epoch": 0.59, "grad_norm": 0.8580070734024048, "learning_rate": 3.814076014982769e-06, "loss": 0.5844, "step": 9296 }, { "epoch": 0.59, "grad_norm": 0.9397634863853455, "learning_rate": 3.8130793234668782e-06, "loss": 0.6006, "step": 9297 }, { "epoch": 0.59, "grad_norm": 0.9336340427398682, "learning_rate": 3.812082681928086e-06, "loss": 0.6343, "step": 9298 }, { "epoch": 0.59, "grad_norm": 0.8567546606063843, "learning_rate": 3.81108609040836e-06, "loss": 0.5669, "step": 9299 }, { "epoch": 0.59, "grad_norm": 0.9026763439178467, "learning_rate": 3.810089548949665e-06, "loss": 0.6, "step": 9300 }, { "epoch": 0.59, "grad_norm": 0.8796485066413879, "learning_rate": 3.8090930575939588e-06, "loss": 0.56, "step": 9301 }, { "epoch": 0.59, "grad_norm": 0.9571773409843445, "learning_rate": 3.8080966163832e-06, "loss": 0.6081, "step": 9302 }, { "epoch": 0.59, "grad_norm": 0.8654407262802124, "learning_rate": 3.807100225359346e-06, "loss": 0.5752, "step": 9303 }, { "epoch": 0.59, "grad_norm": 0.942138135433197, "learning_rate": 3.8061038845643535e-06, "loss": 0.6286, "step": 9304 }, { "epoch": 0.59, "grad_norm": 0.9119827747344971, "learning_rate": 3.8051075940401727e-06, "loss": 0.5855, "step": 9305 }, { "epoch": 0.59, "grad_norm": 0.908536970615387, "learning_rate": 3.8041113538287537e-06, "loss": 0.6099, "step": 9306 }, { "epoch": 0.59, "grad_norm": 0.9727365970611572, "learning_rate": 3.803115163972044e-06, "loss": 0.5992, "step": 9307 }, { "epoch": 0.59, "grad_norm": 0.9480968713760376, "learning_rate": 3.8021190245119937e-06, "loss": 0.6685, "step": 9308 }, { "epoch": 0.59, "grad_norm": 0.9626975655555725, "learning_rate": 3.8011229354905445e-06, "loss": 0.633, "step": 9309 }, { "epoch": 0.59, "grad_norm": 0.9095605611801147, "learning_rate": 3.8001268969496357e-06, "loss": 0.5897, "step": 9310 }, { "epoch": 0.59, "grad_norm": 0.9230700135231018, "learning_rate": 3.799130908931209e-06, "loss": 0.6167, "step": 9311 }, { "epoch": 0.59, "grad_norm": 0.8941061496734619, "learning_rate": 3.7981349714772044e-06, "loss": 0.5678, "step": 9312 }, { "epoch": 0.59, "grad_norm": 0.9019367098808289, "learning_rate": 3.7971390846295546e-06, "loss": 0.6376, "step": 9313 }, { "epoch": 0.59, "grad_norm": 0.9550539255142212, "learning_rate": 3.7961432484301925e-06, "loss": 0.6213, "step": 9314 }, { "epoch": 0.59, "grad_norm": 0.8824061155319214, "learning_rate": 3.7951474629210517e-06, "loss": 0.5651, "step": 9315 }, { "epoch": 0.59, "grad_norm": 0.9145764708518982, "learning_rate": 3.7941517281440577e-06, "loss": 0.5492, "step": 9316 }, { "epoch": 0.59, "grad_norm": 0.8980282545089722, "learning_rate": 3.7931560441411413e-06, "loss": 0.5882, "step": 9317 }, { "epoch": 0.59, "grad_norm": 0.8780221343040466, "learning_rate": 3.792160410954225e-06, "loss": 0.5892, "step": 9318 }, { "epoch": 0.59, "grad_norm": 0.8798972368240356, "learning_rate": 3.791164828625233e-06, "loss": 0.5706, "step": 9319 }, { "epoch": 0.59, "grad_norm": 0.8012358546257019, "learning_rate": 3.7901692971960823e-06, "loss": 0.5133, "step": 9320 }, { "epoch": 0.59, "grad_norm": 0.9223332405090332, "learning_rate": 3.7891738167086968e-06, "loss": 0.5617, "step": 9321 }, { "epoch": 0.59, "grad_norm": 0.8617243766784668, "learning_rate": 3.7881783872049875e-06, "loss": 0.6052, "step": 9322 }, { "epoch": 0.59, "grad_norm": 0.8890591859817505, "learning_rate": 3.7871830087268726e-06, "loss": 0.5505, "step": 9323 }, { "epoch": 0.59, "grad_norm": 0.9087486267089844, "learning_rate": 3.7861876813162596e-06, "loss": 0.5906, "step": 9324 }, { "epoch": 0.59, "grad_norm": 0.9168681502342224, "learning_rate": 3.7851924050150633e-06, "loss": 0.6217, "step": 9325 }, { "epoch": 0.59, "grad_norm": 0.8641383051872253, "learning_rate": 3.7841971798651876e-06, "loss": 0.5495, "step": 9326 }, { "epoch": 0.59, "grad_norm": 0.9064701795578003, "learning_rate": 3.78320200590854e-06, "loss": 0.6339, "step": 9327 }, { "epoch": 0.59, "grad_norm": 0.844735324382782, "learning_rate": 3.782206883187021e-06, "loss": 0.6285, "step": 9328 }, { "epoch": 0.59, "grad_norm": 0.953070878982544, "learning_rate": 3.7812118117425363e-06, "loss": 0.5556, "step": 9329 }, { "epoch": 0.59, "grad_norm": 0.8404299020767212, "learning_rate": 3.7802167916169808e-06, "loss": 0.6154, "step": 9330 }, { "epoch": 0.59, "grad_norm": 0.8700167536735535, "learning_rate": 3.7792218228522536e-06, "loss": 0.6355, "step": 9331 }, { "epoch": 0.59, "grad_norm": 0.8953800797462463, "learning_rate": 3.7782269054902493e-06, "loss": 0.5983, "step": 9332 }, { "epoch": 0.59, "grad_norm": 0.8571730256080627, "learning_rate": 3.777232039572858e-06, "loss": 0.5895, "step": 9333 }, { "epoch": 0.59, "grad_norm": 0.9391055107116699, "learning_rate": 3.7762372251419722e-06, "loss": 0.64, "step": 9334 }, { "epoch": 0.59, "grad_norm": 0.8176417350769043, "learning_rate": 3.7752424622394807e-06, "loss": 0.5433, "step": 9335 }, { "epoch": 0.59, "grad_norm": 0.8655744194984436, "learning_rate": 3.7742477509072684e-06, "loss": 0.5325, "step": 9336 }, { "epoch": 0.59, "grad_norm": 0.9132146835327148, "learning_rate": 3.7732530911872177e-06, "loss": 0.5687, "step": 9337 }, { "epoch": 0.59, "grad_norm": 0.9163744449615479, "learning_rate": 3.7722584831212127e-06, "loss": 0.5955, "step": 9338 }, { "epoch": 0.59, "grad_norm": 0.9320352673530579, "learning_rate": 3.771263926751133e-06, "loss": 0.5999, "step": 9339 }, { "epoch": 0.59, "grad_norm": 0.8946950435638428, "learning_rate": 3.7702694221188548e-06, "loss": 0.599, "step": 9340 }, { "epoch": 0.59, "grad_norm": 0.8757476210594177, "learning_rate": 3.769274969266251e-06, "loss": 0.5752, "step": 9341 }, { "epoch": 0.59, "grad_norm": 0.9394705295562744, "learning_rate": 3.768280568235198e-06, "loss": 0.607, "step": 9342 }, { "epoch": 0.59, "grad_norm": 0.8652751445770264, "learning_rate": 3.767286219067566e-06, "loss": 0.5999, "step": 9343 }, { "epoch": 0.59, "grad_norm": 0.9048978686332703, "learning_rate": 3.766291921805224e-06, "loss": 0.5745, "step": 9344 }, { "epoch": 0.59, "grad_norm": 0.8632850050926208, "learning_rate": 3.765297676490035e-06, "loss": 0.5459, "step": 9345 }, { "epoch": 0.59, "grad_norm": 0.8732088804244995, "learning_rate": 3.764303483163867e-06, "loss": 0.6096, "step": 9346 }, { "epoch": 0.59, "grad_norm": 0.914079487323761, "learning_rate": 3.7633093418685806e-06, "loss": 0.5492, "step": 9347 }, { "epoch": 0.59, "grad_norm": 0.8865966200828552, "learning_rate": 3.7623152526460365e-06, "loss": 0.5839, "step": 9348 }, { "epoch": 0.59, "grad_norm": 0.9960510730743408, "learning_rate": 3.7613212155380907e-06, "loss": 0.6053, "step": 9349 }, { "epoch": 0.59, "grad_norm": 0.8940538763999939, "learning_rate": 3.760327230586598e-06, "loss": 0.56, "step": 9350 }, { "epoch": 0.59, "grad_norm": 0.9300123453140259, "learning_rate": 3.7593332978334153e-06, "loss": 0.5792, "step": 9351 }, { "epoch": 0.59, "grad_norm": 0.8757148385047913, "learning_rate": 3.7583394173203913e-06, "loss": 0.5871, "step": 9352 }, { "epoch": 0.59, "grad_norm": 0.9180594682693481, "learning_rate": 3.757345589089374e-06, "loss": 0.5897, "step": 9353 }, { "epoch": 0.59, "grad_norm": 0.9074809551239014, "learning_rate": 3.75635181318221e-06, "loss": 0.6202, "step": 9354 }, { "epoch": 0.59, "grad_norm": 0.9342314600944519, "learning_rate": 3.755358089640747e-06, "loss": 0.6069, "step": 9355 }, { "epoch": 0.59, "grad_norm": 0.9455806016921997, "learning_rate": 3.754364418506825e-06, "loss": 0.5676, "step": 9356 }, { "epoch": 0.59, "grad_norm": 0.8812072277069092, "learning_rate": 3.7533707998222835e-06, "loss": 0.6217, "step": 9357 }, { "epoch": 0.59, "grad_norm": 0.8865488767623901, "learning_rate": 3.7523772336289594e-06, "loss": 0.5901, "step": 9358 }, { "epoch": 0.59, "grad_norm": 0.8491330146789551, "learning_rate": 3.751383719968692e-06, "loss": 0.5475, "step": 9359 }, { "epoch": 0.59, "grad_norm": 0.8442015647888184, "learning_rate": 3.7503902588833124e-06, "loss": 0.5699, "step": 9360 }, { "epoch": 0.59, "grad_norm": 0.8833118081092834, "learning_rate": 3.7493968504146513e-06, "loss": 0.5281, "step": 9361 }, { "epoch": 0.59, "grad_norm": 0.9113878011703491, "learning_rate": 3.748403494604539e-06, "loss": 0.5694, "step": 9362 }, { "epoch": 0.59, "grad_norm": 0.8851996064186096, "learning_rate": 3.747410191494799e-06, "loss": 0.6256, "step": 9363 }, { "epoch": 0.59, "grad_norm": 0.9763572216033936, "learning_rate": 3.74641694112726e-06, "loss": 0.5549, "step": 9364 }, { "epoch": 0.59, "grad_norm": 0.900982141494751, "learning_rate": 3.745423743543744e-06, "loss": 0.5617, "step": 9365 }, { "epoch": 0.59, "grad_norm": 0.9159935116767883, "learning_rate": 3.7444305987860698e-06, "loss": 0.6144, "step": 9366 }, { "epoch": 0.59, "grad_norm": 0.949503481388092, "learning_rate": 3.7434375068960528e-06, "loss": 0.583, "step": 9367 }, { "epoch": 0.59, "grad_norm": 0.8828021287918091, "learning_rate": 3.7424444679155126e-06, "loss": 0.6385, "step": 9368 }, { "epoch": 0.59, "grad_norm": 0.8816289901733398, "learning_rate": 3.7414514818862613e-06, "loss": 0.6269, "step": 9369 }, { "epoch": 0.59, "grad_norm": 0.9142639636993408, "learning_rate": 3.7404585488501106e-06, "loss": 0.6134, "step": 9370 }, { "epoch": 0.59, "grad_norm": 0.8727523684501648, "learning_rate": 3.7394656688488663e-06, "loss": 0.5525, "step": 9371 }, { "epoch": 0.59, "grad_norm": 0.9459042549133301, "learning_rate": 3.7384728419243386e-06, "loss": 0.5931, "step": 9372 }, { "epoch": 0.59, "grad_norm": 0.916693389415741, "learning_rate": 3.7374800681183334e-06, "loss": 0.5991, "step": 9373 }, { "epoch": 0.59, "grad_norm": 0.9326224327087402, "learning_rate": 3.736487347472649e-06, "loss": 0.6123, "step": 9374 }, { "epoch": 0.59, "grad_norm": 0.962369978427887, "learning_rate": 3.735494680029086e-06, "loss": 0.6034, "step": 9375 }, { "epoch": 0.59, "grad_norm": 0.8627373576164246, "learning_rate": 3.734502065829443e-06, "loss": 0.5739, "step": 9376 }, { "epoch": 0.59, "grad_norm": 0.9000149369239807, "learning_rate": 3.7335095049155173e-06, "loss": 0.5426, "step": 9377 }, { "epoch": 0.59, "grad_norm": 0.9197530150413513, "learning_rate": 3.732516997329101e-06, "loss": 0.6075, "step": 9378 }, { "epoch": 0.59, "grad_norm": 0.8494321703910828, "learning_rate": 3.731524543111983e-06, "loss": 0.6488, "step": 9379 }, { "epoch": 0.59, "grad_norm": 0.9430056810379028, "learning_rate": 3.7305321423059526e-06, "loss": 0.5836, "step": 9380 }, { "epoch": 0.59, "grad_norm": 0.8478774428367615, "learning_rate": 3.7295397949528e-06, "loss": 0.565, "step": 9381 }, { "epoch": 0.59, "grad_norm": 0.9042559266090393, "learning_rate": 3.7285475010943067e-06, "loss": 0.5992, "step": 9382 }, { "epoch": 0.59, "grad_norm": 0.8787109851837158, "learning_rate": 3.7275552607722544e-06, "loss": 0.5903, "step": 9383 }, { "epoch": 0.59, "grad_norm": 0.9351126551628113, "learning_rate": 3.726563074028422e-06, "loss": 0.5989, "step": 9384 }, { "epoch": 0.59, "grad_norm": 0.8641285300254822, "learning_rate": 3.7255709409045914e-06, "loss": 0.5639, "step": 9385 }, { "epoch": 0.59, "grad_norm": 0.8513421416282654, "learning_rate": 3.724578861442535e-06, "loss": 0.5561, "step": 9386 }, { "epoch": 0.59, "grad_norm": 0.9056752324104309, "learning_rate": 3.7235868356840244e-06, "loss": 0.6007, "step": 9387 }, { "epoch": 0.59, "grad_norm": 0.8228696584701538, "learning_rate": 3.722594863670831e-06, "loss": 0.5458, "step": 9388 }, { "epoch": 0.59, "grad_norm": 0.8777575492858887, "learning_rate": 3.7216029454447262e-06, "loss": 0.5547, "step": 9389 }, { "epoch": 0.59, "grad_norm": 0.8967665433883667, "learning_rate": 3.720611081047474e-06, "loss": 0.5842, "step": 9390 }, { "epoch": 0.59, "grad_norm": 0.8031774759292603, "learning_rate": 3.7196192705208378e-06, "loss": 0.5461, "step": 9391 }, { "epoch": 0.6, "grad_norm": 0.9115647077560425, "learning_rate": 3.7186275139065807e-06, "loss": 0.5559, "step": 9392 }, { "epoch": 0.6, "grad_norm": 0.9450206756591797, "learning_rate": 3.7176358112464593e-06, "loss": 0.6226, "step": 9393 }, { "epoch": 0.6, "grad_norm": 0.866363525390625, "learning_rate": 3.716644162582235e-06, "loss": 0.5949, "step": 9394 }, { "epoch": 0.6, "grad_norm": 0.8881354331970215, "learning_rate": 3.7156525679556597e-06, "loss": 0.6358, "step": 9395 }, { "epoch": 0.6, "grad_norm": 0.8575799465179443, "learning_rate": 3.7146610274084875e-06, "loss": 0.6055, "step": 9396 }, { "epoch": 0.6, "grad_norm": 0.9607718586921692, "learning_rate": 3.7136695409824665e-06, "loss": 0.5996, "step": 9397 }, { "epoch": 0.6, "grad_norm": 0.922845184803009, "learning_rate": 3.712678108719348e-06, "loss": 0.6158, "step": 9398 }, { "epoch": 0.6, "grad_norm": 0.9459704756736755, "learning_rate": 3.711686730660875e-06, "loss": 0.5811, "step": 9399 }, { "epoch": 0.6, "grad_norm": 0.8620908856391907, "learning_rate": 3.710695406848794e-06, "loss": 0.6261, "step": 9400 }, { "epoch": 0.6, "grad_norm": 0.9615730047225952, "learning_rate": 3.709704137324841e-06, "loss": 0.6142, "step": 9401 }, { "epoch": 0.6, "grad_norm": 0.8960047960281372, "learning_rate": 3.7087129221307605e-06, "loss": 0.5535, "step": 9402 }, { "epoch": 0.6, "grad_norm": 0.9143571257591248, "learning_rate": 3.7077217613082863e-06, "loss": 0.5854, "step": 9403 }, { "epoch": 0.6, "grad_norm": 0.9054227471351624, "learning_rate": 3.7067306548991543e-06, "loss": 0.6092, "step": 9404 }, { "epoch": 0.6, "grad_norm": 0.9034886956214905, "learning_rate": 3.7057396029450925e-06, "loss": 0.6547, "step": 9405 }, { "epoch": 0.6, "grad_norm": 0.862893283367157, "learning_rate": 3.7047486054878367e-06, "loss": 0.6051, "step": 9406 }, { "epoch": 0.6, "grad_norm": 0.8760849237442017, "learning_rate": 3.7037576625691095e-06, "loss": 0.5925, "step": 9407 }, { "epoch": 0.6, "grad_norm": 0.8564549684524536, "learning_rate": 3.7027667742306393e-06, "loss": 0.5745, "step": 9408 }, { "epoch": 0.6, "grad_norm": 0.8729873895645142, "learning_rate": 3.7017759405141476e-06, "loss": 0.5559, "step": 9409 }, { "epoch": 0.6, "grad_norm": 0.8863754868507385, "learning_rate": 3.7007851614613522e-06, "loss": 0.6388, "step": 9410 }, { "epoch": 0.6, "grad_norm": 0.9098048210144043, "learning_rate": 3.699794437113975e-06, "loss": 0.6317, "step": 9411 }, { "epoch": 0.6, "grad_norm": 0.8687307834625244, "learning_rate": 3.698803767513732e-06, "loss": 0.5728, "step": 9412 }, { "epoch": 0.6, "grad_norm": 0.9310614466667175, "learning_rate": 3.6978131527023363e-06, "loss": 0.5108, "step": 9413 }, { "epoch": 0.6, "grad_norm": 0.8796650767326355, "learning_rate": 3.696822592721497e-06, "loss": 0.6081, "step": 9414 }, { "epoch": 0.6, "grad_norm": 0.8547250628471375, "learning_rate": 3.695832087612925e-06, "loss": 0.5948, "step": 9415 }, { "epoch": 0.6, "grad_norm": 0.8584107756614685, "learning_rate": 3.6948416374183287e-06, "loss": 0.6096, "step": 9416 }, { "epoch": 0.6, "grad_norm": 0.8845054507255554, "learning_rate": 3.6938512421794103e-06, "loss": 0.6239, "step": 9417 }, { "epoch": 0.6, "grad_norm": 0.8871665596961975, "learning_rate": 3.6928609019378702e-06, "loss": 0.6064, "step": 9418 }, { "epoch": 0.6, "grad_norm": 0.9024144411087036, "learning_rate": 3.6918706167354125e-06, "loss": 0.5947, "step": 9419 }, { "epoch": 0.6, "grad_norm": 0.8818714618682861, "learning_rate": 3.690880386613732e-06, "loss": 0.5635, "step": 9420 }, { "epoch": 0.6, "grad_norm": 0.8889881372451782, "learning_rate": 3.689890211614525e-06, "loss": 0.5769, "step": 9421 }, { "epoch": 0.6, "grad_norm": 0.8806608319282532, "learning_rate": 3.6889000917794816e-06, "loss": 0.5825, "step": 9422 }, { "epoch": 0.6, "grad_norm": 0.9338346719741821, "learning_rate": 3.6879100271502953e-06, "loss": 0.6212, "step": 9423 }, { "epoch": 0.6, "grad_norm": 0.8248009085655212, "learning_rate": 3.6869200177686543e-06, "loss": 0.5429, "step": 9424 }, { "epoch": 0.6, "grad_norm": 0.8343362212181091, "learning_rate": 3.6859300636762423e-06, "loss": 0.5304, "step": 9425 }, { "epoch": 0.6, "grad_norm": 0.9038220643997192, "learning_rate": 3.6849401649147453e-06, "loss": 0.6251, "step": 9426 }, { "epoch": 0.6, "grad_norm": 0.929233968257904, "learning_rate": 3.683950321525841e-06, "loss": 0.5699, "step": 9427 }, { "epoch": 0.6, "grad_norm": 0.8593815565109253, "learning_rate": 3.682960533551213e-06, "loss": 0.5558, "step": 9428 }, { "epoch": 0.6, "grad_norm": 0.8797299861907959, "learning_rate": 3.681970801032534e-06, "loss": 0.5799, "step": 9429 }, { "epoch": 0.6, "grad_norm": 0.8691550493240356, "learning_rate": 3.68098112401148e-06, "loss": 0.5446, "step": 9430 }, { "epoch": 0.6, "grad_norm": 0.8693859577178955, "learning_rate": 3.6799915025297206e-06, "loss": 0.5527, "step": 9431 }, { "epoch": 0.6, "grad_norm": 0.8916878700256348, "learning_rate": 3.6790019366289293e-06, "loss": 0.566, "step": 9432 }, { "epoch": 0.6, "grad_norm": 0.907010018825531, "learning_rate": 3.67801242635077e-06, "loss": 0.6145, "step": 9433 }, { "epoch": 0.6, "grad_norm": 0.8580716848373413, "learning_rate": 3.6770229717369086e-06, "loss": 0.5711, "step": 9434 }, { "epoch": 0.6, "grad_norm": 0.8871995806694031, "learning_rate": 3.6760335728290062e-06, "loss": 0.6209, "step": 9435 }, { "epoch": 0.6, "grad_norm": 0.8500985503196716, "learning_rate": 3.6750442296687272e-06, "loss": 0.584, "step": 9436 }, { "epoch": 0.6, "grad_norm": 0.871005654335022, "learning_rate": 3.6740549422977244e-06, "loss": 0.5876, "step": 9437 }, { "epoch": 0.6, "grad_norm": 0.929032564163208, "learning_rate": 3.6730657107576574e-06, "loss": 0.6177, "step": 9438 }, { "epoch": 0.6, "grad_norm": 0.9027160406112671, "learning_rate": 3.6720765350901765e-06, "loss": 0.5653, "step": 9439 }, { "epoch": 0.6, "grad_norm": 0.939904510974884, "learning_rate": 3.671087415336931e-06, "loss": 0.6247, "step": 9440 }, { "epoch": 0.6, "grad_norm": 0.8883887529373169, "learning_rate": 3.6700983515395726e-06, "loss": 0.5748, "step": 9441 }, { "epoch": 0.6, "grad_norm": 0.9077379703521729, "learning_rate": 3.669109343739747e-06, "loss": 0.5666, "step": 9442 }, { "epoch": 0.6, "grad_norm": 0.91063392162323, "learning_rate": 3.668120391979098e-06, "loss": 0.5704, "step": 9443 }, { "epoch": 0.6, "grad_norm": 0.8031530380249023, "learning_rate": 3.6671314962992634e-06, "loss": 0.5456, "step": 9444 }, { "epoch": 0.6, "grad_norm": 0.8312580585479736, "learning_rate": 3.666142656741886e-06, "loss": 0.5977, "step": 9445 }, { "epoch": 0.6, "grad_norm": 0.8705939054489136, "learning_rate": 3.6651538733486027e-06, "loss": 0.5965, "step": 9446 }, { "epoch": 0.6, "grad_norm": 0.9241865873336792, "learning_rate": 3.664165146161045e-06, "loss": 0.5973, "step": 9447 }, { "epoch": 0.6, "grad_norm": 0.8538462519645691, "learning_rate": 3.663176475220844e-06, "loss": 0.5936, "step": 9448 }, { "epoch": 0.6, "grad_norm": 0.8608808517456055, "learning_rate": 3.6621878605696338e-06, "loss": 0.5336, "step": 9449 }, { "epoch": 0.6, "grad_norm": 0.9277994632720947, "learning_rate": 3.6611993022490383e-06, "loss": 0.5753, "step": 9450 }, { "epoch": 0.6, "grad_norm": 0.8902243375778198, "learning_rate": 3.660210800300683e-06, "loss": 0.5836, "step": 9451 }, { "epoch": 0.6, "grad_norm": 0.9182867407798767, "learning_rate": 3.6592223547661888e-06, "loss": 0.616, "step": 9452 }, { "epoch": 0.6, "grad_norm": 0.8831941485404968, "learning_rate": 3.6582339656871778e-06, "loss": 0.5666, "step": 9453 }, { "epoch": 0.6, "grad_norm": 0.9414946436882019, "learning_rate": 3.6572456331052673e-06, "loss": 0.5904, "step": 9454 }, { "epoch": 0.6, "grad_norm": 0.8741511106491089, "learning_rate": 3.656257357062073e-06, "loss": 0.6501, "step": 9455 }, { "epoch": 0.6, "grad_norm": 0.882854700088501, "learning_rate": 3.6552691375992056e-06, "loss": 0.661, "step": 9456 }, { "epoch": 0.6, "grad_norm": 0.8658345937728882, "learning_rate": 3.6542809747582755e-06, "loss": 0.593, "step": 9457 }, { "epoch": 0.6, "grad_norm": 0.856181800365448, "learning_rate": 3.6532928685808937e-06, "loss": 0.6158, "step": 9458 }, { "epoch": 0.6, "grad_norm": 0.9174668788909912, "learning_rate": 3.6523048191086654e-06, "loss": 0.5738, "step": 9459 }, { "epoch": 0.6, "grad_norm": 0.9109136462211609, "learning_rate": 3.6513168263831913e-06, "loss": 0.5629, "step": 9460 }, { "epoch": 0.6, "grad_norm": 0.9288260340690613, "learning_rate": 3.6503288904460725e-06, "loss": 0.5806, "step": 9461 }, { "epoch": 0.6, "grad_norm": 0.912398636341095, "learning_rate": 3.6493410113389116e-06, "loss": 0.6132, "step": 9462 }, { "epoch": 0.6, "grad_norm": 0.9341295957565308, "learning_rate": 3.648353189103302e-06, "loss": 0.6314, "step": 9463 }, { "epoch": 0.6, "grad_norm": 0.885158121585846, "learning_rate": 3.6473654237808365e-06, "loss": 0.5599, "step": 9464 }, { "epoch": 0.6, "grad_norm": 0.8969646096229553, "learning_rate": 3.6463777154131065e-06, "loss": 0.6136, "step": 9465 }, { "epoch": 0.6, "grad_norm": 0.8847031593322754, "learning_rate": 3.645390064041704e-06, "loss": 0.5568, "step": 9466 }, { "epoch": 0.6, "grad_norm": 0.857172429561615, "learning_rate": 3.6444024697082137e-06, "loss": 0.5342, "step": 9467 }, { "epoch": 0.6, "grad_norm": 0.8913065791130066, "learning_rate": 3.6434149324542185e-06, "loss": 0.6032, "step": 9468 }, { "epoch": 0.6, "grad_norm": 0.856587290763855, "learning_rate": 3.6424274523213e-06, "loss": 0.5779, "step": 9469 }, { "epoch": 0.6, "grad_norm": 0.9097947478294373, "learning_rate": 3.641440029351041e-06, "loss": 0.5989, "step": 9470 }, { "epoch": 0.6, "grad_norm": 0.9367252588272095, "learning_rate": 3.640452663585017e-06, "loss": 0.6136, "step": 9471 }, { "epoch": 0.6, "grad_norm": 0.8729375004768372, "learning_rate": 3.6394653550647996e-06, "loss": 0.5497, "step": 9472 }, { "epoch": 0.6, "grad_norm": 0.8242816925048828, "learning_rate": 3.638478103831965e-06, "loss": 0.5366, "step": 9473 }, { "epoch": 0.6, "grad_norm": 0.8420335054397583, "learning_rate": 3.6374909099280786e-06, "loss": 0.5695, "step": 9474 }, { "epoch": 0.6, "grad_norm": 0.8642702698707581, "learning_rate": 3.636503773394713e-06, "loss": 0.6007, "step": 9475 }, { "epoch": 0.6, "grad_norm": 0.9021347761154175, "learning_rate": 3.635516694273428e-06, "loss": 0.6437, "step": 9476 }, { "epoch": 0.6, "grad_norm": 0.9246450662612915, "learning_rate": 3.63452967260579e-06, "loss": 0.5961, "step": 9477 }, { "epoch": 0.6, "grad_norm": 0.8637540340423584, "learning_rate": 3.633542708433355e-06, "loss": 0.5337, "step": 9478 }, { "epoch": 0.6, "grad_norm": 0.8642069697380066, "learning_rate": 3.632555801797686e-06, "loss": 0.5905, "step": 9479 }, { "epoch": 0.6, "grad_norm": 0.9082743525505066, "learning_rate": 3.631568952740333e-06, "loss": 0.5878, "step": 9480 }, { "epoch": 0.6, "grad_norm": 0.9280330538749695, "learning_rate": 3.6305821613028524e-06, "loss": 0.5967, "step": 9481 }, { "epoch": 0.6, "grad_norm": 0.8534235954284668, "learning_rate": 3.6295954275267914e-06, "loss": 0.5841, "step": 9482 }, { "epoch": 0.6, "grad_norm": 0.8653678297996521, "learning_rate": 3.6286087514537017e-06, "loss": 0.5635, "step": 9483 }, { "epoch": 0.6, "grad_norm": 0.8563132882118225, "learning_rate": 3.6276221331251253e-06, "loss": 0.5565, "step": 9484 }, { "epoch": 0.6, "grad_norm": 0.7956026196479797, "learning_rate": 3.626635572582608e-06, "loss": 0.5475, "step": 9485 }, { "epoch": 0.6, "grad_norm": 0.9152180552482605, "learning_rate": 3.6256490698676884e-06, "loss": 0.5893, "step": 9486 }, { "epoch": 0.6, "grad_norm": 0.8446356058120728, "learning_rate": 3.6246626250219047e-06, "loss": 0.5365, "step": 9487 }, { "epoch": 0.6, "grad_norm": 0.8718549013137817, "learning_rate": 3.623676238086794e-06, "loss": 0.5374, "step": 9488 }, { "epoch": 0.6, "grad_norm": 0.8933292031288147, "learning_rate": 3.6226899091038896e-06, "loss": 0.6108, "step": 9489 }, { "epoch": 0.6, "grad_norm": 0.9709043502807617, "learning_rate": 3.6217036381147216e-06, "loss": 0.6381, "step": 9490 }, { "epoch": 0.6, "grad_norm": 0.9374916553497314, "learning_rate": 3.620717425160818e-06, "loss": 0.5966, "step": 9491 }, { "epoch": 0.6, "grad_norm": 0.906370997428894, "learning_rate": 3.619731270283705e-06, "loss": 0.6183, "step": 9492 }, { "epoch": 0.6, "grad_norm": 0.9059337377548218, "learning_rate": 3.6187451735249085e-06, "loss": 0.5477, "step": 9493 }, { "epoch": 0.6, "grad_norm": 0.9128062725067139, "learning_rate": 3.6177591349259465e-06, "loss": 0.638, "step": 9494 }, { "epoch": 0.6, "grad_norm": 0.912139892578125, "learning_rate": 3.616773154528339e-06, "loss": 0.6009, "step": 9495 }, { "epoch": 0.6, "grad_norm": 0.8537312150001526, "learning_rate": 3.6157872323736017e-06, "loss": 0.5281, "step": 9496 }, { "epoch": 0.6, "grad_norm": 0.9160687923431396, "learning_rate": 3.61480136850325e-06, "loss": 0.5757, "step": 9497 }, { "epoch": 0.6, "grad_norm": 0.8970745205879211, "learning_rate": 3.6138155629587925e-06, "loss": 0.5668, "step": 9498 }, { "epoch": 0.6, "grad_norm": 0.8694158792495728, "learning_rate": 3.61282981578174e-06, "loss": 0.6049, "step": 9499 }, { "epoch": 0.6, "grad_norm": 0.8502684235572815, "learning_rate": 3.611844127013598e-06, "loss": 0.5737, "step": 9500 }, { "epoch": 0.6, "grad_norm": 0.9084693193435669, "learning_rate": 3.6108584966958717e-06, "loss": 0.5383, "step": 9501 }, { "epoch": 0.6, "grad_norm": 0.9308486580848694, "learning_rate": 3.6098729248700604e-06, "loss": 0.6029, "step": 9502 }, { "epoch": 0.6, "grad_norm": 0.8839983344078064, "learning_rate": 3.6088874115776664e-06, "loss": 0.5604, "step": 9503 }, { "epoch": 0.6, "grad_norm": 0.8351526856422424, "learning_rate": 3.6079019568601816e-06, "loss": 0.5777, "step": 9504 }, { "epoch": 0.6, "grad_norm": 0.8404673933982849, "learning_rate": 3.606916560759104e-06, "loss": 0.5518, "step": 9505 }, { "epoch": 0.6, "grad_norm": 0.8732911348342896, "learning_rate": 3.6059312233159237e-06, "loss": 0.5913, "step": 9506 }, { "epoch": 0.6, "grad_norm": 0.8823626637458801, "learning_rate": 3.6049459445721303e-06, "loss": 0.567, "step": 9507 }, { "epoch": 0.6, "grad_norm": 0.8690586686134338, "learning_rate": 3.6039607245692086e-06, "loss": 0.5204, "step": 9508 }, { "epoch": 0.6, "grad_norm": 0.8530438542366028, "learning_rate": 3.6029755633486464e-06, "loss": 0.6059, "step": 9509 }, { "epoch": 0.6, "grad_norm": 0.8257336616516113, "learning_rate": 3.601990460951922e-06, "loss": 0.5089, "step": 9510 }, { "epoch": 0.6, "grad_norm": 0.8363312482833862, "learning_rate": 3.6010054174205167e-06, "loss": 0.5395, "step": 9511 }, { "epoch": 0.6, "grad_norm": 0.9421717524528503, "learning_rate": 3.6000204327959055e-06, "loss": 0.6033, "step": 9512 }, { "epoch": 0.6, "grad_norm": 0.8582902550697327, "learning_rate": 3.599035507119565e-06, "loss": 0.5413, "step": 9513 }, { "epoch": 0.6, "grad_norm": 0.8793736696243286, "learning_rate": 3.5980506404329647e-06, "loss": 0.6307, "step": 9514 }, { "epoch": 0.6, "grad_norm": 0.9074476361274719, "learning_rate": 3.597065832777576e-06, "loss": 0.6315, "step": 9515 }, { "epoch": 0.6, "grad_norm": 0.8699880242347717, "learning_rate": 3.5960810841948622e-06, "loss": 0.6043, "step": 9516 }, { "epoch": 0.6, "grad_norm": 0.8704454302787781, "learning_rate": 3.595096394726293e-06, "loss": 0.5378, "step": 9517 }, { "epoch": 0.6, "grad_norm": 0.8879233002662659, "learning_rate": 3.594111764413326e-06, "loss": 0.5705, "step": 9518 }, { "epoch": 0.6, "grad_norm": 0.9449933171272278, "learning_rate": 3.5931271932974227e-06, "loss": 0.589, "step": 9519 }, { "epoch": 0.6, "grad_norm": 0.8365026116371155, "learning_rate": 3.592142681420039e-06, "loss": 0.5869, "step": 9520 }, { "epoch": 0.6, "grad_norm": 0.8679017424583435, "learning_rate": 3.5911582288226275e-06, "loss": 0.5936, "step": 9521 }, { "epoch": 0.6, "grad_norm": 0.9098031520843506, "learning_rate": 3.5901738355466433e-06, "loss": 0.6482, "step": 9522 }, { "epoch": 0.6, "grad_norm": 0.8863396048545837, "learning_rate": 3.5891895016335347e-06, "loss": 0.5826, "step": 9523 }, { "epoch": 0.6, "grad_norm": 0.8773937225341797, "learning_rate": 3.588205227124749e-06, "loss": 0.5994, "step": 9524 }, { "epoch": 0.6, "grad_norm": 0.9152101874351501, "learning_rate": 3.587221012061728e-06, "loss": 0.5859, "step": 9525 }, { "epoch": 0.6, "grad_norm": 0.8691193461418152, "learning_rate": 3.586236856485916e-06, "loss": 0.5964, "step": 9526 }, { "epoch": 0.6, "grad_norm": 0.8620315194129944, "learning_rate": 3.5852527604387533e-06, "loss": 0.569, "step": 9527 }, { "epoch": 0.6, "grad_norm": 0.864154040813446, "learning_rate": 3.5842687239616745e-06, "loss": 0.5576, "step": 9528 }, { "epoch": 0.6, "grad_norm": 0.8283834457397461, "learning_rate": 3.583284747096114e-06, "loss": 0.5909, "step": 9529 }, { "epoch": 0.6, "grad_norm": 0.9094521403312683, "learning_rate": 3.5823008298835044e-06, "loss": 0.6439, "step": 9530 }, { "epoch": 0.6, "grad_norm": 0.8895583748817444, "learning_rate": 3.5813169723652763e-06, "loss": 0.5486, "step": 9531 }, { "epoch": 0.6, "grad_norm": 0.9506862759590149, "learning_rate": 3.5803331745828558e-06, "loss": 0.6163, "step": 9532 }, { "epoch": 0.6, "grad_norm": 0.855083703994751, "learning_rate": 3.579349436577665e-06, "loss": 0.6066, "step": 9533 }, { "epoch": 0.6, "grad_norm": 0.9323597550392151, "learning_rate": 3.5783657583911268e-06, "loss": 0.6049, "step": 9534 }, { "epoch": 0.6, "grad_norm": 0.865875780582428, "learning_rate": 3.5773821400646623e-06, "loss": 0.5621, "step": 9535 }, { "epoch": 0.6, "grad_norm": 0.846933901309967, "learning_rate": 3.5763985816396873e-06, "loss": 0.5495, "step": 9536 }, { "epoch": 0.6, "grad_norm": 0.8472744822502136, "learning_rate": 3.575415083157615e-06, "loss": 0.5131, "step": 9537 }, { "epoch": 0.6, "grad_norm": 0.8493777513504028, "learning_rate": 3.5744316446598565e-06, "loss": 0.5477, "step": 9538 }, { "epoch": 0.6, "grad_norm": 0.8486292958259583, "learning_rate": 3.5734482661878244e-06, "loss": 0.5877, "step": 9539 }, { "epoch": 0.6, "grad_norm": 0.9552314281463623, "learning_rate": 3.5724649477829232e-06, "loss": 0.6654, "step": 9540 }, { "epoch": 0.6, "grad_norm": 0.8565940260887146, "learning_rate": 3.5714816894865556e-06, "loss": 0.532, "step": 9541 }, { "epoch": 0.6, "grad_norm": 0.8645039200782776, "learning_rate": 3.570498491340124e-06, "loss": 0.5583, "step": 9542 }, { "epoch": 0.6, "grad_norm": 0.8806138038635254, "learning_rate": 3.5695153533850302e-06, "loss": 0.5508, "step": 9543 }, { "epoch": 0.6, "grad_norm": 0.8636948466300964, "learning_rate": 3.5685322756626683e-06, "loss": 0.5921, "step": 9544 }, { "epoch": 0.6, "grad_norm": 0.8807823657989502, "learning_rate": 3.5675492582144322e-06, "loss": 0.62, "step": 9545 }, { "epoch": 0.6, "grad_norm": 0.85367751121521, "learning_rate": 3.566566301081712e-06, "loss": 0.5503, "step": 9546 }, { "epoch": 0.6, "grad_norm": 0.8549711108207703, "learning_rate": 3.5655834043059e-06, "loss": 0.5368, "step": 9547 }, { "epoch": 0.6, "grad_norm": 0.9118362069129944, "learning_rate": 3.5646005679283813e-06, "loss": 0.585, "step": 9548 }, { "epoch": 0.6, "grad_norm": 0.8719263672828674, "learning_rate": 3.5636177919905385e-06, "loss": 0.5321, "step": 9549 }, { "epoch": 0.61, "grad_norm": 0.8751515746116638, "learning_rate": 3.5626350765337546e-06, "loss": 0.6133, "step": 9550 }, { "epoch": 0.61, "grad_norm": 0.8825535774230957, "learning_rate": 3.5616524215994052e-06, "loss": 0.5868, "step": 9551 }, { "epoch": 0.61, "grad_norm": 1.0655604600906372, "learning_rate": 3.560669827228871e-06, "loss": 0.6187, "step": 9552 }, { "epoch": 0.61, "grad_norm": 0.8702925443649292, "learning_rate": 3.559687293463522e-06, "loss": 0.5919, "step": 9553 }, { "epoch": 0.61, "grad_norm": 0.8378567695617676, "learning_rate": 3.5587048203447314e-06, "loss": 0.5988, "step": 9554 }, { "epoch": 0.61, "grad_norm": 0.8837084770202637, "learning_rate": 3.557722407913865e-06, "loss": 0.5909, "step": 9555 }, { "epoch": 0.61, "grad_norm": 0.8577612042427063, "learning_rate": 3.5567400562122934e-06, "loss": 0.4996, "step": 9556 }, { "epoch": 0.61, "grad_norm": 0.8624299764633179, "learning_rate": 3.5557577652813758e-06, "loss": 0.5573, "step": 9557 }, { "epoch": 0.61, "grad_norm": 0.9122274518013, "learning_rate": 3.554775535162475e-06, "loss": 0.6072, "step": 9558 }, { "epoch": 0.61, "grad_norm": 0.8915830254554749, "learning_rate": 3.5537933658969475e-06, "loss": 0.5344, "step": 9559 }, { "epoch": 0.61, "grad_norm": 1.072354793548584, "learning_rate": 3.5528112575261525e-06, "loss": 0.6077, "step": 9560 }, { "epoch": 0.61, "grad_norm": 0.9154992699623108, "learning_rate": 3.5518292100914396e-06, "loss": 0.5795, "step": 9561 }, { "epoch": 0.61, "grad_norm": 0.8368164896965027, "learning_rate": 3.550847223634162e-06, "loss": 0.6176, "step": 9562 }, { "epoch": 0.61, "grad_norm": 0.8458346724510193, "learning_rate": 3.549865298195665e-06, "loss": 0.5834, "step": 9563 }, { "epoch": 0.61, "grad_norm": 0.8798760771751404, "learning_rate": 3.5488834338172974e-06, "loss": 0.5863, "step": 9564 }, { "epoch": 0.61, "grad_norm": 0.9277465343475342, "learning_rate": 3.547901630540399e-06, "loss": 0.5814, "step": 9565 }, { "epoch": 0.61, "grad_norm": 0.9117797017097473, "learning_rate": 3.546919888406313e-06, "loss": 0.5731, "step": 9566 }, { "epoch": 0.61, "grad_norm": 0.9465237855911255, "learning_rate": 3.5459382074563737e-06, "loss": 0.584, "step": 9567 }, { "epoch": 0.61, "grad_norm": 0.8362681269645691, "learning_rate": 3.5449565877319175e-06, "loss": 0.5362, "step": 9568 }, { "epoch": 0.61, "grad_norm": 0.8532899022102356, "learning_rate": 3.54397502927428e-06, "loss": 0.5545, "step": 9569 }, { "epoch": 0.61, "grad_norm": 0.8592314124107361, "learning_rate": 3.5429935321247887e-06, "loss": 0.6031, "step": 9570 }, { "epoch": 0.61, "grad_norm": 0.9131558537483215, "learning_rate": 3.5420120963247706e-06, "loss": 0.5733, "step": 9571 }, { "epoch": 0.61, "grad_norm": 0.9123291373252869, "learning_rate": 3.5410307219155495e-06, "loss": 0.5963, "step": 9572 }, { "epoch": 0.61, "grad_norm": 0.9059653282165527, "learning_rate": 3.540049408938452e-06, "loss": 0.5867, "step": 9573 }, { "epoch": 0.61, "grad_norm": 0.8649680614471436, "learning_rate": 3.539068157434794e-06, "loss": 0.5413, "step": 9574 }, { "epoch": 0.61, "grad_norm": 0.8861904144287109, "learning_rate": 3.538086967445894e-06, "loss": 0.5541, "step": 9575 }, { "epoch": 0.61, "grad_norm": 0.8411959409713745, "learning_rate": 3.5371058390130643e-06, "loss": 0.6017, "step": 9576 }, { "epoch": 0.61, "grad_norm": 0.8303496837615967, "learning_rate": 3.536124772177621e-06, "loss": 0.5385, "step": 9577 }, { "epoch": 0.61, "grad_norm": 0.8831817507743835, "learning_rate": 3.535143766980871e-06, "loss": 0.5553, "step": 9578 }, { "epoch": 0.61, "grad_norm": 0.8853132128715515, "learning_rate": 3.53416282346412e-06, "loss": 0.5881, "step": 9579 }, { "epoch": 0.61, "grad_norm": 0.9052870869636536, "learning_rate": 3.533181941668675e-06, "loss": 0.5308, "step": 9580 }, { "epoch": 0.61, "grad_norm": 0.836897611618042, "learning_rate": 3.5322011216358325e-06, "loss": 0.5369, "step": 9581 }, { "epoch": 0.61, "grad_norm": 0.9233197569847107, "learning_rate": 3.5312203634068977e-06, "loss": 0.6164, "step": 9582 }, { "epoch": 0.61, "grad_norm": 0.883269727230072, "learning_rate": 3.5302396670231622e-06, "loss": 0.6196, "step": 9583 }, { "epoch": 0.61, "grad_norm": 0.8604480028152466, "learning_rate": 3.529259032525923e-06, "loss": 0.5845, "step": 9584 }, { "epoch": 0.61, "grad_norm": 0.8859973549842834, "learning_rate": 3.5282784599564667e-06, "loss": 0.5217, "step": 9585 }, { "epoch": 0.61, "grad_norm": 0.8377750515937805, "learning_rate": 3.5272979493560877e-06, "loss": 0.5654, "step": 9586 }, { "epoch": 0.61, "grad_norm": 0.901696503162384, "learning_rate": 3.5263175007660676e-06, "loss": 0.6074, "step": 9587 }, { "epoch": 0.61, "grad_norm": 0.8750433325767517, "learning_rate": 3.5253371142276915e-06, "loss": 0.5781, "step": 9588 }, { "epoch": 0.61, "grad_norm": 0.988763689994812, "learning_rate": 3.5243567897822382e-06, "loss": 0.6265, "step": 9589 }, { "epoch": 0.61, "grad_norm": 0.9134507179260254, "learning_rate": 3.5233765274709885e-06, "loss": 0.5861, "step": 9590 }, { "epoch": 0.61, "grad_norm": 0.8977269530296326, "learning_rate": 3.5223963273352157e-06, "loss": 0.6029, "step": 9591 }, { "epoch": 0.61, "grad_norm": 0.9446091651916504, "learning_rate": 3.5214161894161948e-06, "loss": 0.6057, "step": 9592 }, { "epoch": 0.61, "grad_norm": 0.9235208630561829, "learning_rate": 3.5204361137551924e-06, "loss": 0.5999, "step": 9593 }, { "epoch": 0.61, "grad_norm": 0.9178057312965393, "learning_rate": 3.5194561003934798e-06, "loss": 0.5572, "step": 9594 }, { "epoch": 0.61, "grad_norm": 0.886055052280426, "learning_rate": 3.5184761493723197e-06, "loss": 0.5808, "step": 9595 }, { "epoch": 0.61, "grad_norm": 0.9092001914978027, "learning_rate": 3.5174962607329755e-06, "loss": 0.5728, "step": 9596 }, { "epoch": 0.61, "grad_norm": 0.830111026763916, "learning_rate": 3.516516434516707e-06, "loss": 0.5996, "step": 9597 }, { "epoch": 0.61, "grad_norm": 0.9029525518417358, "learning_rate": 3.5155366707647686e-06, "loss": 0.566, "step": 9598 }, { "epoch": 0.61, "grad_norm": 0.9275105595588684, "learning_rate": 3.514556969518418e-06, "loss": 0.6466, "step": 9599 }, { "epoch": 0.61, "grad_norm": 0.8675131797790527, "learning_rate": 3.513577330818907e-06, "loss": 0.5825, "step": 9600 }, { "epoch": 0.61, "grad_norm": 0.9288623929023743, "learning_rate": 3.512597754707484e-06, "loss": 0.6074, "step": 9601 }, { "epoch": 0.61, "grad_norm": 0.8611968159675598, "learning_rate": 3.511618241225393e-06, "loss": 0.586, "step": 9602 }, { "epoch": 0.61, "grad_norm": 0.8688474893569946, "learning_rate": 3.5106387904138804e-06, "loss": 0.5534, "step": 9603 }, { "epoch": 0.61, "grad_norm": 0.8907158374786377, "learning_rate": 3.5096594023141895e-06, "loss": 0.5984, "step": 9604 }, { "epoch": 0.61, "grad_norm": 0.9105345606803894, "learning_rate": 3.508680076967556e-06, "loss": 0.6486, "step": 9605 }, { "epoch": 0.61, "grad_norm": 0.9449050426483154, "learning_rate": 3.507700814415215e-06, "loss": 0.563, "step": 9606 }, { "epoch": 0.61, "grad_norm": 0.9659131169319153, "learning_rate": 3.5067216146984016e-06, "loss": 0.6047, "step": 9607 }, { "epoch": 0.61, "grad_norm": 0.8895873427391052, "learning_rate": 3.505742477858348e-06, "loss": 0.5464, "step": 9608 }, { "epoch": 0.61, "grad_norm": 0.875363826751709, "learning_rate": 3.50476340393628e-06, "loss": 0.534, "step": 9609 }, { "epoch": 0.61, "grad_norm": 0.8972344398498535, "learning_rate": 3.5037843929734216e-06, "loss": 0.5933, "step": 9610 }, { "epoch": 0.61, "grad_norm": 0.9015132784843445, "learning_rate": 3.502805445010998e-06, "loss": 0.5126, "step": 9611 }, { "epoch": 0.61, "grad_norm": 0.8442445993423462, "learning_rate": 3.5018265600902313e-06, "loss": 0.5688, "step": 9612 }, { "epoch": 0.61, "grad_norm": 0.9495238065719604, "learning_rate": 3.5008477382523355e-06, "loss": 0.5862, "step": 9613 }, { "epoch": 0.61, "grad_norm": 0.8787839412689209, "learning_rate": 3.4998689795385245e-06, "loss": 0.5882, "step": 9614 }, { "epoch": 0.61, "grad_norm": 0.9087364673614502, "learning_rate": 3.4988902839900118e-06, "loss": 0.6195, "step": 9615 }, { "epoch": 0.61, "grad_norm": 0.9128758907318115, "learning_rate": 3.4979116516480094e-06, "loss": 0.5856, "step": 9616 }, { "epoch": 0.61, "grad_norm": 0.9483041167259216, "learning_rate": 3.496933082553722e-06, "loss": 0.6421, "step": 9617 }, { "epoch": 0.61, "grad_norm": 0.8916476964950562, "learning_rate": 3.495954576748353e-06, "loss": 0.5491, "step": 9618 }, { "epoch": 0.61, "grad_norm": 0.895670473575592, "learning_rate": 3.4949761342731025e-06, "loss": 0.6149, "step": 9619 }, { "epoch": 0.61, "grad_norm": 0.9284258484840393, "learning_rate": 3.493997755169174e-06, "loss": 0.6185, "step": 9620 }, { "epoch": 0.61, "grad_norm": 0.8784351348876953, "learning_rate": 3.4930194394777615e-06, "loss": 0.5884, "step": 9621 }, { "epoch": 0.61, "grad_norm": 0.8456250429153442, "learning_rate": 3.492041187240056e-06, "loss": 0.5612, "step": 9622 }, { "epoch": 0.61, "grad_norm": 0.8818730115890503, "learning_rate": 3.49106299849725e-06, "loss": 0.5623, "step": 9623 }, { "epoch": 0.61, "grad_norm": 0.9130449891090393, "learning_rate": 3.4900848732905348e-06, "loss": 0.6577, "step": 9624 }, { "epoch": 0.61, "grad_norm": 0.9004276990890503, "learning_rate": 3.4891068116610914e-06, "loss": 0.6268, "step": 9625 }, { "epoch": 0.61, "grad_norm": 0.8974410891532898, "learning_rate": 3.4881288136501036e-06, "loss": 0.6242, "step": 9626 }, { "epoch": 0.61, "grad_norm": 0.9068264961242676, "learning_rate": 3.487150879298753e-06, "loss": 0.5797, "step": 9627 }, { "epoch": 0.61, "grad_norm": 0.8939555883407593, "learning_rate": 3.486173008648215e-06, "loss": 0.5922, "step": 9628 }, { "epoch": 0.61, "grad_norm": 0.8704434037208557, "learning_rate": 3.485195201739665e-06, "loss": 0.5675, "step": 9629 }, { "epoch": 0.61, "grad_norm": 0.8694623112678528, "learning_rate": 3.4842174586142772e-06, "loss": 0.5603, "step": 9630 }, { "epoch": 0.61, "grad_norm": 0.9141775965690613, "learning_rate": 3.4832397793132187e-06, "loss": 0.6373, "step": 9631 }, { "epoch": 0.61, "grad_norm": 0.9108834862709045, "learning_rate": 3.4822621638776555e-06, "loss": 0.5756, "step": 9632 }, { "epoch": 0.61, "grad_norm": 0.8546727895736694, "learning_rate": 3.4812846123487532e-06, "loss": 0.5232, "step": 9633 }, { "epoch": 0.61, "grad_norm": 0.9427882432937622, "learning_rate": 3.4803071247676735e-06, "loss": 0.5932, "step": 9634 }, { "epoch": 0.61, "grad_norm": 0.846181333065033, "learning_rate": 3.4793297011755746e-06, "loss": 0.5629, "step": 9635 }, { "epoch": 0.61, "grad_norm": 0.8653784394264221, "learning_rate": 3.4783523416136096e-06, "loss": 0.6068, "step": 9636 }, { "epoch": 0.61, "grad_norm": 0.8583688139915466, "learning_rate": 3.477375046122935e-06, "loss": 0.583, "step": 9637 }, { "epoch": 0.61, "grad_norm": 0.9034234881401062, "learning_rate": 3.476397814744702e-06, "loss": 0.6093, "step": 9638 }, { "epoch": 0.61, "grad_norm": 0.8735195994377136, "learning_rate": 3.4754206475200556e-06, "loss": 0.5769, "step": 9639 }, { "epoch": 0.61, "grad_norm": 0.9497086405754089, "learning_rate": 3.4744435444901412e-06, "loss": 0.5695, "step": 9640 }, { "epoch": 0.61, "grad_norm": 0.8908088207244873, "learning_rate": 3.473466505696103e-06, "loss": 0.5208, "step": 9641 }, { "epoch": 0.61, "grad_norm": 0.9061847925186157, "learning_rate": 3.4724895311790806e-06, "loss": 0.5601, "step": 9642 }, { "epoch": 0.61, "grad_norm": 0.8748029470443726, "learning_rate": 3.4715126209802104e-06, "loss": 0.5772, "step": 9643 }, { "epoch": 0.61, "grad_norm": 0.8988456130027771, "learning_rate": 3.4705357751406256e-06, "loss": 0.596, "step": 9644 }, { "epoch": 0.61, "grad_norm": 0.8625943064689636, "learning_rate": 3.469558993701457e-06, "loss": 0.5393, "step": 9645 }, { "epoch": 0.61, "grad_norm": 0.915705680847168, "learning_rate": 3.468582276703838e-06, "loss": 0.5994, "step": 9646 }, { "epoch": 0.61, "grad_norm": 0.9504239559173584, "learning_rate": 3.467605624188891e-06, "loss": 0.5692, "step": 9647 }, { "epoch": 0.61, "grad_norm": 0.8617990016937256, "learning_rate": 3.46662903619774e-06, "loss": 0.572, "step": 9648 }, { "epoch": 0.61, "grad_norm": 0.9895144104957581, "learning_rate": 3.4656525127715045e-06, "loss": 0.5718, "step": 9649 }, { "epoch": 0.61, "grad_norm": 0.9112670421600342, "learning_rate": 3.464676053951307e-06, "loss": 0.5922, "step": 9650 }, { "epoch": 0.61, "grad_norm": 0.8914951682090759, "learning_rate": 3.463699659778259e-06, "loss": 0.5979, "step": 9651 }, { "epoch": 0.61, "grad_norm": 0.9271389245986938, "learning_rate": 3.4627233302934737e-06, "loss": 0.6252, "step": 9652 }, { "epoch": 0.61, "grad_norm": 0.9418599009513855, "learning_rate": 3.4617470655380597e-06, "loss": 0.5709, "step": 9653 }, { "epoch": 0.61, "grad_norm": 0.8820015788078308, "learning_rate": 3.460770865553128e-06, "loss": 0.5428, "step": 9654 }, { "epoch": 0.61, "grad_norm": 0.9147693514823914, "learning_rate": 3.4597947303797795e-06, "loss": 0.636, "step": 9655 }, { "epoch": 0.61, "grad_norm": 0.9040268063545227, "learning_rate": 3.458818660059117e-06, "loss": 0.5763, "step": 9656 }, { "epoch": 0.61, "grad_norm": 0.893278956413269, "learning_rate": 3.4578426546322403e-06, "loss": 0.6656, "step": 9657 }, { "epoch": 0.61, "grad_norm": 0.8358849883079529, "learning_rate": 3.4568667141402425e-06, "loss": 0.5922, "step": 9658 }, { "epoch": 0.61, "grad_norm": 0.889038622379303, "learning_rate": 3.4558908386242208e-06, "loss": 0.5502, "step": 9659 }, { "epoch": 0.61, "grad_norm": 0.8911066055297852, "learning_rate": 3.4549150281252635e-06, "loss": 0.6117, "step": 9660 }, { "epoch": 0.61, "grad_norm": 0.9033711552619934, "learning_rate": 3.4539392826844607e-06, "loss": 0.6014, "step": 9661 }, { "epoch": 0.61, "grad_norm": 0.935605525970459, "learning_rate": 3.4529636023428935e-06, "loss": 0.5503, "step": 9662 }, { "epoch": 0.61, "grad_norm": 0.9369493126869202, "learning_rate": 3.4519879871416505e-06, "loss": 0.5497, "step": 9663 }, { "epoch": 0.61, "grad_norm": 0.8504288792610168, "learning_rate": 3.451012437121806e-06, "loss": 0.5578, "step": 9664 }, { "epoch": 0.61, "grad_norm": 0.9002702832221985, "learning_rate": 3.4500369523244414e-06, "loss": 0.5544, "step": 9665 }, { "epoch": 0.61, "grad_norm": 0.8431366086006165, "learning_rate": 3.4490615327906264e-06, "loss": 0.5637, "step": 9666 }, { "epoch": 0.61, "grad_norm": 0.8854979872703552, "learning_rate": 3.448086178561436e-06, "loss": 0.5606, "step": 9667 }, { "epoch": 0.61, "grad_norm": 0.9128515720367432, "learning_rate": 3.447110889677938e-06, "loss": 0.6465, "step": 9668 }, { "epoch": 0.61, "grad_norm": 0.9017817974090576, "learning_rate": 3.4461356661811997e-06, "loss": 0.5738, "step": 9669 }, { "epoch": 0.61, "grad_norm": 0.831354558467865, "learning_rate": 3.4451605081122797e-06, "loss": 0.5026, "step": 9670 }, { "epoch": 0.61, "grad_norm": 0.9157218933105469, "learning_rate": 3.4441854155122446e-06, "loss": 0.5886, "step": 9671 }, { "epoch": 0.61, "grad_norm": 0.9166000485420227, "learning_rate": 3.443210388422148e-06, "loss": 0.5729, "step": 9672 }, { "epoch": 0.61, "grad_norm": 0.8740888833999634, "learning_rate": 3.4422354268830473e-06, "loss": 0.6174, "step": 9673 }, { "epoch": 0.61, "grad_norm": 0.8689664006233215, "learning_rate": 3.441260530935994e-06, "loss": 0.5786, "step": 9674 }, { "epoch": 0.61, "grad_norm": 0.9302951693534851, "learning_rate": 3.4402857006220353e-06, "loss": 0.6591, "step": 9675 }, { "epoch": 0.61, "grad_norm": 0.9434636235237122, "learning_rate": 3.439310935982221e-06, "loss": 0.6147, "step": 9676 }, { "epoch": 0.61, "grad_norm": 0.9156984686851501, "learning_rate": 3.4383362370575947e-06, "loss": 0.5605, "step": 9677 }, { "epoch": 0.61, "grad_norm": 0.866672933101654, "learning_rate": 3.4373616038891966e-06, "loss": 0.6085, "step": 9678 }, { "epoch": 0.61, "grad_norm": 0.8342917561531067, "learning_rate": 3.4363870365180634e-06, "loss": 0.5376, "step": 9679 }, { "epoch": 0.61, "grad_norm": 0.8971079587936401, "learning_rate": 3.435412534985234e-06, "loss": 0.5982, "step": 9680 }, { "epoch": 0.61, "grad_norm": 0.8973367810249329, "learning_rate": 3.4344380993317404e-06, "loss": 0.601, "step": 9681 }, { "epoch": 0.61, "grad_norm": 0.9082698822021484, "learning_rate": 3.433463729598613e-06, "loss": 0.6087, "step": 9682 }, { "epoch": 0.61, "grad_norm": 0.8526648879051208, "learning_rate": 3.432489425826876e-06, "loss": 0.5689, "step": 9683 }, { "epoch": 0.61, "grad_norm": 0.8531283736228943, "learning_rate": 3.431515188057557e-06, "loss": 0.5591, "step": 9684 }, { "epoch": 0.61, "grad_norm": 0.8188491463661194, "learning_rate": 3.4305410163316788e-06, "loss": 0.5402, "step": 9685 }, { "epoch": 0.61, "grad_norm": 0.8680586218833923, "learning_rate": 3.429566910690258e-06, "loss": 0.5776, "step": 9686 }, { "epoch": 0.61, "grad_norm": 0.9405946731567383, "learning_rate": 3.42859287117431e-06, "loss": 0.611, "step": 9687 }, { "epoch": 0.61, "grad_norm": 0.9357609152793884, "learning_rate": 3.42761889782485e-06, "loss": 0.621, "step": 9688 }, { "epoch": 0.61, "grad_norm": 0.9674767255783081, "learning_rate": 3.4266449906828897e-06, "loss": 0.5803, "step": 9689 }, { "epoch": 0.61, "grad_norm": 0.9120761752128601, "learning_rate": 3.4256711497894346e-06, "loss": 0.5951, "step": 9690 }, { "epoch": 0.61, "grad_norm": 0.8053117394447327, "learning_rate": 3.4246973751854917e-06, "loss": 0.5611, "step": 9691 }, { "epoch": 0.61, "grad_norm": 0.9436960220336914, "learning_rate": 3.42372366691206e-06, "loss": 0.5907, "step": 9692 }, { "epoch": 0.61, "grad_norm": 0.9401655197143555, "learning_rate": 3.422750025010143e-06, "loss": 0.5508, "step": 9693 }, { "epoch": 0.61, "grad_norm": 0.9254014492034912, "learning_rate": 3.421776449520735e-06, "loss": 0.5881, "step": 9694 }, { "epoch": 0.61, "grad_norm": 0.9455965757369995, "learning_rate": 3.4208029404848315e-06, "loss": 0.5914, "step": 9695 }, { "epoch": 0.61, "grad_norm": 0.8691204786300659, "learning_rate": 3.4198294979434207e-06, "loss": 0.5917, "step": 9696 }, { "epoch": 0.61, "grad_norm": 0.9164184927940369, "learning_rate": 3.418856121937494e-06, "loss": 0.5272, "step": 9697 }, { "epoch": 0.61, "grad_norm": 0.857101559638977, "learning_rate": 3.4178828125080354e-06, "loss": 0.6098, "step": 9698 }, { "epoch": 0.61, "grad_norm": 0.9042714238166809, "learning_rate": 3.4169095696960287e-06, "loss": 0.6457, "step": 9699 }, { "epoch": 0.61, "grad_norm": 0.87809157371521, "learning_rate": 3.4159363935424505e-06, "loss": 0.5905, "step": 9700 }, { "epoch": 0.61, "grad_norm": 0.91737961769104, "learning_rate": 3.4149632840882817e-06, "loss": 0.5974, "step": 9701 }, { "epoch": 0.61, "grad_norm": 0.8448576331138611, "learning_rate": 3.413990241374495e-06, "loss": 0.5751, "step": 9702 }, { "epoch": 0.61, "grad_norm": 0.8721796274185181, "learning_rate": 3.4130172654420623e-06, "loss": 0.5554, "step": 9703 }, { "epoch": 0.61, "grad_norm": 0.9579261541366577, "learning_rate": 3.4120443563319527e-06, "loss": 0.5359, "step": 9704 }, { "epoch": 0.61, "grad_norm": 0.9207855463027954, "learning_rate": 3.4110715140851286e-06, "loss": 0.5518, "step": 9705 }, { "epoch": 0.61, "grad_norm": 0.9483494758605957, "learning_rate": 3.4100987387425566e-06, "loss": 0.6161, "step": 9706 }, { "epoch": 0.61, "grad_norm": 0.9150481820106506, "learning_rate": 3.4091260303451967e-06, "loss": 0.5725, "step": 9707 }, { "epoch": 0.62, "grad_norm": 0.8735103607177734, "learning_rate": 3.4081533889340056e-06, "loss": 0.5568, "step": 9708 }, { "epoch": 0.62, "grad_norm": 0.8997935652732849, "learning_rate": 3.407180814549935e-06, "loss": 0.5814, "step": 9709 }, { "epoch": 0.62, "grad_norm": 0.9537015557289124, "learning_rate": 3.4062083072339415e-06, "loss": 0.6278, "step": 9710 }, { "epoch": 0.62, "grad_norm": 0.9637327194213867, "learning_rate": 3.4052358670269727e-06, "loss": 0.6356, "step": 9711 }, { "epoch": 0.62, "grad_norm": 0.8522918224334717, "learning_rate": 3.4042634939699728e-06, "loss": 0.5623, "step": 9712 }, { "epoch": 0.62, "grad_norm": 0.9028704762458801, "learning_rate": 3.4032911881038842e-06, "loss": 0.6201, "step": 9713 }, { "epoch": 0.62, "grad_norm": 0.9436061382293701, "learning_rate": 3.4023189494696506e-06, "loss": 0.6674, "step": 9714 }, { "epoch": 0.62, "grad_norm": 0.9471274018287659, "learning_rate": 3.401346778108209e-06, "loss": 0.6199, "step": 9715 }, { "epoch": 0.62, "grad_norm": 0.9044203162193298, "learning_rate": 3.4003746740604925e-06, "loss": 0.5994, "step": 9716 }, { "epoch": 0.62, "grad_norm": 0.8830143213272095, "learning_rate": 3.399402637367433e-06, "loss": 0.5824, "step": 9717 }, { "epoch": 0.62, "grad_norm": 0.8798513412475586, "learning_rate": 3.398430668069961e-06, "loss": 0.5765, "step": 9718 }, { "epoch": 0.62, "grad_norm": 0.9042376279830933, "learning_rate": 3.3974587662090026e-06, "loss": 0.6328, "step": 9719 }, { "epoch": 0.62, "grad_norm": 0.9810076355934143, "learning_rate": 3.396486931825481e-06, "loss": 0.6196, "step": 9720 }, { "epoch": 0.62, "grad_norm": 0.8763681054115295, "learning_rate": 3.395515164960316e-06, "loss": 0.5587, "step": 9721 }, { "epoch": 0.62, "grad_norm": 0.8596003651618958, "learning_rate": 3.394543465654424e-06, "loss": 0.6053, "step": 9722 }, { "epoch": 0.62, "grad_norm": 0.9068569540977478, "learning_rate": 3.393571833948724e-06, "loss": 0.5969, "step": 9723 }, { "epoch": 0.62, "grad_norm": 0.8864340782165527, "learning_rate": 3.3926002698841253e-06, "loss": 0.5411, "step": 9724 }, { "epoch": 0.62, "grad_norm": 0.8910347819328308, "learning_rate": 3.3916287735015375e-06, "loss": 0.5857, "step": 9725 }, { "epoch": 0.62, "grad_norm": 0.8630258440971375, "learning_rate": 3.390657344841865e-06, "loss": 0.5274, "step": 9726 }, { "epoch": 0.62, "grad_norm": 0.8938130140304565, "learning_rate": 3.3896859839460155e-06, "loss": 0.5875, "step": 9727 }, { "epoch": 0.62, "grad_norm": 0.9419970512390137, "learning_rate": 3.3887146908548875e-06, "loss": 0.6277, "step": 9728 }, { "epoch": 0.62, "grad_norm": 0.8735063672065735, "learning_rate": 3.3877434656093777e-06, "loss": 0.5373, "step": 9729 }, { "epoch": 0.62, "grad_norm": 0.9380084276199341, "learning_rate": 3.3867723082503807e-06, "loss": 0.664, "step": 9730 }, { "epoch": 0.62, "grad_norm": 0.8645982146263123, "learning_rate": 3.385801218818792e-06, "loss": 0.5602, "step": 9731 }, { "epoch": 0.62, "grad_norm": 0.9503861665725708, "learning_rate": 3.384830197355499e-06, "loss": 0.629, "step": 9732 }, { "epoch": 0.62, "grad_norm": 0.875836968421936, "learning_rate": 3.383859243901385e-06, "loss": 0.5647, "step": 9733 }, { "epoch": 0.62, "grad_norm": 0.841139018535614, "learning_rate": 3.3828883584973364e-06, "loss": 0.5904, "step": 9734 }, { "epoch": 0.62, "grad_norm": 0.9259316325187683, "learning_rate": 3.3819175411842353e-06, "loss": 0.6008, "step": 9735 }, { "epoch": 0.62, "grad_norm": 0.8969772458076477, "learning_rate": 3.3809467920029574e-06, "loss": 0.5846, "step": 9736 }, { "epoch": 0.62, "grad_norm": 0.8963971138000488, "learning_rate": 3.3799761109943775e-06, "loss": 0.5553, "step": 9737 }, { "epoch": 0.62, "grad_norm": 0.8801413178443909, "learning_rate": 3.3790054981993683e-06, "loss": 0.5912, "step": 9738 }, { "epoch": 0.62, "grad_norm": 0.9127761125564575, "learning_rate": 3.3780349536587965e-06, "loss": 0.5684, "step": 9739 }, { "epoch": 0.62, "grad_norm": 0.8743903636932373, "learning_rate": 3.377064477413533e-06, "loss": 0.6024, "step": 9740 }, { "epoch": 0.62, "grad_norm": 0.8177317380905151, "learning_rate": 3.376094069504437e-06, "loss": 0.5123, "step": 9741 }, { "epoch": 0.62, "grad_norm": 0.9717310070991516, "learning_rate": 3.3751237299723715e-06, "loss": 0.5859, "step": 9742 }, { "epoch": 0.62, "grad_norm": 0.9141378402709961, "learning_rate": 3.3741534588581915e-06, "loss": 0.6222, "step": 9743 }, { "epoch": 0.62, "grad_norm": 0.9228180050849915, "learning_rate": 3.3731832562027555e-06, "loss": 0.5765, "step": 9744 }, { "epoch": 0.62, "grad_norm": 0.8938828110694885, "learning_rate": 3.372213122046912e-06, "loss": 0.5691, "step": 9745 }, { "epoch": 0.62, "grad_norm": 0.8958885669708252, "learning_rate": 3.3712430564315124e-06, "loss": 0.5607, "step": 9746 }, { "epoch": 0.62, "grad_norm": 0.9243308305740356, "learning_rate": 3.3702730593974e-06, "loss": 0.5898, "step": 9747 }, { "epoch": 0.62, "grad_norm": 0.9135646224021912, "learning_rate": 3.3693031309854214e-06, "loss": 0.5836, "step": 9748 }, { "epoch": 0.62, "grad_norm": 0.8542222380638123, "learning_rate": 3.3683332712364138e-06, "loss": 0.5579, "step": 9749 }, { "epoch": 0.62, "grad_norm": 0.9198238849639893, "learning_rate": 3.3673634801912175e-06, "loss": 0.5955, "step": 9750 }, { "epoch": 0.62, "grad_norm": 0.8537915349006653, "learning_rate": 3.3663937578906642e-06, "loss": 0.5355, "step": 9751 }, { "epoch": 0.62, "grad_norm": 0.8687244057655334, "learning_rate": 3.365424104375587e-06, "loss": 0.5622, "step": 9752 }, { "epoch": 0.62, "grad_norm": 0.8461039662361145, "learning_rate": 3.3644545196868146e-06, "loss": 0.594, "step": 9753 }, { "epoch": 0.62, "grad_norm": 0.8379154801368713, "learning_rate": 3.3634850038651734e-06, "loss": 0.6012, "step": 9754 }, { "epoch": 0.62, "grad_norm": 0.870664119720459, "learning_rate": 3.362515556951485e-06, "loss": 0.552, "step": 9755 }, { "epoch": 0.62, "grad_norm": 0.8928505182266235, "learning_rate": 3.36154617898657e-06, "loss": 0.5454, "step": 9756 }, { "epoch": 0.62, "grad_norm": 0.9355548620223999, "learning_rate": 3.360576870011246e-06, "loss": 0.5332, "step": 9757 }, { "epoch": 0.62, "grad_norm": 0.910213828086853, "learning_rate": 3.3596076300663273e-06, "loss": 0.5907, "step": 9758 }, { "epoch": 0.62, "grad_norm": 0.8772505521774292, "learning_rate": 3.3586384591926235e-06, "loss": 0.5826, "step": 9759 }, { "epoch": 0.62, "grad_norm": 0.932346522808075, "learning_rate": 3.3576693574309447e-06, "loss": 0.6092, "step": 9760 }, { "epoch": 0.62, "grad_norm": 0.8593097925186157, "learning_rate": 3.3567003248220966e-06, "loss": 0.6045, "step": 9761 }, { "epoch": 0.62, "grad_norm": 0.8780187368392944, "learning_rate": 3.355731361406882e-06, "loss": 0.5656, "step": 9762 }, { "epoch": 0.62, "grad_norm": 0.9253993630409241, "learning_rate": 3.354762467226098e-06, "loss": 0.5928, "step": 9763 }, { "epoch": 0.62, "grad_norm": 0.9301041960716248, "learning_rate": 3.3537936423205435e-06, "loss": 0.5931, "step": 9764 }, { "epoch": 0.62, "grad_norm": 0.8613686561584473, "learning_rate": 3.352824886731012e-06, "loss": 0.528, "step": 9765 }, { "epoch": 0.62, "grad_norm": 0.9040000438690186, "learning_rate": 3.351856200498296e-06, "loss": 0.6182, "step": 9766 }, { "epoch": 0.62, "grad_norm": 0.8548308610916138, "learning_rate": 3.3508875836631806e-06, "loss": 0.5758, "step": 9767 }, { "epoch": 0.62, "grad_norm": 0.8682760000228882, "learning_rate": 3.3499190362664523e-06, "loss": 0.515, "step": 9768 }, { "epoch": 0.62, "grad_norm": 0.8267245292663574, "learning_rate": 3.3489505583488925e-06, "loss": 0.5448, "step": 9769 }, { "epoch": 0.62, "grad_norm": 0.9359412789344788, "learning_rate": 3.3479821499512823e-06, "loss": 0.6277, "step": 9770 }, { "epoch": 0.62, "grad_norm": 0.9154059290885925, "learning_rate": 3.347013811114396e-06, "loss": 0.6081, "step": 9771 }, { "epoch": 0.62, "grad_norm": 0.8913496136665344, "learning_rate": 3.346045541879009e-06, "loss": 0.5149, "step": 9772 }, { "epoch": 0.62, "grad_norm": 0.8516811728477478, "learning_rate": 3.3450773422858886e-06, "loss": 0.605, "step": 9773 }, { "epoch": 0.62, "grad_norm": 0.854370653629303, "learning_rate": 3.3441092123758055e-06, "loss": 0.5382, "step": 9774 }, { "epoch": 0.62, "grad_norm": 0.9357313513755798, "learning_rate": 3.3431411521895228e-06, "loss": 0.5528, "step": 9775 }, { "epoch": 0.62, "grad_norm": 0.9238491058349609, "learning_rate": 3.342173161767803e-06, "loss": 0.6366, "step": 9776 }, { "epoch": 0.62, "grad_norm": 0.8327895402908325, "learning_rate": 3.341205241151403e-06, "loss": 0.5646, "step": 9777 }, { "epoch": 0.62, "grad_norm": 0.9005405306816101, "learning_rate": 3.3402373903810807e-06, "loss": 0.5825, "step": 9778 }, { "epoch": 0.62, "grad_norm": 0.8644494414329529, "learning_rate": 3.3392696094975875e-06, "loss": 0.5506, "step": 9779 }, { "epoch": 0.62, "grad_norm": 0.8813982009887695, "learning_rate": 3.338301898541675e-06, "loss": 0.6004, "step": 9780 }, { "epoch": 0.62, "grad_norm": 0.9440781474113464, "learning_rate": 3.337334257554086e-06, "loss": 0.6181, "step": 9781 }, { "epoch": 0.62, "grad_norm": 0.8736399412155151, "learning_rate": 3.3363666865755708e-06, "loss": 0.5939, "step": 9782 }, { "epoch": 0.62, "grad_norm": 0.929132342338562, "learning_rate": 3.335399185646865e-06, "loss": 0.6048, "step": 9783 }, { "epoch": 0.62, "grad_norm": 0.8123360872268677, "learning_rate": 3.33443175480871e-06, "loss": 0.5391, "step": 9784 }, { "epoch": 0.62, "grad_norm": 0.8572057485580444, "learning_rate": 3.3334643941018398e-06, "loss": 0.546, "step": 9785 }, { "epoch": 0.62, "grad_norm": 0.8945849537849426, "learning_rate": 3.3324971035669844e-06, "loss": 0.6196, "step": 9786 }, { "epoch": 0.62, "grad_norm": 0.8958498239517212, "learning_rate": 3.3315298832448762e-06, "loss": 0.5671, "step": 9787 }, { "epoch": 0.62, "grad_norm": 0.9150652885437012, "learning_rate": 3.330562733176242e-06, "loss": 0.5584, "step": 9788 }, { "epoch": 0.62, "grad_norm": 0.9838289022445679, "learning_rate": 3.3295956534018033e-06, "loss": 0.5973, "step": 9789 }, { "epoch": 0.62, "grad_norm": 0.9296243786811829, "learning_rate": 3.328628643962278e-06, "loss": 0.5697, "step": 9790 }, { "epoch": 0.62, "grad_norm": 0.8552951216697693, "learning_rate": 3.3276617048983876e-06, "loss": 0.5475, "step": 9791 }, { "epoch": 0.62, "grad_norm": 0.958833634853363, "learning_rate": 3.326694836250847e-06, "loss": 0.615, "step": 9792 }, { "epoch": 0.62, "grad_norm": 0.9096387028694153, "learning_rate": 3.325728038060365e-06, "loss": 0.5835, "step": 9793 }, { "epoch": 0.62, "grad_norm": 0.9228499531745911, "learning_rate": 3.324761310367649e-06, "loss": 0.5948, "step": 9794 }, { "epoch": 0.62, "grad_norm": 0.9237566590309143, "learning_rate": 3.3237946532134077e-06, "loss": 0.5998, "step": 9795 }, { "epoch": 0.62, "grad_norm": 0.8698447942733765, "learning_rate": 3.322828066638343e-06, "loss": 0.5345, "step": 9796 }, { "epoch": 0.62, "grad_norm": 0.9300669431686401, "learning_rate": 3.321861550683154e-06, "loss": 0.6291, "step": 9797 }, { "epoch": 0.62, "grad_norm": 0.9495216012001038, "learning_rate": 3.3208951053885367e-06, "loss": 0.6271, "step": 9798 }, { "epoch": 0.62, "grad_norm": 0.8946996331214905, "learning_rate": 3.3199287307951844e-06, "loss": 0.5963, "step": 9799 }, { "epoch": 0.62, "grad_norm": 0.8824841976165771, "learning_rate": 3.31896242694379e-06, "loss": 0.5869, "step": 9800 }, { "epoch": 0.62, "grad_norm": 0.9500332474708557, "learning_rate": 3.317996193875041e-06, "loss": 0.6316, "step": 9801 }, { "epoch": 0.62, "grad_norm": 0.8856709599494934, "learning_rate": 3.3170300316296194e-06, "loss": 0.6012, "step": 9802 }, { "epoch": 0.62, "grad_norm": 0.8249387741088867, "learning_rate": 3.3160639402482077e-06, "loss": 0.5547, "step": 9803 }, { "epoch": 0.62, "grad_norm": 0.9641744494438171, "learning_rate": 3.3150979197714874e-06, "loss": 0.573, "step": 9804 }, { "epoch": 0.62, "grad_norm": 0.9111761450767517, "learning_rate": 3.314131970240132e-06, "loss": 0.6098, "step": 9805 }, { "epoch": 0.62, "grad_norm": 0.8709757924079895, "learning_rate": 3.3131660916948137e-06, "loss": 0.6311, "step": 9806 }, { "epoch": 0.62, "grad_norm": 0.8643493056297302, "learning_rate": 3.3122002841762023e-06, "loss": 0.6367, "step": 9807 }, { "epoch": 0.62, "grad_norm": 0.9116371870040894, "learning_rate": 3.311234547724968e-06, "loss": 0.6311, "step": 9808 }, { "epoch": 0.62, "grad_norm": 0.8542858958244324, "learning_rate": 3.310268882381772e-06, "loss": 0.5615, "step": 9809 }, { "epoch": 0.62, "grad_norm": 0.8505398035049438, "learning_rate": 3.3093032881872738e-06, "loss": 0.537, "step": 9810 }, { "epoch": 0.62, "grad_norm": 0.9205043315887451, "learning_rate": 3.3083377651821314e-06, "loss": 0.5719, "step": 9811 }, { "epoch": 0.62, "grad_norm": 0.8506627082824707, "learning_rate": 3.3073723134070033e-06, "loss": 0.5692, "step": 9812 }, { "epoch": 0.62, "grad_norm": 0.8526927828788757, "learning_rate": 3.3064069329025394e-06, "loss": 0.602, "step": 9813 }, { "epoch": 0.62, "grad_norm": 0.8983103036880493, "learning_rate": 3.3054416237093863e-06, "loss": 0.6207, "step": 9814 }, { "epoch": 0.62, "grad_norm": 0.8063021898269653, "learning_rate": 3.3044763858681928e-06, "loss": 0.5113, "step": 9815 }, { "epoch": 0.62, "grad_norm": 0.9379715323448181, "learning_rate": 3.303511219419598e-06, "loss": 0.603, "step": 9816 }, { "epoch": 0.62, "grad_norm": 0.8616729378700256, "learning_rate": 3.3025461244042462e-06, "loss": 0.5416, "step": 9817 }, { "epoch": 0.62, "grad_norm": 0.8878458142280579, "learning_rate": 3.3015811008627707e-06, "loss": 0.597, "step": 9818 }, { "epoch": 0.62, "grad_norm": 0.9654011726379395, "learning_rate": 3.3006161488358084e-06, "loss": 0.5782, "step": 9819 }, { "epoch": 0.62, "grad_norm": 0.8611358404159546, "learning_rate": 3.299651268363986e-06, "loss": 0.5884, "step": 9820 }, { "epoch": 0.62, "grad_norm": 0.8918520212173462, "learning_rate": 3.298686459487936e-06, "loss": 0.5348, "step": 9821 }, { "epoch": 0.62, "grad_norm": 0.8771730065345764, "learning_rate": 3.2977217222482794e-06, "loss": 0.5562, "step": 9822 }, { "epoch": 0.62, "grad_norm": 0.8952116966247559, "learning_rate": 3.29675705668564e-06, "loss": 0.5843, "step": 9823 }, { "epoch": 0.62, "grad_norm": 0.7943683862686157, "learning_rate": 3.2957924628406347e-06, "loss": 0.4923, "step": 9824 }, { "epoch": 0.62, "grad_norm": 0.8794922828674316, "learning_rate": 3.2948279407538813e-06, "loss": 0.5498, "step": 9825 }, { "epoch": 0.62, "grad_norm": 0.8752865195274353, "learning_rate": 3.2938634904659903e-06, "loss": 0.563, "step": 9826 }, { "epoch": 0.62, "grad_norm": 0.8498966097831726, "learning_rate": 3.2928991120175747e-06, "loss": 0.5738, "step": 9827 }, { "epoch": 0.62, "grad_norm": 0.9079285860061646, "learning_rate": 3.2919348054492363e-06, "loss": 0.5089, "step": 9828 }, { "epoch": 0.62, "grad_norm": 0.9044334292411804, "learning_rate": 3.2909705708015834e-06, "loss": 0.6078, "step": 9829 }, { "epoch": 0.62, "grad_norm": 0.8988425135612488, "learning_rate": 3.290006408115213e-06, "loss": 0.6227, "step": 9830 }, { "epoch": 0.62, "grad_norm": 0.914106011390686, "learning_rate": 3.289042317430726e-06, "loss": 0.5588, "step": 9831 }, { "epoch": 0.62, "grad_norm": 0.8243867754936218, "learning_rate": 3.2880782987887128e-06, "loss": 0.5413, "step": 9832 }, { "epoch": 0.62, "grad_norm": 0.8767701983451843, "learning_rate": 3.2871143522297672e-06, "loss": 0.5734, "step": 9833 }, { "epoch": 0.62, "grad_norm": 0.8820706605911255, "learning_rate": 3.286150477794479e-06, "loss": 0.5971, "step": 9834 }, { "epoch": 0.62, "grad_norm": 0.8267252445220947, "learning_rate": 3.2851866755234324e-06, "loss": 0.584, "step": 9835 }, { "epoch": 0.62, "grad_norm": 0.8427024483680725, "learning_rate": 3.2842229454572084e-06, "loss": 0.571, "step": 9836 }, { "epoch": 0.62, "grad_norm": 0.9389393925666809, "learning_rate": 3.2832592876363866e-06, "loss": 0.6084, "step": 9837 }, { "epoch": 0.62, "grad_norm": 0.9303972125053406, "learning_rate": 3.2822957021015455e-06, "loss": 0.5381, "step": 9838 }, { "epoch": 0.62, "grad_norm": 0.9479062557220459, "learning_rate": 3.2813321888932573e-06, "loss": 0.5407, "step": 9839 }, { "epoch": 0.62, "grad_norm": 0.808933436870575, "learning_rate": 3.2803687480520905e-06, "loss": 0.5186, "step": 9840 }, { "epoch": 0.62, "grad_norm": 0.9015218019485474, "learning_rate": 3.279405379618613e-06, "loss": 0.5446, "step": 9841 }, { "epoch": 0.62, "grad_norm": 0.8663600087165833, "learning_rate": 3.278442083633392e-06, "loss": 0.5311, "step": 9842 }, { "epoch": 0.62, "grad_norm": 0.94172203540802, "learning_rate": 3.277478860136985e-06, "loss": 0.6051, "step": 9843 }, { "epoch": 0.62, "grad_norm": 0.8853269219398499, "learning_rate": 3.276515709169951e-06, "loss": 0.5771, "step": 9844 }, { "epoch": 0.62, "grad_norm": 0.8446550965309143, "learning_rate": 3.2755526307728447e-06, "loss": 0.5803, "step": 9845 }, { "epoch": 0.62, "grad_norm": 0.8651383519172668, "learning_rate": 3.2745896249862166e-06, "loss": 0.5394, "step": 9846 }, { "epoch": 0.62, "grad_norm": 0.923086941242218, "learning_rate": 3.2736266918506195e-06, "loss": 0.6304, "step": 9847 }, { "epoch": 0.62, "grad_norm": 0.9140406847000122, "learning_rate": 3.272663831406595e-06, "loss": 0.6575, "step": 9848 }, { "epoch": 0.62, "grad_norm": 0.8631356358528137, "learning_rate": 3.2717010436946894e-06, "loss": 0.577, "step": 9849 }, { "epoch": 0.62, "grad_norm": 1.065402626991272, "learning_rate": 3.270738328755438e-06, "loss": 0.6418, "step": 9850 }, { "epoch": 0.62, "grad_norm": 0.9519265294075012, "learning_rate": 3.269775686629383e-06, "loss": 0.6477, "step": 9851 }, { "epoch": 0.62, "grad_norm": 0.9092915058135986, "learning_rate": 3.2688131173570523e-06, "loss": 0.6198, "step": 9852 }, { "epoch": 0.62, "grad_norm": 0.9024431109428406, "learning_rate": 3.2678506209789805e-06, "loss": 0.6181, "step": 9853 }, { "epoch": 0.62, "grad_norm": 0.8769482970237732, "learning_rate": 3.2668881975356915e-06, "loss": 0.5536, "step": 9854 }, { "epoch": 0.62, "grad_norm": 0.9238791465759277, "learning_rate": 3.2659258470677137e-06, "loss": 0.6364, "step": 9855 }, { "epoch": 0.62, "grad_norm": 0.7877947092056274, "learning_rate": 3.2649635696155646e-06, "loss": 0.4979, "step": 9856 }, { "epoch": 0.62, "grad_norm": 0.84283447265625, "learning_rate": 3.2640013652197654e-06, "loss": 0.5688, "step": 9857 }, { "epoch": 0.62, "grad_norm": 0.8760327100753784, "learning_rate": 3.263039233920827e-06, "loss": 0.616, "step": 9858 }, { "epoch": 0.62, "grad_norm": 0.9331052303314209, "learning_rate": 3.2620771757592663e-06, "loss": 0.5612, "step": 9859 }, { "epoch": 0.62, "grad_norm": 0.9472546577453613, "learning_rate": 3.261115190775589e-06, "loss": 0.5579, "step": 9860 }, { "epoch": 0.62, "grad_norm": 0.9176338315010071, "learning_rate": 3.2601532790103026e-06, "loss": 0.5493, "step": 9861 }, { "epoch": 0.62, "grad_norm": 0.9220177531242371, "learning_rate": 3.259191440503909e-06, "loss": 0.6003, "step": 9862 }, { "epoch": 0.62, "grad_norm": 0.9254795908927917, "learning_rate": 3.258229675296907e-06, "loss": 0.607, "step": 9863 }, { "epoch": 0.62, "grad_norm": 0.9141079187393188, "learning_rate": 3.257267983429794e-06, "loss": 0.6196, "step": 9864 }, { "epoch": 0.62, "grad_norm": 0.8655530214309692, "learning_rate": 3.2563063649430647e-06, "loss": 0.5738, "step": 9865 }, { "epoch": 0.63, "grad_norm": 0.9000252485275269, "learning_rate": 3.25534481987721e-06, "loss": 0.6145, "step": 9866 }, { "epoch": 0.63, "grad_norm": 0.9283547401428223, "learning_rate": 3.2543833482727123e-06, "loss": 0.6134, "step": 9867 }, { "epoch": 0.63, "grad_norm": 0.8959989547729492, "learning_rate": 3.2534219501700603e-06, "loss": 0.5565, "step": 9868 }, { "epoch": 0.63, "grad_norm": 0.8951772451400757, "learning_rate": 3.252460625609736e-06, "loss": 0.5718, "step": 9869 }, { "epoch": 0.63, "grad_norm": 0.9525763988494873, "learning_rate": 3.251499374632214e-06, "loss": 0.5916, "step": 9870 }, { "epoch": 0.63, "grad_norm": 0.8921371102333069, "learning_rate": 3.2505381972779704e-06, "loss": 0.5674, "step": 9871 }, { "epoch": 0.63, "grad_norm": 0.8959813714027405, "learning_rate": 3.249577093587477e-06, "loss": 0.5605, "step": 9872 }, { "epoch": 0.63, "grad_norm": 0.9102842807769775, "learning_rate": 3.2486160636012054e-06, "loss": 0.6122, "step": 9873 }, { "epoch": 0.63, "grad_norm": 0.8645837306976318, "learning_rate": 3.2476551073596173e-06, "loss": 0.5871, "step": 9874 }, { "epoch": 0.63, "grad_norm": 0.9292948246002197, "learning_rate": 3.246694224903175e-06, "loss": 0.563, "step": 9875 }, { "epoch": 0.63, "grad_norm": 0.9219769835472107, "learning_rate": 3.245733416272341e-06, "loss": 0.5999, "step": 9876 }, { "epoch": 0.63, "grad_norm": 0.9211458563804626, "learning_rate": 3.2447726815075715e-06, "loss": 0.5903, "step": 9877 }, { "epoch": 0.63, "grad_norm": 0.8986589312553406, "learning_rate": 3.243812020649318e-06, "loss": 0.5675, "step": 9878 }, { "epoch": 0.63, "grad_norm": 0.9727473855018616, "learning_rate": 3.24285143373803e-06, "loss": 0.6265, "step": 9879 }, { "epoch": 0.63, "grad_norm": 0.9438537955284119, "learning_rate": 3.241890920814154e-06, "loss": 0.5264, "step": 9880 }, { "epoch": 0.63, "grad_norm": 0.9068976640701294, "learning_rate": 3.2409304819181377e-06, "loss": 0.5723, "step": 9881 }, { "epoch": 0.63, "grad_norm": 0.8970639705657959, "learning_rate": 3.2399701170904197e-06, "loss": 0.5673, "step": 9882 }, { "epoch": 0.63, "grad_norm": 0.8697466850280762, "learning_rate": 3.239009826371436e-06, "loss": 0.5677, "step": 9883 }, { "epoch": 0.63, "grad_norm": 0.8627969622612, "learning_rate": 3.238049609801621e-06, "loss": 0.5892, "step": 9884 }, { "epoch": 0.63, "grad_norm": 0.8970100283622742, "learning_rate": 3.2370894674214102e-06, "loss": 0.5913, "step": 9885 }, { "epoch": 0.63, "grad_norm": 0.811985969543457, "learning_rate": 3.2361293992712295e-06, "loss": 0.4866, "step": 9886 }, { "epoch": 0.63, "grad_norm": 0.8113346099853516, "learning_rate": 3.2351694053915027e-06, "loss": 0.5692, "step": 9887 }, { "epoch": 0.63, "grad_norm": 0.9210705757141113, "learning_rate": 3.2342094858226514e-06, "loss": 0.6221, "step": 9888 }, { "epoch": 0.63, "grad_norm": 0.8682329654693604, "learning_rate": 3.233249640605098e-06, "loss": 0.5479, "step": 9889 }, { "epoch": 0.63, "grad_norm": 0.8741553425788879, "learning_rate": 3.232289869779256e-06, "loss": 0.5479, "step": 9890 }, { "epoch": 0.63, "grad_norm": 0.8522763252258301, "learning_rate": 3.231330173385537e-06, "loss": 0.565, "step": 9891 }, { "epoch": 0.63, "grad_norm": 0.8624328374862671, "learning_rate": 3.2303705514643537e-06, "loss": 0.6031, "step": 9892 }, { "epoch": 0.63, "grad_norm": 0.952092707157135, "learning_rate": 3.229411004056108e-06, "loss": 0.5751, "step": 9893 }, { "epoch": 0.63, "grad_norm": 0.9104188084602356, "learning_rate": 3.2284515312012056e-06, "loss": 0.5759, "step": 9894 }, { "epoch": 0.63, "grad_norm": 0.8913605213165283, "learning_rate": 3.2274921329400484e-06, "loss": 0.6126, "step": 9895 }, { "epoch": 0.63, "grad_norm": 0.8454800844192505, "learning_rate": 3.226532809313031e-06, "loss": 0.5386, "step": 9896 }, { "epoch": 0.63, "grad_norm": 0.9178531765937805, "learning_rate": 3.2255735603605454e-06, "loss": 0.6037, "step": 9897 }, { "epoch": 0.63, "grad_norm": 0.9100960493087769, "learning_rate": 3.2246143861229857e-06, "loss": 0.5579, "step": 9898 }, { "epoch": 0.63, "grad_norm": 0.8924016952514648, "learning_rate": 3.223655286640739e-06, "loss": 0.5699, "step": 9899 }, { "epoch": 0.63, "grad_norm": 0.9032720327377319, "learning_rate": 3.2226962619541885e-06, "loss": 0.5988, "step": 9900 }, { "epoch": 0.63, "grad_norm": 0.8333504796028137, "learning_rate": 3.221737312103714e-06, "loss": 0.5045, "step": 9901 }, { "epoch": 0.63, "grad_norm": 0.8808243274688721, "learning_rate": 3.2207784371296957e-06, "loss": 0.6074, "step": 9902 }, { "epoch": 0.63, "grad_norm": 0.906588613986969, "learning_rate": 3.2198196370725095e-06, "loss": 0.6131, "step": 9903 }, { "epoch": 0.63, "grad_norm": 0.9039662480354309, "learning_rate": 3.218860911972525e-06, "loss": 0.637, "step": 9904 }, { "epoch": 0.63, "grad_norm": 0.9129331707954407, "learning_rate": 3.2179022618701093e-06, "loss": 0.624, "step": 9905 }, { "epoch": 0.63, "grad_norm": 0.904314398765564, "learning_rate": 3.2169436868056316e-06, "loss": 0.5856, "step": 9906 }, { "epoch": 0.63, "grad_norm": 0.8561462163925171, "learning_rate": 3.215985186819453e-06, "loss": 0.6, "step": 9907 }, { "epoch": 0.63, "grad_norm": 0.8671022653579712, "learning_rate": 3.2150267619519326e-06, "loss": 0.5943, "step": 9908 }, { "epoch": 0.63, "grad_norm": 0.895698070526123, "learning_rate": 3.214068412243424e-06, "loss": 0.5647, "step": 9909 }, { "epoch": 0.63, "grad_norm": 0.8691821694374084, "learning_rate": 3.213110137734281e-06, "loss": 0.6168, "step": 9910 }, { "epoch": 0.63, "grad_norm": 0.8884726166725159, "learning_rate": 3.2121519384648558e-06, "loss": 0.6183, "step": 9911 }, { "epoch": 0.63, "grad_norm": 0.8356814980506897, "learning_rate": 3.211193814475494e-06, "loss": 0.5989, "step": 9912 }, { "epoch": 0.63, "grad_norm": 0.8705270290374756, "learning_rate": 3.2102357658065357e-06, "loss": 0.6103, "step": 9913 }, { "epoch": 0.63, "grad_norm": 0.9131333827972412, "learning_rate": 3.2092777924983224e-06, "loss": 0.6342, "step": 9914 }, { "epoch": 0.63, "grad_norm": 0.9165261387825012, "learning_rate": 3.208319894591194e-06, "loss": 0.6076, "step": 9915 }, { "epoch": 0.63, "grad_norm": 0.858545184135437, "learning_rate": 3.207362072125482e-06, "loss": 0.5796, "step": 9916 }, { "epoch": 0.63, "grad_norm": 0.8806081414222717, "learning_rate": 3.2064043251415166e-06, "loss": 0.5396, "step": 9917 }, { "epoch": 0.63, "grad_norm": 0.9189614057540894, "learning_rate": 3.2054466536796236e-06, "loss": 0.5817, "step": 9918 }, { "epoch": 0.63, "grad_norm": 0.9014858603477478, "learning_rate": 3.2044890577801317e-06, "loss": 0.5843, "step": 9919 }, { "epoch": 0.63, "grad_norm": 0.9093121886253357, "learning_rate": 3.2035315374833596e-06, "loss": 0.5669, "step": 9920 }, { "epoch": 0.63, "grad_norm": 0.8906499743461609, "learning_rate": 3.2025740928296235e-06, "loss": 0.5766, "step": 9921 }, { "epoch": 0.63, "grad_norm": 0.9178594350814819, "learning_rate": 3.201616723859241e-06, "loss": 0.6233, "step": 9922 }, { "epoch": 0.63, "grad_norm": 0.8954256772994995, "learning_rate": 3.20065943061252e-06, "loss": 0.6048, "step": 9923 }, { "epoch": 0.63, "grad_norm": 0.8136070370674133, "learning_rate": 3.199702213129773e-06, "loss": 0.5394, "step": 9924 }, { "epoch": 0.63, "grad_norm": 0.8871577382087708, "learning_rate": 3.1987450714513018e-06, "loss": 0.5906, "step": 9925 }, { "epoch": 0.63, "grad_norm": 0.8907647728919983, "learning_rate": 3.1977880056174105e-06, "loss": 0.5741, "step": 9926 }, { "epoch": 0.63, "grad_norm": 0.8544868230819702, "learning_rate": 3.196831015668396e-06, "loss": 0.5634, "step": 9927 }, { "epoch": 0.63, "grad_norm": 0.9441981911659241, "learning_rate": 3.195874101644555e-06, "loss": 0.5918, "step": 9928 }, { "epoch": 0.63, "grad_norm": 0.8469243049621582, "learning_rate": 3.194917263586179e-06, "loss": 0.5407, "step": 9929 }, { "epoch": 0.63, "grad_norm": 0.8396049737930298, "learning_rate": 3.1939605015335588e-06, "loss": 0.5383, "step": 9930 }, { "epoch": 0.63, "grad_norm": 0.8566557168960571, "learning_rate": 3.193003815526977e-06, "loss": 0.5878, "step": 9931 }, { "epoch": 0.63, "grad_norm": 0.9029106497764587, "learning_rate": 3.192047205606721e-06, "loss": 0.6431, "step": 9932 }, { "epoch": 0.63, "grad_norm": 0.9187177419662476, "learning_rate": 3.1910906718130665e-06, "loss": 0.5857, "step": 9933 }, { "epoch": 0.63, "grad_norm": 0.8693289756774902, "learning_rate": 3.1901342141862917e-06, "loss": 0.5984, "step": 9934 }, { "epoch": 0.63, "grad_norm": 0.9296219944953918, "learning_rate": 3.1891778327666673e-06, "loss": 0.6454, "step": 9935 }, { "epoch": 0.63, "grad_norm": 0.944770097732544, "learning_rate": 3.1882215275944673e-06, "loss": 0.6541, "step": 9936 }, { "epoch": 0.63, "grad_norm": 0.8502100706100464, "learning_rate": 3.187265298709954e-06, "loss": 0.5676, "step": 9937 }, { "epoch": 0.63, "grad_norm": 0.8540067076683044, "learning_rate": 3.1863091461533945e-06, "loss": 0.5624, "step": 9938 }, { "epoch": 0.63, "grad_norm": 0.8409416079521179, "learning_rate": 3.1853530699650483e-06, "loss": 0.5844, "step": 9939 }, { "epoch": 0.63, "grad_norm": 0.858970046043396, "learning_rate": 3.184397070185169e-06, "loss": 0.6213, "step": 9940 }, { "epoch": 0.63, "grad_norm": 0.8982256054878235, "learning_rate": 3.183441146854014e-06, "loss": 0.5477, "step": 9941 }, { "epoch": 0.63, "grad_norm": 0.924256443977356, "learning_rate": 3.182485300011834e-06, "loss": 0.6534, "step": 9942 }, { "epoch": 0.63, "grad_norm": 0.8575473427772522, "learning_rate": 3.181529529698875e-06, "loss": 0.5467, "step": 9943 }, { "epoch": 0.63, "grad_norm": 0.8267804980278015, "learning_rate": 3.1805738359553796e-06, "loss": 0.5687, "step": 9944 }, { "epoch": 0.63, "grad_norm": 0.8258667588233948, "learning_rate": 3.1796182188215917e-06, "loss": 0.5367, "step": 9945 }, { "epoch": 0.63, "grad_norm": 0.9159985184669495, "learning_rate": 3.1786626783377494e-06, "loss": 0.576, "step": 9946 }, { "epoch": 0.63, "grad_norm": 0.8569443225860596, "learning_rate": 3.177707214544086e-06, "loss": 0.5744, "step": 9947 }, { "epoch": 0.63, "grad_norm": 0.8471035957336426, "learning_rate": 3.1767518274808298e-06, "loss": 0.5411, "step": 9948 }, { "epoch": 0.63, "grad_norm": 0.8953260779380798, "learning_rate": 3.175796517188212e-06, "loss": 0.6003, "step": 9949 }, { "epoch": 0.63, "grad_norm": 0.868668258190155, "learning_rate": 3.174841283706459e-06, "loss": 0.5516, "step": 9950 }, { "epoch": 0.63, "grad_norm": 0.8107344508171082, "learning_rate": 3.17388612707579e-06, "loss": 0.5666, "step": 9951 }, { "epoch": 0.63, "grad_norm": 0.9241723418235779, "learning_rate": 3.172931047336421e-06, "loss": 0.5836, "step": 9952 }, { "epoch": 0.63, "grad_norm": 0.8751961588859558, "learning_rate": 3.1719760445285712e-06, "loss": 0.5113, "step": 9953 }, { "epoch": 0.63, "grad_norm": 0.9166142344474792, "learning_rate": 3.1710211186924524e-06, "loss": 0.555, "step": 9954 }, { "epoch": 0.63, "grad_norm": 0.889083981513977, "learning_rate": 3.170066269868271e-06, "loss": 0.5905, "step": 9955 }, { "epoch": 0.63, "grad_norm": 0.905120313167572, "learning_rate": 3.169111498096232e-06, "loss": 0.5809, "step": 9956 }, { "epoch": 0.63, "grad_norm": 0.852555513381958, "learning_rate": 3.1681568034165383e-06, "loss": 0.5447, "step": 9957 }, { "epoch": 0.63, "grad_norm": 0.8818122148513794, "learning_rate": 3.167202185869391e-06, "loss": 0.5774, "step": 9958 }, { "epoch": 0.63, "grad_norm": 0.9433296918869019, "learning_rate": 3.166247645494982e-06, "loss": 0.6099, "step": 9959 }, { "epoch": 0.63, "grad_norm": 0.9000284671783447, "learning_rate": 3.1652931823335074e-06, "loss": 0.5622, "step": 9960 }, { "epoch": 0.63, "grad_norm": 0.9485234618186951, "learning_rate": 3.164338796425152e-06, "loss": 0.6129, "step": 9961 }, { "epoch": 0.63, "grad_norm": 0.8851210474967957, "learning_rate": 3.163384487810106e-06, "loss": 0.542, "step": 9962 }, { "epoch": 0.63, "grad_norm": 0.8798405528068542, "learning_rate": 3.162430256528549e-06, "loss": 0.5844, "step": 9963 }, { "epoch": 0.63, "grad_norm": 0.921736478805542, "learning_rate": 3.161476102620663e-06, "loss": 0.6119, "step": 9964 }, { "epoch": 0.63, "grad_norm": 0.9609774947166443, "learning_rate": 3.16052202612662e-06, "loss": 0.5531, "step": 9965 }, { "epoch": 0.63, "grad_norm": 0.8847622275352478, "learning_rate": 3.159568027086598e-06, "loss": 0.6304, "step": 9966 }, { "epoch": 0.63, "grad_norm": 0.9161363244056702, "learning_rate": 3.1586141055407627e-06, "loss": 0.6271, "step": 9967 }, { "epoch": 0.63, "grad_norm": 0.8306808471679688, "learning_rate": 3.157660261529283e-06, "loss": 0.5713, "step": 9968 }, { "epoch": 0.63, "grad_norm": 0.932395875453949, "learning_rate": 3.15670649509232e-06, "loss": 0.5708, "step": 9969 }, { "epoch": 0.63, "grad_norm": 0.890895426273346, "learning_rate": 3.155752806270033e-06, "loss": 0.6783, "step": 9970 }, { "epoch": 0.63, "grad_norm": 0.8662253618240356, "learning_rate": 3.1547991951025795e-06, "loss": 0.5452, "step": 9971 }, { "epoch": 0.63, "grad_norm": 0.9375318884849548, "learning_rate": 3.153845661630115e-06, "loss": 0.6196, "step": 9972 }, { "epoch": 0.63, "grad_norm": 0.8994795680046082, "learning_rate": 3.152892205892787e-06, "loss": 0.5902, "step": 9973 }, { "epoch": 0.63, "grad_norm": 0.890771210193634, "learning_rate": 3.15193882793074e-06, "loss": 0.5948, "step": 9974 }, { "epoch": 0.63, "grad_norm": 0.8573660254478455, "learning_rate": 3.150985527784122e-06, "loss": 0.5463, "step": 9975 }, { "epoch": 0.63, "grad_norm": 0.8332209587097168, "learning_rate": 3.1500323054930715e-06, "loss": 0.5577, "step": 9976 }, { "epoch": 0.63, "grad_norm": 0.9283886551856995, "learning_rate": 3.149079161097725e-06, "loss": 0.5936, "step": 9977 }, { "epoch": 0.63, "grad_norm": 0.8500183820724487, "learning_rate": 3.1481260946382143e-06, "loss": 0.5424, "step": 9978 }, { "epoch": 0.63, "grad_norm": 0.8809803128242493, "learning_rate": 3.147173106154673e-06, "loss": 0.6419, "step": 9979 }, { "epoch": 0.63, "grad_norm": 0.8598153591156006, "learning_rate": 3.146220195687227e-06, "loss": 0.6031, "step": 9980 }, { "epoch": 0.63, "grad_norm": 0.8905846476554871, "learning_rate": 3.145267363276e-06, "loss": 0.5879, "step": 9981 }, { "epoch": 0.63, "grad_norm": 0.8749983906745911, "learning_rate": 3.1443146089611102e-06, "loss": 0.566, "step": 9982 }, { "epoch": 0.63, "grad_norm": 0.8304601907730103, "learning_rate": 3.143361932782678e-06, "loss": 0.5731, "step": 9983 }, { "epoch": 0.63, "grad_norm": 0.8867066502571106, "learning_rate": 3.142409334780817e-06, "loss": 0.5504, "step": 9984 }, { "epoch": 0.63, "grad_norm": 0.8217571377754211, "learning_rate": 3.1414568149956366e-06, "loss": 0.4873, "step": 9985 }, { "epoch": 0.63, "grad_norm": 0.8885734677314758, "learning_rate": 3.1405043734672436e-06, "loss": 0.5873, "step": 9986 }, { "epoch": 0.63, "grad_norm": 0.8465083241462708, "learning_rate": 3.1395520102357413e-06, "loss": 0.5652, "step": 9987 }, { "epoch": 0.63, "grad_norm": 0.9155653119087219, "learning_rate": 3.1385997253412336e-06, "loss": 0.5387, "step": 9988 }, { "epoch": 0.63, "grad_norm": 0.9237584471702576, "learning_rate": 3.137647518823817e-06, "loss": 0.5978, "step": 9989 }, { "epoch": 0.63, "grad_norm": 0.9412940144538879, "learning_rate": 3.136695390723583e-06, "loss": 0.6437, "step": 9990 }, { "epoch": 0.63, "grad_norm": 0.9255321025848389, "learning_rate": 3.135743341080624e-06, "loss": 0.59, "step": 9991 }, { "epoch": 0.63, "grad_norm": 0.9065369367599487, "learning_rate": 3.1347913699350286e-06, "loss": 0.5753, "step": 9992 }, { "epoch": 0.63, "grad_norm": 0.8333830237388611, "learning_rate": 3.1338394773268805e-06, "loss": 0.5217, "step": 9993 }, { "epoch": 0.63, "grad_norm": 0.9209916591644287, "learning_rate": 3.132887663296259e-06, "loss": 0.6099, "step": 9994 }, { "epoch": 0.63, "grad_norm": 0.9044961929321289, "learning_rate": 3.131935927883242e-06, "loss": 0.566, "step": 9995 }, { "epoch": 0.63, "grad_norm": 0.9058372378349304, "learning_rate": 3.1309842711279066e-06, "loss": 0.5774, "step": 9996 }, { "epoch": 0.63, "grad_norm": 0.8610040545463562, "learning_rate": 3.130032693070322e-06, "loss": 0.5434, "step": 9997 }, { "epoch": 0.63, "grad_norm": 0.894743800163269, "learning_rate": 3.129081193750554e-06, "loss": 0.5637, "step": 9998 }, { "epoch": 0.63, "grad_norm": 0.9117133021354675, "learning_rate": 3.1281297732086666e-06, "loss": 0.5844, "step": 9999 }, { "epoch": 0.63, "grad_norm": 0.9719625115394592, "learning_rate": 3.1271784314847266e-06, "loss": 0.6132, "step": 10000 }, { "epoch": 0.63, "grad_norm": 0.8312113881111145, "learning_rate": 3.126227168618786e-06, "loss": 0.5298, "step": 10001 }, { "epoch": 0.63, "grad_norm": 0.85428786277771, "learning_rate": 3.1252759846509013e-06, "loss": 0.5784, "step": 10002 }, { "epoch": 0.63, "grad_norm": 0.8149659633636475, "learning_rate": 3.1243248796211234e-06, "loss": 0.5674, "step": 10003 }, { "epoch": 0.63, "grad_norm": 0.8940887451171875, "learning_rate": 3.123373853569498e-06, "loss": 0.5869, "step": 10004 }, { "epoch": 0.63, "grad_norm": 0.9396780133247375, "learning_rate": 3.1224229065360734e-06, "loss": 0.5875, "step": 10005 }, { "epoch": 0.63, "grad_norm": 0.8760607838630676, "learning_rate": 3.1214720385608875e-06, "loss": 0.6323, "step": 10006 }, { "epoch": 0.63, "grad_norm": 0.9258213043212891, "learning_rate": 3.120521249683981e-06, "loss": 0.6057, "step": 10007 }, { "epoch": 0.63, "grad_norm": 0.9452094435691833, "learning_rate": 3.1195705399453833e-06, "loss": 0.547, "step": 10008 }, { "epoch": 0.63, "grad_norm": 0.8690341711044312, "learning_rate": 3.118619909385131e-06, "loss": 0.6319, "step": 10009 }, { "epoch": 0.63, "grad_norm": 0.8699579238891602, "learning_rate": 3.117669358043248e-06, "loss": 0.568, "step": 10010 }, { "epoch": 0.63, "grad_norm": 0.9703599214553833, "learning_rate": 3.116718885959762e-06, "loss": 0.592, "step": 10011 }, { "epoch": 0.63, "grad_norm": 0.8900342583656311, "learning_rate": 3.1157684931746902e-06, "loss": 0.5536, "step": 10012 }, { "epoch": 0.63, "grad_norm": 0.8778373003005981, "learning_rate": 3.1148181797280543e-06, "loss": 0.6032, "step": 10013 }, { "epoch": 0.63, "grad_norm": 0.8625448942184448, "learning_rate": 3.1138679456598654e-06, "loss": 0.5673, "step": 10014 }, { "epoch": 0.63, "grad_norm": 0.8882395029067993, "learning_rate": 3.112917791010137e-06, "loss": 0.6069, "step": 10015 }, { "epoch": 0.63, "grad_norm": 0.8981207013130188, "learning_rate": 3.111967715818876e-06, "loss": 0.579, "step": 10016 }, { "epoch": 0.63, "grad_norm": 0.891898512840271, "learning_rate": 3.1110177201260845e-06, "loss": 0.5742, "step": 10017 }, { "epoch": 0.63, "grad_norm": 0.9222726225852966, "learning_rate": 3.1100678039717665e-06, "loss": 0.553, "step": 10018 }, { "epoch": 0.63, "grad_norm": 0.8938819766044617, "learning_rate": 3.1091179673959194e-06, "loss": 0.5761, "step": 10019 }, { "epoch": 0.63, "grad_norm": 0.9439987540245056, "learning_rate": 3.108168210438536e-06, "loss": 0.6038, "step": 10020 }, { "epoch": 0.63, "grad_norm": 0.9559965133666992, "learning_rate": 3.1072185331396083e-06, "loss": 0.5841, "step": 10021 }, { "epoch": 0.63, "grad_norm": 0.912056565284729, "learning_rate": 3.106268935539123e-06, "loss": 0.6017, "step": 10022 }, { "epoch": 0.64, "grad_norm": 0.9289038181304932, "learning_rate": 3.1053194176770662e-06, "loss": 0.6042, "step": 10023 }, { "epoch": 0.64, "grad_norm": 0.864149808883667, "learning_rate": 3.1043699795934172e-06, "loss": 0.549, "step": 10024 }, { "epoch": 0.64, "grad_norm": 0.9083261489868164, "learning_rate": 3.1034206213281536e-06, "loss": 0.6446, "step": 10025 }, { "epoch": 0.64, "grad_norm": 0.8593977689743042, "learning_rate": 3.10247134292125e-06, "loss": 0.5199, "step": 10026 }, { "epoch": 0.64, "grad_norm": 0.9130897521972656, "learning_rate": 3.1015221444126776e-06, "loss": 0.5645, "step": 10027 }, { "epoch": 0.64, "grad_norm": 0.8100042939186096, "learning_rate": 3.1005730258424025e-06, "loss": 0.5811, "step": 10028 }, { "epoch": 0.64, "grad_norm": 0.7985337376594543, "learning_rate": 3.099623987250391e-06, "loss": 0.5514, "step": 10029 }, { "epoch": 0.64, "grad_norm": 0.8951230049133301, "learning_rate": 3.098675028676601e-06, "loss": 0.6081, "step": 10030 }, { "epoch": 0.64, "grad_norm": 0.844353973865509, "learning_rate": 3.0977261501609924e-06, "loss": 0.5776, "step": 10031 }, { "epoch": 0.64, "grad_norm": 0.9215499758720398, "learning_rate": 3.0967773517435173e-06, "loss": 0.5944, "step": 10032 }, { "epoch": 0.64, "grad_norm": 0.8491506576538086, "learning_rate": 3.0958286334641284e-06, "loss": 0.5225, "step": 10033 }, { "epoch": 0.64, "grad_norm": 0.9156690835952759, "learning_rate": 3.0948799953627696e-06, "loss": 0.5782, "step": 10034 }, { "epoch": 0.64, "grad_norm": 0.8732212781906128, "learning_rate": 3.093931437479388e-06, "loss": 0.5373, "step": 10035 }, { "epoch": 0.64, "grad_norm": 0.8561059236526489, "learning_rate": 3.092982959853923e-06, "loss": 0.5791, "step": 10036 }, { "epoch": 0.64, "grad_norm": 0.9923532605171204, "learning_rate": 3.092034562526312e-06, "loss": 0.619, "step": 10037 }, { "epoch": 0.64, "grad_norm": 0.8949557542800903, "learning_rate": 3.0910862455364864e-06, "loss": 0.5534, "step": 10038 }, { "epoch": 0.64, "grad_norm": 0.9672521948814392, "learning_rate": 3.09013800892438e-06, "loss": 0.6306, "step": 10039 }, { "epoch": 0.64, "grad_norm": 0.8525355458259583, "learning_rate": 3.0891898527299167e-06, "loss": 0.5502, "step": 10040 }, { "epoch": 0.64, "grad_norm": 0.8738742470741272, "learning_rate": 3.088241776993024e-06, "loss": 0.5939, "step": 10041 }, { "epoch": 0.64, "grad_norm": 0.9154573082923889, "learning_rate": 3.0872937817536165e-06, "loss": 0.6274, "step": 10042 }, { "epoch": 0.64, "grad_norm": 0.8766052722930908, "learning_rate": 3.0863458670516157e-06, "loss": 0.5632, "step": 10043 }, { "epoch": 0.64, "grad_norm": 0.9145663976669312, "learning_rate": 3.085398032926933e-06, "loss": 0.5808, "step": 10044 }, { "epoch": 0.64, "grad_norm": 0.9256823062896729, "learning_rate": 3.0844502794194795e-06, "loss": 0.6116, "step": 10045 }, { "epoch": 0.64, "grad_norm": 0.8836879730224609, "learning_rate": 3.083502606569159e-06, "loss": 0.5633, "step": 10046 }, { "epoch": 0.64, "grad_norm": 0.8442484736442566, "learning_rate": 3.0825550144158788e-06, "loss": 0.5449, "step": 10047 }, { "epoch": 0.64, "grad_norm": 0.8978825807571411, "learning_rate": 3.081607502999536e-06, "loss": 0.5596, "step": 10048 }, { "epoch": 0.64, "grad_norm": 0.8920104503631592, "learning_rate": 3.0806600723600275e-06, "loss": 0.5789, "step": 10049 }, { "epoch": 0.64, "grad_norm": 0.8824292421340942, "learning_rate": 3.0797127225372477e-06, "loss": 0.5769, "step": 10050 }, { "epoch": 0.64, "grad_norm": 0.8434662818908691, "learning_rate": 3.078765453571082e-06, "loss": 0.5399, "step": 10051 }, { "epoch": 0.64, "grad_norm": 0.8964858055114746, "learning_rate": 3.077818265501421e-06, "loss": 0.5802, "step": 10052 }, { "epoch": 0.64, "grad_norm": 0.8828626871109009, "learning_rate": 3.0768711583681475e-06, "loss": 0.5715, "step": 10053 }, { "epoch": 0.64, "grad_norm": 0.8482038974761963, "learning_rate": 3.075924132211139e-06, "loss": 0.5171, "step": 10054 }, { "epoch": 0.64, "grad_norm": 0.8848569989204407, "learning_rate": 3.07497718707027e-06, "loss": 0.5214, "step": 10055 }, { "epoch": 0.64, "grad_norm": 0.9475182294845581, "learning_rate": 3.074030322985416e-06, "loss": 0.5741, "step": 10056 }, { "epoch": 0.64, "grad_norm": 0.8911900520324707, "learning_rate": 3.073083539996446e-06, "loss": 0.5845, "step": 10057 }, { "epoch": 0.64, "grad_norm": 0.8566701412200928, "learning_rate": 3.072136838143225e-06, "loss": 0.5495, "step": 10058 }, { "epoch": 0.64, "grad_norm": 0.8495940566062927, "learning_rate": 3.0711902174656126e-06, "loss": 0.5523, "step": 10059 }, { "epoch": 0.64, "grad_norm": 0.9519007802009583, "learning_rate": 3.070243678003472e-06, "loss": 0.6046, "step": 10060 }, { "epoch": 0.64, "grad_norm": 0.8953449726104736, "learning_rate": 3.069297219796658e-06, "loss": 0.6238, "step": 10061 }, { "epoch": 0.64, "grad_norm": 0.8640437126159668, "learning_rate": 3.068350842885022e-06, "loss": 0.5685, "step": 10062 }, { "epoch": 0.64, "grad_norm": 0.9119696021080017, "learning_rate": 3.0674045473084103e-06, "loss": 0.5888, "step": 10063 }, { "epoch": 0.64, "grad_norm": 0.8524396419525146, "learning_rate": 3.0664583331066695e-06, "loss": 0.521, "step": 10064 }, { "epoch": 0.64, "grad_norm": 0.896168053150177, "learning_rate": 3.0655122003196443e-06, "loss": 0.5385, "step": 10065 }, { "epoch": 0.64, "grad_norm": 0.9223374724388123, "learning_rate": 3.06456614898717e-06, "loss": 0.6304, "step": 10066 }, { "epoch": 0.64, "grad_norm": 0.877920389175415, "learning_rate": 3.0636201791490823e-06, "loss": 0.6141, "step": 10067 }, { "epoch": 0.64, "grad_norm": 0.8867497444152832, "learning_rate": 3.062674290845211e-06, "loss": 0.54, "step": 10068 }, { "epoch": 0.64, "grad_norm": 0.8688225746154785, "learning_rate": 3.061728484115388e-06, "loss": 0.5374, "step": 10069 }, { "epoch": 0.64, "grad_norm": 0.8275909423828125, "learning_rate": 3.0607827589994353e-06, "loss": 0.5417, "step": 10070 }, { "epoch": 0.64, "grad_norm": 0.8562379479408264, "learning_rate": 3.0598371155371747e-06, "loss": 0.5275, "step": 10071 }, { "epoch": 0.64, "grad_norm": 0.8817172646522522, "learning_rate": 3.058891553768422e-06, "loss": 0.5717, "step": 10072 }, { "epoch": 0.64, "grad_norm": 0.9288895130157471, "learning_rate": 3.0579460737329958e-06, "loss": 0.61, "step": 10073 }, { "epoch": 0.64, "grad_norm": 0.9470510482788086, "learning_rate": 3.0570006754707044e-06, "loss": 0.6149, "step": 10074 }, { "epoch": 0.64, "grad_norm": 0.9388991594314575, "learning_rate": 3.056055359021354e-06, "loss": 0.5868, "step": 10075 }, { "epoch": 0.64, "grad_norm": 0.8381592035293579, "learning_rate": 3.0551101244247494e-06, "loss": 0.5587, "step": 10076 }, { "epoch": 0.64, "grad_norm": 0.8484103679656982, "learning_rate": 3.0541649717206933e-06, "loss": 0.544, "step": 10077 }, { "epoch": 0.64, "grad_norm": 0.9049462080001831, "learning_rate": 3.0532199009489814e-06, "loss": 0.5781, "step": 10078 }, { "epoch": 0.64, "grad_norm": 0.9403258562088013, "learning_rate": 3.052274912149406e-06, "loss": 0.5796, "step": 10079 }, { "epoch": 0.64, "grad_norm": 0.8975145220756531, "learning_rate": 3.0513300053617595e-06, "loss": 0.564, "step": 10080 }, { "epoch": 0.64, "grad_norm": 0.9125024676322937, "learning_rate": 3.0503851806258257e-06, "loss": 0.5705, "step": 10081 }, { "epoch": 0.64, "grad_norm": 0.8600341081619263, "learning_rate": 3.0494404379813914e-06, "loss": 0.6273, "step": 10082 }, { "epoch": 0.64, "grad_norm": 0.8747133016586304, "learning_rate": 3.048495777468234e-06, "loss": 0.6381, "step": 10083 }, { "epoch": 0.64, "grad_norm": 0.9442613124847412, "learning_rate": 3.047551199126131e-06, "loss": 0.5865, "step": 10084 }, { "epoch": 0.64, "grad_norm": 0.8632836937904358, "learning_rate": 3.046606702994854e-06, "loss": 0.6283, "step": 10085 }, { "epoch": 0.64, "grad_norm": 0.8757123351097107, "learning_rate": 3.0456622891141748e-06, "loss": 0.5375, "step": 10086 }, { "epoch": 0.64, "grad_norm": 0.8973109126091003, "learning_rate": 3.0447179575238565e-06, "loss": 0.6009, "step": 10087 }, { "epoch": 0.64, "grad_norm": 0.9889295101165771, "learning_rate": 3.0437737082636647e-06, "loss": 0.6492, "step": 10088 }, { "epoch": 0.64, "grad_norm": 0.8877894282341003, "learning_rate": 3.0428295413733546e-06, "loss": 0.4994, "step": 10089 }, { "epoch": 0.64, "grad_norm": 0.9340519309043884, "learning_rate": 3.0418854568926866e-06, "loss": 0.5923, "step": 10090 }, { "epoch": 0.64, "grad_norm": 0.8820253610610962, "learning_rate": 3.0409414548614086e-06, "loss": 0.5531, "step": 10091 }, { "epoch": 0.64, "grad_norm": 0.8349282145500183, "learning_rate": 3.039997535319272e-06, "loss": 0.572, "step": 10092 }, { "epoch": 0.64, "grad_norm": 0.8930419087409973, "learning_rate": 3.039053698306019e-06, "loss": 0.5305, "step": 10093 }, { "epoch": 0.64, "grad_norm": 0.8789426684379578, "learning_rate": 3.0381099438613948e-06, "loss": 0.5386, "step": 10094 }, { "epoch": 0.64, "grad_norm": 0.8947481513023376, "learning_rate": 3.037166272025135e-06, "loss": 0.5672, "step": 10095 }, { "epoch": 0.64, "grad_norm": 0.8884199261665344, "learning_rate": 3.0362226828369767e-06, "loss": 0.6096, "step": 10096 }, { "epoch": 0.64, "grad_norm": 0.885449230670929, "learning_rate": 3.0352791763366484e-06, "loss": 0.5831, "step": 10097 }, { "epoch": 0.64, "grad_norm": 0.836551308631897, "learning_rate": 3.0343357525638787e-06, "loss": 0.5132, "step": 10098 }, { "epoch": 0.64, "grad_norm": 0.9031782150268555, "learning_rate": 3.0333924115583935e-06, "loss": 0.5898, "step": 10099 }, { "epoch": 0.64, "grad_norm": 0.9216272234916687, "learning_rate": 3.032449153359913e-06, "loss": 0.5271, "step": 10100 }, { "epoch": 0.64, "grad_norm": 0.8929412961006165, "learning_rate": 3.031505978008153e-06, "loss": 0.5852, "step": 10101 }, { "epoch": 0.64, "grad_norm": 0.884545087814331, "learning_rate": 3.030562885542827e-06, "loss": 0.5595, "step": 10102 }, { "epoch": 0.64, "grad_norm": 0.881131649017334, "learning_rate": 3.0296198760036493e-06, "loss": 0.5557, "step": 10103 }, { "epoch": 0.64, "grad_norm": 0.9663098454475403, "learning_rate": 3.0286769494303237e-06, "loss": 0.5894, "step": 10104 }, { "epoch": 0.64, "grad_norm": 0.936959445476532, "learning_rate": 3.0277341058625537e-06, "loss": 0.5987, "step": 10105 }, { "epoch": 0.64, "grad_norm": 0.8869735598564148, "learning_rate": 3.026791345340038e-06, "loss": 0.5943, "step": 10106 }, { "epoch": 0.64, "grad_norm": 0.8177929520606995, "learning_rate": 3.0258486679024767e-06, "loss": 0.528, "step": 10107 }, { "epoch": 0.64, "grad_norm": 0.9447188377380371, "learning_rate": 3.0249060735895603e-06, "loss": 0.5911, "step": 10108 }, { "epoch": 0.64, "grad_norm": 0.9261248707771301, "learning_rate": 3.0239635624409767e-06, "loss": 0.5625, "step": 10109 }, { "epoch": 0.64, "grad_norm": 0.8987361788749695, "learning_rate": 3.0230211344964154e-06, "loss": 0.5519, "step": 10110 }, { "epoch": 0.64, "grad_norm": 0.8797249794006348, "learning_rate": 3.0220787897955544e-06, "loss": 0.5839, "step": 10111 }, { "epoch": 0.64, "grad_norm": 0.9020108580589294, "learning_rate": 3.021136528378077e-06, "loss": 0.5724, "step": 10112 }, { "epoch": 0.64, "grad_norm": 1.0054893493652344, "learning_rate": 3.020194350283655e-06, "loss": 0.6108, "step": 10113 }, { "epoch": 0.64, "grad_norm": 0.9611765146255493, "learning_rate": 3.019252255551963e-06, "loss": 0.6548, "step": 10114 }, { "epoch": 0.64, "grad_norm": 0.8305823802947998, "learning_rate": 3.0183102442226653e-06, "loss": 0.5477, "step": 10115 }, { "epoch": 0.64, "grad_norm": 0.8949651718139648, "learning_rate": 3.017368316335432e-06, "loss": 0.576, "step": 10116 }, { "epoch": 0.64, "grad_norm": 0.919265627861023, "learning_rate": 3.0164264719299207e-06, "loss": 0.585, "step": 10117 }, { "epoch": 0.64, "grad_norm": 0.8549671173095703, "learning_rate": 3.0154847110457918e-06, "loss": 0.5094, "step": 10118 }, { "epoch": 0.64, "grad_norm": 0.9418630003929138, "learning_rate": 3.0145430337226955e-06, "loss": 0.5906, "step": 10119 }, { "epoch": 0.64, "grad_norm": 0.9273284673690796, "learning_rate": 3.013601440000288e-06, "loss": 0.5946, "step": 10120 }, { "epoch": 0.64, "grad_norm": 0.9151654243469238, "learning_rate": 3.0126599299182114e-06, "loss": 0.5878, "step": 10121 }, { "epoch": 0.64, "grad_norm": 0.8435792922973633, "learning_rate": 3.0117185035161135e-06, "loss": 0.5951, "step": 10122 }, { "epoch": 0.64, "grad_norm": 0.8737865090370178, "learning_rate": 3.01077716083363e-06, "loss": 0.5739, "step": 10123 }, { "epoch": 0.64, "grad_norm": 0.8778201937675476, "learning_rate": 3.009835901910403e-06, "loss": 0.5487, "step": 10124 }, { "epoch": 0.64, "grad_norm": 0.862269937992096, "learning_rate": 3.008894726786062e-06, "loss": 0.5627, "step": 10125 }, { "epoch": 0.64, "grad_norm": 0.8969505429267883, "learning_rate": 3.007953635500238e-06, "loss": 0.5473, "step": 10126 }, { "epoch": 0.64, "grad_norm": 0.9201652407646179, "learning_rate": 3.0070126280925564e-06, "loss": 0.6661, "step": 10127 }, { "epoch": 0.64, "grad_norm": 0.882611870765686, "learning_rate": 3.0060717046026387e-06, "loss": 0.6069, "step": 10128 }, { "epoch": 0.64, "grad_norm": 0.9005841016769409, "learning_rate": 3.0051308650701054e-06, "loss": 0.6415, "step": 10129 }, { "epoch": 0.64, "grad_norm": 0.8846337795257568, "learning_rate": 3.004190109534573e-06, "loss": 0.5779, "step": 10130 }, { "epoch": 0.64, "grad_norm": 0.9538823366165161, "learning_rate": 3.0032494380356523e-06, "loss": 0.5801, "step": 10131 }, { "epoch": 0.64, "grad_norm": 0.8762175440788269, "learning_rate": 3.002308850612949e-06, "loss": 0.5717, "step": 10132 }, { "epoch": 0.64, "grad_norm": 0.8317214846611023, "learning_rate": 3.001368347306073e-06, "loss": 0.5577, "step": 10133 }, { "epoch": 0.64, "grad_norm": 0.8527503609657288, "learning_rate": 3.0004279281546235e-06, "loss": 0.5818, "step": 10134 }, { "epoch": 0.64, "grad_norm": 0.861371636390686, "learning_rate": 2.999487593198197e-06, "loss": 0.5475, "step": 10135 }, { "epoch": 0.64, "grad_norm": 0.8559701442718506, "learning_rate": 2.9985473424763876e-06, "loss": 0.5565, "step": 10136 }, { "epoch": 0.64, "grad_norm": 0.9402846693992615, "learning_rate": 2.9976071760287874e-06, "loss": 0.6107, "step": 10137 }, { "epoch": 0.64, "grad_norm": 0.8749223351478577, "learning_rate": 2.9966670938949847e-06, "loss": 0.5484, "step": 10138 }, { "epoch": 0.64, "grad_norm": 0.8214702606201172, "learning_rate": 2.995727096114561e-06, "loss": 0.5719, "step": 10139 }, { "epoch": 0.64, "grad_norm": 0.8834431767463684, "learning_rate": 2.9947871827270956e-06, "loss": 0.6228, "step": 10140 }, { "epoch": 0.64, "grad_norm": 0.9178330302238464, "learning_rate": 2.993847353772168e-06, "loss": 0.5815, "step": 10141 }, { "epoch": 0.64, "grad_norm": 0.8592966198921204, "learning_rate": 2.9929076092893496e-06, "loss": 0.5508, "step": 10142 }, { "epoch": 0.64, "grad_norm": 0.8537743091583252, "learning_rate": 2.991967949318209e-06, "loss": 0.6015, "step": 10143 }, { "epoch": 0.64, "grad_norm": 0.8182849884033203, "learning_rate": 2.9910283738983125e-06, "loss": 0.5648, "step": 10144 }, { "epoch": 0.64, "grad_norm": 0.9029396772384644, "learning_rate": 2.9900888830692208e-06, "loss": 0.6084, "step": 10145 }, { "epoch": 0.64, "grad_norm": 0.8994178175926208, "learning_rate": 2.9891494768704964e-06, "loss": 0.6156, "step": 10146 }, { "epoch": 0.64, "grad_norm": 0.8991573452949524, "learning_rate": 2.9882101553416932e-06, "loss": 0.5458, "step": 10147 }, { "epoch": 0.64, "grad_norm": 0.89846271276474, "learning_rate": 2.9872709185223596e-06, "loss": 0.6052, "step": 10148 }, { "epoch": 0.64, "grad_norm": 0.8613349199295044, "learning_rate": 2.9863317664520453e-06, "loss": 0.5582, "step": 10149 }, { "epoch": 0.64, "grad_norm": 0.9185076951980591, "learning_rate": 2.9853926991702974e-06, "loss": 0.6023, "step": 10150 }, { "epoch": 0.64, "grad_norm": 0.9413586258888245, "learning_rate": 2.984453716716655e-06, "loss": 0.5681, "step": 10151 }, { "epoch": 0.64, "grad_norm": 0.843997061252594, "learning_rate": 2.9835148191306535e-06, "loss": 0.5453, "step": 10152 }, { "epoch": 0.64, "grad_norm": 0.8782387375831604, "learning_rate": 2.9825760064518273e-06, "loss": 0.5996, "step": 10153 }, { "epoch": 0.64, "grad_norm": 0.7524551749229431, "learning_rate": 2.981637278719709e-06, "loss": 0.5087, "step": 10154 }, { "epoch": 0.64, "grad_norm": 0.8854588270187378, "learning_rate": 2.9806986359738244e-06, "loss": 0.5919, "step": 10155 }, { "epoch": 0.64, "grad_norm": 0.8053493499755859, "learning_rate": 2.979760078253694e-06, "loss": 0.5397, "step": 10156 }, { "epoch": 0.64, "grad_norm": 0.9677163362503052, "learning_rate": 2.9788216055988397e-06, "loss": 0.6133, "step": 10157 }, { "epoch": 0.64, "grad_norm": 0.8523488640785217, "learning_rate": 2.977883218048775e-06, "loss": 0.5826, "step": 10158 }, { "epoch": 0.64, "grad_norm": 0.9211286902427673, "learning_rate": 2.9769449156430147e-06, "loss": 0.6244, "step": 10159 }, { "epoch": 0.64, "grad_norm": 0.9199965596199036, "learning_rate": 2.9760066984210655e-06, "loss": 0.6112, "step": 10160 }, { "epoch": 0.64, "grad_norm": 0.8444145321846008, "learning_rate": 2.975068566422434e-06, "loss": 0.5704, "step": 10161 }, { "epoch": 0.64, "grad_norm": 0.907306969165802, "learning_rate": 2.97413051968662e-06, "loss": 0.6156, "step": 10162 }, { "epoch": 0.64, "grad_norm": 0.8991623520851135, "learning_rate": 2.9731925582531227e-06, "loss": 0.5905, "step": 10163 }, { "epoch": 0.64, "grad_norm": 0.8663104772567749, "learning_rate": 2.9722546821614373e-06, "loss": 0.5704, "step": 10164 }, { "epoch": 0.64, "grad_norm": 0.8777760863304138, "learning_rate": 2.9713168914510533e-06, "loss": 0.5416, "step": 10165 }, { "epoch": 0.64, "grad_norm": 0.8857688307762146, "learning_rate": 2.970379186161455e-06, "loss": 0.5669, "step": 10166 }, { "epoch": 0.64, "grad_norm": 0.8899209499359131, "learning_rate": 2.96944156633213e-06, "loss": 0.6229, "step": 10167 }, { "epoch": 0.64, "grad_norm": 0.8441648483276367, "learning_rate": 2.9685040320025583e-06, "loss": 0.5529, "step": 10168 }, { "epoch": 0.64, "grad_norm": 0.8758301138877869, "learning_rate": 2.9675665832122146e-06, "loss": 0.6021, "step": 10169 }, { "epoch": 0.64, "grad_norm": 0.8592897057533264, "learning_rate": 2.966629220000569e-06, "loss": 0.5656, "step": 10170 }, { "epoch": 0.64, "grad_norm": 0.8968542814254761, "learning_rate": 2.965691942407095e-06, "loss": 0.5639, "step": 10171 }, { "epoch": 0.64, "grad_norm": 0.8699895143508911, "learning_rate": 2.9647547504712577e-06, "loss": 0.6159, "step": 10172 }, { "epoch": 0.64, "grad_norm": 0.8662521243095398, "learning_rate": 2.9638176442325173e-06, "loss": 0.5801, "step": 10173 }, { "epoch": 0.64, "grad_norm": 0.8635749220848083, "learning_rate": 2.962880623730332e-06, "loss": 0.5819, "step": 10174 }, { "epoch": 0.64, "grad_norm": 0.9355505108833313, "learning_rate": 2.9619436890041555e-06, "loss": 0.5975, "step": 10175 }, { "epoch": 0.64, "grad_norm": 0.9172835350036621, "learning_rate": 2.961006840093442e-06, "loss": 0.5504, "step": 10176 }, { "epoch": 0.64, "grad_norm": 0.814353346824646, "learning_rate": 2.9600700770376384e-06, "loss": 0.551, "step": 10177 }, { "epoch": 0.64, "grad_norm": 0.8739163875579834, "learning_rate": 2.959133399876186e-06, "loss": 0.5662, "step": 10178 }, { "epoch": 0.64, "grad_norm": 0.8901175856590271, "learning_rate": 2.958196808648525e-06, "loss": 0.6053, "step": 10179 }, { "epoch": 0.64, "grad_norm": 0.9073649644851685, "learning_rate": 2.957260303394096e-06, "loss": 0.5715, "step": 10180 }, { "epoch": 0.65, "grad_norm": 0.8607237935066223, "learning_rate": 2.9563238841523293e-06, "loss": 0.5847, "step": 10181 }, { "epoch": 0.65, "grad_norm": 0.8739342093467712, "learning_rate": 2.955387550962654e-06, "loss": 0.5199, "step": 10182 }, { "epoch": 0.65, "grad_norm": 0.8706129193305969, "learning_rate": 2.954451303864494e-06, "loss": 0.61, "step": 10183 }, { "epoch": 0.65, "grad_norm": 0.9067039489746094, "learning_rate": 2.9535151428972762e-06, "loss": 0.5869, "step": 10184 }, { "epoch": 0.65, "grad_norm": 0.856227695941925, "learning_rate": 2.9525790681004172e-06, "loss": 0.5495, "step": 10185 }, { "epoch": 0.65, "grad_norm": 0.8174280524253845, "learning_rate": 2.9516430795133294e-06, "loss": 0.5439, "step": 10186 }, { "epoch": 0.65, "grad_norm": 0.9031091332435608, "learning_rate": 2.950707177175427e-06, "loss": 0.5668, "step": 10187 }, { "epoch": 0.65, "grad_norm": 0.8566731810569763, "learning_rate": 2.9497713611261146e-06, "loss": 0.5512, "step": 10188 }, { "epoch": 0.65, "grad_norm": 0.9484649300575256, "learning_rate": 2.9488356314047994e-06, "loss": 0.6265, "step": 10189 }, { "epoch": 0.65, "grad_norm": 0.8249022364616394, "learning_rate": 2.94789998805088e-06, "loss": 0.604, "step": 10190 }, { "epoch": 0.65, "grad_norm": 0.880988597869873, "learning_rate": 2.9469644311037545e-06, "loss": 0.6227, "step": 10191 }, { "epoch": 0.65, "grad_norm": 0.9004330635070801, "learning_rate": 2.946028960602812e-06, "loss": 0.58, "step": 10192 }, { "epoch": 0.65, "grad_norm": 0.9128255844116211, "learning_rate": 2.9450935765874474e-06, "loss": 0.5868, "step": 10193 }, { "epoch": 0.65, "grad_norm": 0.9160966873168945, "learning_rate": 2.9441582790970425e-06, "loss": 0.6127, "step": 10194 }, { "epoch": 0.65, "grad_norm": 0.9519477486610413, "learning_rate": 2.9432230681709815e-06, "loss": 0.6271, "step": 10195 }, { "epoch": 0.65, "grad_norm": 0.8393691778182983, "learning_rate": 2.942287943848641e-06, "loss": 0.552, "step": 10196 }, { "epoch": 0.65, "grad_norm": 0.9617919921875, "learning_rate": 2.941352906169398e-06, "loss": 0.6222, "step": 10197 }, { "epoch": 0.65, "grad_norm": 0.8588807582855225, "learning_rate": 2.9404179551726214e-06, "loss": 0.5407, "step": 10198 }, { "epoch": 0.65, "grad_norm": 0.9286124110221863, "learning_rate": 2.939483090897681e-06, "loss": 0.6136, "step": 10199 }, { "epoch": 0.65, "grad_norm": 0.9325358867645264, "learning_rate": 2.9385483133839386e-06, "loss": 0.5356, "step": 10200 }, { "epoch": 0.65, "grad_norm": 0.9960110187530518, "learning_rate": 2.937613622670756e-06, "loss": 0.6458, "step": 10201 }, { "epoch": 0.65, "grad_norm": 0.8421880006790161, "learning_rate": 2.9366790187974897e-06, "loss": 0.5576, "step": 10202 }, { "epoch": 0.65, "grad_norm": 0.8421469330787659, "learning_rate": 2.9357445018034926e-06, "loss": 0.5855, "step": 10203 }, { "epoch": 0.65, "grad_norm": 0.8795361518859863, "learning_rate": 2.934810071728114e-06, "loss": 0.5877, "step": 10204 }, { "epoch": 0.65, "grad_norm": 0.9030759930610657, "learning_rate": 2.9338757286106955e-06, "loss": 0.5878, "step": 10205 }, { "epoch": 0.65, "grad_norm": 0.8403552770614624, "learning_rate": 2.9329414724905845e-06, "loss": 0.5818, "step": 10206 }, { "epoch": 0.65, "grad_norm": 0.8796659708023071, "learning_rate": 2.9320073034071187e-06, "loss": 0.5653, "step": 10207 }, { "epoch": 0.65, "grad_norm": 0.8549631237983704, "learning_rate": 2.9310732213996305e-06, "loss": 0.5763, "step": 10208 }, { "epoch": 0.65, "grad_norm": 0.8520306944847107, "learning_rate": 2.9301392265074506e-06, "loss": 0.5754, "step": 10209 }, { "epoch": 0.65, "grad_norm": 0.8692139387130737, "learning_rate": 2.9292053187699075e-06, "loss": 0.5272, "step": 10210 }, { "epoch": 0.65, "grad_norm": 0.8986145257949829, "learning_rate": 2.9282714982263265e-06, "loss": 0.5251, "step": 10211 }, { "epoch": 0.65, "grad_norm": 0.9022727012634277, "learning_rate": 2.927337764916025e-06, "loss": 0.5878, "step": 10212 }, { "epoch": 0.65, "grad_norm": 0.8935984969139099, "learning_rate": 2.926404118878319e-06, "loss": 0.6037, "step": 10213 }, { "epoch": 0.65, "grad_norm": 0.8796955347061157, "learning_rate": 2.925470560152522e-06, "loss": 0.5815, "step": 10214 }, { "epoch": 0.65, "grad_norm": 0.8789433240890503, "learning_rate": 2.924537088777944e-06, "loss": 0.6031, "step": 10215 }, { "epoch": 0.65, "grad_norm": 0.8689199686050415, "learning_rate": 2.9236037047938894e-06, "loss": 0.5566, "step": 10216 }, { "epoch": 0.65, "grad_norm": 0.850175678730011, "learning_rate": 2.922670408239657e-06, "loss": 0.5817, "step": 10217 }, { "epoch": 0.65, "grad_norm": 0.8795483112335205, "learning_rate": 2.921737199154549e-06, "loss": 0.6259, "step": 10218 }, { "epoch": 0.65, "grad_norm": 0.8465956449508667, "learning_rate": 2.920804077577859e-06, "loss": 0.5467, "step": 10219 }, { "epoch": 0.65, "grad_norm": 0.8541370630264282, "learning_rate": 2.919871043548875e-06, "loss": 0.5509, "step": 10220 }, { "epoch": 0.65, "grad_norm": 0.8528336882591248, "learning_rate": 2.9189380971068864e-06, "loss": 0.5237, "step": 10221 }, { "epoch": 0.65, "grad_norm": 1.0272489786148071, "learning_rate": 2.918005238291172e-06, "loss": 0.5948, "step": 10222 }, { "epoch": 0.65, "grad_norm": 0.8642032742500305, "learning_rate": 2.9170724671410155e-06, "loss": 0.605, "step": 10223 }, { "epoch": 0.65, "grad_norm": 0.8577390313148499, "learning_rate": 2.916139783695694e-06, "loss": 0.5634, "step": 10224 }, { "epoch": 0.65, "grad_norm": 0.935626208782196, "learning_rate": 2.9152071879944743e-06, "loss": 0.5815, "step": 10225 }, { "epoch": 0.65, "grad_norm": 0.9437475800514221, "learning_rate": 2.914274680076628e-06, "loss": 0.6359, "step": 10226 }, { "epoch": 0.65, "grad_norm": 0.8789603114128113, "learning_rate": 2.913342259981419e-06, "loss": 0.6031, "step": 10227 }, { "epoch": 0.65, "grad_norm": 0.9114549160003662, "learning_rate": 2.9124099277481088e-06, "loss": 0.544, "step": 10228 }, { "epoch": 0.65, "grad_norm": 0.8359835743904114, "learning_rate": 2.9114776834159563e-06, "loss": 0.5686, "step": 10229 }, { "epoch": 0.65, "grad_norm": 0.9180512428283691, "learning_rate": 2.910545527024209e-06, "loss": 0.58, "step": 10230 }, { "epoch": 0.65, "grad_norm": 0.9041998386383057, "learning_rate": 2.9096134586121227e-06, "loss": 0.6303, "step": 10231 }, { "epoch": 0.65, "grad_norm": 0.8931963443756104, "learning_rate": 2.908681478218944e-06, "loss": 0.5667, "step": 10232 }, { "epoch": 0.65, "grad_norm": 0.8502830266952515, "learning_rate": 2.907749585883911e-06, "loss": 0.5689, "step": 10233 }, { "epoch": 0.65, "grad_norm": 0.8675402998924255, "learning_rate": 2.906817781646264e-06, "loss": 0.5585, "step": 10234 }, { "epoch": 0.65, "grad_norm": 0.9389364719390869, "learning_rate": 2.905886065545239e-06, "loss": 0.6007, "step": 10235 }, { "epoch": 0.65, "grad_norm": 0.8219680786132812, "learning_rate": 2.9049544376200674e-06, "loss": 0.5835, "step": 10236 }, { "epoch": 0.65, "grad_norm": 0.9516189098358154, "learning_rate": 2.9040228979099777e-06, "loss": 0.5689, "step": 10237 }, { "epoch": 0.65, "grad_norm": 0.8465138077735901, "learning_rate": 2.9030914464541904e-06, "loss": 0.5335, "step": 10238 }, { "epoch": 0.65, "grad_norm": 0.8579193949699402, "learning_rate": 2.902160083291926e-06, "loss": 0.5572, "step": 10239 }, { "epoch": 0.65, "grad_norm": 0.9451611042022705, "learning_rate": 2.9012288084624065e-06, "loss": 0.5445, "step": 10240 }, { "epoch": 0.65, "grad_norm": 0.8656702637672424, "learning_rate": 2.9002976220048383e-06, "loss": 0.5438, "step": 10241 }, { "epoch": 0.65, "grad_norm": 0.8685592412948608, "learning_rate": 2.899366523958434e-06, "loss": 0.6194, "step": 10242 }, { "epoch": 0.65, "grad_norm": 0.8590168356895447, "learning_rate": 2.898435514362397e-06, "loss": 0.5964, "step": 10243 }, { "epoch": 0.65, "grad_norm": 0.8453319668769836, "learning_rate": 2.89750459325593e-06, "loss": 0.5445, "step": 10244 }, { "epoch": 0.65, "grad_norm": 0.8947049379348755, "learning_rate": 2.896573760678232e-06, "loss": 0.6004, "step": 10245 }, { "epoch": 0.65, "grad_norm": 0.8810886144638062, "learning_rate": 2.8956430166684945e-06, "loss": 0.5597, "step": 10246 }, { "epoch": 0.65, "grad_norm": 0.9045408964157104, "learning_rate": 2.8947123612659068e-06, "loss": 0.5975, "step": 10247 }, { "epoch": 0.65, "grad_norm": 0.907370388507843, "learning_rate": 2.8937817945096614e-06, "loss": 0.546, "step": 10248 }, { "epoch": 0.65, "grad_norm": 0.929260790348053, "learning_rate": 2.8928513164389353e-06, "loss": 0.6313, "step": 10249 }, { "epoch": 0.65, "grad_norm": 0.8894972205162048, "learning_rate": 2.8919209270929106e-06, "loss": 0.6308, "step": 10250 }, { "epoch": 0.65, "grad_norm": 0.8753820061683655, "learning_rate": 2.8909906265107647e-06, "loss": 0.5576, "step": 10251 }, { "epoch": 0.65, "grad_norm": 0.9265826940536499, "learning_rate": 2.890060414731662e-06, "loss": 0.5626, "step": 10252 }, { "epoch": 0.65, "grad_norm": 0.9352290034294128, "learning_rate": 2.8891302917947794e-06, "loss": 0.628, "step": 10253 }, { "epoch": 0.65, "grad_norm": 0.9359737038612366, "learning_rate": 2.8882002577392752e-06, "loss": 0.6278, "step": 10254 }, { "epoch": 0.65, "grad_norm": 0.9087960124015808, "learning_rate": 2.8872703126043116e-06, "loss": 0.6675, "step": 10255 }, { "epoch": 0.65, "grad_norm": 0.9556131958961487, "learning_rate": 2.8863404564290455e-06, "loss": 0.5625, "step": 10256 }, { "epoch": 0.65, "grad_norm": 0.8998469710350037, "learning_rate": 2.88541068925263e-06, "loss": 0.6218, "step": 10257 }, { "epoch": 0.65, "grad_norm": 0.8599625825881958, "learning_rate": 2.8844810111142143e-06, "loss": 0.5521, "step": 10258 }, { "epoch": 0.65, "grad_norm": 0.8799909353256226, "learning_rate": 2.883551422052946e-06, "loss": 0.5713, "step": 10259 }, { "epoch": 0.65, "grad_norm": 0.864239513874054, "learning_rate": 2.8826219221079597e-06, "loss": 0.6036, "step": 10260 }, { "epoch": 0.65, "grad_norm": 0.8341729044914246, "learning_rate": 2.8816925113184034e-06, "loss": 0.5587, "step": 10261 }, { "epoch": 0.65, "grad_norm": 0.8841572403907776, "learning_rate": 2.8807631897234045e-06, "loss": 0.6003, "step": 10262 }, { "epoch": 0.65, "grad_norm": 0.9406521320343018, "learning_rate": 2.8798339573620953e-06, "loss": 0.6259, "step": 10263 }, { "epoch": 0.65, "grad_norm": 0.8605220913887024, "learning_rate": 2.8789048142736026e-06, "loss": 0.5397, "step": 10264 }, { "epoch": 0.65, "grad_norm": 0.9191677570343018, "learning_rate": 2.8779757604970495e-06, "loss": 0.5754, "step": 10265 }, { "epoch": 0.65, "grad_norm": 0.8478958010673523, "learning_rate": 2.877046796071554e-06, "loss": 0.5911, "step": 10266 }, { "epoch": 0.65, "grad_norm": 0.909317135810852, "learning_rate": 2.8761179210362365e-06, "loss": 0.5999, "step": 10267 }, { "epoch": 0.65, "grad_norm": 0.9130200743675232, "learning_rate": 2.8751891354302018e-06, "loss": 0.6098, "step": 10268 }, { "epoch": 0.65, "grad_norm": 0.8185581564903259, "learning_rate": 2.8742604392925587e-06, "loss": 0.5674, "step": 10269 }, { "epoch": 0.65, "grad_norm": 0.8762167692184448, "learning_rate": 2.8733318326624182e-06, "loss": 0.5917, "step": 10270 }, { "epoch": 0.65, "grad_norm": 0.852927029132843, "learning_rate": 2.8724033155788743e-06, "loss": 0.587, "step": 10271 }, { "epoch": 0.65, "grad_norm": 0.8949410915374756, "learning_rate": 2.871474888081025e-06, "loss": 0.6095, "step": 10272 }, { "epoch": 0.65, "grad_norm": 0.8751702904701233, "learning_rate": 2.870546550207964e-06, "loss": 0.5567, "step": 10273 }, { "epoch": 0.65, "grad_norm": 0.9688418507575989, "learning_rate": 2.8696183019987796e-06, "loss": 0.559, "step": 10274 }, { "epoch": 0.65, "grad_norm": 0.9164302945137024, "learning_rate": 2.868690143492559e-06, "loss": 0.6014, "step": 10275 }, { "epoch": 0.65, "grad_norm": 0.9164918065071106, "learning_rate": 2.8677620747283807e-06, "loss": 0.5787, "step": 10276 }, { "epoch": 0.65, "grad_norm": 0.8945170044898987, "learning_rate": 2.8668340957453224e-06, "loss": 0.5649, "step": 10277 }, { "epoch": 0.65, "grad_norm": 0.8914811015129089, "learning_rate": 2.865906206582463e-06, "loss": 0.5866, "step": 10278 }, { "epoch": 0.65, "grad_norm": 0.8111115097999573, "learning_rate": 2.8649784072788668e-06, "loss": 0.534, "step": 10279 }, { "epoch": 0.65, "grad_norm": 0.9475454092025757, "learning_rate": 2.8640506978736027e-06, "loss": 0.6491, "step": 10280 }, { "epoch": 0.65, "grad_norm": 0.9642074704170227, "learning_rate": 2.8631230784057362e-06, "loss": 0.6173, "step": 10281 }, { "epoch": 0.65, "grad_norm": 0.9231216907501221, "learning_rate": 2.862195548914318e-06, "loss": 0.6038, "step": 10282 }, { "epoch": 0.65, "grad_norm": 0.9643025994300842, "learning_rate": 2.8612681094384135e-06, "loss": 0.5809, "step": 10283 }, { "epoch": 0.65, "grad_norm": 0.8661615252494812, "learning_rate": 2.8603407600170664e-06, "loss": 0.5797, "step": 10284 }, { "epoch": 0.65, "grad_norm": 0.8539398908615112, "learning_rate": 2.8594135006893264e-06, "loss": 0.595, "step": 10285 }, { "epoch": 0.65, "grad_norm": 0.8886363506317139, "learning_rate": 2.858486331494238e-06, "loss": 0.5977, "step": 10286 }, { "epoch": 0.65, "grad_norm": 0.8894230127334595, "learning_rate": 2.8575592524708397e-06, "loss": 0.5999, "step": 10287 }, { "epoch": 0.65, "grad_norm": 0.8313820362091064, "learning_rate": 2.856632263658169e-06, "loss": 0.5703, "step": 10288 }, { "epoch": 0.65, "grad_norm": 0.8702353239059448, "learning_rate": 2.855705365095258e-06, "loss": 0.6152, "step": 10289 }, { "epoch": 0.65, "grad_norm": 0.8346042037010193, "learning_rate": 2.854778556821132e-06, "loss": 0.5277, "step": 10290 }, { "epoch": 0.65, "grad_norm": 0.9115665555000305, "learning_rate": 2.8538518388748214e-06, "loss": 0.595, "step": 10291 }, { "epoch": 0.65, "grad_norm": 0.9286834001541138, "learning_rate": 2.8529252112953434e-06, "loss": 0.6031, "step": 10292 }, { "epoch": 0.65, "grad_norm": 0.9434182047843933, "learning_rate": 2.8519986741217144e-06, "loss": 0.5983, "step": 10293 }, { "epoch": 0.65, "grad_norm": 0.8886797428131104, "learning_rate": 2.8510722273929486e-06, "loss": 0.595, "step": 10294 }, { "epoch": 0.65, "grad_norm": 0.868736207485199, "learning_rate": 2.8501458711480564e-06, "loss": 0.5769, "step": 10295 }, { "epoch": 0.65, "grad_norm": 0.8849626183509827, "learning_rate": 2.8492196054260424e-06, "loss": 0.6066, "step": 10296 }, { "epoch": 0.65, "grad_norm": 0.860435426235199, "learning_rate": 2.848293430265911e-06, "loss": 0.5701, "step": 10297 }, { "epoch": 0.65, "grad_norm": 0.9047563672065735, "learning_rate": 2.8473673457066564e-06, "loss": 0.5482, "step": 10298 }, { "epoch": 0.65, "grad_norm": 0.8450853824615479, "learning_rate": 2.8464413517872737e-06, "loss": 0.5659, "step": 10299 }, { "epoch": 0.65, "grad_norm": 0.8788303732872009, "learning_rate": 2.845515448546754e-06, "loss": 0.5781, "step": 10300 }, { "epoch": 0.65, "grad_norm": 0.8010481595993042, "learning_rate": 2.8445896360240845e-06, "loss": 0.5364, "step": 10301 }, { "epoch": 0.65, "grad_norm": 0.9223700761795044, "learning_rate": 2.843663914258249e-06, "loss": 0.5826, "step": 10302 }, { "epoch": 0.65, "grad_norm": 0.8434270024299622, "learning_rate": 2.8427382832882207e-06, "loss": 0.5676, "step": 10303 }, { "epoch": 0.65, "grad_norm": 0.9163960218429565, "learning_rate": 2.8418127431529807e-06, "loss": 0.5913, "step": 10304 }, { "epoch": 0.65, "grad_norm": 0.8485933542251587, "learning_rate": 2.8408872938915e-06, "loss": 0.5494, "step": 10305 }, { "epoch": 0.65, "grad_norm": 0.9408286213874817, "learning_rate": 2.8399619355427427e-06, "loss": 0.6158, "step": 10306 }, { "epoch": 0.65, "grad_norm": 0.8759029507637024, "learning_rate": 2.839036668145674e-06, "loss": 0.6119, "step": 10307 }, { "epoch": 0.65, "grad_norm": 0.8358346819877625, "learning_rate": 2.8381114917392538e-06, "loss": 0.5738, "step": 10308 }, { "epoch": 0.65, "grad_norm": 0.8680429458618164, "learning_rate": 2.8371864063624375e-06, "loss": 0.5452, "step": 10309 }, { "epoch": 0.65, "grad_norm": 0.9013274312019348, "learning_rate": 2.836261412054181e-06, "loss": 0.5305, "step": 10310 }, { "epoch": 0.65, "grad_norm": 0.8434852361679077, "learning_rate": 2.8353365088534247e-06, "loss": 0.6082, "step": 10311 }, { "epoch": 0.65, "grad_norm": 0.8728095889091492, "learning_rate": 2.8344116967991197e-06, "loss": 0.549, "step": 10312 }, { "epoch": 0.65, "grad_norm": 0.8872493505477905, "learning_rate": 2.8334869759302064e-06, "loss": 0.5777, "step": 10313 }, { "epoch": 0.65, "grad_norm": 0.8925797343254089, "learning_rate": 2.8325623462856176e-06, "loss": 0.5751, "step": 10314 }, { "epoch": 0.65, "grad_norm": 0.903728187084198, "learning_rate": 2.8316378079042887e-06, "loss": 0.6265, "step": 10315 }, { "epoch": 0.65, "grad_norm": 0.8824670910835266, "learning_rate": 2.8307133608251486e-06, "loss": 0.5769, "step": 10316 }, { "epoch": 0.65, "grad_norm": 0.8991369605064392, "learning_rate": 2.8297890050871222e-06, "loss": 0.5767, "step": 10317 }, { "epoch": 0.65, "grad_norm": 0.8974249362945557, "learning_rate": 2.8288647407291337e-06, "loss": 0.6057, "step": 10318 }, { "epoch": 0.65, "grad_norm": 0.859311580657959, "learning_rate": 2.827940567790096e-06, "loss": 0.5939, "step": 10319 }, { "epoch": 0.65, "grad_norm": 0.9485636949539185, "learning_rate": 2.8270164863089227e-06, "loss": 0.6018, "step": 10320 }, { "epoch": 0.65, "grad_norm": 0.8993692398071289, "learning_rate": 2.82609249632453e-06, "loss": 0.5957, "step": 10321 }, { "epoch": 0.65, "grad_norm": 0.9110742807388306, "learning_rate": 2.825168597875818e-06, "loss": 0.5788, "step": 10322 }, { "epoch": 0.65, "grad_norm": 0.9139736890792847, "learning_rate": 2.82424479100169e-06, "loss": 0.5432, "step": 10323 }, { "epoch": 0.65, "grad_norm": 0.909750759601593, "learning_rate": 2.8233210757410454e-06, "loss": 0.6235, "step": 10324 }, { "epoch": 0.65, "grad_norm": 0.8736597299575806, "learning_rate": 2.8223974521327787e-06, "loss": 0.5876, "step": 10325 }, { "epoch": 0.65, "grad_norm": 0.886572003364563, "learning_rate": 2.8214739202157794e-06, "loss": 0.581, "step": 10326 }, { "epoch": 0.65, "grad_norm": 0.8689284920692444, "learning_rate": 2.820550480028937e-06, "loss": 0.5974, "step": 10327 }, { "epoch": 0.65, "grad_norm": 0.9559029936790466, "learning_rate": 2.81962713161113e-06, "loss": 0.6199, "step": 10328 }, { "epoch": 0.65, "grad_norm": 0.854682445526123, "learning_rate": 2.8187038750012396e-06, "loss": 0.5861, "step": 10329 }, { "epoch": 0.65, "grad_norm": 0.8388245105743408, "learning_rate": 2.8177807102381404e-06, "loss": 0.5608, "step": 10330 }, { "epoch": 0.65, "grad_norm": 0.8935778737068176, "learning_rate": 2.816857637360705e-06, "loss": 0.5666, "step": 10331 }, { "epoch": 0.65, "grad_norm": 0.876492440700531, "learning_rate": 2.8159346564078006e-06, "loss": 0.5852, "step": 10332 }, { "epoch": 0.65, "grad_norm": 0.9023503661155701, "learning_rate": 2.815011767418287e-06, "loss": 0.6174, "step": 10333 }, { "epoch": 0.65, "grad_norm": 0.9186480045318604, "learning_rate": 2.8140889704310287e-06, "loss": 0.5975, "step": 10334 }, { "epoch": 0.65, "grad_norm": 0.8938761949539185, "learning_rate": 2.8131662654848814e-06, "loss": 0.5741, "step": 10335 }, { "epoch": 0.65, "grad_norm": 0.8748285174369812, "learning_rate": 2.8122436526186935e-06, "loss": 0.6341, "step": 10336 }, { "epoch": 0.65, "grad_norm": 0.8347454071044922, "learning_rate": 2.8113211318713146e-06, "loss": 0.6091, "step": 10337 }, { "epoch": 0.65, "grad_norm": 0.8568246364593506, "learning_rate": 2.810398703281589e-06, "loss": 0.5535, "step": 10338 }, { "epoch": 0.66, "grad_norm": 0.9079662561416626, "learning_rate": 2.8094763668883567e-06, "loss": 0.5603, "step": 10339 }, { "epoch": 0.66, "grad_norm": 0.9075840711593628, "learning_rate": 2.808554122730457e-06, "loss": 0.5642, "step": 10340 }, { "epoch": 0.66, "grad_norm": 0.8704594373703003, "learning_rate": 2.8076319708467146e-06, "loss": 0.5842, "step": 10341 }, { "epoch": 0.66, "grad_norm": 0.9282211661338806, "learning_rate": 2.8067099112759665e-06, "loss": 0.5972, "step": 10342 }, { "epoch": 0.66, "grad_norm": 0.8854076266288757, "learning_rate": 2.8057879440570356e-06, "loss": 0.5837, "step": 10343 }, { "epoch": 0.66, "grad_norm": 0.8140289783477783, "learning_rate": 2.804866069228739e-06, "loss": 0.5405, "step": 10344 }, { "epoch": 0.66, "grad_norm": 0.9335722923278809, "learning_rate": 2.803944286829896e-06, "loss": 0.5488, "step": 10345 }, { "epoch": 0.66, "grad_norm": 0.8789125084877014, "learning_rate": 2.8030225968993198e-06, "loss": 0.5977, "step": 10346 }, { "epoch": 0.66, "grad_norm": 0.9545979499816895, "learning_rate": 2.802100999475819e-06, "loss": 0.5622, "step": 10347 }, { "epoch": 0.66, "grad_norm": 0.9649593830108643, "learning_rate": 2.8011794945982013e-06, "loss": 0.6229, "step": 10348 }, { "epoch": 0.66, "grad_norm": 0.8558527827262878, "learning_rate": 2.8002580823052638e-06, "loss": 0.5659, "step": 10349 }, { "epoch": 0.66, "grad_norm": 0.9221006631851196, "learning_rate": 2.7993367626358047e-06, "loss": 0.5421, "step": 10350 }, { "epoch": 0.66, "grad_norm": 0.8340117335319519, "learning_rate": 2.7984155356286224e-06, "loss": 0.5119, "step": 10351 }, { "epoch": 0.66, "grad_norm": 0.8941150903701782, "learning_rate": 2.7974944013225013e-06, "loss": 0.5676, "step": 10352 }, { "epoch": 0.66, "grad_norm": 0.857522189617157, "learning_rate": 2.796573359756229e-06, "loss": 0.5856, "step": 10353 }, { "epoch": 0.66, "grad_norm": 0.986824631690979, "learning_rate": 2.7956524109685874e-06, "loss": 0.6455, "step": 10354 }, { "epoch": 0.66, "grad_norm": 0.8224316239356995, "learning_rate": 2.7947315549983545e-06, "loss": 0.584, "step": 10355 }, { "epoch": 0.66, "grad_norm": 0.8816094994544983, "learning_rate": 2.793810791884306e-06, "loss": 0.5838, "step": 10356 }, { "epoch": 0.66, "grad_norm": 0.8999599814414978, "learning_rate": 2.792890121665208e-06, "loss": 0.5797, "step": 10357 }, { "epoch": 0.66, "grad_norm": 0.9199798703193665, "learning_rate": 2.791969544379828e-06, "loss": 0.606, "step": 10358 }, { "epoch": 0.66, "grad_norm": 0.8767827153205872, "learning_rate": 2.7910490600669327e-06, "loss": 0.5771, "step": 10359 }, { "epoch": 0.66, "grad_norm": 0.8857783675193787, "learning_rate": 2.790128668765275e-06, "loss": 0.6235, "step": 10360 }, { "epoch": 0.66, "grad_norm": 0.873058021068573, "learning_rate": 2.789208370513612e-06, "loss": 0.5137, "step": 10361 }, { "epoch": 0.66, "grad_norm": 0.9512156248092651, "learning_rate": 2.7882881653506947e-06, "loss": 0.6124, "step": 10362 }, { "epoch": 0.66, "grad_norm": 0.8597283959388733, "learning_rate": 2.787368053315266e-06, "loss": 0.5774, "step": 10363 }, { "epoch": 0.66, "grad_norm": 0.9025830626487732, "learning_rate": 2.7864480344460743e-06, "loss": 0.5343, "step": 10364 }, { "epoch": 0.66, "grad_norm": 0.9169187545776367, "learning_rate": 2.7855281087818543e-06, "loss": 0.5986, "step": 10365 }, { "epoch": 0.66, "grad_norm": 0.8351139426231384, "learning_rate": 2.7846082763613412e-06, "loss": 0.5107, "step": 10366 }, { "epoch": 0.66, "grad_norm": 0.9500547647476196, "learning_rate": 2.783688537223268e-06, "loss": 0.6406, "step": 10367 }, { "epoch": 0.66, "grad_norm": 0.9017059803009033, "learning_rate": 2.7827688914063596e-06, "loss": 0.6178, "step": 10368 }, { "epoch": 0.66, "grad_norm": 0.9711951613426208, "learning_rate": 2.78184933894934e-06, "loss": 0.5575, "step": 10369 }, { "epoch": 0.66, "grad_norm": 0.8450467586517334, "learning_rate": 2.780929879890931e-06, "loss": 0.5841, "step": 10370 }, { "epoch": 0.66, "grad_norm": 0.9280916452407837, "learning_rate": 2.780010514269841e-06, "loss": 0.5718, "step": 10371 }, { "epoch": 0.66, "grad_norm": 0.9153168201446533, "learning_rate": 2.7790912421247883e-06, "loss": 0.6029, "step": 10372 }, { "epoch": 0.66, "grad_norm": 0.9410317540168762, "learning_rate": 2.7781720634944766e-06, "loss": 0.6092, "step": 10373 }, { "epoch": 0.66, "grad_norm": 0.8701797723770142, "learning_rate": 2.77725297841761e-06, "loss": 0.5228, "step": 10374 }, { "epoch": 0.66, "grad_norm": 0.8795192241668701, "learning_rate": 2.7763339869328897e-06, "loss": 0.5848, "step": 10375 }, { "epoch": 0.66, "grad_norm": 0.920274555683136, "learning_rate": 2.7754150890790067e-06, "loss": 0.5968, "step": 10376 }, { "epoch": 0.66, "grad_norm": 0.8954097032546997, "learning_rate": 2.7744962848946565e-06, "loss": 0.5599, "step": 10377 }, { "epoch": 0.66, "grad_norm": 0.9171625375747681, "learning_rate": 2.7735775744185276e-06, "loss": 0.5803, "step": 10378 }, { "epoch": 0.66, "grad_norm": 0.8371365666389465, "learning_rate": 2.7726589576893004e-06, "loss": 0.5397, "step": 10379 }, { "epoch": 0.66, "grad_norm": 0.8804381489753723, "learning_rate": 2.7717404347456567e-06, "loss": 0.5273, "step": 10380 }, { "epoch": 0.66, "grad_norm": 0.8363378643989563, "learning_rate": 2.7708220056262706e-06, "loss": 0.578, "step": 10381 }, { "epoch": 0.66, "grad_norm": 0.9080025553703308, "learning_rate": 2.7699036703698158e-06, "loss": 0.5766, "step": 10382 }, { "epoch": 0.66, "grad_norm": 0.9054446220397949, "learning_rate": 2.7689854290149608e-06, "loss": 0.6039, "step": 10383 }, { "epoch": 0.66, "grad_norm": 0.8597883582115173, "learning_rate": 2.768067281600365e-06, "loss": 0.5887, "step": 10384 }, { "epoch": 0.66, "grad_norm": 0.939932644367218, "learning_rate": 2.7671492281646937e-06, "loss": 0.5608, "step": 10385 }, { "epoch": 0.66, "grad_norm": 0.8852954506874084, "learning_rate": 2.7662312687466026e-06, "loss": 0.5328, "step": 10386 }, { "epoch": 0.66, "grad_norm": 0.8315883278846741, "learning_rate": 2.7653134033847393e-06, "loss": 0.5873, "step": 10387 }, { "epoch": 0.66, "grad_norm": 0.8988177180290222, "learning_rate": 2.7643956321177558e-06, "loss": 0.5822, "step": 10388 }, { "epoch": 0.66, "grad_norm": 0.8246173858642578, "learning_rate": 2.763477954984295e-06, "loss": 0.5949, "step": 10389 }, { "epoch": 0.66, "grad_norm": 0.8828296065330505, "learning_rate": 2.7625603720229964e-06, "loss": 0.5875, "step": 10390 }, { "epoch": 0.66, "grad_norm": 0.9308893084526062, "learning_rate": 2.7616428832724983e-06, "loss": 0.6503, "step": 10391 }, { "epoch": 0.66, "grad_norm": 0.914340078830719, "learning_rate": 2.760725488771433e-06, "loss": 0.6379, "step": 10392 }, { "epoch": 0.66, "grad_norm": 0.8978453874588013, "learning_rate": 2.7598081885584237e-06, "loss": 0.5879, "step": 10393 }, { "epoch": 0.66, "grad_norm": 0.9084619283676147, "learning_rate": 2.758890982672102e-06, "loss": 0.5752, "step": 10394 }, { "epoch": 0.66, "grad_norm": 0.8810911774635315, "learning_rate": 2.757973871151083e-06, "loss": 0.6372, "step": 10395 }, { "epoch": 0.66, "grad_norm": 0.8374783992767334, "learning_rate": 2.757056854033985e-06, "loss": 0.5807, "step": 10396 }, { "epoch": 0.66, "grad_norm": 0.9119501709938049, "learning_rate": 2.7561399313594205e-06, "loss": 0.586, "step": 10397 }, { "epoch": 0.66, "grad_norm": 1.0351508855819702, "learning_rate": 2.7552231031659972e-06, "loss": 0.5944, "step": 10398 }, { "epoch": 0.66, "grad_norm": 0.9276666045188904, "learning_rate": 2.75430636949232e-06, "loss": 0.6075, "step": 10399 }, { "epoch": 0.66, "grad_norm": 0.851760745048523, "learning_rate": 2.753389730376992e-06, "loss": 0.5774, "step": 10400 }, { "epoch": 0.66, "grad_norm": 0.8853036165237427, "learning_rate": 2.752473185858603e-06, "loss": 0.581, "step": 10401 }, { "epoch": 0.66, "grad_norm": 0.9542864561080933, "learning_rate": 2.7515567359757526e-06, "loss": 0.6231, "step": 10402 }, { "epoch": 0.66, "grad_norm": 0.8955079317092896, "learning_rate": 2.750640380767025e-06, "loss": 0.5894, "step": 10403 }, { "epoch": 0.66, "grad_norm": 0.9134573340415955, "learning_rate": 2.7497241202710056e-06, "loss": 0.5966, "step": 10404 }, { "epoch": 0.66, "grad_norm": 0.8695476651191711, "learning_rate": 2.7488079545262757e-06, "loss": 0.6017, "step": 10405 }, { "epoch": 0.66, "grad_norm": 0.9271215796470642, "learning_rate": 2.747891883571412e-06, "loss": 0.6063, "step": 10406 }, { "epoch": 0.66, "grad_norm": 0.8371964693069458, "learning_rate": 2.746975907444986e-06, "loss": 0.5274, "step": 10407 }, { "epoch": 0.66, "grad_norm": 0.9001272320747375, "learning_rate": 2.7460600261855687e-06, "loss": 0.6252, "step": 10408 }, { "epoch": 0.66, "grad_norm": 0.8996703028678894, "learning_rate": 2.7451442398317206e-06, "loss": 0.6346, "step": 10409 }, { "epoch": 0.66, "grad_norm": 0.8631662130355835, "learning_rate": 2.7442285484220055e-06, "loss": 0.5648, "step": 10410 }, { "epoch": 0.66, "grad_norm": 0.9067828059196472, "learning_rate": 2.7433129519949784e-06, "loss": 0.596, "step": 10411 }, { "epoch": 0.66, "grad_norm": 0.8877487182617188, "learning_rate": 2.742397450589193e-06, "loss": 0.6091, "step": 10412 }, { "epoch": 0.66, "grad_norm": 0.8369250893592834, "learning_rate": 2.7414820442431976e-06, "loss": 0.5867, "step": 10413 }, { "epoch": 0.66, "grad_norm": 0.9432762265205383, "learning_rate": 2.7405667329955344e-06, "loss": 0.6184, "step": 10414 }, { "epoch": 0.66, "grad_norm": 0.8785738348960876, "learning_rate": 2.739651516884747e-06, "loss": 0.5174, "step": 10415 }, { "epoch": 0.66, "grad_norm": 0.8684585690498352, "learning_rate": 2.7387363959493733e-06, "loss": 0.5862, "step": 10416 }, { "epoch": 0.66, "grad_norm": 0.8876842856407166, "learning_rate": 2.737821370227942e-06, "loss": 0.6136, "step": 10417 }, { "epoch": 0.66, "grad_norm": 0.9569928050041199, "learning_rate": 2.7369064397589828e-06, "loss": 0.5912, "step": 10418 }, { "epoch": 0.66, "grad_norm": 0.8708109259605408, "learning_rate": 2.7359916045810207e-06, "loss": 0.5547, "step": 10419 }, { "epoch": 0.66, "grad_norm": 0.8798702359199524, "learning_rate": 2.7350768647325766e-06, "loss": 0.5619, "step": 10420 }, { "epoch": 0.66, "grad_norm": 0.8539235591888428, "learning_rate": 2.734162220252168e-06, "loss": 0.5034, "step": 10421 }, { "epoch": 0.66, "grad_norm": 0.9067310094833374, "learning_rate": 2.7332476711783044e-06, "loss": 0.6071, "step": 10422 }, { "epoch": 0.66, "grad_norm": 0.8697945475578308, "learning_rate": 2.732333217549494e-06, "loss": 0.598, "step": 10423 }, { "epoch": 0.66, "grad_norm": 0.8847575187683105, "learning_rate": 2.7314188594042466e-06, "loss": 0.5674, "step": 10424 }, { "epoch": 0.66, "grad_norm": 0.9137183427810669, "learning_rate": 2.7305045967810585e-06, "loss": 0.5582, "step": 10425 }, { "epoch": 0.66, "grad_norm": 0.8915376663208008, "learning_rate": 2.7295904297184262e-06, "loss": 0.5746, "step": 10426 }, { "epoch": 0.66, "grad_norm": 0.8941647410392761, "learning_rate": 2.7286763582548424e-06, "loss": 0.5729, "step": 10427 }, { "epoch": 0.66, "grad_norm": 0.9105641841888428, "learning_rate": 2.7277623824287957e-06, "loss": 0.5698, "step": 10428 }, { "epoch": 0.66, "grad_norm": 0.8876394033432007, "learning_rate": 2.726848502278773e-06, "loss": 0.645, "step": 10429 }, { "epoch": 0.66, "grad_norm": 0.9088033437728882, "learning_rate": 2.7259347178432493e-06, "loss": 0.5629, "step": 10430 }, { "epoch": 0.66, "grad_norm": 0.910689115524292, "learning_rate": 2.7250210291607026e-06, "loss": 0.5819, "step": 10431 }, { "epoch": 0.66, "grad_norm": 0.8679473996162415, "learning_rate": 2.7241074362696108e-06, "loss": 0.5183, "step": 10432 }, { "epoch": 0.66, "grad_norm": 0.9289723634719849, "learning_rate": 2.7231939392084347e-06, "loss": 0.6181, "step": 10433 }, { "epoch": 0.66, "grad_norm": 0.895182728767395, "learning_rate": 2.7222805380156414e-06, "loss": 0.6143, "step": 10434 }, { "epoch": 0.66, "grad_norm": 0.8651720881462097, "learning_rate": 2.7213672327296914e-06, "loss": 0.58, "step": 10435 }, { "epoch": 0.66, "grad_norm": 0.8684262037277222, "learning_rate": 2.72045402338904e-06, "loss": 0.5928, "step": 10436 }, { "epoch": 0.66, "grad_norm": 0.9024814367294312, "learning_rate": 2.719540910032142e-06, "loss": 0.5643, "step": 10437 }, { "epoch": 0.66, "grad_norm": 0.9180070161819458, "learning_rate": 2.7186278926974406e-06, "loss": 0.5972, "step": 10438 }, { "epoch": 0.66, "grad_norm": 0.867103099822998, "learning_rate": 2.717714971423383e-06, "loss": 0.5539, "step": 10439 }, { "epoch": 0.66, "grad_norm": 0.864017903804779, "learning_rate": 2.7168021462484084e-06, "loss": 0.5505, "step": 10440 }, { "epoch": 0.66, "grad_norm": 0.8561496138572693, "learning_rate": 2.715889417210953e-06, "loss": 0.586, "step": 10441 }, { "epoch": 0.66, "grad_norm": 0.874715268611908, "learning_rate": 2.714976784349448e-06, "loss": 0.5803, "step": 10442 }, { "epoch": 0.66, "grad_norm": 0.9094971418380737, "learning_rate": 2.7140642477023237e-06, "loss": 0.5973, "step": 10443 }, { "epoch": 0.66, "grad_norm": 0.9093654155731201, "learning_rate": 2.7131518073079976e-06, "loss": 0.6123, "step": 10444 }, { "epoch": 0.66, "grad_norm": 0.8765634894371033, "learning_rate": 2.7122394632048974e-06, "loss": 0.5875, "step": 10445 }, { "epoch": 0.66, "grad_norm": 0.9068828821182251, "learning_rate": 2.7113272154314328e-06, "loss": 0.6061, "step": 10446 }, { "epoch": 0.66, "grad_norm": 0.8776718378067017, "learning_rate": 2.710415064026018e-06, "loss": 0.5145, "step": 10447 }, { "epoch": 0.66, "grad_norm": 0.8980036377906799, "learning_rate": 2.7095030090270596e-06, "loss": 0.5571, "step": 10448 }, { "epoch": 0.66, "grad_norm": 0.837546706199646, "learning_rate": 2.7085910504729617e-06, "loss": 0.6024, "step": 10449 }, { "epoch": 0.66, "grad_norm": 0.8471895456314087, "learning_rate": 2.7076791884021236e-06, "loss": 0.5205, "step": 10450 }, { "epoch": 0.66, "grad_norm": 0.882883608341217, "learning_rate": 2.7067674228529417e-06, "loss": 0.5888, "step": 10451 }, { "epoch": 0.66, "grad_norm": 0.8597538471221924, "learning_rate": 2.7058557538638026e-06, "loss": 0.555, "step": 10452 }, { "epoch": 0.66, "grad_norm": 0.8812461495399475, "learning_rate": 2.7049441814731007e-06, "loss": 0.5738, "step": 10453 }, { "epoch": 0.66, "grad_norm": 0.8679195642471313, "learning_rate": 2.704032705719214e-06, "loss": 0.5855, "step": 10454 }, { "epoch": 0.66, "grad_norm": 0.9121565818786621, "learning_rate": 2.703121326640522e-06, "loss": 0.6203, "step": 10455 }, { "epoch": 0.66, "grad_norm": 0.8402708768844604, "learning_rate": 2.702210044275401e-06, "loss": 0.5068, "step": 10456 }, { "epoch": 0.66, "grad_norm": 0.9229235053062439, "learning_rate": 2.7012988586622224e-06, "loss": 0.5719, "step": 10457 }, { "epoch": 0.66, "grad_norm": 0.874308168888092, "learning_rate": 2.7003877698393512e-06, "loss": 0.5587, "step": 10458 }, { "epoch": 0.66, "grad_norm": 0.8813081383705139, "learning_rate": 2.6994767778451535e-06, "loss": 0.5455, "step": 10459 }, { "epoch": 0.66, "grad_norm": 0.8940520286560059, "learning_rate": 2.6985658827179845e-06, "loss": 0.5927, "step": 10460 }, { "epoch": 0.66, "grad_norm": 0.8507505059242249, "learning_rate": 2.6976550844961992e-06, "loss": 0.5388, "step": 10461 }, { "epoch": 0.66, "grad_norm": 0.9301406741142273, "learning_rate": 2.6967443832181496e-06, "loss": 0.57, "step": 10462 }, { "epoch": 0.66, "grad_norm": 0.8438676595687866, "learning_rate": 2.6958337789221813e-06, "loss": 0.555, "step": 10463 }, { "epoch": 0.66, "grad_norm": 0.9643988609313965, "learning_rate": 2.694923271646637e-06, "loss": 0.5663, "step": 10464 }, { "epoch": 0.66, "grad_norm": 0.9135273098945618, "learning_rate": 2.694012861429855e-06, "loss": 0.603, "step": 10465 }, { "epoch": 0.66, "grad_norm": 0.9375592470169067, "learning_rate": 2.693102548310169e-06, "loss": 0.5981, "step": 10466 }, { "epoch": 0.66, "grad_norm": 0.8663008809089661, "learning_rate": 2.6921923323259124e-06, "loss": 0.5962, "step": 10467 }, { "epoch": 0.66, "grad_norm": 0.9672373533248901, "learning_rate": 2.691282213515406e-06, "loss": 0.5845, "step": 10468 }, { "epoch": 0.66, "grad_norm": 0.8220438957214355, "learning_rate": 2.690372191916974e-06, "loss": 0.57, "step": 10469 }, { "epoch": 0.66, "grad_norm": 0.905386745929718, "learning_rate": 2.6894622675689345e-06, "loss": 0.6044, "step": 10470 }, { "epoch": 0.66, "grad_norm": 0.860525906085968, "learning_rate": 2.6885524405096007e-06, "loss": 0.6008, "step": 10471 }, { "epoch": 0.66, "grad_norm": 0.9347862005233765, "learning_rate": 2.687642710777284e-06, "loss": 0.6183, "step": 10472 }, { "epoch": 0.66, "grad_norm": 0.8891615867614746, "learning_rate": 2.6867330784102896e-06, "loss": 0.5547, "step": 10473 }, { "epoch": 0.66, "grad_norm": 0.9229059815406799, "learning_rate": 2.6858235434469138e-06, "loss": 0.6138, "step": 10474 }, { "epoch": 0.66, "grad_norm": 0.8102059364318848, "learning_rate": 2.684914105925463e-06, "loss": 0.5539, "step": 10475 }, { "epoch": 0.66, "grad_norm": 0.8751254081726074, "learning_rate": 2.6840047658842226e-06, "loss": 0.5189, "step": 10476 }, { "epoch": 0.66, "grad_norm": 0.9399062991142273, "learning_rate": 2.683095523361486e-06, "loss": 0.6127, "step": 10477 }, { "epoch": 0.66, "grad_norm": 0.9292119145393372, "learning_rate": 2.682186378395536e-06, "loss": 0.6257, "step": 10478 }, { "epoch": 0.66, "grad_norm": 0.9873320460319519, "learning_rate": 2.6812773310246547e-06, "loss": 0.5942, "step": 10479 }, { "epoch": 0.66, "grad_norm": 0.8316569328308105, "learning_rate": 2.680368381287119e-06, "loss": 0.5663, "step": 10480 }, { "epoch": 0.66, "grad_norm": 0.893159031867981, "learning_rate": 2.6794595292212035e-06, "loss": 0.5561, "step": 10481 }, { "epoch": 0.66, "grad_norm": 0.905292272567749, "learning_rate": 2.67855077486517e-06, "loss": 0.5821, "step": 10482 }, { "epoch": 0.66, "grad_norm": 0.9160034656524658, "learning_rate": 2.677642118257292e-06, "loss": 0.6112, "step": 10483 }, { "epoch": 0.66, "grad_norm": 0.88798987865448, "learning_rate": 2.6767335594358234e-06, "loss": 0.6043, "step": 10484 }, { "epoch": 0.66, "grad_norm": 0.8431712985038757, "learning_rate": 2.675825098439023e-06, "loss": 0.5438, "step": 10485 }, { "epoch": 0.66, "grad_norm": 0.9120664596557617, "learning_rate": 2.6749167353051443e-06, "loss": 0.6029, "step": 10486 }, { "epoch": 0.66, "grad_norm": 0.8885997533798218, "learning_rate": 2.674008470072429e-06, "loss": 0.5532, "step": 10487 }, { "epoch": 0.66, "grad_norm": 0.9783884286880493, "learning_rate": 2.673100302779128e-06, "loss": 0.5793, "step": 10488 }, { "epoch": 0.66, "grad_norm": 0.8485262393951416, "learning_rate": 2.6721922334634804e-06, "loss": 0.5563, "step": 10489 }, { "epoch": 0.66, "grad_norm": 0.896809458732605, "learning_rate": 2.671284262163718e-06, "loss": 0.5856, "step": 10490 }, { "epoch": 0.66, "grad_norm": 0.8948637843132019, "learning_rate": 2.6703763889180746e-06, "loss": 0.5547, "step": 10491 }, { "epoch": 0.66, "grad_norm": 0.8857586979866028, "learning_rate": 2.6694686137647767e-06, "loss": 0.5937, "step": 10492 }, { "epoch": 0.66, "grad_norm": 0.8958655595779419, "learning_rate": 2.668560936742048e-06, "loss": 0.5439, "step": 10493 }, { "epoch": 0.66, "grad_norm": 0.8610227704048157, "learning_rate": 2.6676533578881102e-06, "loss": 0.5449, "step": 10494 }, { "epoch": 0.66, "grad_norm": 0.8370438814163208, "learning_rate": 2.6667458772411724e-06, "loss": 0.5593, "step": 10495 }, { "epoch": 0.66, "grad_norm": 0.886195182800293, "learning_rate": 2.66583849483945e-06, "loss": 0.6025, "step": 10496 }, { "epoch": 0.67, "grad_norm": 0.8641106486320496, "learning_rate": 2.664931210721151e-06, "loss": 0.5801, "step": 10497 }, { "epoch": 0.67, "grad_norm": 0.8426538109779358, "learning_rate": 2.6640240249244744e-06, "loss": 0.5569, "step": 10498 }, { "epoch": 0.67, "grad_norm": 0.8817174434661865, "learning_rate": 2.6631169374876185e-06, "loss": 0.562, "step": 10499 }, { "epoch": 0.67, "grad_norm": 0.8603051900863647, "learning_rate": 2.6622099484487794e-06, "loss": 0.5917, "step": 10500 }, { "epoch": 0.67, "grad_norm": 0.9094916582107544, "learning_rate": 2.6613030578461476e-06, "loss": 0.5641, "step": 10501 }, { "epoch": 0.67, "grad_norm": 0.8471028804779053, "learning_rate": 2.6603962657179094e-06, "loss": 0.5664, "step": 10502 }, { "epoch": 0.67, "grad_norm": 0.9582904577255249, "learning_rate": 2.6594895721022436e-06, "loss": 0.651, "step": 10503 }, { "epoch": 0.67, "grad_norm": 0.8720226287841797, "learning_rate": 2.6585829770373286e-06, "loss": 0.5729, "step": 10504 }, { "epoch": 0.67, "grad_norm": 1.0530695915222168, "learning_rate": 2.657676480561342e-06, "loss": 0.6192, "step": 10505 }, { "epoch": 0.67, "grad_norm": 0.8338209986686707, "learning_rate": 2.6567700827124494e-06, "loss": 0.5738, "step": 10506 }, { "epoch": 0.67, "grad_norm": 0.9002853631973267, "learning_rate": 2.655863783528817e-06, "loss": 0.5746, "step": 10507 }, { "epoch": 0.67, "grad_norm": 0.9461910128593445, "learning_rate": 2.6549575830486053e-06, "loss": 0.6294, "step": 10508 }, { "epoch": 0.67, "grad_norm": 0.883553683757782, "learning_rate": 2.6540514813099728e-06, "loss": 0.6029, "step": 10509 }, { "epoch": 0.67, "grad_norm": 0.9209686517715454, "learning_rate": 2.6531454783510736e-06, "loss": 0.6247, "step": 10510 }, { "epoch": 0.67, "grad_norm": 0.9430029988288879, "learning_rate": 2.6522395742100514e-06, "loss": 0.6145, "step": 10511 }, { "epoch": 0.67, "grad_norm": 0.8918984532356262, "learning_rate": 2.651333768925052e-06, "loss": 0.5791, "step": 10512 }, { "epoch": 0.67, "grad_norm": 0.9535161256790161, "learning_rate": 2.6504280625342203e-06, "loss": 0.6567, "step": 10513 }, { "epoch": 0.67, "grad_norm": 0.8918493390083313, "learning_rate": 2.6495224550756888e-06, "loss": 0.6135, "step": 10514 }, { "epoch": 0.67, "grad_norm": 0.8543890118598938, "learning_rate": 2.6486169465875887e-06, "loss": 0.5809, "step": 10515 }, { "epoch": 0.67, "grad_norm": 0.9117350578308105, "learning_rate": 2.647711537108052e-06, "loss": 0.5975, "step": 10516 }, { "epoch": 0.67, "grad_norm": 0.9220753908157349, "learning_rate": 2.6468062266751955e-06, "loss": 0.5826, "step": 10517 }, { "epoch": 0.67, "grad_norm": 0.8443688750267029, "learning_rate": 2.6459010153271456e-06, "loss": 0.5437, "step": 10518 }, { "epoch": 0.67, "grad_norm": 0.9096937775611877, "learning_rate": 2.6449959031020134e-06, "loss": 0.5755, "step": 10519 }, { "epoch": 0.67, "grad_norm": 0.9416838884353638, "learning_rate": 2.6440908900379115e-06, "loss": 0.5631, "step": 10520 }, { "epoch": 0.67, "grad_norm": 0.855556309223175, "learning_rate": 2.6431859761729462e-06, "loss": 0.596, "step": 10521 }, { "epoch": 0.67, "grad_norm": 0.8807106018066406, "learning_rate": 2.6422811615452205e-06, "loss": 0.5545, "step": 10522 }, { "epoch": 0.67, "grad_norm": 0.8947232365608215, "learning_rate": 2.6413764461928335e-06, "loss": 0.5682, "step": 10523 }, { "epoch": 0.67, "grad_norm": 0.7953035235404968, "learning_rate": 2.6404718301538814e-06, "loss": 0.6003, "step": 10524 }, { "epoch": 0.67, "grad_norm": 0.880653977394104, "learning_rate": 2.639567313466448e-06, "loss": 0.5458, "step": 10525 }, { "epoch": 0.67, "grad_norm": 0.8574607968330383, "learning_rate": 2.6386628961686277e-06, "loss": 0.5645, "step": 10526 }, { "epoch": 0.67, "grad_norm": 0.9444485306739807, "learning_rate": 2.6377585782984972e-06, "loss": 0.572, "step": 10527 }, { "epoch": 0.67, "grad_norm": 0.942674994468689, "learning_rate": 2.636854359894134e-06, "loss": 0.5551, "step": 10528 }, { "epoch": 0.67, "grad_norm": 0.8828451633453369, "learning_rate": 2.635950240993614e-06, "loss": 0.6088, "step": 10529 }, { "epoch": 0.67, "grad_norm": 0.9297851920127869, "learning_rate": 2.635046221635005e-06, "loss": 0.6244, "step": 10530 }, { "epoch": 0.67, "grad_norm": 0.8250426054000854, "learning_rate": 2.6341423018563727e-06, "loss": 0.5795, "step": 10531 }, { "epoch": 0.67, "grad_norm": 0.8772184252738953, "learning_rate": 2.633238481695779e-06, "loss": 0.5588, "step": 10532 }, { "epoch": 0.67, "grad_norm": 0.8727168440818787, "learning_rate": 2.6323347611912786e-06, "loss": 0.5697, "step": 10533 }, { "epoch": 0.67, "grad_norm": 0.9432665705680847, "learning_rate": 2.6314311403809224e-06, "loss": 0.6384, "step": 10534 }, { "epoch": 0.67, "grad_norm": 0.8159708380699158, "learning_rate": 2.630527619302765e-06, "loss": 0.5078, "step": 10535 }, { "epoch": 0.67, "grad_norm": 0.8695153594017029, "learning_rate": 2.6296241979948455e-06, "loss": 0.5961, "step": 10536 }, { "epoch": 0.67, "grad_norm": 0.9193745255470276, "learning_rate": 2.6287208764952045e-06, "loss": 0.6085, "step": 10537 }, { "epoch": 0.67, "grad_norm": 0.8767115473747253, "learning_rate": 2.6278176548418783e-06, "loss": 0.5908, "step": 10538 }, { "epoch": 0.67, "grad_norm": 0.9035547971725464, "learning_rate": 2.6269145330728985e-06, "loss": 0.5646, "step": 10539 }, { "epoch": 0.67, "grad_norm": 0.8972700238227844, "learning_rate": 2.626011511226294e-06, "loss": 0.5871, "step": 10540 }, { "epoch": 0.67, "grad_norm": 0.9117726683616638, "learning_rate": 2.625108589340085e-06, "loss": 0.5854, "step": 10541 }, { "epoch": 0.67, "grad_norm": 0.8025404810905457, "learning_rate": 2.624205767452289e-06, "loss": 0.5042, "step": 10542 }, { "epoch": 0.67, "grad_norm": 0.9173393249511719, "learning_rate": 2.623303045600928e-06, "loss": 0.527, "step": 10543 }, { "epoch": 0.67, "grad_norm": 0.9533704519271851, "learning_rate": 2.622400423824005e-06, "loss": 0.6099, "step": 10544 }, { "epoch": 0.67, "grad_norm": 0.9197595119476318, "learning_rate": 2.62149790215953e-06, "loss": 0.5371, "step": 10545 }, { "epoch": 0.67, "grad_norm": 0.8468542098999023, "learning_rate": 2.6205954806455057e-06, "loss": 0.5773, "step": 10546 }, { "epoch": 0.67, "grad_norm": 0.8749061226844788, "learning_rate": 2.6196931593199247e-06, "loss": 0.5984, "step": 10547 }, { "epoch": 0.67, "grad_norm": 0.9503610134124756, "learning_rate": 2.618790938220788e-06, "loss": 0.5798, "step": 10548 }, { "epoch": 0.67, "grad_norm": 0.8924551010131836, "learning_rate": 2.617888817386079e-06, "loss": 0.5438, "step": 10549 }, { "epoch": 0.67, "grad_norm": 0.9542580246925354, "learning_rate": 2.6169867968537856e-06, "loss": 0.5912, "step": 10550 }, { "epoch": 0.67, "grad_norm": 0.8375207781791687, "learning_rate": 2.616084876661888e-06, "loss": 0.5299, "step": 10551 }, { "epoch": 0.67, "grad_norm": 0.8957962989807129, "learning_rate": 2.6151830568483627e-06, "loss": 0.5979, "step": 10552 }, { "epoch": 0.67, "grad_norm": 0.9477977156639099, "learning_rate": 2.614281337451183e-06, "loss": 0.5815, "step": 10553 }, { "epoch": 0.67, "grad_norm": 0.8835856914520264, "learning_rate": 2.61337971850832e-06, "loss": 0.6355, "step": 10554 }, { "epoch": 0.67, "grad_norm": 0.8357982039451599, "learning_rate": 2.6124782000577296e-06, "loss": 0.5298, "step": 10555 }, { "epoch": 0.67, "grad_norm": 0.8705008625984192, "learning_rate": 2.6115767821373807e-06, "loss": 0.5278, "step": 10556 }, { "epoch": 0.67, "grad_norm": 0.8560452461242676, "learning_rate": 2.610675464785223e-06, "loss": 0.6022, "step": 10557 }, { "epoch": 0.67, "grad_norm": 0.8124215006828308, "learning_rate": 2.6097742480392097e-06, "loss": 0.5591, "step": 10558 }, { "epoch": 0.67, "grad_norm": 0.865249752998352, "learning_rate": 2.6088731319372874e-06, "loss": 0.5972, "step": 10559 }, { "epoch": 0.67, "grad_norm": 0.9022778868675232, "learning_rate": 2.6079721165173994e-06, "loss": 0.5793, "step": 10560 }, { "epoch": 0.67, "grad_norm": 0.9692482352256775, "learning_rate": 2.6070712018174847e-06, "loss": 0.5374, "step": 10561 }, { "epoch": 0.67, "grad_norm": 0.8842456340789795, "learning_rate": 2.6061703878754784e-06, "loss": 0.592, "step": 10562 }, { "epoch": 0.67, "grad_norm": 0.8886352777481079, "learning_rate": 2.6052696747293087e-06, "loss": 0.5585, "step": 10563 }, { "epoch": 0.67, "grad_norm": 0.9306212663650513, "learning_rate": 2.6043690624169014e-06, "loss": 0.6042, "step": 10564 }, { "epoch": 0.67, "grad_norm": 0.9206665754318237, "learning_rate": 2.6034685509761803e-06, "loss": 0.5784, "step": 10565 }, { "epoch": 0.67, "grad_norm": 0.9081207513809204, "learning_rate": 2.602568140445061e-06, "loss": 0.5858, "step": 10566 }, { "epoch": 0.67, "grad_norm": 0.9279916882514954, "learning_rate": 2.6016678308614583e-06, "loss": 0.5536, "step": 10567 }, { "epoch": 0.67, "grad_norm": 0.8986056447029114, "learning_rate": 2.600767622263277e-06, "loss": 0.5619, "step": 10568 }, { "epoch": 0.67, "grad_norm": 0.9222875833511353, "learning_rate": 2.599867514688427e-06, "loss": 0.587, "step": 10569 }, { "epoch": 0.67, "grad_norm": 0.880499005317688, "learning_rate": 2.598967508174808e-06, "loss": 0.5327, "step": 10570 }, { "epoch": 0.67, "grad_norm": 0.8843125700950623, "learning_rate": 2.598067602760313e-06, "loss": 0.5566, "step": 10571 }, { "epoch": 0.67, "grad_norm": 0.8370311856269836, "learning_rate": 2.597167798482835e-06, "loss": 0.5875, "step": 10572 }, { "epoch": 0.67, "grad_norm": 0.9187718033790588, "learning_rate": 2.596268095380263e-06, "loss": 0.5995, "step": 10573 }, { "epoch": 0.67, "grad_norm": 0.8443053364753723, "learning_rate": 2.5953684934904788e-06, "loss": 0.5484, "step": 10574 }, { "epoch": 0.67, "grad_norm": 0.8494760990142822, "learning_rate": 2.5944689928513643e-06, "loss": 0.5316, "step": 10575 }, { "epoch": 0.67, "grad_norm": 0.9031586647033691, "learning_rate": 2.593569593500789e-06, "loss": 0.5691, "step": 10576 }, { "epoch": 0.67, "grad_norm": 0.8875634074211121, "learning_rate": 2.592670295476628e-06, "loss": 0.5378, "step": 10577 }, { "epoch": 0.67, "grad_norm": 0.8447946310043335, "learning_rate": 2.591771098816749e-06, "loss": 0.6219, "step": 10578 }, { "epoch": 0.67, "grad_norm": 0.8940092921257019, "learning_rate": 2.5908720035590085e-06, "loss": 0.5784, "step": 10579 }, { "epoch": 0.67, "grad_norm": 0.8719146251678467, "learning_rate": 2.5899730097412678e-06, "loss": 0.5754, "step": 10580 }, { "epoch": 0.67, "grad_norm": 0.8950543403625488, "learning_rate": 2.58907411740138e-06, "loss": 0.5846, "step": 10581 }, { "epoch": 0.67, "grad_norm": 0.8580577373504639, "learning_rate": 2.5881753265771938e-06, "loss": 0.5897, "step": 10582 }, { "epoch": 0.67, "grad_norm": 0.8067103028297424, "learning_rate": 2.587276637306556e-06, "loss": 0.5128, "step": 10583 }, { "epoch": 0.67, "grad_norm": 0.8843825459480286, "learning_rate": 2.586378049627304e-06, "loss": 0.5823, "step": 10584 }, { "epoch": 0.67, "grad_norm": 0.8826472759246826, "learning_rate": 2.5854795635772743e-06, "loss": 0.5843, "step": 10585 }, { "epoch": 0.67, "grad_norm": 0.8553101420402527, "learning_rate": 2.584581179194304e-06, "loss": 0.5796, "step": 10586 }, { "epoch": 0.67, "grad_norm": 0.8672010898590088, "learning_rate": 2.5836828965162167e-06, "loss": 0.5931, "step": 10587 }, { "epoch": 0.67, "grad_norm": 0.927105188369751, "learning_rate": 2.582784715580836e-06, "loss": 0.5827, "step": 10588 }, { "epoch": 0.67, "grad_norm": 0.8936177492141724, "learning_rate": 2.581886636425983e-06, "loss": 0.5892, "step": 10589 }, { "epoch": 0.67, "grad_norm": 0.9585930109024048, "learning_rate": 2.580988659089471e-06, "loss": 0.6104, "step": 10590 }, { "epoch": 0.67, "grad_norm": 0.957203209400177, "learning_rate": 2.580090783609114e-06, "loss": 0.6265, "step": 10591 }, { "epoch": 0.67, "grad_norm": 0.8555622696876526, "learning_rate": 2.5791930100227133e-06, "loss": 0.5533, "step": 10592 }, { "epoch": 0.67, "grad_norm": 0.8494757413864136, "learning_rate": 2.5782953383680733e-06, "loss": 0.5352, "step": 10593 }, { "epoch": 0.67, "grad_norm": 0.8753517270088196, "learning_rate": 2.5773977686829928e-06, "loss": 0.5861, "step": 10594 }, { "epoch": 0.67, "grad_norm": 0.8782363533973694, "learning_rate": 2.5765003010052643e-06, "loss": 0.55, "step": 10595 }, { "epoch": 0.67, "grad_norm": 0.9527836441993713, "learning_rate": 2.5756029353726777e-06, "loss": 0.5831, "step": 10596 }, { "epoch": 0.67, "grad_norm": 0.9011462926864624, "learning_rate": 2.574705671823019e-06, "loss": 0.6172, "step": 10597 }, { "epoch": 0.67, "grad_norm": 0.935152530670166, "learning_rate": 2.5738085103940634e-06, "loss": 0.6276, "step": 10598 }, { "epoch": 0.67, "grad_norm": 0.8869521021842957, "learning_rate": 2.572911451123594e-06, "loss": 0.6076, "step": 10599 }, { "epoch": 0.67, "grad_norm": 0.918903648853302, "learning_rate": 2.572014494049382e-06, "loss": 0.5971, "step": 10600 }, { "epoch": 0.67, "grad_norm": 0.8759111166000366, "learning_rate": 2.571117639209191e-06, "loss": 0.6048, "step": 10601 }, { "epoch": 0.67, "grad_norm": 0.8874173164367676, "learning_rate": 2.5702208866407873e-06, "loss": 0.5135, "step": 10602 }, { "epoch": 0.67, "grad_norm": 0.8090372681617737, "learning_rate": 2.5693242363819292e-06, "loss": 0.5409, "step": 10603 }, { "epoch": 0.67, "grad_norm": 0.8543607592582703, "learning_rate": 2.5684276884703717e-06, "loss": 0.5645, "step": 10604 }, { "epoch": 0.67, "grad_norm": 0.9561940431594849, "learning_rate": 2.567531242943867e-06, "loss": 0.6001, "step": 10605 }, { "epoch": 0.67, "grad_norm": 0.9354879260063171, "learning_rate": 2.5666348998401565e-06, "loss": 0.5695, "step": 10606 }, { "epoch": 0.67, "grad_norm": 0.8753422498703003, "learning_rate": 2.565738659196987e-06, "loss": 0.556, "step": 10607 }, { "epoch": 0.67, "grad_norm": 0.9323849678039551, "learning_rate": 2.5648425210520967e-06, "loss": 0.6462, "step": 10608 }, { "epoch": 0.67, "grad_norm": 0.8231973052024841, "learning_rate": 2.563946485443214e-06, "loss": 0.5039, "step": 10609 }, { "epoch": 0.67, "grad_norm": 0.8657350540161133, "learning_rate": 2.5630505524080707e-06, "loss": 0.5553, "step": 10610 }, { "epoch": 0.67, "grad_norm": 0.8768414258956909, "learning_rate": 2.5621547219843905e-06, "loss": 0.5786, "step": 10611 }, { "epoch": 0.67, "grad_norm": 0.9179761409759521, "learning_rate": 2.5612589942098952e-06, "loss": 0.5519, "step": 10612 }, { "epoch": 0.67, "grad_norm": 0.8903763294219971, "learning_rate": 2.560363369122301e-06, "loss": 0.6134, "step": 10613 }, { "epoch": 0.67, "grad_norm": 0.9475022554397583, "learning_rate": 2.559467846759317e-06, "loss": 0.5948, "step": 10614 }, { "epoch": 0.67, "grad_norm": 0.9176366329193115, "learning_rate": 2.5585724271586505e-06, "loss": 0.5935, "step": 10615 }, { "epoch": 0.67, "grad_norm": 0.9265202283859253, "learning_rate": 2.557677110358009e-06, "loss": 0.5969, "step": 10616 }, { "epoch": 0.67, "grad_norm": 0.8886149525642395, "learning_rate": 2.556781896395087e-06, "loss": 0.571, "step": 10617 }, { "epoch": 0.67, "grad_norm": 0.942081093788147, "learning_rate": 2.55588678530758e-06, "loss": 0.5508, "step": 10618 }, { "epoch": 0.67, "grad_norm": 0.9063771367073059, "learning_rate": 2.5549917771331767e-06, "loss": 0.5414, "step": 10619 }, { "epoch": 0.67, "grad_norm": 0.8666827082633972, "learning_rate": 2.5540968719095656e-06, "loss": 0.5374, "step": 10620 }, { "epoch": 0.67, "grad_norm": 0.9030107259750366, "learning_rate": 2.5532020696744277e-06, "loss": 0.5554, "step": 10621 }, { "epoch": 0.67, "grad_norm": 0.9230242967605591, "learning_rate": 2.5523073704654374e-06, "loss": 0.6067, "step": 10622 }, { "epoch": 0.67, "grad_norm": 0.9134321808815002, "learning_rate": 2.5514127743202668e-06, "loss": 0.5651, "step": 10623 }, { "epoch": 0.67, "grad_norm": 0.8783094882965088, "learning_rate": 2.5505182812765894e-06, "loss": 0.5925, "step": 10624 }, { "epoch": 0.67, "grad_norm": 0.9257774949073792, "learning_rate": 2.549623891372065e-06, "loss": 0.6196, "step": 10625 }, { "epoch": 0.67, "grad_norm": 0.924429714679718, "learning_rate": 2.5487296046443537e-06, "loss": 0.5452, "step": 10626 }, { "epoch": 0.67, "grad_norm": 0.9063295125961304, "learning_rate": 2.547835421131114e-06, "loss": 0.6303, "step": 10627 }, { "epoch": 0.67, "grad_norm": 0.8978242874145508, "learning_rate": 2.5469413408699894e-06, "loss": 0.5459, "step": 10628 }, { "epoch": 0.67, "grad_norm": 0.9666255712509155, "learning_rate": 2.546047363898636e-06, "loss": 0.6131, "step": 10629 }, { "epoch": 0.67, "grad_norm": 0.9179185032844543, "learning_rate": 2.545153490254689e-06, "loss": 0.6264, "step": 10630 }, { "epoch": 0.67, "grad_norm": 0.8218669295310974, "learning_rate": 2.5442597199757896e-06, "loss": 0.5331, "step": 10631 }, { "epoch": 0.67, "grad_norm": 0.8918872475624084, "learning_rate": 2.5433660530995696e-06, "loss": 0.6075, "step": 10632 }, { "epoch": 0.67, "grad_norm": 0.8965834379196167, "learning_rate": 2.54247248966366e-06, "loss": 0.5411, "step": 10633 }, { "epoch": 0.67, "grad_norm": 0.9281273484230042, "learning_rate": 2.5415790297056843e-06, "loss": 0.545, "step": 10634 }, { "epoch": 0.67, "grad_norm": 0.882722795009613, "learning_rate": 2.5406856732632647e-06, "loss": 0.6063, "step": 10635 }, { "epoch": 0.67, "grad_norm": 0.8930636048316956, "learning_rate": 2.539792420374013e-06, "loss": 0.6173, "step": 10636 }, { "epoch": 0.67, "grad_norm": 0.8792672753334045, "learning_rate": 2.5388992710755477e-06, "loss": 0.5656, "step": 10637 }, { "epoch": 0.67, "grad_norm": 0.8245922327041626, "learning_rate": 2.5380062254054706e-06, "loss": 0.5317, "step": 10638 }, { "epoch": 0.67, "grad_norm": 0.964028537273407, "learning_rate": 2.5371132834013867e-06, "loss": 0.5868, "step": 10639 }, { "epoch": 0.67, "grad_norm": 0.8802077770233154, "learning_rate": 2.5362204451008963e-06, "loss": 0.5438, "step": 10640 }, { "epoch": 0.67, "grad_norm": 0.8937103152275085, "learning_rate": 2.5353277105415887e-06, "loss": 0.6146, "step": 10641 }, { "epoch": 0.67, "grad_norm": 1.144944667816162, "learning_rate": 2.5344350797610597e-06, "loss": 0.5291, "step": 10642 }, { "epoch": 0.67, "grad_norm": 0.8854457139968872, "learning_rate": 2.533542552796893e-06, "loss": 0.5663, "step": 10643 }, { "epoch": 0.67, "grad_norm": 0.9482905864715576, "learning_rate": 2.5326501296866677e-06, "loss": 0.5978, "step": 10644 }, { "epoch": 0.67, "grad_norm": 0.8801560401916504, "learning_rate": 2.531757810467963e-06, "loss": 0.5489, "step": 10645 }, { "epoch": 0.67, "grad_norm": 0.8408117890357971, "learning_rate": 2.53086559517835e-06, "loss": 0.5729, "step": 10646 }, { "epoch": 0.67, "grad_norm": 0.8972226977348328, "learning_rate": 2.529973483855397e-06, "loss": 0.507, "step": 10647 }, { "epoch": 0.67, "grad_norm": 0.8918501734733582, "learning_rate": 2.52908147653667e-06, "loss": 0.5221, "step": 10648 }, { "epoch": 0.67, "grad_norm": 0.8490516543388367, "learning_rate": 2.5281895732597227e-06, "loss": 0.5688, "step": 10649 }, { "epoch": 0.67, "grad_norm": 0.9360247850418091, "learning_rate": 2.527297774062115e-06, "loss": 0.5614, "step": 10650 }, { "epoch": 0.67, "grad_norm": 0.8709716796875, "learning_rate": 2.5264060789813994e-06, "loss": 0.5421, "step": 10651 }, { "epoch": 0.67, "grad_norm": 0.8740971088409424, "learning_rate": 2.525514488055116e-06, "loss": 0.5832, "step": 10652 }, { "epoch": 0.67, "grad_norm": 0.9189413785934448, "learning_rate": 2.5246230013208093e-06, "loss": 0.6433, "step": 10653 }, { "epoch": 0.67, "grad_norm": 0.7839402556419373, "learning_rate": 2.5237316188160165e-06, "loss": 0.4975, "step": 10654 }, { "epoch": 0.68, "grad_norm": 0.9069191217422485, "learning_rate": 2.522840340578272e-06, "loss": 0.5909, "step": 10655 }, { "epoch": 0.68, "grad_norm": 0.9287664294242859, "learning_rate": 2.521949166645102e-06, "loss": 0.6464, "step": 10656 }, { "epoch": 0.68, "grad_norm": 0.8806740045547485, "learning_rate": 2.5210580970540354e-06, "loss": 0.5462, "step": 10657 }, { "epoch": 0.68, "grad_norm": 0.8837103247642517, "learning_rate": 2.5201671318425834e-06, "loss": 0.5808, "step": 10658 }, { "epoch": 0.68, "grad_norm": 0.9099284410476685, "learning_rate": 2.519276271048272e-06, "loss": 0.5988, "step": 10659 }, { "epoch": 0.68, "grad_norm": 0.9354240894317627, "learning_rate": 2.5183855147086045e-06, "loss": 0.6039, "step": 10660 }, { "epoch": 0.68, "grad_norm": 0.8624934554100037, "learning_rate": 2.51749486286109e-06, "loss": 0.5186, "step": 10661 }, { "epoch": 0.68, "grad_norm": 0.8302717208862305, "learning_rate": 2.516604315543231e-06, "loss": 0.578, "step": 10662 }, { "epoch": 0.68, "grad_norm": 0.9564114212989807, "learning_rate": 2.515713872792525e-06, "loss": 0.6205, "step": 10663 }, { "epoch": 0.68, "grad_norm": 0.9036477208137512, "learning_rate": 2.5148235346464654e-06, "loss": 0.5378, "step": 10664 }, { "epoch": 0.68, "grad_norm": 0.850906252861023, "learning_rate": 2.5139333011425435e-06, "loss": 0.5776, "step": 10665 }, { "epoch": 0.68, "grad_norm": 0.8165357708930969, "learning_rate": 2.5130431723182386e-06, "loss": 0.5129, "step": 10666 }, { "epoch": 0.68, "grad_norm": 0.9315398931503296, "learning_rate": 2.512153148211038e-06, "loss": 0.602, "step": 10667 }, { "epoch": 0.68, "grad_norm": 0.97512286901474, "learning_rate": 2.5112632288584116e-06, "loss": 0.5975, "step": 10668 }, { "epoch": 0.68, "grad_norm": 0.9549464583396912, "learning_rate": 2.5103734142978325e-06, "loss": 0.6342, "step": 10669 }, { "epoch": 0.68, "grad_norm": 0.9069748520851135, "learning_rate": 2.5094837045667684e-06, "loss": 0.5671, "step": 10670 }, { "epoch": 0.68, "grad_norm": 0.8227144479751587, "learning_rate": 2.508594099702682e-06, "loss": 0.5805, "step": 10671 }, { "epoch": 0.68, "grad_norm": 0.9068019390106201, "learning_rate": 2.5077045997430304e-06, "loss": 0.5906, "step": 10672 }, { "epoch": 0.68, "grad_norm": 0.9003850221633911, "learning_rate": 2.5068152047252702e-06, "loss": 0.6073, "step": 10673 }, { "epoch": 0.68, "grad_norm": 0.9493726491928101, "learning_rate": 2.5059259146868474e-06, "loss": 0.5954, "step": 10674 }, { "epoch": 0.68, "grad_norm": 0.8758067488670349, "learning_rate": 2.5050367296652075e-06, "loss": 0.5644, "step": 10675 }, { "epoch": 0.68, "grad_norm": 0.9231355786323547, "learning_rate": 2.504147649697791e-06, "loss": 0.6068, "step": 10676 }, { "epoch": 0.68, "grad_norm": 0.8375126123428345, "learning_rate": 2.5032586748220354e-06, "loss": 0.5932, "step": 10677 }, { "epoch": 0.68, "grad_norm": 0.886325478553772, "learning_rate": 2.5023698050753732e-06, "loss": 0.6023, "step": 10678 }, { "epoch": 0.68, "grad_norm": 0.8187273740768433, "learning_rate": 2.5014810404952262e-06, "loss": 0.5291, "step": 10679 }, { "epoch": 0.68, "grad_norm": 0.8535604476928711, "learning_rate": 2.5005923811190226e-06, "loss": 0.5721, "step": 10680 }, { "epoch": 0.68, "grad_norm": 0.8962329030036926, "learning_rate": 2.4997038269841804e-06, "loss": 0.5637, "step": 10681 }, { "epoch": 0.68, "grad_norm": 0.9370246529579163, "learning_rate": 2.498815378128111e-06, "loss": 0.61, "step": 10682 }, { "epoch": 0.68, "grad_norm": 0.8292384743690491, "learning_rate": 2.497927034588225e-06, "loss": 0.5253, "step": 10683 }, { "epoch": 0.68, "grad_norm": 0.8883755803108215, "learning_rate": 2.497038796401927e-06, "loss": 0.5684, "step": 10684 }, { "epoch": 0.68, "grad_norm": 0.833806037902832, "learning_rate": 2.4961506636066185e-06, "loss": 0.623, "step": 10685 }, { "epoch": 0.68, "grad_norm": 0.9147443771362305, "learning_rate": 2.495262636239697e-06, "loss": 0.6173, "step": 10686 }, { "epoch": 0.68, "grad_norm": 0.8724647760391235, "learning_rate": 2.4943747143385503e-06, "loss": 0.6106, "step": 10687 }, { "epoch": 0.68, "grad_norm": 0.8868297934532166, "learning_rate": 2.4934868979405667e-06, "loss": 0.5995, "step": 10688 }, { "epoch": 0.68, "grad_norm": 0.8661412000656128, "learning_rate": 2.492599187083134e-06, "loss": 0.6492, "step": 10689 }, { "epoch": 0.68, "grad_norm": 0.8457236289978027, "learning_rate": 2.491711581803625e-06, "loss": 0.5432, "step": 10690 }, { "epoch": 0.68, "grad_norm": 0.864971935749054, "learning_rate": 2.490824082139415e-06, "loss": 0.5379, "step": 10691 }, { "epoch": 0.68, "grad_norm": 0.9583846926689148, "learning_rate": 2.489936688127875e-06, "loss": 0.6022, "step": 10692 }, { "epoch": 0.68, "grad_norm": 0.8547648787498474, "learning_rate": 2.4890493998063685e-06, "loss": 0.5668, "step": 10693 }, { "epoch": 0.68, "grad_norm": 0.8871789574623108, "learning_rate": 2.4881622172122595e-06, "loss": 0.6422, "step": 10694 }, { "epoch": 0.68, "grad_norm": 0.9174278974533081, "learning_rate": 2.4872751403828986e-06, "loss": 0.6206, "step": 10695 }, { "epoch": 0.68, "grad_norm": 0.9678024649620056, "learning_rate": 2.4863881693556393e-06, "loss": 0.5992, "step": 10696 }, { "epoch": 0.68, "grad_norm": 0.8451238870620728, "learning_rate": 2.4855013041678335e-06, "loss": 0.544, "step": 10697 }, { "epoch": 0.68, "grad_norm": 0.8987723588943481, "learning_rate": 2.484614544856819e-06, "loss": 0.5851, "step": 10698 }, { "epoch": 0.68, "grad_norm": 0.8522927761077881, "learning_rate": 2.483727891459935e-06, "loss": 0.5805, "step": 10699 }, { "epoch": 0.68, "grad_norm": 0.9000210762023926, "learning_rate": 2.482841344014516e-06, "loss": 0.5587, "step": 10700 }, { "epoch": 0.68, "grad_norm": 0.8868560791015625, "learning_rate": 2.4819549025578917e-06, "loss": 0.5957, "step": 10701 }, { "epoch": 0.68, "grad_norm": 0.8964491486549377, "learning_rate": 2.481068567127389e-06, "loss": 0.5865, "step": 10702 }, { "epoch": 0.68, "grad_norm": 0.8719425797462463, "learning_rate": 2.4801823377603236e-06, "loss": 0.5505, "step": 10703 }, { "epoch": 0.68, "grad_norm": 0.9729426503181458, "learning_rate": 2.4792962144940148e-06, "loss": 0.5863, "step": 10704 }, { "epoch": 0.68, "grad_norm": 0.906240701675415, "learning_rate": 2.4784101973657724e-06, "loss": 0.6275, "step": 10705 }, { "epoch": 0.68, "grad_norm": 0.8632292747497559, "learning_rate": 2.4775242864129055e-06, "loss": 0.5586, "step": 10706 }, { "epoch": 0.68, "grad_norm": 0.9146695733070374, "learning_rate": 2.4766384816727164e-06, "loss": 0.5932, "step": 10707 }, { "epoch": 0.68, "grad_norm": 0.8897523880004883, "learning_rate": 2.475752783182504e-06, "loss": 0.5804, "step": 10708 }, { "epoch": 0.68, "grad_norm": 0.870412290096283, "learning_rate": 2.4748671909795568e-06, "loss": 0.5777, "step": 10709 }, { "epoch": 0.68, "grad_norm": 0.8530032634735107, "learning_rate": 2.4739817051011717e-06, "loss": 0.5358, "step": 10710 }, { "epoch": 0.68, "grad_norm": 0.8868164420127869, "learning_rate": 2.473096325584628e-06, "loss": 0.5883, "step": 10711 }, { "epoch": 0.68, "grad_norm": 0.9638000130653381, "learning_rate": 2.4722110524672074e-06, "loss": 0.6171, "step": 10712 }, { "epoch": 0.68, "grad_norm": 0.945044755935669, "learning_rate": 2.4713258857861856e-06, "loss": 0.5875, "step": 10713 }, { "epoch": 0.68, "grad_norm": 0.8738934993743896, "learning_rate": 2.4704408255788342e-06, "loss": 0.6121, "step": 10714 }, { "epoch": 0.68, "grad_norm": 0.8028507828712463, "learning_rate": 2.4695558718824204e-06, "loss": 0.5547, "step": 10715 }, { "epoch": 0.68, "grad_norm": 0.8492550253868103, "learning_rate": 2.468671024734208e-06, "loss": 0.5684, "step": 10716 }, { "epoch": 0.68, "grad_norm": 0.875840961933136, "learning_rate": 2.4677862841714485e-06, "loss": 0.5418, "step": 10717 }, { "epoch": 0.68, "grad_norm": 0.8230100870132446, "learning_rate": 2.4669016502314038e-06, "loss": 0.5823, "step": 10718 }, { "epoch": 0.68, "grad_norm": 0.8757971525192261, "learning_rate": 2.4660171229513165e-06, "loss": 0.5821, "step": 10719 }, { "epoch": 0.68, "grad_norm": 0.8600106239318848, "learning_rate": 2.465132702368433e-06, "loss": 0.5618, "step": 10720 }, { "epoch": 0.68, "grad_norm": 0.8039467930793762, "learning_rate": 2.4642483885199938e-06, "loss": 0.5544, "step": 10721 }, { "epoch": 0.68, "grad_norm": 0.8692030906677246, "learning_rate": 2.463364181443233e-06, "loss": 0.6235, "step": 10722 }, { "epoch": 0.68, "grad_norm": 0.8942098617553711, "learning_rate": 2.4624800811753826e-06, "loss": 0.6117, "step": 10723 }, { "epoch": 0.68, "grad_norm": 0.8640264272689819, "learning_rate": 2.4615960877536706e-06, "loss": 0.5282, "step": 10724 }, { "epoch": 0.68, "grad_norm": 0.8637884259223938, "learning_rate": 2.4607122012153146e-06, "loss": 0.5566, "step": 10725 }, { "epoch": 0.68, "grad_norm": 0.8512043356895447, "learning_rate": 2.459828421597534e-06, "loss": 0.5275, "step": 10726 }, { "epoch": 0.68, "grad_norm": 0.9386641383171082, "learning_rate": 2.458944748937543e-06, "loss": 0.619, "step": 10727 }, { "epoch": 0.68, "grad_norm": 0.9329386949539185, "learning_rate": 2.4580611832725482e-06, "loss": 0.555, "step": 10728 }, { "epoch": 0.68, "grad_norm": 0.8880377411842346, "learning_rate": 2.4571777246397543e-06, "loss": 0.6331, "step": 10729 }, { "epoch": 0.68, "grad_norm": 0.9043840765953064, "learning_rate": 2.456294373076361e-06, "loss": 0.5705, "step": 10730 }, { "epoch": 0.68, "grad_norm": 0.8265879154205322, "learning_rate": 2.455411128619562e-06, "loss": 0.5232, "step": 10731 }, { "epoch": 0.68, "grad_norm": 0.9159626364707947, "learning_rate": 2.4545279913065513e-06, "loss": 0.6011, "step": 10732 }, { "epoch": 0.68, "grad_norm": 0.9764483571052551, "learning_rate": 2.4536449611745087e-06, "loss": 0.6575, "step": 10733 }, { "epoch": 0.68, "grad_norm": 0.9006572365760803, "learning_rate": 2.45276203826062e-06, "loss": 0.5808, "step": 10734 }, { "epoch": 0.68, "grad_norm": 0.8400965929031372, "learning_rate": 2.451879222602059e-06, "loss": 0.5425, "step": 10735 }, { "epoch": 0.68, "grad_norm": 1.0129814147949219, "learning_rate": 2.4509965142360013e-06, "loss": 0.6147, "step": 10736 }, { "epoch": 0.68, "grad_norm": 1.019564151763916, "learning_rate": 2.4501139131996122e-06, "loss": 0.5869, "step": 10737 }, { "epoch": 0.68, "grad_norm": 0.9409759044647217, "learning_rate": 2.4492314195300583e-06, "loss": 0.5929, "step": 10738 }, { "epoch": 0.68, "grad_norm": 0.924321711063385, "learning_rate": 2.4483490332644918e-06, "loss": 0.602, "step": 10739 }, { "epoch": 0.68, "grad_norm": 0.8794954419136047, "learning_rate": 2.4474667544400744e-06, "loss": 0.5726, "step": 10740 }, { "epoch": 0.68, "grad_norm": 0.9003675580024719, "learning_rate": 2.4465845830939504e-06, "loss": 0.6345, "step": 10741 }, { "epoch": 0.68, "grad_norm": 0.8394778370857239, "learning_rate": 2.4457025192632672e-06, "loss": 0.5132, "step": 10742 }, { "epoch": 0.68, "grad_norm": 0.8338208198547363, "learning_rate": 2.444820562985165e-06, "loss": 0.5275, "step": 10743 }, { "epoch": 0.68, "grad_norm": 0.9329877495765686, "learning_rate": 2.443938714296781e-06, "loss": 0.5879, "step": 10744 }, { "epoch": 0.68, "grad_norm": 0.8452143669128418, "learning_rate": 2.4430569732352444e-06, "loss": 0.5829, "step": 10745 }, { "epoch": 0.68, "grad_norm": 0.9036096930503845, "learning_rate": 2.4421753398376865e-06, "loss": 0.6168, "step": 10746 }, { "epoch": 0.68, "grad_norm": 0.8841857314109802, "learning_rate": 2.441293814141223e-06, "loss": 0.5878, "step": 10747 }, { "epoch": 0.68, "grad_norm": 0.8716691732406616, "learning_rate": 2.4404123961829795e-06, "loss": 0.5744, "step": 10748 }, { "epoch": 0.68, "grad_norm": 0.8817796111106873, "learning_rate": 2.4395310860000644e-06, "loss": 0.5486, "step": 10749 }, { "epoch": 0.68, "grad_norm": 0.9194137454032898, "learning_rate": 2.438649883629588e-06, "loss": 0.5834, "step": 10750 }, { "epoch": 0.68, "grad_norm": 0.9086952805519104, "learning_rate": 2.437768789108656e-06, "loss": 0.6189, "step": 10751 }, { "epoch": 0.68, "grad_norm": 0.9165956377983093, "learning_rate": 2.4368878024743638e-06, "loss": 0.5459, "step": 10752 }, { "epoch": 0.68, "grad_norm": 0.9193375110626221, "learning_rate": 2.4360069237638114e-06, "loss": 0.5939, "step": 10753 }, { "epoch": 0.68, "grad_norm": 0.9131724238395691, "learning_rate": 2.43512615301409e-06, "loss": 0.5554, "step": 10754 }, { "epoch": 0.68, "grad_norm": 0.9217658638954163, "learning_rate": 2.434245490262282e-06, "loss": 0.5851, "step": 10755 }, { "epoch": 0.68, "grad_norm": 0.8388816714286804, "learning_rate": 2.4333649355454704e-06, "loss": 0.6016, "step": 10756 }, { "epoch": 0.68, "grad_norm": 0.8548718690872192, "learning_rate": 2.4324844889007328e-06, "loss": 0.628, "step": 10757 }, { "epoch": 0.68, "grad_norm": 0.8744621276855469, "learning_rate": 2.4316041503651417e-06, "loss": 0.5213, "step": 10758 }, { "epoch": 0.68, "grad_norm": 0.9566894769668579, "learning_rate": 2.430723919975767e-06, "loss": 0.5779, "step": 10759 }, { "epoch": 0.68, "grad_norm": 0.8871926665306091, "learning_rate": 2.4298437977696658e-06, "loss": 0.6031, "step": 10760 }, { "epoch": 0.68, "grad_norm": 0.8938164114952087, "learning_rate": 2.428963783783904e-06, "loss": 0.5478, "step": 10761 }, { "epoch": 0.68, "grad_norm": 0.9035009741783142, "learning_rate": 2.4280838780555347e-06, "loss": 0.5693, "step": 10762 }, { "epoch": 0.68, "grad_norm": 0.9277425408363342, "learning_rate": 2.427204080621605e-06, "loss": 0.5697, "step": 10763 }, { "epoch": 0.68, "grad_norm": 0.8887539505958557, "learning_rate": 2.426324391519161e-06, "loss": 0.5116, "step": 10764 }, { "epoch": 0.68, "grad_norm": 0.8846824765205383, "learning_rate": 2.4254448107852434e-06, "loss": 0.6138, "step": 10765 }, { "epoch": 0.68, "grad_norm": 0.8920591473579407, "learning_rate": 2.424565338456889e-06, "loss": 0.5854, "step": 10766 }, { "epoch": 0.68, "grad_norm": 0.8790110945701599, "learning_rate": 2.4236859745711305e-06, "loss": 0.6154, "step": 10767 }, { "epoch": 0.68, "grad_norm": 0.9227504730224609, "learning_rate": 2.4228067191649917e-06, "loss": 0.5714, "step": 10768 }, { "epoch": 0.68, "grad_norm": 0.8249009847640991, "learning_rate": 2.421927572275494e-06, "loss": 0.5788, "step": 10769 }, { "epoch": 0.68, "grad_norm": 0.8537331223487854, "learning_rate": 2.4210485339396627e-06, "loss": 0.5664, "step": 10770 }, { "epoch": 0.68, "grad_norm": 0.9200884103775024, "learning_rate": 2.4201696041945033e-06, "loss": 0.6134, "step": 10771 }, { "epoch": 0.68, "grad_norm": 0.8930040001869202, "learning_rate": 2.419290783077028e-06, "loss": 0.5332, "step": 10772 }, { "epoch": 0.68, "grad_norm": 0.932697057723999, "learning_rate": 2.41841207062424e-06, "loss": 0.5486, "step": 10773 }, { "epoch": 0.68, "grad_norm": 0.9301908016204834, "learning_rate": 2.4175334668731383e-06, "loss": 0.6119, "step": 10774 }, { "epoch": 0.68, "grad_norm": 0.8337537050247192, "learning_rate": 2.416654971860721e-06, "loss": 0.5692, "step": 10775 }, { "epoch": 0.68, "grad_norm": 0.881458580493927, "learning_rate": 2.415776585623974e-06, "loss": 0.5814, "step": 10776 }, { "epoch": 0.68, "grad_norm": 0.9348959922790527, "learning_rate": 2.4148983081998834e-06, "loss": 0.5929, "step": 10777 }, { "epoch": 0.68, "grad_norm": 0.8827102184295654, "learning_rate": 2.414020139625436e-06, "loss": 0.5264, "step": 10778 }, { "epoch": 0.68, "grad_norm": 0.8925660848617554, "learning_rate": 2.413142079937602e-06, "loss": 0.6432, "step": 10779 }, { "epoch": 0.68, "grad_norm": 0.9171149134635925, "learning_rate": 2.4122641291733567e-06, "loss": 0.5957, "step": 10780 }, { "epoch": 0.68, "grad_norm": 0.9240100383758545, "learning_rate": 2.4113862873696687e-06, "loss": 0.6231, "step": 10781 }, { "epoch": 0.68, "grad_norm": 0.8504339456558228, "learning_rate": 2.410508554563495e-06, "loss": 0.5734, "step": 10782 }, { "epoch": 0.68, "grad_norm": 0.8871136903762817, "learning_rate": 2.4096309307918013e-06, "loss": 0.5962, "step": 10783 }, { "epoch": 0.68, "grad_norm": 0.9689726829528809, "learning_rate": 2.4087534160915364e-06, "loss": 0.6037, "step": 10784 }, { "epoch": 0.68, "grad_norm": 0.8371800780296326, "learning_rate": 2.407876010499651e-06, "loss": 0.6064, "step": 10785 }, { "epoch": 0.68, "grad_norm": 0.9337158799171448, "learning_rate": 2.4069987140530893e-06, "loss": 0.5893, "step": 10786 }, { "epoch": 0.68, "grad_norm": 0.8576418161392212, "learning_rate": 2.4061215267887915e-06, "loss": 0.5729, "step": 10787 }, { "epoch": 0.68, "grad_norm": 0.859890341758728, "learning_rate": 2.4052444487436925e-06, "loss": 0.5478, "step": 10788 }, { "epoch": 0.68, "grad_norm": 0.9343597292900085, "learning_rate": 2.4043674799547252e-06, "loss": 0.5488, "step": 10789 }, { "epoch": 0.68, "grad_norm": 0.9209311008453369, "learning_rate": 2.4034906204588104e-06, "loss": 0.5923, "step": 10790 }, { "epoch": 0.68, "grad_norm": 0.8784055709838867, "learning_rate": 2.4026138702928763e-06, "loss": 0.5471, "step": 10791 }, { "epoch": 0.68, "grad_norm": 0.8925771117210388, "learning_rate": 2.4017372294938347e-06, "loss": 0.6078, "step": 10792 }, { "epoch": 0.68, "grad_norm": 0.9076485633850098, "learning_rate": 2.4008606980985994e-06, "loss": 0.6088, "step": 10793 }, { "epoch": 0.68, "grad_norm": 0.9135996699333191, "learning_rate": 2.399984276144079e-06, "loss": 0.6064, "step": 10794 }, { "epoch": 0.68, "grad_norm": 0.8818286657333374, "learning_rate": 2.3991079636671755e-06, "loss": 0.5986, "step": 10795 }, { "epoch": 0.68, "grad_norm": 0.8494421243667603, "learning_rate": 2.398231760704788e-06, "loss": 0.5909, "step": 10796 }, { "epoch": 0.68, "grad_norm": 0.8972966074943542, "learning_rate": 2.397355667293812e-06, "loss": 0.5537, "step": 10797 }, { "epoch": 0.68, "grad_norm": 0.884488582611084, "learning_rate": 2.396479683471133e-06, "loss": 0.6015, "step": 10798 }, { "epoch": 0.68, "grad_norm": 0.8640215992927551, "learning_rate": 2.395603809273635e-06, "loss": 0.6178, "step": 10799 }, { "epoch": 0.68, "grad_norm": 0.9510016441345215, "learning_rate": 2.3947280447382055e-06, "loss": 0.6046, "step": 10800 }, { "epoch": 0.68, "grad_norm": 0.8891522288322449, "learning_rate": 2.3938523899017124e-06, "loss": 0.5477, "step": 10801 }, { "epoch": 0.68, "grad_norm": 0.8761363625526428, "learning_rate": 2.392976844801029e-06, "loss": 0.5773, "step": 10802 }, { "epoch": 0.68, "grad_norm": 0.8561110496520996, "learning_rate": 2.3921014094730216e-06, "loss": 0.5879, "step": 10803 }, { "epoch": 0.68, "grad_norm": 0.8632552623748779, "learning_rate": 2.3912260839545514e-06, "loss": 0.5905, "step": 10804 }, { "epoch": 0.68, "grad_norm": 0.8456622958183289, "learning_rate": 2.390350868282478e-06, "loss": 0.6465, "step": 10805 }, { "epoch": 0.68, "grad_norm": 0.9606796503067017, "learning_rate": 2.389475762493649e-06, "loss": 0.5823, "step": 10806 }, { "epoch": 0.68, "grad_norm": 0.8369455337524414, "learning_rate": 2.3886007666249124e-06, "loss": 0.5502, "step": 10807 }, { "epoch": 0.68, "grad_norm": 0.8868955373764038, "learning_rate": 2.387725880713117e-06, "loss": 0.5796, "step": 10808 }, { "epoch": 0.68, "grad_norm": 0.9382752180099487, "learning_rate": 2.3868511047950955e-06, "loss": 0.625, "step": 10809 }, { "epoch": 0.68, "grad_norm": 0.9117169380187988, "learning_rate": 2.3859764389076834e-06, "loss": 0.5422, "step": 10810 }, { "epoch": 0.68, "grad_norm": 0.8499246835708618, "learning_rate": 2.3851018830877115e-06, "loss": 0.5505, "step": 10811 }, { "epoch": 0.68, "grad_norm": 0.9279850125312805, "learning_rate": 2.3842274373719994e-06, "loss": 0.5964, "step": 10812 }, { "epoch": 0.69, "grad_norm": 0.9057535529136658, "learning_rate": 2.383353101797374e-06, "loss": 0.5497, "step": 10813 }, { "epoch": 0.69, "grad_norm": 0.8557054996490479, "learning_rate": 2.3824788764006446e-06, "loss": 0.5935, "step": 10814 }, { "epoch": 0.69, "grad_norm": 0.8106154203414917, "learning_rate": 2.3816047612186243e-06, "loss": 0.5335, "step": 10815 }, { "epoch": 0.69, "grad_norm": 0.8800660371780396, "learning_rate": 2.3807307562881188e-06, "loss": 0.5915, "step": 10816 }, { "epoch": 0.69, "grad_norm": 0.901800811290741, "learning_rate": 2.3798568616459295e-06, "loss": 0.6108, "step": 10817 }, { "epoch": 0.69, "grad_norm": 0.9201937913894653, "learning_rate": 2.378983077328853e-06, "loss": 0.563, "step": 10818 }, { "epoch": 0.69, "grad_norm": 0.8660761713981628, "learning_rate": 2.378109403373683e-06, "loss": 0.5706, "step": 10819 }, { "epoch": 0.69, "grad_norm": 0.8939432501792908, "learning_rate": 2.3772358398172013e-06, "loss": 0.6153, "step": 10820 }, { "epoch": 0.69, "grad_norm": 0.8241981863975525, "learning_rate": 2.3763623866961984e-06, "loss": 0.5213, "step": 10821 }, { "epoch": 0.69, "grad_norm": 0.9262666702270508, "learning_rate": 2.375489044047446e-06, "loss": 0.5569, "step": 10822 }, { "epoch": 0.69, "grad_norm": 0.9339314699172974, "learning_rate": 2.37461581190772e-06, "loss": 0.62, "step": 10823 }, { "epoch": 0.69, "grad_norm": 0.8499922156333923, "learning_rate": 2.37374269031379e-06, "loss": 0.568, "step": 10824 }, { "epoch": 0.69, "grad_norm": 0.8106879591941833, "learning_rate": 2.3728696793024187e-06, "loss": 0.5473, "step": 10825 }, { "epoch": 0.69, "grad_norm": 0.9187078475952148, "learning_rate": 2.371996778910366e-06, "loss": 0.5985, "step": 10826 }, { "epoch": 0.69, "grad_norm": 0.9437380433082581, "learning_rate": 2.3711239891743886e-06, "loss": 0.5821, "step": 10827 }, { "epoch": 0.69, "grad_norm": 0.8216588497161865, "learning_rate": 2.370251310131233e-06, "loss": 0.4918, "step": 10828 }, { "epoch": 0.69, "grad_norm": 0.8683214783668518, "learning_rate": 2.369378741817647e-06, "loss": 0.5689, "step": 10829 }, { "epoch": 0.69, "grad_norm": 0.8889510035514832, "learning_rate": 2.3685062842703697e-06, "loss": 0.5838, "step": 10830 }, { "epoch": 0.69, "grad_norm": 0.9381679892539978, "learning_rate": 2.3676339375261394e-06, "loss": 0.6048, "step": 10831 }, { "epoch": 0.69, "grad_norm": 0.9227031469345093, "learning_rate": 2.3667617016216885e-06, "loss": 0.5528, "step": 10832 }, { "epoch": 0.69, "grad_norm": 0.8656090497970581, "learning_rate": 2.365889576593738e-06, "loss": 0.5258, "step": 10833 }, { "epoch": 0.69, "grad_norm": 0.9459344148635864, "learning_rate": 2.365017562479016e-06, "loss": 0.5723, "step": 10834 }, { "epoch": 0.69, "grad_norm": 0.8714156150817871, "learning_rate": 2.36414565931424e-06, "loss": 0.5408, "step": 10835 }, { "epoch": 0.69, "grad_norm": 0.9365402460098267, "learning_rate": 2.3632738671361187e-06, "loss": 0.5667, "step": 10836 }, { "epoch": 0.69, "grad_norm": 0.952563464641571, "learning_rate": 2.362402185981363e-06, "loss": 0.5606, "step": 10837 }, { "epoch": 0.69, "grad_norm": 0.8872142434120178, "learning_rate": 2.3615306158866745e-06, "loss": 0.6201, "step": 10838 }, { "epoch": 0.69, "grad_norm": 0.9767509698867798, "learning_rate": 2.360659156888754e-06, "loss": 0.6305, "step": 10839 }, { "epoch": 0.69, "grad_norm": 0.8186350464820862, "learning_rate": 2.359787809024297e-06, "loss": 0.5489, "step": 10840 }, { "epoch": 0.69, "grad_norm": 0.891994297504425, "learning_rate": 2.358916572329986e-06, "loss": 0.5613, "step": 10841 }, { "epoch": 0.69, "grad_norm": 0.8956696391105652, "learning_rate": 2.3580454468425136e-06, "loss": 0.558, "step": 10842 }, { "epoch": 0.69, "grad_norm": 0.8858946561813354, "learning_rate": 2.357174432598558e-06, "loss": 0.5871, "step": 10843 }, { "epoch": 0.69, "grad_norm": 0.9084068536758423, "learning_rate": 2.356303529634791e-06, "loss": 0.6036, "step": 10844 }, { "epoch": 0.69, "grad_norm": 0.8893341422080994, "learning_rate": 2.355432737987886e-06, "loss": 0.6102, "step": 10845 }, { "epoch": 0.69, "grad_norm": 0.8843010067939758, "learning_rate": 2.3545620576945088e-06, "loss": 0.5974, "step": 10846 }, { "epoch": 0.69, "grad_norm": 0.9258445501327515, "learning_rate": 2.3536914887913203e-06, "loss": 0.5951, "step": 10847 }, { "epoch": 0.69, "grad_norm": 0.8948039412498474, "learning_rate": 2.3528210313149793e-06, "loss": 0.556, "step": 10848 }, { "epoch": 0.69, "grad_norm": 0.9238641262054443, "learning_rate": 2.351950685302134e-06, "loss": 0.6082, "step": 10849 }, { "epoch": 0.69, "grad_norm": 0.9253204464912415, "learning_rate": 2.351080450789431e-06, "loss": 0.5741, "step": 10850 }, { "epoch": 0.69, "grad_norm": 0.8795269727706909, "learning_rate": 2.3502103278135203e-06, "loss": 0.5695, "step": 10851 }, { "epoch": 0.69, "grad_norm": 0.905057430267334, "learning_rate": 2.349340316411032e-06, "loss": 0.5569, "step": 10852 }, { "epoch": 0.69, "grad_norm": 0.8453585505485535, "learning_rate": 2.3484704166186024e-06, "loss": 0.5595, "step": 10853 }, { "epoch": 0.69, "grad_norm": 0.9011979699134827, "learning_rate": 2.347600628472859e-06, "loss": 0.5623, "step": 10854 }, { "epoch": 0.69, "grad_norm": 0.874869704246521, "learning_rate": 2.3467309520104265e-06, "loss": 0.5435, "step": 10855 }, { "epoch": 0.69, "grad_norm": 0.965836763381958, "learning_rate": 2.3458613872679255e-06, "loss": 0.6046, "step": 10856 }, { "epoch": 0.69, "grad_norm": 0.8620368242263794, "learning_rate": 2.344991934281966e-06, "loss": 0.5409, "step": 10857 }, { "epoch": 0.69, "grad_norm": 0.9628636240959167, "learning_rate": 2.344122593089161e-06, "loss": 0.6049, "step": 10858 }, { "epoch": 0.69, "grad_norm": 0.8034865856170654, "learning_rate": 2.3432533637261135e-06, "loss": 0.5112, "step": 10859 }, { "epoch": 0.69, "grad_norm": 0.8890984058380127, "learning_rate": 2.3423842462294257e-06, "loss": 0.5682, "step": 10860 }, { "epoch": 0.69, "grad_norm": 0.9449944496154785, "learning_rate": 2.341515240635691e-06, "loss": 0.5434, "step": 10861 }, { "epoch": 0.69, "grad_norm": 0.9405069947242737, "learning_rate": 2.340646346981504e-06, "loss": 0.5712, "step": 10862 }, { "epoch": 0.69, "grad_norm": 0.9340393543243408, "learning_rate": 2.339777565303444e-06, "loss": 0.6118, "step": 10863 }, { "epoch": 0.69, "grad_norm": 0.8473518490791321, "learning_rate": 2.3389088956380982e-06, "loss": 0.4942, "step": 10864 }, { "epoch": 0.69, "grad_norm": 0.8445674777030945, "learning_rate": 2.338040338022044e-06, "loss": 0.548, "step": 10865 }, { "epoch": 0.69, "grad_norm": 0.9048270583152771, "learning_rate": 2.3371718924918487e-06, "loss": 0.5564, "step": 10866 }, { "epoch": 0.69, "grad_norm": 0.9229834675788879, "learning_rate": 2.3363035590840814e-06, "loss": 0.5522, "step": 10867 }, { "epoch": 0.69, "grad_norm": 0.89118891954422, "learning_rate": 2.3354353378353056e-06, "loss": 0.5978, "step": 10868 }, { "epoch": 0.69, "grad_norm": 0.8255208730697632, "learning_rate": 2.334567228782078e-06, "loss": 0.5616, "step": 10869 }, { "epoch": 0.69, "grad_norm": 0.9787054061889648, "learning_rate": 2.3336992319609534e-06, "loss": 0.6014, "step": 10870 }, { "epoch": 0.69, "grad_norm": 0.8942728638648987, "learning_rate": 2.3328313474084755e-06, "loss": 0.6, "step": 10871 }, { "epoch": 0.69, "grad_norm": 0.8750494718551636, "learning_rate": 2.3319635751611937e-06, "loss": 0.5883, "step": 10872 }, { "epoch": 0.69, "grad_norm": 0.8585361838340759, "learning_rate": 2.3310959152556453e-06, "loss": 0.6229, "step": 10873 }, { "epoch": 0.69, "grad_norm": 0.9201778173446655, "learning_rate": 2.3302283677283618e-06, "loss": 0.596, "step": 10874 }, { "epoch": 0.69, "grad_norm": 0.8352607488632202, "learning_rate": 2.3293609326158745e-06, "loss": 0.5342, "step": 10875 }, { "epoch": 0.69, "grad_norm": 0.8795650005340576, "learning_rate": 2.328493609954707e-06, "loss": 0.5886, "step": 10876 }, { "epoch": 0.69, "grad_norm": 0.9259792566299438, "learning_rate": 2.3276263997813812e-06, "loss": 0.5542, "step": 10877 }, { "epoch": 0.69, "grad_norm": 0.933414876461029, "learning_rate": 2.3267593021324127e-06, "loss": 0.6461, "step": 10878 }, { "epoch": 0.69, "grad_norm": 0.9607113599777222, "learning_rate": 2.3258923170443087e-06, "loss": 0.6609, "step": 10879 }, { "epoch": 0.69, "grad_norm": 0.8267933130264282, "learning_rate": 2.3250254445535743e-06, "loss": 0.5456, "step": 10880 }, { "epoch": 0.69, "grad_norm": 0.884596586227417, "learning_rate": 2.324158684696717e-06, "loss": 0.5046, "step": 10881 }, { "epoch": 0.69, "grad_norm": 0.8587662577629089, "learning_rate": 2.323292037510227e-06, "loss": 0.4938, "step": 10882 }, { "epoch": 0.69, "grad_norm": 0.9369049668312073, "learning_rate": 2.3224255030305977e-06, "loss": 0.5844, "step": 10883 }, { "epoch": 0.69, "grad_norm": 0.830431342124939, "learning_rate": 2.321559081294316e-06, "loss": 0.5372, "step": 10884 }, { "epoch": 0.69, "grad_norm": 0.9005808234214783, "learning_rate": 2.3206927723378638e-06, "loss": 0.5994, "step": 10885 }, { "epoch": 0.69, "grad_norm": 0.9264594912528992, "learning_rate": 2.3198265761977196e-06, "loss": 0.5876, "step": 10886 }, { "epoch": 0.69, "grad_norm": 0.871841549873352, "learning_rate": 2.3189604929103533e-06, "loss": 0.5646, "step": 10887 }, { "epoch": 0.69, "grad_norm": 0.9147515296936035, "learning_rate": 2.318094522512232e-06, "loss": 0.6012, "step": 10888 }, { "epoch": 0.69, "grad_norm": 0.8173208236694336, "learning_rate": 2.3172286650398247e-06, "loss": 0.5651, "step": 10889 }, { "epoch": 0.69, "grad_norm": 0.8764269351959229, "learning_rate": 2.3163629205295833e-06, "loss": 0.5715, "step": 10890 }, { "epoch": 0.69, "grad_norm": 0.8883751034736633, "learning_rate": 2.3154972890179638e-06, "loss": 0.563, "step": 10891 }, { "epoch": 0.69, "grad_norm": 0.941449761390686, "learning_rate": 2.3146317705414168e-06, "loss": 0.5512, "step": 10892 }, { "epoch": 0.69, "grad_norm": 0.8946614861488342, "learning_rate": 2.31376636513638e-06, "loss": 0.5787, "step": 10893 }, { "epoch": 0.69, "grad_norm": 0.8711824417114258, "learning_rate": 2.3129010728393012e-06, "loss": 0.6297, "step": 10894 }, { "epoch": 0.69, "grad_norm": 0.9191935658454895, "learning_rate": 2.3120358936866084e-06, "loss": 0.6161, "step": 10895 }, { "epoch": 0.69, "grad_norm": 0.9106520414352417, "learning_rate": 2.3111708277147333e-06, "loss": 0.5355, "step": 10896 }, { "epoch": 0.69, "grad_norm": 0.9305688142776489, "learning_rate": 2.310305874960101e-06, "loss": 0.6004, "step": 10897 }, { "epoch": 0.69, "grad_norm": 0.9202895760536194, "learning_rate": 2.3094410354591314e-06, "loss": 0.6412, "step": 10898 }, { "epoch": 0.69, "grad_norm": 0.9189572930335999, "learning_rate": 2.30857630924824e-06, "loss": 0.5783, "step": 10899 }, { "epoch": 0.69, "grad_norm": 0.8673662543296814, "learning_rate": 2.3077116963638396e-06, "loss": 0.5999, "step": 10900 }, { "epoch": 0.69, "grad_norm": 0.8311372399330139, "learning_rate": 2.3068471968423296e-06, "loss": 0.5998, "step": 10901 }, { "epoch": 0.69, "grad_norm": 0.9261046051979065, "learning_rate": 2.305982810720119e-06, "loss": 0.6344, "step": 10902 }, { "epoch": 0.69, "grad_norm": 0.8744479417800903, "learning_rate": 2.3051185380335995e-06, "loss": 0.556, "step": 10903 }, { "epoch": 0.69, "grad_norm": 0.8244556784629822, "learning_rate": 2.304254378819163e-06, "loss": 0.5574, "step": 10904 }, { "epoch": 0.69, "grad_norm": 0.8999570608139038, "learning_rate": 2.3033903331131986e-06, "loss": 0.609, "step": 10905 }, { "epoch": 0.69, "grad_norm": 0.9004625082015991, "learning_rate": 2.3025264009520833e-06, "loss": 0.596, "step": 10906 }, { "epoch": 0.69, "grad_norm": 0.8306798338890076, "learning_rate": 2.3016625823721985e-06, "loss": 0.5895, "step": 10907 }, { "epoch": 0.69, "grad_norm": 0.9338074922561646, "learning_rate": 2.300798877409918e-06, "loss": 0.6051, "step": 10908 }, { "epoch": 0.69, "grad_norm": 0.9324320554733276, "learning_rate": 2.2999352861016042e-06, "loss": 0.5728, "step": 10909 }, { "epoch": 0.69, "grad_norm": 0.8564440608024597, "learning_rate": 2.299071808483623e-06, "loss": 0.5535, "step": 10910 }, { "epoch": 0.69, "grad_norm": 0.9293292164802551, "learning_rate": 2.2982084445923327e-06, "loss": 0.625, "step": 10911 }, { "epoch": 0.69, "grad_norm": 0.8664717078208923, "learning_rate": 2.297345194464086e-06, "loss": 0.5831, "step": 10912 }, { "epoch": 0.69, "grad_norm": 0.8852226734161377, "learning_rate": 2.2964820581352325e-06, "loss": 0.6052, "step": 10913 }, { "epoch": 0.69, "grad_norm": 0.8343265056610107, "learning_rate": 2.295619035642111e-06, "loss": 0.5413, "step": 10914 }, { "epoch": 0.69, "grad_norm": 0.8824000358581543, "learning_rate": 2.294756127021066e-06, "loss": 0.5667, "step": 10915 }, { "epoch": 0.69, "grad_norm": 0.9818698763847351, "learning_rate": 2.2938933323084315e-06, "loss": 0.6607, "step": 10916 }, { "epoch": 0.69, "grad_norm": 0.8719751238822937, "learning_rate": 2.293030651540534e-06, "loss": 0.6078, "step": 10917 }, { "epoch": 0.69, "grad_norm": 0.8655606508255005, "learning_rate": 2.2921680847536976e-06, "loss": 0.5596, "step": 10918 }, { "epoch": 0.69, "grad_norm": 0.9295132160186768, "learning_rate": 2.2913056319842436e-06, "loss": 0.5886, "step": 10919 }, { "epoch": 0.69, "grad_norm": 0.8664971590042114, "learning_rate": 2.2904432932684865e-06, "loss": 0.5728, "step": 10920 }, { "epoch": 0.69, "grad_norm": 0.857593297958374, "learning_rate": 2.289581068642737e-06, "loss": 0.5657, "step": 10921 }, { "epoch": 0.69, "grad_norm": 0.9390791058540344, "learning_rate": 2.2887189581433016e-06, "loss": 0.5587, "step": 10922 }, { "epoch": 0.69, "grad_norm": 0.9358313679695129, "learning_rate": 2.287856961806475e-06, "loss": 0.6039, "step": 10923 }, { "epoch": 0.69, "grad_norm": 0.862331211566925, "learning_rate": 2.286995079668561e-06, "loss": 0.5494, "step": 10924 }, { "epoch": 0.69, "grad_norm": 0.841224193572998, "learning_rate": 2.2861333117658442e-06, "loss": 0.5516, "step": 10925 }, { "epoch": 0.69, "grad_norm": 0.8751315474510193, "learning_rate": 2.2852716581346124e-06, "loss": 0.6054, "step": 10926 }, { "epoch": 0.69, "grad_norm": 0.8290528655052185, "learning_rate": 2.2844101188111477e-06, "loss": 0.5849, "step": 10927 }, { "epoch": 0.69, "grad_norm": 0.9079095125198364, "learning_rate": 2.283548693831726e-06, "loss": 0.5642, "step": 10928 }, { "epoch": 0.69, "grad_norm": 0.9231603145599365, "learning_rate": 2.2826873832326192e-06, "loss": 0.5922, "step": 10929 }, { "epoch": 0.69, "grad_norm": 0.9110752940177917, "learning_rate": 2.2818261870500954e-06, "loss": 0.5498, "step": 10930 }, { "epoch": 0.69, "grad_norm": 0.8755868077278137, "learning_rate": 2.280965105320411e-06, "loss": 0.5596, "step": 10931 }, { "epoch": 0.69, "grad_norm": 0.8865872621536255, "learning_rate": 2.280104138079831e-06, "loss": 0.6292, "step": 10932 }, { "epoch": 0.69, "grad_norm": 0.8371679186820984, "learning_rate": 2.2792432853646023e-06, "loss": 0.5717, "step": 10933 }, { "epoch": 0.69, "grad_norm": 0.9466820359230042, "learning_rate": 2.2783825472109743e-06, "loss": 0.5489, "step": 10934 }, { "epoch": 0.69, "grad_norm": 0.8898562788963318, "learning_rate": 2.277521923655189e-06, "loss": 0.599, "step": 10935 }, { "epoch": 0.69, "grad_norm": 0.904425323009491, "learning_rate": 2.276661414733485e-06, "loss": 0.5712, "step": 10936 }, { "epoch": 0.69, "grad_norm": 0.936082661151886, "learning_rate": 2.2758010204820945e-06, "loss": 0.6113, "step": 10937 }, { "epoch": 0.69, "grad_norm": 0.8919061422348022, "learning_rate": 2.2749407409372487e-06, "loss": 0.5971, "step": 10938 }, { "epoch": 0.69, "grad_norm": 0.884014368057251, "learning_rate": 2.2740805761351664e-06, "loss": 0.5329, "step": 10939 }, { "epoch": 0.69, "grad_norm": 0.9374119639396667, "learning_rate": 2.273220526112068e-06, "loss": 0.5739, "step": 10940 }, { "epoch": 0.69, "grad_norm": 0.8741660714149475, "learning_rate": 2.272360590904168e-06, "loss": 0.5725, "step": 10941 }, { "epoch": 0.69, "grad_norm": 0.8772330284118652, "learning_rate": 2.2715007705476744e-06, "loss": 0.5734, "step": 10942 }, { "epoch": 0.69, "grad_norm": 0.9191374182701111, "learning_rate": 2.2706410650787937e-06, "loss": 0.5994, "step": 10943 }, { "epoch": 0.69, "grad_norm": 0.8320372700691223, "learning_rate": 2.2697814745337186e-06, "loss": 0.5521, "step": 10944 }, { "epoch": 0.69, "grad_norm": 0.835684597492218, "learning_rate": 2.2689219989486506e-06, "loss": 0.6022, "step": 10945 }, { "epoch": 0.69, "grad_norm": 0.9576183557510376, "learning_rate": 2.2680626383597782e-06, "loss": 0.63, "step": 10946 }, { "epoch": 0.69, "grad_norm": 0.877859354019165, "learning_rate": 2.267203392803282e-06, "loss": 0.5517, "step": 10947 }, { "epoch": 0.69, "grad_norm": 0.9066639542579651, "learning_rate": 2.266344262315345e-06, "loss": 0.6017, "step": 10948 }, { "epoch": 0.69, "grad_norm": 1.0002546310424805, "learning_rate": 2.2654852469321405e-06, "loss": 0.581, "step": 10949 }, { "epoch": 0.69, "grad_norm": 1.0141836404800415, "learning_rate": 2.26462634668984e-06, "loss": 0.6754, "step": 10950 }, { "epoch": 0.69, "grad_norm": 0.8675405979156494, "learning_rate": 2.2637675616246103e-06, "loss": 0.6055, "step": 10951 }, { "epoch": 0.69, "grad_norm": 0.8971235752105713, "learning_rate": 2.262908891772608e-06, "loss": 0.6545, "step": 10952 }, { "epoch": 0.69, "grad_norm": 0.8403980731964111, "learning_rate": 2.2620503371699886e-06, "loss": 0.5979, "step": 10953 }, { "epoch": 0.69, "grad_norm": 0.948631227016449, "learning_rate": 2.261191897852909e-06, "loss": 0.5405, "step": 10954 }, { "epoch": 0.69, "grad_norm": 0.8814859986305237, "learning_rate": 2.260333573857509e-06, "loss": 0.6071, "step": 10955 }, { "epoch": 0.69, "grad_norm": 0.8945904970169067, "learning_rate": 2.2594753652199313e-06, "loss": 0.6018, "step": 10956 }, { "epoch": 0.69, "grad_norm": 0.8850582838058472, "learning_rate": 2.2586172719763126e-06, "loss": 0.5572, "step": 10957 }, { "epoch": 0.69, "grad_norm": 0.8527590036392212, "learning_rate": 2.2577592941627842e-06, "loss": 0.5759, "step": 10958 }, { "epoch": 0.69, "grad_norm": 0.870134711265564, "learning_rate": 2.2569014318154735e-06, "loss": 0.576, "step": 10959 }, { "epoch": 0.69, "grad_norm": 1.019551396369934, "learning_rate": 2.2560436849704996e-06, "loss": 0.6245, "step": 10960 }, { "epoch": 0.69, "grad_norm": 0.9043488502502441, "learning_rate": 2.255186053663979e-06, "loss": 0.6191, "step": 10961 }, { "epoch": 0.69, "grad_norm": 0.8273271918296814, "learning_rate": 2.2543285379320283e-06, "loss": 0.5481, "step": 10962 }, { "epoch": 0.69, "grad_norm": 0.833625078201294, "learning_rate": 2.2534711378107498e-06, "loss": 0.5749, "step": 10963 }, { "epoch": 0.69, "grad_norm": 0.9222172498703003, "learning_rate": 2.2526138533362475e-06, "loss": 0.5493, "step": 10964 }, { "epoch": 0.69, "grad_norm": 0.9397459626197815, "learning_rate": 2.2517566845446182e-06, "loss": 0.5928, "step": 10965 }, { "epoch": 0.69, "grad_norm": 0.8722630143165588, "learning_rate": 2.2508996314719544e-06, "loss": 0.5606, "step": 10966 }, { "epoch": 0.69, "grad_norm": 0.870911717414856, "learning_rate": 2.250042694154345e-06, "loss": 0.5453, "step": 10967 }, { "epoch": 0.69, "grad_norm": 0.8412066698074341, "learning_rate": 2.2491858726278704e-06, "loss": 0.5291, "step": 10968 }, { "epoch": 0.69, "grad_norm": 0.8742692470550537, "learning_rate": 2.248329166928609e-06, "loss": 0.6004, "step": 10969 }, { "epoch": 0.7, "grad_norm": 0.9923862814903259, "learning_rate": 2.2474725770926337e-06, "loss": 0.552, "step": 10970 }, { "epoch": 0.7, "grad_norm": 0.9316403865814209, "learning_rate": 2.2466161031560136e-06, "loss": 0.6006, "step": 10971 }, { "epoch": 0.7, "grad_norm": 0.8640490174293518, "learning_rate": 2.2457597451548102e-06, "loss": 0.521, "step": 10972 }, { "epoch": 0.7, "grad_norm": 0.8956085443496704, "learning_rate": 2.2449035031250847e-06, "loss": 0.5854, "step": 10973 }, { "epoch": 0.7, "grad_norm": 0.8568456768989563, "learning_rate": 2.2440473771028855e-06, "loss": 0.5973, "step": 10974 }, { "epoch": 0.7, "grad_norm": 0.8945633769035339, "learning_rate": 2.2431913671242666e-06, "loss": 0.6168, "step": 10975 }, { "epoch": 0.7, "grad_norm": 0.8528943657875061, "learning_rate": 2.242335473225268e-06, "loss": 0.5569, "step": 10976 }, { "epoch": 0.7, "grad_norm": 0.8728605508804321, "learning_rate": 2.2414796954419286e-06, "loss": 0.5558, "step": 10977 }, { "epoch": 0.7, "grad_norm": 0.8296922445297241, "learning_rate": 2.2406240338102836e-06, "loss": 0.5519, "step": 10978 }, { "epoch": 0.7, "grad_norm": 0.9309175610542297, "learning_rate": 2.239768488366361e-06, "loss": 0.6021, "step": 10979 }, { "epoch": 0.7, "grad_norm": 0.8594921827316284, "learning_rate": 2.2389130591461855e-06, "loss": 0.5878, "step": 10980 }, { "epoch": 0.7, "grad_norm": 0.9349560737609863, "learning_rate": 2.2380577461857777e-06, "loss": 0.5937, "step": 10981 }, { "epoch": 0.7, "grad_norm": 0.8946079611778259, "learning_rate": 2.2372025495211465e-06, "loss": 0.6016, "step": 10982 }, { "epoch": 0.7, "grad_norm": 0.8826418519020081, "learning_rate": 2.236347469188308e-06, "loss": 0.6071, "step": 10983 }, { "epoch": 0.7, "grad_norm": 0.9132988452911377, "learning_rate": 2.2354925052232625e-06, "loss": 0.5728, "step": 10984 }, { "epoch": 0.7, "grad_norm": 0.8709650039672852, "learning_rate": 2.2346376576620103e-06, "loss": 0.5415, "step": 10985 }, { "epoch": 0.7, "grad_norm": 0.982613205909729, "learning_rate": 2.2337829265405466e-06, "loss": 0.5818, "step": 10986 }, { "epoch": 0.7, "grad_norm": 0.9030888676643372, "learning_rate": 2.2329283118948604e-06, "loss": 0.5771, "step": 10987 }, { "epoch": 0.7, "grad_norm": 0.8313351273536682, "learning_rate": 2.232073813760937e-06, "loss": 0.6115, "step": 10988 }, { "epoch": 0.7, "grad_norm": 0.8704630136489868, "learning_rate": 2.2312194321747582e-06, "loss": 0.5722, "step": 10989 }, { "epoch": 0.7, "grad_norm": 0.9125388264656067, "learning_rate": 2.230365167172296e-06, "loss": 0.5761, "step": 10990 }, { "epoch": 0.7, "grad_norm": 0.9968715906143188, "learning_rate": 2.2295110187895215e-06, "loss": 0.5553, "step": 10991 }, { "epoch": 0.7, "grad_norm": 0.8663219809532166, "learning_rate": 2.2286569870624e-06, "loss": 0.5965, "step": 10992 }, { "epoch": 0.7, "grad_norm": 0.899998664855957, "learning_rate": 2.227803072026892e-06, "loss": 0.5645, "step": 10993 }, { "epoch": 0.7, "grad_norm": 0.9184356927871704, "learning_rate": 2.226949273718953e-06, "loss": 0.6109, "step": 10994 }, { "epoch": 0.7, "grad_norm": 0.9209024906158447, "learning_rate": 2.226095592174533e-06, "loss": 0.6007, "step": 10995 }, { "epoch": 0.7, "grad_norm": 0.812882125377655, "learning_rate": 2.2252420274295782e-06, "loss": 0.5157, "step": 10996 }, { "epoch": 0.7, "grad_norm": 0.8180590271949768, "learning_rate": 2.224388579520031e-06, "loss": 0.5697, "step": 10997 }, { "epoch": 0.7, "grad_norm": 0.9130131602287292, "learning_rate": 2.2235352484818228e-06, "loss": 0.5966, "step": 10998 }, { "epoch": 0.7, "grad_norm": 0.8782884478569031, "learning_rate": 2.222682034350887e-06, "loss": 0.5733, "step": 10999 }, { "epoch": 0.7, "grad_norm": 0.912164032459259, "learning_rate": 2.221828937163149e-06, "loss": 0.5988, "step": 11000 }, { "epoch": 0.7, "grad_norm": 0.8906491994857788, "learning_rate": 2.22097595695453e-06, "loss": 0.5479, "step": 11001 }, { "epoch": 0.7, "grad_norm": 0.8602820634841919, "learning_rate": 2.220123093760946e-06, "loss": 0.5901, "step": 11002 }, { "epoch": 0.7, "grad_norm": 0.9242262244224548, "learning_rate": 2.2192703476183093e-06, "loss": 0.5587, "step": 11003 }, { "epoch": 0.7, "grad_norm": 0.919808566570282, "learning_rate": 2.2184177185625217e-06, "loss": 0.6188, "step": 11004 }, { "epoch": 0.7, "grad_norm": 0.8946382999420166, "learning_rate": 2.217565206629491e-06, "loss": 0.5634, "step": 11005 }, { "epoch": 0.7, "grad_norm": 0.8390125632286072, "learning_rate": 2.2167128118551084e-06, "loss": 0.5738, "step": 11006 }, { "epoch": 0.7, "grad_norm": 0.8909174203872681, "learning_rate": 2.2158605342752667e-06, "loss": 0.6098, "step": 11007 }, { "epoch": 0.7, "grad_norm": 0.851243257522583, "learning_rate": 2.2150083739258525e-06, "loss": 0.5296, "step": 11008 }, { "epoch": 0.7, "grad_norm": 0.866870641708374, "learning_rate": 2.214156330842748e-06, "loss": 0.5695, "step": 11009 }, { "epoch": 0.7, "grad_norm": 0.8538164496421814, "learning_rate": 2.2133044050618286e-06, "loss": 0.5897, "step": 11010 }, { "epoch": 0.7, "grad_norm": 0.8874875903129578, "learning_rate": 2.2124525966189685e-06, "loss": 0.5415, "step": 11011 }, { "epoch": 0.7, "grad_norm": 0.9058599472045898, "learning_rate": 2.211600905550029e-06, "loss": 0.5472, "step": 11012 }, { "epoch": 0.7, "grad_norm": 0.8872851729393005, "learning_rate": 2.2107493318908785e-06, "loss": 0.5927, "step": 11013 }, { "epoch": 0.7, "grad_norm": 0.8519189953804016, "learning_rate": 2.2098978756773687e-06, "loss": 0.5767, "step": 11014 }, { "epoch": 0.7, "grad_norm": 0.9283721446990967, "learning_rate": 2.2090465369453533e-06, "loss": 0.6112, "step": 11015 }, { "epoch": 0.7, "grad_norm": 0.863922655582428, "learning_rate": 2.208195315730681e-06, "loss": 0.5684, "step": 11016 }, { "epoch": 0.7, "grad_norm": 0.9413056969642639, "learning_rate": 2.207344212069189e-06, "loss": 0.6224, "step": 11017 }, { "epoch": 0.7, "grad_norm": 0.92134690284729, "learning_rate": 2.2064932259967188e-06, "loss": 0.6007, "step": 11018 }, { "epoch": 0.7, "grad_norm": 0.8620043396949768, "learning_rate": 2.2056423575491026e-06, "loss": 0.6119, "step": 11019 }, { "epoch": 0.7, "grad_norm": 0.9397025108337402, "learning_rate": 2.204791606762164e-06, "loss": 0.6156, "step": 11020 }, { "epoch": 0.7, "grad_norm": 0.8872060179710388, "learning_rate": 2.2039409736717273e-06, "loss": 0.5733, "step": 11021 }, { "epoch": 0.7, "grad_norm": 0.8693039417266846, "learning_rate": 2.2030904583136085e-06, "loss": 0.587, "step": 11022 }, { "epoch": 0.7, "grad_norm": 0.8837648034095764, "learning_rate": 2.2022400607236214e-06, "loss": 0.6031, "step": 11023 }, { "epoch": 0.7, "grad_norm": 0.848081648349762, "learning_rate": 2.2013897809375753e-06, "loss": 0.6094, "step": 11024 }, { "epoch": 0.7, "grad_norm": 0.945680558681488, "learning_rate": 2.2005396189912647e-06, "loss": 0.5531, "step": 11025 }, { "epoch": 0.7, "grad_norm": 0.8282198309898376, "learning_rate": 2.199689574920495e-06, "loss": 0.5546, "step": 11026 }, { "epoch": 0.7, "grad_norm": 0.8442526459693909, "learning_rate": 2.198839648761057e-06, "loss": 0.5722, "step": 11027 }, { "epoch": 0.7, "grad_norm": 0.8687816858291626, "learning_rate": 2.1979898405487354e-06, "loss": 0.5748, "step": 11028 }, { "epoch": 0.7, "grad_norm": 0.8658022284507751, "learning_rate": 2.197140150319314e-06, "loss": 0.5626, "step": 11029 }, { "epoch": 0.7, "grad_norm": 0.905732274055481, "learning_rate": 2.19629057810857e-06, "loss": 0.6178, "step": 11030 }, { "epoch": 0.7, "grad_norm": 0.8981587290763855, "learning_rate": 2.195441123952277e-06, "loss": 0.5984, "step": 11031 }, { "epoch": 0.7, "grad_norm": 0.91184002161026, "learning_rate": 2.1945917878862037e-06, "loss": 0.5684, "step": 11032 }, { "epoch": 0.7, "grad_norm": 0.8562777042388916, "learning_rate": 2.193742569946109e-06, "loss": 0.5981, "step": 11033 }, { "epoch": 0.7, "grad_norm": 0.842792272567749, "learning_rate": 2.1928934701677507e-06, "loss": 0.5707, "step": 11034 }, { "epoch": 0.7, "grad_norm": 0.8843762278556824, "learning_rate": 2.1920444885868862e-06, "loss": 0.5663, "step": 11035 }, { "epoch": 0.7, "grad_norm": 0.8537455201148987, "learning_rate": 2.1911956252392593e-06, "loss": 0.5627, "step": 11036 }, { "epoch": 0.7, "grad_norm": 0.9044625163078308, "learning_rate": 2.1903468801606125e-06, "loss": 0.5699, "step": 11037 }, { "epoch": 0.7, "grad_norm": 0.8944157958030701, "learning_rate": 2.1894982533866852e-06, "loss": 0.5917, "step": 11038 }, { "epoch": 0.7, "grad_norm": 0.8653507232666016, "learning_rate": 2.188649744953209e-06, "loss": 0.5735, "step": 11039 }, { "epoch": 0.7, "grad_norm": 0.9325670003890991, "learning_rate": 2.1878013548959145e-06, "loss": 0.5949, "step": 11040 }, { "epoch": 0.7, "grad_norm": 0.9095918536186218, "learning_rate": 2.186953083250519e-06, "loss": 0.632, "step": 11041 }, { "epoch": 0.7, "grad_norm": 0.8838375806808472, "learning_rate": 2.1861049300527426e-06, "loss": 0.5716, "step": 11042 }, { "epoch": 0.7, "grad_norm": 0.9739626049995422, "learning_rate": 2.1852568953383025e-06, "loss": 0.5902, "step": 11043 }, { "epoch": 0.7, "grad_norm": 0.8509230017662048, "learning_rate": 2.1844089791429002e-06, "loss": 0.5383, "step": 11044 }, { "epoch": 0.7, "grad_norm": 0.8938042521476746, "learning_rate": 2.1835611815022412e-06, "loss": 0.6017, "step": 11045 }, { "epoch": 0.7, "grad_norm": 0.8988103866577148, "learning_rate": 2.182713502452025e-06, "loss": 0.5487, "step": 11046 }, { "epoch": 0.7, "grad_norm": 0.8871778845787048, "learning_rate": 2.181865942027939e-06, "loss": 0.5941, "step": 11047 }, { "epoch": 0.7, "grad_norm": 0.9358175992965698, "learning_rate": 2.181018500265679e-06, "loss": 0.6045, "step": 11048 }, { "epoch": 0.7, "grad_norm": 0.8977616429328918, "learning_rate": 2.1801711772009203e-06, "loss": 0.5579, "step": 11049 }, { "epoch": 0.7, "grad_norm": 0.8415703177452087, "learning_rate": 2.179323972869345e-06, "loss": 0.6, "step": 11050 }, { "epoch": 0.7, "grad_norm": 0.8636377453804016, "learning_rate": 2.1784768873066243e-06, "loss": 0.5714, "step": 11051 }, { "epoch": 0.7, "grad_norm": 0.9031801223754883, "learning_rate": 2.1776299205484265e-06, "loss": 0.5426, "step": 11052 }, { "epoch": 0.7, "grad_norm": 0.9137712121009827, "learning_rate": 2.176783072630414e-06, "loss": 0.569, "step": 11053 }, { "epoch": 0.7, "grad_norm": 0.8924576044082642, "learning_rate": 2.1759363435882475e-06, "loss": 0.5756, "step": 11054 }, { "epoch": 0.7, "grad_norm": 0.8783155083656311, "learning_rate": 2.1750897334575736e-06, "loss": 0.5775, "step": 11055 }, { "epoch": 0.7, "grad_norm": 0.8635226488113403, "learning_rate": 2.174243242274047e-06, "loss": 0.5589, "step": 11056 }, { "epoch": 0.7, "grad_norm": 0.9452078938484192, "learning_rate": 2.1733968700733066e-06, "loss": 0.6036, "step": 11057 }, { "epoch": 0.7, "grad_norm": 0.9600135684013367, "learning_rate": 2.1725506168909903e-06, "loss": 0.6068, "step": 11058 }, { "epoch": 0.7, "grad_norm": 0.8594204187393188, "learning_rate": 2.1717044827627314e-06, "loss": 0.5309, "step": 11059 }, { "epoch": 0.7, "grad_norm": 0.9110593199729919, "learning_rate": 2.1708584677241586e-06, "loss": 0.6443, "step": 11060 }, { "epoch": 0.7, "grad_norm": 0.8541653156280518, "learning_rate": 2.170012571810893e-06, "loss": 0.532, "step": 11061 }, { "epoch": 0.7, "grad_norm": 0.8981989622116089, "learning_rate": 2.1691667950585552e-06, "loss": 0.5661, "step": 11062 }, { "epoch": 0.7, "grad_norm": 0.8796327710151672, "learning_rate": 2.1683211375027543e-06, "loss": 0.6167, "step": 11063 }, { "epoch": 0.7, "grad_norm": 0.8893603086471558, "learning_rate": 2.1674755991790976e-06, "loss": 0.6118, "step": 11064 }, { "epoch": 0.7, "grad_norm": 0.8742503523826599, "learning_rate": 2.1666301801231937e-06, "loss": 0.5821, "step": 11065 }, { "epoch": 0.7, "grad_norm": 0.9033846259117126, "learning_rate": 2.1657848803706344e-06, "loss": 0.589, "step": 11066 }, { "epoch": 0.7, "grad_norm": 0.8721830248832703, "learning_rate": 2.1649396999570137e-06, "loss": 0.5571, "step": 11067 }, { "epoch": 0.7, "grad_norm": 0.8391421437263489, "learning_rate": 2.1640946389179207e-06, "loss": 0.5393, "step": 11068 }, { "epoch": 0.7, "grad_norm": 0.8839281797409058, "learning_rate": 2.1632496972889366e-06, "loss": 0.5608, "step": 11069 }, { "epoch": 0.7, "grad_norm": 0.9285804033279419, "learning_rate": 2.162404875105641e-06, "loss": 0.6456, "step": 11070 }, { "epoch": 0.7, "grad_norm": 0.908049464225769, "learning_rate": 2.1615601724036033e-06, "loss": 0.5772, "step": 11071 }, { "epoch": 0.7, "grad_norm": 0.8621760010719299, "learning_rate": 2.1607155892183905e-06, "loss": 0.566, "step": 11072 }, { "epoch": 0.7, "grad_norm": 0.8190118670463562, "learning_rate": 2.1598711255855713e-06, "loss": 0.5507, "step": 11073 }, { "epoch": 0.7, "grad_norm": 0.9111891984939575, "learning_rate": 2.1590267815406968e-06, "loss": 0.5911, "step": 11074 }, { "epoch": 0.7, "grad_norm": 0.9198263883590698, "learning_rate": 2.1581825571193216e-06, "loss": 0.5695, "step": 11075 }, { "epoch": 0.7, "grad_norm": 0.863237202167511, "learning_rate": 2.1573384523569945e-06, "loss": 0.5976, "step": 11076 }, { "epoch": 0.7, "grad_norm": 0.914863109588623, "learning_rate": 2.1564944672892524e-06, "loss": 0.5644, "step": 11077 }, { "epoch": 0.7, "grad_norm": 0.8742169141769409, "learning_rate": 2.1556506019516405e-06, "loss": 0.5602, "step": 11078 }, { "epoch": 0.7, "grad_norm": 0.862916886806488, "learning_rate": 2.1548068563796855e-06, "loss": 0.5522, "step": 11079 }, { "epoch": 0.7, "grad_norm": 0.8305811882019043, "learning_rate": 2.1539632306089153e-06, "loss": 0.543, "step": 11080 }, { "epoch": 0.7, "grad_norm": 0.8726207613945007, "learning_rate": 2.153119724674853e-06, "loss": 0.5805, "step": 11081 }, { "epoch": 0.7, "grad_norm": 0.9057608246803284, "learning_rate": 2.1522763386130156e-06, "loss": 0.5893, "step": 11082 }, { "epoch": 0.7, "grad_norm": 0.8841626644134521, "learning_rate": 2.1514330724589156e-06, "loss": 0.5744, "step": 11083 }, { "epoch": 0.7, "grad_norm": 0.9211553931236267, "learning_rate": 2.1505899262480607e-06, "loss": 0.6397, "step": 11084 }, { "epoch": 0.7, "grad_norm": 1.0019750595092773, "learning_rate": 2.149746900015948e-06, "loss": 0.6107, "step": 11085 }, { "epoch": 0.7, "grad_norm": 0.8821682929992676, "learning_rate": 2.148903993798082e-06, "loss": 0.5628, "step": 11086 }, { "epoch": 0.7, "grad_norm": 0.874483048915863, "learning_rate": 2.148061207629949e-06, "loss": 0.6007, "step": 11087 }, { "epoch": 0.7, "grad_norm": 0.8956743478775024, "learning_rate": 2.1472185415470365e-06, "loss": 0.5535, "step": 11088 }, { "epoch": 0.7, "grad_norm": 0.9166892766952515, "learning_rate": 2.1463759955848277e-06, "loss": 0.551, "step": 11089 }, { "epoch": 0.7, "grad_norm": 0.8699899911880493, "learning_rate": 2.1455335697787987e-06, "loss": 0.6144, "step": 11090 }, { "epoch": 0.7, "grad_norm": 0.8954592347145081, "learning_rate": 2.1446912641644206e-06, "loss": 0.5944, "step": 11091 }, { "epoch": 0.7, "grad_norm": 0.8515070080757141, "learning_rate": 2.1438490787771634e-06, "loss": 0.5716, "step": 11092 }, { "epoch": 0.7, "grad_norm": 0.8956817388534546, "learning_rate": 2.1430070136524826e-06, "loss": 0.555, "step": 11093 }, { "epoch": 0.7, "grad_norm": 0.9041964411735535, "learning_rate": 2.1421650688258384e-06, "loss": 0.5484, "step": 11094 }, { "epoch": 0.7, "grad_norm": 0.8969672322273254, "learning_rate": 2.1413232443326813e-06, "loss": 0.6186, "step": 11095 }, { "epoch": 0.7, "grad_norm": 0.9423562288284302, "learning_rate": 2.140481540208458e-06, "loss": 0.6008, "step": 11096 }, { "epoch": 0.7, "grad_norm": 0.8758630156517029, "learning_rate": 2.1396399564886113e-06, "loss": 0.5685, "step": 11097 }, { "epoch": 0.7, "grad_norm": 0.8483191728591919, "learning_rate": 2.1387984932085714e-06, "loss": 0.5524, "step": 11098 }, { "epoch": 0.7, "grad_norm": 0.9557432532310486, "learning_rate": 2.1379571504037754e-06, "loss": 0.6017, "step": 11099 }, { "epoch": 0.7, "grad_norm": 0.9311283230781555, "learning_rate": 2.1371159281096497e-06, "loss": 0.5575, "step": 11100 }, { "epoch": 0.7, "grad_norm": 0.8768463134765625, "learning_rate": 2.1362748263616112e-06, "loss": 0.6389, "step": 11101 }, { "epoch": 0.7, "grad_norm": 0.926892876625061, "learning_rate": 2.1354338451950774e-06, "loss": 0.5177, "step": 11102 }, { "epoch": 0.7, "grad_norm": 0.8700109124183655, "learning_rate": 2.1345929846454593e-06, "loss": 0.5717, "step": 11103 }, { "epoch": 0.7, "grad_norm": 0.9436931014060974, "learning_rate": 2.133752244748163e-06, "loss": 0.5981, "step": 11104 }, { "epoch": 0.7, "grad_norm": 0.8545697927474976, "learning_rate": 2.1329116255385902e-06, "loss": 0.559, "step": 11105 }, { "epoch": 0.7, "grad_norm": 1.0063272714614868, "learning_rate": 2.132071127052131e-06, "loss": 0.6422, "step": 11106 }, { "epoch": 0.7, "grad_norm": 0.8825348019599915, "learning_rate": 2.1312307493241825e-06, "loss": 0.5503, "step": 11107 }, { "epoch": 0.7, "grad_norm": 0.837097704410553, "learning_rate": 2.1303904923901288e-06, "loss": 0.5861, "step": 11108 }, { "epoch": 0.7, "grad_norm": 0.8669401407241821, "learning_rate": 2.1295503562853466e-06, "loss": 0.5819, "step": 11109 }, { "epoch": 0.7, "grad_norm": 0.9110631346702576, "learning_rate": 2.1287103410452135e-06, "loss": 0.5713, "step": 11110 }, { "epoch": 0.7, "grad_norm": 0.8955477476119995, "learning_rate": 2.1278704467050996e-06, "loss": 0.6162, "step": 11111 }, { "epoch": 0.7, "grad_norm": 0.8396604061126709, "learning_rate": 2.1270306733003697e-06, "loss": 0.5518, "step": 11112 }, { "epoch": 0.7, "grad_norm": 0.9126778841018677, "learning_rate": 2.126191020866386e-06, "loss": 0.6153, "step": 11113 }, { "epoch": 0.7, "grad_norm": 0.9138805866241455, "learning_rate": 2.125351489438499e-06, "loss": 0.5754, "step": 11114 }, { "epoch": 0.7, "grad_norm": 0.8429421782493591, "learning_rate": 2.124512079052059e-06, "loss": 0.5369, "step": 11115 }, { "epoch": 0.7, "grad_norm": 0.8413889408111572, "learning_rate": 2.123672789742416e-06, "loss": 0.5845, "step": 11116 }, { "epoch": 0.7, "grad_norm": 0.9169177412986755, "learning_rate": 2.1228336215449036e-06, "loss": 0.5887, "step": 11117 }, { "epoch": 0.7, "grad_norm": 0.8493983745574951, "learning_rate": 2.1219945744948584e-06, "loss": 0.5727, "step": 11118 }, { "epoch": 0.7, "grad_norm": 0.8498938083648682, "learning_rate": 2.12115564862761e-06, "loss": 0.5381, "step": 11119 }, { "epoch": 0.7, "grad_norm": 0.8709191083908081, "learning_rate": 2.1203168439784828e-06, "loss": 0.5768, "step": 11120 }, { "epoch": 0.7, "grad_norm": 0.8937205076217651, "learning_rate": 2.119478160582797e-06, "loss": 0.6019, "step": 11121 }, { "epoch": 0.7, "grad_norm": 0.8882603645324707, "learning_rate": 2.1186395984758633e-06, "loss": 0.5199, "step": 11122 }, { "epoch": 0.7, "grad_norm": 0.8753896951675415, "learning_rate": 2.117801157692993e-06, "loss": 0.5835, "step": 11123 }, { "epoch": 0.7, "grad_norm": 0.92037034034729, "learning_rate": 2.1169628382694894e-06, "loss": 0.5877, "step": 11124 }, { "epoch": 0.7, "grad_norm": 0.8579007983207703, "learning_rate": 2.1161246402406518e-06, "loss": 0.5617, "step": 11125 }, { "epoch": 0.7, "grad_norm": 0.8872489929199219, "learning_rate": 2.1152865636417723e-06, "loss": 0.5598, "step": 11126 }, { "epoch": 0.7, "grad_norm": 0.9075922966003418, "learning_rate": 2.114448608508143e-06, "loss": 0.5544, "step": 11127 }, { "epoch": 0.71, "grad_norm": 0.8997741937637329, "learning_rate": 2.113610774875041e-06, "loss": 0.5516, "step": 11128 }, { "epoch": 0.71, "grad_norm": 0.8664461970329285, "learning_rate": 2.1127730627777497e-06, "loss": 0.5554, "step": 11129 }, { "epoch": 0.71, "grad_norm": 0.9150891900062561, "learning_rate": 2.111935472251543e-06, "loss": 0.5794, "step": 11130 }, { "epoch": 0.71, "grad_norm": 0.9180150628089905, "learning_rate": 2.1110980033316846e-06, "loss": 0.5755, "step": 11131 }, { "epoch": 0.71, "grad_norm": 0.9424551129341125, "learning_rate": 2.1102606560534393e-06, "loss": 0.6034, "step": 11132 }, { "epoch": 0.71, "grad_norm": 0.893530011177063, "learning_rate": 2.1094234304520655e-06, "loss": 0.6, "step": 11133 }, { "epoch": 0.71, "grad_norm": 0.9302678108215332, "learning_rate": 2.108586326562816e-06, "loss": 0.6037, "step": 11134 }, { "epoch": 0.71, "grad_norm": 0.9196210503578186, "learning_rate": 2.1077493444209385e-06, "loss": 0.6183, "step": 11135 }, { "epoch": 0.71, "grad_norm": 0.9748576879501343, "learning_rate": 2.1069124840616717e-06, "loss": 0.5905, "step": 11136 }, { "epoch": 0.71, "grad_norm": 0.8754902482032776, "learning_rate": 2.1060757455202574e-06, "loss": 0.5659, "step": 11137 }, { "epoch": 0.71, "grad_norm": 0.9038988947868347, "learning_rate": 2.1052391288319285e-06, "loss": 0.6012, "step": 11138 }, { "epoch": 0.71, "grad_norm": 0.9287976026535034, "learning_rate": 2.1044026340319075e-06, "loss": 0.5397, "step": 11139 }, { "epoch": 0.71, "grad_norm": 0.9258267879486084, "learning_rate": 2.1035662611554187e-06, "loss": 0.586, "step": 11140 }, { "epoch": 0.71, "grad_norm": 0.9034359455108643, "learning_rate": 2.1027300102376787e-06, "loss": 0.5757, "step": 11141 }, { "epoch": 0.71, "grad_norm": 0.8997130393981934, "learning_rate": 2.101893881313899e-06, "loss": 0.5749, "step": 11142 }, { "epoch": 0.71, "grad_norm": 0.8986077308654785, "learning_rate": 2.1010578744192885e-06, "loss": 0.6169, "step": 11143 }, { "epoch": 0.71, "grad_norm": 0.9187172055244446, "learning_rate": 2.1002219895890435e-06, "loss": 0.6212, "step": 11144 }, { "epoch": 0.71, "grad_norm": 0.8708627223968506, "learning_rate": 2.099386226858362e-06, "loss": 0.5887, "step": 11145 }, { "epoch": 0.71, "grad_norm": 0.8534128665924072, "learning_rate": 2.098550586262439e-06, "loss": 0.597, "step": 11146 }, { "epoch": 0.71, "grad_norm": 0.8883056044578552, "learning_rate": 2.097715067836456e-06, "loss": 0.5597, "step": 11147 }, { "epoch": 0.71, "grad_norm": 0.9250147938728333, "learning_rate": 2.096879671615595e-06, "loss": 0.6114, "step": 11148 }, { "epoch": 0.71, "grad_norm": 0.9054756164550781, "learning_rate": 2.0960443976350315e-06, "loss": 0.6134, "step": 11149 }, { "epoch": 0.71, "grad_norm": 0.9181949496269226, "learning_rate": 2.0952092459299366e-06, "loss": 0.6393, "step": 11150 }, { "epoch": 0.71, "grad_norm": 0.8578399419784546, "learning_rate": 2.0943742165354776e-06, "loss": 0.5453, "step": 11151 }, { "epoch": 0.71, "grad_norm": 0.8718449473381042, "learning_rate": 2.0935393094868094e-06, "loss": 0.5702, "step": 11152 }, { "epoch": 0.71, "grad_norm": 0.8640325665473938, "learning_rate": 2.092704524819089e-06, "loss": 0.529, "step": 11153 }, { "epoch": 0.71, "grad_norm": 0.878528892993927, "learning_rate": 2.091869862567471e-06, "loss": 0.6061, "step": 11154 }, { "epoch": 0.71, "grad_norm": 0.9092130064964294, "learning_rate": 2.091035322767095e-06, "loss": 0.5999, "step": 11155 }, { "epoch": 0.71, "grad_norm": 0.8933854699134827, "learning_rate": 2.0902009054531013e-06, "loss": 0.6148, "step": 11156 }, { "epoch": 0.71, "grad_norm": 0.9362192153930664, "learning_rate": 2.089366610660627e-06, "loss": 0.6049, "step": 11157 }, { "epoch": 0.71, "grad_norm": 0.8995941281318665, "learning_rate": 2.0885324384247956e-06, "loss": 0.5718, "step": 11158 }, { "epoch": 0.71, "grad_norm": 0.8591166734695435, "learning_rate": 2.087698388780739e-06, "loss": 0.5622, "step": 11159 }, { "epoch": 0.71, "grad_norm": 0.8993247151374817, "learning_rate": 2.0868644617635697e-06, "loss": 0.5731, "step": 11160 }, { "epoch": 0.71, "grad_norm": 0.9018330574035645, "learning_rate": 2.0860306574084043e-06, "loss": 0.5593, "step": 11161 }, { "epoch": 0.71, "grad_norm": 0.889884889125824, "learning_rate": 2.085196975750351e-06, "loss": 0.5715, "step": 11162 }, { "epoch": 0.71, "grad_norm": 0.8837161660194397, "learning_rate": 2.084363416824513e-06, "loss": 0.5673, "step": 11163 }, { "epoch": 0.71, "grad_norm": 0.825923502445221, "learning_rate": 2.0835299806659885e-06, "loss": 0.5608, "step": 11164 }, { "epoch": 0.71, "grad_norm": 0.9410537481307983, "learning_rate": 2.0826966673098737e-06, "loss": 0.6276, "step": 11165 }, { "epoch": 0.71, "grad_norm": 0.8919404149055481, "learning_rate": 2.0818634767912495e-06, "loss": 0.6048, "step": 11166 }, { "epoch": 0.71, "grad_norm": 0.9027935266494751, "learning_rate": 2.081030409145206e-06, "loss": 0.5912, "step": 11167 }, { "epoch": 0.71, "grad_norm": 0.9064032435417175, "learning_rate": 2.080197464406816e-06, "loss": 0.6004, "step": 11168 }, { "epoch": 0.71, "grad_norm": 0.8723354935646057, "learning_rate": 2.0793646426111536e-06, "loss": 0.5449, "step": 11169 }, { "epoch": 0.71, "grad_norm": 0.9101514220237732, "learning_rate": 2.078531943793288e-06, "loss": 0.605, "step": 11170 }, { "epoch": 0.71, "grad_norm": 0.9378718137741089, "learning_rate": 2.0776993679882752e-06, "loss": 0.5605, "step": 11171 }, { "epoch": 0.71, "grad_norm": 0.8340771198272705, "learning_rate": 2.076866915231178e-06, "loss": 0.5802, "step": 11172 }, { "epoch": 0.71, "grad_norm": 0.9529073238372803, "learning_rate": 2.076034585557048e-06, "loss": 0.5803, "step": 11173 }, { "epoch": 0.71, "grad_norm": 0.9297928214073181, "learning_rate": 2.075202379000928e-06, "loss": 0.5573, "step": 11174 }, { "epoch": 0.71, "grad_norm": 0.8736124038696289, "learning_rate": 2.074370295597861e-06, "loss": 0.5904, "step": 11175 }, { "epoch": 0.71, "grad_norm": 0.947452962398529, "learning_rate": 2.0735383353828843e-06, "loss": 0.6098, "step": 11176 }, { "epoch": 0.71, "grad_norm": 0.8607105612754822, "learning_rate": 2.0727064983910266e-06, "loss": 0.5983, "step": 11177 }, { "epoch": 0.71, "grad_norm": 0.9094382524490356, "learning_rate": 2.071874784657318e-06, "loss": 0.6039, "step": 11178 }, { "epoch": 0.71, "grad_norm": 0.868175745010376, "learning_rate": 2.0710431942167713e-06, "loss": 0.5535, "step": 11179 }, { "epoch": 0.71, "grad_norm": 0.9550389647483826, "learning_rate": 2.070211727104409e-06, "loss": 0.5303, "step": 11180 }, { "epoch": 0.71, "grad_norm": 0.939507246017456, "learning_rate": 2.0693803833552407e-06, "loss": 0.5674, "step": 11181 }, { "epoch": 0.71, "grad_norm": 0.8654747009277344, "learning_rate": 2.0685491630042677e-06, "loss": 0.5973, "step": 11182 }, { "epoch": 0.71, "grad_norm": 0.8690040111541748, "learning_rate": 2.0677180660864916e-06, "loss": 0.5844, "step": 11183 }, { "epoch": 0.71, "grad_norm": 0.9559879302978516, "learning_rate": 2.0668870926369068e-06, "loss": 0.5674, "step": 11184 }, { "epoch": 0.71, "grad_norm": 0.8681148290634155, "learning_rate": 2.066056242690503e-06, "loss": 0.6035, "step": 11185 }, { "epoch": 0.71, "grad_norm": 0.8854528665542603, "learning_rate": 2.0652255162822665e-06, "loss": 0.5315, "step": 11186 }, { "epoch": 0.71, "grad_norm": 0.866400957107544, "learning_rate": 2.0643949134471726e-06, "loss": 0.5466, "step": 11187 }, { "epoch": 0.71, "grad_norm": 0.8909302949905396, "learning_rate": 2.0635644342201942e-06, "loss": 0.5889, "step": 11188 }, { "epoch": 0.71, "grad_norm": 0.884699821472168, "learning_rate": 2.0627340786363063e-06, "loss": 0.6031, "step": 11189 }, { "epoch": 0.71, "grad_norm": 0.930429995059967, "learning_rate": 2.0619038467304663e-06, "loss": 0.5815, "step": 11190 }, { "epoch": 0.71, "grad_norm": 0.8738210201263428, "learning_rate": 2.061073738537635e-06, "loss": 0.57, "step": 11191 }, { "epoch": 0.71, "grad_norm": 0.8566862344741821, "learning_rate": 2.0602437540927644e-06, "loss": 0.564, "step": 11192 }, { "epoch": 0.71, "grad_norm": 0.9492089152336121, "learning_rate": 2.0594138934308027e-06, "loss": 0.6218, "step": 11193 }, { "epoch": 0.71, "grad_norm": 0.929764986038208, "learning_rate": 2.058584156586692e-06, "loss": 0.6217, "step": 11194 }, { "epoch": 0.71, "grad_norm": 0.9115621447563171, "learning_rate": 2.0577545435953727e-06, "loss": 0.5181, "step": 11195 }, { "epoch": 0.71, "grad_norm": 0.8706603646278381, "learning_rate": 2.05692505449177e-06, "loss": 0.578, "step": 11196 }, { "epoch": 0.71, "grad_norm": 0.958949863910675, "learning_rate": 2.0560956893108188e-06, "loss": 0.6523, "step": 11197 }, { "epoch": 0.71, "grad_norm": 0.888208270072937, "learning_rate": 2.0552664480874353e-06, "loss": 0.5606, "step": 11198 }, { "epoch": 0.71, "grad_norm": 1.0087759494781494, "learning_rate": 2.0544373308565374e-06, "loss": 0.6203, "step": 11199 }, { "epoch": 0.71, "grad_norm": 0.8741673827171326, "learning_rate": 2.0536083376530368e-06, "loss": 0.5617, "step": 11200 }, { "epoch": 0.71, "grad_norm": 0.86680668592453, "learning_rate": 2.0527794685118397e-06, "loss": 0.5924, "step": 11201 }, { "epoch": 0.71, "grad_norm": 0.8506894111633301, "learning_rate": 2.0519507234678464e-06, "loss": 0.6104, "step": 11202 }, { "epoch": 0.71, "grad_norm": 0.8975198864936829, "learning_rate": 2.051122102555954e-06, "loss": 0.5931, "step": 11203 }, { "epoch": 0.71, "grad_norm": 0.9341747164726257, "learning_rate": 2.0502936058110502e-06, "loss": 0.6311, "step": 11204 }, { "epoch": 0.71, "grad_norm": 0.8767626285552979, "learning_rate": 2.049465233268021e-06, "loss": 0.5843, "step": 11205 }, { "epoch": 0.71, "grad_norm": 0.8813466429710388, "learning_rate": 2.0486369849617467e-06, "loss": 0.5672, "step": 11206 }, { "epoch": 0.71, "grad_norm": 1.044753909111023, "learning_rate": 2.0478088609271018e-06, "loss": 0.5594, "step": 11207 }, { "epoch": 0.71, "grad_norm": 0.9174667000770569, "learning_rate": 2.0469808611989583e-06, "loss": 0.6105, "step": 11208 }, { "epoch": 0.71, "grad_norm": 0.9028404951095581, "learning_rate": 2.0461529858121737e-06, "loss": 0.5766, "step": 11209 }, { "epoch": 0.71, "grad_norm": 0.9191893339157104, "learning_rate": 2.0453252348016133e-06, "loss": 0.592, "step": 11210 }, { "epoch": 0.71, "grad_norm": 0.9252839088439941, "learning_rate": 2.04449760820213e-06, "loss": 0.5725, "step": 11211 }, { "epoch": 0.71, "grad_norm": 0.8581644296646118, "learning_rate": 2.04367010604857e-06, "loss": 0.5989, "step": 11212 }, { "epoch": 0.71, "grad_norm": 0.8583932518959045, "learning_rate": 2.042842728375777e-06, "loss": 0.5921, "step": 11213 }, { "epoch": 0.71, "grad_norm": 0.8592872023582458, "learning_rate": 2.0420154752185896e-06, "loss": 0.5688, "step": 11214 }, { "epoch": 0.71, "grad_norm": 0.8804487586021423, "learning_rate": 2.0411883466118406e-06, "loss": 0.6106, "step": 11215 }, { "epoch": 0.71, "grad_norm": 0.8709439039230347, "learning_rate": 2.0403613425903584e-06, "loss": 0.5829, "step": 11216 }, { "epoch": 0.71, "grad_norm": 0.9017482995986938, "learning_rate": 2.0395344631889636e-06, "loss": 0.5525, "step": 11217 }, { "epoch": 0.71, "grad_norm": 0.8507391810417175, "learning_rate": 2.038707708442471e-06, "loss": 0.6201, "step": 11218 }, { "epoch": 0.71, "grad_norm": 0.9192302227020264, "learning_rate": 2.0378810783856996e-06, "loss": 0.5915, "step": 11219 }, { "epoch": 0.71, "grad_norm": 0.8469076156616211, "learning_rate": 2.0370545730534493e-06, "loss": 0.5402, "step": 11220 }, { "epoch": 0.71, "grad_norm": 0.9404274225234985, "learning_rate": 2.0362281924805238e-06, "loss": 0.5853, "step": 11221 }, { "epoch": 0.71, "grad_norm": 0.8778280019760132, "learning_rate": 2.035401936701719e-06, "loss": 0.5735, "step": 11222 }, { "epoch": 0.71, "grad_norm": 0.8963788151741028, "learning_rate": 2.034575805751825e-06, "loss": 0.5796, "step": 11223 }, { "epoch": 0.71, "grad_norm": 0.8657545447349548, "learning_rate": 2.0337497996656303e-06, "loss": 0.5865, "step": 11224 }, { "epoch": 0.71, "grad_norm": 0.8633813261985779, "learning_rate": 2.03292391847791e-06, "loss": 0.568, "step": 11225 }, { "epoch": 0.71, "grad_norm": 1.0021978616714478, "learning_rate": 2.032098162223441e-06, "loss": 0.5911, "step": 11226 }, { "epoch": 0.71, "grad_norm": 0.8717944622039795, "learning_rate": 2.031272530936997e-06, "loss": 0.5538, "step": 11227 }, { "epoch": 0.71, "grad_norm": 0.8620628118515015, "learning_rate": 2.0304470246533377e-06, "loss": 0.583, "step": 11228 }, { "epoch": 0.71, "grad_norm": 0.8687443733215332, "learning_rate": 2.0296216434072237e-06, "loss": 0.5445, "step": 11229 }, { "epoch": 0.71, "grad_norm": 0.9198354482650757, "learning_rate": 2.0287963872334093e-06, "loss": 0.5932, "step": 11230 }, { "epoch": 0.71, "grad_norm": 0.83836430311203, "learning_rate": 2.0279712561666425e-06, "loss": 0.5579, "step": 11231 }, { "epoch": 0.71, "grad_norm": 0.8580030202865601, "learning_rate": 2.0271462502416694e-06, "loss": 0.5878, "step": 11232 }, { "epoch": 0.71, "grad_norm": 0.8928140997886658, "learning_rate": 2.0263213694932238e-06, "loss": 0.517, "step": 11233 }, { "epoch": 0.71, "grad_norm": 0.8412920832633972, "learning_rate": 2.0254966139560404e-06, "loss": 0.5409, "step": 11234 }, { "epoch": 0.71, "grad_norm": 0.9490091800689697, "learning_rate": 2.0246719836648476e-06, "loss": 0.5876, "step": 11235 }, { "epoch": 0.71, "grad_norm": 0.9710505604743958, "learning_rate": 2.0238474786543673e-06, "loss": 0.5687, "step": 11236 }, { "epoch": 0.71, "grad_norm": 0.8756168484687805, "learning_rate": 2.0230230989593157e-06, "loss": 0.5713, "step": 11237 }, { "epoch": 0.71, "grad_norm": 0.9873241782188416, "learning_rate": 2.0221988446144076e-06, "loss": 0.6178, "step": 11238 }, { "epoch": 0.71, "grad_norm": 0.9938790798187256, "learning_rate": 2.0213747156543432e-06, "loss": 0.565, "step": 11239 }, { "epoch": 0.71, "grad_norm": 0.8901024460792542, "learning_rate": 2.0205507121138316e-06, "loss": 0.565, "step": 11240 }, { "epoch": 0.71, "grad_norm": 0.923038899898529, "learning_rate": 2.019726834027563e-06, "loss": 0.5797, "step": 11241 }, { "epoch": 0.71, "grad_norm": 0.925470769405365, "learning_rate": 2.0189030814302295e-06, "loss": 0.5736, "step": 11242 }, { "epoch": 0.71, "grad_norm": 0.9232540130615234, "learning_rate": 2.018079454356517e-06, "loss": 0.5231, "step": 11243 }, { "epoch": 0.71, "grad_norm": 0.9192769527435303, "learning_rate": 2.017255952841105e-06, "loss": 0.593, "step": 11244 }, { "epoch": 0.71, "grad_norm": 0.9208205938339233, "learning_rate": 2.016432576918669e-06, "loss": 0.5463, "step": 11245 }, { "epoch": 0.71, "grad_norm": 0.8803871870040894, "learning_rate": 2.0156093266238795e-06, "loss": 0.5155, "step": 11246 }, { "epoch": 0.71, "grad_norm": 0.9068865776062012, "learning_rate": 2.014786201991396e-06, "loss": 0.613, "step": 11247 }, { "epoch": 0.71, "grad_norm": 0.9430435299873352, "learning_rate": 2.0139632030558844e-06, "loss": 0.6024, "step": 11248 }, { "epoch": 0.71, "grad_norm": 0.8339919447898865, "learning_rate": 2.0131403298519927e-06, "loss": 0.5304, "step": 11249 }, { "epoch": 0.71, "grad_norm": 0.9149238467216492, "learning_rate": 2.012317582414371e-06, "loss": 0.5962, "step": 11250 }, { "epoch": 0.71, "grad_norm": 0.8416147828102112, "learning_rate": 2.011494960777663e-06, "loss": 0.5355, "step": 11251 }, { "epoch": 0.71, "grad_norm": 0.8826677203178406, "learning_rate": 2.0106724649765055e-06, "loss": 0.6031, "step": 11252 }, { "epoch": 0.71, "grad_norm": 0.9082532525062561, "learning_rate": 2.0098500950455313e-06, "loss": 0.6503, "step": 11253 }, { "epoch": 0.71, "grad_norm": 0.924066960811615, "learning_rate": 2.00902785101937e-06, "loss": 0.5527, "step": 11254 }, { "epoch": 0.71, "grad_norm": 0.8147642016410828, "learning_rate": 2.008205732932639e-06, "loss": 0.5123, "step": 11255 }, { "epoch": 0.71, "grad_norm": 0.8650907278060913, "learning_rate": 2.0073837408199566e-06, "loss": 0.5695, "step": 11256 }, { "epoch": 0.71, "grad_norm": 0.9047468900680542, "learning_rate": 2.0065618747159342e-06, "loss": 0.556, "step": 11257 }, { "epoch": 0.71, "grad_norm": 0.8059144020080566, "learning_rate": 2.0057401346551785e-06, "loss": 0.5433, "step": 11258 }, { "epoch": 0.71, "grad_norm": 0.8873769044876099, "learning_rate": 2.004918520672289e-06, "loss": 0.6051, "step": 11259 }, { "epoch": 0.71, "grad_norm": 0.8762856125831604, "learning_rate": 2.0040970328018618e-06, "loss": 0.6158, "step": 11260 }, { "epoch": 0.71, "grad_norm": 0.8870139718055725, "learning_rate": 2.0032756710784864e-06, "loss": 0.5862, "step": 11261 }, { "epoch": 0.71, "grad_norm": 0.9140628576278687, "learning_rate": 2.0024544355367494e-06, "loss": 0.5758, "step": 11262 }, { "epoch": 0.71, "grad_norm": 0.9125930666923523, "learning_rate": 2.001633326211227e-06, "loss": 0.5277, "step": 11263 }, { "epoch": 0.71, "grad_norm": 0.8611398339271545, "learning_rate": 2.000812343136494e-06, "loss": 0.5982, "step": 11264 }, { "epoch": 0.71, "grad_norm": 0.9212353229522705, "learning_rate": 1.99999148634712e-06, "loss": 0.5773, "step": 11265 }, { "epoch": 0.71, "grad_norm": 0.8876082897186279, "learning_rate": 1.9991707558776686e-06, "loss": 0.5718, "step": 11266 }, { "epoch": 0.71, "grad_norm": 0.9632240533828735, "learning_rate": 1.9983501517626976e-06, "loss": 0.654, "step": 11267 }, { "epoch": 0.71, "grad_norm": 0.8377987742424011, "learning_rate": 1.997529674036761e-06, "loss": 0.5421, "step": 11268 }, { "epoch": 0.71, "grad_norm": 0.8990334272384644, "learning_rate": 1.9967093227344013e-06, "loss": 0.6147, "step": 11269 }, { "epoch": 0.71, "grad_norm": 0.8912368416786194, "learning_rate": 1.9958890978901685e-06, "loss": 0.5817, "step": 11270 }, { "epoch": 0.71, "grad_norm": 0.9113243818283081, "learning_rate": 1.9950689995385936e-06, "loss": 0.5697, "step": 11271 }, { "epoch": 0.71, "grad_norm": 0.9373201727867126, "learning_rate": 1.994249027714209e-06, "loss": 0.6274, "step": 11272 }, { "epoch": 0.71, "grad_norm": 0.8835095167160034, "learning_rate": 1.9934291824515423e-06, "loss": 0.57, "step": 11273 }, { "epoch": 0.71, "grad_norm": 0.9121303558349609, "learning_rate": 1.9926094637851135e-06, "loss": 0.5945, "step": 11274 }, { "epoch": 0.71, "grad_norm": 0.8844984769821167, "learning_rate": 1.9917898717494377e-06, "loss": 0.5921, "step": 11275 }, { "epoch": 0.71, "grad_norm": 0.8488909006118774, "learning_rate": 1.990970406379028e-06, "loss": 0.5555, "step": 11276 }, { "epoch": 0.71, "grad_norm": 0.9113077521324158, "learning_rate": 1.990151067708383e-06, "loss": 0.6299, "step": 11277 }, { "epoch": 0.71, "grad_norm": 0.9047096967697144, "learning_rate": 1.9893318557720093e-06, "loss": 0.5794, "step": 11278 }, { "epoch": 0.71, "grad_norm": 0.8998475670814514, "learning_rate": 1.9885127706043966e-06, "loss": 0.5783, "step": 11279 }, { "epoch": 0.71, "grad_norm": 0.9234053492546082, "learning_rate": 1.9876938122400348e-06, "loss": 0.5779, "step": 11280 }, { "epoch": 0.71, "grad_norm": 0.9886752367019653, "learning_rate": 1.9868749807134087e-06, "loss": 0.5843, "step": 11281 }, { "epoch": 0.71, "grad_norm": 0.870291531085968, "learning_rate": 1.9860562760589926e-06, "loss": 0.5362, "step": 11282 }, { "epoch": 0.71, "grad_norm": 0.9425791501998901, "learning_rate": 1.9852376983112632e-06, "loss": 0.6392, "step": 11283 }, { "epoch": 0.71, "grad_norm": 0.8813480734825134, "learning_rate": 1.9844192475046885e-06, "loss": 0.5949, "step": 11284 }, { "epoch": 0.71, "grad_norm": 0.8936353921890259, "learning_rate": 1.983600923673727e-06, "loss": 0.5256, "step": 11285 }, { "epoch": 0.72, "grad_norm": 0.87742018699646, "learning_rate": 1.9827827268528378e-06, "loss": 0.544, "step": 11286 }, { "epoch": 0.72, "grad_norm": 0.9305248260498047, "learning_rate": 1.9819646570764712e-06, "loss": 0.5996, "step": 11287 }, { "epoch": 0.72, "grad_norm": 0.9481449723243713, "learning_rate": 1.981146714379074e-06, "loss": 0.617, "step": 11288 }, { "epoch": 0.72, "grad_norm": 0.8779386281967163, "learning_rate": 1.980328898795089e-06, "loss": 0.5538, "step": 11289 }, { "epoch": 0.72, "grad_norm": 0.9480637311935425, "learning_rate": 1.979511210358946e-06, "loss": 0.571, "step": 11290 }, { "epoch": 0.72, "grad_norm": 0.8718064427375793, "learning_rate": 1.9786936491050803e-06, "loss": 0.5632, "step": 11291 }, { "epoch": 0.72, "grad_norm": 0.8864429593086243, "learning_rate": 1.9778762150679155e-06, "loss": 0.5402, "step": 11292 }, { "epoch": 0.72, "grad_norm": 0.861854076385498, "learning_rate": 1.9770589082818694e-06, "loss": 0.5858, "step": 11293 }, { "epoch": 0.72, "grad_norm": 0.8690637350082397, "learning_rate": 1.9762417287813557e-06, "loss": 0.5755, "step": 11294 }, { "epoch": 0.72, "grad_norm": 0.9482481479644775, "learning_rate": 1.9754246766007847e-06, "loss": 0.5812, "step": 11295 }, { "epoch": 0.72, "grad_norm": 0.8821942806243896, "learning_rate": 1.9746077517745582e-06, "loss": 0.5189, "step": 11296 }, { "epoch": 0.72, "grad_norm": 0.8461850881576538, "learning_rate": 1.9737909543370764e-06, "loss": 0.5615, "step": 11297 }, { "epoch": 0.72, "grad_norm": 0.8924559950828552, "learning_rate": 1.972974284322729e-06, "loss": 0.5045, "step": 11298 }, { "epoch": 0.72, "grad_norm": 0.8439890742301941, "learning_rate": 1.9721577417659023e-06, "loss": 0.5655, "step": 11299 }, { "epoch": 0.72, "grad_norm": 0.8429501056671143, "learning_rate": 1.9713413267009827e-06, "loss": 0.5588, "step": 11300 }, { "epoch": 0.72, "grad_norm": 0.8670548796653748, "learning_rate": 1.970525039162343e-06, "loss": 0.6076, "step": 11301 }, { "epoch": 0.72, "grad_norm": 0.8741304874420166, "learning_rate": 1.969708879184355e-06, "loss": 0.6139, "step": 11302 }, { "epoch": 0.72, "grad_norm": 0.8507691025733948, "learning_rate": 1.9688928468013846e-06, "loss": 0.5363, "step": 11303 }, { "epoch": 0.72, "grad_norm": 0.8928526043891907, "learning_rate": 1.968076942047791e-06, "loss": 0.5258, "step": 11304 }, { "epoch": 0.72, "grad_norm": 0.9080408215522766, "learning_rate": 1.9672611649579332e-06, "loss": 0.5913, "step": 11305 }, { "epoch": 0.72, "grad_norm": 0.8880747556686401, "learning_rate": 1.966445515566155e-06, "loss": 0.5521, "step": 11306 }, { "epoch": 0.72, "grad_norm": 0.9175702929496765, "learning_rate": 1.965629993906802e-06, "loss": 0.5995, "step": 11307 }, { "epoch": 0.72, "grad_norm": 0.8689432144165039, "learning_rate": 1.9648146000142173e-06, "loss": 0.5661, "step": 11308 }, { "epoch": 0.72, "grad_norm": 0.868963897228241, "learning_rate": 1.963999333922729e-06, "loss": 0.52, "step": 11309 }, { "epoch": 0.72, "grad_norm": 0.8676355481147766, "learning_rate": 1.963184195666668e-06, "loss": 0.576, "step": 11310 }, { "epoch": 0.72, "grad_norm": 0.8827881217002869, "learning_rate": 1.9623691852803577e-06, "loss": 0.5648, "step": 11311 }, { "epoch": 0.72, "grad_norm": 0.8609069585800171, "learning_rate": 1.9615543027981105e-06, "loss": 0.5785, "step": 11312 }, { "epoch": 0.72, "grad_norm": 0.8777223229408264, "learning_rate": 1.9607395482542446e-06, "loss": 0.5395, "step": 11313 }, { "epoch": 0.72, "grad_norm": 0.9304616451263428, "learning_rate": 1.9599249216830624e-06, "loss": 0.6124, "step": 11314 }, { "epoch": 0.72, "grad_norm": 0.945838987827301, "learning_rate": 1.9591104231188656e-06, "loss": 0.5889, "step": 11315 }, { "epoch": 0.72, "grad_norm": 0.8509537577629089, "learning_rate": 1.958296052595951e-06, "loss": 0.558, "step": 11316 }, { "epoch": 0.72, "grad_norm": 0.9135622978210449, "learning_rate": 1.9574818101486075e-06, "loss": 0.5564, "step": 11317 }, { "epoch": 0.72, "grad_norm": 0.9122533202171326, "learning_rate": 1.9566676958111214e-06, "loss": 0.5926, "step": 11318 }, { "epoch": 0.72, "grad_norm": 0.9296271800994873, "learning_rate": 1.955853709617773e-06, "loss": 0.597, "step": 11319 }, { "epoch": 0.72, "grad_norm": 0.8591296672821045, "learning_rate": 1.955039851602832e-06, "loss": 0.6263, "step": 11320 }, { "epoch": 0.72, "grad_norm": 0.9196903705596924, "learning_rate": 1.9542261218005737e-06, "loss": 0.5828, "step": 11321 }, { "epoch": 0.72, "grad_norm": 0.9328646659851074, "learning_rate": 1.9534125202452557e-06, "loss": 0.5962, "step": 11322 }, { "epoch": 0.72, "grad_norm": 0.9008827209472656, "learning_rate": 1.952599046971139e-06, "loss": 0.6416, "step": 11323 }, { "epoch": 0.72, "grad_norm": 0.9060094952583313, "learning_rate": 1.951785702012475e-06, "loss": 0.5831, "step": 11324 }, { "epoch": 0.72, "grad_norm": 0.8941810727119446, "learning_rate": 1.9509724854035105e-06, "loss": 0.5775, "step": 11325 }, { "epoch": 0.72, "grad_norm": 0.9267244935035706, "learning_rate": 1.950159397178488e-06, "loss": 0.6576, "step": 11326 }, { "epoch": 0.72, "grad_norm": 0.908926784992218, "learning_rate": 1.9493464373716458e-06, "loss": 0.5861, "step": 11327 }, { "epoch": 0.72, "grad_norm": 0.8545692563056946, "learning_rate": 1.9485336060172106e-06, "loss": 0.5916, "step": 11328 }, { "epoch": 0.72, "grad_norm": 0.9397866725921631, "learning_rate": 1.9477209031494104e-06, "loss": 0.5883, "step": 11329 }, { "epoch": 0.72, "grad_norm": 0.8065406680107117, "learning_rate": 1.9469083288024647e-06, "loss": 0.5156, "step": 11330 }, { "epoch": 0.72, "grad_norm": 0.8719428181648254, "learning_rate": 1.9460958830105882e-06, "loss": 0.61, "step": 11331 }, { "epoch": 0.72, "grad_norm": 0.9234678149223328, "learning_rate": 1.9452835658079905e-06, "loss": 0.5471, "step": 11332 }, { "epoch": 0.72, "grad_norm": 0.941314160823822, "learning_rate": 1.9444713772288747e-06, "loss": 0.6378, "step": 11333 }, { "epoch": 0.72, "grad_norm": 0.9694302082061768, "learning_rate": 1.94365931730744e-06, "loss": 0.6152, "step": 11334 }, { "epoch": 0.72, "grad_norm": 0.9199761152267456, "learning_rate": 1.9428473860778817e-06, "loss": 0.5797, "step": 11335 }, { "epoch": 0.72, "grad_norm": 0.8677429556846619, "learning_rate": 1.9420355835743826e-06, "loss": 0.6189, "step": 11336 }, { "epoch": 0.72, "grad_norm": 0.866114616394043, "learning_rate": 1.941223909831125e-06, "loss": 0.5872, "step": 11337 }, { "epoch": 0.72, "grad_norm": 1.0049126148223877, "learning_rate": 1.9404123648822924e-06, "loss": 0.6329, "step": 11338 }, { "epoch": 0.72, "grad_norm": 0.9035833477973938, "learning_rate": 1.9396009487620494e-06, "loss": 0.5539, "step": 11339 }, { "epoch": 0.72, "grad_norm": 0.9090478420257568, "learning_rate": 1.9387896615045636e-06, "loss": 0.5676, "step": 11340 }, { "epoch": 0.72, "grad_norm": 0.9102824330329895, "learning_rate": 1.9379785031439985e-06, "loss": 0.5586, "step": 11341 }, { "epoch": 0.72, "grad_norm": 0.8439232707023621, "learning_rate": 1.9371674737145023e-06, "loss": 0.5545, "step": 11342 }, { "epoch": 0.72, "grad_norm": 0.8065714836120605, "learning_rate": 1.936356573250233e-06, "loss": 0.5784, "step": 11343 }, { "epoch": 0.72, "grad_norm": 0.8955844044685364, "learning_rate": 1.935545801785329e-06, "loss": 0.5508, "step": 11344 }, { "epoch": 0.72, "grad_norm": 0.8924664258956909, "learning_rate": 1.934735159353931e-06, "loss": 0.5664, "step": 11345 }, { "epoch": 0.72, "grad_norm": 0.8322812914848328, "learning_rate": 1.9339246459901715e-06, "loss": 0.5858, "step": 11346 }, { "epoch": 0.72, "grad_norm": 0.8678402304649353, "learning_rate": 1.93311426172818e-06, "loss": 0.5856, "step": 11347 }, { "epoch": 0.72, "grad_norm": 0.8565698266029358, "learning_rate": 1.9323040066020774e-06, "loss": 0.6008, "step": 11348 }, { "epoch": 0.72, "grad_norm": 0.8522049188613892, "learning_rate": 1.931493880645983e-06, "loss": 0.5971, "step": 11349 }, { "epoch": 0.72, "grad_norm": 0.8686321973800659, "learning_rate": 1.9306838838940035e-06, "loss": 0.552, "step": 11350 }, { "epoch": 0.72, "grad_norm": 0.9111335873603821, "learning_rate": 1.9298740163802523e-06, "loss": 0.597, "step": 11351 }, { "epoch": 0.72, "grad_norm": 0.8459984064102173, "learning_rate": 1.929064278138823e-06, "loss": 0.5981, "step": 11352 }, { "epoch": 0.72, "grad_norm": 0.9223425388336182, "learning_rate": 1.928254669203815e-06, "loss": 0.6072, "step": 11353 }, { "epoch": 0.72, "grad_norm": 0.9331545233726501, "learning_rate": 1.9274451896093164e-06, "loss": 0.6259, "step": 11354 }, { "epoch": 0.72, "grad_norm": 0.8875550627708435, "learning_rate": 1.926635839389413e-06, "loss": 0.5238, "step": 11355 }, { "epoch": 0.72, "grad_norm": 0.8523957133293152, "learning_rate": 1.925826618578182e-06, "loss": 0.5549, "step": 11356 }, { "epoch": 0.72, "grad_norm": 0.8597106337547302, "learning_rate": 1.9250175272097003e-06, "loss": 0.5347, "step": 11357 }, { "epoch": 0.72, "grad_norm": 0.865592360496521, "learning_rate": 1.9242085653180314e-06, "loss": 0.5577, "step": 11358 }, { "epoch": 0.72, "grad_norm": 0.9318245649337769, "learning_rate": 1.9233997329372402e-06, "loss": 0.6011, "step": 11359 }, { "epoch": 0.72, "grad_norm": 0.9316973090171814, "learning_rate": 1.9225910301013834e-06, "loss": 0.5626, "step": 11360 }, { "epoch": 0.72, "grad_norm": 0.9204529523849487, "learning_rate": 1.9217824568445125e-06, "loss": 0.6112, "step": 11361 }, { "epoch": 0.72, "grad_norm": 0.8521873950958252, "learning_rate": 1.920974013200676e-06, "loss": 0.5743, "step": 11362 }, { "epoch": 0.72, "grad_norm": 0.8950269222259521, "learning_rate": 1.9201656992039092e-06, "loss": 0.5561, "step": 11363 }, { "epoch": 0.72, "grad_norm": 0.9938862323760986, "learning_rate": 1.9193575148882526e-06, "loss": 0.6297, "step": 11364 }, { "epoch": 0.72, "grad_norm": 0.9481446743011475, "learning_rate": 1.918549460287736e-06, "loss": 0.6234, "step": 11365 }, { "epoch": 0.72, "grad_norm": 0.8465852737426758, "learning_rate": 1.9177415354363802e-06, "loss": 0.5314, "step": 11366 }, { "epoch": 0.72, "grad_norm": 0.8947675824165344, "learning_rate": 1.916933740368206e-06, "loss": 0.5545, "step": 11367 }, { "epoch": 0.72, "grad_norm": 0.8321127891540527, "learning_rate": 1.916126075117227e-06, "loss": 0.5376, "step": 11368 }, { "epoch": 0.72, "grad_norm": 0.8513221740722656, "learning_rate": 1.9153185397174506e-06, "loss": 0.5966, "step": 11369 }, { "epoch": 0.72, "grad_norm": 0.8549067974090576, "learning_rate": 1.9145111342028817e-06, "loss": 0.5757, "step": 11370 }, { "epoch": 0.72, "grad_norm": 0.9566909074783325, "learning_rate": 1.9137038586075117e-06, "loss": 0.5956, "step": 11371 }, { "epoch": 0.72, "grad_norm": 0.8595585823059082, "learning_rate": 1.9128967129653375e-06, "loss": 0.5711, "step": 11372 }, { "epoch": 0.72, "grad_norm": 0.8773391842842102, "learning_rate": 1.9120896973103453e-06, "loss": 0.6042, "step": 11373 }, { "epoch": 0.72, "grad_norm": 0.970403790473938, "learning_rate": 1.911282811676512e-06, "loss": 0.6186, "step": 11374 }, { "epoch": 0.72, "grad_norm": 0.838644802570343, "learning_rate": 1.9104760560978147e-06, "loss": 0.5634, "step": 11375 }, { "epoch": 0.72, "grad_norm": 0.8876333236694336, "learning_rate": 1.909669430608223e-06, "loss": 0.543, "step": 11376 }, { "epoch": 0.72, "grad_norm": 0.9171946048736572, "learning_rate": 1.908862935241701e-06, "loss": 0.5908, "step": 11377 }, { "epoch": 0.72, "grad_norm": 0.8265011310577393, "learning_rate": 1.9080565700322095e-06, "loss": 0.5305, "step": 11378 }, { "epoch": 0.72, "grad_norm": 0.9624162912368774, "learning_rate": 1.9072503350136979e-06, "loss": 0.5419, "step": 11379 }, { "epoch": 0.72, "grad_norm": 0.8564184308052063, "learning_rate": 1.9064442302201136e-06, "loss": 0.5412, "step": 11380 }, { "epoch": 0.72, "grad_norm": 0.9162154793739319, "learning_rate": 1.9056382556854053e-06, "loss": 0.598, "step": 11381 }, { "epoch": 0.72, "grad_norm": 0.8687850832939148, "learning_rate": 1.9048324114435036e-06, "loss": 0.5839, "step": 11382 }, { "epoch": 0.72, "grad_norm": 0.8435238003730774, "learning_rate": 1.9040266975283417e-06, "loss": 0.5566, "step": 11383 }, { "epoch": 0.72, "grad_norm": 0.8652970790863037, "learning_rate": 1.9032211139738455e-06, "loss": 0.5781, "step": 11384 }, { "epoch": 0.72, "grad_norm": 0.863194465637207, "learning_rate": 1.902415660813935e-06, "loss": 0.6132, "step": 11385 }, { "epoch": 0.72, "grad_norm": 0.9415022134780884, "learning_rate": 1.9016103380825274e-06, "loss": 0.5613, "step": 11386 }, { "epoch": 0.72, "grad_norm": 0.9320086240768433, "learning_rate": 1.900805145813528e-06, "loss": 0.5869, "step": 11387 }, { "epoch": 0.72, "grad_norm": 0.8881116509437561, "learning_rate": 1.9000000840408421e-06, "loss": 0.5784, "step": 11388 }, { "epoch": 0.72, "grad_norm": 0.8864371180534363, "learning_rate": 1.8991951527983694e-06, "loss": 0.5878, "step": 11389 }, { "epoch": 0.72, "grad_norm": 0.922127902507782, "learning_rate": 1.8983903521200015e-06, "loss": 0.5998, "step": 11390 }, { "epoch": 0.72, "grad_norm": 0.8979513049125671, "learning_rate": 1.8975856820396265e-06, "loss": 0.5647, "step": 11391 }, { "epoch": 0.72, "grad_norm": 0.8566264510154724, "learning_rate": 1.8967811425911275e-06, "loss": 0.6127, "step": 11392 }, { "epoch": 0.72, "grad_norm": 0.9450397491455078, "learning_rate": 1.8959767338083758e-06, "loss": 0.5713, "step": 11393 }, { "epoch": 0.72, "grad_norm": 0.8804638385772705, "learning_rate": 1.8951724557252472e-06, "loss": 0.5315, "step": 11394 }, { "epoch": 0.72, "grad_norm": 0.8379377722740173, "learning_rate": 1.8943683083756075e-06, "loss": 0.5763, "step": 11395 }, { "epoch": 0.72, "grad_norm": 0.9465924501419067, "learning_rate": 1.8935642917933128e-06, "loss": 0.6547, "step": 11396 }, { "epoch": 0.72, "grad_norm": 0.9012244343757629, "learning_rate": 1.8927604060122196e-06, "loss": 0.5796, "step": 11397 }, { "epoch": 0.72, "grad_norm": 0.9090456962585449, "learning_rate": 1.8919566510661758e-06, "loss": 0.5763, "step": 11398 }, { "epoch": 0.72, "grad_norm": 0.8996036648750305, "learning_rate": 1.891153026989026e-06, "loss": 0.5953, "step": 11399 }, { "epoch": 0.72, "grad_norm": 0.8748338222503662, "learning_rate": 1.8903495338146089e-06, "loss": 0.5869, "step": 11400 }, { "epoch": 0.72, "grad_norm": 0.8042425513267517, "learning_rate": 1.8895461715767517e-06, "loss": 0.5118, "step": 11401 }, { "epoch": 0.72, "grad_norm": 0.8664458990097046, "learning_rate": 1.888742940309286e-06, "loss": 0.5208, "step": 11402 }, { "epoch": 0.72, "grad_norm": 0.9896268248558044, "learning_rate": 1.8879398400460342e-06, "loss": 0.6079, "step": 11403 }, { "epoch": 0.72, "grad_norm": 0.849636435508728, "learning_rate": 1.8871368708208076e-06, "loss": 0.5507, "step": 11404 }, { "epoch": 0.72, "grad_norm": 0.8900498151779175, "learning_rate": 1.8863340326674184e-06, "loss": 0.6023, "step": 11405 }, { "epoch": 0.72, "grad_norm": 0.8602756261825562, "learning_rate": 1.8855313256196722e-06, "loss": 0.6087, "step": 11406 }, { "epoch": 0.72, "grad_norm": 0.8602705001831055, "learning_rate": 1.8847287497113664e-06, "loss": 0.5247, "step": 11407 }, { "epoch": 0.72, "grad_norm": 0.879084587097168, "learning_rate": 1.883926304976298e-06, "loss": 0.544, "step": 11408 }, { "epoch": 0.72, "grad_norm": 0.8612745404243469, "learning_rate": 1.8831239914482512e-06, "loss": 0.5575, "step": 11409 }, { "epoch": 0.72, "grad_norm": 0.8964210152626038, "learning_rate": 1.8823218091610085e-06, "loss": 0.6014, "step": 11410 }, { "epoch": 0.72, "grad_norm": 0.891295313835144, "learning_rate": 1.8815197581483523e-06, "loss": 0.5591, "step": 11411 }, { "epoch": 0.72, "grad_norm": 0.9350022673606873, "learning_rate": 1.880717838444049e-06, "loss": 0.5922, "step": 11412 }, { "epoch": 0.72, "grad_norm": 0.8329875469207764, "learning_rate": 1.879916050081866e-06, "loss": 0.5386, "step": 11413 }, { "epoch": 0.72, "grad_norm": 0.8741490244865417, "learning_rate": 1.8791143930955641e-06, "loss": 0.5588, "step": 11414 }, { "epoch": 0.72, "grad_norm": 0.8966600894927979, "learning_rate": 1.8783128675188988e-06, "loss": 0.6352, "step": 11415 }, { "epoch": 0.72, "grad_norm": 0.8930423855781555, "learning_rate": 1.8775114733856203e-06, "loss": 0.5551, "step": 11416 }, { "epoch": 0.72, "grad_norm": 0.835688591003418, "learning_rate": 1.87671021072947e-06, "loss": 0.5729, "step": 11417 }, { "epoch": 0.72, "grad_norm": 0.9322239756584167, "learning_rate": 1.8759090795841856e-06, "loss": 0.5968, "step": 11418 }, { "epoch": 0.72, "grad_norm": 0.9086197018623352, "learning_rate": 1.8751080799835059e-06, "loss": 0.5331, "step": 11419 }, { "epoch": 0.72, "grad_norm": 0.8828703165054321, "learning_rate": 1.8743072119611522e-06, "loss": 0.5389, "step": 11420 }, { "epoch": 0.72, "grad_norm": 0.9224802255630493, "learning_rate": 1.873506475550848e-06, "loss": 0.6192, "step": 11421 }, { "epoch": 0.72, "grad_norm": 0.8025329113006592, "learning_rate": 1.8727058707863121e-06, "loss": 0.5028, "step": 11422 }, { "epoch": 0.72, "grad_norm": 0.8815546631813049, "learning_rate": 1.871905397701249e-06, "loss": 0.6344, "step": 11423 }, { "epoch": 0.72, "grad_norm": 0.9033147692680359, "learning_rate": 1.8711050563293714e-06, "loss": 0.5781, "step": 11424 }, { "epoch": 0.72, "grad_norm": 0.8625471591949463, "learning_rate": 1.8703048467043732e-06, "loss": 0.5251, "step": 11425 }, { "epoch": 0.72, "grad_norm": 0.9039772152900696, "learning_rate": 1.869504768859951e-06, "loss": 0.5467, "step": 11426 }, { "epoch": 0.72, "grad_norm": 0.9913069605827332, "learning_rate": 1.8687048228297928e-06, "loss": 0.6187, "step": 11427 }, { "epoch": 0.72, "grad_norm": 0.9143205881118774, "learning_rate": 1.8679050086475814e-06, "loss": 0.5619, "step": 11428 }, { "epoch": 0.72, "grad_norm": 0.9044589400291443, "learning_rate": 1.867105326346994e-06, "loss": 0.6366, "step": 11429 }, { "epoch": 0.72, "grad_norm": 0.8686836361885071, "learning_rate": 1.8663057759617048e-06, "loss": 0.5955, "step": 11430 }, { "epoch": 0.72, "grad_norm": 0.9669235348701477, "learning_rate": 1.8655063575253746e-06, "loss": 0.5954, "step": 11431 }, { "epoch": 0.72, "grad_norm": 0.9471785426139832, "learning_rate": 1.8647070710716709e-06, "loss": 0.6246, "step": 11432 }, { "epoch": 0.72, "grad_norm": 0.8249446153640747, "learning_rate": 1.8639079166342438e-06, "loss": 0.5258, "step": 11433 }, { "epoch": 0.72, "grad_norm": 0.8744306564331055, "learning_rate": 1.8631088942467452e-06, "loss": 0.5818, "step": 11434 }, { "epoch": 0.72, "grad_norm": 0.8946027159690857, "learning_rate": 1.8623100039428194e-06, "loss": 0.5504, "step": 11435 }, { "epoch": 0.72, "grad_norm": 0.8908700346946716, "learning_rate": 1.8615112457561013e-06, "loss": 0.582, "step": 11436 }, { "epoch": 0.72, "grad_norm": 0.86359703540802, "learning_rate": 1.860712619720228e-06, "loss": 0.6233, "step": 11437 }, { "epoch": 0.72, "grad_norm": 0.92805016040802, "learning_rate": 1.8599141258688274e-06, "loss": 0.5893, "step": 11438 }, { "epoch": 0.72, "grad_norm": 0.8947566151618958, "learning_rate": 1.8591157642355179e-06, "loss": 0.5218, "step": 11439 }, { "epoch": 0.72, "grad_norm": 0.9130182266235352, "learning_rate": 1.8583175348539173e-06, "loss": 0.5615, "step": 11440 }, { "epoch": 0.72, "grad_norm": 0.8366416096687317, "learning_rate": 1.8575194377576355e-06, "loss": 0.5131, "step": 11441 }, { "epoch": 0.72, "grad_norm": 0.8980015516281128, "learning_rate": 1.856721472980279e-06, "loss": 0.5609, "step": 11442 }, { "epoch": 0.72, "grad_norm": 0.8970168232917786, "learning_rate": 1.855923640555448e-06, "loss": 0.5611, "step": 11443 }, { "epoch": 0.73, "grad_norm": 0.8903645873069763, "learning_rate": 1.8551259405167315e-06, "loss": 0.5523, "step": 11444 }, { "epoch": 0.73, "grad_norm": 0.8101871609687805, "learning_rate": 1.8543283728977234e-06, "loss": 0.5412, "step": 11445 }, { "epoch": 0.73, "grad_norm": 0.8400049209594727, "learning_rate": 1.8535309377320059e-06, "loss": 0.5745, "step": 11446 }, { "epoch": 0.73, "grad_norm": 0.8555065393447876, "learning_rate": 1.8527336350531532e-06, "loss": 0.5557, "step": 11447 }, { "epoch": 0.73, "grad_norm": 0.858265221118927, "learning_rate": 1.851936464894739e-06, "loss": 0.5744, "step": 11448 }, { "epoch": 0.73, "grad_norm": 0.925983190536499, "learning_rate": 1.8511394272903287e-06, "loss": 0.6146, "step": 11449 }, { "epoch": 0.73, "grad_norm": 0.9145652651786804, "learning_rate": 1.8503425222734834e-06, "loss": 0.5427, "step": 11450 }, { "epoch": 0.73, "grad_norm": 0.8962170481681824, "learning_rate": 1.8495457498777585e-06, "loss": 0.6176, "step": 11451 }, { "epoch": 0.73, "grad_norm": 0.8576472997665405, "learning_rate": 1.8487491101367016e-06, "loss": 0.5613, "step": 11452 }, { "epoch": 0.73, "grad_norm": 0.9138413667678833, "learning_rate": 1.8479526030838552e-06, "loss": 0.6351, "step": 11453 }, { "epoch": 0.73, "grad_norm": 0.8952200412750244, "learning_rate": 1.8471562287527627e-06, "loss": 0.5977, "step": 11454 }, { "epoch": 0.73, "grad_norm": 0.9184353351593018, "learning_rate": 1.8463599871769516e-06, "loss": 0.6327, "step": 11455 }, { "epoch": 0.73, "grad_norm": 0.9197295904159546, "learning_rate": 1.8455638783899515e-06, "loss": 0.6019, "step": 11456 }, { "epoch": 0.73, "grad_norm": 0.8394375443458557, "learning_rate": 1.8447679024252825e-06, "loss": 0.5873, "step": 11457 }, { "epoch": 0.73, "grad_norm": 0.8649691939353943, "learning_rate": 1.8439720593164606e-06, "loss": 0.5927, "step": 11458 }, { "epoch": 0.73, "grad_norm": 0.8406863808631897, "learning_rate": 1.8431763490969968e-06, "loss": 0.5791, "step": 11459 }, { "epoch": 0.73, "grad_norm": 0.8677908182144165, "learning_rate": 1.8423807718003967e-06, "loss": 0.5523, "step": 11460 }, { "epoch": 0.73, "grad_norm": 0.8684352040290833, "learning_rate": 1.8415853274601541e-06, "loss": 0.5395, "step": 11461 }, { "epoch": 0.73, "grad_norm": 0.9893013834953308, "learning_rate": 1.8407900161097698e-06, "loss": 0.5907, "step": 11462 }, { "epoch": 0.73, "grad_norm": 0.9031257033348083, "learning_rate": 1.839994837782726e-06, "loss": 0.576, "step": 11463 }, { "epoch": 0.73, "grad_norm": 0.8636795282363892, "learning_rate": 1.8391997925125066e-06, "loss": 0.6069, "step": 11464 }, { "epoch": 0.73, "grad_norm": 0.8956241607666016, "learning_rate": 1.8384048803325887e-06, "loss": 0.5795, "step": 11465 }, { "epoch": 0.73, "grad_norm": 0.857373058795929, "learning_rate": 1.8376101012764424e-06, "loss": 0.5387, "step": 11466 }, { "epoch": 0.73, "grad_norm": 0.875593364238739, "learning_rate": 1.8368154553775342e-06, "loss": 0.5665, "step": 11467 }, { "epoch": 0.73, "grad_norm": 0.90203857421875, "learning_rate": 1.8360209426693242e-06, "loss": 0.5414, "step": 11468 }, { "epoch": 0.73, "grad_norm": 0.8598183393478394, "learning_rate": 1.8352265631852645e-06, "loss": 0.5466, "step": 11469 }, { "epoch": 0.73, "grad_norm": 0.9762682914733887, "learning_rate": 1.8344323169588045e-06, "loss": 0.5858, "step": 11470 }, { "epoch": 0.73, "grad_norm": 0.8744803071022034, "learning_rate": 1.8336382040233874e-06, "loss": 0.5745, "step": 11471 }, { "epoch": 0.73, "grad_norm": 0.8582815527915955, "learning_rate": 1.8328442244124506e-06, "loss": 0.5103, "step": 11472 }, { "epoch": 0.73, "grad_norm": 0.8855782747268677, "learning_rate": 1.8320503781594273e-06, "loss": 0.624, "step": 11473 }, { "epoch": 0.73, "grad_norm": 0.8730263710021973, "learning_rate": 1.8312566652977393e-06, "loss": 0.5789, "step": 11474 }, { "epoch": 0.73, "grad_norm": 0.926342248916626, "learning_rate": 1.8304630858608107e-06, "loss": 0.6076, "step": 11475 }, { "epoch": 0.73, "grad_norm": 0.872226357460022, "learning_rate": 1.8296696398820579e-06, "loss": 0.5964, "step": 11476 }, { "epoch": 0.73, "grad_norm": 0.9411280751228333, "learning_rate": 1.828876327394886e-06, "loss": 0.579, "step": 11477 }, { "epoch": 0.73, "grad_norm": 0.8571912050247192, "learning_rate": 1.8280831484327006e-06, "loss": 0.5736, "step": 11478 }, { "epoch": 0.73, "grad_norm": 0.8927587866783142, "learning_rate": 1.8272901030288991e-06, "loss": 0.6098, "step": 11479 }, { "epoch": 0.73, "grad_norm": 0.845928966999054, "learning_rate": 1.8264971912168744e-06, "loss": 0.5546, "step": 11480 }, { "epoch": 0.73, "grad_norm": 0.8704530000686646, "learning_rate": 1.825704413030015e-06, "loss": 0.5941, "step": 11481 }, { "epoch": 0.73, "grad_norm": 0.9911707639694214, "learning_rate": 1.8249117685016983e-06, "loss": 0.5893, "step": 11482 }, { "epoch": 0.73, "grad_norm": 0.9240842461585999, "learning_rate": 1.8241192576653e-06, "loss": 0.6098, "step": 11483 }, { "epoch": 0.73, "grad_norm": 1.0558674335479736, "learning_rate": 1.8233268805541953e-06, "loss": 0.5122, "step": 11484 }, { "epoch": 0.73, "grad_norm": 0.8903986215591431, "learning_rate": 1.8225346372017432e-06, "loss": 0.5622, "step": 11485 }, { "epoch": 0.73, "grad_norm": 0.9144458174705505, "learning_rate": 1.8217425276413037e-06, "loss": 0.5719, "step": 11486 }, { "epoch": 0.73, "grad_norm": 0.8681707978248596, "learning_rate": 1.8209505519062299e-06, "loss": 0.554, "step": 11487 }, { "epoch": 0.73, "grad_norm": 0.8360025882720947, "learning_rate": 1.8201587100298694e-06, "loss": 0.5348, "step": 11488 }, { "epoch": 0.73, "grad_norm": 0.8641213178634644, "learning_rate": 1.8193670020455656e-06, "loss": 0.5632, "step": 11489 }, { "epoch": 0.73, "grad_norm": 0.9490131735801697, "learning_rate": 1.8185754279866508e-06, "loss": 0.5454, "step": 11490 }, { "epoch": 0.73, "grad_norm": 0.9039450883865356, "learning_rate": 1.8177839878864562e-06, "loss": 0.5132, "step": 11491 }, { "epoch": 0.73, "grad_norm": 0.8818166851997375, "learning_rate": 1.8169926817783106e-06, "loss": 0.5789, "step": 11492 }, { "epoch": 0.73, "grad_norm": 0.9329283237457275, "learning_rate": 1.8162015096955288e-06, "loss": 0.593, "step": 11493 }, { "epoch": 0.73, "grad_norm": 0.9215501546859741, "learning_rate": 1.8154104716714254e-06, "loss": 0.5819, "step": 11494 }, { "epoch": 0.73, "grad_norm": 0.9087458252906799, "learning_rate": 1.814619567739309e-06, "loss": 0.563, "step": 11495 }, { "epoch": 0.73, "grad_norm": 0.9145926237106323, "learning_rate": 1.8138287979324815e-06, "loss": 0.571, "step": 11496 }, { "epoch": 0.73, "grad_norm": 0.8668627142906189, "learning_rate": 1.8130381622842414e-06, "loss": 0.4685, "step": 11497 }, { "epoch": 0.73, "grad_norm": 0.8847333788871765, "learning_rate": 1.8122476608278755e-06, "loss": 0.5919, "step": 11498 }, { "epoch": 0.73, "grad_norm": 0.857651948928833, "learning_rate": 1.8114572935966713e-06, "loss": 0.574, "step": 11499 }, { "epoch": 0.73, "grad_norm": 0.8987635374069214, "learning_rate": 1.8106670606239086e-06, "loss": 0.5646, "step": 11500 }, { "epoch": 0.73, "grad_norm": 0.8433480262756348, "learning_rate": 1.8098769619428607e-06, "loss": 0.5461, "step": 11501 }, { "epoch": 0.73, "grad_norm": 0.8456798791885376, "learning_rate": 1.8090869975867964e-06, "loss": 0.5271, "step": 11502 }, { "epoch": 0.73, "grad_norm": 0.9002053737640381, "learning_rate": 1.8082971675889798e-06, "loss": 0.6169, "step": 11503 }, { "epoch": 0.73, "grad_norm": 0.8994352221488953, "learning_rate": 1.8075074719826636e-06, "loss": 0.5652, "step": 11504 }, { "epoch": 0.73, "grad_norm": 0.8358734846115112, "learning_rate": 1.8067179108011047e-06, "loss": 0.5523, "step": 11505 }, { "epoch": 0.73, "grad_norm": 0.8662353754043579, "learning_rate": 1.8059284840775443e-06, "loss": 0.5645, "step": 11506 }, { "epoch": 0.73, "grad_norm": 0.8541300296783447, "learning_rate": 1.8051391918452244e-06, "loss": 0.5972, "step": 11507 }, { "epoch": 0.73, "grad_norm": 0.9039734601974487, "learning_rate": 1.8043500341373788e-06, "loss": 0.5526, "step": 11508 }, { "epoch": 0.73, "grad_norm": 0.8430119156837463, "learning_rate": 1.8035610109872364e-06, "loss": 0.588, "step": 11509 }, { "epoch": 0.73, "grad_norm": 0.9176574349403381, "learning_rate": 1.8027721224280204e-06, "loss": 0.6203, "step": 11510 }, { "epoch": 0.73, "grad_norm": 0.874259352684021, "learning_rate": 1.8019833684929493e-06, "loss": 0.5835, "step": 11511 }, { "epoch": 0.73, "grad_norm": 0.8891341686248779, "learning_rate": 1.8011947492152303e-06, "loss": 0.59, "step": 11512 }, { "epoch": 0.73, "grad_norm": 0.9052767753601074, "learning_rate": 1.8004062646280762e-06, "loss": 0.5455, "step": 11513 }, { "epoch": 0.73, "grad_norm": 0.9022553563117981, "learning_rate": 1.799617914764682e-06, "loss": 0.5658, "step": 11514 }, { "epoch": 0.73, "grad_norm": 0.9340382814407349, "learning_rate": 1.7988296996582438e-06, "loss": 0.6134, "step": 11515 }, { "epoch": 0.73, "grad_norm": 0.9259970188140869, "learning_rate": 1.7980416193419509e-06, "loss": 0.561, "step": 11516 }, { "epoch": 0.73, "grad_norm": 0.9039214849472046, "learning_rate": 1.7972536738489865e-06, "loss": 0.5975, "step": 11517 }, { "epoch": 0.73, "grad_norm": 0.8830257654190063, "learning_rate": 1.7964658632125286e-06, "loss": 0.6171, "step": 11518 }, { "epoch": 0.73, "grad_norm": 0.8530765771865845, "learning_rate": 1.7956781874657508e-06, "loss": 0.5773, "step": 11519 }, { "epoch": 0.73, "grad_norm": 0.9169198274612427, "learning_rate": 1.7948906466418154e-06, "loss": 0.6076, "step": 11520 }, { "epoch": 0.73, "grad_norm": 0.8221704959869385, "learning_rate": 1.7941032407738857e-06, "loss": 0.5213, "step": 11521 }, { "epoch": 0.73, "grad_norm": 0.9299726486206055, "learning_rate": 1.7933159698951153e-06, "loss": 0.5747, "step": 11522 }, { "epoch": 0.73, "grad_norm": 0.8379265666007996, "learning_rate": 1.7925288340386543e-06, "loss": 0.6024, "step": 11523 }, { "epoch": 0.73, "grad_norm": 0.8032079339027405, "learning_rate": 1.7917418332376463e-06, "loss": 0.5009, "step": 11524 }, { "epoch": 0.73, "grad_norm": 0.885210394859314, "learning_rate": 1.7909549675252291e-06, "loss": 0.5925, "step": 11525 }, { "epoch": 0.73, "grad_norm": 0.8500308394432068, "learning_rate": 1.7901682369345346e-06, "loss": 0.5507, "step": 11526 }, { "epoch": 0.73, "grad_norm": 0.8852202296257019, "learning_rate": 1.7893816414986915e-06, "loss": 0.5658, "step": 11527 }, { "epoch": 0.73, "grad_norm": 0.9225091934204102, "learning_rate": 1.7885951812508163e-06, "loss": 0.5696, "step": 11528 }, { "epoch": 0.73, "grad_norm": 0.9719336032867432, "learning_rate": 1.787808856224027e-06, "loss": 0.6464, "step": 11529 }, { "epoch": 0.73, "grad_norm": 0.9021725654602051, "learning_rate": 1.7870226664514318e-06, "loss": 0.5704, "step": 11530 }, { "epoch": 0.73, "grad_norm": 0.8923550844192505, "learning_rate": 1.786236611966135e-06, "loss": 0.605, "step": 11531 }, { "epoch": 0.73, "grad_norm": 0.9258638620376587, "learning_rate": 1.7854506928012349e-06, "loss": 0.5321, "step": 11532 }, { "epoch": 0.73, "grad_norm": 0.8562982082366943, "learning_rate": 1.784664908989825e-06, "loss": 0.5608, "step": 11533 }, { "epoch": 0.73, "grad_norm": 0.9579175710678101, "learning_rate": 1.7838792605649874e-06, "loss": 0.6364, "step": 11534 }, { "epoch": 0.73, "grad_norm": 0.8694881200790405, "learning_rate": 1.7830937475598092e-06, "loss": 0.563, "step": 11535 }, { "epoch": 0.73, "grad_norm": 0.9621427655220032, "learning_rate": 1.7823083700073607e-06, "loss": 0.5745, "step": 11536 }, { "epoch": 0.73, "grad_norm": 0.8175010085105896, "learning_rate": 1.781523127940713e-06, "loss": 0.5574, "step": 11537 }, { "epoch": 0.73, "grad_norm": 0.9051305055618286, "learning_rate": 1.7807380213929304e-06, "loss": 0.5485, "step": 11538 }, { "epoch": 0.73, "grad_norm": 0.9284529089927673, "learning_rate": 1.7799530503970707e-06, "loss": 0.5776, "step": 11539 }, { "epoch": 0.73, "grad_norm": 0.9079828262329102, "learning_rate": 1.7791682149861866e-06, "loss": 0.5703, "step": 11540 }, { "epoch": 0.73, "grad_norm": 0.8822880387306213, "learning_rate": 1.778383515193326e-06, "loss": 0.6083, "step": 11541 }, { "epoch": 0.73, "grad_norm": 0.8300610184669495, "learning_rate": 1.777598951051525e-06, "loss": 0.5038, "step": 11542 }, { "epoch": 0.73, "grad_norm": 0.8688510656356812, "learning_rate": 1.7768145225938254e-06, "loss": 0.5166, "step": 11543 }, { "epoch": 0.73, "grad_norm": 0.8871831297874451, "learning_rate": 1.7760302298532522e-06, "loss": 0.5148, "step": 11544 }, { "epoch": 0.73, "grad_norm": 0.8553435802459717, "learning_rate": 1.7752460728628308e-06, "loss": 0.5087, "step": 11545 }, { "epoch": 0.73, "grad_norm": 0.8729947805404663, "learning_rate": 1.7744620516555804e-06, "loss": 0.5862, "step": 11546 }, { "epoch": 0.73, "grad_norm": 0.9969896078109741, "learning_rate": 1.7736781662645092e-06, "loss": 0.6322, "step": 11547 }, { "epoch": 0.73, "grad_norm": 0.9456208944320679, "learning_rate": 1.7728944167226287e-06, "loss": 0.5841, "step": 11548 }, { "epoch": 0.73, "grad_norm": 0.9061382412910461, "learning_rate": 1.772110803062939e-06, "loss": 0.5942, "step": 11549 }, { "epoch": 0.73, "grad_norm": 0.9012535810470581, "learning_rate": 1.7713273253184331e-06, "loss": 0.5462, "step": 11550 }, { "epoch": 0.73, "grad_norm": 0.8942342400550842, "learning_rate": 1.7705439835221022e-06, "loss": 0.6231, "step": 11551 }, { "epoch": 0.73, "grad_norm": 0.9196451902389526, "learning_rate": 1.7697607777069291e-06, "loss": 0.5851, "step": 11552 }, { "epoch": 0.73, "grad_norm": 0.9161397814750671, "learning_rate": 1.7689777079058929e-06, "loss": 0.5397, "step": 11553 }, { "epoch": 0.73, "grad_norm": 0.870907187461853, "learning_rate": 1.7681947741519668e-06, "loss": 0.5578, "step": 11554 }, { "epoch": 0.73, "grad_norm": 0.9699699282646179, "learning_rate": 1.7674119764781129e-06, "loss": 0.639, "step": 11555 }, { "epoch": 0.73, "grad_norm": 0.8969030380249023, "learning_rate": 1.7666293149172969e-06, "loss": 0.6149, "step": 11556 }, { "epoch": 0.73, "grad_norm": 0.8886342644691467, "learning_rate": 1.7658467895024744e-06, "loss": 0.5669, "step": 11557 }, { "epoch": 0.73, "grad_norm": 0.9235454797744751, "learning_rate": 1.7650644002665906e-06, "loss": 0.6232, "step": 11558 }, { "epoch": 0.73, "grad_norm": 0.8997302055358887, "learning_rate": 1.7642821472425918e-06, "loss": 0.5862, "step": 11559 }, { "epoch": 0.73, "grad_norm": 0.965051531791687, "learning_rate": 1.7635000304634154e-06, "loss": 0.569, "step": 11560 }, { "epoch": 0.73, "grad_norm": 0.8321825861930847, "learning_rate": 1.762718049961994e-06, "loss": 0.5761, "step": 11561 }, { "epoch": 0.73, "grad_norm": 0.8804091811180115, "learning_rate": 1.7619362057712552e-06, "loss": 0.5491, "step": 11562 }, { "epoch": 0.73, "grad_norm": 0.8647125363349915, "learning_rate": 1.761154497924117e-06, "loss": 0.5474, "step": 11563 }, { "epoch": 0.73, "grad_norm": 0.9047082662582397, "learning_rate": 1.7603729264534936e-06, "loss": 0.5333, "step": 11564 }, { "epoch": 0.73, "grad_norm": 0.8758959174156189, "learning_rate": 1.7595914913923001e-06, "loss": 0.6063, "step": 11565 }, { "epoch": 0.73, "grad_norm": 0.8838177919387817, "learning_rate": 1.7588101927734346e-06, "loss": 0.5993, "step": 11566 }, { "epoch": 0.73, "grad_norm": 0.8192143440246582, "learning_rate": 1.7580290306297965e-06, "loss": 0.5965, "step": 11567 }, { "epoch": 0.73, "grad_norm": 0.8735188245773315, "learning_rate": 1.7572480049942781e-06, "loss": 0.6107, "step": 11568 }, { "epoch": 0.73, "grad_norm": 0.8884807229042053, "learning_rate": 1.7564671158997653e-06, "loss": 0.5726, "step": 11569 }, { "epoch": 0.73, "grad_norm": 0.9197561144828796, "learning_rate": 1.755686363379141e-06, "loss": 0.5968, "step": 11570 }, { "epoch": 0.73, "grad_norm": 0.9510713815689087, "learning_rate": 1.7549057474652753e-06, "loss": 0.6455, "step": 11571 }, { "epoch": 0.73, "grad_norm": 0.9018495082855225, "learning_rate": 1.7541252681910386e-06, "loss": 0.5914, "step": 11572 }, { "epoch": 0.73, "grad_norm": 0.8656198382377625, "learning_rate": 1.7533449255892986e-06, "loss": 0.5308, "step": 11573 }, { "epoch": 0.73, "grad_norm": 0.9031473994255066, "learning_rate": 1.7525647196929079e-06, "loss": 0.5924, "step": 11574 }, { "epoch": 0.73, "grad_norm": 0.966624915599823, "learning_rate": 1.7517846505347197e-06, "loss": 0.5615, "step": 11575 }, { "epoch": 0.73, "grad_norm": 0.8910838961601257, "learning_rate": 1.751004718147582e-06, "loss": 0.5599, "step": 11576 }, { "epoch": 0.73, "grad_norm": 0.8924875855445862, "learning_rate": 1.7502249225643291e-06, "loss": 0.5545, "step": 11577 }, { "epoch": 0.73, "grad_norm": 0.8473634123802185, "learning_rate": 1.7494452638178039e-06, "loss": 0.5356, "step": 11578 }, { "epoch": 0.73, "grad_norm": 0.8900013566017151, "learning_rate": 1.7486657419408287e-06, "loss": 0.5623, "step": 11579 }, { "epoch": 0.73, "grad_norm": 0.859286367893219, "learning_rate": 1.7478863569662286e-06, "loss": 0.5538, "step": 11580 }, { "epoch": 0.73, "grad_norm": 0.9663856029510498, "learning_rate": 1.7471071089268204e-06, "loss": 0.5877, "step": 11581 }, { "epoch": 0.73, "grad_norm": 0.9221107959747314, "learning_rate": 1.7463279978554166e-06, "loss": 0.5999, "step": 11582 }, { "epoch": 0.73, "grad_norm": 0.8356893062591553, "learning_rate": 1.745549023784821e-06, "loss": 0.5129, "step": 11583 }, { "epoch": 0.73, "grad_norm": 0.9485192894935608, "learning_rate": 1.7447701867478372e-06, "loss": 0.5601, "step": 11584 }, { "epoch": 0.73, "grad_norm": 0.9047239422798157, "learning_rate": 1.7439914867772529e-06, "loss": 0.5176, "step": 11585 }, { "epoch": 0.73, "grad_norm": 0.9019331932067871, "learning_rate": 1.7432129239058637e-06, "loss": 0.5698, "step": 11586 }, { "epoch": 0.73, "grad_norm": 0.8926165699958801, "learning_rate": 1.7424344981664475e-06, "loss": 0.6147, "step": 11587 }, { "epoch": 0.73, "grad_norm": 0.9237696528434753, "learning_rate": 1.7416562095917822e-06, "loss": 0.5531, "step": 11588 }, { "epoch": 0.73, "grad_norm": 0.8881582021713257, "learning_rate": 1.7408780582146383e-06, "loss": 0.5981, "step": 11589 }, { "epoch": 0.73, "grad_norm": 0.8784075975418091, "learning_rate": 1.7401000440677824e-06, "loss": 0.5443, "step": 11590 }, { "epoch": 0.73, "grad_norm": 0.8897961974143982, "learning_rate": 1.7393221671839727e-06, "loss": 0.5622, "step": 11591 }, { "epoch": 0.73, "grad_norm": 0.9459214806556702, "learning_rate": 1.7385444275959657e-06, "loss": 0.5154, "step": 11592 }, { "epoch": 0.73, "grad_norm": 0.8417472839355469, "learning_rate": 1.7377668253365054e-06, "loss": 0.5909, "step": 11593 }, { "epoch": 0.73, "grad_norm": 0.8860768675804138, "learning_rate": 1.7369893604383353e-06, "loss": 0.5721, "step": 11594 }, { "epoch": 0.73, "grad_norm": 0.8790547251701355, "learning_rate": 1.736212032934192e-06, "loss": 0.5747, "step": 11595 }, { "epoch": 0.73, "grad_norm": 0.8245856165885925, "learning_rate": 1.7354348428568063e-06, "loss": 0.5412, "step": 11596 }, { "epoch": 0.73, "grad_norm": 0.9556723237037659, "learning_rate": 1.7346577902389028e-06, "loss": 0.5856, "step": 11597 }, { "epoch": 0.73, "grad_norm": 0.8890882730484009, "learning_rate": 1.7338808751132002e-06, "loss": 0.564, "step": 11598 }, { "epoch": 0.73, "grad_norm": 0.8770986795425415, "learning_rate": 1.7331040975124125e-06, "loss": 0.5536, "step": 11599 }, { "epoch": 0.73, "grad_norm": 0.8428150415420532, "learning_rate": 1.7323274574692479e-06, "loss": 0.529, "step": 11600 }, { "epoch": 0.73, "grad_norm": 0.9159516096115112, "learning_rate": 1.7315509550164044e-06, "loss": 0.5696, "step": 11601 }, { "epoch": 0.74, "grad_norm": 0.8555203676223755, "learning_rate": 1.730774590186579e-06, "loss": 0.6464, "step": 11602 }, { "epoch": 0.74, "grad_norm": 0.911897599697113, "learning_rate": 1.7299983630124663e-06, "loss": 0.5728, "step": 11603 }, { "epoch": 0.74, "grad_norm": 0.8873314261436462, "learning_rate": 1.729222273526745e-06, "loss": 0.5747, "step": 11604 }, { "epoch": 0.74, "grad_norm": 0.8664464354515076, "learning_rate": 1.7284463217620955e-06, "loss": 0.551, "step": 11605 }, { "epoch": 0.74, "grad_norm": 0.9194732308387756, "learning_rate": 1.727670507751193e-06, "loss": 0.5568, "step": 11606 }, { "epoch": 0.74, "grad_norm": 0.9157373905181885, "learning_rate": 1.7268948315266975e-06, "loss": 0.6275, "step": 11607 }, { "epoch": 0.74, "grad_norm": 0.9254802465438843, "learning_rate": 1.7261192931212783e-06, "loss": 0.604, "step": 11608 }, { "epoch": 0.74, "grad_norm": 0.870588481426239, "learning_rate": 1.7253438925675847e-06, "loss": 0.526, "step": 11609 }, { "epoch": 0.74, "grad_norm": 0.9618417024612427, "learning_rate": 1.7245686298982678e-06, "loss": 0.6359, "step": 11610 }, { "epoch": 0.74, "grad_norm": 0.9000369310379028, "learning_rate": 1.723793505145972e-06, "loss": 0.6009, "step": 11611 }, { "epoch": 0.74, "grad_norm": 0.8883331418037415, "learning_rate": 1.7230185183433345e-06, "loss": 0.5614, "step": 11612 }, { "epoch": 0.74, "grad_norm": 0.8387264609336853, "learning_rate": 1.722243669522987e-06, "loss": 0.5603, "step": 11613 }, { "epoch": 0.74, "grad_norm": 0.8569300770759583, "learning_rate": 1.7214689587175582e-06, "loss": 0.6069, "step": 11614 }, { "epoch": 0.74, "grad_norm": 0.8399550318717957, "learning_rate": 1.720694385959663e-06, "loss": 0.5225, "step": 11615 }, { "epoch": 0.74, "grad_norm": 0.8741680979728699, "learning_rate": 1.7199199512819225e-06, "loss": 0.5591, "step": 11616 }, { "epoch": 0.74, "grad_norm": 0.9207227826118469, "learning_rate": 1.7191456547169405e-06, "loss": 0.6151, "step": 11617 }, { "epoch": 0.74, "grad_norm": 0.8906126022338867, "learning_rate": 1.718371496297322e-06, "loss": 0.606, "step": 11618 }, { "epoch": 0.74, "grad_norm": 0.9442402720451355, "learning_rate": 1.717597476055664e-06, "loss": 0.6094, "step": 11619 }, { "epoch": 0.74, "grad_norm": 0.9012939929962158, "learning_rate": 1.716823594024557e-06, "loss": 0.5826, "step": 11620 }, { "epoch": 0.74, "grad_norm": 0.880403995513916, "learning_rate": 1.716049850236588e-06, "loss": 0.5725, "step": 11621 }, { "epoch": 0.74, "grad_norm": 0.9011920690536499, "learning_rate": 1.7152762447243365e-06, "loss": 0.5993, "step": 11622 }, { "epoch": 0.74, "grad_norm": 0.8702940940856934, "learning_rate": 1.7145027775203748e-06, "loss": 0.5512, "step": 11623 }, { "epoch": 0.74, "grad_norm": 0.8984467387199402, "learning_rate": 1.7137294486572714e-06, "loss": 0.5759, "step": 11624 }, { "epoch": 0.74, "grad_norm": 0.9334822297096252, "learning_rate": 1.7129562581675885e-06, "loss": 0.5788, "step": 11625 }, { "epoch": 0.74, "grad_norm": 0.8600862622261047, "learning_rate": 1.7121832060838833e-06, "loss": 0.5691, "step": 11626 }, { "epoch": 0.74, "grad_norm": 0.9075511693954468, "learning_rate": 1.711410292438707e-06, "loss": 0.6158, "step": 11627 }, { "epoch": 0.74, "grad_norm": 0.8380544185638428, "learning_rate": 1.7106375172646e-06, "loss": 0.5239, "step": 11628 }, { "epoch": 0.74, "grad_norm": 0.8987744450569153, "learning_rate": 1.709864880594106e-06, "loss": 0.5677, "step": 11629 }, { "epoch": 0.74, "grad_norm": 0.8771459460258484, "learning_rate": 1.7090923824597578e-06, "loss": 0.6342, "step": 11630 }, { "epoch": 0.74, "grad_norm": 0.9104797840118408, "learning_rate": 1.70832002289408e-06, "loss": 0.6225, "step": 11631 }, { "epoch": 0.74, "grad_norm": 0.8693386316299438, "learning_rate": 1.7075478019295943e-06, "loss": 0.5796, "step": 11632 }, { "epoch": 0.74, "grad_norm": 0.8541246056556702, "learning_rate": 1.7067757195988178e-06, "loss": 0.5259, "step": 11633 }, { "epoch": 0.74, "grad_norm": 0.9266880750656128, "learning_rate": 1.706003775934259e-06, "loss": 0.5968, "step": 11634 }, { "epoch": 0.74, "grad_norm": 0.8783169388771057, "learning_rate": 1.705231970968424e-06, "loss": 0.5978, "step": 11635 }, { "epoch": 0.74, "grad_norm": 0.9030970335006714, "learning_rate": 1.704460304733806e-06, "loss": 0.6016, "step": 11636 }, { "epoch": 0.74, "grad_norm": 0.9360423684120178, "learning_rate": 1.7036887772629012e-06, "loss": 0.6177, "step": 11637 }, { "epoch": 0.74, "grad_norm": 0.8983248472213745, "learning_rate": 1.7029173885881973e-06, "loss": 0.5724, "step": 11638 }, { "epoch": 0.74, "grad_norm": 0.8801354765892029, "learning_rate": 1.7021461387421705e-06, "loss": 0.5717, "step": 11639 }, { "epoch": 0.74, "grad_norm": 0.8851686120033264, "learning_rate": 1.7013750277572977e-06, "loss": 0.5777, "step": 11640 }, { "epoch": 0.74, "grad_norm": 0.9012311100959778, "learning_rate": 1.7006040556660468e-06, "loss": 0.5995, "step": 11641 }, { "epoch": 0.74, "grad_norm": 0.7989736199378967, "learning_rate": 1.6998332225008817e-06, "loss": 0.5581, "step": 11642 }, { "epoch": 0.74, "grad_norm": 0.8802455067634583, "learning_rate": 1.6990625282942607e-06, "loss": 0.5862, "step": 11643 }, { "epoch": 0.74, "grad_norm": 0.938679039478302, "learning_rate": 1.6982919730786323e-06, "loss": 0.5618, "step": 11644 }, { "epoch": 0.74, "grad_norm": 0.9325195550918579, "learning_rate": 1.697521556886441e-06, "loss": 0.6066, "step": 11645 }, { "epoch": 0.74, "grad_norm": 0.8584638237953186, "learning_rate": 1.6967512797501317e-06, "loss": 0.5666, "step": 11646 }, { "epoch": 0.74, "grad_norm": 0.9101821780204773, "learning_rate": 1.6959811417021338e-06, "loss": 0.5435, "step": 11647 }, { "epoch": 0.74, "grad_norm": 0.9440627694129944, "learning_rate": 1.6952111427748758e-06, "loss": 0.5705, "step": 11648 }, { "epoch": 0.74, "grad_norm": 0.9121119379997253, "learning_rate": 1.69444128300078e-06, "loss": 0.538, "step": 11649 }, { "epoch": 0.74, "grad_norm": 0.8702934980392456, "learning_rate": 1.6936715624122623e-06, "loss": 0.561, "step": 11650 }, { "epoch": 0.74, "grad_norm": 0.872009813785553, "learning_rate": 1.6929019810417352e-06, "loss": 0.5535, "step": 11651 }, { "epoch": 0.74, "grad_norm": 0.8676707744598389, "learning_rate": 1.6921325389215993e-06, "loss": 0.5736, "step": 11652 }, { "epoch": 0.74, "grad_norm": 0.8695118427276611, "learning_rate": 1.6913632360842553e-06, "loss": 0.5864, "step": 11653 }, { "epoch": 0.74, "grad_norm": 0.8931376934051514, "learning_rate": 1.6905940725620951e-06, "loss": 0.5808, "step": 11654 }, { "epoch": 0.74, "grad_norm": 0.9246284365653992, "learning_rate": 1.6898250483875063e-06, "loss": 0.6203, "step": 11655 }, { "epoch": 0.74, "grad_norm": 0.8424333333969116, "learning_rate": 1.6890561635928692e-06, "loss": 0.5167, "step": 11656 }, { "epoch": 0.74, "grad_norm": 0.8894586563110352, "learning_rate": 1.6882874182105613e-06, "loss": 0.5479, "step": 11657 }, { "epoch": 0.74, "grad_norm": 0.8806304931640625, "learning_rate": 1.6875188122729458e-06, "loss": 0.5519, "step": 11658 }, { "epoch": 0.74, "grad_norm": 0.8468473553657532, "learning_rate": 1.6867503458123913e-06, "loss": 0.5492, "step": 11659 }, { "epoch": 0.74, "grad_norm": 0.8903117775917053, "learning_rate": 1.6859820188612557e-06, "loss": 0.5855, "step": 11660 }, { "epoch": 0.74, "grad_norm": 0.8897051215171814, "learning_rate": 1.6852138314518873e-06, "loss": 0.5357, "step": 11661 }, { "epoch": 0.74, "grad_norm": 0.9220659732818604, "learning_rate": 1.6844457836166329e-06, "loss": 0.5354, "step": 11662 }, { "epoch": 0.74, "grad_norm": 0.9147717356681824, "learning_rate": 1.6836778753878324e-06, "loss": 0.5965, "step": 11663 }, { "epoch": 0.74, "grad_norm": 0.9581725597381592, "learning_rate": 1.68291010679782e-06, "loss": 0.556, "step": 11664 }, { "epoch": 0.74, "grad_norm": 0.8362496495246887, "learning_rate": 1.6821424778789252e-06, "loss": 0.556, "step": 11665 }, { "epoch": 0.74, "grad_norm": 0.8997658491134644, "learning_rate": 1.6813749886634657e-06, "loss": 0.5754, "step": 11666 }, { "epoch": 0.74, "grad_norm": 0.8712424039840698, "learning_rate": 1.6806076391837622e-06, "loss": 0.5229, "step": 11667 }, { "epoch": 0.74, "grad_norm": 0.9129472374916077, "learning_rate": 1.6798404294721254e-06, "loss": 0.5505, "step": 11668 }, { "epoch": 0.74, "grad_norm": 0.8357523083686829, "learning_rate": 1.6790733595608567e-06, "loss": 0.5906, "step": 11669 }, { "epoch": 0.74, "grad_norm": 0.9335298538208008, "learning_rate": 1.6783064294822559e-06, "loss": 0.5469, "step": 11670 }, { "epoch": 0.74, "grad_norm": 0.8826762437820435, "learning_rate": 1.677539639268616e-06, "loss": 0.5969, "step": 11671 }, { "epoch": 0.74, "grad_norm": 0.8778190612792969, "learning_rate": 1.6767729889522239e-06, "loss": 0.5918, "step": 11672 }, { "epoch": 0.74, "grad_norm": 0.9307227730751038, "learning_rate": 1.6760064785653624e-06, "loss": 0.593, "step": 11673 }, { "epoch": 0.74, "grad_norm": 0.9407718181610107, "learning_rate": 1.675240108140303e-06, "loss": 0.5833, "step": 11674 }, { "epoch": 0.74, "grad_norm": 0.8945533633232117, "learning_rate": 1.674473877709315e-06, "loss": 0.5462, "step": 11675 }, { "epoch": 0.74, "grad_norm": 0.9327276945114136, "learning_rate": 1.6737077873046669e-06, "loss": 0.6013, "step": 11676 }, { "epoch": 0.74, "grad_norm": 0.9071036577224731, "learning_rate": 1.672941836958611e-06, "loss": 0.5889, "step": 11677 }, { "epoch": 0.74, "grad_norm": 0.9192063212394714, "learning_rate": 1.6721760267033998e-06, "loss": 0.5819, "step": 11678 }, { "epoch": 0.74, "grad_norm": 0.8941338062286377, "learning_rate": 1.6714103565712798e-06, "loss": 0.5678, "step": 11679 }, { "epoch": 0.74, "grad_norm": 0.8962938785552979, "learning_rate": 1.6706448265944902e-06, "loss": 0.5746, "step": 11680 }, { "epoch": 0.74, "grad_norm": 0.8910273313522339, "learning_rate": 1.6698794368052669e-06, "loss": 0.5355, "step": 11681 }, { "epoch": 0.74, "grad_norm": 0.872856855392456, "learning_rate": 1.6691141872358336e-06, "loss": 0.5956, "step": 11682 }, { "epoch": 0.74, "grad_norm": 0.8627503514289856, "learning_rate": 1.668349077918413e-06, "loss": 0.5305, "step": 11683 }, { "epoch": 0.74, "grad_norm": 0.9181475043296814, "learning_rate": 1.6675841088852268e-06, "loss": 0.5133, "step": 11684 }, { "epoch": 0.74, "grad_norm": 0.8350986242294312, "learning_rate": 1.666819280168479e-06, "loss": 0.5133, "step": 11685 }, { "epoch": 0.74, "grad_norm": 0.8253143429756165, "learning_rate": 1.6660545918003762e-06, "loss": 0.5165, "step": 11686 }, { "epoch": 0.74, "grad_norm": 0.9417761564254761, "learning_rate": 1.6652900438131181e-06, "loss": 0.6201, "step": 11687 }, { "epoch": 0.74, "grad_norm": 0.9664661288261414, "learning_rate": 1.6645256362388922e-06, "loss": 0.569, "step": 11688 }, { "epoch": 0.74, "grad_norm": 0.9326826333999634, "learning_rate": 1.663761369109892e-06, "loss": 0.6134, "step": 11689 }, { "epoch": 0.74, "grad_norm": 0.9516881108283997, "learning_rate": 1.662997242458293e-06, "loss": 0.5727, "step": 11690 }, { "epoch": 0.74, "grad_norm": 0.9566120505332947, "learning_rate": 1.6622332563162714e-06, "loss": 0.6568, "step": 11691 }, { "epoch": 0.74, "grad_norm": 0.838861882686615, "learning_rate": 1.6614694107159962e-06, "loss": 0.5633, "step": 11692 }, { "epoch": 0.74, "grad_norm": 0.8725398182868958, "learning_rate": 1.6607057056896304e-06, "loss": 0.537, "step": 11693 }, { "epoch": 0.74, "grad_norm": 0.8720897436141968, "learning_rate": 1.6599421412693307e-06, "loss": 0.5786, "step": 11694 }, { "epoch": 0.74, "grad_norm": 0.8999429941177368, "learning_rate": 1.65917871748725e-06, "loss": 0.5867, "step": 11695 }, { "epoch": 0.74, "grad_norm": 0.9211562275886536, "learning_rate": 1.6584154343755276e-06, "loss": 0.5425, "step": 11696 }, { "epoch": 0.74, "grad_norm": 0.856549859046936, "learning_rate": 1.6576522919663107e-06, "loss": 0.5257, "step": 11697 }, { "epoch": 0.74, "grad_norm": 0.88596510887146, "learning_rate": 1.6568892902917267e-06, "loss": 0.564, "step": 11698 }, { "epoch": 0.74, "grad_norm": 0.8725159168243408, "learning_rate": 1.6561264293839051e-06, "loss": 0.5379, "step": 11699 }, { "epoch": 0.74, "grad_norm": 0.9307702779769897, "learning_rate": 1.6553637092749685e-06, "loss": 0.5769, "step": 11700 }, { "epoch": 0.74, "grad_norm": 0.8692091107368469, "learning_rate": 1.6546011299970276e-06, "loss": 0.5311, "step": 11701 }, { "epoch": 0.74, "grad_norm": 0.8620391488075256, "learning_rate": 1.6538386915821975e-06, "loss": 0.5296, "step": 11702 }, { "epoch": 0.74, "grad_norm": 0.8763535022735596, "learning_rate": 1.6530763940625805e-06, "loss": 0.5436, "step": 11703 }, { "epoch": 0.74, "grad_norm": 0.970310628414154, "learning_rate": 1.6523142374702722e-06, "loss": 0.607, "step": 11704 }, { "epoch": 0.74, "grad_norm": 0.9026583433151245, "learning_rate": 1.6515522218373658e-06, "loss": 0.5318, "step": 11705 }, { "epoch": 0.74, "grad_norm": 0.9070267081260681, "learning_rate": 1.6507903471959468e-06, "loss": 0.5649, "step": 11706 }, { "epoch": 0.74, "grad_norm": 0.9056694507598877, "learning_rate": 1.6500286135780951e-06, "loss": 0.6071, "step": 11707 }, { "epoch": 0.74, "grad_norm": 0.8863142132759094, "learning_rate": 1.6492670210158863e-06, "loss": 0.5855, "step": 11708 }, { "epoch": 0.74, "grad_norm": 0.8576910495758057, "learning_rate": 1.6485055695413838e-06, "loss": 0.5702, "step": 11709 }, { "epoch": 0.74, "grad_norm": 0.9119299650192261, "learning_rate": 1.6477442591866544e-06, "loss": 0.5564, "step": 11710 }, { "epoch": 0.74, "grad_norm": 0.9486945867538452, "learning_rate": 1.6469830899837547e-06, "loss": 0.5378, "step": 11711 }, { "epoch": 0.74, "grad_norm": 0.8819604516029358, "learning_rate": 1.6462220619647306e-06, "loss": 0.5903, "step": 11712 }, { "epoch": 0.74, "grad_norm": 0.8723688125610352, "learning_rate": 1.6454611751616283e-06, "loss": 0.5676, "step": 11713 }, { "epoch": 0.74, "grad_norm": 0.8502789735794067, "learning_rate": 1.6447004296064867e-06, "loss": 0.543, "step": 11714 }, { "epoch": 0.74, "grad_norm": 0.8809540867805481, "learning_rate": 1.6439398253313377e-06, "loss": 0.5899, "step": 11715 }, { "epoch": 0.74, "grad_norm": 0.8517667055130005, "learning_rate": 1.6431793623682096e-06, "loss": 0.5581, "step": 11716 }, { "epoch": 0.74, "grad_norm": 0.9315950870513916, "learning_rate": 1.642419040749119e-06, "loss": 0.6287, "step": 11717 }, { "epoch": 0.74, "grad_norm": 0.9184224605560303, "learning_rate": 1.6416588605060812e-06, "loss": 0.5907, "step": 11718 }, { "epoch": 0.74, "grad_norm": 0.9085866808891296, "learning_rate": 1.6408988216711092e-06, "loss": 0.5375, "step": 11719 }, { "epoch": 0.74, "grad_norm": 0.8682625889778137, "learning_rate": 1.6401389242762006e-06, "loss": 0.5492, "step": 11720 }, { "epoch": 0.74, "grad_norm": 0.871749997138977, "learning_rate": 1.639379168353354e-06, "loss": 0.5566, "step": 11721 }, { "epoch": 0.74, "grad_norm": 0.8846398591995239, "learning_rate": 1.6386195539345596e-06, "loss": 0.566, "step": 11722 }, { "epoch": 0.74, "grad_norm": 0.8326940536499023, "learning_rate": 1.6378600810518026e-06, "loss": 0.6035, "step": 11723 }, { "epoch": 0.74, "grad_norm": 0.8957687616348267, "learning_rate": 1.6371007497370612e-06, "loss": 0.5666, "step": 11724 }, { "epoch": 0.74, "grad_norm": 0.9184751510620117, "learning_rate": 1.6363415600223103e-06, "loss": 0.6243, "step": 11725 }, { "epoch": 0.74, "grad_norm": 0.8690382838249207, "learning_rate": 1.6355825119395118e-06, "loss": 0.5514, "step": 11726 }, { "epoch": 0.74, "grad_norm": 0.8801531791687012, "learning_rate": 1.634823605520633e-06, "loss": 0.5549, "step": 11727 }, { "epoch": 0.74, "grad_norm": 0.8974312543869019, "learning_rate": 1.634064840797624e-06, "loss": 0.5657, "step": 11728 }, { "epoch": 0.74, "grad_norm": 0.8583878874778748, "learning_rate": 1.6333062178024355e-06, "loss": 0.5825, "step": 11729 }, { "epoch": 0.74, "grad_norm": 0.8436487913131714, "learning_rate": 1.63254773656701e-06, "loss": 0.5514, "step": 11730 }, { "epoch": 0.74, "grad_norm": 0.8887004852294922, "learning_rate": 1.6317893971232852e-06, "loss": 0.5557, "step": 11731 }, { "epoch": 0.74, "grad_norm": 0.9396257400512695, "learning_rate": 1.6310311995031913e-06, "loss": 0.5989, "step": 11732 }, { "epoch": 0.74, "grad_norm": 0.9944149851799011, "learning_rate": 1.6302731437386555e-06, "loss": 0.6509, "step": 11733 }, { "epoch": 0.74, "grad_norm": 0.8768121600151062, "learning_rate": 1.6295152298615936e-06, "loss": 0.5258, "step": 11734 }, { "epoch": 0.74, "grad_norm": 0.9114717245101929, "learning_rate": 1.62875745790392e-06, "loss": 0.5883, "step": 11735 }, { "epoch": 0.74, "grad_norm": 0.9182329177856445, "learning_rate": 1.6279998278975428e-06, "loss": 0.6177, "step": 11736 }, { "epoch": 0.74, "grad_norm": 0.8736885190010071, "learning_rate": 1.627242339874362e-06, "loss": 0.5619, "step": 11737 }, { "epoch": 0.74, "grad_norm": 0.8888165950775146, "learning_rate": 1.6264849938662753e-06, "loss": 0.6056, "step": 11738 }, { "epoch": 0.74, "grad_norm": 0.9017614126205444, "learning_rate": 1.6257277899051666e-06, "loss": 0.6036, "step": 11739 }, { "epoch": 0.74, "grad_norm": 0.9094336628913879, "learning_rate": 1.6249707280229237e-06, "loss": 0.6252, "step": 11740 }, { "epoch": 0.74, "grad_norm": 0.8804279565811157, "learning_rate": 1.6242138082514247e-06, "loss": 0.6023, "step": 11741 }, { "epoch": 0.74, "grad_norm": 0.8932421207427979, "learning_rate": 1.6234570306225366e-06, "loss": 0.5898, "step": 11742 }, { "epoch": 0.74, "grad_norm": 0.8643161058425903, "learning_rate": 1.6227003951681276e-06, "loss": 0.5163, "step": 11743 }, { "epoch": 0.74, "grad_norm": 0.8888043165206909, "learning_rate": 1.6219439019200557e-06, "loss": 0.5626, "step": 11744 }, { "epoch": 0.74, "grad_norm": 0.9490756988525391, "learning_rate": 1.6211875509101744e-06, "loss": 0.6331, "step": 11745 }, { "epoch": 0.74, "grad_norm": 0.8775157928466797, "learning_rate": 1.6204313421703332e-06, "loss": 0.5488, "step": 11746 }, { "epoch": 0.74, "grad_norm": 0.9012529850006104, "learning_rate": 1.6196752757323698e-06, "loss": 0.6517, "step": 11747 }, { "epoch": 0.74, "grad_norm": 0.8449646830558777, "learning_rate": 1.61891935162812e-06, "loss": 0.5482, "step": 11748 }, { "epoch": 0.74, "grad_norm": 0.8353961110115051, "learning_rate": 1.6181635698894171e-06, "loss": 0.5155, "step": 11749 }, { "epoch": 0.74, "grad_norm": 0.9023754596710205, "learning_rate": 1.61740793054808e-06, "loss": 0.6113, "step": 11750 }, { "epoch": 0.74, "grad_norm": 0.8526588082313538, "learning_rate": 1.6166524336359285e-06, "loss": 0.537, "step": 11751 }, { "epoch": 0.74, "grad_norm": 0.9162303805351257, "learning_rate": 1.6158970791847728e-06, "loss": 0.6146, "step": 11752 }, { "epoch": 0.74, "grad_norm": 0.8880906701087952, "learning_rate": 1.6151418672264186e-06, "loss": 0.5587, "step": 11753 }, { "epoch": 0.74, "grad_norm": 0.8889377117156982, "learning_rate": 1.614386797792667e-06, "loss": 0.5929, "step": 11754 }, { "epoch": 0.74, "grad_norm": 0.817284882068634, "learning_rate": 1.6136318709153075e-06, "loss": 0.5572, "step": 11755 }, { "epoch": 0.74, "grad_norm": 0.785580039024353, "learning_rate": 1.612877086626129e-06, "loss": 0.6277, "step": 11756 }, { "epoch": 0.74, "grad_norm": 0.8508361577987671, "learning_rate": 1.612122444956916e-06, "loss": 0.5669, "step": 11757 }, { "epoch": 0.74, "grad_norm": 0.8702815175056458, "learning_rate": 1.6113679459394398e-06, "loss": 0.5907, "step": 11758 }, { "epoch": 0.74, "grad_norm": 0.8659467697143555, "learning_rate": 1.6106135896054714e-06, "loss": 0.574, "step": 11759 }, { "epoch": 0.75, "grad_norm": 0.8882265686988831, "learning_rate": 1.6098593759867736e-06, "loss": 0.5649, "step": 11760 }, { "epoch": 0.75, "grad_norm": 0.8908340334892273, "learning_rate": 1.609105305115104e-06, "loss": 0.5496, "step": 11761 }, { "epoch": 0.75, "grad_norm": 0.8687838315963745, "learning_rate": 1.6083513770222158e-06, "loss": 0.5963, "step": 11762 }, { "epoch": 0.75, "grad_norm": 0.893989622592926, "learning_rate": 1.6075975917398512e-06, "loss": 0.5502, "step": 11763 }, { "epoch": 0.75, "grad_norm": 0.8989611864089966, "learning_rate": 1.60684394929975e-06, "loss": 0.6383, "step": 11764 }, { "epoch": 0.75, "grad_norm": 0.8593994975090027, "learning_rate": 1.6060904497336465e-06, "loss": 0.5928, "step": 11765 }, { "epoch": 0.75, "grad_norm": 0.8752898573875427, "learning_rate": 1.6053370930732676e-06, "loss": 0.5127, "step": 11766 }, { "epoch": 0.75, "grad_norm": 0.8868995904922485, "learning_rate": 1.6045838793503342e-06, "loss": 0.5745, "step": 11767 }, { "epoch": 0.75, "grad_norm": 0.872316837310791, "learning_rate": 1.6038308085965642e-06, "loss": 0.5879, "step": 11768 }, { "epoch": 0.75, "grad_norm": 0.9360784888267517, "learning_rate": 1.6030778808436609e-06, "loss": 0.5412, "step": 11769 }, { "epoch": 0.75, "grad_norm": 0.8613805174827576, "learning_rate": 1.6023250961233338e-06, "loss": 0.5846, "step": 11770 }, { "epoch": 0.75, "grad_norm": 0.9219672083854675, "learning_rate": 1.6015724544672762e-06, "loss": 0.5664, "step": 11771 }, { "epoch": 0.75, "grad_norm": 0.9142691493034363, "learning_rate": 1.6008199559071795e-06, "loss": 0.602, "step": 11772 }, { "epoch": 0.75, "grad_norm": 0.8564930558204651, "learning_rate": 1.6000676004747306e-06, "loss": 0.5597, "step": 11773 }, { "epoch": 0.75, "grad_norm": 0.9067575335502625, "learning_rate": 1.5993153882016065e-06, "loss": 0.6287, "step": 11774 }, { "epoch": 0.75, "grad_norm": 0.8603774309158325, "learning_rate": 1.5985633191194821e-06, "loss": 0.6032, "step": 11775 }, { "epoch": 0.75, "grad_norm": 0.9035540819168091, "learning_rate": 1.5978113932600248e-06, "loss": 0.5868, "step": 11776 }, { "epoch": 0.75, "grad_norm": 0.9324126839637756, "learning_rate": 1.5970596106548913e-06, "loss": 0.5675, "step": 11777 }, { "epoch": 0.75, "grad_norm": 0.8501653671264648, "learning_rate": 1.5963079713357432e-06, "loss": 0.5974, "step": 11778 }, { "epoch": 0.75, "grad_norm": 0.8585829734802246, "learning_rate": 1.595556475334224e-06, "loss": 0.5757, "step": 11779 }, { "epoch": 0.75, "grad_norm": 0.8865067362785339, "learning_rate": 1.5948051226819783e-06, "loss": 0.5329, "step": 11780 }, { "epoch": 0.75, "grad_norm": 0.8885084986686707, "learning_rate": 1.5940539134106442e-06, "loss": 0.5593, "step": 11781 }, { "epoch": 0.75, "grad_norm": 0.8946758508682251, "learning_rate": 1.5933028475518486e-06, "loss": 0.5682, "step": 11782 }, { "epoch": 0.75, "grad_norm": 0.9001892805099487, "learning_rate": 1.5925519251372212e-06, "loss": 0.4956, "step": 11783 }, { "epoch": 0.75, "grad_norm": 0.8397232890129089, "learning_rate": 1.5918011461983796e-06, "loss": 0.5367, "step": 11784 }, { "epoch": 0.75, "grad_norm": 0.9217719435691833, "learning_rate": 1.5910505107669339e-06, "loss": 0.5639, "step": 11785 }, { "epoch": 0.75, "grad_norm": 0.8915478587150574, "learning_rate": 1.5903000188744922e-06, "loss": 0.5673, "step": 11786 }, { "epoch": 0.75, "grad_norm": 0.8859308362007141, "learning_rate": 1.589549670552656e-06, "loss": 0.589, "step": 11787 }, { "epoch": 0.75, "grad_norm": 0.8340456485748291, "learning_rate": 1.588799465833018e-06, "loss": 0.5375, "step": 11788 }, { "epoch": 0.75, "grad_norm": 0.950278639793396, "learning_rate": 1.5880494047471683e-06, "loss": 0.6206, "step": 11789 }, { "epoch": 0.75, "grad_norm": 0.8300553560256958, "learning_rate": 1.587299487326689e-06, "loss": 0.5973, "step": 11790 }, { "epoch": 0.75, "grad_norm": 0.9166631698608398, "learning_rate": 1.586549713603156e-06, "loss": 0.6043, "step": 11791 }, { "epoch": 0.75, "grad_norm": 0.8950029611587524, "learning_rate": 1.5858000836081422e-06, "loss": 0.5707, "step": 11792 }, { "epoch": 0.75, "grad_norm": 0.9038580060005188, "learning_rate": 1.5850505973732077e-06, "loss": 0.5714, "step": 11793 }, { "epoch": 0.75, "grad_norm": 0.8897790908813477, "learning_rate": 1.5843012549299131e-06, "loss": 0.5668, "step": 11794 }, { "epoch": 0.75, "grad_norm": 0.9130045175552368, "learning_rate": 1.58355205630981e-06, "loss": 0.6087, "step": 11795 }, { "epoch": 0.75, "grad_norm": 0.8878775835037231, "learning_rate": 1.5828030015444451e-06, "loss": 0.5306, "step": 11796 }, { "epoch": 0.75, "grad_norm": 0.8759022951126099, "learning_rate": 1.5820540906653581e-06, "loss": 0.6105, "step": 11797 }, { "epoch": 0.75, "grad_norm": 0.8387483358383179, "learning_rate": 1.5813053237040849e-06, "loss": 0.565, "step": 11798 }, { "epoch": 0.75, "grad_norm": 0.8774323463439941, "learning_rate": 1.580556700692148e-06, "loss": 0.5536, "step": 11799 }, { "epoch": 0.75, "grad_norm": 0.9281049370765686, "learning_rate": 1.5798082216610766e-06, "loss": 0.5497, "step": 11800 }, { "epoch": 0.75, "grad_norm": 0.8957639932632446, "learning_rate": 1.5790598866423818e-06, "loss": 0.6225, "step": 11801 }, { "epoch": 0.75, "grad_norm": 0.8487939834594727, "learning_rate": 1.5783116956675742e-06, "loss": 0.5618, "step": 11802 }, { "epoch": 0.75, "grad_norm": 0.8974397778511047, "learning_rate": 1.5775636487681579e-06, "loss": 0.5703, "step": 11803 }, { "epoch": 0.75, "grad_norm": 0.9455395340919495, "learning_rate": 1.5768157459756307e-06, "loss": 0.6104, "step": 11804 }, { "epoch": 0.75, "grad_norm": 0.9057279825210571, "learning_rate": 1.576067987321484e-06, "loss": 0.553, "step": 11805 }, { "epoch": 0.75, "grad_norm": 0.9172567129135132, "learning_rate": 1.5753203728372052e-06, "loss": 0.5335, "step": 11806 }, { "epoch": 0.75, "grad_norm": 0.8080207705497742, "learning_rate": 1.5745729025542684e-06, "loss": 0.5155, "step": 11807 }, { "epoch": 0.75, "grad_norm": 0.9321126937866211, "learning_rate": 1.5738255765041537e-06, "loss": 0.6143, "step": 11808 }, { "epoch": 0.75, "grad_norm": 0.8731662631034851, "learning_rate": 1.5730783947183237e-06, "loss": 0.5733, "step": 11809 }, { "epoch": 0.75, "grad_norm": 0.8786374926567078, "learning_rate": 1.5723313572282412e-06, "loss": 0.5162, "step": 11810 }, { "epoch": 0.75, "grad_norm": 0.9051015973091125, "learning_rate": 1.5715844640653627e-06, "loss": 0.5836, "step": 11811 }, { "epoch": 0.75, "grad_norm": 0.9382368922233582, "learning_rate": 1.5708377152611326e-06, "loss": 0.5949, "step": 11812 }, { "epoch": 0.75, "grad_norm": 0.8949106931686401, "learning_rate": 1.5700911108469986e-06, "loss": 0.5708, "step": 11813 }, { "epoch": 0.75, "grad_norm": 0.925713837146759, "learning_rate": 1.569344650854398e-06, "loss": 0.5473, "step": 11814 }, { "epoch": 0.75, "grad_norm": 0.8979496955871582, "learning_rate": 1.5685983353147582e-06, "loss": 0.578, "step": 11815 }, { "epoch": 0.75, "grad_norm": 0.8621270060539246, "learning_rate": 1.5678521642595052e-06, "loss": 0.5517, "step": 11816 }, { "epoch": 0.75, "grad_norm": 0.8650081157684326, "learning_rate": 1.567106137720058e-06, "loss": 0.507, "step": 11817 }, { "epoch": 0.75, "grad_norm": 0.9151085615158081, "learning_rate": 1.5663602557278297e-06, "loss": 0.565, "step": 11818 }, { "epoch": 0.75, "grad_norm": 0.8362554311752319, "learning_rate": 1.5656145183142274e-06, "loss": 0.5517, "step": 11819 }, { "epoch": 0.75, "grad_norm": 0.9083791971206665, "learning_rate": 1.5648689255106474e-06, "loss": 0.5975, "step": 11820 }, { "epoch": 0.75, "grad_norm": 0.9755656123161316, "learning_rate": 1.5641234773484887e-06, "loss": 0.5784, "step": 11821 }, { "epoch": 0.75, "grad_norm": 0.8774923086166382, "learning_rate": 1.5633781738591392e-06, "loss": 0.5766, "step": 11822 }, { "epoch": 0.75, "grad_norm": 0.9168820977210999, "learning_rate": 1.5626330150739776e-06, "loss": 0.616, "step": 11823 }, { "epoch": 0.75, "grad_norm": 0.8971782326698303, "learning_rate": 1.5618880010243831e-06, "loss": 0.5556, "step": 11824 }, { "epoch": 0.75, "grad_norm": 0.9524270296096802, "learning_rate": 1.5611431317417235e-06, "loss": 0.5918, "step": 11825 }, { "epoch": 0.75, "grad_norm": 0.9112175107002258, "learning_rate": 1.5603984072573648e-06, "loss": 0.5589, "step": 11826 }, { "epoch": 0.75, "grad_norm": 0.856706440448761, "learning_rate": 1.5596538276026641e-06, "loss": 0.5309, "step": 11827 }, { "epoch": 0.75, "grad_norm": 0.8865464329719543, "learning_rate": 1.5589093928089715e-06, "loss": 0.5807, "step": 11828 }, { "epoch": 0.75, "grad_norm": 0.8657694458961487, "learning_rate": 1.5581651029076322e-06, "loss": 0.5807, "step": 11829 }, { "epoch": 0.75, "grad_norm": 0.9261035919189453, "learning_rate": 1.5574209579299903e-06, "loss": 0.5876, "step": 11830 }, { "epoch": 0.75, "grad_norm": 0.918413519859314, "learning_rate": 1.5566769579073747e-06, "loss": 0.5667, "step": 11831 }, { "epoch": 0.75, "grad_norm": 0.9813733696937561, "learning_rate": 1.555933102871114e-06, "loss": 0.5733, "step": 11832 }, { "epoch": 0.75, "grad_norm": 0.9484089016914368, "learning_rate": 1.5551893928525285e-06, "loss": 0.6259, "step": 11833 }, { "epoch": 0.75, "grad_norm": 0.9082149267196655, "learning_rate": 1.5544458278829344e-06, "loss": 0.6183, "step": 11834 }, { "epoch": 0.75, "grad_norm": 0.9003174304962158, "learning_rate": 1.5537024079936425e-06, "loss": 0.5506, "step": 11835 }, { "epoch": 0.75, "grad_norm": 0.9653313755989075, "learning_rate": 1.5529591332159511e-06, "loss": 0.6133, "step": 11836 }, { "epoch": 0.75, "grad_norm": 0.9120768904685974, "learning_rate": 1.5522160035811578e-06, "loss": 0.5768, "step": 11837 }, { "epoch": 0.75, "grad_norm": 0.8551223278045654, "learning_rate": 1.551473019120558e-06, "loss": 0.5806, "step": 11838 }, { "epoch": 0.75, "grad_norm": 0.9125446677207947, "learning_rate": 1.5507301798654313e-06, "loss": 0.5718, "step": 11839 }, { "epoch": 0.75, "grad_norm": 0.8963059782981873, "learning_rate": 1.549987485847057e-06, "loss": 0.556, "step": 11840 }, { "epoch": 0.75, "grad_norm": 0.8334496021270752, "learning_rate": 1.54924493709671e-06, "loss": 0.5308, "step": 11841 }, { "epoch": 0.75, "grad_norm": 0.8776934742927551, "learning_rate": 1.5485025336456511e-06, "loss": 0.643, "step": 11842 }, { "epoch": 0.75, "grad_norm": 0.8814354538917542, "learning_rate": 1.547760275525147e-06, "loss": 0.5543, "step": 11843 }, { "epoch": 0.75, "grad_norm": 0.8887062072753906, "learning_rate": 1.547018162766446e-06, "loss": 0.6306, "step": 11844 }, { "epoch": 0.75, "grad_norm": 0.9002584218978882, "learning_rate": 1.5462761954007987e-06, "loss": 0.5831, "step": 11845 }, { "epoch": 0.75, "grad_norm": 0.9003365635871887, "learning_rate": 1.5455343734594463e-06, "loss": 0.5889, "step": 11846 }, { "epoch": 0.75, "grad_norm": 0.8967679142951965, "learning_rate": 1.5447926969736237e-06, "loss": 0.599, "step": 11847 }, { "epoch": 0.75, "grad_norm": 0.9296191334724426, "learning_rate": 1.5440511659745611e-06, "loss": 0.5842, "step": 11848 }, { "epoch": 0.75, "grad_norm": 0.903057873249054, "learning_rate": 1.5433097804934833e-06, "loss": 0.5436, "step": 11849 }, { "epoch": 0.75, "grad_norm": 0.9655782580375671, "learning_rate": 1.5425685405616026e-06, "loss": 0.603, "step": 11850 }, { "epoch": 0.75, "grad_norm": 0.9100805521011353, "learning_rate": 1.5418274462101358e-06, "loss": 0.5269, "step": 11851 }, { "epoch": 0.75, "grad_norm": 0.9555662274360657, "learning_rate": 1.541086497470284e-06, "loss": 0.5998, "step": 11852 }, { "epoch": 0.75, "grad_norm": 0.9616516828536987, "learning_rate": 1.540345694373247e-06, "loss": 0.5653, "step": 11853 }, { "epoch": 0.75, "grad_norm": 0.8906912207603455, "learning_rate": 1.5396050369502175e-06, "loss": 0.5835, "step": 11854 }, { "epoch": 0.75, "grad_norm": 0.9743589162826538, "learning_rate": 1.538864525232382e-06, "loss": 0.5809, "step": 11855 }, { "epoch": 0.75, "grad_norm": 0.9580129981040955, "learning_rate": 1.538124159250921e-06, "loss": 0.5397, "step": 11856 }, { "epoch": 0.75, "grad_norm": 0.9036690592765808, "learning_rate": 1.5373839390370098e-06, "loss": 0.6106, "step": 11857 }, { "epoch": 0.75, "grad_norm": 0.9009885191917419, "learning_rate": 1.5366438646218146e-06, "loss": 0.5968, "step": 11858 }, { "epoch": 0.75, "grad_norm": 0.8845691084861755, "learning_rate": 1.5359039360364975e-06, "loss": 0.5573, "step": 11859 }, { "epoch": 0.75, "grad_norm": 0.8430015444755554, "learning_rate": 1.5351641533122153e-06, "loss": 0.5701, "step": 11860 }, { "epoch": 0.75, "grad_norm": 0.8827622532844543, "learning_rate": 1.5344245164801174e-06, "loss": 0.5712, "step": 11861 }, { "epoch": 0.75, "grad_norm": 0.8615781664848328, "learning_rate": 1.533685025571347e-06, "loss": 0.5173, "step": 11862 }, { "epoch": 0.75, "grad_norm": 0.8814289569854736, "learning_rate": 1.5329456806170418e-06, "loss": 0.6011, "step": 11863 }, { "epoch": 0.75, "grad_norm": 0.9278409481048584, "learning_rate": 1.5322064816483328e-06, "loss": 0.6135, "step": 11864 }, { "epoch": 0.75, "grad_norm": 0.875895082950592, "learning_rate": 1.5314674286963471e-06, "loss": 0.6038, "step": 11865 }, { "epoch": 0.75, "grad_norm": 0.9008811712265015, "learning_rate": 1.5307285217922003e-06, "loss": 0.6005, "step": 11866 }, { "epoch": 0.75, "grad_norm": 0.8702130913734436, "learning_rate": 1.529989760967005e-06, "loss": 0.5811, "step": 11867 }, { "epoch": 0.75, "grad_norm": 0.8915956616401672, "learning_rate": 1.5292511462518728e-06, "loss": 0.5713, "step": 11868 }, { "epoch": 0.75, "grad_norm": 0.8408598899841309, "learning_rate": 1.528512677677899e-06, "loss": 0.5299, "step": 11869 }, { "epoch": 0.75, "grad_norm": 0.8335807919502258, "learning_rate": 1.5277743552761809e-06, "loss": 0.5533, "step": 11870 }, { "epoch": 0.75, "grad_norm": 0.8974030613899231, "learning_rate": 1.5270361790778065e-06, "loss": 0.5777, "step": 11871 }, { "epoch": 0.75, "grad_norm": 0.8503268957138062, "learning_rate": 1.526298149113854e-06, "loss": 0.5747, "step": 11872 }, { "epoch": 0.75, "grad_norm": 0.9573015570640564, "learning_rate": 1.5255602654154055e-06, "loss": 0.5811, "step": 11873 }, { "epoch": 0.75, "grad_norm": 0.9180850386619568, "learning_rate": 1.5248225280135258e-06, "loss": 0.5726, "step": 11874 }, { "epoch": 0.75, "grad_norm": 0.8586685657501221, "learning_rate": 1.5240849369392807e-06, "loss": 0.5612, "step": 11875 }, { "epoch": 0.75, "grad_norm": 0.9390682578086853, "learning_rate": 1.5233474922237268e-06, "loss": 0.5809, "step": 11876 }, { "epoch": 0.75, "grad_norm": 0.8708896636962891, "learning_rate": 1.5226101938979153e-06, "loss": 0.5575, "step": 11877 }, { "epoch": 0.75, "grad_norm": 0.89445960521698, "learning_rate": 1.5218730419928917e-06, "loss": 0.5099, "step": 11878 }, { "epoch": 0.75, "grad_norm": 0.884432315826416, "learning_rate": 1.5211360365396972e-06, "loss": 0.6021, "step": 11879 }, { "epoch": 0.75, "grad_norm": 0.9297842979431152, "learning_rate": 1.5203991775693577e-06, "loss": 0.6123, "step": 11880 }, { "epoch": 0.75, "grad_norm": 0.9308014512062073, "learning_rate": 1.5196624651129084e-06, "loss": 0.5901, "step": 11881 }, { "epoch": 0.75, "grad_norm": 0.9186192154884338, "learning_rate": 1.5189258992013635e-06, "loss": 0.5711, "step": 11882 }, { "epoch": 0.75, "grad_norm": 0.9167851209640503, "learning_rate": 1.5181894798657388e-06, "loss": 0.5695, "step": 11883 }, { "epoch": 0.75, "grad_norm": 0.878422200679779, "learning_rate": 1.517453207137043e-06, "loss": 0.5535, "step": 11884 }, { "epoch": 0.75, "grad_norm": 0.8583741784095764, "learning_rate": 1.5167170810462777e-06, "loss": 0.5945, "step": 11885 }, { "epoch": 0.75, "grad_norm": 0.8769426345825195, "learning_rate": 1.5159811016244392e-06, "loss": 0.5756, "step": 11886 }, { "epoch": 0.75, "grad_norm": 0.9809277057647705, "learning_rate": 1.5152452689025176e-06, "loss": 0.5608, "step": 11887 }, { "epoch": 0.75, "grad_norm": 0.8417267203330994, "learning_rate": 1.5145095829114937e-06, "loss": 0.56, "step": 11888 }, { "epoch": 0.75, "grad_norm": 0.8778293132781982, "learning_rate": 1.5137740436823462e-06, "loss": 0.5685, "step": 11889 }, { "epoch": 0.75, "grad_norm": 0.9264646768569946, "learning_rate": 1.5130386512460454e-06, "loss": 0.6045, "step": 11890 }, { "epoch": 0.75, "grad_norm": 0.8687536120414734, "learning_rate": 1.5123034056335572e-06, "loss": 0.572, "step": 11891 }, { "epoch": 0.75, "grad_norm": 0.8998939394950867, "learning_rate": 1.5115683068758419e-06, "loss": 0.5886, "step": 11892 }, { "epoch": 0.75, "grad_norm": 0.9059341549873352, "learning_rate": 1.5108333550038461e-06, "loss": 0.581, "step": 11893 }, { "epoch": 0.75, "grad_norm": 0.9004920721054077, "learning_rate": 1.510098550048521e-06, "loss": 0.5742, "step": 11894 }, { "epoch": 0.75, "grad_norm": 0.8428323268890381, "learning_rate": 1.5093638920408077e-06, "loss": 0.5403, "step": 11895 }, { "epoch": 0.75, "grad_norm": 0.8726648688316345, "learning_rate": 1.508629381011636e-06, "loss": 0.5946, "step": 11896 }, { "epoch": 0.75, "grad_norm": 0.8611435294151306, "learning_rate": 1.507895016991936e-06, "loss": 0.5453, "step": 11897 }, { "epoch": 0.75, "grad_norm": 0.9121397137641907, "learning_rate": 1.507160800012628e-06, "loss": 0.5912, "step": 11898 }, { "epoch": 0.75, "grad_norm": 0.979377031326294, "learning_rate": 1.5064267301046281e-06, "loss": 0.5991, "step": 11899 }, { "epoch": 0.75, "grad_norm": 0.9422827363014221, "learning_rate": 1.5056928072988475e-06, "loss": 0.5693, "step": 11900 }, { "epoch": 0.75, "grad_norm": 0.9085085391998291, "learning_rate": 1.504959031626183e-06, "loss": 0.6133, "step": 11901 }, { "epoch": 0.75, "grad_norm": 0.9609709978103638, "learning_rate": 1.5042254031175373e-06, "loss": 0.6043, "step": 11902 }, { "epoch": 0.75, "grad_norm": 0.8676922917366028, "learning_rate": 1.5034919218038007e-06, "loss": 0.5184, "step": 11903 }, { "epoch": 0.75, "grad_norm": 0.8704044818878174, "learning_rate": 1.502758587715854e-06, "loss": 0.5492, "step": 11904 }, { "epoch": 0.75, "grad_norm": 0.9572499990463257, "learning_rate": 1.5020254008845775e-06, "loss": 0.6057, "step": 11905 }, { "epoch": 0.75, "grad_norm": 0.895881712436676, "learning_rate": 1.501292361340842e-06, "loss": 0.5942, "step": 11906 }, { "epoch": 0.75, "grad_norm": 0.8465459942817688, "learning_rate": 1.500559469115515e-06, "loss": 0.5872, "step": 11907 }, { "epoch": 0.75, "grad_norm": 0.9170262217521667, "learning_rate": 1.499826724239456e-06, "loss": 0.5747, "step": 11908 }, { "epoch": 0.75, "grad_norm": 0.8896523118019104, "learning_rate": 1.499094126743516e-06, "loss": 0.624, "step": 11909 }, { "epoch": 0.75, "grad_norm": 0.8543857336044312, "learning_rate": 1.4983616766585423e-06, "loss": 0.5329, "step": 11910 }, { "epoch": 0.75, "grad_norm": 0.9327712655067444, "learning_rate": 1.4976293740153803e-06, "loss": 0.6734, "step": 11911 }, { "epoch": 0.75, "grad_norm": 0.9487749338150024, "learning_rate": 1.4968972188448593e-06, "loss": 0.551, "step": 11912 }, { "epoch": 0.75, "grad_norm": 0.8792836666107178, "learning_rate": 1.4961652111778103e-06, "loss": 0.5467, "step": 11913 }, { "epoch": 0.75, "grad_norm": 0.873479962348938, "learning_rate": 1.4954333510450552e-06, "loss": 0.5726, "step": 11914 }, { "epoch": 0.75, "grad_norm": 0.8478526473045349, "learning_rate": 1.4947016384774105e-06, "loss": 0.5231, "step": 11915 }, { "epoch": 0.75, "grad_norm": 0.8277620673179626, "learning_rate": 1.4939700735056873e-06, "loss": 0.5512, "step": 11916 }, { "epoch": 0.76, "grad_norm": 0.9055673480033875, "learning_rate": 1.493238656160686e-06, "loss": 0.6138, "step": 11917 }, { "epoch": 0.76, "grad_norm": 0.8501646518707275, "learning_rate": 1.492507386473206e-06, "loss": 0.5596, "step": 11918 }, { "epoch": 0.76, "grad_norm": 0.9250453114509583, "learning_rate": 1.4917762644740381e-06, "loss": 0.5815, "step": 11919 }, { "epoch": 0.76, "grad_norm": 0.8276757597923279, "learning_rate": 1.4910452901939671e-06, "loss": 0.525, "step": 11920 }, { "epoch": 0.76, "grad_norm": 0.8669492602348328, "learning_rate": 1.4903144636637723e-06, "loss": 0.5612, "step": 11921 }, { "epoch": 0.76, "grad_norm": 0.9456034302711487, "learning_rate": 1.489583784914228e-06, "loss": 0.5591, "step": 11922 }, { "epoch": 0.76, "grad_norm": 0.971352756023407, "learning_rate": 1.4888532539760958e-06, "loss": 0.6341, "step": 11923 }, { "epoch": 0.76, "grad_norm": 0.8353976607322693, "learning_rate": 1.4881228708801409e-06, "loss": 0.5194, "step": 11924 }, { "epoch": 0.76, "grad_norm": 0.8877169489860535, "learning_rate": 1.4873926356571144e-06, "loss": 0.5992, "step": 11925 }, { "epoch": 0.76, "grad_norm": 0.9067363142967224, "learning_rate": 1.486662548337764e-06, "loss": 0.6173, "step": 11926 }, { "epoch": 0.76, "grad_norm": 0.891920268535614, "learning_rate": 1.485932608952832e-06, "loss": 0.5366, "step": 11927 }, { "epoch": 0.76, "grad_norm": 0.8979024291038513, "learning_rate": 1.485202817533053e-06, "loss": 0.5402, "step": 11928 }, { "epoch": 0.76, "grad_norm": 0.9207996129989624, "learning_rate": 1.4844731741091561e-06, "loss": 0.5968, "step": 11929 }, { "epoch": 0.76, "grad_norm": 0.9184231162071228, "learning_rate": 1.4837436787118665e-06, "loss": 0.5759, "step": 11930 }, { "epoch": 0.76, "grad_norm": 0.851150393486023, "learning_rate": 1.4830143313718943e-06, "loss": 0.5953, "step": 11931 }, { "epoch": 0.76, "grad_norm": 0.8827084898948669, "learning_rate": 1.482285132119956e-06, "loss": 0.5872, "step": 11932 }, { "epoch": 0.76, "grad_norm": 0.840436577796936, "learning_rate": 1.4815560809867551e-06, "loss": 0.5918, "step": 11933 }, { "epoch": 0.76, "grad_norm": 0.903814971446991, "learning_rate": 1.4808271780029864e-06, "loss": 0.5741, "step": 11934 }, { "epoch": 0.76, "grad_norm": 0.8789491057395935, "learning_rate": 1.4800984231993432e-06, "loss": 0.5188, "step": 11935 }, { "epoch": 0.76, "grad_norm": 0.9306246638298035, "learning_rate": 1.47936981660651e-06, "loss": 0.5841, "step": 11936 }, { "epoch": 0.76, "grad_norm": 0.9300113916397095, "learning_rate": 1.4786413582551668e-06, "loss": 0.6367, "step": 11937 }, { "epoch": 0.76, "grad_norm": 0.8994358777999878, "learning_rate": 1.4779130481759874e-06, "loss": 0.5961, "step": 11938 }, { "epoch": 0.76, "grad_norm": 0.872204065322876, "learning_rate": 1.4771848863996353e-06, "loss": 0.6409, "step": 11939 }, { "epoch": 0.76, "grad_norm": 0.897193431854248, "learning_rate": 1.4764568729567714e-06, "loss": 0.5399, "step": 11940 }, { "epoch": 0.76, "grad_norm": 0.8492984771728516, "learning_rate": 1.4757290078780545e-06, "loss": 0.5339, "step": 11941 }, { "epoch": 0.76, "grad_norm": 0.942596971988678, "learning_rate": 1.475001291194127e-06, "loss": 0.6122, "step": 11942 }, { "epoch": 0.76, "grad_norm": 0.9129643440246582, "learning_rate": 1.4742737229356324e-06, "loss": 0.6431, "step": 11943 }, { "epoch": 0.76, "grad_norm": 0.9358230829238892, "learning_rate": 1.473546303133207e-06, "loss": 0.6236, "step": 11944 }, { "epoch": 0.76, "grad_norm": 0.8448783159255981, "learning_rate": 1.4728190318174785e-06, "loss": 0.5691, "step": 11945 }, { "epoch": 0.76, "grad_norm": 0.8805672526359558, "learning_rate": 1.4720919090190723e-06, "loss": 0.5347, "step": 11946 }, { "epoch": 0.76, "grad_norm": 0.9884516596794128, "learning_rate": 1.471364934768601e-06, "loss": 0.5915, "step": 11947 }, { "epoch": 0.76, "grad_norm": 0.9252734780311584, "learning_rate": 1.470638109096676e-06, "loss": 0.5798, "step": 11948 }, { "epoch": 0.76, "grad_norm": 0.8685827851295471, "learning_rate": 1.469911432033906e-06, "loss": 0.5078, "step": 11949 }, { "epoch": 0.76, "grad_norm": 0.8583547472953796, "learning_rate": 1.469184903610883e-06, "loss": 0.5571, "step": 11950 }, { "epoch": 0.76, "grad_norm": 0.9142690300941467, "learning_rate": 1.468458523858201e-06, "loss": 0.5576, "step": 11951 }, { "epoch": 0.76, "grad_norm": 0.8868157267570496, "learning_rate": 1.467732292806447e-06, "loss": 0.6154, "step": 11952 }, { "epoch": 0.76, "grad_norm": 0.9219274520874023, "learning_rate": 1.4670062104861948e-06, "loss": 0.5348, "step": 11953 }, { "epoch": 0.76, "grad_norm": 0.8708443641662598, "learning_rate": 1.4662802769280244e-06, "loss": 0.5806, "step": 11954 }, { "epoch": 0.76, "grad_norm": 0.884825587272644, "learning_rate": 1.4655544921624964e-06, "loss": 0.5741, "step": 11955 }, { "epoch": 0.76, "grad_norm": 0.9130429625511169, "learning_rate": 1.464828856220174e-06, "loss": 0.5545, "step": 11956 }, { "epoch": 0.76, "grad_norm": 0.9295893907546997, "learning_rate": 1.4641033691316104e-06, "loss": 0.6212, "step": 11957 }, { "epoch": 0.76, "grad_norm": 0.8640606999397278, "learning_rate": 1.4633780309273532e-06, "loss": 0.5754, "step": 11958 }, { "epoch": 0.76, "grad_norm": 0.8814523220062256, "learning_rate": 1.4626528416379438e-06, "loss": 0.5948, "step": 11959 }, { "epoch": 0.76, "grad_norm": 0.91228848695755, "learning_rate": 1.4619278012939197e-06, "loss": 0.5684, "step": 11960 }, { "epoch": 0.76, "grad_norm": 0.8493878245353699, "learning_rate": 1.4612029099258046e-06, "loss": 0.5311, "step": 11961 }, { "epoch": 0.76, "grad_norm": 0.8996723890304565, "learning_rate": 1.4604781675641273e-06, "loss": 0.5628, "step": 11962 }, { "epoch": 0.76, "grad_norm": 0.9304405450820923, "learning_rate": 1.4597535742393998e-06, "loss": 0.5539, "step": 11963 }, { "epoch": 0.76, "grad_norm": 0.8904679417610168, "learning_rate": 1.459029129982134e-06, "loss": 0.5766, "step": 11964 }, { "epoch": 0.76, "grad_norm": 0.9363497495651245, "learning_rate": 1.4583048348228345e-06, "loss": 0.5826, "step": 11965 }, { "epoch": 0.76, "grad_norm": 0.8508997559547424, "learning_rate": 1.4575806887919951e-06, "loss": 0.5896, "step": 11966 }, { "epoch": 0.76, "grad_norm": 0.8754972219467163, "learning_rate": 1.456856691920111e-06, "loss": 0.579, "step": 11967 }, { "epoch": 0.76, "grad_norm": 0.9017912745475769, "learning_rate": 1.4561328442376678e-06, "loss": 0.5713, "step": 11968 }, { "epoch": 0.76, "grad_norm": 0.8693665862083435, "learning_rate": 1.45540914577514e-06, "loss": 0.5654, "step": 11969 }, { "epoch": 0.76, "grad_norm": 0.9070661664009094, "learning_rate": 1.454685596563003e-06, "loss": 0.5468, "step": 11970 }, { "epoch": 0.76, "grad_norm": 0.8635410666465759, "learning_rate": 1.4539621966317219e-06, "loss": 0.5684, "step": 11971 }, { "epoch": 0.76, "grad_norm": 0.8574047088623047, "learning_rate": 1.4532389460117574e-06, "loss": 0.5417, "step": 11972 }, { "epoch": 0.76, "grad_norm": 0.9325186610221863, "learning_rate": 1.4525158447335635e-06, "loss": 0.5479, "step": 11973 }, { "epoch": 0.76, "grad_norm": 0.8873754143714905, "learning_rate": 1.4517928928275843e-06, "loss": 0.5812, "step": 11974 }, { "epoch": 0.76, "grad_norm": 0.8907727599143982, "learning_rate": 1.4510700903242642e-06, "loss": 0.5342, "step": 11975 }, { "epoch": 0.76, "grad_norm": 0.8830631375312805, "learning_rate": 1.4503474372540382e-06, "loss": 0.6237, "step": 11976 }, { "epoch": 0.76, "grad_norm": 0.9112756252288818, "learning_rate": 1.4496249336473318e-06, "loss": 0.5756, "step": 11977 }, { "epoch": 0.76, "grad_norm": 0.9188751578330994, "learning_rate": 1.4489025795345686e-06, "loss": 0.5752, "step": 11978 }, { "epoch": 0.76, "grad_norm": 0.915205717086792, "learning_rate": 1.4481803749461643e-06, "loss": 0.5848, "step": 11979 }, { "epoch": 0.76, "grad_norm": 0.8783239722251892, "learning_rate": 1.4474583199125285e-06, "loss": 0.5691, "step": 11980 }, { "epoch": 0.76, "grad_norm": 0.8469722270965576, "learning_rate": 1.446736414464066e-06, "loss": 0.5367, "step": 11981 }, { "epoch": 0.76, "grad_norm": 0.855970025062561, "learning_rate": 1.4460146586311713e-06, "loss": 0.5862, "step": 11982 }, { "epoch": 0.76, "grad_norm": 0.9022277593612671, "learning_rate": 1.4452930524442338e-06, "loss": 0.5992, "step": 11983 }, { "epoch": 0.76, "grad_norm": 0.856157124042511, "learning_rate": 1.4445715959336432e-06, "loss": 0.5505, "step": 11984 }, { "epoch": 0.76, "grad_norm": 0.8723897933959961, "learning_rate": 1.4438502891297723e-06, "loss": 0.5493, "step": 11985 }, { "epoch": 0.76, "grad_norm": 0.8988958597183228, "learning_rate": 1.4431291320629953e-06, "loss": 0.5577, "step": 11986 }, { "epoch": 0.76, "grad_norm": 0.9534813165664673, "learning_rate": 1.4424081247636768e-06, "loss": 0.575, "step": 11987 }, { "epoch": 0.76, "grad_norm": 0.886587917804718, "learning_rate": 1.4416872672621762e-06, "loss": 0.6182, "step": 11988 }, { "epoch": 0.76, "grad_norm": 0.9059985280036926, "learning_rate": 1.440966559588846e-06, "loss": 0.5644, "step": 11989 }, { "epoch": 0.76, "grad_norm": 0.8605340719223022, "learning_rate": 1.4402460017740355e-06, "loss": 0.5692, "step": 11990 }, { "epoch": 0.76, "grad_norm": 0.8864085674285889, "learning_rate": 1.4395255938480785e-06, "loss": 0.5363, "step": 11991 }, { "epoch": 0.76, "grad_norm": 0.9561933875083923, "learning_rate": 1.4388053358413162e-06, "loss": 0.5968, "step": 11992 }, { "epoch": 0.76, "grad_norm": 0.8334751129150391, "learning_rate": 1.4380852277840712e-06, "loss": 0.5657, "step": 11993 }, { "epoch": 0.76, "grad_norm": 0.878257155418396, "learning_rate": 1.437365269706666e-06, "loss": 0.5811, "step": 11994 }, { "epoch": 0.76, "grad_norm": 0.8865748047828674, "learning_rate": 1.436645461639416e-06, "loss": 0.5639, "step": 11995 }, { "epoch": 0.76, "grad_norm": 0.9328632354736328, "learning_rate": 1.4359258036126295e-06, "loss": 0.5744, "step": 11996 }, { "epoch": 0.76, "grad_norm": 0.8493953943252563, "learning_rate": 1.4352062956566088e-06, "loss": 0.5575, "step": 11997 }, { "epoch": 0.76, "grad_norm": 0.8793647289276123, "learning_rate": 1.4344869378016518e-06, "loss": 0.5454, "step": 11998 }, { "epoch": 0.76, "grad_norm": 0.8968666195869446, "learning_rate": 1.4337677300780445e-06, "loss": 0.6076, "step": 11999 }, { "epoch": 0.76, "grad_norm": 0.8650113940238953, "learning_rate": 1.433048672516072e-06, "loss": 0.601, "step": 12000 }, { "epoch": 0.76, "grad_norm": 0.8847838044166565, "learning_rate": 1.4323297651460117e-06, "loss": 0.5885, "step": 12001 }, { "epoch": 0.76, "grad_norm": 0.9786666035652161, "learning_rate": 1.4316110079981339e-06, "loss": 0.5909, "step": 12002 }, { "epoch": 0.76, "grad_norm": 0.8795079588890076, "learning_rate": 1.4308924011027042e-06, "loss": 0.593, "step": 12003 }, { "epoch": 0.76, "grad_norm": 0.9203742146492004, "learning_rate": 1.430173944489977e-06, "loss": 0.576, "step": 12004 }, { "epoch": 0.76, "grad_norm": 0.8110754489898682, "learning_rate": 1.4294556381902074e-06, "loss": 0.5006, "step": 12005 }, { "epoch": 0.76, "grad_norm": 0.90155029296875, "learning_rate": 1.428737482233642e-06, "loss": 0.5475, "step": 12006 }, { "epoch": 0.76, "grad_norm": 0.8908960819244385, "learning_rate": 1.4280194766505156e-06, "loss": 0.5745, "step": 12007 }, { "epoch": 0.76, "grad_norm": 0.9284471869468689, "learning_rate": 1.427301621471064e-06, "loss": 0.6061, "step": 12008 }, { "epoch": 0.76, "grad_norm": 0.9098774790763855, "learning_rate": 1.4265839167255114e-06, "loss": 0.6325, "step": 12009 }, { "epoch": 0.76, "grad_norm": 0.8448832035064697, "learning_rate": 1.42586636244408e-06, "loss": 0.5629, "step": 12010 }, { "epoch": 0.76, "grad_norm": 0.898383378982544, "learning_rate": 1.4251489586569834e-06, "loss": 0.6486, "step": 12011 }, { "epoch": 0.76, "grad_norm": 0.915291428565979, "learning_rate": 1.4244317053944268e-06, "loss": 0.5692, "step": 12012 }, { "epoch": 0.76, "grad_norm": 0.8258968591690063, "learning_rate": 1.423714602686611e-06, "loss": 0.5649, "step": 12013 }, { "epoch": 0.76, "grad_norm": 0.8475250601768494, "learning_rate": 1.4229976505637361e-06, "loss": 0.5672, "step": 12014 }, { "epoch": 0.76, "grad_norm": 0.8996663689613342, "learning_rate": 1.4222808490559842e-06, "loss": 0.6316, "step": 12015 }, { "epoch": 0.76, "grad_norm": 0.8408166766166687, "learning_rate": 1.4215641981935403e-06, "loss": 0.4919, "step": 12016 }, { "epoch": 0.76, "grad_norm": 0.9476755261421204, "learning_rate": 1.4208476980065794e-06, "loss": 0.5853, "step": 12017 }, { "epoch": 0.76, "grad_norm": 0.8874031901359558, "learning_rate": 1.420131348525271e-06, "loss": 0.5749, "step": 12018 }, { "epoch": 0.76, "grad_norm": 0.8719558119773865, "learning_rate": 1.4194151497797793e-06, "loss": 0.6041, "step": 12019 }, { "epoch": 0.76, "grad_norm": 0.9502779841423035, "learning_rate": 1.4186991018002582e-06, "loss": 0.5715, "step": 12020 }, { "epoch": 0.76, "grad_norm": 0.8630761504173279, "learning_rate": 1.4179832046168584e-06, "loss": 0.6021, "step": 12021 }, { "epoch": 0.76, "grad_norm": 0.922612190246582, "learning_rate": 1.417267458259728e-06, "loss": 0.5775, "step": 12022 }, { "epoch": 0.76, "grad_norm": 0.8538753390312195, "learning_rate": 1.4165518627589991e-06, "loss": 0.5034, "step": 12023 }, { "epoch": 0.76, "grad_norm": 0.9272584915161133, "learning_rate": 1.4158364181448065e-06, "loss": 0.5819, "step": 12024 }, { "epoch": 0.76, "grad_norm": 0.9062113761901855, "learning_rate": 1.4151211244472734e-06, "loss": 0.6303, "step": 12025 }, { "epoch": 0.76, "grad_norm": 1.0064603090286255, "learning_rate": 1.414405981696519e-06, "loss": 0.5895, "step": 12026 }, { "epoch": 0.76, "grad_norm": 0.8894251585006714, "learning_rate": 1.4136909899226564e-06, "loss": 0.6, "step": 12027 }, { "epoch": 0.76, "grad_norm": 0.8731244206428528, "learning_rate": 1.412976149155789e-06, "loss": 0.5272, "step": 12028 }, { "epoch": 0.76, "grad_norm": 0.9041507840156555, "learning_rate": 1.412261459426018e-06, "loss": 0.5635, "step": 12029 }, { "epoch": 0.76, "grad_norm": 0.9179893136024475, "learning_rate": 1.4115469207634358e-06, "loss": 0.6162, "step": 12030 }, { "epoch": 0.76, "grad_norm": 0.9017035365104675, "learning_rate": 1.4108325331981298e-06, "loss": 0.5674, "step": 12031 }, { "epoch": 0.76, "grad_norm": 0.8612952828407288, "learning_rate": 1.4101182967601796e-06, "loss": 0.6167, "step": 12032 }, { "epoch": 0.76, "grad_norm": 0.8670690059661865, "learning_rate": 1.4094042114796613e-06, "loss": 0.5648, "step": 12033 }, { "epoch": 0.76, "grad_norm": 0.900267481803894, "learning_rate": 1.4086902773866379e-06, "loss": 0.5819, "step": 12034 }, { "epoch": 0.76, "grad_norm": 0.8597514629364014, "learning_rate": 1.4079764945111767e-06, "loss": 0.5914, "step": 12035 }, { "epoch": 0.76, "grad_norm": 0.8889679908752441, "learning_rate": 1.407262862883328e-06, "loss": 0.5411, "step": 12036 }, { "epoch": 0.76, "grad_norm": 0.8739466071128845, "learning_rate": 1.4065493825331416e-06, "loss": 0.5116, "step": 12037 }, { "epoch": 0.76, "grad_norm": 0.9049973487854004, "learning_rate": 1.4058360534906607e-06, "loss": 0.6136, "step": 12038 }, { "epoch": 0.76, "grad_norm": 0.9659464359283447, "learning_rate": 1.4051228757859197e-06, "loss": 0.6078, "step": 12039 }, { "epoch": 0.76, "grad_norm": 0.9126960635185242, "learning_rate": 1.4044098494489494e-06, "loss": 0.5604, "step": 12040 }, { "epoch": 0.76, "grad_norm": 0.8697749972343445, "learning_rate": 1.4036969745097735e-06, "loss": 0.558, "step": 12041 }, { "epoch": 0.76, "grad_norm": 0.8654941320419312, "learning_rate": 1.4029842509984043e-06, "loss": 0.5581, "step": 12042 }, { "epoch": 0.76, "grad_norm": 0.8910323977470398, "learning_rate": 1.4022716789448581e-06, "loss": 0.5612, "step": 12043 }, { "epoch": 0.76, "grad_norm": 0.8871368765830994, "learning_rate": 1.4015592583791343e-06, "loss": 0.5548, "step": 12044 }, { "epoch": 0.76, "grad_norm": 0.8820889592170715, "learning_rate": 1.4008469893312321e-06, "loss": 0.5956, "step": 12045 }, { "epoch": 0.76, "grad_norm": 0.8938196301460266, "learning_rate": 1.4001348718311446e-06, "loss": 0.5659, "step": 12046 }, { "epoch": 0.76, "grad_norm": 0.864248514175415, "learning_rate": 1.399422905908851e-06, "loss": 0.5602, "step": 12047 }, { "epoch": 0.76, "grad_norm": 0.9130455851554871, "learning_rate": 1.3987110915943352e-06, "loss": 0.5987, "step": 12048 }, { "epoch": 0.76, "grad_norm": 0.8632736802101135, "learning_rate": 1.397999428917569e-06, "loss": 0.5656, "step": 12049 }, { "epoch": 0.76, "grad_norm": 0.9414397478103638, "learning_rate": 1.3972879179085147e-06, "loss": 0.5893, "step": 12050 }, { "epoch": 0.76, "grad_norm": 0.9404668211936951, "learning_rate": 1.396576558597133e-06, "loss": 0.608, "step": 12051 }, { "epoch": 0.76, "grad_norm": 0.9048896431922913, "learning_rate": 1.3958653510133774e-06, "loss": 0.5404, "step": 12052 }, { "epoch": 0.76, "grad_norm": 0.9009268879890442, "learning_rate": 1.3951542951871938e-06, "loss": 0.5708, "step": 12053 }, { "epoch": 0.76, "grad_norm": 0.8709206581115723, "learning_rate": 1.3944433911485229e-06, "loss": 0.5458, "step": 12054 }, { "epoch": 0.76, "grad_norm": 0.8912920951843262, "learning_rate": 1.3937326389272977e-06, "loss": 0.6084, "step": 12055 }, { "epoch": 0.76, "grad_norm": 0.8818450570106506, "learning_rate": 1.3930220385534453e-06, "loss": 0.6093, "step": 12056 }, { "epoch": 0.76, "grad_norm": 0.8648407459259033, "learning_rate": 1.3923115900568896e-06, "loss": 0.6086, "step": 12057 }, { "epoch": 0.76, "grad_norm": 0.9007489681243896, "learning_rate": 1.3916012934675405e-06, "loss": 0.5638, "step": 12058 }, { "epoch": 0.76, "grad_norm": 0.8429774641990662, "learning_rate": 1.3908911488153081e-06, "loss": 0.5916, "step": 12059 }, { "epoch": 0.76, "grad_norm": 0.8665831685066223, "learning_rate": 1.3901811561300944e-06, "loss": 0.5793, "step": 12060 }, { "epoch": 0.76, "grad_norm": 0.8880481719970703, "learning_rate": 1.3894713154417944e-06, "loss": 0.567, "step": 12061 }, { "epoch": 0.76, "grad_norm": 0.8977078199386597, "learning_rate": 1.3887616267802972e-06, "loss": 0.5831, "step": 12062 }, { "epoch": 0.76, "grad_norm": 0.8818337917327881, "learning_rate": 1.3880520901754874e-06, "loss": 0.584, "step": 12063 }, { "epoch": 0.76, "grad_norm": 0.9489629864692688, "learning_rate": 1.3873427056572354e-06, "loss": 0.6143, "step": 12064 }, { "epoch": 0.76, "grad_norm": 0.8898563981056213, "learning_rate": 1.386633473255418e-06, "loss": 0.5799, "step": 12065 }, { "epoch": 0.76, "grad_norm": 0.9059821963310242, "learning_rate": 1.3859243929998933e-06, "loss": 0.6092, "step": 12066 }, { "epoch": 0.76, "grad_norm": 0.8613783121109009, "learning_rate": 1.3852154649205201e-06, "loss": 0.5234, "step": 12067 }, { "epoch": 0.76, "grad_norm": 0.9153664112091064, "learning_rate": 1.3845066890471487e-06, "loss": 0.5966, "step": 12068 }, { "epoch": 0.76, "grad_norm": 0.8898327350616455, "learning_rate": 1.3837980654096229e-06, "loss": 0.6014, "step": 12069 }, { "epoch": 0.76, "grad_norm": 0.9291636347770691, "learning_rate": 1.383089594037781e-06, "loss": 0.5429, "step": 12070 }, { "epoch": 0.76, "grad_norm": 0.9276854991912842, "learning_rate": 1.3823812749614556e-06, "loss": 0.5632, "step": 12071 }, { "epoch": 0.76, "grad_norm": 0.9263901114463806, "learning_rate": 1.3816731082104668e-06, "loss": 0.5816, "step": 12072 }, { "epoch": 0.76, "grad_norm": 0.9651497602462769, "learning_rate": 1.3809650938146391e-06, "loss": 0.6368, "step": 12073 }, { "epoch": 0.76, "grad_norm": 0.8885228037834167, "learning_rate": 1.3802572318037804e-06, "loss": 0.5443, "step": 12074 }, { "epoch": 0.77, "grad_norm": 0.8886858820915222, "learning_rate": 1.379549522207697e-06, "loss": 0.5806, "step": 12075 }, { "epoch": 0.77, "grad_norm": 0.9202280640602112, "learning_rate": 1.3788419650561908e-06, "loss": 0.6229, "step": 12076 }, { "epoch": 0.77, "grad_norm": 0.875375509262085, "learning_rate": 1.3781345603790485e-06, "loss": 0.4984, "step": 12077 }, { "epoch": 0.77, "grad_norm": 0.8515941500663757, "learning_rate": 1.3774273082060625e-06, "loss": 0.5577, "step": 12078 }, { "epoch": 0.77, "grad_norm": 0.9056531190872192, "learning_rate": 1.3767202085670118e-06, "loss": 0.6189, "step": 12079 }, { "epoch": 0.77, "grad_norm": 0.877628743648529, "learning_rate": 1.3760132614916672e-06, "loss": 0.5966, "step": 12080 }, { "epoch": 0.77, "grad_norm": 0.9349701404571533, "learning_rate": 1.375306467009797e-06, "loss": 0.64, "step": 12081 }, { "epoch": 0.77, "grad_norm": 0.9644002914428711, "learning_rate": 1.3745998251511622e-06, "loss": 0.5834, "step": 12082 }, { "epoch": 0.77, "grad_norm": 0.8802942037582397, "learning_rate": 1.373893335945517e-06, "loss": 0.5498, "step": 12083 }, { "epoch": 0.77, "grad_norm": 0.936839759349823, "learning_rate": 1.373186999422611e-06, "loss": 0.5698, "step": 12084 }, { "epoch": 0.77, "grad_norm": 0.8854506015777588, "learning_rate": 1.3724808156121799e-06, "loss": 0.5429, "step": 12085 }, { "epoch": 0.77, "grad_norm": 0.882511556148529, "learning_rate": 1.3717747845439645e-06, "loss": 0.531, "step": 12086 }, { "epoch": 0.77, "grad_norm": 0.8254367709159851, "learning_rate": 1.371068906247693e-06, "loss": 0.5357, "step": 12087 }, { "epoch": 0.77, "grad_norm": 0.9685107469558716, "learning_rate": 1.3703631807530831e-06, "loss": 0.6293, "step": 12088 }, { "epoch": 0.77, "grad_norm": 0.8970738649368286, "learning_rate": 1.3696576080898538e-06, "loss": 0.6471, "step": 12089 }, { "epoch": 0.77, "grad_norm": 0.8906121253967285, "learning_rate": 1.3689521882877137e-06, "loss": 0.5589, "step": 12090 }, { "epoch": 0.77, "grad_norm": 0.8889774084091187, "learning_rate": 1.3682469213763655e-06, "loss": 0.5734, "step": 12091 }, { "epoch": 0.77, "grad_norm": 0.8506032228469849, "learning_rate": 1.367541807385507e-06, "loss": 0.5613, "step": 12092 }, { "epoch": 0.77, "grad_norm": 0.9361366629600525, "learning_rate": 1.3668368463448246e-06, "loss": 0.5551, "step": 12093 }, { "epoch": 0.77, "grad_norm": 0.867592990398407, "learning_rate": 1.3661320382840026e-06, "loss": 0.4914, "step": 12094 }, { "epoch": 0.77, "grad_norm": 0.9990113973617554, "learning_rate": 1.3654273832327219e-06, "loss": 0.5793, "step": 12095 }, { "epoch": 0.77, "grad_norm": 0.9370816946029663, "learning_rate": 1.3647228812206493e-06, "loss": 0.5923, "step": 12096 }, { "epoch": 0.77, "grad_norm": 0.8764215111732483, "learning_rate": 1.3640185322774495e-06, "loss": 0.5852, "step": 12097 }, { "epoch": 0.77, "grad_norm": 0.8534221053123474, "learning_rate": 1.3633143364327812e-06, "loss": 0.5271, "step": 12098 }, { "epoch": 0.77, "grad_norm": 0.8506825566291809, "learning_rate": 1.3626102937162943e-06, "loss": 0.5034, "step": 12099 }, { "epoch": 0.77, "grad_norm": 0.9237973690032959, "learning_rate": 1.3619064041576368e-06, "loss": 0.5503, "step": 12100 }, { "epoch": 0.77, "grad_norm": 0.9016441106796265, "learning_rate": 1.3612026677864426e-06, "loss": 0.6057, "step": 12101 }, { "epoch": 0.77, "grad_norm": 0.9441617727279663, "learning_rate": 1.360499084632344e-06, "loss": 0.6116, "step": 12102 }, { "epoch": 0.77, "grad_norm": 0.8589941263198853, "learning_rate": 1.3597956547249713e-06, "loss": 0.5755, "step": 12103 }, { "epoch": 0.77, "grad_norm": 0.8942433595657349, "learning_rate": 1.3590923780939386e-06, "loss": 0.575, "step": 12104 }, { "epoch": 0.77, "grad_norm": 0.8741679787635803, "learning_rate": 1.3583892547688598e-06, "loss": 0.5754, "step": 12105 }, { "epoch": 0.77, "grad_norm": 0.8796617984771729, "learning_rate": 1.357686284779343e-06, "loss": 0.5743, "step": 12106 }, { "epoch": 0.77, "grad_norm": 0.8270777463912964, "learning_rate": 1.3569834681549832e-06, "loss": 0.5867, "step": 12107 }, { "epoch": 0.77, "grad_norm": 0.8669138550758362, "learning_rate": 1.3562808049253795e-06, "loss": 0.575, "step": 12108 }, { "epoch": 0.77, "grad_norm": 0.8774738311767578, "learning_rate": 1.3555782951201134e-06, "loss": 0.6112, "step": 12109 }, { "epoch": 0.77, "grad_norm": 0.8828439712524414, "learning_rate": 1.3548759387687683e-06, "loss": 0.5896, "step": 12110 }, { "epoch": 0.77, "grad_norm": 0.9196562170982361, "learning_rate": 1.3541737359009161e-06, "loss": 0.5742, "step": 12111 }, { "epoch": 0.77, "grad_norm": 0.8660917282104492, "learning_rate": 1.3534716865461256e-06, "loss": 0.5293, "step": 12112 }, { "epoch": 0.77, "grad_norm": 0.8525222539901733, "learning_rate": 1.3527697907339565e-06, "loss": 0.5479, "step": 12113 }, { "epoch": 0.77, "grad_norm": 0.855554461479187, "learning_rate": 1.3520680484939651e-06, "loss": 0.5706, "step": 12114 }, { "epoch": 0.77, "grad_norm": 1.0038961172103882, "learning_rate": 1.3513664598556952e-06, "loss": 0.5432, "step": 12115 }, { "epoch": 0.77, "grad_norm": 0.8975916504859924, "learning_rate": 1.3506650248486946e-06, "loss": 0.5811, "step": 12116 }, { "epoch": 0.77, "grad_norm": 1.0002868175506592, "learning_rate": 1.3499637435024926e-06, "loss": 0.5884, "step": 12117 }, { "epoch": 0.77, "grad_norm": 0.9339752197265625, "learning_rate": 1.34926261584662e-06, "loss": 0.5654, "step": 12118 }, { "epoch": 0.77, "grad_norm": 0.8323291540145874, "learning_rate": 1.3485616419105985e-06, "loss": 0.6116, "step": 12119 }, { "epoch": 0.77, "grad_norm": 0.9814819097518921, "learning_rate": 1.3478608217239435e-06, "loss": 0.5862, "step": 12120 }, { "epoch": 0.77, "grad_norm": 0.8952215313911438, "learning_rate": 1.347160155316165e-06, "loss": 0.5829, "step": 12121 }, { "epoch": 0.77, "grad_norm": 0.9084662795066833, "learning_rate": 1.3464596427167663e-06, "loss": 0.574, "step": 12122 }, { "epoch": 0.77, "grad_norm": 0.8473517298698425, "learning_rate": 1.3457592839552409e-06, "loss": 0.5339, "step": 12123 }, { "epoch": 0.77, "grad_norm": 0.8757284879684448, "learning_rate": 1.3450590790610795e-06, "loss": 0.5858, "step": 12124 }, { "epoch": 0.77, "grad_norm": 0.9462736248970032, "learning_rate": 1.3443590280637664e-06, "loss": 0.5508, "step": 12125 }, { "epoch": 0.77, "grad_norm": 0.9609660506248474, "learning_rate": 1.3436591309927772e-06, "loss": 0.58, "step": 12126 }, { "epoch": 0.77, "grad_norm": 0.8770208358764648, "learning_rate": 1.3429593878775825e-06, "loss": 0.5592, "step": 12127 }, { "epoch": 0.77, "grad_norm": 0.9094352722167969, "learning_rate": 1.342259798747646e-06, "loss": 0.5542, "step": 12128 }, { "epoch": 0.77, "grad_norm": 0.8885565996170044, "learning_rate": 1.3415603636324248e-06, "loss": 0.5852, "step": 12129 }, { "epoch": 0.77, "grad_norm": 0.839444637298584, "learning_rate": 1.3408610825613722e-06, "loss": 0.5314, "step": 12130 }, { "epoch": 0.77, "grad_norm": 0.9045486450195312, "learning_rate": 1.340161955563928e-06, "loss": 0.5268, "step": 12131 }, { "epoch": 0.77, "grad_norm": 0.886461079120636, "learning_rate": 1.339462982669531e-06, "loss": 0.5484, "step": 12132 }, { "epoch": 0.77, "grad_norm": 0.8416271805763245, "learning_rate": 1.3387641639076165e-06, "loss": 0.5461, "step": 12133 }, { "epoch": 0.77, "grad_norm": 0.8772505521774292, "learning_rate": 1.3380654993076054e-06, "loss": 0.5332, "step": 12134 }, { "epoch": 0.77, "grad_norm": 0.824565589427948, "learning_rate": 1.3373669888989167e-06, "loss": 0.5231, "step": 12135 }, { "epoch": 0.77, "grad_norm": 0.8874905109405518, "learning_rate": 1.3366686327109645e-06, "loss": 0.5964, "step": 12136 }, { "epoch": 0.77, "grad_norm": 0.9241152405738831, "learning_rate": 1.3359704307731491e-06, "loss": 0.563, "step": 12137 }, { "epoch": 0.77, "grad_norm": 0.9180853366851807, "learning_rate": 1.3352723831148761e-06, "loss": 0.5685, "step": 12138 }, { "epoch": 0.77, "grad_norm": 0.9550989270210266, "learning_rate": 1.3345744897655327e-06, "loss": 0.5835, "step": 12139 }, { "epoch": 0.77, "grad_norm": 0.8818813562393188, "learning_rate": 1.3338767507545064e-06, "loss": 0.5739, "step": 12140 }, { "epoch": 0.77, "grad_norm": 0.8973625302314758, "learning_rate": 1.3331791661111765e-06, "loss": 0.5607, "step": 12141 }, { "epoch": 0.77, "grad_norm": 0.9165273904800415, "learning_rate": 1.3324817358649162e-06, "loss": 0.5993, "step": 12142 }, { "epoch": 0.77, "grad_norm": 0.8530603647232056, "learning_rate": 1.3317844600450912e-06, "loss": 0.5488, "step": 12143 }, { "epoch": 0.77, "grad_norm": 0.9110085964202881, "learning_rate": 1.3310873386810641e-06, "loss": 0.6053, "step": 12144 }, { "epoch": 0.77, "grad_norm": 0.8425561189651489, "learning_rate": 1.330390371802182e-06, "loss": 0.5784, "step": 12145 }, { "epoch": 0.77, "grad_norm": 0.9364494681358337, "learning_rate": 1.3296935594377996e-06, "loss": 0.6112, "step": 12146 }, { "epoch": 0.77, "grad_norm": 0.9249874949455261, "learning_rate": 1.3289969016172515e-06, "loss": 0.6136, "step": 12147 }, { "epoch": 0.77, "grad_norm": 0.8937221169471741, "learning_rate": 1.3283003983698733e-06, "loss": 0.5601, "step": 12148 }, { "epoch": 0.77, "grad_norm": 0.9098302721977234, "learning_rate": 1.3276040497249926e-06, "loss": 0.5666, "step": 12149 }, { "epoch": 0.77, "grad_norm": 0.862511396408081, "learning_rate": 1.3269078557119297e-06, "loss": 0.5542, "step": 12150 }, { "epoch": 0.77, "grad_norm": 0.9537906646728516, "learning_rate": 1.3262118163599992e-06, "loss": 0.5299, "step": 12151 }, { "epoch": 0.77, "grad_norm": 0.9067496657371521, "learning_rate": 1.3255159316985105e-06, "loss": 0.5696, "step": 12152 }, { "epoch": 0.77, "grad_norm": 0.8889273405075073, "learning_rate": 1.3248202017567624e-06, "loss": 0.5981, "step": 12153 }, { "epoch": 0.77, "grad_norm": 0.8882769346237183, "learning_rate": 1.32412462656405e-06, "loss": 0.61, "step": 12154 }, { "epoch": 0.77, "grad_norm": 0.9011073112487793, "learning_rate": 1.3234292061496622e-06, "loss": 0.5695, "step": 12155 }, { "epoch": 0.77, "grad_norm": 0.8439561724662781, "learning_rate": 1.3227339405428807e-06, "loss": 0.5706, "step": 12156 }, { "epoch": 0.77, "grad_norm": 0.9296759366989136, "learning_rate": 1.3220388297729825e-06, "loss": 0.5967, "step": 12157 }, { "epoch": 0.77, "grad_norm": 0.8398501873016357, "learning_rate": 1.3213438738692313e-06, "loss": 0.5288, "step": 12158 }, { "epoch": 0.77, "grad_norm": 0.908816397190094, "learning_rate": 1.320649072860894e-06, "loss": 0.5917, "step": 12159 }, { "epoch": 0.77, "grad_norm": 0.90097975730896, "learning_rate": 1.3199544267772257e-06, "loss": 0.5321, "step": 12160 }, { "epoch": 0.77, "grad_norm": 0.899578869342804, "learning_rate": 1.3192599356474733e-06, "loss": 0.6484, "step": 12161 }, { "epoch": 0.77, "grad_norm": 0.8797482252120972, "learning_rate": 1.318565599500881e-06, "loss": 0.6243, "step": 12162 }, { "epoch": 0.77, "grad_norm": 0.873276948928833, "learning_rate": 1.3178714183666846e-06, "loss": 0.5852, "step": 12163 }, { "epoch": 0.77, "grad_norm": 0.9565367102622986, "learning_rate": 1.3171773922741132e-06, "loss": 0.573, "step": 12164 }, { "epoch": 0.77, "grad_norm": 0.8669590950012207, "learning_rate": 1.316483521252392e-06, "loss": 0.6123, "step": 12165 }, { "epoch": 0.77, "grad_norm": 0.7968014478683472, "learning_rate": 1.3157898053307322e-06, "loss": 0.5062, "step": 12166 }, { "epoch": 0.77, "grad_norm": 0.9194180369377136, "learning_rate": 1.3150962445383492e-06, "loss": 0.5859, "step": 12167 }, { "epoch": 0.77, "grad_norm": 0.8830471038818359, "learning_rate": 1.314402838904446e-06, "loss": 0.5655, "step": 12168 }, { "epoch": 0.77, "grad_norm": 0.9183891415596008, "learning_rate": 1.3137095884582163e-06, "loss": 0.6396, "step": 12169 }, { "epoch": 0.77, "grad_norm": 0.8405441045761108, "learning_rate": 1.3130164932288524e-06, "loss": 0.5072, "step": 12170 }, { "epoch": 0.77, "grad_norm": 0.9240639209747314, "learning_rate": 1.3123235532455376e-06, "loss": 0.6048, "step": 12171 }, { "epoch": 0.77, "grad_norm": 0.8928464651107788, "learning_rate": 1.3116307685374497e-06, "loss": 0.5657, "step": 12172 }, { "epoch": 0.77, "grad_norm": 0.8829059600830078, "learning_rate": 1.3109381391337605e-06, "loss": 0.5598, "step": 12173 }, { "epoch": 0.77, "grad_norm": 0.8686420321464539, "learning_rate": 1.3102456650636314e-06, "loss": 0.6097, "step": 12174 }, { "epoch": 0.77, "grad_norm": 0.9173951745033264, "learning_rate": 1.3095533463562204e-06, "loss": 0.6205, "step": 12175 }, { "epoch": 0.77, "grad_norm": 0.9148120880126953, "learning_rate": 1.3088611830406828e-06, "loss": 0.5677, "step": 12176 }, { "epoch": 0.77, "grad_norm": 0.8983868360519409, "learning_rate": 1.3081691751461588e-06, "loss": 0.5204, "step": 12177 }, { "epoch": 0.77, "grad_norm": 0.9133874177932739, "learning_rate": 1.3074773227017878e-06, "loss": 0.601, "step": 12178 }, { "epoch": 0.77, "grad_norm": 0.9384349584579468, "learning_rate": 1.3067856257367018e-06, "loss": 0.5376, "step": 12179 }, { "epoch": 0.77, "grad_norm": 0.9111471772193909, "learning_rate": 1.3060940842800247e-06, "loss": 0.5794, "step": 12180 }, { "epoch": 0.77, "grad_norm": 0.8366988897323608, "learning_rate": 1.3054026983608776e-06, "loss": 0.5528, "step": 12181 }, { "epoch": 0.77, "grad_norm": 0.887912392616272, "learning_rate": 1.3047114680083683e-06, "loss": 0.544, "step": 12182 }, { "epoch": 0.77, "grad_norm": 0.8583880662918091, "learning_rate": 1.3040203932516043e-06, "loss": 0.5857, "step": 12183 }, { "epoch": 0.77, "grad_norm": 0.8937926888465881, "learning_rate": 1.303329474119684e-06, "loss": 0.5397, "step": 12184 }, { "epoch": 0.77, "grad_norm": 0.9099065661430359, "learning_rate": 1.3026387106417e-06, "loss": 0.5527, "step": 12185 }, { "epoch": 0.77, "grad_norm": 0.8292108774185181, "learning_rate": 1.301948102846738e-06, "loss": 0.5656, "step": 12186 }, { "epoch": 0.77, "grad_norm": 0.8275082111358643, "learning_rate": 1.301257650763878e-06, "loss": 0.5818, "step": 12187 }, { "epoch": 0.77, "grad_norm": 0.973997950553894, "learning_rate": 1.3005673544221882e-06, "loss": 0.6322, "step": 12188 }, { "epoch": 0.77, "grad_norm": 0.8978073596954346, "learning_rate": 1.299877213850741e-06, "loss": 0.5376, "step": 12189 }, { "epoch": 0.77, "grad_norm": 0.8951266407966614, "learning_rate": 1.2991872290785906e-06, "loss": 0.5989, "step": 12190 }, { "epoch": 0.77, "grad_norm": 1.023902416229248, "learning_rate": 1.2984974001347922e-06, "loss": 0.5635, "step": 12191 }, { "epoch": 0.77, "grad_norm": 0.8716408014297485, "learning_rate": 1.2978077270483913e-06, "loss": 0.5602, "step": 12192 }, { "epoch": 0.77, "grad_norm": 0.865201473236084, "learning_rate": 1.2971182098484286e-06, "loss": 0.5535, "step": 12193 }, { "epoch": 0.77, "grad_norm": 0.9294458031654358, "learning_rate": 1.2964288485639366e-06, "loss": 0.5739, "step": 12194 }, { "epoch": 0.77, "grad_norm": 0.9854139089584351, "learning_rate": 1.2957396432239427e-06, "loss": 0.5758, "step": 12195 }, { "epoch": 0.77, "grad_norm": 0.9392171502113342, "learning_rate": 1.2950505938574643e-06, "loss": 0.5966, "step": 12196 }, { "epoch": 0.77, "grad_norm": 0.9069497585296631, "learning_rate": 1.2943617004935176e-06, "loss": 0.5433, "step": 12197 }, { "epoch": 0.77, "grad_norm": 0.9116702079772949, "learning_rate": 1.2936729631611106e-06, "loss": 0.596, "step": 12198 }, { "epoch": 0.77, "grad_norm": 0.9324621558189392, "learning_rate": 1.2929843818892401e-06, "loss": 0.5372, "step": 12199 }, { "epoch": 0.77, "grad_norm": 0.8757584691047668, "learning_rate": 1.2922959567069016e-06, "loss": 0.5684, "step": 12200 }, { "epoch": 0.77, "grad_norm": 0.9068610668182373, "learning_rate": 1.2916076876430821e-06, "loss": 0.6232, "step": 12201 }, { "epoch": 0.77, "grad_norm": 0.8894833326339722, "learning_rate": 1.2909195747267622e-06, "loss": 0.6127, "step": 12202 }, { "epoch": 0.77, "grad_norm": 0.8847464323043823, "learning_rate": 1.2902316179869179e-06, "loss": 0.5741, "step": 12203 }, { "epoch": 0.77, "grad_norm": 0.9438949823379517, "learning_rate": 1.2895438174525127e-06, "loss": 0.6046, "step": 12204 }, { "epoch": 0.77, "grad_norm": 0.8600268363952637, "learning_rate": 1.288856173152509e-06, "loss": 0.574, "step": 12205 }, { "epoch": 0.77, "grad_norm": 0.9138484001159668, "learning_rate": 1.2881686851158642e-06, "loss": 0.6331, "step": 12206 }, { "epoch": 0.77, "grad_norm": 0.8854186534881592, "learning_rate": 1.287481353371522e-06, "loss": 0.5998, "step": 12207 }, { "epoch": 0.77, "grad_norm": 0.8981321454048157, "learning_rate": 1.286794177948425e-06, "loss": 0.6245, "step": 12208 }, { "epoch": 0.77, "grad_norm": 0.9491480588912964, "learning_rate": 1.286107158875508e-06, "loss": 0.6036, "step": 12209 }, { "epoch": 0.77, "grad_norm": 0.8554275631904602, "learning_rate": 1.285420296181699e-06, "loss": 0.5711, "step": 12210 }, { "epoch": 0.77, "grad_norm": 0.8955265283584595, "learning_rate": 1.2847335898959207e-06, "loss": 0.5782, "step": 12211 }, { "epoch": 0.77, "grad_norm": 0.8879252076148987, "learning_rate": 1.284047040047085e-06, "loss": 0.6219, "step": 12212 }, { "epoch": 0.77, "grad_norm": 0.922939121723175, "learning_rate": 1.2833606466641001e-06, "loss": 0.6014, "step": 12213 }, { "epoch": 0.77, "grad_norm": 0.8796207904815674, "learning_rate": 1.282674409775872e-06, "loss": 0.5638, "step": 12214 }, { "epoch": 0.77, "grad_norm": 0.8656979203224182, "learning_rate": 1.2819883294112918e-06, "loss": 0.5692, "step": 12215 }, { "epoch": 0.77, "grad_norm": 0.8584392666816711, "learning_rate": 1.2813024055992486e-06, "loss": 0.5548, "step": 12216 }, { "epoch": 0.77, "grad_norm": 0.8941633105278015, "learning_rate": 1.2806166383686258e-06, "loss": 0.5758, "step": 12217 }, { "epoch": 0.77, "grad_norm": 0.8649379014968872, "learning_rate": 1.2799310277482952e-06, "loss": 0.5681, "step": 12218 }, { "epoch": 0.77, "grad_norm": 0.9806539416313171, "learning_rate": 1.2792455737671306e-06, "loss": 0.6265, "step": 12219 }, { "epoch": 0.77, "grad_norm": 0.884787380695343, "learning_rate": 1.27856027645399e-06, "loss": 0.5289, "step": 12220 }, { "epoch": 0.77, "grad_norm": 0.8743571043014526, "learning_rate": 1.27787513583773e-06, "loss": 0.5108, "step": 12221 }, { "epoch": 0.77, "grad_norm": 0.8686521649360657, "learning_rate": 1.2771901519471997e-06, "loss": 0.5756, "step": 12222 }, { "epoch": 0.77, "grad_norm": 0.830317497253418, "learning_rate": 1.2765053248112414e-06, "loss": 0.5872, "step": 12223 }, { "epoch": 0.77, "grad_norm": 0.9129568338394165, "learning_rate": 1.2758206544586909e-06, "loss": 0.602, "step": 12224 }, { "epoch": 0.77, "grad_norm": 0.9156956076622009, "learning_rate": 1.2751361409183788e-06, "loss": 0.6216, "step": 12225 }, { "epoch": 0.77, "grad_norm": 0.831794261932373, "learning_rate": 1.2744517842191228e-06, "loss": 0.5739, "step": 12226 }, { "epoch": 0.77, "grad_norm": 0.9067423343658447, "learning_rate": 1.2737675843897452e-06, "loss": 0.5913, "step": 12227 }, { "epoch": 0.77, "grad_norm": 0.9277194142341614, "learning_rate": 1.2730835414590498e-06, "loss": 0.6272, "step": 12228 }, { "epoch": 0.77, "grad_norm": 0.8326361775398254, "learning_rate": 1.272399655455842e-06, "loss": 0.5507, "step": 12229 }, { "epoch": 0.77, "grad_norm": 0.8325486183166504, "learning_rate": 1.2717159264089185e-06, "loss": 0.5397, "step": 12230 }, { "epoch": 0.77, "grad_norm": 0.9493119716644287, "learning_rate": 1.2710323543470648e-06, "loss": 0.5241, "step": 12231 }, { "epoch": 0.77, "grad_norm": 0.9216598868370056, "learning_rate": 1.2703489392990682e-06, "loss": 0.5895, "step": 12232 }, { "epoch": 0.78, "grad_norm": 0.9237011671066284, "learning_rate": 1.2696656812937047e-06, "loss": 0.6019, "step": 12233 }, { "epoch": 0.78, "grad_norm": 0.8747559189796448, "learning_rate": 1.268982580359741e-06, "loss": 0.5257, "step": 12234 }, { "epoch": 0.78, "grad_norm": 0.877472460269928, "learning_rate": 1.2682996365259415e-06, "loss": 0.5535, "step": 12235 }, { "epoch": 0.78, "grad_norm": 0.8679016828536987, "learning_rate": 1.2676168498210623e-06, "loss": 0.5842, "step": 12236 }, { "epoch": 0.78, "grad_norm": 0.8848540782928467, "learning_rate": 1.2669342202738537e-06, "loss": 0.5703, "step": 12237 }, { "epoch": 0.78, "grad_norm": 0.8749752640724182, "learning_rate": 1.2662517479130605e-06, "loss": 0.5588, "step": 12238 }, { "epoch": 0.78, "grad_norm": 0.9668585062026978, "learning_rate": 1.2655694327674145e-06, "loss": 0.5812, "step": 12239 }, { "epoch": 0.78, "grad_norm": 0.9180838465690613, "learning_rate": 1.2648872748656498e-06, "loss": 0.5711, "step": 12240 }, { "epoch": 0.78, "grad_norm": 0.9085766077041626, "learning_rate": 1.2642052742364903e-06, "loss": 0.5718, "step": 12241 }, { "epoch": 0.78, "grad_norm": 0.8867596983909607, "learning_rate": 1.2635234309086486e-06, "loss": 0.5779, "step": 12242 }, { "epoch": 0.78, "grad_norm": 0.9551423788070679, "learning_rate": 1.2628417449108376e-06, "loss": 0.6215, "step": 12243 }, { "epoch": 0.78, "grad_norm": 0.9479497671127319, "learning_rate": 1.2621602162717594e-06, "loss": 0.5509, "step": 12244 }, { "epoch": 0.78, "grad_norm": 0.8776799440383911, "learning_rate": 1.261478845020112e-06, "loss": 0.5186, "step": 12245 }, { "epoch": 0.78, "grad_norm": 0.8843742609024048, "learning_rate": 1.2607976311845865e-06, "loss": 0.51, "step": 12246 }, { "epoch": 0.78, "grad_norm": 0.8797292709350586, "learning_rate": 1.2601165747938638e-06, "loss": 0.5066, "step": 12247 }, { "epoch": 0.78, "grad_norm": 0.9293206930160522, "learning_rate": 1.2594356758766201e-06, "loss": 0.542, "step": 12248 }, { "epoch": 0.78, "grad_norm": 0.8314594030380249, "learning_rate": 1.2587549344615308e-06, "loss": 0.5139, "step": 12249 }, { "epoch": 0.78, "grad_norm": 0.893222451210022, "learning_rate": 1.2580743505772553e-06, "loss": 0.5823, "step": 12250 }, { "epoch": 0.78, "grad_norm": 0.8597607016563416, "learning_rate": 1.2573939242524508e-06, "loss": 0.5671, "step": 12251 }, { "epoch": 0.78, "grad_norm": 0.8614668846130371, "learning_rate": 1.2567136555157694e-06, "loss": 0.5764, "step": 12252 }, { "epoch": 0.78, "grad_norm": 0.8970388174057007, "learning_rate": 1.2560335443958533e-06, "loss": 0.5439, "step": 12253 }, { "epoch": 0.78, "grad_norm": 0.8344459533691406, "learning_rate": 1.2553535909213422e-06, "loss": 0.5791, "step": 12254 }, { "epoch": 0.78, "grad_norm": 0.8963201642036438, "learning_rate": 1.254673795120863e-06, "loss": 0.578, "step": 12255 }, { "epoch": 0.78, "grad_norm": 0.9341084957122803, "learning_rate": 1.2539941570230402e-06, "loss": 0.547, "step": 12256 }, { "epoch": 0.78, "grad_norm": 0.8028890490531921, "learning_rate": 1.2533146766564946e-06, "loss": 0.5386, "step": 12257 }, { "epoch": 0.78, "grad_norm": 0.959701657295227, "learning_rate": 1.252635354049833e-06, "loss": 0.5884, "step": 12258 }, { "epoch": 0.78, "grad_norm": 0.8597055077552795, "learning_rate": 1.2519561892316606e-06, "loss": 0.5718, "step": 12259 }, { "epoch": 0.78, "grad_norm": 0.8948055505752563, "learning_rate": 1.2512771822305742e-06, "loss": 0.5758, "step": 12260 }, { "epoch": 0.78, "grad_norm": 0.9038350582122803, "learning_rate": 1.2505983330751654e-06, "loss": 0.5624, "step": 12261 }, { "epoch": 0.78, "grad_norm": 0.8576204776763916, "learning_rate": 1.2499196417940168e-06, "loss": 0.6144, "step": 12262 }, { "epoch": 0.78, "grad_norm": 0.8939236998558044, "learning_rate": 1.2492411084157086e-06, "loss": 0.5711, "step": 12263 }, { "epoch": 0.78, "grad_norm": 0.859489381313324, "learning_rate": 1.2485627329688076e-06, "loss": 0.5811, "step": 12264 }, { "epoch": 0.78, "grad_norm": 0.834520161151886, "learning_rate": 1.2478845154818798e-06, "loss": 0.5552, "step": 12265 }, { "epoch": 0.78, "grad_norm": 0.9146906733512878, "learning_rate": 1.2472064559834818e-06, "loss": 0.6202, "step": 12266 }, { "epoch": 0.78, "grad_norm": 0.8180127739906311, "learning_rate": 1.2465285545021655e-06, "loss": 0.5558, "step": 12267 }, { "epoch": 0.78, "grad_norm": 0.8662042021751404, "learning_rate": 1.2458508110664758e-06, "loss": 0.5808, "step": 12268 }, { "epoch": 0.78, "grad_norm": 0.8688540458679199, "learning_rate": 1.2451732257049458e-06, "loss": 0.6074, "step": 12269 }, { "epoch": 0.78, "grad_norm": 0.8783148527145386, "learning_rate": 1.2444957984461103e-06, "loss": 0.5658, "step": 12270 }, { "epoch": 0.78, "grad_norm": 0.8849241733551025, "learning_rate": 1.243818529318494e-06, "loss": 0.6209, "step": 12271 }, { "epoch": 0.78, "grad_norm": 0.8112246990203857, "learning_rate": 1.2431414183506114e-06, "loss": 0.5052, "step": 12272 }, { "epoch": 0.78, "grad_norm": 0.9490674734115601, "learning_rate": 1.2424644655709744e-06, "loss": 0.5935, "step": 12273 }, { "epoch": 0.78, "grad_norm": 0.892665445804596, "learning_rate": 1.2417876710080872e-06, "loss": 0.5661, "step": 12274 }, { "epoch": 0.78, "grad_norm": 0.8753570318222046, "learning_rate": 1.2411110346904471e-06, "loss": 0.5632, "step": 12275 }, { "epoch": 0.78, "grad_norm": 0.8728823661804199, "learning_rate": 1.2404345566465464e-06, "loss": 0.5697, "step": 12276 }, { "epoch": 0.78, "grad_norm": 0.8521443009376526, "learning_rate": 1.2397582369048672e-06, "loss": 0.5444, "step": 12277 }, { "epoch": 0.78, "grad_norm": 0.8864396214485168, "learning_rate": 1.2390820754938859e-06, "loss": 0.6033, "step": 12278 }, { "epoch": 0.78, "grad_norm": 0.9159855842590332, "learning_rate": 1.2384060724420776e-06, "loss": 0.6399, "step": 12279 }, { "epoch": 0.78, "grad_norm": 0.8791429400444031, "learning_rate": 1.2377302277779029e-06, "loss": 0.5495, "step": 12280 }, { "epoch": 0.78, "grad_norm": 0.8864124417304993, "learning_rate": 1.2370545415298207e-06, "loss": 0.555, "step": 12281 }, { "epoch": 0.78, "grad_norm": 0.8805786967277527, "learning_rate": 1.236379013726281e-06, "loss": 0.576, "step": 12282 }, { "epoch": 0.78, "grad_norm": 0.9043353199958801, "learning_rate": 1.2357036443957283e-06, "loss": 0.5943, "step": 12283 }, { "epoch": 0.78, "grad_norm": 0.8610161542892456, "learning_rate": 1.2350284335666019e-06, "loss": 0.5871, "step": 12284 }, { "epoch": 0.78, "grad_norm": 0.8844594359397888, "learning_rate": 1.2343533812673286e-06, "loss": 0.5531, "step": 12285 }, { "epoch": 0.78, "grad_norm": 0.9110302925109863, "learning_rate": 1.2336784875263341e-06, "loss": 0.5183, "step": 12286 }, { "epoch": 0.78, "grad_norm": 0.9357644319534302, "learning_rate": 1.233003752372039e-06, "loss": 0.5973, "step": 12287 }, { "epoch": 0.78, "grad_norm": 0.865262508392334, "learning_rate": 1.23232917583285e-06, "loss": 0.5508, "step": 12288 }, { "epoch": 0.78, "grad_norm": 0.8521873354911804, "learning_rate": 1.2316547579371724e-06, "loss": 0.5563, "step": 12289 }, { "epoch": 0.78, "grad_norm": 0.8528589010238647, "learning_rate": 1.230980498713404e-06, "loss": 0.5659, "step": 12290 }, { "epoch": 0.78, "grad_norm": 0.9339171648025513, "learning_rate": 1.2303063981899355e-06, "loss": 0.5992, "step": 12291 }, { "epoch": 0.78, "grad_norm": 0.8944584131240845, "learning_rate": 1.2296324563951517e-06, "loss": 0.5916, "step": 12292 }, { "epoch": 0.78, "grad_norm": 0.8510122299194336, "learning_rate": 1.2289586733574283e-06, "loss": 0.5496, "step": 12293 }, { "epoch": 0.78, "grad_norm": 0.9237475991249084, "learning_rate": 1.2282850491051363e-06, "loss": 0.5481, "step": 12294 }, { "epoch": 0.78, "grad_norm": 0.8931830525398254, "learning_rate": 1.2276115836666396e-06, "loss": 0.5749, "step": 12295 }, { "epoch": 0.78, "grad_norm": 0.9052478671073914, "learning_rate": 1.2269382770702964e-06, "loss": 0.5891, "step": 12296 }, { "epoch": 0.78, "grad_norm": 0.8357118964195251, "learning_rate": 1.2262651293444572e-06, "loss": 0.552, "step": 12297 }, { "epoch": 0.78, "grad_norm": 0.8756887912750244, "learning_rate": 1.2255921405174664e-06, "loss": 0.5766, "step": 12298 }, { "epoch": 0.78, "grad_norm": 0.8769047260284424, "learning_rate": 1.2249193106176578e-06, "loss": 0.5404, "step": 12299 }, { "epoch": 0.78, "grad_norm": 0.7944373488426208, "learning_rate": 1.224246639673367e-06, "loss": 0.5492, "step": 12300 }, { "epoch": 0.78, "grad_norm": 0.904240071773529, "learning_rate": 1.2235741277129143e-06, "loss": 0.5788, "step": 12301 }, { "epoch": 0.78, "grad_norm": 0.8664789795875549, "learning_rate": 1.2229017747646178e-06, "loss": 0.5623, "step": 12302 }, { "epoch": 0.78, "grad_norm": 0.9359251260757446, "learning_rate": 1.2222295808567874e-06, "loss": 0.5974, "step": 12303 }, { "epoch": 0.78, "grad_norm": 0.9124470353126526, "learning_rate": 1.2215575460177282e-06, "loss": 0.5919, "step": 12304 }, { "epoch": 0.78, "grad_norm": 0.8797950148582458, "learning_rate": 1.220885670275736e-06, "loss": 0.5813, "step": 12305 }, { "epoch": 0.78, "grad_norm": 0.9038450717926025, "learning_rate": 1.2202139536591035e-06, "loss": 0.5688, "step": 12306 }, { "epoch": 0.78, "grad_norm": 0.8724334239959717, "learning_rate": 1.2195423961961089e-06, "loss": 0.5828, "step": 12307 }, { "epoch": 0.78, "grad_norm": 0.8944981098175049, "learning_rate": 1.2188709979150366e-06, "loss": 0.589, "step": 12308 }, { "epoch": 0.78, "grad_norm": 0.9792714715003967, "learning_rate": 1.2181997588441507e-06, "loss": 0.591, "step": 12309 }, { "epoch": 0.78, "grad_norm": 0.8511162400245667, "learning_rate": 1.2175286790117174e-06, "loss": 0.5557, "step": 12310 }, { "epoch": 0.78, "grad_norm": 0.8353309035301208, "learning_rate": 1.2168577584459944e-06, "loss": 0.5897, "step": 12311 }, { "epoch": 0.78, "grad_norm": 0.9010828733444214, "learning_rate": 1.2161869971752283e-06, "loss": 0.6036, "step": 12312 }, { "epoch": 0.78, "grad_norm": 0.9711151719093323, "learning_rate": 1.2155163952276654e-06, "loss": 0.626, "step": 12313 }, { "epoch": 0.78, "grad_norm": 0.8932445049285889, "learning_rate": 1.2148459526315442e-06, "loss": 0.6017, "step": 12314 }, { "epoch": 0.78, "grad_norm": 0.8985554575920105, "learning_rate": 1.2141756694150903e-06, "loss": 0.5927, "step": 12315 }, { "epoch": 0.78, "grad_norm": 0.8908417820930481, "learning_rate": 1.2135055456065292e-06, "loss": 0.5449, "step": 12316 }, { "epoch": 0.78, "grad_norm": 0.9284818768501282, "learning_rate": 1.2128355812340776e-06, "loss": 0.583, "step": 12317 }, { "epoch": 0.78, "grad_norm": 0.9066043496131897, "learning_rate": 1.2121657763259448e-06, "loss": 0.5134, "step": 12318 }, { "epoch": 0.78, "grad_norm": 0.8915185332298279, "learning_rate": 1.211496130910334e-06, "loss": 0.5315, "step": 12319 }, { "epoch": 0.78, "grad_norm": 0.8998283743858337, "learning_rate": 1.2108266450154422e-06, "loss": 0.5922, "step": 12320 }, { "epoch": 0.78, "grad_norm": 0.9139837622642517, "learning_rate": 1.2101573186694587e-06, "loss": 0.5897, "step": 12321 }, { "epoch": 0.78, "grad_norm": 0.8933039307594299, "learning_rate": 1.209488151900568e-06, "loss": 0.5763, "step": 12322 }, { "epoch": 0.78, "grad_norm": 0.9665024876594543, "learning_rate": 1.2088191447369436e-06, "loss": 0.6145, "step": 12323 }, { "epoch": 0.78, "grad_norm": 0.9376837015151978, "learning_rate": 1.2081502972067567e-06, "loss": 0.5251, "step": 12324 }, { "epoch": 0.78, "grad_norm": 0.8636725544929504, "learning_rate": 1.2074816093381696e-06, "loss": 0.5737, "step": 12325 }, { "epoch": 0.78, "grad_norm": 0.8847807049751282, "learning_rate": 1.2068130811593387e-06, "loss": 0.5633, "step": 12326 }, { "epoch": 0.78, "grad_norm": 0.8365536332130432, "learning_rate": 1.2061447126984138e-06, "loss": 0.5499, "step": 12327 }, { "epoch": 0.78, "grad_norm": 0.9122150540351868, "learning_rate": 1.2054765039835382e-06, "loss": 0.5775, "step": 12328 }, { "epoch": 0.78, "grad_norm": 0.8604024052619934, "learning_rate": 1.2048084550428442e-06, "loss": 0.5087, "step": 12329 }, { "epoch": 0.78, "grad_norm": 0.9017311334609985, "learning_rate": 1.2041405659044664e-06, "loss": 0.5894, "step": 12330 }, { "epoch": 0.78, "grad_norm": 0.8747026324272156, "learning_rate": 1.203472836596523e-06, "loss": 0.5661, "step": 12331 }, { "epoch": 0.78, "grad_norm": 0.8790405988693237, "learning_rate": 1.2028052671471318e-06, "loss": 0.6278, "step": 12332 }, { "epoch": 0.78, "grad_norm": 0.8900148868560791, "learning_rate": 1.2021378575844005e-06, "loss": 0.5758, "step": 12333 }, { "epoch": 0.78, "grad_norm": 0.85635906457901, "learning_rate": 1.201470607936433e-06, "loss": 0.5376, "step": 12334 }, { "epoch": 0.78, "grad_norm": 0.9126538038253784, "learning_rate": 1.2008035182313237e-06, "loss": 0.5883, "step": 12335 }, { "epoch": 0.78, "grad_norm": 0.8496696949005127, "learning_rate": 1.2001365884971634e-06, "loss": 0.5212, "step": 12336 }, { "epoch": 0.78, "grad_norm": 0.9417339563369751, "learning_rate": 1.1994698187620297e-06, "loss": 0.6001, "step": 12337 }, { "epoch": 0.78, "grad_norm": 0.9529904127120972, "learning_rate": 1.1988032090540036e-06, "loss": 0.6143, "step": 12338 }, { "epoch": 0.78, "grad_norm": 0.8431176543235779, "learning_rate": 1.1981367594011496e-06, "loss": 0.5776, "step": 12339 }, { "epoch": 0.78, "grad_norm": 0.8899187445640564, "learning_rate": 1.1974704698315309e-06, "loss": 0.5197, "step": 12340 }, { "epoch": 0.78, "grad_norm": 0.8935467004776001, "learning_rate": 1.1968043403732044e-06, "loss": 0.5667, "step": 12341 }, { "epoch": 0.78, "grad_norm": 0.9127129316329956, "learning_rate": 1.1961383710542135e-06, "loss": 0.5768, "step": 12342 }, { "epoch": 0.78, "grad_norm": 0.8471307754516602, "learning_rate": 1.1954725619026048e-06, "loss": 0.5569, "step": 12343 }, { "epoch": 0.78, "grad_norm": 0.7840201258659363, "learning_rate": 1.1948069129464128e-06, "loss": 0.572, "step": 12344 }, { "epoch": 0.78, "grad_norm": 0.9100561738014221, "learning_rate": 1.1941414242136635e-06, "loss": 0.5413, "step": 12345 }, { "epoch": 0.78, "grad_norm": 0.910991370677948, "learning_rate": 1.1934760957323782e-06, "loss": 0.569, "step": 12346 }, { "epoch": 0.78, "grad_norm": 0.9393359422683716, "learning_rate": 1.1928109275305734e-06, "loss": 0.6035, "step": 12347 }, { "epoch": 0.78, "grad_norm": 0.8058403134346008, "learning_rate": 1.1921459196362562e-06, "loss": 0.5626, "step": 12348 }, { "epoch": 0.78, "grad_norm": 0.8618589639663696, "learning_rate": 1.1914810720774289e-06, "loss": 0.5869, "step": 12349 }, { "epoch": 0.78, "grad_norm": 0.924644410610199, "learning_rate": 1.190816384882082e-06, "loss": 0.6129, "step": 12350 }, { "epoch": 0.78, "grad_norm": 0.8994777202606201, "learning_rate": 1.1901518580782073e-06, "loss": 0.5791, "step": 12351 }, { "epoch": 0.78, "grad_norm": 0.9266136288642883, "learning_rate": 1.1894874916937855e-06, "loss": 0.6058, "step": 12352 }, { "epoch": 0.78, "grad_norm": 0.854226291179657, "learning_rate": 1.1888232857567888e-06, "loss": 0.5411, "step": 12353 }, { "epoch": 0.78, "grad_norm": 0.9290048480033875, "learning_rate": 1.1881592402951853e-06, "loss": 0.5852, "step": 12354 }, { "epoch": 0.78, "grad_norm": 0.9401707649230957, "learning_rate": 1.1874953553369351e-06, "loss": 0.5687, "step": 12355 }, { "epoch": 0.78, "grad_norm": 0.8896312713623047, "learning_rate": 1.1868316309099937e-06, "loss": 0.5607, "step": 12356 }, { "epoch": 0.78, "grad_norm": 0.9019138216972351, "learning_rate": 1.186168067042308e-06, "loss": 0.6016, "step": 12357 }, { "epoch": 0.78, "grad_norm": 0.8993564248085022, "learning_rate": 1.1855046637618168e-06, "loss": 0.6566, "step": 12358 }, { "epoch": 0.78, "grad_norm": 0.9466603994369507, "learning_rate": 1.1848414210964526e-06, "loss": 0.6339, "step": 12359 }, { "epoch": 0.78, "grad_norm": 0.9109975099563599, "learning_rate": 1.1841783390741473e-06, "loss": 0.5418, "step": 12360 }, { "epoch": 0.78, "grad_norm": 0.963789701461792, "learning_rate": 1.1835154177228165e-06, "loss": 0.5697, "step": 12361 }, { "epoch": 0.78, "grad_norm": 0.9295591115951538, "learning_rate": 1.1828526570703747e-06, "loss": 0.6339, "step": 12362 }, { "epoch": 0.78, "grad_norm": 0.9075334668159485, "learning_rate": 1.1821900571447286e-06, "loss": 0.5751, "step": 12363 }, { "epoch": 0.78, "grad_norm": 0.8823397159576416, "learning_rate": 1.1815276179737778e-06, "loss": 0.5023, "step": 12364 }, { "epoch": 0.78, "grad_norm": 0.8924038410186768, "learning_rate": 1.1808653395854174e-06, "loss": 0.5934, "step": 12365 }, { "epoch": 0.78, "grad_norm": 0.8544875979423523, "learning_rate": 1.1802032220075299e-06, "loss": 0.5516, "step": 12366 }, { "epoch": 0.78, "grad_norm": 0.9305124282836914, "learning_rate": 1.1795412652679955e-06, "loss": 0.5981, "step": 12367 }, { "epoch": 0.78, "grad_norm": 0.8870397806167603, "learning_rate": 1.178879469394691e-06, "loss": 0.5729, "step": 12368 }, { "epoch": 0.78, "grad_norm": 0.9008588194847107, "learning_rate": 1.1782178344154776e-06, "loss": 0.5062, "step": 12369 }, { "epoch": 0.78, "grad_norm": 0.8877758383750916, "learning_rate": 1.1775563603582162e-06, "loss": 0.5799, "step": 12370 }, { "epoch": 0.78, "grad_norm": 0.882616400718689, "learning_rate": 1.1768950472507605e-06, "loss": 0.5285, "step": 12371 }, { "epoch": 0.78, "grad_norm": 0.9182950854301453, "learning_rate": 1.1762338951209524e-06, "loss": 0.6087, "step": 12372 }, { "epoch": 0.78, "grad_norm": 0.9180057048797607, "learning_rate": 1.1755729039966358e-06, "loss": 0.5914, "step": 12373 }, { "epoch": 0.78, "grad_norm": 0.8797052502632141, "learning_rate": 1.174912073905638e-06, "loss": 0.5412, "step": 12374 }, { "epoch": 0.78, "grad_norm": 0.8645214438438416, "learning_rate": 1.174251404875787e-06, "loss": 0.564, "step": 12375 }, { "epoch": 0.78, "grad_norm": 0.8359499573707581, "learning_rate": 1.1735908969349002e-06, "loss": 0.546, "step": 12376 }, { "epoch": 0.78, "grad_norm": 0.8409955501556396, "learning_rate": 1.1729305501107897e-06, "loss": 0.5585, "step": 12377 }, { "epoch": 0.78, "grad_norm": 0.8773937821388245, "learning_rate": 1.1722703644312599e-06, "loss": 0.5409, "step": 12378 }, { "epoch": 0.78, "grad_norm": 0.8302714824676514, "learning_rate": 1.1716103399241113e-06, "loss": 0.5417, "step": 12379 }, { "epoch": 0.78, "grad_norm": 0.8925114870071411, "learning_rate": 1.1709504766171298e-06, "loss": 0.5455, "step": 12380 }, { "epoch": 0.78, "grad_norm": 0.8733065724372864, "learning_rate": 1.170290774538107e-06, "loss": 0.6242, "step": 12381 }, { "epoch": 0.78, "grad_norm": 0.8538454174995422, "learning_rate": 1.1696312337148152e-06, "loss": 0.5389, "step": 12382 }, { "epoch": 0.78, "grad_norm": 0.9496648907661438, "learning_rate": 1.1689718541750278e-06, "loss": 0.6046, "step": 12383 }, { "epoch": 0.78, "grad_norm": 0.9294766187667847, "learning_rate": 1.168312635946508e-06, "loss": 0.6338, "step": 12384 }, { "epoch": 0.78, "grad_norm": 0.8941949605941772, "learning_rate": 1.1676535790570137e-06, "loss": 0.5433, "step": 12385 }, { "epoch": 0.78, "grad_norm": 0.9269332885742188, "learning_rate": 1.1669946835342956e-06, "loss": 0.5374, "step": 12386 }, { "epoch": 0.78, "grad_norm": 0.9914242029190063, "learning_rate": 1.1663359494060983e-06, "loss": 0.6343, "step": 12387 }, { "epoch": 0.78, "grad_norm": 0.8914863467216492, "learning_rate": 1.1656773767001566e-06, "loss": 0.6055, "step": 12388 }, { "epoch": 0.78, "grad_norm": 0.9513469934463501, "learning_rate": 1.1650189654442024e-06, "loss": 0.5674, "step": 12389 }, { "epoch": 0.78, "grad_norm": 0.891490638256073, "learning_rate": 1.1643607156659582e-06, "loss": 0.5888, "step": 12390 }, { "epoch": 0.79, "grad_norm": 0.9232016205787659, "learning_rate": 1.1637026273931413e-06, "loss": 0.5609, "step": 12391 }, { "epoch": 0.79, "grad_norm": 0.9406624436378479, "learning_rate": 1.1630447006534606e-06, "loss": 0.5978, "step": 12392 }, { "epoch": 0.79, "grad_norm": 0.9257485866546631, "learning_rate": 1.1623869354746203e-06, "loss": 0.5876, "step": 12393 }, { "epoch": 0.79, "grad_norm": 0.985722541809082, "learning_rate": 1.1617293318843164e-06, "loss": 0.6456, "step": 12394 }, { "epoch": 0.79, "grad_norm": 0.8931677341461182, "learning_rate": 1.1610718899102392e-06, "loss": 0.5579, "step": 12395 }, { "epoch": 0.79, "grad_norm": 0.9458318948745728, "learning_rate": 1.1604146095800684e-06, "loss": 0.5987, "step": 12396 }, { "epoch": 0.79, "grad_norm": 0.8708502650260925, "learning_rate": 1.1597574909214808e-06, "loss": 0.6126, "step": 12397 }, { "epoch": 0.79, "grad_norm": 0.937856912612915, "learning_rate": 1.159100533962147e-06, "loss": 0.5574, "step": 12398 }, { "epoch": 0.79, "grad_norm": 0.9945967793464661, "learning_rate": 1.1584437387297283e-06, "loss": 0.6743, "step": 12399 }, { "epoch": 0.79, "grad_norm": 0.8693322539329529, "learning_rate": 1.157787105251879e-06, "loss": 0.5495, "step": 12400 }, { "epoch": 0.79, "grad_norm": 0.8383262157440186, "learning_rate": 1.157130633556251e-06, "loss": 0.4967, "step": 12401 }, { "epoch": 0.79, "grad_norm": 0.9006306529045105, "learning_rate": 1.1564743236704801e-06, "loss": 0.5194, "step": 12402 }, { "epoch": 0.79, "grad_norm": 0.909695565700531, "learning_rate": 1.1558181756222081e-06, "loss": 0.5596, "step": 12403 }, { "epoch": 0.79, "grad_norm": 0.9043020009994507, "learning_rate": 1.1551621894390586e-06, "loss": 0.5476, "step": 12404 }, { "epoch": 0.79, "grad_norm": 0.9523054957389832, "learning_rate": 1.1545063651486533e-06, "loss": 0.6002, "step": 12405 }, { "epoch": 0.79, "grad_norm": 0.8855312466621399, "learning_rate": 1.1538507027786077e-06, "loss": 0.6329, "step": 12406 }, { "epoch": 0.79, "grad_norm": 0.8605347275733948, "learning_rate": 1.1531952023565295e-06, "loss": 0.5569, "step": 12407 }, { "epoch": 0.79, "grad_norm": 0.8833476901054382, "learning_rate": 1.1525398639100194e-06, "loss": 0.5713, "step": 12408 }, { "epoch": 0.79, "grad_norm": 0.9746968746185303, "learning_rate": 1.1518846874666723e-06, "loss": 0.5846, "step": 12409 }, { "epoch": 0.79, "grad_norm": 0.9625856280326843, "learning_rate": 1.1512296730540717e-06, "loss": 0.5859, "step": 12410 }, { "epoch": 0.79, "grad_norm": 0.8508451581001282, "learning_rate": 1.1505748206998036e-06, "loss": 0.5348, "step": 12411 }, { "epoch": 0.79, "grad_norm": 0.8078241944313049, "learning_rate": 1.1499201304314372e-06, "loss": 0.5757, "step": 12412 }, { "epoch": 0.79, "grad_norm": 0.9019641280174255, "learning_rate": 1.149265602276541e-06, "loss": 0.6191, "step": 12413 }, { "epoch": 0.79, "grad_norm": 0.9142687320709229, "learning_rate": 1.1486112362626738e-06, "loss": 0.6163, "step": 12414 }, { "epoch": 0.79, "grad_norm": 0.8982365131378174, "learning_rate": 1.14795703241739e-06, "loss": 0.5845, "step": 12415 }, { "epoch": 0.79, "grad_norm": 0.8687730431556702, "learning_rate": 1.1473029907682348e-06, "loss": 0.497, "step": 12416 }, { "epoch": 0.79, "grad_norm": 0.9555364847183228, "learning_rate": 1.1466491113427503e-06, "loss": 0.5448, "step": 12417 }, { "epoch": 0.79, "grad_norm": 0.9418555498123169, "learning_rate": 1.1459953941684648e-06, "loss": 0.5793, "step": 12418 }, { "epoch": 0.79, "grad_norm": 0.8871247172355652, "learning_rate": 1.1453418392729065e-06, "loss": 0.5731, "step": 12419 }, { "epoch": 0.79, "grad_norm": 0.9137433171272278, "learning_rate": 1.1446884466835933e-06, "loss": 0.6143, "step": 12420 }, { "epoch": 0.79, "grad_norm": 0.9435691237449646, "learning_rate": 1.1440352164280388e-06, "loss": 0.5642, "step": 12421 }, { "epoch": 0.79, "grad_norm": 0.8553792834281921, "learning_rate": 1.1433821485337487e-06, "loss": 0.55, "step": 12422 }, { "epoch": 0.79, "grad_norm": 0.8792483806610107, "learning_rate": 1.1427292430282165e-06, "loss": 0.5809, "step": 12423 }, { "epoch": 0.79, "grad_norm": 0.8634614944458008, "learning_rate": 1.14207649993894e-06, "loss": 0.5819, "step": 12424 }, { "epoch": 0.79, "grad_norm": 0.904400110244751, "learning_rate": 1.1414239192934019e-06, "loss": 0.5542, "step": 12425 }, { "epoch": 0.79, "grad_norm": 0.9054979085922241, "learning_rate": 1.1407715011190784e-06, "loss": 0.5524, "step": 12426 }, { "epoch": 0.79, "grad_norm": 0.9589418768882751, "learning_rate": 1.1401192454434418e-06, "loss": 0.5837, "step": 12427 }, { "epoch": 0.79, "grad_norm": 0.9193122386932373, "learning_rate": 1.139467152293956e-06, "loss": 0.5805, "step": 12428 }, { "epoch": 0.79, "grad_norm": 0.9566795825958252, "learning_rate": 1.138815221698079e-06, "loss": 0.6101, "step": 12429 }, { "epoch": 0.79, "grad_norm": 0.8651120662689209, "learning_rate": 1.138163453683262e-06, "loss": 0.5513, "step": 12430 }, { "epoch": 0.79, "grad_norm": 0.8350364565849304, "learning_rate": 1.1375118482769447e-06, "loss": 0.5298, "step": 12431 }, { "epoch": 0.79, "grad_norm": 0.8395058512687683, "learning_rate": 1.136860405506569e-06, "loss": 0.5459, "step": 12432 }, { "epoch": 0.79, "grad_norm": 0.8627316951751709, "learning_rate": 1.1362091253995632e-06, "loss": 0.5212, "step": 12433 }, { "epoch": 0.79, "grad_norm": 0.8814694285392761, "learning_rate": 1.1355580079833496e-06, "loss": 0.5991, "step": 12434 }, { "epoch": 0.79, "grad_norm": 0.8934352397918701, "learning_rate": 1.134907053285344e-06, "loss": 0.5512, "step": 12435 }, { "epoch": 0.79, "grad_norm": 0.9029948711395264, "learning_rate": 1.1342562613329571e-06, "loss": 0.5956, "step": 12436 }, { "epoch": 0.79, "grad_norm": 0.8928791284561157, "learning_rate": 1.133605632153591e-06, "loss": 0.5683, "step": 12437 }, { "epoch": 0.79, "grad_norm": 0.9214066863059998, "learning_rate": 1.1329551657746429e-06, "loss": 0.6537, "step": 12438 }, { "epoch": 0.79, "grad_norm": 0.9235839247703552, "learning_rate": 1.132304862223499e-06, "loss": 0.587, "step": 12439 }, { "epoch": 0.79, "grad_norm": 0.8503764271736145, "learning_rate": 1.1316547215275409e-06, "loss": 0.6163, "step": 12440 }, { "epoch": 0.79, "grad_norm": 0.8700659275054932, "learning_rate": 1.1310047437141485e-06, "loss": 0.5875, "step": 12441 }, { "epoch": 0.79, "grad_norm": 0.9010173678398132, "learning_rate": 1.1303549288106857e-06, "loss": 0.5572, "step": 12442 }, { "epoch": 0.79, "grad_norm": 0.906274676322937, "learning_rate": 1.1297052768445154e-06, "loss": 0.5747, "step": 12443 }, { "epoch": 0.79, "grad_norm": 0.9110028147697449, "learning_rate": 1.129055787842992e-06, "loss": 0.5646, "step": 12444 }, { "epoch": 0.79, "grad_norm": 0.9146189093589783, "learning_rate": 1.1284064618334634e-06, "loss": 0.5341, "step": 12445 }, { "epoch": 0.79, "grad_norm": 0.9614166021347046, "learning_rate": 1.1277572988432716e-06, "loss": 0.5856, "step": 12446 }, { "epoch": 0.79, "grad_norm": 0.8432893753051758, "learning_rate": 1.1271082988997485e-06, "loss": 0.5756, "step": 12447 }, { "epoch": 0.79, "grad_norm": 0.9383045434951782, "learning_rate": 1.1264594620302216e-06, "loss": 0.6037, "step": 12448 }, { "epoch": 0.79, "grad_norm": 0.8760130405426025, "learning_rate": 1.1258107882620117e-06, "loss": 0.5568, "step": 12449 }, { "epoch": 0.79, "grad_norm": 0.9355485439300537, "learning_rate": 1.1251622776224325e-06, "loss": 0.5927, "step": 12450 }, { "epoch": 0.79, "grad_norm": 0.9326279759407043, "learning_rate": 1.1245139301387903e-06, "loss": 0.5338, "step": 12451 }, { "epoch": 0.79, "grad_norm": 0.9106242060661316, "learning_rate": 1.1238657458383857e-06, "loss": 0.5986, "step": 12452 }, { "epoch": 0.79, "grad_norm": 0.9031466841697693, "learning_rate": 1.1232177247485076e-06, "loss": 0.5867, "step": 12453 }, { "epoch": 0.79, "grad_norm": 0.8840919137001038, "learning_rate": 1.122569866896448e-06, "loss": 0.5369, "step": 12454 }, { "epoch": 0.79, "grad_norm": 0.875639021396637, "learning_rate": 1.1219221723094815e-06, "loss": 0.5746, "step": 12455 }, { "epoch": 0.79, "grad_norm": 0.9193491339683533, "learning_rate": 1.1212746410148807e-06, "loss": 0.6223, "step": 12456 }, { "epoch": 0.79, "grad_norm": 0.9045842885971069, "learning_rate": 1.120627273039912e-06, "loss": 0.6001, "step": 12457 }, { "epoch": 0.79, "grad_norm": 0.9147197008132935, "learning_rate": 1.119980068411834e-06, "loss": 0.5921, "step": 12458 }, { "epoch": 0.79, "grad_norm": 0.9671141505241394, "learning_rate": 1.1193330271578968e-06, "loss": 0.6136, "step": 12459 }, { "epoch": 0.79, "grad_norm": 0.8843987584114075, "learning_rate": 1.118686149305348e-06, "loss": 0.5372, "step": 12460 }, { "epoch": 0.79, "grad_norm": 0.9339917302131653, "learning_rate": 1.1180394348814206e-06, "loss": 0.5736, "step": 12461 }, { "epoch": 0.79, "grad_norm": 0.8633296489715576, "learning_rate": 1.117392883913349e-06, "loss": 0.5683, "step": 12462 }, { "epoch": 0.79, "grad_norm": 0.856465220451355, "learning_rate": 1.1167464964283587e-06, "loss": 0.5533, "step": 12463 }, { "epoch": 0.79, "grad_norm": 0.851737916469574, "learning_rate": 1.1161002724536623e-06, "loss": 0.5454, "step": 12464 }, { "epoch": 0.79, "grad_norm": 0.897743821144104, "learning_rate": 1.115454212016473e-06, "loss": 0.5616, "step": 12465 }, { "epoch": 0.79, "grad_norm": 0.8601440191268921, "learning_rate": 1.1148083151439932e-06, "loss": 0.5598, "step": 12466 }, { "epoch": 0.79, "grad_norm": 0.913490891456604, "learning_rate": 1.1141625818634194e-06, "loss": 0.5871, "step": 12467 }, { "epoch": 0.79, "grad_norm": 0.9724521636962891, "learning_rate": 1.1135170122019433e-06, "loss": 0.6231, "step": 12468 }, { "epoch": 0.79, "grad_norm": 0.9473910927772522, "learning_rate": 1.112871606186744e-06, "loss": 0.5913, "step": 12469 }, { "epoch": 0.79, "grad_norm": 0.902228593826294, "learning_rate": 1.112226363844998e-06, "loss": 0.6149, "step": 12470 }, { "epoch": 0.79, "grad_norm": 0.8123179078102112, "learning_rate": 1.1115812852038777e-06, "loss": 0.4786, "step": 12471 }, { "epoch": 0.79, "grad_norm": 0.8619558811187744, "learning_rate": 1.1109363702905419e-06, "loss": 0.6023, "step": 12472 }, { "epoch": 0.79, "grad_norm": 0.9255017042160034, "learning_rate": 1.1102916191321456e-06, "loss": 0.5984, "step": 12473 }, { "epoch": 0.79, "grad_norm": 0.8794713616371155, "learning_rate": 1.1096470317558384e-06, "loss": 0.5654, "step": 12474 }, { "epoch": 0.79, "grad_norm": 0.9066189527511597, "learning_rate": 1.1090026081887611e-06, "loss": 0.5686, "step": 12475 }, { "epoch": 0.79, "grad_norm": 0.8906106352806091, "learning_rate": 1.1083583484580495e-06, "loss": 0.5434, "step": 12476 }, { "epoch": 0.79, "grad_norm": 0.8132662177085876, "learning_rate": 1.107714252590828e-06, "loss": 0.5387, "step": 12477 }, { "epoch": 0.79, "grad_norm": 0.8702597618103027, "learning_rate": 1.1070703206142186e-06, "loss": 0.5839, "step": 12478 }, { "epoch": 0.79, "grad_norm": 0.9468421339988708, "learning_rate": 1.1064265525553375e-06, "loss": 0.6534, "step": 12479 }, { "epoch": 0.79, "grad_norm": 0.8762499690055847, "learning_rate": 1.1057829484412885e-06, "loss": 0.5579, "step": 12480 }, { "epoch": 0.79, "grad_norm": 0.9076083302497864, "learning_rate": 1.1051395082991722e-06, "loss": 0.5715, "step": 12481 }, { "epoch": 0.79, "grad_norm": 0.8767365217208862, "learning_rate": 1.1044962321560837e-06, "loss": 0.5713, "step": 12482 }, { "epoch": 0.79, "grad_norm": 0.864399254322052, "learning_rate": 1.1038531200391045e-06, "loss": 0.6256, "step": 12483 }, { "epoch": 0.79, "grad_norm": 0.8748794794082642, "learning_rate": 1.1032101719753197e-06, "loss": 0.5357, "step": 12484 }, { "epoch": 0.79, "grad_norm": 0.8488878607749939, "learning_rate": 1.102567387991797e-06, "loss": 0.5536, "step": 12485 }, { "epoch": 0.79, "grad_norm": 0.8816309571266174, "learning_rate": 1.101924768115603e-06, "loss": 0.5564, "step": 12486 }, { "epoch": 0.79, "grad_norm": 0.8800366520881653, "learning_rate": 1.101282312373797e-06, "loss": 0.5739, "step": 12487 }, { "epoch": 0.79, "grad_norm": 0.9694901704788208, "learning_rate": 1.1006400207934304e-06, "loss": 0.6166, "step": 12488 }, { "epoch": 0.79, "grad_norm": 0.9245271682739258, "learning_rate": 1.0999978934015475e-06, "loss": 0.5898, "step": 12489 }, { "epoch": 0.79, "grad_norm": 0.9495474100112915, "learning_rate": 1.0993559302251878e-06, "loss": 0.6334, "step": 12490 }, { "epoch": 0.79, "grad_norm": 0.9290176630020142, "learning_rate": 1.0987141312913773e-06, "loss": 0.583, "step": 12491 }, { "epoch": 0.79, "grad_norm": 0.9221222400665283, "learning_rate": 1.098072496627146e-06, "loss": 0.6511, "step": 12492 }, { "epoch": 0.79, "grad_norm": 0.9218513369560242, "learning_rate": 1.0974310262595067e-06, "loss": 0.6061, "step": 12493 }, { "epoch": 0.79, "grad_norm": 0.9661274552345276, "learning_rate": 1.096789720215471e-06, "loss": 0.682, "step": 12494 }, { "epoch": 0.79, "grad_norm": 0.9002541303634644, "learning_rate": 1.0961485785220434e-06, "loss": 0.5775, "step": 12495 }, { "epoch": 0.79, "grad_norm": 0.8728871941566467, "learning_rate": 1.0955076012062155e-06, "loss": 0.6046, "step": 12496 }, { "epoch": 0.79, "grad_norm": 0.9101660251617432, "learning_rate": 1.094866788294981e-06, "loss": 0.5323, "step": 12497 }, { "epoch": 0.79, "grad_norm": 0.8614184856414795, "learning_rate": 1.094226139815323e-06, "loss": 0.5448, "step": 12498 }, { "epoch": 0.79, "grad_norm": 0.934859037399292, "learning_rate": 1.0935856557942132e-06, "loss": 0.5444, "step": 12499 }, { "epoch": 0.79, "grad_norm": 0.8698869943618774, "learning_rate": 1.0929453362586223e-06, "loss": 0.5316, "step": 12500 }, { "epoch": 0.79, "grad_norm": 0.941087007522583, "learning_rate": 1.0923051812355117e-06, "loss": 0.6492, "step": 12501 }, { "epoch": 0.79, "grad_norm": 0.9395822882652283, "learning_rate": 1.091665190751836e-06, "loss": 0.5986, "step": 12502 }, { "epoch": 0.79, "grad_norm": 0.9707981944084167, "learning_rate": 1.0910253648345442e-06, "loss": 0.6301, "step": 12503 }, { "epoch": 0.79, "grad_norm": 0.8878294825553894, "learning_rate": 1.0903857035105736e-06, "loss": 0.5955, "step": 12504 }, { "epoch": 0.79, "grad_norm": 0.8388591408729553, "learning_rate": 1.0897462068068616e-06, "loss": 0.5295, "step": 12505 }, { "epoch": 0.79, "grad_norm": 0.8977577686309814, "learning_rate": 1.0891068747503353e-06, "loss": 0.5855, "step": 12506 }, { "epoch": 0.79, "grad_norm": 0.8421580195426941, "learning_rate": 1.0884677073679123e-06, "loss": 0.5343, "step": 12507 }, { "epoch": 0.79, "grad_norm": 0.777437150478363, "learning_rate": 1.0878287046865072e-06, "loss": 0.5167, "step": 12508 }, { "epoch": 0.79, "grad_norm": 0.8942268490791321, "learning_rate": 1.0871898667330249e-06, "loss": 0.5571, "step": 12509 }, { "epoch": 0.79, "grad_norm": 0.8648400902748108, "learning_rate": 1.0865511935343664e-06, "loss": 0.5088, "step": 12510 }, { "epoch": 0.79, "grad_norm": 0.8418681621551514, "learning_rate": 1.0859126851174246e-06, "loss": 0.5647, "step": 12511 }, { "epoch": 0.79, "grad_norm": 0.9093899130821228, "learning_rate": 1.0852743415090823e-06, "loss": 0.5833, "step": 12512 }, { "epoch": 0.79, "grad_norm": 0.8962028622627258, "learning_rate": 1.0846361627362174e-06, "loss": 0.5429, "step": 12513 }, { "epoch": 0.79, "grad_norm": 0.8653958439826965, "learning_rate": 1.0839981488257061e-06, "loss": 0.5451, "step": 12514 }, { "epoch": 0.79, "grad_norm": 0.8512267470359802, "learning_rate": 1.0833602998044085e-06, "loss": 0.5307, "step": 12515 }, { "epoch": 0.79, "grad_norm": 0.8976225852966309, "learning_rate": 1.0827226156991838e-06, "loss": 0.605, "step": 12516 }, { "epoch": 0.79, "grad_norm": 0.8847118020057678, "learning_rate": 1.0820850965368822e-06, "loss": 0.5519, "step": 12517 }, { "epoch": 0.79, "grad_norm": 0.9687950611114502, "learning_rate": 1.0814477423443482e-06, "loss": 0.635, "step": 12518 }, { "epoch": 0.79, "grad_norm": 0.8953339457511902, "learning_rate": 1.0808105531484192e-06, "loss": 0.5767, "step": 12519 }, { "epoch": 0.79, "grad_norm": 0.9060798287391663, "learning_rate": 1.0801735289759225e-06, "loss": 0.5794, "step": 12520 }, { "epoch": 0.79, "grad_norm": 0.9054228663444519, "learning_rate": 1.0795366698536812e-06, "loss": 0.5836, "step": 12521 }, { "epoch": 0.79, "grad_norm": 0.8784095644950867, "learning_rate": 1.078899975808515e-06, "loss": 0.5729, "step": 12522 }, { "epoch": 0.79, "grad_norm": 0.947877049446106, "learning_rate": 1.0782634468672293e-06, "loss": 0.5233, "step": 12523 }, { "epoch": 0.79, "grad_norm": 0.866150438785553, "learning_rate": 1.0776270830566266e-06, "loss": 0.5557, "step": 12524 }, { "epoch": 0.79, "grad_norm": 0.8818299174308777, "learning_rate": 1.0769908844035032e-06, "loss": 0.5335, "step": 12525 }, { "epoch": 0.79, "grad_norm": 0.8382863998413086, "learning_rate": 1.0763548509346461e-06, "loss": 0.5066, "step": 12526 }, { "epoch": 0.79, "grad_norm": 0.876054584980011, "learning_rate": 1.0757189826768367e-06, "loss": 0.5483, "step": 12527 }, { "epoch": 0.79, "grad_norm": 0.9083865284919739, "learning_rate": 1.075083279656851e-06, "loss": 0.5776, "step": 12528 }, { "epoch": 0.79, "grad_norm": 0.8849220275878906, "learning_rate": 1.0744477419014532e-06, "loss": 0.5793, "step": 12529 }, { "epoch": 0.79, "grad_norm": 0.8889400959014893, "learning_rate": 1.0738123694374047e-06, "loss": 0.55, "step": 12530 }, { "epoch": 0.79, "grad_norm": 0.8188003897666931, "learning_rate": 1.0731771622914595e-06, "loss": 0.5511, "step": 12531 }, { "epoch": 0.79, "grad_norm": 0.8689089417457581, "learning_rate": 1.072542120490363e-06, "loss": 0.5708, "step": 12532 }, { "epoch": 0.79, "grad_norm": 0.8809791803359985, "learning_rate": 1.0719072440608575e-06, "loss": 0.5782, "step": 12533 }, { "epoch": 0.79, "grad_norm": 0.8765868544578552, "learning_rate": 1.0712725330296697e-06, "loss": 0.564, "step": 12534 }, { "epoch": 0.79, "grad_norm": 0.8565429449081421, "learning_rate": 1.07063798742353e-06, "loss": 0.6002, "step": 12535 }, { "epoch": 0.79, "grad_norm": 0.9748111367225647, "learning_rate": 1.0700036072691566e-06, "loss": 0.6289, "step": 12536 }, { "epoch": 0.79, "grad_norm": 0.9657660722732544, "learning_rate": 1.0693693925932585e-06, "loss": 0.6292, "step": 12537 }, { "epoch": 0.79, "grad_norm": 0.8865155577659607, "learning_rate": 1.0687353434225418e-06, "loss": 0.6005, "step": 12538 }, { "epoch": 0.79, "grad_norm": 0.9077226519584656, "learning_rate": 1.0681014597837042e-06, "loss": 0.599, "step": 12539 }, { "epoch": 0.79, "grad_norm": 0.8932228088378906, "learning_rate": 1.0674677417034358e-06, "loss": 0.5546, "step": 12540 }, { "epoch": 0.79, "grad_norm": 0.8195880651473999, "learning_rate": 1.0668341892084217e-06, "loss": 0.5231, "step": 12541 }, { "epoch": 0.79, "grad_norm": 0.9403937458992004, "learning_rate": 1.0662008023253356e-06, "loss": 0.5589, "step": 12542 }, { "epoch": 0.79, "grad_norm": 0.9366670250892639, "learning_rate": 1.0655675810808485e-06, "loss": 0.6032, "step": 12543 }, { "epoch": 0.79, "grad_norm": 0.8904660940170288, "learning_rate": 1.0649345255016258e-06, "loss": 0.6044, "step": 12544 }, { "epoch": 0.79, "grad_norm": 0.9282307624816895, "learning_rate": 1.0643016356143204e-06, "loss": 0.624, "step": 12545 }, { "epoch": 0.79, "grad_norm": 0.8701701164245605, "learning_rate": 1.0636689114455811e-06, "loss": 0.6007, "step": 12546 }, { "epoch": 0.79, "grad_norm": 0.8869695067405701, "learning_rate": 1.063036353022051e-06, "loss": 0.6469, "step": 12547 }, { "epoch": 0.79, "grad_norm": 0.8800140619277954, "learning_rate": 1.0624039603703645e-06, "loss": 0.509, "step": 12548 }, { "epoch": 0.8, "grad_norm": 0.8784294724464417, "learning_rate": 1.06177173351715e-06, "loss": 0.5827, "step": 12549 }, { "epoch": 0.8, "grad_norm": 0.9010189771652222, "learning_rate": 1.061139672489027e-06, "loss": 0.5904, "step": 12550 }, { "epoch": 0.8, "grad_norm": 0.9229983687400818, "learning_rate": 1.0605077773126083e-06, "loss": 0.5881, "step": 12551 }, { "epoch": 0.8, "grad_norm": 0.863856852054596, "learning_rate": 1.059876048014506e-06, "loss": 0.5963, "step": 12552 }, { "epoch": 0.8, "grad_norm": 0.8975127935409546, "learning_rate": 1.0592444846213145e-06, "loss": 0.597, "step": 12553 }, { "epoch": 0.8, "grad_norm": 0.8481269478797913, "learning_rate": 1.058613087159629e-06, "loss": 0.5336, "step": 12554 }, { "epoch": 0.8, "grad_norm": 0.8915879130363464, "learning_rate": 1.0579818556560357e-06, "loss": 0.6215, "step": 12555 }, { "epoch": 0.8, "grad_norm": 0.9215599298477173, "learning_rate": 1.0573507901371126e-06, "loss": 0.5748, "step": 12556 }, { "epoch": 0.8, "grad_norm": 0.8889424800872803, "learning_rate": 1.0567198906294341e-06, "loss": 0.5658, "step": 12557 }, { "epoch": 0.8, "grad_norm": 0.8827781081199646, "learning_rate": 1.0560891571595616e-06, "loss": 0.5804, "step": 12558 }, { "epoch": 0.8, "grad_norm": 0.8699508905410767, "learning_rate": 1.0554585897540553e-06, "loss": 0.5575, "step": 12559 }, { "epoch": 0.8, "grad_norm": 0.9525449872016907, "learning_rate": 1.0548281884394657e-06, "loss": 0.5723, "step": 12560 }, { "epoch": 0.8, "grad_norm": 0.868190348148346, "learning_rate": 1.0541979532423362e-06, "loss": 0.5423, "step": 12561 }, { "epoch": 0.8, "grad_norm": 0.8393450975418091, "learning_rate": 1.053567884189205e-06, "loss": 0.5703, "step": 12562 }, { "epoch": 0.8, "grad_norm": 0.8783072829246521, "learning_rate": 1.0529379813066026e-06, "loss": 0.5426, "step": 12563 }, { "epoch": 0.8, "grad_norm": 0.871197521686554, "learning_rate": 1.0523082446210487e-06, "loss": 0.5194, "step": 12564 }, { "epoch": 0.8, "grad_norm": 0.9241814017295837, "learning_rate": 1.051678674159064e-06, "loss": 0.6151, "step": 12565 }, { "epoch": 0.8, "grad_norm": 0.9252364039421082, "learning_rate": 1.0510492699471536e-06, "loss": 0.5901, "step": 12566 }, { "epoch": 0.8, "grad_norm": 0.9119340777397156, "learning_rate": 1.0504200320118214e-06, "loss": 0.5864, "step": 12567 }, { "epoch": 0.8, "grad_norm": 1.0206106901168823, "learning_rate": 1.049790960379562e-06, "loss": 0.6423, "step": 12568 }, { "epoch": 0.8, "grad_norm": 0.9204949140548706, "learning_rate": 1.0491620550768633e-06, "loss": 0.5887, "step": 12569 }, { "epoch": 0.8, "grad_norm": 0.9017525911331177, "learning_rate": 1.048533316130207e-06, "loss": 0.5684, "step": 12570 }, { "epoch": 0.8, "grad_norm": 0.950289249420166, "learning_rate": 1.0479047435660671e-06, "loss": 0.5176, "step": 12571 }, { "epoch": 0.8, "grad_norm": 0.9097518920898438, "learning_rate": 1.047276337410908e-06, "loss": 0.5952, "step": 12572 }, { "epoch": 0.8, "grad_norm": 0.8936463594436646, "learning_rate": 1.0466480976911947e-06, "loss": 0.6109, "step": 12573 }, { "epoch": 0.8, "grad_norm": 0.8422192931175232, "learning_rate": 1.0460200244333758e-06, "loss": 0.5667, "step": 12574 }, { "epoch": 0.8, "grad_norm": 0.8249906301498413, "learning_rate": 1.0453921176638981e-06, "loss": 0.5465, "step": 12575 }, { "epoch": 0.8, "grad_norm": 0.8235678672790527, "learning_rate": 1.044764377409203e-06, "loss": 0.5726, "step": 12576 }, { "epoch": 0.8, "grad_norm": 1.0038381814956665, "learning_rate": 1.0441368036957184e-06, "loss": 0.5896, "step": 12577 }, { "epoch": 0.8, "grad_norm": 0.8613317012786865, "learning_rate": 1.0435093965498727e-06, "loss": 0.5448, "step": 12578 }, { "epoch": 0.8, "grad_norm": 0.8612802624702454, "learning_rate": 1.0428821559980839e-06, "loss": 0.5702, "step": 12579 }, { "epoch": 0.8, "grad_norm": 0.8284898996353149, "learning_rate": 1.0422550820667605e-06, "loss": 0.5696, "step": 12580 }, { "epoch": 0.8, "grad_norm": 0.8467788696289062, "learning_rate": 1.0416281747823076e-06, "loss": 0.5265, "step": 12581 }, { "epoch": 0.8, "grad_norm": 0.9856624603271484, "learning_rate": 1.0410014341711216e-06, "loss": 0.6143, "step": 12582 }, { "epoch": 0.8, "grad_norm": 0.8942157626152039, "learning_rate": 1.0403748602595937e-06, "loss": 0.6032, "step": 12583 }, { "epoch": 0.8, "grad_norm": 0.9039360880851746, "learning_rate": 1.0397484530741053e-06, "loss": 0.6271, "step": 12584 }, { "epoch": 0.8, "grad_norm": 0.9321849942207336, "learning_rate": 1.0391222126410327e-06, "loss": 0.6062, "step": 12585 }, { "epoch": 0.8, "grad_norm": 0.8697063326835632, "learning_rate": 1.0384961389867454e-06, "loss": 0.546, "step": 12586 }, { "epoch": 0.8, "grad_norm": 0.9680486917495728, "learning_rate": 1.0378702321376054e-06, "loss": 0.5974, "step": 12587 }, { "epoch": 0.8, "grad_norm": 0.8630591034889221, "learning_rate": 1.037244492119966e-06, "loss": 0.5821, "step": 12588 }, { "epoch": 0.8, "grad_norm": 0.8251073360443115, "learning_rate": 1.036618918960175e-06, "loss": 0.5908, "step": 12589 }, { "epoch": 0.8, "grad_norm": 0.8866623044013977, "learning_rate": 1.0359935126845738e-06, "loss": 0.5549, "step": 12590 }, { "epoch": 0.8, "grad_norm": 0.8582077622413635, "learning_rate": 1.0353682733194965e-06, "loss": 0.5637, "step": 12591 }, { "epoch": 0.8, "grad_norm": 0.8953722715377808, "learning_rate": 1.0347432008912688e-06, "loss": 0.6297, "step": 12592 }, { "epoch": 0.8, "grad_norm": 0.9254661798477173, "learning_rate": 1.0341182954262125e-06, "loss": 0.5432, "step": 12593 }, { "epoch": 0.8, "grad_norm": 0.8666430711746216, "learning_rate": 1.0334935569506355e-06, "loss": 0.5653, "step": 12594 }, { "epoch": 0.8, "grad_norm": 0.9944994449615479, "learning_rate": 1.0328689854908492e-06, "loss": 0.5792, "step": 12595 }, { "epoch": 0.8, "grad_norm": 0.8367435336112976, "learning_rate": 1.032244581073148e-06, "loss": 0.5453, "step": 12596 }, { "epoch": 0.8, "grad_norm": 0.9115063548088074, "learning_rate": 1.0316203437238242e-06, "loss": 0.6038, "step": 12597 }, { "epoch": 0.8, "grad_norm": 0.8422768712043762, "learning_rate": 1.0309962734691632e-06, "loss": 0.5879, "step": 12598 }, { "epoch": 0.8, "grad_norm": 0.9396683573722839, "learning_rate": 1.0303723703354418e-06, "loss": 0.6432, "step": 12599 }, { "epoch": 0.8, "grad_norm": 0.8688830733299255, "learning_rate": 1.0297486343489304e-06, "loss": 0.578, "step": 12600 }, { "epoch": 0.8, "grad_norm": 0.8575296401977539, "learning_rate": 1.0291250655358942e-06, "loss": 0.5906, "step": 12601 }, { "epoch": 0.8, "grad_norm": 0.8545289635658264, "learning_rate": 1.0285016639225849e-06, "loss": 0.5982, "step": 12602 }, { "epoch": 0.8, "grad_norm": 0.8576691150665283, "learning_rate": 1.0278784295352572e-06, "loss": 0.589, "step": 12603 }, { "epoch": 0.8, "grad_norm": 0.8619968295097351, "learning_rate": 1.0272553624001502e-06, "loss": 0.5483, "step": 12604 }, { "epoch": 0.8, "grad_norm": 0.8848919868469238, "learning_rate": 1.0266324625434992e-06, "loss": 0.5735, "step": 12605 }, { "epoch": 0.8, "grad_norm": 0.8641508221626282, "learning_rate": 1.0260097299915345e-06, "loss": 0.5249, "step": 12606 }, { "epoch": 0.8, "grad_norm": 0.8593783378601074, "learning_rate": 1.0253871647704722e-06, "loss": 0.5686, "step": 12607 }, { "epoch": 0.8, "grad_norm": 0.860919713973999, "learning_rate": 1.024764766906532e-06, "loss": 0.5702, "step": 12608 }, { "epoch": 0.8, "grad_norm": 0.9240328669548035, "learning_rate": 1.0241425364259195e-06, "loss": 0.6011, "step": 12609 }, { "epoch": 0.8, "grad_norm": 0.8484996557235718, "learning_rate": 1.0235204733548321e-06, "loss": 0.5523, "step": 12610 }, { "epoch": 0.8, "grad_norm": 0.8490926027297974, "learning_rate": 1.022898577719465e-06, "loss": 0.5931, "step": 12611 }, { "epoch": 0.8, "grad_norm": 0.8484750986099243, "learning_rate": 1.0222768495460029e-06, "loss": 0.5313, "step": 12612 }, { "epoch": 0.8, "grad_norm": 0.904883086681366, "learning_rate": 1.0216552888606256e-06, "loss": 0.5397, "step": 12613 }, { "epoch": 0.8, "grad_norm": 0.8598143458366394, "learning_rate": 1.0210338956895054e-06, "loss": 0.5831, "step": 12614 }, { "epoch": 0.8, "grad_norm": 0.8726117014884949, "learning_rate": 1.020412670058804e-06, "loss": 0.5869, "step": 12615 }, { "epoch": 0.8, "grad_norm": 0.9262253642082214, "learning_rate": 1.0197916119946821e-06, "loss": 0.607, "step": 12616 }, { "epoch": 0.8, "grad_norm": 0.9659039974212646, "learning_rate": 1.0191707215232905e-06, "loss": 0.6243, "step": 12617 }, { "epoch": 0.8, "grad_norm": 0.9009899497032166, "learning_rate": 1.0185499986707702e-06, "loss": 0.6024, "step": 12618 }, { "epoch": 0.8, "grad_norm": 0.8743886351585388, "learning_rate": 1.0179294434632593e-06, "loss": 0.5578, "step": 12619 }, { "epoch": 0.8, "grad_norm": 0.8483142256736755, "learning_rate": 1.0173090559268867e-06, "loss": 0.5586, "step": 12620 }, { "epoch": 0.8, "grad_norm": 0.9112587571144104, "learning_rate": 1.0166888360877747e-06, "loss": 0.5717, "step": 12621 }, { "epoch": 0.8, "grad_norm": 0.9113679528236389, "learning_rate": 1.0160687839720407e-06, "loss": 0.5826, "step": 12622 }, { "epoch": 0.8, "grad_norm": 0.8618003129959106, "learning_rate": 1.0154488996057894e-06, "loss": 0.6087, "step": 12623 }, { "epoch": 0.8, "grad_norm": 0.9165884256362915, "learning_rate": 1.0148291830151224e-06, "loss": 0.6375, "step": 12624 }, { "epoch": 0.8, "grad_norm": 0.9440232515335083, "learning_rate": 1.014209634226138e-06, "loss": 0.6116, "step": 12625 }, { "epoch": 0.8, "grad_norm": 0.8831072449684143, "learning_rate": 1.013590253264919e-06, "loss": 0.6156, "step": 12626 }, { "epoch": 0.8, "grad_norm": 0.8621459603309631, "learning_rate": 1.0129710401575465e-06, "loss": 0.587, "step": 12627 }, { "epoch": 0.8, "grad_norm": 0.8391403555870056, "learning_rate": 1.0123519949300942e-06, "loss": 0.5753, "step": 12628 }, { "epoch": 0.8, "grad_norm": 0.8815181851387024, "learning_rate": 1.0117331176086264e-06, "loss": 0.5571, "step": 12629 }, { "epoch": 0.8, "grad_norm": 0.881256103515625, "learning_rate": 1.0111144082192048e-06, "loss": 0.5949, "step": 12630 }, { "epoch": 0.8, "grad_norm": 0.9096524715423584, "learning_rate": 1.0104958667878778e-06, "loss": 0.5858, "step": 12631 }, { "epoch": 0.8, "grad_norm": 0.9030601382255554, "learning_rate": 1.0098774933406903e-06, "loss": 0.5881, "step": 12632 }, { "epoch": 0.8, "grad_norm": 0.9352519512176514, "learning_rate": 1.0092592879036834e-06, "loss": 0.5795, "step": 12633 }, { "epoch": 0.8, "grad_norm": 0.9007598161697388, "learning_rate": 1.0086412505028836e-06, "loss": 0.5347, "step": 12634 }, { "epoch": 0.8, "grad_norm": 0.9302735328674316, "learning_rate": 1.0080233811643158e-06, "loss": 0.5944, "step": 12635 }, { "epoch": 0.8, "grad_norm": 0.8626806735992432, "learning_rate": 1.0074056799139981e-06, "loss": 0.6201, "step": 12636 }, { "epoch": 0.8, "grad_norm": 0.9001949429512024, "learning_rate": 1.006788146777935e-06, "loss": 0.5805, "step": 12637 }, { "epoch": 0.8, "grad_norm": 0.9080000519752502, "learning_rate": 1.0061707817821343e-06, "loss": 0.5803, "step": 12638 }, { "epoch": 0.8, "grad_norm": 0.8849290013313293, "learning_rate": 1.0055535849525872e-06, "loss": 0.5354, "step": 12639 }, { "epoch": 0.8, "grad_norm": 0.9328687787055969, "learning_rate": 1.004936556315283e-06, "loss": 0.6155, "step": 12640 }, { "epoch": 0.8, "grad_norm": 0.9027367234230042, "learning_rate": 1.004319695896202e-06, "loss": 0.5903, "step": 12641 }, { "epoch": 0.8, "grad_norm": 0.8899877667427063, "learning_rate": 1.0037030037213197e-06, "loss": 0.5407, "step": 12642 }, { "epoch": 0.8, "grad_norm": 0.9732675552368164, "learning_rate": 1.0030864798166013e-06, "loss": 0.6381, "step": 12643 }, { "epoch": 0.8, "grad_norm": 0.9128854870796204, "learning_rate": 1.0024701242080082e-06, "loss": 0.6036, "step": 12644 }, { "epoch": 0.8, "grad_norm": 0.905947744846344, "learning_rate": 1.0018539369214891e-06, "loss": 0.5918, "step": 12645 }, { "epoch": 0.8, "grad_norm": 0.8508647084236145, "learning_rate": 1.0012379179829951e-06, "loss": 0.5757, "step": 12646 }, { "epoch": 0.8, "grad_norm": 0.9843933582305908, "learning_rate": 1.0006220674184602e-06, "loss": 0.6191, "step": 12647 }, { "epoch": 0.8, "grad_norm": 0.9173324704170227, "learning_rate": 1.0000063852538172e-06, "loss": 0.5374, "step": 12648 }, { "epoch": 0.8, "grad_norm": 0.9240821599960327, "learning_rate": 9.993908715149902e-07, "loss": 0.5682, "step": 12649 }, { "epoch": 0.8, "grad_norm": 0.8344833850860596, "learning_rate": 9.98775526227897e-07, "loss": 0.5103, "step": 12650 }, { "epoch": 0.8, "grad_norm": 0.8877370953559875, "learning_rate": 9.981603494184473e-07, "loss": 0.6135, "step": 12651 }, { "epoch": 0.8, "grad_norm": 0.9172238111495972, "learning_rate": 9.975453411125447e-07, "loss": 0.5739, "step": 12652 }, { "epoch": 0.8, "grad_norm": 0.9218218326568604, "learning_rate": 9.969305013360825e-07, "loss": 0.5615, "step": 12653 }, { "epoch": 0.8, "grad_norm": 0.9229342341423035, "learning_rate": 9.963158301149522e-07, "loss": 0.6138, "step": 12654 }, { "epoch": 0.8, "grad_norm": 0.9228758215904236, "learning_rate": 9.957013274750338e-07, "loss": 0.6017, "step": 12655 }, { "epoch": 0.8, "grad_norm": 0.8756922483444214, "learning_rate": 9.95086993442203e-07, "loss": 0.577, "step": 12656 }, { "epoch": 0.8, "grad_norm": 0.9407891035079956, "learning_rate": 9.944728280423265e-07, "loss": 0.6189, "step": 12657 }, { "epoch": 0.8, "grad_norm": 0.8400014042854309, "learning_rate": 9.938588313012655e-07, "loss": 0.5349, "step": 12658 }, { "epoch": 0.8, "grad_norm": 0.9128040671348572, "learning_rate": 9.93245003244872e-07, "loss": 0.5806, "step": 12659 }, { "epoch": 0.8, "grad_norm": 0.9192377328872681, "learning_rate": 9.92631343898995e-07, "loss": 0.5908, "step": 12660 }, { "epoch": 0.8, "grad_norm": 0.9443216919898987, "learning_rate": 9.920178532894698e-07, "loss": 0.5576, "step": 12661 }, { "epoch": 0.8, "grad_norm": 0.8419626355171204, "learning_rate": 9.9140453144213e-07, "loss": 0.5135, "step": 12662 }, { "epoch": 0.8, "grad_norm": 0.8705309629440308, "learning_rate": 9.907913783828004e-07, "loss": 0.5648, "step": 12663 }, { "epoch": 0.8, "grad_norm": 0.9433914422988892, "learning_rate": 9.901783941372988e-07, "loss": 0.5512, "step": 12664 }, { "epoch": 0.8, "grad_norm": 0.9095032811164856, "learning_rate": 9.895655787314361e-07, "loss": 0.6271, "step": 12665 }, { "epoch": 0.8, "grad_norm": 0.8473713994026184, "learning_rate": 9.889529321910169e-07, "loss": 0.5568, "step": 12666 }, { "epoch": 0.8, "grad_norm": 0.8689250349998474, "learning_rate": 9.88340454541834e-07, "loss": 0.5617, "step": 12667 }, { "epoch": 0.8, "grad_norm": 0.94936603307724, "learning_rate": 9.87728145809681e-07, "loss": 0.6084, "step": 12668 }, { "epoch": 0.8, "grad_norm": 0.8550971150398254, "learning_rate": 9.871160060203371e-07, "loss": 0.485, "step": 12669 }, { "epoch": 0.8, "grad_norm": 0.8166051506996155, "learning_rate": 9.865040351995787e-07, "loss": 0.5548, "step": 12670 }, { "epoch": 0.8, "grad_norm": 0.8855223655700684, "learning_rate": 9.85892233373173e-07, "loss": 0.5517, "step": 12671 }, { "epoch": 0.8, "grad_norm": 0.8748183846473694, "learning_rate": 9.852806005668813e-07, "loss": 0.5437, "step": 12672 }, { "epoch": 0.8, "grad_norm": 0.9316419959068298, "learning_rate": 9.846691368064577e-07, "loss": 0.5686, "step": 12673 }, { "epoch": 0.8, "grad_norm": 0.8692405819892883, "learning_rate": 9.840578421176495e-07, "loss": 0.5458, "step": 12674 }, { "epoch": 0.8, "grad_norm": 0.9151699542999268, "learning_rate": 9.834467165261924e-07, "loss": 0.5581, "step": 12675 }, { "epoch": 0.8, "grad_norm": 0.8994660973548889, "learning_rate": 9.828357600578242e-07, "loss": 0.5499, "step": 12676 }, { "epoch": 0.8, "grad_norm": 0.9051674008369446, "learning_rate": 9.82224972738266e-07, "loss": 0.6041, "step": 12677 }, { "epoch": 0.8, "grad_norm": 0.845827043056488, "learning_rate": 9.816143545932378e-07, "loss": 0.5242, "step": 12678 }, { "epoch": 0.8, "grad_norm": 0.8907935619354248, "learning_rate": 9.8100390564845e-07, "loss": 0.5491, "step": 12679 }, { "epoch": 0.8, "grad_norm": 0.833772599697113, "learning_rate": 9.803936259296066e-07, "loss": 0.5004, "step": 12680 }, { "epoch": 0.8, "grad_norm": 0.9332374930381775, "learning_rate": 9.797835154624041e-07, "loss": 0.6143, "step": 12681 }, { "epoch": 0.8, "grad_norm": 0.906049370765686, "learning_rate": 9.791735742725339e-07, "loss": 0.641, "step": 12682 }, { "epoch": 0.8, "grad_norm": 0.9151736497879028, "learning_rate": 9.78563802385676e-07, "loss": 0.533, "step": 12683 }, { "epoch": 0.8, "grad_norm": 0.8841385841369629, "learning_rate": 9.779541998275067e-07, "loss": 0.5605, "step": 12684 }, { "epoch": 0.8, "grad_norm": 0.8864476084709167, "learning_rate": 9.773447666236946e-07, "loss": 0.5398, "step": 12685 }, { "epoch": 0.8, "grad_norm": 0.8535383343696594, "learning_rate": 9.767355027999004e-07, "loss": 0.5665, "step": 12686 }, { "epoch": 0.8, "grad_norm": 0.8663592338562012, "learning_rate": 9.761264083817795e-07, "loss": 0.5445, "step": 12687 }, { "epoch": 0.8, "grad_norm": 0.9647719860076904, "learning_rate": 9.755174833949749e-07, "loss": 0.528, "step": 12688 }, { "epoch": 0.8, "grad_norm": 0.9407845139503479, "learning_rate": 9.749087278651304e-07, "loss": 0.6414, "step": 12689 }, { "epoch": 0.8, "grad_norm": 0.869473934173584, "learning_rate": 9.743001418178782e-07, "loss": 0.5956, "step": 12690 }, { "epoch": 0.8, "grad_norm": 0.9082080125808716, "learning_rate": 9.736917252788414e-07, "loss": 0.5468, "step": 12691 }, { "epoch": 0.8, "grad_norm": 0.8994290232658386, "learning_rate": 9.730834782736393e-07, "loss": 0.5714, "step": 12692 }, { "epoch": 0.8, "grad_norm": 0.8263580799102783, "learning_rate": 9.724754008278836e-07, "loss": 0.5674, "step": 12693 }, { "epoch": 0.8, "grad_norm": 0.8548535704612732, "learning_rate": 9.718674929671778e-07, "loss": 0.5221, "step": 12694 }, { "epoch": 0.8, "grad_norm": 0.88187575340271, "learning_rate": 9.71259754717121e-07, "loss": 0.5945, "step": 12695 }, { "epoch": 0.8, "grad_norm": 0.9128777384757996, "learning_rate": 9.706521861032974e-07, "loss": 0.5188, "step": 12696 }, { "epoch": 0.8, "grad_norm": 0.9275169968605042, "learning_rate": 9.700447871512953e-07, "loss": 0.5871, "step": 12697 }, { "epoch": 0.8, "grad_norm": 0.9131552577018738, "learning_rate": 9.694375578866889e-07, "loss": 0.6075, "step": 12698 }, { "epoch": 0.8, "grad_norm": 0.9026870727539062, "learning_rate": 9.688304983350443e-07, "loss": 0.5856, "step": 12699 }, { "epoch": 0.8, "grad_norm": 0.8732842803001404, "learning_rate": 9.682236085219243e-07, "loss": 0.5447, "step": 12700 }, { "epoch": 0.8, "grad_norm": 0.9150475859642029, "learning_rate": 9.67616888472882e-07, "loss": 0.5613, "step": 12701 }, { "epoch": 0.8, "grad_norm": 0.8680015802383423, "learning_rate": 9.670103382134655e-07, "loss": 0.5447, "step": 12702 }, { "epoch": 0.8, "grad_norm": 0.9147303700447083, "learning_rate": 9.664039577692152e-07, "loss": 0.5829, "step": 12703 }, { "epoch": 0.8, "grad_norm": 0.9100850224494934, "learning_rate": 9.65797747165661e-07, "loss": 0.5835, "step": 12704 }, { "epoch": 0.8, "grad_norm": 0.8714893460273743, "learning_rate": 9.65191706428328e-07, "loss": 0.5811, "step": 12705 }, { "epoch": 0.8, "grad_norm": 0.8966681361198425, "learning_rate": 9.645858355827392e-07, "loss": 0.5887, "step": 12706 }, { "epoch": 0.81, "grad_norm": 0.8519495725631714, "learning_rate": 9.639801346544015e-07, "loss": 0.5868, "step": 12707 }, { "epoch": 0.81, "grad_norm": 0.9009888768196106, "learning_rate": 9.633746036688196e-07, "loss": 0.5386, "step": 12708 }, { "epoch": 0.81, "grad_norm": 0.8464866280555725, "learning_rate": 9.627692426514907e-07, "loss": 0.5542, "step": 12709 }, { "epoch": 0.81, "grad_norm": 0.9193606972694397, "learning_rate": 9.621640516279047e-07, "loss": 0.5917, "step": 12710 }, { "epoch": 0.81, "grad_norm": 0.9212644100189209, "learning_rate": 9.61559030623545e-07, "loss": 0.6036, "step": 12711 }, { "epoch": 0.81, "grad_norm": 0.9172996878623962, "learning_rate": 9.609541796638848e-07, "loss": 0.5267, "step": 12712 }, { "epoch": 0.81, "grad_norm": 0.8999651074409485, "learning_rate": 9.603494987743932e-07, "loss": 0.5834, "step": 12713 }, { "epoch": 0.81, "grad_norm": 0.9378758072853088, "learning_rate": 9.597449879805314e-07, "loss": 0.5732, "step": 12714 }, { "epoch": 0.81, "grad_norm": 0.8777378797531128, "learning_rate": 9.59140647307753e-07, "loss": 0.5429, "step": 12715 }, { "epoch": 0.81, "grad_norm": 0.9323769807815552, "learning_rate": 9.585364767815048e-07, "loss": 0.5651, "step": 12716 }, { "epoch": 0.81, "grad_norm": 0.9203583598136902, "learning_rate": 9.57932476427228e-07, "loss": 0.6104, "step": 12717 }, { "epoch": 0.81, "grad_norm": 0.8466483354568481, "learning_rate": 9.573286462703501e-07, "loss": 0.5486, "step": 12718 }, { "epoch": 0.81, "grad_norm": 0.9139605164527893, "learning_rate": 9.567249863363027e-07, "loss": 0.5501, "step": 12719 }, { "epoch": 0.81, "grad_norm": 0.9106989502906799, "learning_rate": 9.56121496650499e-07, "loss": 0.5969, "step": 12720 }, { "epoch": 0.81, "grad_norm": 0.8454228043556213, "learning_rate": 9.55518177238351e-07, "loss": 0.5935, "step": 12721 }, { "epoch": 0.81, "grad_norm": 0.8739069700241089, "learning_rate": 9.549150281252633e-07, "loss": 0.599, "step": 12722 }, { "epoch": 0.81, "grad_norm": 0.8933126926422119, "learning_rate": 9.54312049336632e-07, "loss": 0.5864, "step": 12723 }, { "epoch": 0.81, "grad_norm": 0.8254374861717224, "learning_rate": 9.53709240897846e-07, "loss": 0.5309, "step": 12724 }, { "epoch": 0.81, "grad_norm": 0.8132497668266296, "learning_rate": 9.531066028342895e-07, "loss": 0.6107, "step": 12725 }, { "epoch": 0.81, "grad_norm": 0.870490312576294, "learning_rate": 9.525041351713332e-07, "loss": 0.5699, "step": 12726 }, { "epoch": 0.81, "grad_norm": 0.8581969141960144, "learning_rate": 9.519018379343486e-07, "loss": 0.5235, "step": 12727 }, { "epoch": 0.81, "grad_norm": 0.8589658141136169, "learning_rate": 9.512997111486965e-07, "loss": 0.6124, "step": 12728 }, { "epoch": 0.81, "grad_norm": 0.885682225227356, "learning_rate": 9.506977548397284e-07, "loss": 0.6406, "step": 12729 }, { "epoch": 0.81, "grad_norm": 0.8724113702774048, "learning_rate": 9.50095969032791e-07, "loss": 0.6235, "step": 12730 }, { "epoch": 0.81, "grad_norm": 0.9281406998634338, "learning_rate": 9.494943537532242e-07, "loss": 0.5744, "step": 12731 }, { "epoch": 0.81, "grad_norm": 0.8291860222816467, "learning_rate": 9.488929090263588e-07, "loss": 0.5355, "step": 12732 }, { "epoch": 0.81, "grad_norm": 0.8633788228034973, "learning_rate": 9.482916348775217e-07, "loss": 0.5948, "step": 12733 }, { "epoch": 0.81, "grad_norm": 0.9064257740974426, "learning_rate": 9.476905313320283e-07, "loss": 0.627, "step": 12734 }, { "epoch": 0.81, "grad_norm": 0.9185280203819275, "learning_rate": 9.470895984151879e-07, "loss": 0.5504, "step": 12735 }, { "epoch": 0.81, "grad_norm": 0.9063805937767029, "learning_rate": 9.464888361523078e-07, "loss": 0.5554, "step": 12736 }, { "epoch": 0.81, "grad_norm": 0.9305859804153442, "learning_rate": 9.458882445686807e-07, "loss": 0.6012, "step": 12737 }, { "epoch": 0.81, "grad_norm": 0.9028577208518982, "learning_rate": 9.452878236895963e-07, "loss": 0.6199, "step": 12738 }, { "epoch": 0.81, "grad_norm": 0.870011568069458, "learning_rate": 9.446875735403366e-07, "loss": 0.5725, "step": 12739 }, { "epoch": 0.81, "grad_norm": 0.8897619247436523, "learning_rate": 9.440874941461753e-07, "loss": 0.5679, "step": 12740 }, { "epoch": 0.81, "grad_norm": 0.8889445662498474, "learning_rate": 9.434875855323816e-07, "loss": 0.5471, "step": 12741 }, { "epoch": 0.81, "grad_norm": 0.8454013466835022, "learning_rate": 9.428878477242131e-07, "loss": 0.5971, "step": 12742 }, { "epoch": 0.81, "grad_norm": 0.911864161491394, "learning_rate": 9.422882807469219e-07, "loss": 0.536, "step": 12743 }, { "epoch": 0.81, "grad_norm": 0.9062489867210388, "learning_rate": 9.416888846257588e-07, "loss": 0.5738, "step": 12744 }, { "epoch": 0.81, "grad_norm": 0.8988074660301208, "learning_rate": 9.41089659385957e-07, "loss": 0.5907, "step": 12745 }, { "epoch": 0.81, "grad_norm": 0.9005908370018005, "learning_rate": 9.404906050527496e-07, "loss": 0.5977, "step": 12746 }, { "epoch": 0.81, "grad_norm": 0.8807809352874756, "learning_rate": 9.398917216513625e-07, "loss": 0.5498, "step": 12747 }, { "epoch": 0.81, "grad_norm": 1.0026898384094238, "learning_rate": 9.39293009207008e-07, "loss": 0.6227, "step": 12748 }, { "epoch": 0.81, "grad_norm": 0.9482130408287048, "learning_rate": 9.386944677449017e-07, "loss": 0.5968, "step": 12749 }, { "epoch": 0.81, "grad_norm": 0.8382649421691895, "learning_rate": 9.380960972902414e-07, "loss": 0.5151, "step": 12750 }, { "epoch": 0.81, "grad_norm": 0.8774755597114563, "learning_rate": 9.374978978682248e-07, "loss": 0.558, "step": 12751 }, { "epoch": 0.81, "grad_norm": 1.0281962156295776, "learning_rate": 9.368998695040387e-07, "loss": 0.6242, "step": 12752 }, { "epoch": 0.81, "grad_norm": 0.8988599181175232, "learning_rate": 9.363020122228645e-07, "loss": 0.5831, "step": 12753 }, { "epoch": 0.81, "grad_norm": 0.9397704005241394, "learning_rate": 9.357043260498766e-07, "loss": 0.5837, "step": 12754 }, { "epoch": 0.81, "grad_norm": 0.851276695728302, "learning_rate": 9.351068110102418e-07, "loss": 0.5161, "step": 12755 }, { "epoch": 0.81, "grad_norm": 0.9624939560890198, "learning_rate": 9.345094671291155e-07, "loss": 0.5814, "step": 12756 }, { "epoch": 0.81, "grad_norm": 0.9801254868507385, "learning_rate": 9.339122944316559e-07, "loss": 0.6233, "step": 12757 }, { "epoch": 0.81, "grad_norm": 0.8699768781661987, "learning_rate": 9.333152929430029e-07, "loss": 0.5727, "step": 12758 }, { "epoch": 0.81, "grad_norm": 0.9153022766113281, "learning_rate": 9.327184626882963e-07, "loss": 0.6218, "step": 12759 }, { "epoch": 0.81, "grad_norm": 0.8925560712814331, "learning_rate": 9.321218036926677e-07, "loss": 0.5697, "step": 12760 }, { "epoch": 0.81, "grad_norm": 0.8836098313331604, "learning_rate": 9.315253159812359e-07, "loss": 0.5824, "step": 12761 }, { "epoch": 0.81, "grad_norm": 0.895380437374115, "learning_rate": 9.30928999579121e-07, "loss": 0.5481, "step": 12762 }, { "epoch": 0.81, "grad_norm": 0.9610360264778137, "learning_rate": 9.303328545114321e-07, "loss": 0.532, "step": 12763 }, { "epoch": 0.81, "grad_norm": 0.9137628078460693, "learning_rate": 9.29736880803268e-07, "loss": 0.53, "step": 12764 }, { "epoch": 0.81, "grad_norm": 0.9747650623321533, "learning_rate": 9.29141078479725e-07, "loss": 0.6109, "step": 12765 }, { "epoch": 0.81, "grad_norm": 0.9045560956001282, "learning_rate": 9.285454475658889e-07, "loss": 0.5683, "step": 12766 }, { "epoch": 0.81, "grad_norm": 0.8270063400268555, "learning_rate": 9.279499880868409e-07, "loss": 0.5004, "step": 12767 }, { "epoch": 0.81, "grad_norm": 0.8460723757743835, "learning_rate": 9.273547000676547e-07, "loss": 0.599, "step": 12768 }, { "epoch": 0.81, "grad_norm": 0.9193210601806641, "learning_rate": 9.267595835333915e-07, "loss": 0.5865, "step": 12769 }, { "epoch": 0.81, "grad_norm": 0.910054087638855, "learning_rate": 9.261646385091139e-07, "loss": 0.543, "step": 12770 }, { "epoch": 0.81, "grad_norm": 0.8721626400947571, "learning_rate": 9.25569865019873e-07, "loss": 0.5724, "step": 12771 }, { "epoch": 0.81, "grad_norm": 0.9765549302101135, "learning_rate": 9.249752630907094e-07, "loss": 0.5772, "step": 12772 }, { "epoch": 0.81, "grad_norm": 0.8811758756637573, "learning_rate": 9.243808327466619e-07, "loss": 0.5403, "step": 12773 }, { "epoch": 0.81, "grad_norm": 0.8623104095458984, "learning_rate": 9.237865740127594e-07, "loss": 0.5399, "step": 12774 }, { "epoch": 0.81, "grad_norm": 0.8554300665855408, "learning_rate": 9.231924869140241e-07, "loss": 0.5435, "step": 12775 }, { "epoch": 0.81, "grad_norm": 0.8593326210975647, "learning_rate": 9.225985714754721e-07, "loss": 0.5547, "step": 12776 }, { "epoch": 0.81, "grad_norm": 0.9124411344528198, "learning_rate": 9.220048277221089e-07, "loss": 0.5711, "step": 12777 }, { "epoch": 0.81, "grad_norm": 0.9051636457443237, "learning_rate": 9.214112556789345e-07, "loss": 0.5853, "step": 12778 }, { "epoch": 0.81, "grad_norm": 0.877150297164917, "learning_rate": 9.208178553709468e-07, "loss": 0.5834, "step": 12779 }, { "epoch": 0.81, "grad_norm": 0.9467854499816895, "learning_rate": 9.202246268231274e-07, "loss": 0.6122, "step": 12780 }, { "epoch": 0.81, "grad_norm": 0.9263243079185486, "learning_rate": 9.196315700604564e-07, "loss": 0.635, "step": 12781 }, { "epoch": 0.81, "grad_norm": 0.8572517037391663, "learning_rate": 9.190386851079053e-07, "loss": 0.5614, "step": 12782 }, { "epoch": 0.81, "grad_norm": 0.8789429664611816, "learning_rate": 9.184459719904388e-07, "loss": 0.5525, "step": 12783 }, { "epoch": 0.81, "grad_norm": 0.8996034264564514, "learning_rate": 9.178534307330145e-07, "loss": 0.5553, "step": 12784 }, { "epoch": 0.81, "grad_norm": 0.8926593661308289, "learning_rate": 9.17261061360581e-07, "loss": 0.603, "step": 12785 }, { "epoch": 0.81, "grad_norm": 0.9206883311271667, "learning_rate": 9.166688638980791e-07, "loss": 0.5725, "step": 12786 }, { "epoch": 0.81, "grad_norm": 0.8996316194534302, "learning_rate": 9.160768383704499e-07, "loss": 0.5316, "step": 12787 }, { "epoch": 0.81, "grad_norm": 0.8590518236160278, "learning_rate": 9.154849848026165e-07, "loss": 0.5715, "step": 12788 }, { "epoch": 0.81, "grad_norm": 0.8883064389228821, "learning_rate": 9.148933032195013e-07, "loss": 0.5745, "step": 12789 }, { "epoch": 0.81, "grad_norm": 0.9011886119842529, "learning_rate": 9.14301793646018e-07, "loss": 0.5894, "step": 12790 }, { "epoch": 0.81, "grad_norm": 0.8296880722045898, "learning_rate": 9.137104561070736e-07, "loss": 0.5376, "step": 12791 }, { "epoch": 0.81, "grad_norm": 0.8713788986206055, "learning_rate": 9.13119290627566e-07, "loss": 0.5824, "step": 12792 }, { "epoch": 0.81, "grad_norm": 0.8919610977172852, "learning_rate": 9.125282972323895e-07, "loss": 0.5717, "step": 12793 }, { "epoch": 0.81, "grad_norm": 0.9087851643562317, "learning_rate": 9.119374759464261e-07, "loss": 0.5855, "step": 12794 }, { "epoch": 0.81, "grad_norm": 0.8336042165756226, "learning_rate": 9.113468267945541e-07, "loss": 0.5096, "step": 12795 }, { "epoch": 0.81, "grad_norm": 0.8984754681587219, "learning_rate": 9.107563498016436e-07, "loss": 0.6249, "step": 12796 }, { "epoch": 0.81, "grad_norm": 0.9276543855667114, "learning_rate": 9.101660449925576e-07, "loss": 0.6166, "step": 12797 }, { "epoch": 0.81, "grad_norm": 0.9266611933708191, "learning_rate": 9.095759123921538e-07, "loss": 0.5569, "step": 12798 }, { "epoch": 0.81, "grad_norm": 0.8445834517478943, "learning_rate": 9.089859520252759e-07, "loss": 0.536, "step": 12799 }, { "epoch": 0.81, "grad_norm": 1.0016990900039673, "learning_rate": 9.083961639167693e-07, "loss": 0.624, "step": 12800 }, { "epoch": 0.81, "grad_norm": 0.9784378409385681, "learning_rate": 9.078065480914678e-07, "loss": 0.6467, "step": 12801 }, { "epoch": 0.81, "grad_norm": 0.9219988584518433, "learning_rate": 9.072171045741957e-07, "loss": 0.5891, "step": 12802 }, { "epoch": 0.81, "grad_norm": 0.9053341150283813, "learning_rate": 9.066278333897732e-07, "loss": 0.6182, "step": 12803 }, { "epoch": 0.81, "grad_norm": 0.9229487776756287, "learning_rate": 9.060387345630134e-07, "loss": 0.5547, "step": 12804 }, { "epoch": 0.81, "grad_norm": 0.8746492266654968, "learning_rate": 9.054498081187202e-07, "loss": 0.5368, "step": 12805 }, { "epoch": 0.81, "grad_norm": 0.9092094898223877, "learning_rate": 9.048610540816932e-07, "loss": 0.5661, "step": 12806 }, { "epoch": 0.81, "grad_norm": 0.8250091671943665, "learning_rate": 9.042724724767199e-07, "loss": 0.5977, "step": 12807 }, { "epoch": 0.81, "grad_norm": 0.856377363204956, "learning_rate": 9.036840633285837e-07, "loss": 0.5594, "step": 12808 }, { "epoch": 0.81, "grad_norm": 0.9337197542190552, "learning_rate": 9.030958266620637e-07, "loss": 0.6058, "step": 12809 }, { "epoch": 0.81, "grad_norm": 0.9406629204750061, "learning_rate": 9.025077625019252e-07, "loss": 0.5506, "step": 12810 }, { "epoch": 0.81, "grad_norm": 0.8978514671325684, "learning_rate": 9.01919870872931e-07, "loss": 0.6085, "step": 12811 }, { "epoch": 0.81, "grad_norm": 0.8674015998840332, "learning_rate": 9.013321517998347e-07, "loss": 0.5683, "step": 12812 }, { "epoch": 0.81, "grad_norm": 0.94971764087677, "learning_rate": 9.007446053073832e-07, "loss": 0.6416, "step": 12813 }, { "epoch": 0.81, "grad_norm": 0.836727499961853, "learning_rate": 9.001572314203172e-07, "loss": 0.5604, "step": 12814 }, { "epoch": 0.81, "grad_norm": 0.9299215078353882, "learning_rate": 8.99570030163367e-07, "loss": 0.5895, "step": 12815 }, { "epoch": 0.81, "grad_norm": 0.8771916031837463, "learning_rate": 8.989830015612566e-07, "loss": 0.5793, "step": 12816 }, { "epoch": 0.81, "grad_norm": 0.8739469647407532, "learning_rate": 8.983961456387086e-07, "loss": 0.6095, "step": 12817 }, { "epoch": 0.81, "grad_norm": 0.899440586566925, "learning_rate": 8.978094624204292e-07, "loss": 0.5347, "step": 12818 }, { "epoch": 0.81, "grad_norm": 0.916681706905365, "learning_rate": 8.972229519311227e-07, "loss": 0.5969, "step": 12819 }, { "epoch": 0.81, "grad_norm": 0.8973095417022705, "learning_rate": 8.966366141954852e-07, "loss": 0.6042, "step": 12820 }, { "epoch": 0.81, "grad_norm": 0.9100470542907715, "learning_rate": 8.960504492382055e-07, "loss": 0.5789, "step": 12821 }, { "epoch": 0.81, "grad_norm": 0.8430030345916748, "learning_rate": 8.95464457083966e-07, "loss": 0.5646, "step": 12822 }, { "epoch": 0.81, "grad_norm": 0.869049072265625, "learning_rate": 8.948786377574382e-07, "loss": 0.5889, "step": 12823 }, { "epoch": 0.81, "grad_norm": 0.8816308379173279, "learning_rate": 8.942929912832904e-07, "loss": 0.5535, "step": 12824 }, { "epoch": 0.81, "grad_norm": 0.8329145908355713, "learning_rate": 8.93707517686182e-07, "loss": 0.5467, "step": 12825 }, { "epoch": 0.81, "grad_norm": 0.8186325430870056, "learning_rate": 8.93122216990765e-07, "loss": 0.5437, "step": 12826 }, { "epoch": 0.81, "grad_norm": 0.9899107813835144, "learning_rate": 8.92537089221685e-07, "loss": 0.6088, "step": 12827 }, { "epoch": 0.81, "grad_norm": 0.8372784852981567, "learning_rate": 8.919521344035808e-07, "loss": 0.5536, "step": 12828 }, { "epoch": 0.81, "grad_norm": 0.8933220505714417, "learning_rate": 8.913673525610783e-07, "loss": 0.5864, "step": 12829 }, { "epoch": 0.81, "grad_norm": 0.8942568898200989, "learning_rate": 8.907827437188065e-07, "loss": 0.5824, "step": 12830 }, { "epoch": 0.81, "grad_norm": 0.8576558232307434, "learning_rate": 8.901983079013771e-07, "loss": 0.5339, "step": 12831 }, { "epoch": 0.81, "grad_norm": 0.892993152141571, "learning_rate": 8.896140451334001e-07, "loss": 0.5865, "step": 12832 }, { "epoch": 0.81, "grad_norm": 0.854968786239624, "learning_rate": 8.890299554394766e-07, "loss": 0.5602, "step": 12833 }, { "epoch": 0.81, "grad_norm": 0.8966131806373596, "learning_rate": 8.884460388442006e-07, "loss": 0.5696, "step": 12834 }, { "epoch": 0.81, "grad_norm": 0.9169580936431885, "learning_rate": 8.878622953721589e-07, "loss": 0.5467, "step": 12835 }, { "epoch": 0.81, "grad_norm": 0.867084801197052, "learning_rate": 8.87278725047932e-07, "loss": 0.5794, "step": 12836 }, { "epoch": 0.81, "grad_norm": 0.9113507866859436, "learning_rate": 8.866953278960888e-07, "loss": 0.5922, "step": 12837 }, { "epoch": 0.81, "grad_norm": 0.8957472443580627, "learning_rate": 8.86112103941198e-07, "loss": 0.5407, "step": 12838 }, { "epoch": 0.81, "grad_norm": 0.8440329432487488, "learning_rate": 8.855290532078148e-07, "loss": 0.507, "step": 12839 }, { "epoch": 0.81, "grad_norm": 0.8923792839050293, "learning_rate": 8.849461757204897e-07, "loss": 0.6195, "step": 12840 }, { "epoch": 0.81, "grad_norm": 0.8741909861564636, "learning_rate": 8.843634715037669e-07, "loss": 0.5827, "step": 12841 }, { "epoch": 0.81, "grad_norm": 0.8744585514068604, "learning_rate": 8.83780940582179e-07, "loss": 0.6142, "step": 12842 }, { "epoch": 0.81, "grad_norm": 0.8400830030441284, "learning_rate": 8.83198582980257e-07, "loss": 0.5726, "step": 12843 }, { "epoch": 0.81, "grad_norm": 0.9461512565612793, "learning_rate": 8.826163987225233e-07, "loss": 0.5863, "step": 12844 }, { "epoch": 0.81, "grad_norm": 0.9131925106048584, "learning_rate": 8.82034387833488e-07, "loss": 0.5959, "step": 12845 }, { "epoch": 0.81, "grad_norm": 0.9167430996894836, "learning_rate": 8.814525503376597e-07, "loss": 0.5696, "step": 12846 }, { "epoch": 0.81, "grad_norm": 0.8956706523895264, "learning_rate": 8.808708862595367e-07, "loss": 0.561, "step": 12847 }, { "epoch": 0.81, "grad_norm": 0.8876976370811462, "learning_rate": 8.802893956236114e-07, "loss": 0.5375, "step": 12848 }, { "epoch": 0.81, "grad_norm": 0.9218643307685852, "learning_rate": 8.797080784543699e-07, "loss": 0.561, "step": 12849 }, { "epoch": 0.81, "grad_norm": 0.9845806956291199, "learning_rate": 8.791269347762849e-07, "loss": 0.5885, "step": 12850 }, { "epoch": 0.81, "grad_norm": 0.8306980729103088, "learning_rate": 8.785459646138306e-07, "loss": 0.5472, "step": 12851 }, { "epoch": 0.81, "grad_norm": 0.867559015750885, "learning_rate": 8.779651679914692e-07, "loss": 0.5377, "step": 12852 }, { "epoch": 0.81, "grad_norm": 0.8420113921165466, "learning_rate": 8.773845449336537e-07, "loss": 0.5777, "step": 12853 }, { "epoch": 0.81, "grad_norm": 0.9076850414276123, "learning_rate": 8.768040954648338e-07, "loss": 0.6205, "step": 12854 }, { "epoch": 0.81, "grad_norm": 0.8947234153747559, "learning_rate": 8.762238196094502e-07, "loss": 0.5319, "step": 12855 }, { "epoch": 0.81, "grad_norm": 0.9484972357749939, "learning_rate": 8.756437173919352e-07, "loss": 0.5909, "step": 12856 }, { "epoch": 0.81, "grad_norm": 0.8586333990097046, "learning_rate": 8.750637888367164e-07, "loss": 0.5382, "step": 12857 }, { "epoch": 0.81, "grad_norm": 0.8808966875076294, "learning_rate": 8.744840339682126e-07, "loss": 0.5391, "step": 12858 }, { "epoch": 0.81, "grad_norm": 0.8085102438926697, "learning_rate": 8.73904452810832e-07, "loss": 0.5483, "step": 12859 }, { "epoch": 0.81, "grad_norm": 0.9202531576156616, "learning_rate": 8.733250453889841e-07, "loss": 0.5316, "step": 12860 }, { "epoch": 0.81, "grad_norm": 0.907964289188385, "learning_rate": 8.727458117270615e-07, "loss": 0.5327, "step": 12861 }, { "epoch": 0.81, "grad_norm": 0.9174656867980957, "learning_rate": 8.721667518494553e-07, "loss": 0.5938, "step": 12862 }, { "epoch": 0.81, "grad_norm": 0.8944279551506042, "learning_rate": 8.715878657805471e-07, "loss": 0.5871, "step": 12863 }, { "epoch": 0.82, "grad_norm": 0.8624773025512695, "learning_rate": 8.710091535447123e-07, "loss": 0.5784, "step": 12864 }, { "epoch": 0.82, "grad_norm": 0.8640050888061523, "learning_rate": 8.704306151663184e-07, "loss": 0.536, "step": 12865 }, { "epoch": 0.82, "grad_norm": 0.8937069177627563, "learning_rate": 8.698522506697271e-07, "loss": 0.5738, "step": 12866 }, { "epoch": 0.82, "grad_norm": 0.8589310646057129, "learning_rate": 8.692740600792871e-07, "loss": 0.5897, "step": 12867 }, { "epoch": 0.82, "grad_norm": 0.8561339378356934, "learning_rate": 8.686960434193486e-07, "loss": 0.5341, "step": 12868 }, { "epoch": 0.82, "grad_norm": 0.9550122618675232, "learning_rate": 8.681182007142475e-07, "loss": 0.5872, "step": 12869 }, { "epoch": 0.82, "grad_norm": 0.9209311008453369, "learning_rate": 8.675405319883146e-07, "loss": 0.5488, "step": 12870 }, { "epoch": 0.82, "grad_norm": 0.9099619388580322, "learning_rate": 8.66963037265876e-07, "loss": 0.5861, "step": 12871 }, { "epoch": 0.82, "grad_norm": 0.7935923337936401, "learning_rate": 8.663857165712431e-07, "loss": 0.504, "step": 12872 }, { "epoch": 0.82, "grad_norm": 0.8865057229995728, "learning_rate": 8.658085699287294e-07, "loss": 0.5812, "step": 12873 }, { "epoch": 0.82, "grad_norm": 0.8969137668609619, "learning_rate": 8.652315973626362e-07, "loss": 0.5974, "step": 12874 }, { "epoch": 0.82, "grad_norm": 0.8373164534568787, "learning_rate": 8.646547988972553e-07, "loss": 0.5351, "step": 12875 }, { "epoch": 0.82, "grad_norm": 0.8645594716072083, "learning_rate": 8.64078174556875e-07, "loss": 0.5746, "step": 12876 }, { "epoch": 0.82, "grad_norm": 0.921709418296814, "learning_rate": 8.635017243657751e-07, "loss": 0.5987, "step": 12877 }, { "epoch": 0.82, "grad_norm": 0.9052848815917969, "learning_rate": 8.629254483482274e-07, "loss": 0.5244, "step": 12878 }, { "epoch": 0.82, "grad_norm": 0.8511383533477783, "learning_rate": 8.623493465284987e-07, "loss": 0.505, "step": 12879 }, { "epoch": 0.82, "grad_norm": 0.893326997756958, "learning_rate": 8.61773418930843e-07, "loss": 0.5679, "step": 12880 }, { "epoch": 0.82, "grad_norm": 0.9681572914123535, "learning_rate": 8.611976655795135e-07, "loss": 0.6534, "step": 12881 }, { "epoch": 0.82, "grad_norm": 0.9120391607284546, "learning_rate": 8.606220864987541e-07, "loss": 0.5988, "step": 12882 }, { "epoch": 0.82, "grad_norm": 0.8803929686546326, "learning_rate": 8.600466817127972e-07, "loss": 0.5486, "step": 12883 }, { "epoch": 0.82, "grad_norm": 0.9291055798530579, "learning_rate": 8.59471451245873e-07, "loss": 0.5445, "step": 12884 }, { "epoch": 0.82, "grad_norm": 0.8664212226867676, "learning_rate": 8.588963951222024e-07, "loss": 0.5972, "step": 12885 }, { "epoch": 0.82, "grad_norm": 0.8658425211906433, "learning_rate": 8.583215133659983e-07, "loss": 0.6289, "step": 12886 }, { "epoch": 0.82, "grad_norm": 0.8955614566802979, "learning_rate": 8.577468060014688e-07, "loss": 0.5883, "step": 12887 }, { "epoch": 0.82, "grad_norm": 0.9566403031349182, "learning_rate": 8.571722730528098e-07, "loss": 0.6358, "step": 12888 }, { "epoch": 0.82, "grad_norm": 0.8918949365615845, "learning_rate": 8.565979145442138e-07, "loss": 0.5786, "step": 12889 }, { "epoch": 0.82, "grad_norm": 0.8985361456871033, "learning_rate": 8.560237304998681e-07, "loss": 0.5742, "step": 12890 }, { "epoch": 0.82, "grad_norm": 0.9355623722076416, "learning_rate": 8.554497209439461e-07, "loss": 0.6009, "step": 12891 }, { "epoch": 0.82, "grad_norm": 0.8992531299591064, "learning_rate": 8.548758859006184e-07, "loss": 0.5768, "step": 12892 }, { "epoch": 0.82, "grad_norm": 0.9270733594894409, "learning_rate": 8.543022253940475e-07, "loss": 0.5583, "step": 12893 }, { "epoch": 0.82, "grad_norm": 0.8681014180183411, "learning_rate": 8.537287394483878e-07, "loss": 0.5807, "step": 12894 }, { "epoch": 0.82, "grad_norm": 0.9098723530769348, "learning_rate": 8.531554280877885e-07, "loss": 0.5598, "step": 12895 }, { "epoch": 0.82, "grad_norm": 0.9069850444793701, "learning_rate": 8.525822913363868e-07, "loss": 0.6112, "step": 12896 }, { "epoch": 0.82, "grad_norm": 0.8686051368713379, "learning_rate": 8.520093292183163e-07, "loss": 0.5605, "step": 12897 }, { "epoch": 0.82, "grad_norm": 0.9454940557479858, "learning_rate": 8.514365417577048e-07, "loss": 0.5387, "step": 12898 }, { "epoch": 0.82, "grad_norm": 0.8750715851783752, "learning_rate": 8.50863928978668e-07, "loss": 0.5455, "step": 12899 }, { "epoch": 0.82, "grad_norm": 0.9609119892120361, "learning_rate": 8.502914909053173e-07, "loss": 0.5859, "step": 12900 }, { "epoch": 0.82, "grad_norm": 0.8676950931549072, "learning_rate": 8.497192275617577e-07, "loss": 0.5496, "step": 12901 }, { "epoch": 0.82, "grad_norm": 0.8623301386833191, "learning_rate": 8.491471389720807e-07, "loss": 0.6052, "step": 12902 }, { "epoch": 0.82, "grad_norm": 0.9404549598693848, "learning_rate": 8.485752251603807e-07, "loss": 0.5788, "step": 12903 }, { "epoch": 0.82, "grad_norm": 0.9115918278694153, "learning_rate": 8.480034861507347e-07, "loss": 0.5342, "step": 12904 }, { "epoch": 0.82, "grad_norm": 0.8364629149436951, "learning_rate": 8.474319219672183e-07, "loss": 0.5695, "step": 12905 }, { "epoch": 0.82, "grad_norm": 0.8777880072593689, "learning_rate": 8.46860532633898e-07, "loss": 0.5487, "step": 12906 }, { "epoch": 0.82, "grad_norm": 0.9011834263801575, "learning_rate": 8.462893181748327e-07, "loss": 0.5618, "step": 12907 }, { "epoch": 0.82, "grad_norm": 0.8608363270759583, "learning_rate": 8.457182786140744e-07, "loss": 0.5919, "step": 12908 }, { "epoch": 0.82, "grad_norm": 0.9006455540657043, "learning_rate": 8.451474139756693e-07, "loss": 0.6024, "step": 12909 }, { "epoch": 0.82, "grad_norm": 0.8328776955604553, "learning_rate": 8.445767242836506e-07, "loss": 0.5455, "step": 12910 }, { "epoch": 0.82, "grad_norm": 0.859550416469574, "learning_rate": 8.440062095620527e-07, "loss": 0.5565, "step": 12911 }, { "epoch": 0.82, "grad_norm": 0.8993778824806213, "learning_rate": 8.434358698348944e-07, "loss": 0.598, "step": 12912 }, { "epoch": 0.82, "grad_norm": 0.8660597205162048, "learning_rate": 8.428657051261918e-07, "loss": 0.5732, "step": 12913 }, { "epoch": 0.82, "grad_norm": 0.8744674324989319, "learning_rate": 8.422957154599526e-07, "loss": 0.5754, "step": 12914 }, { "epoch": 0.82, "grad_norm": 0.9497204422950745, "learning_rate": 8.417259008601775e-07, "loss": 0.5412, "step": 12915 }, { "epoch": 0.82, "grad_norm": 0.8864256739616394, "learning_rate": 8.411562613508595e-07, "loss": 0.5603, "step": 12916 }, { "epoch": 0.82, "grad_norm": 0.959272563457489, "learning_rate": 8.405867969559845e-07, "loss": 0.5884, "step": 12917 }, { "epoch": 0.82, "grad_norm": 0.8853299021720886, "learning_rate": 8.400175076995287e-07, "loss": 0.5456, "step": 12918 }, { "epoch": 0.82, "grad_norm": 0.8390821218490601, "learning_rate": 8.394483936054643e-07, "loss": 0.5739, "step": 12919 }, { "epoch": 0.82, "grad_norm": 0.8850178122520447, "learning_rate": 8.388794546977546e-07, "loss": 0.5718, "step": 12920 }, { "epoch": 0.82, "grad_norm": 0.9476692080497742, "learning_rate": 8.383106910003552e-07, "loss": 0.5619, "step": 12921 }, { "epoch": 0.82, "grad_norm": 0.9127770066261292, "learning_rate": 8.377421025372157e-07, "loss": 0.5741, "step": 12922 }, { "epoch": 0.82, "grad_norm": 0.8317306041717529, "learning_rate": 8.371736893322763e-07, "loss": 0.4702, "step": 12923 }, { "epoch": 0.82, "grad_norm": 0.87800532579422, "learning_rate": 8.366054514094718e-07, "loss": 0.5637, "step": 12924 }, { "epoch": 0.82, "grad_norm": 0.8989687561988831, "learning_rate": 8.360373887927298e-07, "loss": 0.5926, "step": 12925 }, { "epoch": 0.82, "grad_norm": 0.9448102712631226, "learning_rate": 8.35469501505966e-07, "loss": 0.5936, "step": 12926 }, { "epoch": 0.82, "grad_norm": 0.8820131421089172, "learning_rate": 8.349017895730948e-07, "loss": 0.5735, "step": 12927 }, { "epoch": 0.82, "grad_norm": 0.9099850654602051, "learning_rate": 8.343342530180198e-07, "loss": 0.5738, "step": 12928 }, { "epoch": 0.82, "grad_norm": 0.9121573567390442, "learning_rate": 8.33766891864638e-07, "loss": 0.5523, "step": 12929 }, { "epoch": 0.82, "grad_norm": 0.9185227155685425, "learning_rate": 8.331997061368391e-07, "loss": 0.5919, "step": 12930 }, { "epoch": 0.82, "grad_norm": 0.8707922101020813, "learning_rate": 8.326326958585062e-07, "loss": 0.6125, "step": 12931 }, { "epoch": 0.82, "grad_norm": 0.8843598365783691, "learning_rate": 8.320658610535115e-07, "loss": 0.5889, "step": 12932 }, { "epoch": 0.82, "grad_norm": 0.903973400592804, "learning_rate": 8.314992017457263e-07, "loss": 0.5731, "step": 12933 }, { "epoch": 0.82, "grad_norm": 0.8613129258155823, "learning_rate": 8.30932717959007e-07, "loss": 0.5461, "step": 12934 }, { "epoch": 0.82, "grad_norm": 0.912260890007019, "learning_rate": 8.303664097172087e-07, "loss": 0.5855, "step": 12935 }, { "epoch": 0.82, "grad_norm": 0.8741612434387207, "learning_rate": 8.298002770441749e-07, "loss": 0.5981, "step": 12936 }, { "epoch": 0.82, "grad_norm": 0.8477001190185547, "learning_rate": 8.292343199637448e-07, "loss": 0.5833, "step": 12937 }, { "epoch": 0.82, "grad_norm": 0.8845143914222717, "learning_rate": 8.286685384997484e-07, "loss": 0.5903, "step": 12938 }, { "epoch": 0.82, "grad_norm": 0.933994472026825, "learning_rate": 8.281029326760104e-07, "loss": 0.5752, "step": 12939 }, { "epoch": 0.82, "grad_norm": 0.8114098310470581, "learning_rate": 8.275375025163418e-07, "loss": 0.5763, "step": 12940 }, { "epoch": 0.82, "grad_norm": 0.9245671033859253, "learning_rate": 8.269722480445569e-07, "loss": 0.6329, "step": 12941 }, { "epoch": 0.82, "grad_norm": 0.9412350058555603, "learning_rate": 8.264071692844527e-07, "loss": 0.5522, "step": 12942 }, { "epoch": 0.82, "grad_norm": 0.8871721625328064, "learning_rate": 8.258422662598231e-07, "loss": 0.5602, "step": 12943 }, { "epoch": 0.82, "grad_norm": 0.9519109725952148, "learning_rate": 8.252775389944556e-07, "loss": 0.5704, "step": 12944 }, { "epoch": 0.82, "grad_norm": 0.9257845282554626, "learning_rate": 8.247129875121274e-07, "loss": 0.6062, "step": 12945 }, { "epoch": 0.82, "grad_norm": 0.9066646695137024, "learning_rate": 8.24148611836611e-07, "loss": 0.556, "step": 12946 }, { "epoch": 0.82, "grad_norm": 0.8821330070495605, "learning_rate": 8.235844119916708e-07, "loss": 0.5509, "step": 12947 }, { "epoch": 0.82, "grad_norm": 1.1398460865020752, "learning_rate": 8.230203880010612e-07, "loss": 0.588, "step": 12948 }, { "epoch": 0.82, "grad_norm": 0.8532936573028564, "learning_rate": 8.224565398885325e-07, "loss": 0.5312, "step": 12949 }, { "epoch": 0.82, "grad_norm": 0.8935076594352722, "learning_rate": 8.218928676778264e-07, "loss": 0.5871, "step": 12950 }, { "epoch": 0.82, "grad_norm": 0.8617026209831238, "learning_rate": 8.213293713926767e-07, "loss": 0.5039, "step": 12951 }, { "epoch": 0.82, "grad_norm": 0.9438952207565308, "learning_rate": 8.207660510568122e-07, "loss": 0.6125, "step": 12952 }, { "epoch": 0.82, "grad_norm": 0.8180469274520874, "learning_rate": 8.202029066939482e-07, "loss": 0.5147, "step": 12953 }, { "epoch": 0.82, "grad_norm": 0.8670182824134827, "learning_rate": 8.196399383278004e-07, "loss": 0.6175, "step": 12954 }, { "epoch": 0.82, "grad_norm": 0.9703617691993713, "learning_rate": 8.190771459820739e-07, "loss": 0.6071, "step": 12955 }, { "epoch": 0.82, "grad_norm": 0.9828335046768188, "learning_rate": 8.18514529680463e-07, "loss": 0.6214, "step": 12956 }, { "epoch": 0.82, "grad_norm": 0.8318359851837158, "learning_rate": 8.179520894466592e-07, "loss": 0.5637, "step": 12957 }, { "epoch": 0.82, "grad_norm": 0.8575620651245117, "learning_rate": 8.173898253043444e-07, "loss": 0.5122, "step": 12958 }, { "epoch": 0.82, "grad_norm": 0.8485636115074158, "learning_rate": 8.168277372771937e-07, "loss": 0.5165, "step": 12959 }, { "epoch": 0.82, "grad_norm": 0.8538296222686768, "learning_rate": 8.162658253888761e-07, "loss": 0.6073, "step": 12960 }, { "epoch": 0.82, "grad_norm": 0.8725820779800415, "learning_rate": 8.157040896630481e-07, "loss": 0.5341, "step": 12961 }, { "epoch": 0.82, "grad_norm": 0.855991780757904, "learning_rate": 8.151425301233656e-07, "loss": 0.5491, "step": 12962 }, { "epoch": 0.82, "grad_norm": 0.9150635600090027, "learning_rate": 8.14581146793475e-07, "loss": 0.5929, "step": 12963 }, { "epoch": 0.82, "grad_norm": 0.9065380692481995, "learning_rate": 8.140199396970106e-07, "loss": 0.5817, "step": 12964 }, { "epoch": 0.82, "grad_norm": 0.8524861335754395, "learning_rate": 8.13458908857605e-07, "loss": 0.5101, "step": 12965 }, { "epoch": 0.82, "grad_norm": 0.8974103331565857, "learning_rate": 8.128980542988801e-07, "loss": 0.5379, "step": 12966 }, { "epoch": 0.82, "grad_norm": 0.8953040242195129, "learning_rate": 8.12337376044453e-07, "loss": 0.5447, "step": 12967 }, { "epoch": 0.82, "grad_norm": 0.9523823261260986, "learning_rate": 8.117768741179322e-07, "loss": 0.6085, "step": 12968 }, { "epoch": 0.82, "grad_norm": 0.8712965846061707, "learning_rate": 8.112165485429163e-07, "loss": 0.5753, "step": 12969 }, { "epoch": 0.82, "grad_norm": 0.9363554120063782, "learning_rate": 8.106563993429983e-07, "loss": 0.5624, "step": 12970 }, { "epoch": 0.82, "grad_norm": 0.8442745208740234, "learning_rate": 8.100964265417682e-07, "loss": 0.5491, "step": 12971 }, { "epoch": 0.82, "grad_norm": 0.9169662594795227, "learning_rate": 8.09536630162801e-07, "loss": 0.5917, "step": 12972 }, { "epoch": 0.82, "grad_norm": 0.948613166809082, "learning_rate": 8.089770102296685e-07, "loss": 0.5397, "step": 12973 }, { "epoch": 0.82, "grad_norm": 0.8877300024032593, "learning_rate": 8.084175667659345e-07, "loss": 0.5818, "step": 12974 }, { "epoch": 0.82, "grad_norm": 0.8682299852371216, "learning_rate": 8.078582997951556e-07, "loss": 0.5694, "step": 12975 }, { "epoch": 0.82, "grad_norm": 0.8772991299629211, "learning_rate": 8.072992093408816e-07, "loss": 0.6045, "step": 12976 }, { "epoch": 0.82, "grad_norm": 0.8861331343650818, "learning_rate": 8.067402954266512e-07, "loss": 0.6145, "step": 12977 }, { "epoch": 0.82, "grad_norm": 0.8956562876701355, "learning_rate": 8.061815580759996e-07, "loss": 0.5567, "step": 12978 }, { "epoch": 0.82, "grad_norm": 0.8872475624084473, "learning_rate": 8.056229973124529e-07, "loss": 0.5827, "step": 12979 }, { "epoch": 0.82, "grad_norm": 0.8420911431312561, "learning_rate": 8.050646131595313e-07, "loss": 0.5742, "step": 12980 }, { "epoch": 0.82, "grad_norm": 0.8587638735771179, "learning_rate": 8.045064056407453e-07, "loss": 0.5755, "step": 12981 }, { "epoch": 0.82, "grad_norm": 0.8562715649604797, "learning_rate": 8.039483747796012e-07, "loss": 0.5786, "step": 12982 }, { "epoch": 0.82, "grad_norm": 0.8843387365341187, "learning_rate": 8.033905205995913e-07, "loss": 0.5752, "step": 12983 }, { "epoch": 0.82, "grad_norm": 0.8959712982177734, "learning_rate": 8.0283284312421e-07, "loss": 0.5532, "step": 12984 }, { "epoch": 0.82, "grad_norm": 0.8698373436927795, "learning_rate": 8.022753423769359e-07, "loss": 0.587, "step": 12985 }, { "epoch": 0.82, "grad_norm": 0.8483936190605164, "learning_rate": 8.017180183812439e-07, "loss": 0.5502, "step": 12986 }, { "epoch": 0.82, "grad_norm": 0.866079568862915, "learning_rate": 8.011608711606017e-07, "loss": 0.5588, "step": 12987 }, { "epoch": 0.82, "grad_norm": 0.8948245048522949, "learning_rate": 8.006039007384681e-07, "loss": 0.5838, "step": 12988 }, { "epoch": 0.82, "grad_norm": 0.9978700876235962, "learning_rate": 8.000471071382959e-07, "loss": 0.6032, "step": 12989 }, { "epoch": 0.82, "grad_norm": 0.8848072290420532, "learning_rate": 7.99490490383531e-07, "loss": 0.5889, "step": 12990 }, { "epoch": 0.82, "grad_norm": 0.8934358954429626, "learning_rate": 7.989340504976062e-07, "loss": 0.5968, "step": 12991 }, { "epoch": 0.82, "grad_norm": 0.8869682550430298, "learning_rate": 7.983777875039567e-07, "loss": 0.5398, "step": 12992 }, { "epoch": 0.82, "grad_norm": 0.8653879165649414, "learning_rate": 7.978217014260009e-07, "loss": 0.5723, "step": 12993 }, { "epoch": 0.82, "grad_norm": 0.9040364027023315, "learning_rate": 7.972657922871546e-07, "loss": 0.5593, "step": 12994 }, { "epoch": 0.82, "grad_norm": 0.8811683058738708, "learning_rate": 7.967100601108258e-07, "loss": 0.577, "step": 12995 }, { "epoch": 0.82, "grad_norm": 0.8992339968681335, "learning_rate": 7.961545049204145e-07, "loss": 0.634, "step": 12996 }, { "epoch": 0.82, "grad_norm": 0.8207805156707764, "learning_rate": 7.955991267393127e-07, "loss": 0.5304, "step": 12997 }, { "epoch": 0.82, "grad_norm": 0.9232082366943359, "learning_rate": 7.950439255909065e-07, "loss": 0.6293, "step": 12998 }, { "epoch": 0.82, "grad_norm": 0.884673535823822, "learning_rate": 7.944889014985718e-07, "loss": 0.5888, "step": 12999 }, { "epoch": 0.82, "grad_norm": 0.8796509504318237, "learning_rate": 7.939340544856783e-07, "loss": 0.5765, "step": 13000 }, { "epoch": 0.82, "grad_norm": 0.8928359746932983, "learning_rate": 7.933793845755922e-07, "loss": 0.5899, "step": 13001 }, { "epoch": 0.82, "grad_norm": 0.8858817219734192, "learning_rate": 7.928248917916653e-07, "loss": 0.573, "step": 13002 }, { "epoch": 0.82, "grad_norm": 0.9019994735717773, "learning_rate": 7.922705761572464e-07, "loss": 0.5574, "step": 13003 }, { "epoch": 0.82, "grad_norm": 0.8664145469665527, "learning_rate": 7.91716437695676e-07, "loss": 0.5113, "step": 13004 }, { "epoch": 0.82, "grad_norm": 0.91963791847229, "learning_rate": 7.911624764302872e-07, "loss": 0.6429, "step": 13005 }, { "epoch": 0.82, "grad_norm": 0.9109863042831421, "learning_rate": 7.906086923844059e-07, "loss": 0.5727, "step": 13006 }, { "epoch": 0.82, "grad_norm": 0.863783597946167, "learning_rate": 7.900550855813477e-07, "loss": 0.5765, "step": 13007 }, { "epoch": 0.82, "grad_norm": 0.9418416619300842, "learning_rate": 7.895016560444241e-07, "loss": 0.5862, "step": 13008 }, { "epoch": 0.82, "grad_norm": 0.8956203460693359, "learning_rate": 7.889484037969403e-07, "loss": 0.6175, "step": 13009 }, { "epoch": 0.82, "grad_norm": 0.8799732327461243, "learning_rate": 7.883953288621887e-07, "loss": 0.6195, "step": 13010 }, { "epoch": 0.82, "grad_norm": 1.00110924243927, "learning_rate": 7.878424312634592e-07, "loss": 0.5845, "step": 13011 }, { "epoch": 0.82, "grad_norm": 0.9354737401008606, "learning_rate": 7.87289711024033e-07, "loss": 0.592, "step": 13012 }, { "epoch": 0.82, "grad_norm": 0.8658231496810913, "learning_rate": 7.867371681671793e-07, "loss": 0.6014, "step": 13013 }, { "epoch": 0.82, "grad_norm": 0.9071126580238342, "learning_rate": 7.861848027161694e-07, "loss": 0.6201, "step": 13014 }, { "epoch": 0.82, "grad_norm": 0.8745089769363403, "learning_rate": 7.856326146942572e-07, "loss": 0.5287, "step": 13015 }, { "epoch": 0.82, "grad_norm": 0.890994131565094, "learning_rate": 7.85080604124695e-07, "loss": 0.634, "step": 13016 }, { "epoch": 0.82, "grad_norm": 0.8491596579551697, "learning_rate": 7.845287710307258e-07, "loss": 0.5487, "step": 13017 }, { "epoch": 0.82, "grad_norm": 0.942820131778717, "learning_rate": 7.839771154355858e-07, "loss": 0.5933, "step": 13018 }, { "epoch": 0.82, "grad_norm": 0.940209686756134, "learning_rate": 7.834256373625027e-07, "loss": 0.5907, "step": 13019 }, { "epoch": 0.82, "grad_norm": 0.8660345077514648, "learning_rate": 7.828743368346991e-07, "loss": 0.5164, "step": 13020 }, { "epoch": 0.82, "grad_norm": 0.8865716457366943, "learning_rate": 7.823232138753845e-07, "loss": 0.5352, "step": 13021 }, { "epoch": 0.83, "grad_norm": 0.9319779872894287, "learning_rate": 7.817722685077689e-07, "loss": 0.5374, "step": 13022 }, { "epoch": 0.83, "grad_norm": 0.8646177649497986, "learning_rate": 7.812215007550483e-07, "loss": 0.5976, "step": 13023 }, { "epoch": 0.83, "grad_norm": 0.9318941831588745, "learning_rate": 7.806709106404142e-07, "loss": 0.6182, "step": 13024 }, { "epoch": 0.83, "grad_norm": 0.9168413281440735, "learning_rate": 7.801204981870508e-07, "loss": 0.5817, "step": 13025 }, { "epoch": 0.83, "grad_norm": 0.8882789015769958, "learning_rate": 7.795702634181318e-07, "loss": 0.5534, "step": 13026 }, { "epoch": 0.83, "grad_norm": 0.8667416572570801, "learning_rate": 7.790202063568276e-07, "loss": 0.5252, "step": 13027 }, { "epoch": 0.83, "grad_norm": 0.8797557353973389, "learning_rate": 7.784703270263006e-07, "loss": 0.5719, "step": 13028 }, { "epoch": 0.83, "grad_norm": 0.8629273176193237, "learning_rate": 7.779206254497007e-07, "loss": 0.5397, "step": 13029 }, { "epoch": 0.83, "grad_norm": 0.9070542454719543, "learning_rate": 7.773711016501762e-07, "loss": 0.5972, "step": 13030 }, { "epoch": 0.83, "grad_norm": 0.8951036930084229, "learning_rate": 7.76821755650865e-07, "loss": 0.6304, "step": 13031 }, { "epoch": 0.83, "grad_norm": 0.9298555850982666, "learning_rate": 7.762725874748983e-07, "loss": 0.5728, "step": 13032 }, { "epoch": 0.83, "grad_norm": 0.9324959516525269, "learning_rate": 7.757235971454008e-07, "loss": 0.5416, "step": 13033 }, { "epoch": 0.83, "grad_norm": 0.8365843296051025, "learning_rate": 7.751747846854851e-07, "loss": 0.5546, "step": 13034 }, { "epoch": 0.83, "grad_norm": 0.9446489810943604, "learning_rate": 7.746261501182633e-07, "loss": 0.5714, "step": 13035 }, { "epoch": 0.83, "grad_norm": 0.8774089217185974, "learning_rate": 7.740776934668365e-07, "loss": 0.5605, "step": 13036 }, { "epoch": 0.83, "grad_norm": 0.8851078152656555, "learning_rate": 7.73529414754296e-07, "loss": 0.5426, "step": 13037 }, { "epoch": 0.83, "grad_norm": 0.9036283493041992, "learning_rate": 7.72981314003729e-07, "loss": 0.5378, "step": 13038 }, { "epoch": 0.83, "grad_norm": 0.9143775701522827, "learning_rate": 7.724333912382143e-07, "loss": 0.5731, "step": 13039 }, { "epoch": 0.83, "grad_norm": 0.8436862230300903, "learning_rate": 7.718856464808222e-07, "loss": 0.5392, "step": 13040 }, { "epoch": 0.83, "grad_norm": 0.8102920055389404, "learning_rate": 7.713380797546188e-07, "loss": 0.5208, "step": 13041 }, { "epoch": 0.83, "grad_norm": 0.922103762626648, "learning_rate": 7.707906910826574e-07, "loss": 0.5924, "step": 13042 }, { "epoch": 0.83, "grad_norm": 0.8845114707946777, "learning_rate": 7.702434804879861e-07, "loss": 0.5718, "step": 13043 }, { "epoch": 0.83, "grad_norm": 0.91489577293396, "learning_rate": 7.696964479936497e-07, "loss": 0.5519, "step": 13044 }, { "epoch": 0.83, "grad_norm": 0.8923588991165161, "learning_rate": 7.691495936226789e-07, "loss": 0.5516, "step": 13045 }, { "epoch": 0.83, "grad_norm": 1.006177544593811, "learning_rate": 7.686029173981008e-07, "loss": 0.6134, "step": 13046 }, { "epoch": 0.83, "grad_norm": 0.9382014870643616, "learning_rate": 7.680564193429336e-07, "loss": 0.5919, "step": 13047 }, { "epoch": 0.83, "grad_norm": 0.9472145438194275, "learning_rate": 7.675100994801888e-07, "loss": 0.5363, "step": 13048 }, { "epoch": 0.83, "grad_norm": 0.8798018097877502, "learning_rate": 7.669639578328713e-07, "loss": 0.5514, "step": 13049 }, { "epoch": 0.83, "grad_norm": 0.9217506647109985, "learning_rate": 7.664179944239746e-07, "loss": 0.5821, "step": 13050 }, { "epoch": 0.83, "grad_norm": 0.8195998072624207, "learning_rate": 7.658722092764876e-07, "loss": 0.4924, "step": 13051 }, { "epoch": 0.83, "grad_norm": 0.892219066619873, "learning_rate": 7.653266024133943e-07, "loss": 0.5429, "step": 13052 }, { "epoch": 0.83, "grad_norm": 0.9027977585792542, "learning_rate": 7.647811738576655e-07, "loss": 0.5846, "step": 13053 }, { "epoch": 0.83, "grad_norm": 0.8905366063117981, "learning_rate": 7.642359236322683e-07, "loss": 0.5554, "step": 13054 }, { "epoch": 0.83, "grad_norm": 0.9175378680229187, "learning_rate": 7.63690851760161e-07, "loss": 0.5582, "step": 13055 }, { "epoch": 0.83, "grad_norm": 0.945669412612915, "learning_rate": 7.631459582642947e-07, "loss": 0.556, "step": 13056 }, { "epoch": 0.83, "grad_norm": 0.8952832818031311, "learning_rate": 7.626012431676138e-07, "loss": 0.6063, "step": 13057 }, { "epoch": 0.83, "grad_norm": 0.8996466994285583, "learning_rate": 7.620567064930545e-07, "loss": 0.5752, "step": 13058 }, { "epoch": 0.83, "grad_norm": 0.8489691615104675, "learning_rate": 7.615123482635433e-07, "loss": 0.5823, "step": 13059 }, { "epoch": 0.83, "grad_norm": 0.9028809070587158, "learning_rate": 7.609681685020026e-07, "loss": 0.5796, "step": 13060 }, { "epoch": 0.83, "grad_norm": 0.8422659039497375, "learning_rate": 7.604241672313461e-07, "loss": 0.5435, "step": 13061 }, { "epoch": 0.83, "grad_norm": 0.837838888168335, "learning_rate": 7.59880344474479e-07, "loss": 0.5687, "step": 13062 }, { "epoch": 0.83, "grad_norm": 0.8525023460388184, "learning_rate": 7.593367002543018e-07, "loss": 0.5606, "step": 13063 }, { "epoch": 0.83, "grad_norm": 0.8720320463180542, "learning_rate": 7.587932345937016e-07, "loss": 0.5699, "step": 13064 }, { "epoch": 0.83, "grad_norm": 0.8558526635169983, "learning_rate": 7.582499475155653e-07, "loss": 0.6107, "step": 13065 }, { "epoch": 0.83, "grad_norm": 0.8588683009147644, "learning_rate": 7.577068390427689e-07, "loss": 0.5271, "step": 13066 }, { "epoch": 0.83, "grad_norm": 0.806747317314148, "learning_rate": 7.571639091981786e-07, "loss": 0.5362, "step": 13067 }, { "epoch": 0.83, "grad_norm": 0.9133474826812744, "learning_rate": 7.566211580046562e-07, "loss": 0.5963, "step": 13068 }, { "epoch": 0.83, "grad_norm": 0.7889014482498169, "learning_rate": 7.56078585485055e-07, "loss": 0.5403, "step": 13069 }, { "epoch": 0.83, "grad_norm": 0.86361163854599, "learning_rate": 7.555361916622217e-07, "loss": 0.5825, "step": 13070 }, { "epoch": 0.83, "grad_norm": 0.8512160181999207, "learning_rate": 7.549939765589942e-07, "loss": 0.5044, "step": 13071 }, { "epoch": 0.83, "grad_norm": 0.8855159282684326, "learning_rate": 7.544519401982025e-07, "loss": 0.5909, "step": 13072 }, { "epoch": 0.83, "grad_norm": 0.9207944273948669, "learning_rate": 7.539100826026691e-07, "loss": 0.5993, "step": 13073 }, { "epoch": 0.83, "grad_norm": 0.9316564798355103, "learning_rate": 7.533684037952133e-07, "loss": 0.5755, "step": 13074 }, { "epoch": 0.83, "grad_norm": 0.8831668496131897, "learning_rate": 7.528269037986402e-07, "loss": 0.6368, "step": 13075 }, { "epoch": 0.83, "grad_norm": 0.9168758988380432, "learning_rate": 7.522855826357511e-07, "loss": 0.5728, "step": 13076 }, { "epoch": 0.83, "grad_norm": 0.8036249876022339, "learning_rate": 7.517444403293394e-07, "loss": 0.5295, "step": 13077 }, { "epoch": 0.83, "grad_norm": 0.8695041537284851, "learning_rate": 7.512034769021909e-07, "loss": 0.592, "step": 13078 }, { "epoch": 0.83, "grad_norm": 0.9398552775382996, "learning_rate": 7.506626923770843e-07, "loss": 0.5927, "step": 13079 }, { "epoch": 0.83, "grad_norm": 0.8533617258071899, "learning_rate": 7.501220867767883e-07, "loss": 0.5991, "step": 13080 }, { "epoch": 0.83, "grad_norm": 0.877224862575531, "learning_rate": 7.495816601240664e-07, "loss": 0.5839, "step": 13081 }, { "epoch": 0.83, "grad_norm": 0.9243265390396118, "learning_rate": 7.490414124416761e-07, "loss": 0.6103, "step": 13082 }, { "epoch": 0.83, "grad_norm": 1.0017024278640747, "learning_rate": 7.485013437523636e-07, "loss": 0.6144, "step": 13083 }, { "epoch": 0.83, "grad_norm": 0.8907317519187927, "learning_rate": 7.479614540788687e-07, "loss": 0.6113, "step": 13084 }, { "epoch": 0.83, "grad_norm": 0.9147844910621643, "learning_rate": 7.474217434439263e-07, "loss": 0.5684, "step": 13085 }, { "epoch": 0.83, "grad_norm": 0.8742222785949707, "learning_rate": 7.468822118702596e-07, "loss": 0.5424, "step": 13086 }, { "epoch": 0.83, "grad_norm": 0.9334181547164917, "learning_rate": 7.463428593805894e-07, "loss": 0.5554, "step": 13087 }, { "epoch": 0.83, "grad_norm": 0.8707894682884216, "learning_rate": 7.458036859976225e-07, "loss": 0.6064, "step": 13088 }, { "epoch": 0.83, "grad_norm": 0.9232116341590881, "learning_rate": 7.452646917440631e-07, "loss": 0.6251, "step": 13089 }, { "epoch": 0.83, "grad_norm": 0.9443577527999878, "learning_rate": 7.447258766426063e-07, "loss": 0.5861, "step": 13090 }, { "epoch": 0.83, "grad_norm": 0.87910395860672, "learning_rate": 7.441872407159401e-07, "loss": 0.5628, "step": 13091 }, { "epoch": 0.83, "grad_norm": 0.8710011839866638, "learning_rate": 7.43648783986744e-07, "loss": 0.5954, "step": 13092 }, { "epoch": 0.83, "grad_norm": 0.9100737571716309, "learning_rate": 7.431105064776922e-07, "loss": 0.5956, "step": 13093 }, { "epoch": 0.83, "grad_norm": 0.8823485970497131, "learning_rate": 7.425724082114455e-07, "loss": 0.5534, "step": 13094 }, { "epoch": 0.83, "grad_norm": 0.9108067750930786, "learning_rate": 7.420344892106674e-07, "loss": 0.5459, "step": 13095 }, { "epoch": 0.83, "grad_norm": 0.9197466969490051, "learning_rate": 7.414967494980024e-07, "loss": 0.5779, "step": 13096 }, { "epoch": 0.83, "grad_norm": 0.8721498847007751, "learning_rate": 7.40959189096096e-07, "loss": 0.5026, "step": 13097 }, { "epoch": 0.83, "grad_norm": 0.9107875823974609, "learning_rate": 7.404218080275816e-07, "loss": 0.6035, "step": 13098 }, { "epoch": 0.83, "grad_norm": 0.8590791821479797, "learning_rate": 7.398846063150866e-07, "loss": 0.5347, "step": 13099 }, { "epoch": 0.83, "grad_norm": 0.874270498752594, "learning_rate": 7.393475839812314e-07, "loss": 0.5954, "step": 13100 }, { "epoch": 0.83, "grad_norm": 0.9111903309822083, "learning_rate": 7.388107410486289e-07, "loss": 0.5691, "step": 13101 }, { "epoch": 0.83, "grad_norm": 0.9081681370735168, "learning_rate": 7.3827407753988e-07, "loss": 0.5675, "step": 13102 }, { "epoch": 0.83, "grad_norm": 0.8183289766311646, "learning_rate": 7.377375934775865e-07, "loss": 0.5498, "step": 13103 }, { "epoch": 0.83, "grad_norm": 0.8380873203277588, "learning_rate": 7.372012888843344e-07, "loss": 0.5786, "step": 13104 }, { "epoch": 0.83, "grad_norm": 0.8212375044822693, "learning_rate": 7.366651637827065e-07, "loss": 0.5647, "step": 13105 }, { "epoch": 0.83, "grad_norm": 0.900518000125885, "learning_rate": 7.361292181952795e-07, "loss": 0.5743, "step": 13106 }, { "epoch": 0.83, "grad_norm": 0.9295457601547241, "learning_rate": 7.355934521446151e-07, "loss": 0.5577, "step": 13107 }, { "epoch": 0.83, "grad_norm": 0.8961006999015808, "learning_rate": 7.350578656532776e-07, "loss": 0.5885, "step": 13108 }, { "epoch": 0.83, "grad_norm": 0.8948516249656677, "learning_rate": 7.345224587438171e-07, "loss": 0.5077, "step": 13109 }, { "epoch": 0.83, "grad_norm": 0.9140964150428772, "learning_rate": 7.339872314387763e-07, "loss": 0.6131, "step": 13110 }, { "epoch": 0.83, "grad_norm": 0.9755547046661377, "learning_rate": 7.334521837606934e-07, "loss": 0.6061, "step": 13111 }, { "epoch": 0.83, "grad_norm": 0.8581327795982361, "learning_rate": 7.329173157320962e-07, "loss": 0.5332, "step": 13112 }, { "epoch": 0.83, "grad_norm": 0.9618088603019714, "learning_rate": 7.323826273755069e-07, "loss": 0.5948, "step": 13113 }, { "epoch": 0.83, "grad_norm": 0.8937922120094299, "learning_rate": 7.318481187134408e-07, "loss": 0.5915, "step": 13114 }, { "epoch": 0.83, "grad_norm": 0.9323161244392395, "learning_rate": 7.313137897683997e-07, "loss": 0.6016, "step": 13115 }, { "epoch": 0.83, "grad_norm": 0.8632552623748779, "learning_rate": 7.30779640562887e-07, "loss": 0.6145, "step": 13116 }, { "epoch": 0.83, "grad_norm": 0.8710545897483826, "learning_rate": 7.302456711193928e-07, "loss": 0.5644, "step": 13117 }, { "epoch": 0.83, "grad_norm": 0.874191164970398, "learning_rate": 7.297118814603987e-07, "loss": 0.5579, "step": 13118 }, { "epoch": 0.83, "grad_norm": 0.8017786741256714, "learning_rate": 7.291782716083823e-07, "loss": 0.5414, "step": 13119 }, { "epoch": 0.83, "grad_norm": 0.8626580834388733, "learning_rate": 7.286448415858116e-07, "loss": 0.5909, "step": 13120 }, { "epoch": 0.83, "grad_norm": 0.8846031427383423, "learning_rate": 7.281115914151477e-07, "loss": 0.5291, "step": 13121 }, { "epoch": 0.83, "grad_norm": 0.8800442814826965, "learning_rate": 7.275785211188441e-07, "loss": 0.5698, "step": 13122 }, { "epoch": 0.83, "grad_norm": 0.8646133542060852, "learning_rate": 7.270456307193474e-07, "loss": 0.5776, "step": 13123 }, { "epoch": 0.83, "grad_norm": 0.9423984289169312, "learning_rate": 7.265129202390924e-07, "loss": 0.5374, "step": 13124 }, { "epoch": 0.83, "grad_norm": 0.8401879072189331, "learning_rate": 7.259803897005141e-07, "loss": 0.5583, "step": 13125 }, { "epoch": 0.83, "grad_norm": 0.8532096147537231, "learning_rate": 7.254480391260321e-07, "loss": 0.5056, "step": 13126 }, { "epoch": 0.83, "grad_norm": 0.8508062958717346, "learning_rate": 7.249158685380631e-07, "loss": 0.5793, "step": 13127 }, { "epoch": 0.83, "grad_norm": 0.8456823825836182, "learning_rate": 7.243838779590151e-07, "loss": 0.5542, "step": 13128 }, { "epoch": 0.83, "grad_norm": 0.9003103375434875, "learning_rate": 7.238520674112881e-07, "loss": 0.5354, "step": 13129 }, { "epoch": 0.83, "grad_norm": 0.8607522249221802, "learning_rate": 7.233204369172753e-07, "loss": 0.544, "step": 13130 }, { "epoch": 0.83, "grad_norm": 0.8859104514122009, "learning_rate": 7.22788986499362e-07, "loss": 0.5419, "step": 13131 }, { "epoch": 0.83, "grad_norm": 0.9029030799865723, "learning_rate": 7.222577161799232e-07, "loss": 0.5825, "step": 13132 }, { "epoch": 0.83, "grad_norm": 0.934764564037323, "learning_rate": 7.217266259813332e-07, "loss": 0.5783, "step": 13133 }, { "epoch": 0.83, "grad_norm": 0.8300181031227112, "learning_rate": 7.211957159259503e-07, "loss": 0.5394, "step": 13134 }, { "epoch": 0.83, "grad_norm": 0.8454645276069641, "learning_rate": 7.206649860361314e-07, "loss": 0.5528, "step": 13135 }, { "epoch": 0.83, "grad_norm": 0.8897960782051086, "learning_rate": 7.201344363342245e-07, "loss": 0.5781, "step": 13136 }, { "epoch": 0.83, "grad_norm": 0.8986917734146118, "learning_rate": 7.196040668425653e-07, "loss": 0.6028, "step": 13137 }, { "epoch": 0.83, "grad_norm": 0.9327632784843445, "learning_rate": 7.190738775834894e-07, "loss": 0.5857, "step": 13138 }, { "epoch": 0.83, "grad_norm": 0.8856915235519409, "learning_rate": 7.185438685793217e-07, "loss": 0.5882, "step": 13139 }, { "epoch": 0.83, "grad_norm": 0.9135767221450806, "learning_rate": 7.180140398523761e-07, "loss": 0.59, "step": 13140 }, { "epoch": 0.83, "grad_norm": 0.8973036408424377, "learning_rate": 7.174843914249636e-07, "loss": 0.5655, "step": 13141 }, { "epoch": 0.83, "grad_norm": 0.8983938694000244, "learning_rate": 7.169549233193857e-07, "loss": 0.5778, "step": 13142 }, { "epoch": 0.83, "grad_norm": 0.9242495894432068, "learning_rate": 7.164256355579363e-07, "loss": 0.5819, "step": 13143 }, { "epoch": 0.83, "grad_norm": 0.9081816673278809, "learning_rate": 7.158965281629027e-07, "loss": 0.5798, "step": 13144 }, { "epoch": 0.83, "grad_norm": 0.9231504201889038, "learning_rate": 7.153676011565613e-07, "loss": 0.6053, "step": 13145 }, { "epoch": 0.83, "grad_norm": 0.866088330745697, "learning_rate": 7.148388545611856e-07, "loss": 0.5286, "step": 13146 }, { "epoch": 0.83, "grad_norm": 0.8189731240272522, "learning_rate": 7.143102883990405e-07, "loss": 0.5759, "step": 13147 }, { "epoch": 0.83, "grad_norm": 0.8492090702056885, "learning_rate": 7.137819026923786e-07, "loss": 0.5127, "step": 13148 }, { "epoch": 0.83, "grad_norm": 0.8900519609451294, "learning_rate": 7.132536974634508e-07, "loss": 0.5905, "step": 13149 }, { "epoch": 0.83, "grad_norm": 0.8588072657585144, "learning_rate": 7.127256727344967e-07, "loss": 0.5479, "step": 13150 }, { "epoch": 0.83, "grad_norm": 0.9344004988670349, "learning_rate": 7.121978285277503e-07, "loss": 0.5901, "step": 13151 }, { "epoch": 0.83, "grad_norm": 0.9044827222824097, "learning_rate": 7.116701648654384e-07, "loss": 0.5989, "step": 13152 }, { "epoch": 0.83, "grad_norm": 0.8644382953643799, "learning_rate": 7.11142681769777e-07, "loss": 0.5589, "step": 13153 }, { "epoch": 0.83, "grad_norm": 0.9335626363754272, "learning_rate": 7.106153792629761e-07, "loss": 0.5711, "step": 13154 }, { "epoch": 0.83, "grad_norm": 0.8146085143089294, "learning_rate": 7.100882573672419e-07, "loss": 0.5407, "step": 13155 }, { "epoch": 0.83, "grad_norm": 0.8309633731842041, "learning_rate": 7.095613161047666e-07, "loss": 0.5615, "step": 13156 }, { "epoch": 0.83, "grad_norm": 0.8940461277961731, "learning_rate": 7.09034555497739e-07, "loss": 0.561, "step": 13157 }, { "epoch": 0.83, "grad_norm": 0.9012131690979004, "learning_rate": 7.085079755683389e-07, "loss": 0.5582, "step": 13158 }, { "epoch": 0.83, "grad_norm": 0.8765063881874084, "learning_rate": 7.079815763387393e-07, "loss": 0.5955, "step": 13159 }, { "epoch": 0.83, "grad_norm": 0.8758644461631775, "learning_rate": 7.074553578311055e-07, "loss": 0.5402, "step": 13160 }, { "epoch": 0.83, "grad_norm": 0.8788025975227356, "learning_rate": 7.06929320067593e-07, "loss": 0.6192, "step": 13161 }, { "epoch": 0.83, "grad_norm": 0.9614549279212952, "learning_rate": 7.064034630703515e-07, "loss": 0.6092, "step": 13162 }, { "epoch": 0.83, "grad_norm": 0.9305884838104248, "learning_rate": 7.058777868615258e-07, "loss": 0.602, "step": 13163 }, { "epoch": 0.83, "grad_norm": 0.8970014452934265, "learning_rate": 7.053522914632466e-07, "loss": 0.5997, "step": 13164 }, { "epoch": 0.83, "grad_norm": 0.8438460230827332, "learning_rate": 7.048269768976429e-07, "loss": 0.5581, "step": 13165 }, { "epoch": 0.83, "grad_norm": 0.9222960472106934, "learning_rate": 7.043018431868348e-07, "loss": 0.5482, "step": 13166 }, { "epoch": 0.83, "grad_norm": 0.897331714630127, "learning_rate": 7.037768903529302e-07, "loss": 0.6095, "step": 13167 }, { "epoch": 0.83, "grad_norm": 0.8716689348220825, "learning_rate": 7.032521184180369e-07, "loss": 0.5955, "step": 13168 }, { "epoch": 0.83, "grad_norm": 0.8186154961585999, "learning_rate": 7.027275274042489e-07, "loss": 0.5867, "step": 13169 }, { "epoch": 0.83, "grad_norm": 0.9115201830863953, "learning_rate": 7.022031173336557e-07, "loss": 0.5619, "step": 13170 }, { "epoch": 0.83, "grad_norm": 0.8985578417778015, "learning_rate": 7.016788882283382e-07, "loss": 0.6085, "step": 13171 }, { "epoch": 0.83, "grad_norm": 0.9288114905357361, "learning_rate": 7.011548401103696e-07, "loss": 0.6011, "step": 13172 }, { "epoch": 0.83, "grad_norm": 0.8916085958480835, "learning_rate": 7.006309730018168e-07, "loss": 0.5845, "step": 13173 }, { "epoch": 0.83, "grad_norm": 0.8739166855812073, "learning_rate": 7.001072869247378e-07, "loss": 0.6088, "step": 13174 }, { "epoch": 0.83, "grad_norm": 0.9117295145988464, "learning_rate": 6.995837819011808e-07, "loss": 0.5982, "step": 13175 }, { "epoch": 0.83, "grad_norm": 0.8260350227355957, "learning_rate": 6.990604579531929e-07, "loss": 0.5691, "step": 13176 }, { "epoch": 0.83, "grad_norm": 0.940250039100647, "learning_rate": 6.985373151028058e-07, "loss": 0.5862, "step": 13177 }, { "epoch": 0.83, "grad_norm": 0.8562113046646118, "learning_rate": 6.980143533720491e-07, "loss": 0.5487, "step": 13178 }, { "epoch": 0.83, "grad_norm": 0.8613032698631287, "learning_rate": 6.974915727829423e-07, "loss": 0.5633, "step": 13179 }, { "epoch": 0.84, "grad_norm": 0.986914336681366, "learning_rate": 6.96968973357498e-07, "loss": 0.6344, "step": 13180 }, { "epoch": 0.84, "grad_norm": 0.8467575311660767, "learning_rate": 6.964465551177208e-07, "loss": 0.4884, "step": 13181 }, { "epoch": 0.84, "grad_norm": 0.8608553409576416, "learning_rate": 6.959243180856096e-07, "loss": 0.627, "step": 13182 }, { "epoch": 0.84, "grad_norm": 0.8423926830291748, "learning_rate": 6.954022622831514e-07, "loss": 0.5243, "step": 13183 }, { "epoch": 0.84, "grad_norm": 0.8840621113777161, "learning_rate": 6.948803877323296e-07, "loss": 0.5071, "step": 13184 }, { "epoch": 0.84, "grad_norm": 0.8253465294837952, "learning_rate": 6.943586944551178e-07, "loss": 0.571, "step": 13185 }, { "epoch": 0.84, "grad_norm": 0.8736525774002075, "learning_rate": 6.938371824734835e-07, "loss": 0.6001, "step": 13186 }, { "epoch": 0.84, "grad_norm": 0.9959997534751892, "learning_rate": 6.933158518093852e-07, "loss": 0.6158, "step": 13187 }, { "epoch": 0.84, "grad_norm": 0.9295116066932678, "learning_rate": 6.927947024847748e-07, "loss": 0.581, "step": 13188 }, { "epoch": 0.84, "grad_norm": 0.9184585809707642, "learning_rate": 6.922737345215952e-07, "loss": 0.544, "step": 13189 }, { "epoch": 0.84, "grad_norm": 0.8298773169517517, "learning_rate": 6.91752947941785e-07, "loss": 0.561, "step": 13190 }, { "epoch": 0.84, "grad_norm": 0.8674336075782776, "learning_rate": 6.912323427672691e-07, "loss": 0.5629, "step": 13191 }, { "epoch": 0.84, "grad_norm": 0.9086819887161255, "learning_rate": 6.907119190199706e-07, "loss": 0.5735, "step": 13192 }, { "epoch": 0.84, "grad_norm": 0.8917360305786133, "learning_rate": 6.901916767218019e-07, "loss": 0.5546, "step": 13193 }, { "epoch": 0.84, "grad_norm": 0.8581564426422119, "learning_rate": 6.896716158946692e-07, "loss": 0.5619, "step": 13194 }, { "epoch": 0.84, "grad_norm": 0.8573694229125977, "learning_rate": 6.891517365604705e-07, "loss": 0.5962, "step": 13195 }, { "epoch": 0.84, "grad_norm": 0.8820661306381226, "learning_rate": 6.886320387410967e-07, "loss": 0.6283, "step": 13196 }, { "epoch": 0.84, "grad_norm": 0.8105853796005249, "learning_rate": 6.881125224584273e-07, "loss": 0.528, "step": 13197 }, { "epoch": 0.84, "grad_norm": 0.9009973406791687, "learning_rate": 6.875931877343417e-07, "loss": 0.5681, "step": 13198 }, { "epoch": 0.84, "grad_norm": 0.8252160549163818, "learning_rate": 6.870740345907046e-07, "loss": 0.5771, "step": 13199 }, { "epoch": 0.84, "grad_norm": 0.9308204054832458, "learning_rate": 6.865550630493756e-07, "loss": 0.5311, "step": 13200 }, { "epoch": 0.84, "grad_norm": 0.9394121766090393, "learning_rate": 6.860362731322079e-07, "loss": 0.618, "step": 13201 }, { "epoch": 0.84, "grad_norm": 0.8921918869018555, "learning_rate": 6.855176648610457e-07, "loss": 0.5777, "step": 13202 }, { "epoch": 0.84, "grad_norm": 0.8987441062927246, "learning_rate": 6.849992382577253e-07, "loss": 0.5614, "step": 13203 }, { "epoch": 0.84, "grad_norm": 0.8814181089401245, "learning_rate": 6.844809933440776e-07, "loss": 0.5644, "step": 13204 }, { "epoch": 0.84, "grad_norm": 0.9095494151115417, "learning_rate": 6.839629301419204e-07, "loss": 0.5416, "step": 13205 }, { "epoch": 0.84, "grad_norm": 0.864000678062439, "learning_rate": 6.83445048673072e-07, "loss": 0.5482, "step": 13206 }, { "epoch": 0.84, "grad_norm": 0.8674211502075195, "learning_rate": 6.829273489593352e-07, "loss": 0.5395, "step": 13207 }, { "epoch": 0.84, "grad_norm": 0.8799319863319397, "learning_rate": 6.824098310225097e-07, "loss": 0.5647, "step": 13208 }, { "epoch": 0.84, "grad_norm": 0.9398074150085449, "learning_rate": 6.818924948843863e-07, "loss": 0.5973, "step": 13209 }, { "epoch": 0.84, "grad_norm": 0.8925483226776123, "learning_rate": 6.81375340566749e-07, "loss": 0.5715, "step": 13210 }, { "epoch": 0.84, "grad_norm": 0.8708029389381409, "learning_rate": 6.808583680913722e-07, "loss": 0.5579, "step": 13211 }, { "epoch": 0.84, "grad_norm": 0.8572626113891602, "learning_rate": 6.803415774800253e-07, "loss": 0.5613, "step": 13212 }, { "epoch": 0.84, "grad_norm": 0.8568171858787537, "learning_rate": 6.798249687544667e-07, "loss": 0.5321, "step": 13213 }, { "epoch": 0.84, "grad_norm": 0.8693404197692871, "learning_rate": 6.793085419364498e-07, "loss": 0.555, "step": 13214 }, { "epoch": 0.84, "grad_norm": 0.8741576075553894, "learning_rate": 6.787922970477196e-07, "loss": 0.5667, "step": 13215 }, { "epoch": 0.84, "grad_norm": 0.9199385046958923, "learning_rate": 6.782762341100135e-07, "loss": 0.6141, "step": 13216 }, { "epoch": 0.84, "grad_norm": 0.8483101725578308, "learning_rate": 6.777603531450617e-07, "loss": 0.5482, "step": 13217 }, { "epoch": 0.84, "grad_norm": 0.8694477081298828, "learning_rate": 6.772446541745836e-07, "loss": 0.5839, "step": 13218 }, { "epoch": 0.84, "grad_norm": 0.9048340320587158, "learning_rate": 6.767291372202967e-07, "loss": 0.5781, "step": 13219 }, { "epoch": 0.84, "grad_norm": 0.9429792761802673, "learning_rate": 6.762138023039072e-07, "loss": 0.5974, "step": 13220 }, { "epoch": 0.84, "grad_norm": 0.913020670413971, "learning_rate": 6.756986494471119e-07, "loss": 0.6104, "step": 13221 }, { "epoch": 0.84, "grad_norm": 0.8851649761199951, "learning_rate": 6.751836786716032e-07, "loss": 0.5967, "step": 13222 }, { "epoch": 0.84, "grad_norm": 0.9294677972793579, "learning_rate": 6.74668889999065e-07, "loss": 0.5813, "step": 13223 }, { "epoch": 0.84, "grad_norm": 0.8423077464103699, "learning_rate": 6.741542834511727e-07, "loss": 0.5543, "step": 13224 }, { "epoch": 0.84, "grad_norm": 0.8565467000007629, "learning_rate": 6.736398590495968e-07, "loss": 0.5139, "step": 13225 }, { "epoch": 0.84, "grad_norm": 0.8920080661773682, "learning_rate": 6.731256168159939e-07, "loss": 0.5972, "step": 13226 }, { "epoch": 0.84, "grad_norm": 0.8450667858123779, "learning_rate": 6.726115567720198e-07, "loss": 0.5539, "step": 13227 }, { "epoch": 0.84, "grad_norm": 0.901174783706665, "learning_rate": 6.720976789393202e-07, "loss": 0.611, "step": 13228 }, { "epoch": 0.84, "grad_norm": 0.8898508548736572, "learning_rate": 6.71583983339531e-07, "loss": 0.5798, "step": 13229 }, { "epoch": 0.84, "grad_norm": 0.881693422794342, "learning_rate": 6.710704699942827e-07, "loss": 0.5774, "step": 13230 }, { "epoch": 0.84, "grad_norm": 0.8955451846122742, "learning_rate": 6.705571389251975e-07, "loss": 0.5217, "step": 13231 }, { "epoch": 0.84, "grad_norm": 0.9116746187210083, "learning_rate": 6.700439901538902e-07, "loss": 0.5331, "step": 13232 }, { "epoch": 0.84, "grad_norm": 0.9191250801086426, "learning_rate": 6.695310237019692e-07, "loss": 0.5772, "step": 13233 }, { "epoch": 0.84, "grad_norm": 0.9273549914360046, "learning_rate": 6.690182395910305e-07, "loss": 0.6408, "step": 13234 }, { "epoch": 0.84, "grad_norm": 0.8469404578208923, "learning_rate": 6.685056378426663e-07, "loss": 0.5477, "step": 13235 }, { "epoch": 0.84, "grad_norm": 0.9355968236923218, "learning_rate": 6.679932184784638e-07, "loss": 0.5865, "step": 13236 }, { "epoch": 0.84, "grad_norm": 0.865906834602356, "learning_rate": 6.674809815199962e-07, "loss": 0.5217, "step": 13237 }, { "epoch": 0.84, "grad_norm": 0.9029650688171387, "learning_rate": 6.669689269888325e-07, "loss": 0.5829, "step": 13238 }, { "epoch": 0.84, "grad_norm": 0.8489553332328796, "learning_rate": 6.664570549065336e-07, "loss": 0.519, "step": 13239 }, { "epoch": 0.84, "grad_norm": 0.8921743631362915, "learning_rate": 6.659453652946529e-07, "loss": 0.5553, "step": 13240 }, { "epoch": 0.84, "grad_norm": 0.8686976432800293, "learning_rate": 6.654338581747366e-07, "loss": 0.5974, "step": 13241 }, { "epoch": 0.84, "grad_norm": 0.9544159173965454, "learning_rate": 6.649225335683213e-07, "loss": 0.5986, "step": 13242 }, { "epoch": 0.84, "grad_norm": 0.9924260973930359, "learning_rate": 6.644113914969369e-07, "loss": 0.6438, "step": 13243 }, { "epoch": 0.84, "grad_norm": 0.8223074674606323, "learning_rate": 6.639004319821063e-07, "loss": 0.5702, "step": 13244 }, { "epoch": 0.84, "grad_norm": 0.889176070690155, "learning_rate": 6.63389655045345e-07, "loss": 0.5799, "step": 13245 }, { "epoch": 0.84, "grad_norm": 0.9296001195907593, "learning_rate": 6.628790607081586e-07, "loss": 0.583, "step": 13246 }, { "epoch": 0.84, "grad_norm": 0.8625611662864685, "learning_rate": 6.623686489920489e-07, "loss": 0.5476, "step": 13247 }, { "epoch": 0.84, "grad_norm": 0.9394053220748901, "learning_rate": 6.61858419918503e-07, "loss": 0.5582, "step": 13248 }, { "epoch": 0.84, "grad_norm": 0.9077306389808655, "learning_rate": 6.613483735090104e-07, "loss": 0.5972, "step": 13249 }, { "epoch": 0.84, "grad_norm": 0.8808714151382446, "learning_rate": 6.608385097850439e-07, "loss": 0.5684, "step": 13250 }, { "epoch": 0.84, "grad_norm": 0.9206782579421997, "learning_rate": 6.603288287680726e-07, "loss": 0.5882, "step": 13251 }, { "epoch": 0.84, "grad_norm": 0.8908818960189819, "learning_rate": 6.598193304795575e-07, "loss": 0.5315, "step": 13252 }, { "epoch": 0.84, "grad_norm": 0.8861278891563416, "learning_rate": 6.593100149409521e-07, "loss": 0.6004, "step": 13253 }, { "epoch": 0.84, "grad_norm": 0.9618304967880249, "learning_rate": 6.588008821737019e-07, "loss": 0.5732, "step": 13254 }, { "epoch": 0.84, "grad_norm": 0.9097421169281006, "learning_rate": 6.582919321992459e-07, "loss": 0.5964, "step": 13255 }, { "epoch": 0.84, "grad_norm": 0.9288156032562256, "learning_rate": 6.577831650390104e-07, "loss": 0.5434, "step": 13256 }, { "epoch": 0.84, "grad_norm": 0.9109866619110107, "learning_rate": 6.572745807144226e-07, "loss": 0.5443, "step": 13257 }, { "epoch": 0.84, "grad_norm": 0.8743159770965576, "learning_rate": 6.567661792468944e-07, "loss": 0.548, "step": 13258 }, { "epoch": 0.84, "grad_norm": 0.8892823457717896, "learning_rate": 6.562579606578328e-07, "loss": 0.5657, "step": 13259 }, { "epoch": 0.84, "grad_norm": 0.8872804045677185, "learning_rate": 6.557499249686377e-07, "loss": 0.5829, "step": 13260 }, { "epoch": 0.84, "grad_norm": 0.9161667823791504, "learning_rate": 6.552420722007008e-07, "loss": 0.5586, "step": 13261 }, { "epoch": 0.84, "grad_norm": 0.9431544542312622, "learning_rate": 6.547344023754065e-07, "loss": 0.6225, "step": 13262 }, { "epoch": 0.84, "grad_norm": 0.9163276553153992, "learning_rate": 6.542269155141306e-07, "loss": 0.5697, "step": 13263 }, { "epoch": 0.84, "grad_norm": 0.8587558269500732, "learning_rate": 6.537196116382411e-07, "loss": 0.5308, "step": 13264 }, { "epoch": 0.84, "grad_norm": 0.9046618938446045, "learning_rate": 6.532124907690979e-07, "loss": 0.5743, "step": 13265 }, { "epoch": 0.84, "grad_norm": 0.825258731842041, "learning_rate": 6.527055529280574e-07, "loss": 0.5398, "step": 13266 }, { "epoch": 0.84, "grad_norm": 0.9225800037384033, "learning_rate": 6.521987981364614e-07, "loss": 0.5864, "step": 13267 }, { "epoch": 0.84, "grad_norm": 0.8566347360610962, "learning_rate": 6.516922264156495e-07, "loss": 0.5508, "step": 13268 }, { "epoch": 0.84, "grad_norm": 0.8173342943191528, "learning_rate": 6.511858377869517e-07, "loss": 0.5485, "step": 13269 }, { "epoch": 0.84, "grad_norm": 0.8830443620681763, "learning_rate": 6.506796322716891e-07, "loss": 0.5276, "step": 13270 }, { "epoch": 0.84, "grad_norm": 0.9083720445632935, "learning_rate": 6.501736098911787e-07, "loss": 0.5389, "step": 13271 }, { "epoch": 0.84, "grad_norm": 0.8697338104248047, "learning_rate": 6.496677706667243e-07, "loss": 0.5826, "step": 13272 }, { "epoch": 0.84, "grad_norm": 0.8422214984893799, "learning_rate": 6.491621146196253e-07, "loss": 0.5484, "step": 13273 }, { "epoch": 0.84, "grad_norm": 0.8826960921287537, "learning_rate": 6.486566417711765e-07, "loss": 0.557, "step": 13274 }, { "epoch": 0.84, "grad_norm": 0.8876155614852905, "learning_rate": 6.481513521426581e-07, "loss": 0.5637, "step": 13275 }, { "epoch": 0.84, "grad_norm": 0.8074238896369934, "learning_rate": 6.476462457553473e-07, "loss": 0.5298, "step": 13276 }, { "epoch": 0.84, "grad_norm": 0.9412943124771118, "learning_rate": 6.471413226305134e-07, "loss": 0.5909, "step": 13277 }, { "epoch": 0.84, "grad_norm": 0.9044212102890015, "learning_rate": 6.466365827894133e-07, "loss": 0.603, "step": 13278 }, { "epoch": 0.84, "grad_norm": 0.8331887722015381, "learning_rate": 6.461320262533055e-07, "loss": 0.5901, "step": 13279 }, { "epoch": 0.84, "grad_norm": 0.8750473856925964, "learning_rate": 6.456276530434302e-07, "loss": 0.5648, "step": 13280 }, { "epoch": 0.84, "grad_norm": 0.91391921043396, "learning_rate": 6.451234631810271e-07, "loss": 0.5972, "step": 13281 }, { "epoch": 0.84, "grad_norm": 0.9085570573806763, "learning_rate": 6.446194566873254e-07, "loss": 0.6251, "step": 13282 }, { "epoch": 0.84, "grad_norm": 0.8852720260620117, "learning_rate": 6.441156335835474e-07, "loss": 0.5953, "step": 13283 }, { "epoch": 0.84, "grad_norm": 0.82054603099823, "learning_rate": 6.436119938909069e-07, "loss": 0.534, "step": 13284 }, { "epoch": 0.84, "grad_norm": 0.886782169342041, "learning_rate": 6.431085376306112e-07, "loss": 0.5974, "step": 13285 }, { "epoch": 0.84, "grad_norm": 0.8668603897094727, "learning_rate": 6.426052648238568e-07, "loss": 0.5656, "step": 13286 }, { "epoch": 0.84, "grad_norm": 0.8825658559799194, "learning_rate": 6.421021754918383e-07, "loss": 0.5434, "step": 13287 }, { "epoch": 0.84, "grad_norm": 0.899597704410553, "learning_rate": 6.415992696557361e-07, "loss": 0.5724, "step": 13288 }, { "epoch": 0.84, "grad_norm": 0.8868544101715088, "learning_rate": 6.41096547336727e-07, "loss": 0.569, "step": 13289 }, { "epoch": 0.84, "grad_norm": 0.8770740628242493, "learning_rate": 6.405940085559797e-07, "loss": 0.5501, "step": 13290 }, { "epoch": 0.84, "grad_norm": 0.8351693153381348, "learning_rate": 6.400916533346518e-07, "loss": 0.4812, "step": 13291 }, { "epoch": 0.84, "grad_norm": 0.9115918278694153, "learning_rate": 6.39589481693898e-07, "loss": 0.5842, "step": 13292 }, { "epoch": 0.84, "grad_norm": 0.8377058506011963, "learning_rate": 6.390874936548635e-07, "loss": 0.5911, "step": 13293 }, { "epoch": 0.84, "grad_norm": 0.8732972145080566, "learning_rate": 6.385856892386826e-07, "loss": 0.5937, "step": 13294 }, { "epoch": 0.84, "grad_norm": 0.9321759939193726, "learning_rate": 6.380840684664869e-07, "loss": 0.5681, "step": 13295 }, { "epoch": 0.84, "grad_norm": 0.9192104339599609, "learning_rate": 6.375826313593963e-07, "loss": 0.6206, "step": 13296 }, { "epoch": 0.84, "grad_norm": 0.9148771166801453, "learning_rate": 6.37081377938526e-07, "loss": 0.6008, "step": 13297 }, { "epoch": 0.84, "grad_norm": 0.9211153984069824, "learning_rate": 6.365803082249822e-07, "loss": 0.5656, "step": 13298 }, { "epoch": 0.84, "grad_norm": 0.8448777794837952, "learning_rate": 6.360794222398603e-07, "loss": 0.5574, "step": 13299 }, { "epoch": 0.84, "grad_norm": 0.8851933479309082, "learning_rate": 6.35578720004254e-07, "loss": 0.601, "step": 13300 }, { "epoch": 0.84, "grad_norm": 0.9539099335670471, "learning_rate": 6.350782015392459e-07, "loss": 0.6214, "step": 13301 }, { "epoch": 0.84, "grad_norm": 0.9478552341461182, "learning_rate": 6.345778668659097e-07, "loss": 0.6167, "step": 13302 }, { "epoch": 0.84, "grad_norm": 0.9490789175033569, "learning_rate": 6.34077716005313e-07, "loss": 0.5713, "step": 13303 }, { "epoch": 0.84, "grad_norm": 0.9128775000572205, "learning_rate": 6.335777489785161e-07, "loss": 0.5915, "step": 13304 }, { "epoch": 0.84, "grad_norm": 0.8613923788070679, "learning_rate": 6.3307796580657e-07, "loss": 0.6341, "step": 13305 }, { "epoch": 0.84, "grad_norm": 0.9108010530471802, "learning_rate": 6.325783665105206e-07, "loss": 0.5904, "step": 13306 }, { "epoch": 0.84, "grad_norm": 0.8792107701301575, "learning_rate": 6.320789511114022e-07, "loss": 0.5576, "step": 13307 }, { "epoch": 0.84, "grad_norm": 0.8667570352554321, "learning_rate": 6.315797196302432e-07, "loss": 0.5332, "step": 13308 }, { "epoch": 0.84, "grad_norm": 0.8885064721107483, "learning_rate": 6.310806720880675e-07, "loss": 0.5781, "step": 13309 }, { "epoch": 0.84, "grad_norm": 0.8899162411689758, "learning_rate": 6.305818085058852e-07, "loss": 0.5392, "step": 13310 }, { "epoch": 0.84, "grad_norm": 0.9016544222831726, "learning_rate": 6.300831289047027e-07, "loss": 0.5896, "step": 13311 }, { "epoch": 0.84, "grad_norm": 0.8984227776527405, "learning_rate": 6.295846333055184e-07, "loss": 0.561, "step": 13312 }, { "epoch": 0.84, "grad_norm": 0.898589015007019, "learning_rate": 6.290863217293214e-07, "loss": 0.5771, "step": 13313 }, { "epoch": 0.84, "grad_norm": 0.9066430330276489, "learning_rate": 6.285881941970951e-07, "loss": 0.5539, "step": 13314 }, { "epoch": 0.84, "grad_norm": 0.8809421062469482, "learning_rate": 6.280902507298115e-07, "loss": 0.5963, "step": 13315 }, { "epoch": 0.84, "grad_norm": 0.9171636700630188, "learning_rate": 6.275924913484377e-07, "loss": 0.575, "step": 13316 }, { "epoch": 0.84, "grad_norm": 0.8545477390289307, "learning_rate": 6.270949160739359e-07, "loss": 0.5312, "step": 13317 }, { "epoch": 0.84, "grad_norm": 0.8714274764060974, "learning_rate": 6.265975249272544e-07, "loss": 0.5371, "step": 13318 }, { "epoch": 0.84, "grad_norm": 0.9174915552139282, "learning_rate": 6.261003179293368e-07, "loss": 0.5563, "step": 13319 }, { "epoch": 0.84, "grad_norm": 0.9050828218460083, "learning_rate": 6.256032951011188e-07, "loss": 0.5948, "step": 13320 }, { "epoch": 0.84, "grad_norm": 0.9102218747138977, "learning_rate": 6.25106456463529e-07, "loss": 0.6076, "step": 13321 }, { "epoch": 0.84, "grad_norm": 0.8744686245918274, "learning_rate": 6.246098020374869e-07, "loss": 0.6083, "step": 13322 }, { "epoch": 0.84, "grad_norm": 0.8967841267585754, "learning_rate": 6.241133318439063e-07, "loss": 0.6014, "step": 13323 }, { "epoch": 0.84, "grad_norm": 0.9219756722450256, "learning_rate": 6.236170459036894e-07, "loss": 0.5299, "step": 13324 }, { "epoch": 0.84, "grad_norm": 0.958886981010437, "learning_rate": 6.23120944237735e-07, "loss": 0.6052, "step": 13325 }, { "epoch": 0.84, "grad_norm": 0.8341507315635681, "learning_rate": 6.226250268669309e-07, "loss": 0.5881, "step": 13326 }, { "epoch": 0.84, "grad_norm": 0.885211169719696, "learning_rate": 6.221292938121598e-07, "loss": 0.6086, "step": 13327 }, { "epoch": 0.84, "grad_norm": 0.8747490644454956, "learning_rate": 6.216337450942955e-07, "loss": 0.5505, "step": 13328 }, { "epoch": 0.84, "grad_norm": 0.8842592835426331, "learning_rate": 6.211383807342008e-07, "loss": 0.5602, "step": 13329 }, { "epoch": 0.84, "grad_norm": 0.9496366381645203, "learning_rate": 6.206432007527368e-07, "loss": 0.5424, "step": 13330 }, { "epoch": 0.84, "grad_norm": 0.9109143018722534, "learning_rate": 6.201482051707542e-07, "loss": 0.5971, "step": 13331 }, { "epoch": 0.84, "grad_norm": 0.8485182523727417, "learning_rate": 6.196533940090932e-07, "loss": 0.5543, "step": 13332 }, { "epoch": 0.84, "grad_norm": 1.0051995515823364, "learning_rate": 6.191587672885896e-07, "loss": 0.5688, "step": 13333 }, { "epoch": 0.84, "grad_norm": 0.8793126940727234, "learning_rate": 6.186643250300706e-07, "loss": 0.5992, "step": 13334 }, { "epoch": 0.84, "grad_norm": 0.9340550303459167, "learning_rate": 6.18170067254355e-07, "loss": 0.5385, "step": 13335 }, { "epoch": 0.84, "grad_norm": 0.9096164107322693, "learning_rate": 6.176759939822557e-07, "loss": 0.551, "step": 13336 }, { "epoch": 0.84, "grad_norm": 0.9081304669380188, "learning_rate": 6.171821052345744e-07, "loss": 0.5687, "step": 13337 }, { "epoch": 0.85, "grad_norm": 0.943519651889801, "learning_rate": 6.166884010321072e-07, "loss": 0.6095, "step": 13338 }, { "epoch": 0.85, "grad_norm": 0.8517118096351624, "learning_rate": 6.161948813956447e-07, "loss": 0.6042, "step": 13339 }, { "epoch": 0.85, "grad_norm": 0.8399627804756165, "learning_rate": 6.157015463459648e-07, "loss": 0.5601, "step": 13340 }, { "epoch": 0.85, "grad_norm": 0.8427531123161316, "learning_rate": 6.152083959038407e-07, "loss": 0.5183, "step": 13341 }, { "epoch": 0.85, "grad_norm": 0.9020541310310364, "learning_rate": 6.147154300900377e-07, "loss": 0.5783, "step": 13342 }, { "epoch": 0.85, "grad_norm": 0.9388177990913391, "learning_rate": 6.142226489253122e-07, "loss": 0.58, "step": 13343 }, { "epoch": 0.85, "grad_norm": 0.8787881135940552, "learning_rate": 6.137300524304151e-07, "loss": 0.5774, "step": 13344 }, { "epoch": 0.85, "grad_norm": 0.8807479739189148, "learning_rate": 6.132376406260865e-07, "loss": 0.5687, "step": 13345 }, { "epoch": 0.85, "grad_norm": 0.8714962601661682, "learning_rate": 6.127454135330585e-07, "loss": 0.6012, "step": 13346 }, { "epoch": 0.85, "grad_norm": 0.8867830038070679, "learning_rate": 6.122533711720613e-07, "loss": 0.5601, "step": 13347 }, { "epoch": 0.85, "grad_norm": 0.9229559898376465, "learning_rate": 6.1176151356381e-07, "loss": 0.6503, "step": 13348 }, { "epoch": 0.85, "grad_norm": 0.8665587902069092, "learning_rate": 6.112698407290158e-07, "loss": 0.6234, "step": 13349 }, { "epoch": 0.85, "grad_norm": 0.8939769864082336, "learning_rate": 6.107783526883809e-07, "loss": 0.601, "step": 13350 }, { "epoch": 0.85, "grad_norm": 0.8392643332481384, "learning_rate": 6.102870494626006e-07, "loss": 0.548, "step": 13351 }, { "epoch": 0.85, "grad_norm": 0.9490659236907959, "learning_rate": 6.097959310723633e-07, "loss": 0.6169, "step": 13352 }, { "epoch": 0.85, "grad_norm": 0.8439939618110657, "learning_rate": 6.093049975383458e-07, "loss": 0.5877, "step": 13353 }, { "epoch": 0.85, "grad_norm": 0.9144013524055481, "learning_rate": 6.08814248881221e-07, "loss": 0.5566, "step": 13354 }, { "epoch": 0.85, "grad_norm": 0.9243726134300232, "learning_rate": 6.083236851216517e-07, "loss": 0.5481, "step": 13355 }, { "epoch": 0.85, "grad_norm": 0.9642614722251892, "learning_rate": 6.078333062802949e-07, "loss": 0.6463, "step": 13356 }, { "epoch": 0.85, "grad_norm": 0.959270715713501, "learning_rate": 6.073431123777984e-07, "loss": 0.6111, "step": 13357 }, { "epoch": 0.85, "grad_norm": 0.894008219242096, "learning_rate": 6.068531034348035e-07, "loss": 0.5951, "step": 13358 }, { "epoch": 0.85, "grad_norm": 0.8639335632324219, "learning_rate": 6.063632794719399e-07, "loss": 0.5667, "step": 13359 }, { "epoch": 0.85, "grad_norm": 0.9120550751686096, "learning_rate": 6.058736405098359e-07, "loss": 0.6197, "step": 13360 }, { "epoch": 0.85, "grad_norm": 0.9336058497428894, "learning_rate": 6.053841865691063e-07, "loss": 0.6587, "step": 13361 }, { "epoch": 0.85, "grad_norm": 0.8768007159233093, "learning_rate": 6.048949176703606e-07, "loss": 0.5653, "step": 13362 }, { "epoch": 0.85, "grad_norm": 0.8573430180549622, "learning_rate": 6.044058338342002e-07, "loss": 0.5534, "step": 13363 }, { "epoch": 0.85, "grad_norm": 0.8514514565467834, "learning_rate": 6.039169350812191e-07, "loss": 0.587, "step": 13364 }, { "epoch": 0.85, "grad_norm": 0.9030587673187256, "learning_rate": 6.034282214320031e-07, "loss": 0.5804, "step": 13365 }, { "epoch": 0.85, "grad_norm": 0.9698714017868042, "learning_rate": 6.029396929071313e-07, "loss": 0.6393, "step": 13366 }, { "epoch": 0.85, "grad_norm": 0.9271089434623718, "learning_rate": 6.024513495271705e-07, "loss": 0.5814, "step": 13367 }, { "epoch": 0.85, "grad_norm": 0.8566939234733582, "learning_rate": 6.019631913126877e-07, "loss": 0.4997, "step": 13368 }, { "epoch": 0.85, "grad_norm": 0.9297276139259338, "learning_rate": 6.014752182842343e-07, "loss": 0.5583, "step": 13369 }, { "epoch": 0.85, "grad_norm": 0.8690567016601562, "learning_rate": 6.009874304623576e-07, "loss": 0.5628, "step": 13370 }, { "epoch": 0.85, "grad_norm": 0.8401360511779785, "learning_rate": 6.004998278675988e-07, "loss": 0.5212, "step": 13371 }, { "epoch": 0.85, "grad_norm": 0.9174624681472778, "learning_rate": 6.000124105204847e-07, "loss": 0.5717, "step": 13372 }, { "epoch": 0.85, "grad_norm": 0.9405276775360107, "learning_rate": 5.995251784415435e-07, "loss": 0.5852, "step": 13373 }, { "epoch": 0.85, "grad_norm": 0.8843702673912048, "learning_rate": 5.990381316512894e-07, "loss": 0.5789, "step": 13374 }, { "epoch": 0.85, "grad_norm": 0.9235939383506775, "learning_rate": 5.985512701702284e-07, "loss": 0.5835, "step": 13375 }, { "epoch": 0.85, "grad_norm": 0.8629280924797058, "learning_rate": 5.980645940188623e-07, "loss": 0.5666, "step": 13376 }, { "epoch": 0.85, "grad_norm": 0.9397252202033997, "learning_rate": 5.975781032176831e-07, "loss": 0.5569, "step": 13377 }, { "epoch": 0.85, "grad_norm": 0.937901496887207, "learning_rate": 5.970917977871749e-07, "loss": 0.5956, "step": 13378 }, { "epoch": 0.85, "grad_norm": 0.87679123878479, "learning_rate": 5.966056777478152e-07, "loss": 0.5403, "step": 13379 }, { "epoch": 0.85, "grad_norm": 0.9953281283378601, "learning_rate": 5.961197431200705e-07, "loss": 0.5968, "step": 13380 }, { "epoch": 0.85, "grad_norm": 0.9243939518928528, "learning_rate": 5.956339939244044e-07, "loss": 0.5376, "step": 13381 }, { "epoch": 0.85, "grad_norm": 0.8928592205047607, "learning_rate": 5.951484301812699e-07, "loss": 0.5919, "step": 13382 }, { "epoch": 0.85, "grad_norm": 0.9274805188179016, "learning_rate": 5.946630519111107e-07, "loss": 0.5901, "step": 13383 }, { "epoch": 0.85, "grad_norm": 0.9007667303085327, "learning_rate": 5.941778591343656e-07, "loss": 0.5883, "step": 13384 }, { "epoch": 0.85, "grad_norm": 0.8749024868011475, "learning_rate": 5.936928518714641e-07, "loss": 0.5715, "step": 13385 }, { "epoch": 0.85, "grad_norm": 0.8843820691108704, "learning_rate": 5.932080301428278e-07, "loss": 0.5757, "step": 13386 }, { "epoch": 0.85, "grad_norm": 0.88556969165802, "learning_rate": 5.927233939688714e-07, "loss": 0.6128, "step": 13387 }, { "epoch": 0.85, "grad_norm": 0.8423168659210205, "learning_rate": 5.922389433700021e-07, "loss": 0.5531, "step": 13388 }, { "epoch": 0.85, "grad_norm": 0.8334605693817139, "learning_rate": 5.917546783666156e-07, "loss": 0.5426, "step": 13389 }, { "epoch": 0.85, "grad_norm": 0.9283615946769714, "learning_rate": 5.912705989791062e-07, "loss": 0.5903, "step": 13390 }, { "epoch": 0.85, "grad_norm": 0.8497453927993774, "learning_rate": 5.907867052278543e-07, "loss": 0.5435, "step": 13391 }, { "epoch": 0.85, "grad_norm": 0.9120060205459595, "learning_rate": 5.903029971332353e-07, "loss": 0.6199, "step": 13392 }, { "epoch": 0.85, "grad_norm": 0.912470817565918, "learning_rate": 5.898194747156171e-07, "loss": 0.6023, "step": 13393 }, { "epoch": 0.85, "grad_norm": 0.9185166358947754, "learning_rate": 5.893361379953588e-07, "loss": 0.5856, "step": 13394 }, { "epoch": 0.85, "grad_norm": 0.9453598260879517, "learning_rate": 5.888529869928122e-07, "loss": 0.6039, "step": 13395 }, { "epoch": 0.85, "grad_norm": 0.9415664672851562, "learning_rate": 5.883700217283223e-07, "loss": 0.5434, "step": 13396 }, { "epoch": 0.85, "grad_norm": 0.9080526232719421, "learning_rate": 5.878872422222215e-07, "loss": 0.5224, "step": 13397 }, { "epoch": 0.85, "grad_norm": 0.9032747745513916, "learning_rate": 5.874046484948426e-07, "loss": 0.6058, "step": 13398 }, { "epoch": 0.85, "grad_norm": 0.9231809377670288, "learning_rate": 5.869222405665026e-07, "loss": 0.5567, "step": 13399 }, { "epoch": 0.85, "grad_norm": 0.9746513366699219, "learning_rate": 5.864400184575153e-07, "loss": 0.5836, "step": 13400 }, { "epoch": 0.85, "grad_norm": 0.927158534526825, "learning_rate": 5.859579821881855e-07, "loss": 0.6028, "step": 13401 }, { "epoch": 0.85, "grad_norm": 0.8797077536582947, "learning_rate": 5.854761317788082e-07, "loss": 0.5583, "step": 13402 }, { "epoch": 0.85, "grad_norm": 0.8659250736236572, "learning_rate": 5.849944672496749e-07, "loss": 0.6296, "step": 13403 }, { "epoch": 0.85, "grad_norm": 0.9689622521400452, "learning_rate": 5.845129886210671e-07, "loss": 0.6281, "step": 13404 }, { "epoch": 0.85, "grad_norm": 0.8522788286209106, "learning_rate": 5.840316959132558e-07, "loss": 0.5162, "step": 13405 }, { "epoch": 0.85, "grad_norm": 0.904559850692749, "learning_rate": 5.835505891465076e-07, "loss": 0.5924, "step": 13406 }, { "epoch": 0.85, "grad_norm": 0.8650006651878357, "learning_rate": 5.830696683410802e-07, "loss": 0.5552, "step": 13407 }, { "epoch": 0.85, "grad_norm": 0.8110765218734741, "learning_rate": 5.825889335172241e-07, "loss": 0.5428, "step": 13408 }, { "epoch": 0.85, "grad_norm": 0.8690059185028076, "learning_rate": 5.821083846951819e-07, "loss": 0.6166, "step": 13409 }, { "epoch": 0.85, "grad_norm": 0.8721504807472229, "learning_rate": 5.816280218951847e-07, "loss": 0.5206, "step": 13410 }, { "epoch": 0.85, "grad_norm": 0.8614574074745178, "learning_rate": 5.811478451374625e-07, "loss": 0.563, "step": 13411 }, { "epoch": 0.85, "grad_norm": 0.9383098483085632, "learning_rate": 5.806678544422334e-07, "loss": 0.6108, "step": 13412 }, { "epoch": 0.85, "grad_norm": 0.8840879201889038, "learning_rate": 5.801880498297057e-07, "loss": 0.57, "step": 13413 }, { "epoch": 0.85, "grad_norm": 0.8915720582008362, "learning_rate": 5.797084313200846e-07, "loss": 0.5684, "step": 13414 }, { "epoch": 0.85, "grad_norm": 0.8662636876106262, "learning_rate": 5.792289989335637e-07, "loss": 0.6, "step": 13415 }, { "epoch": 0.85, "grad_norm": 0.9727985858917236, "learning_rate": 5.787497526903313e-07, "loss": 0.6059, "step": 13416 }, { "epoch": 0.85, "grad_norm": 0.8884052634239197, "learning_rate": 5.782706926105674e-07, "loss": 0.6134, "step": 13417 }, { "epoch": 0.85, "grad_norm": 0.8743575215339661, "learning_rate": 5.777918187144416e-07, "loss": 0.5764, "step": 13418 }, { "epoch": 0.85, "grad_norm": 0.9156510233879089, "learning_rate": 5.773131310221169e-07, "loss": 0.6167, "step": 13419 }, { "epoch": 0.85, "grad_norm": 0.898995578289032, "learning_rate": 5.768346295537536e-07, "loss": 0.6059, "step": 13420 }, { "epoch": 0.85, "grad_norm": 0.8932662606239319, "learning_rate": 5.76356314329496e-07, "loss": 0.6145, "step": 13421 }, { "epoch": 0.85, "grad_norm": 0.9043698310852051, "learning_rate": 5.758781853694845e-07, "loss": 0.6164, "step": 13422 }, { "epoch": 0.85, "grad_norm": 0.8941948413848877, "learning_rate": 5.754002426938532e-07, "loss": 0.5835, "step": 13423 }, { "epoch": 0.85, "grad_norm": 0.871859073638916, "learning_rate": 5.749224863227249e-07, "loss": 0.5474, "step": 13424 }, { "epoch": 0.85, "grad_norm": 0.9060640335083008, "learning_rate": 5.744449162762183e-07, "loss": 0.5546, "step": 13425 }, { "epoch": 0.85, "grad_norm": 0.9025922417640686, "learning_rate": 5.739675325744398e-07, "loss": 0.5765, "step": 13426 }, { "epoch": 0.85, "grad_norm": 0.9106086492538452, "learning_rate": 5.734903352374904e-07, "loss": 0.5614, "step": 13427 }, { "epoch": 0.85, "grad_norm": 0.8653062582015991, "learning_rate": 5.730133242854663e-07, "loss": 0.5451, "step": 13428 }, { "epoch": 0.85, "grad_norm": 0.8670951724052429, "learning_rate": 5.725364997384498e-07, "loss": 0.5791, "step": 13429 }, { "epoch": 0.85, "grad_norm": 0.8815758228302002, "learning_rate": 5.720598616165196e-07, "loss": 0.5999, "step": 13430 }, { "epoch": 0.85, "grad_norm": 0.8936425447463989, "learning_rate": 5.715834099397455e-07, "loss": 0.5746, "step": 13431 }, { "epoch": 0.85, "grad_norm": 0.8447661995887756, "learning_rate": 5.711071447281868e-07, "loss": 0.5322, "step": 13432 }, { "epoch": 0.85, "grad_norm": 0.8403939008712769, "learning_rate": 5.70631066001901e-07, "loss": 0.5512, "step": 13433 }, { "epoch": 0.85, "grad_norm": 0.8799472451210022, "learning_rate": 5.701551737809319e-07, "loss": 0.5443, "step": 13434 }, { "epoch": 0.85, "grad_norm": 0.9245263934135437, "learning_rate": 5.696794680853179e-07, "loss": 0.558, "step": 13435 }, { "epoch": 0.85, "grad_norm": 0.8561593890190125, "learning_rate": 5.692039489350892e-07, "loss": 0.5743, "step": 13436 }, { "epoch": 0.85, "grad_norm": 0.8703195452690125, "learning_rate": 5.687286163502687e-07, "loss": 0.5518, "step": 13437 }, { "epoch": 0.85, "grad_norm": 0.9124912619590759, "learning_rate": 5.682534703508713e-07, "loss": 0.5345, "step": 13438 }, { "epoch": 0.85, "grad_norm": 0.9110020995140076, "learning_rate": 5.67778510956904e-07, "loss": 0.5506, "step": 13439 }, { "epoch": 0.85, "grad_norm": 0.8271638751029968, "learning_rate": 5.673037381883634e-07, "loss": 0.4917, "step": 13440 }, { "epoch": 0.85, "grad_norm": 0.8652800917625427, "learning_rate": 5.668291520652436e-07, "loss": 0.5618, "step": 13441 }, { "epoch": 0.85, "grad_norm": 0.8255113959312439, "learning_rate": 5.663547526075258e-07, "loss": 0.5208, "step": 13442 }, { "epoch": 0.85, "grad_norm": 0.8780609965324402, "learning_rate": 5.658805398351858e-07, "loss": 0.6078, "step": 13443 }, { "epoch": 0.85, "grad_norm": 0.8495383858680725, "learning_rate": 5.654065137681907e-07, "loss": 0.5683, "step": 13444 }, { "epoch": 0.85, "grad_norm": 0.9232254028320312, "learning_rate": 5.64932674426501e-07, "loss": 0.6365, "step": 13445 }, { "epoch": 0.85, "grad_norm": 0.8610829710960388, "learning_rate": 5.644590218300672e-07, "loss": 0.5327, "step": 13446 }, { "epoch": 0.85, "grad_norm": 0.8897087574005127, "learning_rate": 5.639855559988356e-07, "loss": 0.5343, "step": 13447 }, { "epoch": 0.85, "grad_norm": 0.867492139339447, "learning_rate": 5.63512276952739e-07, "loss": 0.6033, "step": 13448 }, { "epoch": 0.85, "grad_norm": 0.9001726508140564, "learning_rate": 5.630391847117073e-07, "loss": 0.6116, "step": 13449 }, { "epoch": 0.85, "grad_norm": 0.9186358451843262, "learning_rate": 5.625662792956604e-07, "loss": 0.5888, "step": 13450 }, { "epoch": 0.85, "grad_norm": 0.8732519745826721, "learning_rate": 5.620935607245109e-07, "loss": 0.5883, "step": 13451 }, { "epoch": 0.85, "grad_norm": 0.8616448044776917, "learning_rate": 5.616210290181628e-07, "loss": 0.5351, "step": 13452 }, { "epoch": 0.85, "grad_norm": 0.9044156074523926, "learning_rate": 5.611486841965136e-07, "loss": 0.5497, "step": 13453 }, { "epoch": 0.85, "grad_norm": 0.8665462732315063, "learning_rate": 5.606765262794512e-07, "loss": 0.5753, "step": 13454 }, { "epoch": 0.85, "grad_norm": 0.9290836453437805, "learning_rate": 5.602045552868585e-07, "loss": 0.5808, "step": 13455 }, { "epoch": 0.85, "grad_norm": 0.8570681810379028, "learning_rate": 5.597327712386058e-07, "loss": 0.6147, "step": 13456 }, { "epoch": 0.85, "grad_norm": 0.8335081338882446, "learning_rate": 5.592611741545594e-07, "loss": 0.5146, "step": 13457 }, { "epoch": 0.85, "grad_norm": 0.8507091999053955, "learning_rate": 5.58789764054577e-07, "loss": 0.5839, "step": 13458 }, { "epoch": 0.85, "grad_norm": 0.8704282641410828, "learning_rate": 5.583185409585079e-07, "loss": 0.5506, "step": 13459 }, { "epoch": 0.85, "grad_norm": 0.9266949892044067, "learning_rate": 5.578475048861931e-07, "loss": 0.5791, "step": 13460 }, { "epoch": 0.85, "grad_norm": 0.8802145719528198, "learning_rate": 5.573766558574684e-07, "loss": 0.5466, "step": 13461 }, { "epoch": 0.85, "grad_norm": 0.7950432300567627, "learning_rate": 5.569059938921551e-07, "loss": 0.5532, "step": 13462 }, { "epoch": 0.85, "grad_norm": 0.850308895111084, "learning_rate": 5.564355190100768e-07, "loss": 0.5333, "step": 13463 }, { "epoch": 0.85, "grad_norm": 0.8470205664634705, "learning_rate": 5.559652312310393e-07, "loss": 0.5449, "step": 13464 }, { "epoch": 0.85, "grad_norm": 0.8567230701446533, "learning_rate": 5.554951305748462e-07, "loss": 0.5504, "step": 13465 }, { "epoch": 0.85, "grad_norm": 0.8885741829872131, "learning_rate": 5.550252170612924e-07, "loss": 0.5441, "step": 13466 }, { "epoch": 0.85, "grad_norm": 0.9018322229385376, "learning_rate": 5.545554907101636e-07, "loss": 0.5781, "step": 13467 }, { "epoch": 0.85, "grad_norm": 0.8181560039520264, "learning_rate": 5.540859515412378e-07, "loss": 0.5483, "step": 13468 }, { "epoch": 0.85, "grad_norm": 0.8753595352172852, "learning_rate": 5.536165995742882e-07, "loss": 0.5315, "step": 13469 }, { "epoch": 0.85, "grad_norm": 0.8592386841773987, "learning_rate": 5.531474348290733e-07, "loss": 0.5426, "step": 13470 }, { "epoch": 0.85, "grad_norm": 0.8794154524803162, "learning_rate": 5.526784573253525e-07, "loss": 0.5856, "step": 13471 }, { "epoch": 0.85, "grad_norm": 0.9070557951927185, "learning_rate": 5.522096670828703e-07, "loss": 0.5833, "step": 13472 }, { "epoch": 0.85, "grad_norm": 0.8681169152259827, "learning_rate": 5.517410641213656e-07, "loss": 0.5704, "step": 13473 }, { "epoch": 0.85, "grad_norm": 0.8716253042221069, "learning_rate": 5.512726484605707e-07, "loss": 0.557, "step": 13474 }, { "epoch": 0.85, "grad_norm": 0.8904623985290527, "learning_rate": 5.508044201202084e-07, "loss": 0.5565, "step": 13475 }, { "epoch": 0.85, "grad_norm": 0.842241644859314, "learning_rate": 5.503363791199945e-07, "loss": 0.526, "step": 13476 }, { "epoch": 0.85, "grad_norm": 0.8667955994606018, "learning_rate": 5.49868525479637e-07, "loss": 0.6341, "step": 13477 }, { "epoch": 0.85, "grad_norm": 0.8526463508605957, "learning_rate": 5.494008592188344e-07, "loss": 0.527, "step": 13478 }, { "epoch": 0.85, "grad_norm": 0.8465002775192261, "learning_rate": 5.489333803572788e-07, "loss": 0.5513, "step": 13479 }, { "epoch": 0.85, "grad_norm": 0.802689254283905, "learning_rate": 5.484660889146548e-07, "loss": 0.5247, "step": 13480 }, { "epoch": 0.85, "grad_norm": 0.9201193451881409, "learning_rate": 5.479989849106381e-07, "loss": 0.5893, "step": 13481 }, { "epoch": 0.85, "grad_norm": 0.8439991474151611, "learning_rate": 5.475320683648977e-07, "loss": 0.5606, "step": 13482 }, { "epoch": 0.85, "grad_norm": 0.8193072080612183, "learning_rate": 5.470653392970904e-07, "loss": 0.5555, "step": 13483 }, { "epoch": 0.85, "grad_norm": 0.876397967338562, "learning_rate": 5.465987977268727e-07, "loss": 0.5745, "step": 13484 }, { "epoch": 0.85, "grad_norm": 0.9382455348968506, "learning_rate": 5.46132443673888e-07, "loss": 0.6023, "step": 13485 }, { "epoch": 0.85, "grad_norm": 0.9134024977684021, "learning_rate": 5.456662771577714e-07, "loss": 0.56, "step": 13486 }, { "epoch": 0.85, "grad_norm": 0.9391716718673706, "learning_rate": 5.452002981981519e-07, "loss": 0.6151, "step": 13487 }, { "epoch": 0.85, "grad_norm": 0.9424962401390076, "learning_rate": 5.447345068146515e-07, "loss": 0.6357, "step": 13488 }, { "epoch": 0.85, "grad_norm": 0.8668440580368042, "learning_rate": 5.442689030268816e-07, "loss": 0.5539, "step": 13489 }, { "epoch": 0.85, "grad_norm": 0.8978198170661926, "learning_rate": 5.438034868544495e-07, "loss": 0.6061, "step": 13490 }, { "epoch": 0.85, "grad_norm": 0.8872178196907043, "learning_rate": 5.433382583169478e-07, "loss": 0.5772, "step": 13491 }, { "epoch": 0.85, "grad_norm": 0.9086841344833374, "learning_rate": 5.428732174339702e-07, "loss": 0.584, "step": 13492 }, { "epoch": 0.85, "grad_norm": 0.8926877975463867, "learning_rate": 5.424083642250966e-07, "loss": 0.5608, "step": 13493 }, { "epoch": 0.85, "grad_norm": 0.9636724591255188, "learning_rate": 5.419436987098991e-07, "loss": 0.6392, "step": 13494 }, { "epoch": 0.85, "grad_norm": 0.8971894383430481, "learning_rate": 5.414792209079445e-07, "loss": 0.5755, "step": 13495 }, { "epoch": 0.86, "grad_norm": 0.8618263006210327, "learning_rate": 5.410149308387891e-07, "loss": 0.6396, "step": 13496 }, { "epoch": 0.86, "grad_norm": 0.8926728963851929, "learning_rate": 5.405508285219835e-07, "loss": 0.5824, "step": 13497 }, { "epoch": 0.86, "grad_norm": 0.8346815705299377, "learning_rate": 5.400869139770704e-07, "loss": 0.5066, "step": 13498 }, { "epoch": 0.86, "grad_norm": 0.9091081023216248, "learning_rate": 5.396231872235819e-07, "loss": 0.5603, "step": 13499 }, { "epoch": 0.86, "grad_norm": 0.9015220403671265, "learning_rate": 5.391596482810424e-07, "loss": 0.5675, "step": 13500 }, { "epoch": 0.86, "grad_norm": 0.9150410294532776, "learning_rate": 5.386962971689746e-07, "loss": 0.5732, "step": 13501 }, { "epoch": 0.86, "grad_norm": 0.8379479050636292, "learning_rate": 5.382331339068853e-07, "loss": 0.5747, "step": 13502 }, { "epoch": 0.86, "grad_norm": 0.8525556325912476, "learning_rate": 5.377701585142769e-07, "loss": 0.5536, "step": 13503 }, { "epoch": 0.86, "grad_norm": 0.9342008233070374, "learning_rate": 5.373073710106441e-07, "loss": 0.6113, "step": 13504 }, { "epoch": 0.86, "grad_norm": 0.9186147451400757, "learning_rate": 5.368447714154734e-07, "loss": 0.5781, "step": 13505 }, { "epoch": 0.86, "grad_norm": 0.8697748780250549, "learning_rate": 5.363823597482443e-07, "loss": 0.5869, "step": 13506 }, { "epoch": 0.86, "grad_norm": 0.8578813076019287, "learning_rate": 5.359201360284255e-07, "loss": 0.5598, "step": 13507 }, { "epoch": 0.86, "grad_norm": 0.9658546447753906, "learning_rate": 5.354581002754799e-07, "loss": 0.5963, "step": 13508 }, { "epoch": 0.86, "grad_norm": 0.8002378344535828, "learning_rate": 5.349962525088631e-07, "loss": 0.5307, "step": 13509 }, { "epoch": 0.86, "grad_norm": 0.8562396764755249, "learning_rate": 5.345345927480211e-07, "loss": 0.5668, "step": 13510 }, { "epoch": 0.86, "grad_norm": 0.8851287961006165, "learning_rate": 5.340731210123934e-07, "loss": 0.5659, "step": 13511 }, { "epoch": 0.86, "grad_norm": 0.9325246214866638, "learning_rate": 5.336118373214116e-07, "loss": 0.6068, "step": 13512 }, { "epoch": 0.86, "grad_norm": 0.9005350470542908, "learning_rate": 5.331507416944965e-07, "loss": 0.5734, "step": 13513 }, { "epoch": 0.86, "grad_norm": 0.915073573589325, "learning_rate": 5.326898341510655e-07, "loss": 0.6254, "step": 13514 }, { "epoch": 0.86, "grad_norm": 0.8757150173187256, "learning_rate": 5.322291147105246e-07, "loss": 0.5644, "step": 13515 }, { "epoch": 0.86, "grad_norm": 0.8947983384132385, "learning_rate": 5.317685833922737e-07, "loss": 0.6423, "step": 13516 }, { "epoch": 0.86, "grad_norm": 0.941947877407074, "learning_rate": 5.313082402157039e-07, "loss": 0.5757, "step": 13517 }, { "epoch": 0.86, "grad_norm": 0.8160790801048279, "learning_rate": 5.308480852001979e-07, "loss": 0.5549, "step": 13518 }, { "epoch": 0.86, "grad_norm": 0.8949527740478516, "learning_rate": 5.303881183651327e-07, "loss": 0.5, "step": 13519 }, { "epoch": 0.86, "grad_norm": 0.9550206065177917, "learning_rate": 5.29928339729876e-07, "loss": 0.6233, "step": 13520 }, { "epoch": 0.86, "grad_norm": 0.8570389747619629, "learning_rate": 5.294687493137845e-07, "loss": 0.5913, "step": 13521 }, { "epoch": 0.86, "grad_norm": 0.8469735980033875, "learning_rate": 5.290093471362145e-07, "loss": 0.5931, "step": 13522 }, { "epoch": 0.86, "grad_norm": 0.8493378162384033, "learning_rate": 5.28550133216506e-07, "loss": 0.608, "step": 13523 }, { "epoch": 0.86, "grad_norm": 0.8677387237548828, "learning_rate": 5.28091107573997e-07, "loss": 0.5272, "step": 13524 }, { "epoch": 0.86, "grad_norm": 0.8398542404174805, "learning_rate": 5.27632270228014e-07, "loss": 0.5557, "step": 13525 }, { "epoch": 0.86, "grad_norm": 0.9466037154197693, "learning_rate": 5.271736211978784e-07, "loss": 0.5509, "step": 13526 }, { "epoch": 0.86, "grad_norm": 0.9450697302818298, "learning_rate": 5.267151605029014e-07, "loss": 0.5604, "step": 13527 }, { "epoch": 0.86, "grad_norm": 0.8523156046867371, "learning_rate": 5.262568881623892e-07, "loss": 0.5634, "step": 13528 }, { "epoch": 0.86, "grad_norm": 0.8883264660835266, "learning_rate": 5.257988041956347e-07, "loss": 0.543, "step": 13529 }, { "epoch": 0.86, "grad_norm": 0.9195562601089478, "learning_rate": 5.253409086219274e-07, "loss": 0.6409, "step": 13530 }, { "epoch": 0.86, "grad_norm": 0.8769651055335999, "learning_rate": 5.248832014605503e-07, "loss": 0.542, "step": 13531 }, { "epoch": 0.86, "grad_norm": 0.9406867623329163, "learning_rate": 5.244256827307726e-07, "loss": 0.6345, "step": 13532 }, { "epoch": 0.86, "grad_norm": 0.811181366443634, "learning_rate": 5.239683524518596e-07, "loss": 0.5724, "step": 13533 }, { "epoch": 0.86, "grad_norm": 0.9315853714942932, "learning_rate": 5.23511210643069e-07, "loss": 0.5907, "step": 13534 }, { "epoch": 0.86, "grad_norm": 0.852668821811676, "learning_rate": 5.230542573236485e-07, "loss": 0.5694, "step": 13535 }, { "epoch": 0.86, "grad_norm": 0.8345797657966614, "learning_rate": 5.225974925128402e-07, "loss": 0.5513, "step": 13536 }, { "epoch": 0.86, "grad_norm": 0.9633619785308838, "learning_rate": 5.221409162298741e-07, "loss": 0.607, "step": 13537 }, { "epoch": 0.86, "grad_norm": 0.8628314733505249, "learning_rate": 5.216845284939764e-07, "loss": 0.5985, "step": 13538 }, { "epoch": 0.86, "grad_norm": 0.8650707602500916, "learning_rate": 5.212283293243658e-07, "loss": 0.5438, "step": 13539 }, { "epoch": 0.86, "grad_norm": 0.8653766512870789, "learning_rate": 5.207723187402491e-07, "loss": 0.521, "step": 13540 }, { "epoch": 0.86, "grad_norm": 0.9244462847709656, "learning_rate": 5.203164967608282e-07, "loss": 0.6332, "step": 13541 }, { "epoch": 0.86, "grad_norm": 0.9744123816490173, "learning_rate": 5.198608634052965e-07, "loss": 0.6545, "step": 13542 }, { "epoch": 0.86, "grad_norm": 0.8849944472312927, "learning_rate": 5.194054186928365e-07, "loss": 0.5575, "step": 13543 }, { "epoch": 0.86, "grad_norm": 0.8665662407875061, "learning_rate": 5.189501626426297e-07, "loss": 0.5634, "step": 13544 }, { "epoch": 0.86, "grad_norm": 0.9209324717521667, "learning_rate": 5.184950952738421e-07, "loss": 0.6306, "step": 13545 }, { "epoch": 0.86, "grad_norm": 0.86234050989151, "learning_rate": 5.180402166056359e-07, "loss": 0.5072, "step": 13546 }, { "epoch": 0.86, "grad_norm": 0.9226478338241577, "learning_rate": 5.175855266571644e-07, "loss": 0.6141, "step": 13547 }, { "epoch": 0.86, "grad_norm": 0.9241039752960205, "learning_rate": 5.171310254475737e-07, "loss": 0.5793, "step": 13548 }, { "epoch": 0.86, "grad_norm": 0.8989474177360535, "learning_rate": 5.166767129960004e-07, "loss": 0.5708, "step": 13549 }, { "epoch": 0.86, "grad_norm": 0.8441492915153503, "learning_rate": 5.162225893215755e-07, "loss": 0.5564, "step": 13550 }, { "epoch": 0.86, "grad_norm": 0.8294525742530823, "learning_rate": 5.157686544434176e-07, "loss": 0.553, "step": 13551 }, { "epoch": 0.86, "grad_norm": 0.8823322057723999, "learning_rate": 5.153149083806436e-07, "loss": 0.5434, "step": 13552 }, { "epoch": 0.86, "grad_norm": 0.9079649448394775, "learning_rate": 5.14861351152357e-07, "loss": 0.6156, "step": 13553 }, { "epoch": 0.86, "grad_norm": 0.8697636723518372, "learning_rate": 5.144079827776566e-07, "loss": 0.5319, "step": 13554 }, { "epoch": 0.86, "grad_norm": 0.8235500454902649, "learning_rate": 5.139548032756325e-07, "loss": 0.5539, "step": 13555 }, { "epoch": 0.86, "grad_norm": 0.8555493354797363, "learning_rate": 5.13501812665364e-07, "loss": 0.5011, "step": 13556 }, { "epoch": 0.86, "grad_norm": 0.8816463351249695, "learning_rate": 5.130490109659275e-07, "loss": 0.5324, "step": 13557 }, { "epoch": 0.86, "grad_norm": 0.8975476622581482, "learning_rate": 5.125963981963894e-07, "loss": 0.5839, "step": 13558 }, { "epoch": 0.86, "grad_norm": 0.8355741500854492, "learning_rate": 5.12143974375805e-07, "loss": 0.5476, "step": 13559 }, { "epoch": 0.86, "grad_norm": 0.8712900876998901, "learning_rate": 5.116917395232262e-07, "loss": 0.6212, "step": 13560 }, { "epoch": 0.86, "grad_norm": 0.8691787123680115, "learning_rate": 5.112396936576947e-07, "loss": 0.5257, "step": 13561 }, { "epoch": 0.86, "grad_norm": 0.860202968120575, "learning_rate": 5.107878367982438e-07, "loss": 0.5328, "step": 13562 }, { "epoch": 0.86, "grad_norm": 0.9462293386459351, "learning_rate": 5.103361689639019e-07, "loss": 0.6081, "step": 13563 }, { "epoch": 0.86, "grad_norm": 0.9243309497833252, "learning_rate": 5.098846901736832e-07, "loss": 0.5952, "step": 13564 }, { "epoch": 0.86, "grad_norm": 0.8597437739372253, "learning_rate": 5.094334004466012e-07, "loss": 0.6039, "step": 13565 }, { "epoch": 0.86, "grad_norm": 0.9059598445892334, "learning_rate": 5.089822998016586e-07, "loss": 0.5546, "step": 13566 }, { "epoch": 0.86, "grad_norm": 0.9531145691871643, "learning_rate": 5.085313882578469e-07, "loss": 0.5446, "step": 13567 }, { "epoch": 0.86, "grad_norm": 0.923179566860199, "learning_rate": 5.080806658341536e-07, "loss": 0.5803, "step": 13568 }, { "epoch": 0.86, "grad_norm": 0.9919398427009583, "learning_rate": 5.076301325495575e-07, "loss": 0.6093, "step": 13569 }, { "epoch": 0.86, "grad_norm": 0.8127473592758179, "learning_rate": 5.071797884230284e-07, "loss": 0.528, "step": 13570 }, { "epoch": 0.86, "grad_norm": 0.9124990701675415, "learning_rate": 5.067296334735306e-07, "loss": 0.6227, "step": 13571 }, { "epoch": 0.86, "grad_norm": 0.9629392623901367, "learning_rate": 5.062796677200154e-07, "loss": 0.613, "step": 13572 }, { "epoch": 0.86, "grad_norm": 0.921553373336792, "learning_rate": 5.058298911814302e-07, "loss": 0.5961, "step": 13573 }, { "epoch": 0.86, "grad_norm": 0.9422236680984497, "learning_rate": 5.053803038767158e-07, "loss": 0.5932, "step": 13574 }, { "epoch": 0.86, "grad_norm": 0.8603041172027588, "learning_rate": 5.049309058248004e-07, "loss": 0.528, "step": 13575 }, { "epoch": 0.86, "grad_norm": 0.8307815790176392, "learning_rate": 5.044816970446076e-07, "loss": 0.5176, "step": 13576 }, { "epoch": 0.86, "grad_norm": 0.8835110068321228, "learning_rate": 5.040326775550514e-07, "loss": 0.5863, "step": 13577 }, { "epoch": 0.86, "grad_norm": 0.9652464985847473, "learning_rate": 5.035838473750393e-07, "loss": 0.5984, "step": 13578 }, { "epoch": 0.86, "grad_norm": 0.8423542380332947, "learning_rate": 5.031352065234702e-07, "loss": 0.5387, "step": 13579 }, { "epoch": 0.86, "grad_norm": 0.8693512678146362, "learning_rate": 5.026867550192327e-07, "loss": 0.5339, "step": 13580 }, { "epoch": 0.86, "grad_norm": 0.9612827301025391, "learning_rate": 5.022384928812107e-07, "loss": 0.5946, "step": 13581 }, { "epoch": 0.86, "grad_norm": 0.8645419478416443, "learning_rate": 5.017904201282808e-07, "loss": 0.5286, "step": 13582 }, { "epoch": 0.86, "grad_norm": 0.875821053981781, "learning_rate": 5.013425367793074e-07, "loss": 0.5546, "step": 13583 }, { "epoch": 0.86, "grad_norm": 0.8827986121177673, "learning_rate": 5.008948428531496e-07, "loss": 0.5512, "step": 13584 }, { "epoch": 0.86, "grad_norm": 0.8592386245727539, "learning_rate": 5.004473383686592e-07, "loss": 0.5975, "step": 13585 }, { "epoch": 0.86, "grad_norm": 0.9252444505691528, "learning_rate": 5.000000233446783e-07, "loss": 0.5423, "step": 13586 }, { "epoch": 0.86, "grad_norm": 0.893185019493103, "learning_rate": 4.99552897800043e-07, "loss": 0.5593, "step": 13587 }, { "epoch": 0.86, "grad_norm": 0.8774006366729736, "learning_rate": 4.991059617535781e-07, "loss": 0.5636, "step": 13588 }, { "epoch": 0.86, "grad_norm": 0.8796536922454834, "learning_rate": 4.986592152241043e-07, "loss": 0.6007, "step": 13589 }, { "epoch": 0.86, "grad_norm": 0.8507401943206787, "learning_rate": 4.982126582304314e-07, "loss": 0.5618, "step": 13590 }, { "epoch": 0.86, "grad_norm": 0.9354941248893738, "learning_rate": 4.977662907913633e-07, "loss": 0.6212, "step": 13591 }, { "epoch": 0.86, "grad_norm": 0.8648061156272888, "learning_rate": 4.973201129256943e-07, "loss": 0.5562, "step": 13592 }, { "epoch": 0.86, "grad_norm": 0.9033337831497192, "learning_rate": 4.968741246522129e-07, "loss": 0.6071, "step": 13593 }, { "epoch": 0.86, "grad_norm": 0.9331035017967224, "learning_rate": 4.964283259896945e-07, "loss": 0.568, "step": 13594 }, { "epoch": 0.86, "grad_norm": 0.903471052646637, "learning_rate": 4.959827169569136e-07, "loss": 0.542, "step": 13595 }, { "epoch": 0.86, "grad_norm": 0.8744809627532959, "learning_rate": 4.955372975726336e-07, "loss": 0.5319, "step": 13596 }, { "epoch": 0.86, "grad_norm": 0.9031259417533875, "learning_rate": 4.950920678556065e-07, "loss": 0.5862, "step": 13597 }, { "epoch": 0.86, "grad_norm": 0.836344838142395, "learning_rate": 4.946470278245813e-07, "loss": 0.5504, "step": 13598 }, { "epoch": 0.86, "grad_norm": 0.8633370995521545, "learning_rate": 4.942021774982969e-07, "loss": 0.5772, "step": 13599 }, { "epoch": 0.86, "grad_norm": 0.8724879622459412, "learning_rate": 4.937575168954845e-07, "loss": 0.5347, "step": 13600 }, { "epoch": 0.86, "grad_norm": 0.9412771463394165, "learning_rate": 4.933130460348673e-07, "loss": 0.5512, "step": 13601 }, { "epoch": 0.86, "grad_norm": 0.9978772401809692, "learning_rate": 4.928687649351594e-07, "loss": 0.5511, "step": 13602 }, { "epoch": 0.86, "grad_norm": 0.8989056348800659, "learning_rate": 4.924246736150679e-07, "loss": 0.5492, "step": 13603 }, { "epoch": 0.86, "grad_norm": 0.9470418095588684, "learning_rate": 4.919807720932946e-07, "loss": 0.5756, "step": 13604 }, { "epoch": 0.86, "grad_norm": 0.8301222324371338, "learning_rate": 4.915370603885272e-07, "loss": 0.5398, "step": 13605 }, { "epoch": 0.86, "grad_norm": 0.8426318764686584, "learning_rate": 4.91093538519451e-07, "loss": 0.5703, "step": 13606 }, { "epoch": 0.86, "grad_norm": 0.8601441383361816, "learning_rate": 4.906502065047403e-07, "loss": 0.5795, "step": 13607 }, { "epoch": 0.86, "grad_norm": 0.8154615163803101, "learning_rate": 4.902070643630624e-07, "loss": 0.5552, "step": 13608 }, { "epoch": 0.86, "grad_norm": 0.8969496488571167, "learning_rate": 4.89764112113078e-07, "loss": 0.5814, "step": 13609 }, { "epoch": 0.86, "grad_norm": 0.9093883633613586, "learning_rate": 4.893213497734356e-07, "loss": 0.5667, "step": 13610 }, { "epoch": 0.86, "grad_norm": 0.8988984227180481, "learning_rate": 4.888787773627785e-07, "loss": 0.5766, "step": 13611 }, { "epoch": 0.86, "grad_norm": 0.8481857180595398, "learning_rate": 4.884363948997455e-07, "loss": 0.5216, "step": 13612 }, { "epoch": 0.86, "grad_norm": 0.8880239725112915, "learning_rate": 4.879942024029599e-07, "loss": 0.5504, "step": 13613 }, { "epoch": 0.86, "grad_norm": 0.8837846517562866, "learning_rate": 4.875521998910426e-07, "loss": 0.5126, "step": 13614 }, { "epoch": 0.86, "grad_norm": 0.905758798122406, "learning_rate": 4.871103873826044e-07, "loss": 0.5803, "step": 13615 }, { "epoch": 0.86, "grad_norm": 0.8283089995384216, "learning_rate": 4.866687648962487e-07, "loss": 0.6043, "step": 13616 }, { "epoch": 0.86, "grad_norm": 0.9035173654556274, "learning_rate": 4.862273324505712e-07, "loss": 0.5171, "step": 13617 }, { "epoch": 0.86, "grad_norm": 0.8479889631271362, "learning_rate": 4.857860900641576e-07, "loss": 0.5226, "step": 13618 }, { "epoch": 0.86, "grad_norm": 0.8784950375556946, "learning_rate": 4.853450377555879e-07, "loss": 0.5695, "step": 13619 }, { "epoch": 0.86, "grad_norm": 0.8352934122085571, "learning_rate": 4.849041755434336e-07, "loss": 0.548, "step": 13620 }, { "epoch": 0.86, "grad_norm": 0.8737031817436218, "learning_rate": 4.844635034462574e-07, "loss": 0.503, "step": 13621 }, { "epoch": 0.86, "grad_norm": 0.8494743704795837, "learning_rate": 4.840230214826147e-07, "loss": 0.6044, "step": 13622 }, { "epoch": 0.86, "grad_norm": 0.866535484790802, "learning_rate": 4.835827296710537e-07, "loss": 0.625, "step": 13623 }, { "epoch": 0.86, "grad_norm": 0.9683859944343567, "learning_rate": 4.831426280301105e-07, "loss": 0.6063, "step": 13624 }, { "epoch": 0.86, "grad_norm": 0.8437833189964294, "learning_rate": 4.8270271657832e-07, "loss": 0.4983, "step": 13625 }, { "epoch": 0.86, "grad_norm": 0.9570308327674866, "learning_rate": 4.822629953342028e-07, "loss": 0.5752, "step": 13626 }, { "epoch": 0.86, "grad_norm": 0.8903212547302246, "learning_rate": 4.81823464316275e-07, "loss": 0.558, "step": 13627 }, { "epoch": 0.86, "grad_norm": 0.916301429271698, "learning_rate": 4.813841235430433e-07, "loss": 0.5274, "step": 13628 }, { "epoch": 0.86, "grad_norm": 0.863028883934021, "learning_rate": 4.809449730330068e-07, "loss": 0.544, "step": 13629 }, { "epoch": 0.86, "grad_norm": 0.9283245205879211, "learning_rate": 4.805060128046574e-07, "loss": 0.5725, "step": 13630 }, { "epoch": 0.86, "grad_norm": 0.8969873189926147, "learning_rate": 4.80067242876478e-07, "loss": 0.5858, "step": 13631 }, { "epoch": 0.86, "grad_norm": 0.9229633808135986, "learning_rate": 4.796286632669417e-07, "loss": 0.5792, "step": 13632 }, { "epoch": 0.86, "grad_norm": 0.9000493288040161, "learning_rate": 4.791902739945187e-07, "loss": 0.5484, "step": 13633 }, { "epoch": 0.86, "grad_norm": 0.8788484930992126, "learning_rate": 4.787520750776658e-07, "loss": 0.5745, "step": 13634 }, { "epoch": 0.86, "grad_norm": 0.8873356580734253, "learning_rate": 4.783140665348352e-07, "loss": 0.509, "step": 13635 }, { "epoch": 0.86, "grad_norm": 0.8811357021331787, "learning_rate": 4.778762483844701e-07, "loss": 0.5573, "step": 13636 }, { "epoch": 0.86, "grad_norm": 0.8424716591835022, "learning_rate": 4.774386206450027e-07, "loss": 0.5391, "step": 13637 }, { "epoch": 0.86, "grad_norm": 0.8560691475868225, "learning_rate": 4.770011833348631e-07, "loss": 0.5524, "step": 13638 }, { "epoch": 0.86, "grad_norm": 0.8537570238113403, "learning_rate": 4.7656393647247054e-07, "loss": 0.5459, "step": 13639 }, { "epoch": 0.86, "grad_norm": 0.8615885972976685, "learning_rate": 4.7612688007623363e-07, "loss": 0.5397, "step": 13640 }, { "epoch": 0.86, "grad_norm": 0.8361106514930725, "learning_rate": 4.756900141645565e-07, "loss": 0.6052, "step": 13641 }, { "epoch": 0.86, "grad_norm": 0.8686729669570923, "learning_rate": 4.752533387558339e-07, "loss": 0.5867, "step": 13642 }, { "epoch": 0.86, "grad_norm": 0.8469032645225525, "learning_rate": 4.748168538684528e-07, "loss": 0.5686, "step": 13643 }, { "epoch": 0.86, "grad_norm": 0.9301448464393616, "learning_rate": 4.7438055952079287e-07, "loss": 0.6143, "step": 13644 }, { "epoch": 0.86, "grad_norm": 0.9038071036338806, "learning_rate": 4.739444557312223e-07, "loss": 0.5559, "step": 13645 }, { "epoch": 0.86, "grad_norm": 0.8837379813194275, "learning_rate": 4.735085425181063e-07, "loss": 0.5633, "step": 13646 }, { "epoch": 0.86, "grad_norm": 0.884790301322937, "learning_rate": 4.730728198998008e-07, "loss": 0.6254, "step": 13647 }, { "epoch": 0.86, "grad_norm": 0.918059766292572, "learning_rate": 4.726372878946489e-07, "loss": 0.5338, "step": 13648 }, { "epoch": 0.86, "grad_norm": 0.8778460621833801, "learning_rate": 4.7220194652099204e-07, "loss": 0.5891, "step": 13649 }, { "epoch": 0.86, "grad_norm": 0.9263706207275391, "learning_rate": 4.7176679579716e-07, "loss": 0.5847, "step": 13650 }, { "epoch": 0.86, "grad_norm": 0.9742307066917419, "learning_rate": 4.7133183574147534e-07, "loss": 0.6366, "step": 13651 }, { "epoch": 0.86, "grad_norm": 0.9247993230819702, "learning_rate": 4.7089706637225283e-07, "loss": 0.6062, "step": 13652 }, { "epoch": 0.86, "grad_norm": 0.8701785802841187, "learning_rate": 4.7046248770780065e-07, "loss": 0.5936, "step": 13653 }, { "epoch": 0.87, "grad_norm": 0.8334656953811646, "learning_rate": 4.7002809976641417e-07, "loss": 0.5348, "step": 13654 }, { "epoch": 0.87, "grad_norm": 0.9703954458236694, "learning_rate": 4.6959390256638703e-07, "loss": 0.5806, "step": 13655 }, { "epoch": 0.87, "grad_norm": 0.9238660335540771, "learning_rate": 4.691598961260002e-07, "loss": 0.5771, "step": 13656 }, { "epoch": 0.87, "grad_norm": 0.9725003838539124, "learning_rate": 4.68726080463528e-07, "loss": 0.6504, "step": 13657 }, { "epoch": 0.87, "grad_norm": 0.8711181879043579, "learning_rate": 4.682924555972379e-07, "loss": 0.5642, "step": 13658 }, { "epoch": 0.87, "grad_norm": 0.8927187919616699, "learning_rate": 4.6785902154538763e-07, "loss": 0.6176, "step": 13659 }, { "epoch": 0.87, "grad_norm": 0.9735706448554993, "learning_rate": 4.674257783262276e-07, "loss": 0.6128, "step": 13660 }, { "epoch": 0.87, "grad_norm": 0.9042197465896606, "learning_rate": 4.669927259580015e-07, "loss": 0.5782, "step": 13661 }, { "epoch": 0.87, "grad_norm": 0.8241575956344604, "learning_rate": 4.665598644589409e-07, "loss": 0.4985, "step": 13662 }, { "epoch": 0.87, "grad_norm": 0.8879325985908508, "learning_rate": 4.6612719384727556e-07, "loss": 0.5778, "step": 13663 }, { "epoch": 0.87, "grad_norm": 0.9175477027893066, "learning_rate": 4.656947141412205e-07, "loss": 0.5892, "step": 13664 }, { "epoch": 0.87, "grad_norm": 0.8603050112724304, "learning_rate": 4.652624253589877e-07, "loss": 0.6211, "step": 13665 }, { "epoch": 0.87, "grad_norm": 0.9173632860183716, "learning_rate": 4.6483032751877987e-07, "loss": 0.6, "step": 13666 }, { "epoch": 0.87, "grad_norm": 0.9489515423774719, "learning_rate": 4.6439842063878803e-07, "loss": 0.5417, "step": 13667 }, { "epoch": 0.87, "grad_norm": 0.8993018865585327, "learning_rate": 4.639667047372015e-07, "loss": 0.5831, "step": 13668 }, { "epoch": 0.87, "grad_norm": 0.8332312107086182, "learning_rate": 4.6353517983219856e-07, "loss": 0.5133, "step": 13669 }, { "epoch": 0.87, "grad_norm": 0.8579809069633484, "learning_rate": 4.631038459419468e-07, "loss": 0.5473, "step": 13670 }, { "epoch": 0.87, "grad_norm": 0.9015935659408569, "learning_rate": 4.6267270308460955e-07, "loss": 0.5857, "step": 13671 }, { "epoch": 0.87, "grad_norm": 0.8048023581504822, "learning_rate": 4.6224175127834057e-07, "loss": 0.5637, "step": 13672 }, { "epoch": 0.87, "grad_norm": 0.8290963172912598, "learning_rate": 4.61810990541286e-07, "loss": 0.5104, "step": 13673 }, { "epoch": 0.87, "grad_norm": 0.8655577301979065, "learning_rate": 4.61380420891584e-07, "loss": 0.5858, "step": 13674 }, { "epoch": 0.87, "grad_norm": 0.9205370545387268, "learning_rate": 4.6095004234736175e-07, "loss": 0.6064, "step": 13675 }, { "epoch": 0.87, "grad_norm": 0.8846642374992371, "learning_rate": 4.6051985492674425e-07, "loss": 0.5652, "step": 13676 }, { "epoch": 0.87, "grad_norm": 0.8984456062316895, "learning_rate": 4.6008985864784473e-07, "loss": 0.578, "step": 13677 }, { "epoch": 0.87, "grad_norm": 0.9386430382728577, "learning_rate": 4.596600535287671e-07, "loss": 0.6143, "step": 13678 }, { "epoch": 0.87, "grad_norm": 0.9357401132583618, "learning_rate": 4.592304395876102e-07, "loss": 0.5837, "step": 13679 }, { "epoch": 0.87, "grad_norm": 0.9388497471809387, "learning_rate": 4.588010168424628e-07, "loss": 0.5809, "step": 13680 }, { "epoch": 0.87, "grad_norm": 0.9037414193153381, "learning_rate": 4.5837178531140723e-07, "loss": 0.5671, "step": 13681 }, { "epoch": 0.87, "grad_norm": 0.9199149012565613, "learning_rate": 4.579427450125179e-07, "loss": 0.5612, "step": 13682 }, { "epoch": 0.87, "grad_norm": 0.8453497886657715, "learning_rate": 4.5751389596385755e-07, "loss": 0.5223, "step": 13683 }, { "epoch": 0.87, "grad_norm": 0.9701248407363892, "learning_rate": 4.570852381834839e-07, "loss": 0.556, "step": 13684 }, { "epoch": 0.87, "grad_norm": 0.8935304284095764, "learning_rate": 4.5665677168944935e-07, "loss": 0.6301, "step": 13685 }, { "epoch": 0.87, "grad_norm": 0.8989062905311584, "learning_rate": 4.562284964997915e-07, "loss": 0.5626, "step": 13686 }, { "epoch": 0.87, "grad_norm": 0.9354601502418518, "learning_rate": 4.5580041263254547e-07, "loss": 0.6159, "step": 13687 }, { "epoch": 0.87, "grad_norm": 0.8538670539855957, "learning_rate": 4.553725201057363e-07, "loss": 0.5656, "step": 13688 }, { "epoch": 0.87, "grad_norm": 0.9553387761116028, "learning_rate": 4.5494481893738005e-07, "loss": 0.5389, "step": 13689 }, { "epoch": 0.87, "grad_norm": 1.0032283067703247, "learning_rate": 4.5451730914548744e-07, "loss": 0.6298, "step": 13690 }, { "epoch": 0.87, "grad_norm": 0.8711049556732178, "learning_rate": 4.540899907480578e-07, "loss": 0.5762, "step": 13691 }, { "epoch": 0.87, "grad_norm": 0.8655171990394592, "learning_rate": 4.536628637630836e-07, "loss": 0.552, "step": 13692 }, { "epoch": 0.87, "grad_norm": 0.8877602815628052, "learning_rate": 4.532359282085519e-07, "loss": 0.6132, "step": 13693 }, { "epoch": 0.87, "grad_norm": 0.8526985049247742, "learning_rate": 4.528091841024379e-07, "loss": 0.5487, "step": 13694 }, { "epoch": 0.87, "grad_norm": 0.8731285333633423, "learning_rate": 4.5238263146271053e-07, "loss": 0.5844, "step": 13695 }, { "epoch": 0.87, "grad_norm": 0.9351499676704407, "learning_rate": 4.5195627030733156e-07, "loss": 0.5862, "step": 13696 }, { "epoch": 0.87, "grad_norm": 0.8490439653396606, "learning_rate": 4.5153010065425054e-07, "loss": 0.5402, "step": 13697 }, { "epoch": 0.87, "grad_norm": 0.8659386038780212, "learning_rate": 4.511041225214158e-07, "loss": 0.561, "step": 13698 }, { "epoch": 0.87, "grad_norm": 0.8737561106681824, "learning_rate": 4.5067833592676136e-07, "loss": 0.5404, "step": 13699 }, { "epoch": 0.87, "grad_norm": 0.8905614614486694, "learning_rate": 4.502527408882157e-07, "loss": 0.5343, "step": 13700 }, { "epoch": 0.87, "grad_norm": 0.8490473031997681, "learning_rate": 4.498273374237e-07, "loss": 0.5809, "step": 13701 }, { "epoch": 0.87, "grad_norm": 0.9129199981689453, "learning_rate": 4.494021255511266e-07, "loss": 0.5969, "step": 13702 }, { "epoch": 0.87, "grad_norm": 0.9153651595115662, "learning_rate": 4.48977105288399e-07, "loss": 0.571, "step": 13703 }, { "epoch": 0.87, "grad_norm": 0.957604706287384, "learning_rate": 4.485522766534145e-07, "loss": 0.6299, "step": 13704 }, { "epoch": 0.87, "grad_norm": 0.8316980600357056, "learning_rate": 4.4812763966405825e-07, "loss": 0.5236, "step": 13705 }, { "epoch": 0.87, "grad_norm": 0.8910514712333679, "learning_rate": 4.4770319433821487e-07, "loss": 0.5742, "step": 13706 }, { "epoch": 0.87, "grad_norm": 0.904670000076294, "learning_rate": 4.472789406937522e-07, "loss": 0.5931, "step": 13707 }, { "epoch": 0.87, "grad_norm": 0.9204214215278625, "learning_rate": 4.468548787485355e-07, "loss": 0.5939, "step": 13708 }, { "epoch": 0.87, "grad_norm": 0.9537574648857117, "learning_rate": 4.4643100852042097e-07, "loss": 0.5974, "step": 13709 }, { "epoch": 0.87, "grad_norm": 0.9143358469009399, "learning_rate": 4.4600733002725547e-07, "loss": 0.5724, "step": 13710 }, { "epoch": 0.87, "grad_norm": 0.8609566688537598, "learning_rate": 4.4558384328687975e-07, "loss": 0.5714, "step": 13711 }, { "epoch": 0.87, "grad_norm": 0.8667165637016296, "learning_rate": 4.451605483171251e-07, "loss": 0.571, "step": 13712 }, { "epoch": 0.87, "grad_norm": 0.8885953426361084, "learning_rate": 4.4473744513581384e-07, "loss": 0.5167, "step": 13713 }, { "epoch": 0.87, "grad_norm": 0.8588200807571411, "learning_rate": 4.443145337607624e-07, "loss": 0.5433, "step": 13714 }, { "epoch": 0.87, "grad_norm": 0.8709940910339355, "learning_rate": 4.4389181420977814e-07, "loss": 0.6293, "step": 13715 }, { "epoch": 0.87, "grad_norm": 0.8453631401062012, "learning_rate": 4.4346928650065957e-07, "loss": 0.5408, "step": 13716 }, { "epoch": 0.87, "grad_norm": 0.8846293091773987, "learning_rate": 4.4304695065119807e-07, "loss": 0.5588, "step": 13717 }, { "epoch": 0.87, "grad_norm": 0.8700962066650391, "learning_rate": 4.4262480667917774e-07, "loss": 0.5479, "step": 13718 }, { "epoch": 0.87, "grad_norm": 0.844928503036499, "learning_rate": 4.422028546023721e-07, "loss": 0.5638, "step": 13719 }, { "epoch": 0.87, "grad_norm": 0.8891464471817017, "learning_rate": 4.4178109443855033e-07, "loss": 0.5845, "step": 13720 }, { "epoch": 0.87, "grad_norm": 0.8719486594200134, "learning_rate": 4.4135952620546876e-07, "loss": 0.5724, "step": 13721 }, { "epoch": 0.87, "grad_norm": 0.8617244958877563, "learning_rate": 4.409381499208787e-07, "loss": 0.5087, "step": 13722 }, { "epoch": 0.87, "grad_norm": 0.8584579229354858, "learning_rate": 4.405169656025238e-07, "loss": 0.5701, "step": 13723 }, { "epoch": 0.87, "grad_norm": 0.8903681635856628, "learning_rate": 4.400959732681381e-07, "loss": 0.5974, "step": 13724 }, { "epoch": 0.87, "grad_norm": 0.878350019454956, "learning_rate": 4.3967517293544814e-07, "loss": 0.5478, "step": 13725 }, { "epoch": 0.87, "grad_norm": 0.9635295271873474, "learning_rate": 4.3925456462217244e-07, "loss": 0.5976, "step": 13726 }, { "epoch": 0.87, "grad_norm": 0.897746741771698, "learning_rate": 4.3883414834602125e-07, "loss": 0.5702, "step": 13727 }, { "epoch": 0.87, "grad_norm": 0.8466120958328247, "learning_rate": 4.384139241246982e-07, "loss": 0.6266, "step": 13728 }, { "epoch": 0.87, "grad_norm": 0.9046663045883179, "learning_rate": 4.3799389197589525e-07, "loss": 0.5742, "step": 13729 }, { "epoch": 0.87, "grad_norm": 0.855974018573761, "learning_rate": 4.375740519172994e-07, "loss": 0.6135, "step": 13730 }, { "epoch": 0.87, "grad_norm": 0.8562418818473816, "learning_rate": 4.3715440396658816e-07, "loss": 0.5726, "step": 13731 }, { "epoch": 0.87, "grad_norm": 1.0470370054244995, "learning_rate": 4.3673494814143234e-07, "loss": 0.5896, "step": 13732 }, { "epoch": 0.87, "grad_norm": 0.9556792378425598, "learning_rate": 4.3631568445949403e-07, "loss": 0.5409, "step": 13733 }, { "epoch": 0.87, "grad_norm": 0.8872630000114441, "learning_rate": 4.3589661293842624e-07, "loss": 0.5565, "step": 13734 }, { "epoch": 0.87, "grad_norm": 0.9071952104568481, "learning_rate": 4.3547773359587377e-07, "loss": 0.6007, "step": 13735 }, { "epoch": 0.87, "grad_norm": 0.8532198071479797, "learning_rate": 4.350590464494764e-07, "loss": 0.521, "step": 13736 }, { "epoch": 0.87, "grad_norm": 0.8936211466789246, "learning_rate": 4.346405515168617e-07, "loss": 0.5663, "step": 13737 }, { "epoch": 0.87, "grad_norm": 0.9199041128158569, "learning_rate": 4.342222488156511e-07, "loss": 0.5873, "step": 13738 }, { "epoch": 0.87, "grad_norm": 0.8879461884498596, "learning_rate": 4.3380413836345893e-07, "loss": 0.5838, "step": 13739 }, { "epoch": 0.87, "grad_norm": 0.8619484305381775, "learning_rate": 4.333862201778899e-07, "loss": 0.5107, "step": 13740 }, { "epoch": 0.87, "grad_norm": 0.9578720927238464, "learning_rate": 4.329684942765411e-07, "loss": 0.5738, "step": 13741 }, { "epoch": 0.87, "grad_norm": 0.8812727928161621, "learning_rate": 4.3255096067700176e-07, "loss": 0.5691, "step": 13742 }, { "epoch": 0.87, "grad_norm": 0.847726047039032, "learning_rate": 4.321336193968523e-07, "loss": 0.5662, "step": 13743 }, { "epoch": 0.87, "grad_norm": 0.8409244418144226, "learning_rate": 4.3171647045366525e-07, "loss": 0.543, "step": 13744 }, { "epoch": 0.87, "grad_norm": 0.966153621673584, "learning_rate": 4.312995138650056e-07, "loss": 0.5944, "step": 13745 }, { "epoch": 0.87, "grad_norm": 0.8785676956176758, "learning_rate": 4.3088274964843027e-07, "loss": 0.5476, "step": 13746 }, { "epoch": 0.87, "grad_norm": 0.9022130370140076, "learning_rate": 4.3046617782148857e-07, "loss": 0.5837, "step": 13747 }, { "epoch": 0.87, "grad_norm": 0.9164488911628723, "learning_rate": 4.300497984017182e-07, "loss": 0.5348, "step": 13748 }, { "epoch": 0.87, "grad_norm": 0.8544109463691711, "learning_rate": 4.2963361140665405e-07, "loss": 0.5099, "step": 13749 }, { "epoch": 0.87, "grad_norm": 0.8812281489372253, "learning_rate": 4.292176168538198e-07, "loss": 0.5676, "step": 13750 }, { "epoch": 0.87, "grad_norm": 0.9989331364631653, "learning_rate": 4.2880181476073034e-07, "loss": 0.5962, "step": 13751 }, { "epoch": 0.87, "grad_norm": 0.8740145564079285, "learning_rate": 4.283862051448945e-07, "loss": 0.5772, "step": 13752 }, { "epoch": 0.87, "grad_norm": 0.9067648649215698, "learning_rate": 4.279707880238121e-07, "loss": 0.57, "step": 13753 }, { "epoch": 0.87, "grad_norm": 0.9112171530723572, "learning_rate": 4.275555634149753e-07, "loss": 0.605, "step": 13754 }, { "epoch": 0.87, "grad_norm": 0.8743265867233276, "learning_rate": 4.2714053133586785e-07, "loss": 0.5284, "step": 13755 }, { "epoch": 0.87, "grad_norm": 0.9052802324295044, "learning_rate": 4.267256918039625e-07, "loss": 0.5345, "step": 13756 }, { "epoch": 0.87, "grad_norm": 0.840216875076294, "learning_rate": 4.263110448367308e-07, "loss": 0.5121, "step": 13757 }, { "epoch": 0.87, "grad_norm": 0.8973818421363831, "learning_rate": 4.2589659045163044e-07, "loss": 0.6348, "step": 13758 }, { "epoch": 0.87, "grad_norm": 0.8948500752449036, "learning_rate": 4.254823286661125e-07, "loss": 0.5814, "step": 13759 }, { "epoch": 0.87, "grad_norm": 0.920590341091156, "learning_rate": 4.250682594976191e-07, "loss": 0.5692, "step": 13760 }, { "epoch": 0.87, "grad_norm": 0.885006844997406, "learning_rate": 4.2465438296358685e-07, "loss": 0.5802, "step": 13761 }, { "epoch": 0.87, "grad_norm": 0.847855269908905, "learning_rate": 4.2424069908144236e-07, "loss": 0.5368, "step": 13762 }, { "epoch": 0.87, "grad_norm": 0.9912233948707581, "learning_rate": 4.2382720786860453e-07, "loss": 0.5901, "step": 13763 }, { "epoch": 0.87, "grad_norm": 0.9090965986251831, "learning_rate": 4.2341390934248273e-07, "loss": 0.5469, "step": 13764 }, { "epoch": 0.87, "grad_norm": 0.8936121463775635, "learning_rate": 4.230008035204797e-07, "loss": 0.5723, "step": 13765 }, { "epoch": 0.87, "grad_norm": 0.8051524758338928, "learning_rate": 4.225878904199926e-07, "loss": 0.4853, "step": 13766 }, { "epoch": 0.87, "grad_norm": 0.8978790640830994, "learning_rate": 4.2217517005840423e-07, "loss": 0.5568, "step": 13767 }, { "epoch": 0.87, "grad_norm": 0.8630240559577942, "learning_rate": 4.2176264245309517e-07, "loss": 0.5686, "step": 13768 }, { "epoch": 0.87, "grad_norm": 0.8735791444778442, "learning_rate": 4.2135030762143424e-07, "loss": 0.5625, "step": 13769 }, { "epoch": 0.87, "grad_norm": 0.9239519238471985, "learning_rate": 4.2093816558078373e-07, "loss": 0.656, "step": 13770 }, { "epoch": 0.87, "grad_norm": 0.8584021925926208, "learning_rate": 4.205262163484991e-07, "loss": 0.5384, "step": 13771 }, { "epoch": 0.87, "grad_norm": 1.000178575515747, "learning_rate": 4.2011445994192324e-07, "loss": 0.5553, "step": 13772 }, { "epoch": 0.87, "grad_norm": 1.005164384841919, "learning_rate": 4.1970289637839556e-07, "loss": 0.5724, "step": 13773 }, { "epoch": 0.87, "grad_norm": 0.8568700551986694, "learning_rate": 4.19291525675245e-07, "loss": 0.567, "step": 13774 }, { "epoch": 0.87, "grad_norm": 0.9740828275680542, "learning_rate": 4.1888034784979326e-07, "loss": 0.6062, "step": 13775 }, { "epoch": 0.87, "grad_norm": 0.8327596187591553, "learning_rate": 4.184693629193537e-07, "loss": 0.627, "step": 13776 }, { "epoch": 0.87, "grad_norm": 0.8938369154930115, "learning_rate": 4.180585709012319e-07, "loss": 0.577, "step": 13777 }, { "epoch": 0.87, "grad_norm": 0.8749104738235474, "learning_rate": 4.1764797181272296e-07, "loss": 0.5819, "step": 13778 }, { "epoch": 0.87, "grad_norm": 0.8669180870056152, "learning_rate": 4.172375656711181e-07, "loss": 0.5144, "step": 13779 }, { "epoch": 0.87, "grad_norm": 0.9040730595588684, "learning_rate": 4.1682735249369663e-07, "loss": 0.6053, "step": 13780 }, { "epoch": 0.87, "grad_norm": 0.9332876801490784, "learning_rate": 4.1641733229773163e-07, "loss": 0.5463, "step": 13781 }, { "epoch": 0.87, "grad_norm": 0.8687313795089722, "learning_rate": 4.1600750510048805e-07, "loss": 0.5951, "step": 13782 }, { "epoch": 0.87, "grad_norm": 0.9103281497955322, "learning_rate": 4.1559787091922153e-07, "loss": 0.5824, "step": 13783 }, { "epoch": 0.87, "grad_norm": 0.840823233127594, "learning_rate": 4.151884297711806e-07, "loss": 0.5203, "step": 13784 }, { "epoch": 0.87, "grad_norm": 0.890895664691925, "learning_rate": 4.147791816736063e-07, "loss": 0.5681, "step": 13785 }, { "epoch": 0.87, "grad_norm": 0.8432772755622864, "learning_rate": 4.143701266437283e-07, "loss": 0.5618, "step": 13786 }, { "epoch": 0.87, "grad_norm": 0.8869197368621826, "learning_rate": 4.139612646987734e-07, "loss": 0.5829, "step": 13787 }, { "epoch": 0.87, "grad_norm": 0.9564074873924255, "learning_rate": 4.135525958559555e-07, "loss": 0.6305, "step": 13788 }, { "epoch": 0.87, "grad_norm": 0.8258056044578552, "learning_rate": 4.131441201324826e-07, "loss": 0.5646, "step": 13789 }, { "epoch": 0.87, "grad_norm": 0.9355778694152832, "learning_rate": 4.1273583754555424e-07, "loss": 0.5543, "step": 13790 }, { "epoch": 0.87, "grad_norm": 0.895876407623291, "learning_rate": 4.123277481123622e-07, "loss": 0.5631, "step": 13791 }, { "epoch": 0.87, "grad_norm": 0.8127149343490601, "learning_rate": 4.1191985185008887e-07, "loss": 0.5681, "step": 13792 }, { "epoch": 0.87, "grad_norm": 0.8988841772079468, "learning_rate": 4.1151214877591105e-07, "loss": 0.5723, "step": 13793 }, { "epoch": 0.87, "grad_norm": 0.9248142242431641, "learning_rate": 4.1110463890699336e-07, "loss": 0.5945, "step": 13794 }, { "epoch": 0.87, "grad_norm": 0.9253085851669312, "learning_rate": 4.1069732226049484e-07, "loss": 0.6259, "step": 13795 }, { "epoch": 0.87, "grad_norm": 0.8506118059158325, "learning_rate": 4.102901988535685e-07, "loss": 0.5496, "step": 13796 }, { "epoch": 0.87, "grad_norm": 0.8817588686943054, "learning_rate": 4.0988326870335494e-07, "loss": 0.5534, "step": 13797 }, { "epoch": 0.87, "grad_norm": 0.9294220805168152, "learning_rate": 4.0947653182698887e-07, "loss": 0.6071, "step": 13798 }, { "epoch": 0.87, "grad_norm": 0.9263404011726379, "learning_rate": 4.0906998824159715e-07, "loss": 0.6115, "step": 13799 }, { "epoch": 0.87, "grad_norm": 0.9097710251808167, "learning_rate": 4.086636379642972e-07, "loss": 0.5834, "step": 13800 }, { "epoch": 0.87, "grad_norm": 0.9048157930374146, "learning_rate": 4.0825748101220087e-07, "loss": 0.5976, "step": 13801 }, { "epoch": 0.87, "grad_norm": 0.8855105042457581, "learning_rate": 4.078515174024067e-07, "loss": 0.571, "step": 13802 }, { "epoch": 0.87, "grad_norm": 0.8673086762428284, "learning_rate": 4.074457471520099e-07, "loss": 0.5968, "step": 13803 }, { "epoch": 0.87, "grad_norm": 0.8577106595039368, "learning_rate": 4.0704017027809797e-07, "loss": 0.5826, "step": 13804 }, { "epoch": 0.87, "grad_norm": 0.8799236416816711, "learning_rate": 4.0663478679774604e-07, "loss": 0.5497, "step": 13805 }, { "epoch": 0.87, "grad_norm": 0.8574314117431641, "learning_rate": 4.062295967280239e-07, "loss": 0.5452, "step": 13806 }, { "epoch": 0.87, "grad_norm": 0.8260728120803833, "learning_rate": 4.058246000859939e-07, "loss": 0.6122, "step": 13807 }, { "epoch": 0.87, "grad_norm": 0.8879086375236511, "learning_rate": 4.054197968887064e-07, "loss": 0.5183, "step": 13808 }, { "epoch": 0.87, "grad_norm": 0.8634669184684753, "learning_rate": 4.0501518715320933e-07, "loss": 0.5658, "step": 13809 }, { "epoch": 0.87, "grad_norm": 0.8982515931129456, "learning_rate": 4.046107708965369e-07, "loss": 0.5977, "step": 13810 }, { "epoch": 0.88, "grad_norm": 0.9243874549865723, "learning_rate": 4.042065481357188e-07, "loss": 0.5895, "step": 13811 }, { "epoch": 0.88, "grad_norm": 0.9220935702323914, "learning_rate": 4.038025188877753e-07, "loss": 0.5842, "step": 13812 }, { "epoch": 0.88, "grad_norm": 0.9362528920173645, "learning_rate": 4.03398683169719e-07, "loss": 0.5917, "step": 13813 }, { "epoch": 0.88, "grad_norm": 0.9390722513198853, "learning_rate": 4.029950409985539e-07, "loss": 0.5915, "step": 13814 }, { "epoch": 0.88, "grad_norm": 0.9555262327194214, "learning_rate": 4.0259159239127656e-07, "loss": 0.5891, "step": 13815 }, { "epoch": 0.88, "grad_norm": 0.8341988325119019, "learning_rate": 4.021883373648722e-07, "loss": 0.5347, "step": 13816 }, { "epoch": 0.88, "grad_norm": 0.8047258853912354, "learning_rate": 4.017852759363239e-07, "loss": 0.4463, "step": 13817 }, { "epoch": 0.88, "grad_norm": 0.8843516707420349, "learning_rate": 4.013824081226009e-07, "loss": 0.5822, "step": 13818 }, { "epoch": 0.88, "grad_norm": 0.889491856098175, "learning_rate": 4.009797339406674e-07, "loss": 0.5696, "step": 13819 }, { "epoch": 0.88, "grad_norm": 0.866584062576294, "learning_rate": 4.005772534074792e-07, "loss": 0.5666, "step": 13820 }, { "epoch": 0.88, "grad_norm": 0.9304389953613281, "learning_rate": 4.001749665399807e-07, "loss": 0.5963, "step": 13821 }, { "epoch": 0.88, "grad_norm": 0.9130128026008606, "learning_rate": 3.997728733551137e-07, "loss": 0.5348, "step": 13822 }, { "epoch": 0.88, "grad_norm": 0.8755511045455933, "learning_rate": 3.993709738698093e-07, "loss": 0.5714, "step": 13823 }, { "epoch": 0.88, "grad_norm": 0.8576176762580872, "learning_rate": 3.989692681009877e-07, "loss": 0.5845, "step": 13824 }, { "epoch": 0.88, "grad_norm": 0.9526224136352539, "learning_rate": 3.985677560655643e-07, "loss": 0.5841, "step": 13825 }, { "epoch": 0.88, "grad_norm": 0.915798544883728, "learning_rate": 3.9816643778044506e-07, "loss": 0.6271, "step": 13826 }, { "epoch": 0.88, "grad_norm": 0.8685756921768188, "learning_rate": 3.977653132625292e-07, "loss": 0.5363, "step": 13827 }, { "epoch": 0.88, "grad_norm": 0.8983719348907471, "learning_rate": 3.9736438252870655e-07, "loss": 0.589, "step": 13828 }, { "epoch": 0.88, "grad_norm": 0.9057663679122925, "learning_rate": 3.969636455958564e-07, "loss": 0.587, "step": 13829 }, { "epoch": 0.88, "grad_norm": 0.886985719203949, "learning_rate": 3.965631024808553e-07, "loss": 0.6049, "step": 13830 }, { "epoch": 0.88, "grad_norm": 0.8384401798248291, "learning_rate": 3.961627532005691e-07, "loss": 0.519, "step": 13831 }, { "epoch": 0.88, "grad_norm": 0.851017951965332, "learning_rate": 3.957625977718527e-07, "loss": 0.5821, "step": 13832 }, { "epoch": 0.88, "grad_norm": 0.8468850255012512, "learning_rate": 3.953626362115559e-07, "loss": 0.5651, "step": 13833 }, { "epoch": 0.88, "grad_norm": 0.9317176938056946, "learning_rate": 3.949628685365203e-07, "loss": 0.5769, "step": 13834 }, { "epoch": 0.88, "grad_norm": 0.888606607913971, "learning_rate": 3.945632947635791e-07, "loss": 0.5461, "step": 13835 }, { "epoch": 0.88, "grad_norm": 0.8703384399414062, "learning_rate": 3.941639149095566e-07, "loss": 0.6008, "step": 13836 }, { "epoch": 0.88, "grad_norm": 0.8924920558929443, "learning_rate": 3.9376472899126884e-07, "loss": 0.5616, "step": 13837 }, { "epoch": 0.88, "grad_norm": 0.8976526856422424, "learning_rate": 3.933657370255228e-07, "loss": 0.603, "step": 13838 }, { "epoch": 0.88, "grad_norm": 0.9381121397018433, "learning_rate": 3.9296693902912244e-07, "loss": 0.5989, "step": 13839 }, { "epoch": 0.88, "grad_norm": 0.8531518578529358, "learning_rate": 3.9256833501885693e-07, "loss": 0.613, "step": 13840 }, { "epoch": 0.88, "grad_norm": 0.8720222115516663, "learning_rate": 3.9216992501151074e-07, "loss": 0.5606, "step": 13841 }, { "epoch": 0.88, "grad_norm": 0.8882898688316345, "learning_rate": 3.917717090238593e-07, "loss": 0.6199, "step": 13842 }, { "epoch": 0.88, "grad_norm": 0.898091197013855, "learning_rate": 3.913736870726703e-07, "loss": 0.5583, "step": 13843 }, { "epoch": 0.88, "grad_norm": 0.8894566893577576, "learning_rate": 3.909758591747037e-07, "loss": 0.6388, "step": 13844 }, { "epoch": 0.88, "grad_norm": 0.8718437552452087, "learning_rate": 3.905782253467094e-07, "loss": 0.5293, "step": 13845 }, { "epoch": 0.88, "grad_norm": 0.8743876218795776, "learning_rate": 3.9018078560543015e-07, "loss": 0.5883, "step": 13846 }, { "epoch": 0.88, "grad_norm": 0.9666427373886108, "learning_rate": 3.8978353996760365e-07, "loss": 0.5466, "step": 13847 }, { "epoch": 0.88, "grad_norm": 0.8270087838172913, "learning_rate": 3.8938648844995374e-07, "loss": 0.5458, "step": 13848 }, { "epoch": 0.88, "grad_norm": 0.8940174579620361, "learning_rate": 3.889896310691993e-07, "loss": 0.5766, "step": 13849 }, { "epoch": 0.88, "grad_norm": 0.9705901741981506, "learning_rate": 3.885929678420508e-07, "loss": 0.5894, "step": 13850 }, { "epoch": 0.88, "grad_norm": 0.8427651524543762, "learning_rate": 3.881964987852105e-07, "loss": 0.5606, "step": 13851 }, { "epoch": 0.88, "grad_norm": 0.9404253363609314, "learning_rate": 3.878002239153739e-07, "loss": 0.635, "step": 13852 }, { "epoch": 0.88, "grad_norm": 0.8982987403869629, "learning_rate": 3.874041432492237e-07, "loss": 0.6041, "step": 13853 }, { "epoch": 0.88, "grad_norm": 0.9181349873542786, "learning_rate": 3.870082568034389e-07, "loss": 0.5793, "step": 13854 }, { "epoch": 0.88, "grad_norm": 0.9045166969299316, "learning_rate": 3.866125645946894e-07, "loss": 0.533, "step": 13855 }, { "epoch": 0.88, "grad_norm": 0.8742471933364868, "learning_rate": 3.862170666396359e-07, "loss": 0.5753, "step": 13856 }, { "epoch": 0.88, "grad_norm": 0.8175247311592102, "learning_rate": 3.858217629549316e-07, "loss": 0.5068, "step": 13857 }, { "epoch": 0.88, "grad_norm": 0.9291229248046875, "learning_rate": 3.8542665355722154e-07, "loss": 0.5553, "step": 13858 }, { "epoch": 0.88, "grad_norm": 0.8577315807342529, "learning_rate": 3.8503173846314137e-07, "loss": 0.489, "step": 13859 }, { "epoch": 0.88, "grad_norm": 0.9240617752075195, "learning_rate": 3.846370176893205e-07, "loss": 0.5803, "step": 13860 }, { "epoch": 0.88, "grad_norm": 0.9180030226707458, "learning_rate": 3.8424249125238065e-07, "loss": 0.5931, "step": 13861 }, { "epoch": 0.88, "grad_norm": 0.8835217356681824, "learning_rate": 3.838481591689308e-07, "loss": 0.5581, "step": 13862 }, { "epoch": 0.88, "grad_norm": 0.9661378860473633, "learning_rate": 3.834540214555771e-07, "loss": 0.628, "step": 13863 }, { "epoch": 0.88, "grad_norm": 0.8079046607017517, "learning_rate": 3.830600781289151e-07, "loss": 0.5342, "step": 13864 }, { "epoch": 0.88, "grad_norm": 0.8845729231834412, "learning_rate": 3.826663292055316e-07, "loss": 0.5631, "step": 13865 }, { "epoch": 0.88, "grad_norm": 0.9163376092910767, "learning_rate": 3.822727747020072e-07, "loss": 0.6073, "step": 13866 }, { "epoch": 0.88, "grad_norm": 0.9018417596817017, "learning_rate": 3.818794146349114e-07, "loss": 0.5893, "step": 13867 }, { "epoch": 0.88, "grad_norm": 0.9381417036056519, "learning_rate": 3.8148624902080764e-07, "loss": 0.5507, "step": 13868 }, { "epoch": 0.88, "grad_norm": 0.8565617203712463, "learning_rate": 3.8109327787625273e-07, "loss": 0.581, "step": 13869 }, { "epoch": 0.88, "grad_norm": 0.8831982016563416, "learning_rate": 3.807005012177911e-07, "loss": 0.5437, "step": 13870 }, { "epoch": 0.88, "grad_norm": 0.9772710204124451, "learning_rate": 3.803079190619624e-07, "loss": 0.6289, "step": 13871 }, { "epoch": 0.88, "grad_norm": 0.892636239528656, "learning_rate": 3.7991553142529616e-07, "loss": 0.5559, "step": 13872 }, { "epoch": 0.88, "grad_norm": 0.8699362277984619, "learning_rate": 3.7952333832431466e-07, "loss": 0.5419, "step": 13873 }, { "epoch": 0.88, "grad_norm": 0.8843465447425842, "learning_rate": 3.7913133977553306e-07, "loss": 0.5928, "step": 13874 }, { "epoch": 0.88, "grad_norm": 0.9290990233421326, "learning_rate": 3.7873953579545486e-07, "loss": 0.5642, "step": 13875 }, { "epoch": 0.88, "grad_norm": 0.9399954080581665, "learning_rate": 3.783479264005779e-07, "loss": 0.5733, "step": 13876 }, { "epoch": 0.88, "grad_norm": 0.9060240387916565, "learning_rate": 3.779565116073941e-07, "loss": 0.5444, "step": 13877 }, { "epoch": 0.88, "grad_norm": 0.8936532735824585, "learning_rate": 3.775652914323813e-07, "loss": 0.6461, "step": 13878 }, { "epoch": 0.88, "grad_norm": 0.9363529086112976, "learning_rate": 3.771742658920141e-07, "loss": 0.586, "step": 13879 }, { "epoch": 0.88, "grad_norm": 0.8696059584617615, "learning_rate": 3.767834350027572e-07, "loss": 0.5465, "step": 13880 }, { "epoch": 0.88, "grad_norm": 0.8789991736412048, "learning_rate": 3.7639279878106616e-07, "loss": 0.5846, "step": 13881 }, { "epoch": 0.88, "grad_norm": 0.9171331524848938, "learning_rate": 3.7600235724339127e-07, "loss": 0.5901, "step": 13882 }, { "epoch": 0.88, "grad_norm": 0.8733245134353638, "learning_rate": 3.756121104061705e-07, "loss": 0.54, "step": 13883 }, { "epoch": 0.88, "grad_norm": 0.8445425033569336, "learning_rate": 3.752220582858368e-07, "loss": 0.5577, "step": 13884 }, { "epoch": 0.88, "grad_norm": 0.8852535486221313, "learning_rate": 3.748322008988137e-07, "loss": 0.6001, "step": 13885 }, { "epoch": 0.88, "grad_norm": 0.9022545218467712, "learning_rate": 3.744425382615169e-07, "loss": 0.6012, "step": 13886 }, { "epoch": 0.88, "grad_norm": 0.8933830261230469, "learning_rate": 3.7405307039035387e-07, "loss": 0.6361, "step": 13887 }, { "epoch": 0.88, "grad_norm": 0.8915839195251465, "learning_rate": 3.7366379730172376e-07, "loss": 0.6196, "step": 13888 }, { "epoch": 0.88, "grad_norm": 0.8615158200263977, "learning_rate": 3.732747190120162e-07, "loss": 0.6029, "step": 13889 }, { "epoch": 0.88, "grad_norm": 0.8902248740196228, "learning_rate": 3.728858355376164e-07, "loss": 0.6026, "step": 13890 }, { "epoch": 0.88, "grad_norm": 0.8872123956680298, "learning_rate": 3.724971468948968e-07, "loss": 0.5928, "step": 13891 }, { "epoch": 0.88, "grad_norm": 0.8912094831466675, "learning_rate": 3.721086531002244e-07, "loss": 0.588, "step": 13892 }, { "epoch": 0.88, "grad_norm": 0.9006378054618835, "learning_rate": 3.7172035416995765e-07, "loss": 0.5815, "step": 13893 }, { "epoch": 0.88, "grad_norm": 0.877053439617157, "learning_rate": 3.7133225012044585e-07, "loss": 0.5095, "step": 13894 }, { "epoch": 0.88, "grad_norm": 0.9015281796455383, "learning_rate": 3.709443409680308e-07, "loss": 0.5498, "step": 13895 }, { "epoch": 0.88, "grad_norm": 0.8962835073471069, "learning_rate": 3.7055662672904723e-07, "loss": 0.569, "step": 13896 }, { "epoch": 0.88, "grad_norm": 0.8946380615234375, "learning_rate": 3.7016910741981825e-07, "loss": 0.5726, "step": 13897 }, { "epoch": 0.88, "grad_norm": 0.8510831594467163, "learning_rate": 3.6978178305666357e-07, "loss": 0.496, "step": 13898 }, { "epoch": 0.88, "grad_norm": 0.9237155914306641, "learning_rate": 3.693946536558896e-07, "loss": 0.5727, "step": 13899 }, { "epoch": 0.88, "grad_norm": 0.8890257477760315, "learning_rate": 3.6900771923379817e-07, "loss": 0.5797, "step": 13900 }, { "epoch": 0.88, "grad_norm": 0.931348979473114, "learning_rate": 3.6862097980668255e-07, "loss": 0.6333, "step": 13901 }, { "epoch": 0.88, "grad_norm": 0.8788846135139465, "learning_rate": 3.68234435390824e-07, "loss": 0.6, "step": 13902 }, { "epoch": 0.88, "grad_norm": 0.9328796863555908, "learning_rate": 3.6784808600250186e-07, "loss": 0.5973, "step": 13903 }, { "epoch": 0.88, "grad_norm": 0.9625527262687683, "learning_rate": 3.674619316579836e-07, "loss": 0.5795, "step": 13904 }, { "epoch": 0.88, "grad_norm": 0.9201703071594238, "learning_rate": 3.670759723735273e-07, "loss": 0.5592, "step": 13905 }, { "epoch": 0.88, "grad_norm": 0.8659148812294006, "learning_rate": 3.666902081653845e-07, "loss": 0.5503, "step": 13906 }, { "epoch": 0.88, "grad_norm": 0.9211107492446899, "learning_rate": 3.663046390497993e-07, "loss": 0.5797, "step": 13907 }, { "epoch": 0.88, "grad_norm": 0.9264574646949768, "learning_rate": 3.659192650430066e-07, "loss": 0.5992, "step": 13908 }, { "epoch": 0.88, "grad_norm": 0.8879082202911377, "learning_rate": 3.655340861612333e-07, "loss": 0.5596, "step": 13909 }, { "epoch": 0.88, "grad_norm": 0.8419327139854431, "learning_rate": 3.6514910242069547e-07, "loss": 0.5711, "step": 13910 }, { "epoch": 0.88, "grad_norm": 0.8223779797554016, "learning_rate": 3.647643138376067e-07, "loss": 0.5581, "step": 13911 }, { "epoch": 0.88, "grad_norm": 0.9604656100273132, "learning_rate": 3.6437972042816904e-07, "loss": 0.6241, "step": 13912 }, { "epoch": 0.88, "grad_norm": 0.8464024662971497, "learning_rate": 3.6399532220857403e-07, "loss": 0.5518, "step": 13913 }, { "epoch": 0.88, "grad_norm": 0.8726653456687927, "learning_rate": 3.6361111919500815e-07, "loss": 0.5917, "step": 13914 }, { "epoch": 0.88, "grad_norm": 0.888209879398346, "learning_rate": 3.6322711140364953e-07, "loss": 0.6297, "step": 13915 }, { "epoch": 0.88, "grad_norm": 0.8308035731315613, "learning_rate": 3.628432988506675e-07, "loss": 0.5388, "step": 13916 }, { "epoch": 0.88, "grad_norm": 0.9365667700767517, "learning_rate": 3.6245968155222243e-07, "loss": 0.5515, "step": 13917 }, { "epoch": 0.88, "grad_norm": 0.8524565100669861, "learning_rate": 3.6207625952446756e-07, "loss": 0.5938, "step": 13918 }, { "epoch": 0.88, "grad_norm": 0.8615753650665283, "learning_rate": 3.616930327835466e-07, "loss": 0.5269, "step": 13919 }, { "epoch": 0.88, "grad_norm": 0.894736111164093, "learning_rate": 3.613100013455972e-07, "loss": 0.6134, "step": 13920 }, { "epoch": 0.88, "grad_norm": 0.9682538509368896, "learning_rate": 3.609271652267465e-07, "loss": 0.5901, "step": 13921 }, { "epoch": 0.88, "grad_norm": 0.9055116772651672, "learning_rate": 3.6054452444311493e-07, "loss": 0.6037, "step": 13922 }, { "epoch": 0.88, "grad_norm": 0.8035820126533508, "learning_rate": 3.601620790108135e-07, "loss": 0.5568, "step": 13923 }, { "epoch": 0.88, "grad_norm": 0.930554211139679, "learning_rate": 3.597798289459464e-07, "loss": 0.6585, "step": 13924 }, { "epoch": 0.88, "grad_norm": 0.9319306015968323, "learning_rate": 3.593977742646088e-07, "loss": 0.5412, "step": 13925 }, { "epoch": 0.88, "grad_norm": 0.8614120483398438, "learning_rate": 3.5901591498288755e-07, "loss": 0.5399, "step": 13926 }, { "epoch": 0.88, "grad_norm": 0.897907555103302, "learning_rate": 3.5863425111686e-07, "loss": 0.5419, "step": 13927 }, { "epoch": 0.88, "grad_norm": 0.9247115254402161, "learning_rate": 3.5825278268259987e-07, "loss": 0.6008, "step": 13928 }, { "epoch": 0.88, "grad_norm": 0.886035680770874, "learning_rate": 3.5787150969616657e-07, "loss": 0.559, "step": 13929 }, { "epoch": 0.88, "grad_norm": 0.9609770774841309, "learning_rate": 3.57490432173615e-07, "loss": 0.5855, "step": 13930 }, { "epoch": 0.88, "grad_norm": 0.8835691809654236, "learning_rate": 3.5710955013099233e-07, "loss": 0.5454, "step": 13931 }, { "epoch": 0.88, "grad_norm": 0.9028952717781067, "learning_rate": 3.5672886358433356e-07, "loss": 0.57, "step": 13932 }, { "epoch": 0.88, "grad_norm": 0.8876438140869141, "learning_rate": 3.5634837254967023e-07, "loss": 0.5503, "step": 13933 }, { "epoch": 0.88, "grad_norm": 0.8692540526390076, "learning_rate": 3.559680770430235e-07, "loss": 0.5504, "step": 13934 }, { "epoch": 0.88, "grad_norm": 0.8966943025588989, "learning_rate": 3.555879770804049e-07, "loss": 0.6109, "step": 13935 }, { "epoch": 0.88, "grad_norm": 0.8818347454071045, "learning_rate": 3.5520807267782007e-07, "loss": 0.5318, "step": 13936 }, { "epoch": 0.88, "grad_norm": 0.9468558430671692, "learning_rate": 3.548283638512651e-07, "loss": 0.6075, "step": 13937 }, { "epoch": 0.88, "grad_norm": 0.8646183013916016, "learning_rate": 3.544488506167282e-07, "loss": 0.5779, "step": 13938 }, { "epoch": 0.88, "grad_norm": 0.8838092088699341, "learning_rate": 3.5406953299019056e-07, "loss": 0.5319, "step": 13939 }, { "epoch": 0.88, "grad_norm": 0.9578419327735901, "learning_rate": 3.5369041098762103e-07, "loss": 0.5811, "step": 13940 }, { "epoch": 0.88, "grad_norm": 0.9535823464393616, "learning_rate": 3.5331148462498635e-07, "loss": 0.5944, "step": 13941 }, { "epoch": 0.88, "grad_norm": 0.8988412618637085, "learning_rate": 3.529327539182403e-07, "loss": 0.5615, "step": 13942 }, { "epoch": 0.88, "grad_norm": 0.8499272465705872, "learning_rate": 3.5255421888332976e-07, "loss": 0.5468, "step": 13943 }, { "epoch": 0.88, "grad_norm": 0.8369562029838562, "learning_rate": 3.5217587953619404e-07, "loss": 0.5457, "step": 13944 }, { "epoch": 0.88, "grad_norm": 0.8756216168403625, "learning_rate": 3.517977358927632e-07, "loss": 0.5575, "step": 13945 }, { "epoch": 0.88, "grad_norm": 0.8889182209968567, "learning_rate": 3.514197879689596e-07, "loss": 0.5777, "step": 13946 }, { "epoch": 0.88, "grad_norm": 0.9014686942100525, "learning_rate": 3.5104203578069817e-07, "loss": 0.5799, "step": 13947 }, { "epoch": 0.88, "grad_norm": 0.8833813071250916, "learning_rate": 3.506644793438835e-07, "loss": 0.5552, "step": 13948 }, { "epoch": 0.88, "grad_norm": 0.9554223418235779, "learning_rate": 3.502871186744128e-07, "loss": 0.5921, "step": 13949 }, { "epoch": 0.88, "grad_norm": 0.879019021987915, "learning_rate": 3.499099537881784e-07, "loss": 0.6137, "step": 13950 }, { "epoch": 0.88, "grad_norm": 0.9680423140525818, "learning_rate": 3.495329847010581e-07, "loss": 0.5773, "step": 13951 }, { "epoch": 0.88, "grad_norm": 0.8740622997283936, "learning_rate": 3.4915621142892595e-07, "loss": 0.5879, "step": 13952 }, { "epoch": 0.88, "grad_norm": 0.9262283444404602, "learning_rate": 3.48779633987647e-07, "loss": 0.5917, "step": 13953 }, { "epoch": 0.88, "grad_norm": 0.8888399004936218, "learning_rate": 3.4840325239307693e-07, "loss": 0.6129, "step": 13954 }, { "epoch": 0.88, "grad_norm": 0.855827808380127, "learning_rate": 3.4802706666106525e-07, "loss": 0.5782, "step": 13955 }, { "epoch": 0.88, "grad_norm": 0.9697046279907227, "learning_rate": 3.476510768074498e-07, "loss": 0.5647, "step": 13956 }, { "epoch": 0.88, "grad_norm": 0.8779304623603821, "learning_rate": 3.4727528284806247e-07, "loss": 0.5947, "step": 13957 }, { "epoch": 0.88, "grad_norm": 0.8670563697814941, "learning_rate": 3.468996847987288e-07, "loss": 0.5335, "step": 13958 }, { "epoch": 0.88, "grad_norm": 0.9170581102371216, "learning_rate": 3.4652428267526184e-07, "loss": 0.6615, "step": 13959 }, { "epoch": 0.88, "grad_norm": 0.8803734183311462, "learning_rate": 3.4614907649346884e-07, "loss": 0.5578, "step": 13960 }, { "epoch": 0.88, "grad_norm": 1.0183887481689453, "learning_rate": 3.4577406626914947e-07, "loss": 0.6096, "step": 13961 }, { "epoch": 0.88, "grad_norm": 0.911680281162262, "learning_rate": 3.453992520180921e-07, "loss": 0.5951, "step": 13962 }, { "epoch": 0.88, "grad_norm": 0.8374574780464172, "learning_rate": 3.4502463375608143e-07, "loss": 0.5592, "step": 13963 }, { "epoch": 0.88, "grad_norm": 0.862108588218689, "learning_rate": 3.446502114988892e-07, "loss": 0.5704, "step": 13964 }, { "epoch": 0.88, "grad_norm": 0.9024475812911987, "learning_rate": 3.442759852622812e-07, "loss": 0.6107, "step": 13965 }, { "epoch": 0.88, "grad_norm": 0.8448460102081299, "learning_rate": 3.4390195506201594e-07, "loss": 0.5212, "step": 13966 }, { "epoch": 0.88, "grad_norm": 0.8650651574134827, "learning_rate": 3.435281209138419e-07, "loss": 0.5699, "step": 13967 }, { "epoch": 0.88, "grad_norm": 0.8967364430427551, "learning_rate": 3.4315448283349985e-07, "loss": 0.58, "step": 13968 }, { "epoch": 0.89, "grad_norm": 0.8695257902145386, "learning_rate": 3.4278104083672383e-07, "loss": 0.5894, "step": 13969 }, { "epoch": 0.89, "grad_norm": 0.85512775182724, "learning_rate": 3.424077949392346e-07, "loss": 0.5647, "step": 13970 }, { "epoch": 0.89, "grad_norm": 0.908794641494751, "learning_rate": 3.4203474515675293e-07, "loss": 0.5832, "step": 13971 }, { "epoch": 0.89, "grad_norm": 0.859935462474823, "learning_rate": 3.4166189150498297e-07, "loss": 0.5769, "step": 13972 }, { "epoch": 0.89, "grad_norm": 0.892041802406311, "learning_rate": 3.4128923399962543e-07, "loss": 0.5362, "step": 13973 }, { "epoch": 0.89, "grad_norm": 0.9205070734024048, "learning_rate": 3.4091677265637224e-07, "loss": 0.6354, "step": 13974 }, { "epoch": 0.89, "grad_norm": 0.8668642640113831, "learning_rate": 3.405445074909053e-07, "loss": 0.5666, "step": 13975 }, { "epoch": 0.89, "grad_norm": 0.8559014797210693, "learning_rate": 3.401724385189009e-07, "loss": 0.528, "step": 13976 }, { "epoch": 0.89, "grad_norm": 0.8495075702667236, "learning_rate": 3.398005657560249e-07, "loss": 0.568, "step": 13977 }, { "epoch": 0.89, "grad_norm": 0.8912281394004822, "learning_rate": 3.394288892179348e-07, "loss": 0.5637, "step": 13978 }, { "epoch": 0.89, "grad_norm": 0.8422101140022278, "learning_rate": 3.390574089202814e-07, "loss": 0.5824, "step": 13979 }, { "epoch": 0.89, "grad_norm": 0.8872683048248291, "learning_rate": 3.3868612487870657e-07, "loss": 0.6023, "step": 13980 }, { "epoch": 0.89, "grad_norm": 0.871668815612793, "learning_rate": 3.3831503710884286e-07, "loss": 0.5728, "step": 13981 }, { "epoch": 0.89, "grad_norm": 0.9398965835571289, "learning_rate": 3.379441456263166e-07, "loss": 0.5988, "step": 13982 }, { "epoch": 0.89, "grad_norm": 0.804894208908081, "learning_rate": 3.375734504467437e-07, "loss": 0.545, "step": 13983 }, { "epoch": 0.89, "grad_norm": 0.8272179961204529, "learning_rate": 3.372029515857339e-07, "loss": 0.5751, "step": 13984 }, { "epoch": 0.89, "grad_norm": 0.8695221543312073, "learning_rate": 3.368326490588875e-07, "loss": 0.5484, "step": 13985 }, { "epoch": 0.89, "grad_norm": 0.865994930267334, "learning_rate": 3.364625428817958e-07, "loss": 0.5282, "step": 13986 }, { "epoch": 0.89, "grad_norm": 0.9086118340492249, "learning_rate": 3.360926330700431e-07, "loss": 0.573, "step": 13987 }, { "epoch": 0.89, "grad_norm": 0.8299586772918701, "learning_rate": 3.3572291963920536e-07, "loss": 0.5297, "step": 13988 }, { "epoch": 0.89, "grad_norm": 0.8901399970054626, "learning_rate": 3.353534026048494e-07, "loss": 0.6119, "step": 13989 }, { "epoch": 0.89, "grad_norm": 0.8746377229690552, "learning_rate": 3.3498408198253453e-07, "loss": 0.5493, "step": 13990 }, { "epoch": 0.89, "grad_norm": 0.8297358751296997, "learning_rate": 3.3461495778781104e-07, "loss": 0.5338, "step": 13991 }, { "epoch": 0.89, "grad_norm": 0.826378345489502, "learning_rate": 3.342460300362227e-07, "loss": 0.5438, "step": 13992 }, { "epoch": 0.89, "grad_norm": 0.8547459244728088, "learning_rate": 3.3387729874330367e-07, "loss": 0.5513, "step": 13993 }, { "epoch": 0.89, "grad_norm": 0.8909960985183716, "learning_rate": 3.335087639245782e-07, "loss": 0.6038, "step": 13994 }, { "epoch": 0.89, "grad_norm": 0.992056131362915, "learning_rate": 3.331404255955656e-07, "loss": 0.5645, "step": 13995 }, { "epoch": 0.89, "grad_norm": 0.8902263641357422, "learning_rate": 3.327722837717745e-07, "loss": 0.5573, "step": 13996 }, { "epoch": 0.89, "grad_norm": 0.896858274936676, "learning_rate": 3.32404338468707e-07, "loss": 0.5627, "step": 13997 }, { "epoch": 0.89, "grad_norm": 0.9204726219177246, "learning_rate": 3.320365897018546e-07, "loss": 0.5782, "step": 13998 }, { "epoch": 0.89, "grad_norm": 0.9295701384544373, "learning_rate": 3.316690374867043e-07, "loss": 0.5769, "step": 13999 }, { "epoch": 0.89, "grad_norm": 0.8957133293151855, "learning_rate": 3.313016818387288e-07, "loss": 0.5495, "step": 14000 }, { "epoch": 0.89, "grad_norm": 0.850740909576416, "learning_rate": 3.309345227734001e-07, "loss": 0.5891, "step": 14001 }, { "epoch": 0.89, "grad_norm": 0.8553286790847778, "learning_rate": 3.305675603061753e-07, "loss": 0.5852, "step": 14002 }, { "epoch": 0.89, "grad_norm": 0.8360522389411926, "learning_rate": 3.3020079445250655e-07, "loss": 0.6143, "step": 14003 }, { "epoch": 0.89, "grad_norm": 0.8745806813240051, "learning_rate": 3.2983422522783747e-07, "loss": 0.5673, "step": 14004 }, { "epoch": 0.89, "grad_norm": 0.8642773032188416, "learning_rate": 3.2946785264760305e-07, "loss": 0.5315, "step": 14005 }, { "epoch": 0.89, "grad_norm": 0.9193217158317566, "learning_rate": 3.291016767272298e-07, "loss": 0.6075, "step": 14006 }, { "epoch": 0.89, "grad_norm": 0.8983094692230225, "learning_rate": 3.287356974821365e-07, "loss": 0.5878, "step": 14007 }, { "epoch": 0.89, "grad_norm": 0.8727191090583801, "learning_rate": 3.28369914927732e-07, "loss": 0.5405, "step": 14008 }, { "epoch": 0.89, "grad_norm": 0.8366736769676208, "learning_rate": 3.2800432907941935e-07, "loss": 0.5571, "step": 14009 }, { "epoch": 0.89, "grad_norm": 0.9027994275093079, "learning_rate": 3.276389399525914e-07, "loss": 0.5512, "step": 14010 }, { "epoch": 0.89, "grad_norm": 0.9058107137680054, "learning_rate": 3.272737475626342e-07, "loss": 0.5707, "step": 14011 }, { "epoch": 0.89, "grad_norm": 0.9187793731689453, "learning_rate": 3.269087519249242e-07, "loss": 0.5556, "step": 14012 }, { "epoch": 0.89, "grad_norm": 0.8657212257385254, "learning_rate": 3.2654395305482924e-07, "loss": 0.5968, "step": 14013 }, { "epoch": 0.89, "grad_norm": 0.8940473198890686, "learning_rate": 3.2617935096771137e-07, "loss": 0.5712, "step": 14014 }, { "epoch": 0.89, "grad_norm": 0.876758873462677, "learning_rate": 3.258149456789228e-07, "loss": 0.5664, "step": 14015 }, { "epoch": 0.89, "grad_norm": 0.9137895703315735, "learning_rate": 3.2545073720380573e-07, "loss": 0.5223, "step": 14016 }, { "epoch": 0.89, "grad_norm": 0.9135635495185852, "learning_rate": 3.2508672555769617e-07, "loss": 0.5516, "step": 14017 }, { "epoch": 0.89, "grad_norm": 1.005510926246643, "learning_rate": 3.2472291075592246e-07, "loss": 0.6037, "step": 14018 }, { "epoch": 0.89, "grad_norm": 0.8511359691619873, "learning_rate": 3.243592928138023e-07, "loss": 0.542, "step": 14019 }, { "epoch": 0.89, "grad_norm": 0.8994077444076538, "learning_rate": 3.2399587174664794e-07, "loss": 0.5644, "step": 14020 }, { "epoch": 0.89, "grad_norm": 0.8860614895820618, "learning_rate": 3.236326475697593e-07, "loss": 0.5672, "step": 14021 }, { "epoch": 0.89, "grad_norm": 0.9307529926300049, "learning_rate": 3.232696202984326e-07, "loss": 0.5568, "step": 14022 }, { "epoch": 0.89, "grad_norm": 0.7981402277946472, "learning_rate": 3.2290678994795377e-07, "loss": 0.537, "step": 14023 }, { "epoch": 0.89, "grad_norm": 0.8489423990249634, "learning_rate": 3.2254415653359906e-07, "loss": 0.5791, "step": 14024 }, { "epoch": 0.89, "grad_norm": 0.9137423634529114, "learning_rate": 3.2218172007063787e-07, "loss": 0.6229, "step": 14025 }, { "epoch": 0.89, "grad_norm": 0.8917653560638428, "learning_rate": 3.218194805743319e-07, "loss": 0.5276, "step": 14026 }, { "epoch": 0.89, "grad_norm": 0.8989799618721008, "learning_rate": 3.2145743805993334e-07, "loss": 0.5448, "step": 14027 }, { "epoch": 0.89, "grad_norm": 0.8666809797286987, "learning_rate": 3.210955925426873e-07, "loss": 0.5514, "step": 14028 }, { "epoch": 0.89, "grad_norm": 0.9025545120239258, "learning_rate": 3.2073394403782823e-07, "loss": 0.6176, "step": 14029 }, { "epoch": 0.89, "grad_norm": 0.8828505873680115, "learning_rate": 3.2037249256058445e-07, "loss": 0.6386, "step": 14030 }, { "epoch": 0.89, "grad_norm": 0.9091066122055054, "learning_rate": 3.2001123812617663e-07, "loss": 0.5939, "step": 14031 }, { "epoch": 0.89, "grad_norm": 0.9628225564956665, "learning_rate": 3.196501807498148e-07, "loss": 0.5717, "step": 14032 }, { "epoch": 0.89, "grad_norm": 0.8795494437217712, "learning_rate": 3.192893204467018e-07, "loss": 0.5421, "step": 14033 }, { "epoch": 0.89, "grad_norm": 0.9152184128761292, "learning_rate": 3.189286572320327e-07, "loss": 0.596, "step": 14034 }, { "epoch": 0.89, "grad_norm": 0.9376192092895508, "learning_rate": 3.185681911209937e-07, "loss": 0.5486, "step": 14035 }, { "epoch": 0.89, "grad_norm": 0.9213606119155884, "learning_rate": 3.1820792212876316e-07, "loss": 0.5384, "step": 14036 }, { "epoch": 0.89, "grad_norm": 0.9375748038291931, "learning_rate": 3.178478502705101e-07, "loss": 0.5772, "step": 14037 }, { "epoch": 0.89, "grad_norm": 0.900903582572937, "learning_rate": 3.174879755613952e-07, "loss": 0.5485, "step": 14038 }, { "epoch": 0.89, "grad_norm": 0.9398730993270874, "learning_rate": 3.1712829801657294e-07, "loss": 0.5505, "step": 14039 }, { "epoch": 0.89, "grad_norm": 0.9179508686065674, "learning_rate": 3.167688176511874e-07, "loss": 0.6111, "step": 14040 }, { "epoch": 0.89, "grad_norm": 0.916954755783081, "learning_rate": 3.1640953448037527e-07, "loss": 0.605, "step": 14041 }, { "epoch": 0.89, "grad_norm": 0.8982189893722534, "learning_rate": 3.1605044851926504e-07, "loss": 0.5572, "step": 14042 }, { "epoch": 0.89, "grad_norm": 0.8893555998802185, "learning_rate": 3.1569155978297463e-07, "loss": 0.5713, "step": 14043 }, { "epoch": 0.89, "grad_norm": 0.9181665182113647, "learning_rate": 3.1533286828661915e-07, "loss": 0.5584, "step": 14044 }, { "epoch": 0.89, "grad_norm": 0.8733421564102173, "learning_rate": 3.1497437404529875e-07, "loss": 0.6073, "step": 14045 }, { "epoch": 0.89, "grad_norm": 0.8900958895683289, "learning_rate": 3.1461607707410914e-07, "loss": 0.5909, "step": 14046 }, { "epoch": 0.89, "grad_norm": 0.9942273497581482, "learning_rate": 3.142579773881377e-07, "loss": 0.5821, "step": 14047 }, { "epoch": 0.89, "grad_norm": 0.87491375207901, "learning_rate": 3.1390007500246236e-07, "loss": 0.576, "step": 14048 }, { "epoch": 0.89, "grad_norm": 0.9551854729652405, "learning_rate": 3.135423699321527e-07, "loss": 0.632, "step": 14049 }, { "epoch": 0.89, "grad_norm": 0.8860867023468018, "learning_rate": 3.131848621922717e-07, "loss": 0.5733, "step": 14050 }, { "epoch": 0.89, "grad_norm": 0.8768588900566101, "learning_rate": 3.128275517978707e-07, "loss": 0.5568, "step": 14051 }, { "epoch": 0.89, "grad_norm": 0.9353142976760864, "learning_rate": 3.124704387639976e-07, "loss": 0.6497, "step": 14052 }, { "epoch": 0.89, "grad_norm": 0.9081913828849792, "learning_rate": 3.1211352310568655e-07, "loss": 0.6053, "step": 14053 }, { "epoch": 0.89, "grad_norm": 0.9168758988380432, "learning_rate": 3.1175680483796713e-07, "loss": 0.5753, "step": 14054 }, { "epoch": 0.89, "grad_norm": 0.8890372514724731, "learning_rate": 3.1140028397585953e-07, "loss": 0.5491, "step": 14055 }, { "epoch": 0.89, "grad_norm": 0.8347086906433105, "learning_rate": 3.110439605343751e-07, "loss": 0.5384, "step": 14056 }, { "epoch": 0.89, "grad_norm": 0.9348717331886292, "learning_rate": 3.1068783452851856e-07, "loss": 0.6164, "step": 14057 }, { "epoch": 0.89, "grad_norm": 0.8585134744644165, "learning_rate": 3.1033190597328456e-07, "loss": 0.6011, "step": 14058 }, { "epoch": 0.89, "grad_norm": 0.843673586845398, "learning_rate": 3.099761748836594e-07, "loss": 0.5566, "step": 14059 }, { "epoch": 0.89, "grad_norm": 0.8475186824798584, "learning_rate": 3.0962064127462167e-07, "loss": 0.5611, "step": 14060 }, { "epoch": 0.89, "grad_norm": 0.8992692232131958, "learning_rate": 3.092653051611427e-07, "loss": 0.5382, "step": 14061 }, { "epoch": 0.89, "grad_norm": 0.8602596521377563, "learning_rate": 3.089101665581834e-07, "loss": 0.6269, "step": 14062 }, { "epoch": 0.89, "grad_norm": 0.8721755146980286, "learning_rate": 3.085552254806978e-07, "loss": 0.5552, "step": 14063 }, { "epoch": 0.89, "grad_norm": 0.9432767629623413, "learning_rate": 3.0820048194363183e-07, "loss": 0.6001, "step": 14064 }, { "epoch": 0.89, "grad_norm": 0.883188009262085, "learning_rate": 3.0784593596192123e-07, "loss": 0.5695, "step": 14065 }, { "epoch": 0.89, "grad_norm": 0.8545171022415161, "learning_rate": 3.074915875504969e-07, "loss": 0.5165, "step": 14066 }, { "epoch": 0.89, "grad_norm": 0.782908022403717, "learning_rate": 3.0713743672427686e-07, "loss": 0.4764, "step": 14067 }, { "epoch": 0.89, "grad_norm": 0.8021277785301208, "learning_rate": 3.067834834981731e-07, "loss": 0.521, "step": 14068 }, { "epoch": 0.89, "grad_norm": 0.8493959903717041, "learning_rate": 3.0642972788709203e-07, "loss": 0.5231, "step": 14069 }, { "epoch": 0.89, "grad_norm": 0.8718252778053284, "learning_rate": 3.060761699059267e-07, "loss": 0.562, "step": 14070 }, { "epoch": 0.89, "grad_norm": 0.9048642516136169, "learning_rate": 3.057228095695647e-07, "loss": 0.6242, "step": 14071 }, { "epoch": 0.89, "grad_norm": 0.8449458479881287, "learning_rate": 3.053696468928857e-07, "loss": 0.5006, "step": 14072 }, { "epoch": 0.89, "grad_norm": 0.901394248008728, "learning_rate": 3.0501668189075794e-07, "loss": 0.5834, "step": 14073 }, { "epoch": 0.89, "grad_norm": 0.9242204427719116, "learning_rate": 3.0466391457804666e-07, "loss": 0.5403, "step": 14074 }, { "epoch": 0.89, "grad_norm": 0.8967319130897522, "learning_rate": 3.0431134496960333e-07, "loss": 0.6035, "step": 14075 }, { "epoch": 0.89, "grad_norm": 0.8391687870025635, "learning_rate": 3.0395897308027443e-07, "loss": 0.5579, "step": 14076 }, { "epoch": 0.89, "grad_norm": 0.9151217937469482, "learning_rate": 3.0360679892489643e-07, "loss": 0.5881, "step": 14077 }, { "epoch": 0.89, "grad_norm": 0.8429851531982422, "learning_rate": 3.032548225182985e-07, "loss": 0.5864, "step": 14078 }, { "epoch": 0.89, "grad_norm": 0.8832830786705017, "learning_rate": 3.029030438753017e-07, "loss": 0.5989, "step": 14079 }, { "epoch": 0.89, "grad_norm": 0.859230101108551, "learning_rate": 3.025514630107179e-07, "loss": 0.5483, "step": 14080 }, { "epoch": 0.89, "grad_norm": 0.8875642418861389, "learning_rate": 3.0220007993934987e-07, "loss": 0.6254, "step": 14081 }, { "epoch": 0.89, "grad_norm": 0.8424782156944275, "learning_rate": 3.018488946759951e-07, "loss": 0.5502, "step": 14082 }, { "epoch": 0.89, "grad_norm": 0.8858059644699097, "learning_rate": 3.01497907235439e-07, "loss": 0.5659, "step": 14083 }, { "epoch": 0.89, "grad_norm": 0.8075915575027466, "learning_rate": 3.0114711763246096e-07, "loss": 0.53, "step": 14084 }, { "epoch": 0.89, "grad_norm": 0.8604494333267212, "learning_rate": 3.007965258818324e-07, "loss": 0.6075, "step": 14085 }, { "epoch": 0.89, "grad_norm": 0.9078390002250671, "learning_rate": 3.0044613199831373e-07, "loss": 0.5984, "step": 14086 }, { "epoch": 0.89, "grad_norm": 0.9238450527191162, "learning_rate": 3.0009593599666044e-07, "loss": 0.6284, "step": 14087 }, { "epoch": 0.89, "grad_norm": 0.9430950284004211, "learning_rate": 2.9974593789161843e-07, "loss": 0.5938, "step": 14088 }, { "epoch": 0.89, "grad_norm": 0.9374951720237732, "learning_rate": 2.9939613769792265e-07, "loss": 0.5881, "step": 14089 }, { "epoch": 0.89, "grad_norm": 0.8708373308181763, "learning_rate": 2.9904653543030406e-07, "loss": 0.6102, "step": 14090 }, { "epoch": 0.89, "grad_norm": 0.926876425743103, "learning_rate": 2.98697131103482e-07, "loss": 0.5941, "step": 14091 }, { "epoch": 0.89, "grad_norm": 0.8974210619926453, "learning_rate": 2.983479247321691e-07, "loss": 0.6017, "step": 14092 }, { "epoch": 0.89, "grad_norm": 0.8967876434326172, "learning_rate": 2.979989163310704e-07, "loss": 0.5812, "step": 14093 }, { "epoch": 0.89, "grad_norm": 0.914035975933075, "learning_rate": 2.976501059148779e-07, "loss": 0.5896, "step": 14094 }, { "epoch": 0.89, "grad_norm": 0.8700651526451111, "learning_rate": 2.9730149349828265e-07, "loss": 0.4908, "step": 14095 }, { "epoch": 0.89, "grad_norm": 0.9540930390357971, "learning_rate": 2.969530790959624e-07, "loss": 0.5677, "step": 14096 }, { "epoch": 0.89, "grad_norm": 0.9152606725692749, "learning_rate": 2.9660486272258703e-07, "loss": 0.5638, "step": 14097 }, { "epoch": 0.89, "grad_norm": 0.9777679443359375, "learning_rate": 2.9625684439281875e-07, "loss": 0.5569, "step": 14098 }, { "epoch": 0.89, "grad_norm": 0.9192463159561157, "learning_rate": 2.959090241213114e-07, "loss": 0.608, "step": 14099 }, { "epoch": 0.89, "grad_norm": 0.9115022420883179, "learning_rate": 2.9556140192271045e-07, "loss": 0.6048, "step": 14100 }, { "epoch": 0.89, "grad_norm": 0.8501712679862976, "learning_rate": 2.9521397781165475e-07, "loss": 0.5831, "step": 14101 }, { "epoch": 0.89, "grad_norm": 0.8686020374298096, "learning_rate": 2.9486675180277035e-07, "loss": 0.5704, "step": 14102 }, { "epoch": 0.89, "grad_norm": 0.8605404496192932, "learning_rate": 2.9451972391067897e-07, "loss": 0.5463, "step": 14103 }, { "epoch": 0.89, "grad_norm": 0.873845636844635, "learning_rate": 2.941728941499938e-07, "loss": 0.531, "step": 14104 }, { "epoch": 0.89, "grad_norm": 0.934984564781189, "learning_rate": 2.938262625353172e-07, "loss": 0.5583, "step": 14105 }, { "epoch": 0.89, "grad_norm": 0.8914570808410645, "learning_rate": 2.934798290812446e-07, "loss": 0.6008, "step": 14106 }, { "epoch": 0.89, "grad_norm": 0.8697715401649475, "learning_rate": 2.931335938023644e-07, "loss": 0.5582, "step": 14107 }, { "epoch": 0.89, "grad_norm": 0.9065754413604736, "learning_rate": 2.9278755671325377e-07, "loss": 0.5708, "step": 14108 }, { "epoch": 0.89, "grad_norm": 0.8564165830612183, "learning_rate": 2.924417178284855e-07, "loss": 0.5854, "step": 14109 }, { "epoch": 0.89, "grad_norm": 0.8233086466789246, "learning_rate": 2.9209607716261856e-07, "loss": 0.5431, "step": 14110 }, { "epoch": 0.89, "grad_norm": 0.8898478746414185, "learning_rate": 2.917506347302079e-07, "loss": 0.5987, "step": 14111 }, { "epoch": 0.89, "grad_norm": 0.9087411761283875, "learning_rate": 2.9140539054580087e-07, "loss": 0.586, "step": 14112 }, { "epoch": 0.89, "grad_norm": 0.8777049779891968, "learning_rate": 2.9106034462393187e-07, "loss": 0.534, "step": 14113 }, { "epoch": 0.89, "grad_norm": 0.8712142109870911, "learning_rate": 2.9071549697913035e-07, "loss": 0.564, "step": 14114 }, { "epoch": 0.89, "grad_norm": 0.8860015869140625, "learning_rate": 2.9037084762591704e-07, "loss": 0.492, "step": 14115 }, { "epoch": 0.89, "grad_norm": 0.9180863499641418, "learning_rate": 2.900263965788036e-07, "loss": 0.6046, "step": 14116 }, { "epoch": 0.89, "grad_norm": 0.8687685132026672, "learning_rate": 2.8968214385229453e-07, "loss": 0.5626, "step": 14117 }, { "epoch": 0.89, "grad_norm": 0.9041091203689575, "learning_rate": 2.8933808946088383e-07, "loss": 0.5691, "step": 14118 }, { "epoch": 0.89, "grad_norm": 0.9091986417770386, "learning_rate": 2.889942334190593e-07, "loss": 0.5794, "step": 14119 }, { "epoch": 0.89, "grad_norm": 0.8923031687736511, "learning_rate": 2.8865057574129883e-07, "loss": 0.5731, "step": 14120 }, { "epoch": 0.89, "grad_norm": 0.8908477425575256, "learning_rate": 2.8830711644207257e-07, "loss": 0.5569, "step": 14121 }, { "epoch": 0.89, "grad_norm": 1.0278847217559814, "learning_rate": 2.8796385553584326e-07, "loss": 0.59, "step": 14122 }, { "epoch": 0.89, "grad_norm": 0.8459283709526062, "learning_rate": 2.8762079303706505e-07, "loss": 0.5899, "step": 14123 }, { "epoch": 0.89, "grad_norm": 0.8664804100990295, "learning_rate": 2.8727792896018015e-07, "loss": 0.56, "step": 14124 }, { "epoch": 0.89, "grad_norm": 0.8533109426498413, "learning_rate": 2.8693526331962875e-07, "loss": 0.5472, "step": 14125 }, { "epoch": 0.89, "grad_norm": 0.8533556461334229, "learning_rate": 2.865927961298376e-07, "loss": 0.5552, "step": 14126 }, { "epoch": 0.9, "grad_norm": 0.8829198479652405, "learning_rate": 2.8625052740522683e-07, "loss": 0.5775, "step": 14127 }, { "epoch": 0.9, "grad_norm": 0.92991703748703, "learning_rate": 2.859084571602083e-07, "loss": 0.5723, "step": 14128 }, { "epoch": 0.9, "grad_norm": 0.9061645269393921, "learning_rate": 2.8556658540918603e-07, "loss": 0.5473, "step": 14129 }, { "epoch": 0.9, "grad_norm": 0.8385295867919922, "learning_rate": 2.8522491216655403e-07, "loss": 0.5513, "step": 14130 }, { "epoch": 0.9, "grad_norm": 0.8606228828430176, "learning_rate": 2.848834374467002e-07, "loss": 0.5805, "step": 14131 }, { "epoch": 0.9, "grad_norm": 0.9587467908859253, "learning_rate": 2.8454216126400146e-07, "loss": 0.6116, "step": 14132 }, { "epoch": 0.9, "grad_norm": 0.9845794439315796, "learning_rate": 2.842010836328274e-07, "loss": 0.6159, "step": 14133 }, { "epoch": 0.9, "grad_norm": 0.8591241240501404, "learning_rate": 2.838602045675426e-07, "loss": 0.5634, "step": 14134 }, { "epoch": 0.9, "grad_norm": 0.916466236114502, "learning_rate": 2.8351952408249726e-07, "loss": 0.5489, "step": 14135 }, { "epoch": 0.9, "grad_norm": 0.851662278175354, "learning_rate": 2.831790421920377e-07, "loss": 0.5919, "step": 14136 }, { "epoch": 0.9, "grad_norm": 0.8946172595024109, "learning_rate": 2.828387589104997e-07, "loss": 0.6006, "step": 14137 }, { "epoch": 0.9, "grad_norm": 0.8727723360061646, "learning_rate": 2.824986742522118e-07, "loss": 0.5747, "step": 14138 }, { "epoch": 0.9, "grad_norm": 0.9003124237060547, "learning_rate": 2.8215878823149466e-07, "loss": 0.6208, "step": 14139 }, { "epoch": 0.9, "grad_norm": 0.9446995258331299, "learning_rate": 2.818191008626581e-07, "loss": 0.5331, "step": 14140 }, { "epoch": 0.9, "grad_norm": 0.8727278709411621, "learning_rate": 2.8147961216000497e-07, "loss": 0.5418, "step": 14141 }, { "epoch": 0.9, "grad_norm": 0.8375770449638367, "learning_rate": 2.8114032213783226e-07, "loss": 0.5109, "step": 14142 }, { "epoch": 0.9, "grad_norm": 0.8434416055679321, "learning_rate": 2.808012308104241e-07, "loss": 0.6113, "step": 14143 }, { "epoch": 0.9, "grad_norm": 0.8837141394615173, "learning_rate": 2.80462338192059e-07, "loss": 0.5899, "step": 14144 }, { "epoch": 0.9, "grad_norm": 0.8532682657241821, "learning_rate": 2.801236442970073e-07, "loss": 0.5329, "step": 14145 }, { "epoch": 0.9, "grad_norm": 0.8666834831237793, "learning_rate": 2.797851491395293e-07, "loss": 0.5825, "step": 14146 }, { "epoch": 0.9, "grad_norm": 0.9193355441093445, "learning_rate": 2.79446852733879e-07, "loss": 0.6144, "step": 14147 }, { "epoch": 0.9, "grad_norm": 0.862277090549469, "learning_rate": 2.791087550942995e-07, "loss": 0.5617, "step": 14148 }, { "epoch": 0.9, "grad_norm": 0.8777857422828674, "learning_rate": 2.7877085623502775e-07, "loss": 0.5741, "step": 14149 }, { "epoch": 0.9, "grad_norm": 0.9369240999221802, "learning_rate": 2.784331561702908e-07, "loss": 0.5701, "step": 14150 }, { "epoch": 0.9, "grad_norm": 0.8633100986480713, "learning_rate": 2.780956549143088e-07, "loss": 0.5204, "step": 14151 }, { "epoch": 0.9, "grad_norm": 0.8361502289772034, "learning_rate": 2.7775835248129267e-07, "loss": 0.5389, "step": 14152 }, { "epoch": 0.9, "grad_norm": 0.9618591070175171, "learning_rate": 2.7742124888544497e-07, "loss": 0.5818, "step": 14153 }, { "epoch": 0.9, "grad_norm": 0.8982853889465332, "learning_rate": 2.7708434414095875e-07, "loss": 0.5917, "step": 14154 }, { "epoch": 0.9, "grad_norm": 0.9148767590522766, "learning_rate": 2.7674763826202265e-07, "loss": 0.5946, "step": 14155 }, { "epoch": 0.9, "grad_norm": 0.8875370621681213, "learning_rate": 2.764111312628115e-07, "loss": 0.6017, "step": 14156 }, { "epoch": 0.9, "grad_norm": 0.9090349078178406, "learning_rate": 2.7607482315749554e-07, "loss": 0.5424, "step": 14157 }, { "epoch": 0.9, "grad_norm": 0.9224393963813782, "learning_rate": 2.757387139602352e-07, "loss": 0.5548, "step": 14158 }, { "epoch": 0.9, "grad_norm": 0.8845816850662231, "learning_rate": 2.754028036851836e-07, "loss": 0.5169, "step": 14159 }, { "epoch": 0.9, "grad_norm": 0.880143404006958, "learning_rate": 2.750670923464838e-07, "loss": 0.5661, "step": 14160 }, { "epoch": 0.9, "grad_norm": 0.9638619422912598, "learning_rate": 2.747315799582728e-07, "loss": 0.5205, "step": 14161 }, { "epoch": 0.9, "grad_norm": 0.87690269947052, "learning_rate": 2.7439626653467555e-07, "loss": 0.6158, "step": 14162 }, { "epoch": 0.9, "grad_norm": 0.9064611196517944, "learning_rate": 2.7406115208981345e-07, "loss": 0.5675, "step": 14163 }, { "epoch": 0.9, "grad_norm": 0.9225680232048035, "learning_rate": 2.7372623663779575e-07, "loss": 0.5312, "step": 14164 }, { "epoch": 0.9, "grad_norm": 0.8610286116600037, "learning_rate": 2.733915201927245e-07, "loss": 0.563, "step": 14165 }, { "epoch": 0.9, "grad_norm": 0.8890798091888428, "learning_rate": 2.7305700276869406e-07, "loss": 0.5899, "step": 14166 }, { "epoch": 0.9, "grad_norm": 0.8515585660934448, "learning_rate": 2.727226843797881e-07, "loss": 0.5643, "step": 14167 }, { "epoch": 0.9, "grad_norm": 0.8853866457939148, "learning_rate": 2.7238856504008594e-07, "loss": 0.6033, "step": 14168 }, { "epoch": 0.9, "grad_norm": 0.8813034296035767, "learning_rate": 2.7205464476365575e-07, "loss": 0.5382, "step": 14169 }, { "epoch": 0.9, "grad_norm": 0.9416490197181702, "learning_rate": 2.7172092356455626e-07, "loss": 0.6018, "step": 14170 }, { "epoch": 0.9, "grad_norm": 0.9497674703598022, "learning_rate": 2.7138740145684017e-07, "loss": 0.6127, "step": 14171 }, { "epoch": 0.9, "grad_norm": 0.9365571737289429, "learning_rate": 2.7105407845455124e-07, "loss": 0.631, "step": 14172 }, { "epoch": 0.9, "grad_norm": 0.8589094877243042, "learning_rate": 2.707209545717238e-07, "loss": 0.5762, "step": 14173 }, { "epoch": 0.9, "grad_norm": 0.8943716287612915, "learning_rate": 2.70388029822386e-07, "loss": 0.5679, "step": 14174 }, { "epoch": 0.9, "grad_norm": 0.9539296627044678, "learning_rate": 2.700553042205539e-07, "loss": 0.5642, "step": 14175 }, { "epoch": 0.9, "grad_norm": 0.8620119690895081, "learning_rate": 2.6972277778023913e-07, "loss": 0.5144, "step": 14176 }, { "epoch": 0.9, "grad_norm": 0.8798508644104004, "learning_rate": 2.693904505154432e-07, "loss": 0.5847, "step": 14177 }, { "epoch": 0.9, "grad_norm": 0.9159492254257202, "learning_rate": 2.690583224401588e-07, "loss": 0.5889, "step": 14178 }, { "epoch": 0.9, "grad_norm": 0.8451624512672424, "learning_rate": 2.687263935683704e-07, "loss": 0.5517, "step": 14179 }, { "epoch": 0.9, "grad_norm": 0.9503071308135986, "learning_rate": 2.6839466391405444e-07, "loss": 0.6165, "step": 14180 }, { "epoch": 0.9, "grad_norm": 0.8462880849838257, "learning_rate": 2.680631334911793e-07, "loss": 0.5567, "step": 14181 }, { "epoch": 0.9, "grad_norm": 0.94367516040802, "learning_rate": 2.677318023137049e-07, "loss": 0.6164, "step": 14182 }, { "epoch": 0.9, "grad_norm": 0.9024264216423035, "learning_rate": 2.674006703955817e-07, "loss": 0.5862, "step": 14183 }, { "epoch": 0.9, "grad_norm": 0.9006355404853821, "learning_rate": 2.670697377507514e-07, "loss": 0.5654, "step": 14184 }, { "epoch": 0.9, "grad_norm": 0.8944267630577087, "learning_rate": 2.667390043931517e-07, "loss": 0.5717, "step": 14185 }, { "epoch": 0.9, "grad_norm": 0.8411933779716492, "learning_rate": 2.664084703367059e-07, "loss": 0.5411, "step": 14186 }, { "epoch": 0.9, "grad_norm": 0.9336392283439636, "learning_rate": 2.6607813559533236e-07, "loss": 0.5433, "step": 14187 }, { "epoch": 0.9, "grad_norm": 0.8673104643821716, "learning_rate": 2.6574800018294043e-07, "loss": 0.5634, "step": 14188 }, { "epoch": 0.9, "grad_norm": 0.9110936522483826, "learning_rate": 2.654180641134313e-07, "loss": 0.5422, "step": 14189 }, { "epoch": 0.9, "grad_norm": 0.8549519777297974, "learning_rate": 2.650883274006966e-07, "loss": 0.5758, "step": 14190 }, { "epoch": 0.9, "grad_norm": 0.9573348164558411, "learning_rate": 2.6475879005862183e-07, "loss": 0.6086, "step": 14191 }, { "epoch": 0.9, "grad_norm": 0.8656295537948608, "learning_rate": 2.644294521010804e-07, "loss": 0.5346, "step": 14192 }, { "epoch": 0.9, "grad_norm": 0.9295397996902466, "learning_rate": 2.6410031354194175e-07, "loss": 0.6098, "step": 14193 }, { "epoch": 0.9, "grad_norm": 0.8475077748298645, "learning_rate": 2.6377137439506373e-07, "loss": 0.5531, "step": 14194 }, { "epoch": 0.9, "grad_norm": 0.9059809446334839, "learning_rate": 2.634426346742969e-07, "loss": 0.5737, "step": 14195 }, { "epoch": 0.9, "grad_norm": 0.9273040890693665, "learning_rate": 2.6311409439348403e-07, "loss": 0.5759, "step": 14196 }, { "epoch": 0.9, "grad_norm": 0.8897231221199036, "learning_rate": 2.6278575356645687e-07, "loss": 0.5843, "step": 14197 }, { "epoch": 0.9, "grad_norm": 0.9147869348526001, "learning_rate": 2.624576122070427e-07, "loss": 0.6142, "step": 14198 }, { "epoch": 0.9, "grad_norm": 0.8592173457145691, "learning_rate": 2.621296703290588e-07, "loss": 0.5259, "step": 14199 }, { "epoch": 0.9, "grad_norm": 0.8588521480560303, "learning_rate": 2.6180192794631133e-07, "loss": 0.5644, "step": 14200 }, { "epoch": 0.9, "grad_norm": 0.913691520690918, "learning_rate": 2.6147438507260205e-07, "loss": 0.5955, "step": 14201 }, { "epoch": 0.9, "grad_norm": 0.9359204769134521, "learning_rate": 2.611470417217227e-07, "loss": 0.563, "step": 14202 }, { "epoch": 0.9, "grad_norm": 0.8501827120780945, "learning_rate": 2.6081989790745554e-07, "loss": 0.553, "step": 14203 }, { "epoch": 0.9, "grad_norm": 0.8793197274208069, "learning_rate": 2.6049295364357684e-07, "loss": 0.6034, "step": 14204 }, { "epoch": 0.9, "grad_norm": 0.8312693238258362, "learning_rate": 2.6016620894385113e-07, "loss": 0.5353, "step": 14205 }, { "epoch": 0.9, "grad_norm": 0.8711232542991638, "learning_rate": 2.59839663822038e-07, "loss": 0.5567, "step": 14206 }, { "epoch": 0.9, "grad_norm": 0.9392272233963013, "learning_rate": 2.5951331829188797e-07, "loss": 0.5764, "step": 14207 }, { "epoch": 0.9, "grad_norm": 0.8943392634391785, "learning_rate": 2.591871723671402e-07, "loss": 0.5447, "step": 14208 }, { "epoch": 0.9, "grad_norm": 0.8029001355171204, "learning_rate": 2.5886122606152866e-07, "loss": 0.5799, "step": 14209 }, { "epoch": 0.9, "grad_norm": 0.8463373780250549, "learning_rate": 2.585354793887779e-07, "loss": 0.5161, "step": 14210 }, { "epoch": 0.9, "grad_norm": 0.9136335849761963, "learning_rate": 2.5820993236260305e-07, "loss": 0.5433, "step": 14211 }, { "epoch": 0.9, "grad_norm": 0.8141673803329468, "learning_rate": 2.5788458499671376e-07, "loss": 0.5733, "step": 14212 }, { "epoch": 0.9, "grad_norm": 0.9717278480529785, "learning_rate": 2.5755943730480735e-07, "loss": 0.5817, "step": 14213 }, { "epoch": 0.9, "grad_norm": 0.91008460521698, "learning_rate": 2.5723448930057405e-07, "loss": 0.6328, "step": 14214 }, { "epoch": 0.9, "grad_norm": 0.9435662031173706, "learning_rate": 2.569097409976995e-07, "loss": 0.5827, "step": 14215 }, { "epoch": 0.9, "grad_norm": 0.8856955766677856, "learning_rate": 2.5658519240985444e-07, "loss": 0.5993, "step": 14216 }, { "epoch": 0.9, "grad_norm": 0.8379449248313904, "learning_rate": 2.5626084355070634e-07, "loss": 0.596, "step": 14217 }, { "epoch": 0.9, "grad_norm": 0.8931264281272888, "learning_rate": 2.5593669443391145e-07, "loss": 0.6241, "step": 14218 }, { "epoch": 0.9, "grad_norm": 1.0004993677139282, "learning_rate": 2.556127450731194e-07, "loss": 0.6224, "step": 14219 }, { "epoch": 0.9, "grad_norm": 0.9444043040275574, "learning_rate": 2.552889954819704e-07, "loss": 0.56, "step": 14220 }, { "epoch": 0.9, "grad_norm": 0.9467916488647461, "learning_rate": 2.5496544567409577e-07, "loss": 0.593, "step": 14221 }, { "epoch": 0.9, "grad_norm": 0.9097285866737366, "learning_rate": 2.5464209566311847e-07, "loss": 0.5677, "step": 14222 }, { "epoch": 0.9, "grad_norm": 1.0071593523025513, "learning_rate": 2.5431894546265654e-07, "loss": 0.6015, "step": 14223 }, { "epoch": 0.9, "grad_norm": 0.8538757562637329, "learning_rate": 2.5399599508631356e-07, "loss": 0.5435, "step": 14224 }, { "epoch": 0.9, "grad_norm": 0.8897154331207275, "learning_rate": 2.5367324454768916e-07, "loss": 0.6174, "step": 14225 }, { "epoch": 0.9, "grad_norm": 0.8830700516700745, "learning_rate": 2.5335069386037414e-07, "loss": 0.604, "step": 14226 }, { "epoch": 0.9, "grad_norm": 0.8626001477241516, "learning_rate": 2.530283430379471e-07, "loss": 0.5986, "step": 14227 }, { "epoch": 0.9, "grad_norm": 0.8587076663970947, "learning_rate": 2.5270619209398497e-07, "loss": 0.5063, "step": 14228 }, { "epoch": 0.9, "grad_norm": 0.9192159175872803, "learning_rate": 2.523842410420496e-07, "loss": 0.6178, "step": 14229 }, { "epoch": 0.9, "grad_norm": 0.9127901196479797, "learning_rate": 2.5206248989569803e-07, "loss": 0.5617, "step": 14230 }, { "epoch": 0.9, "grad_norm": 0.9163671135902405, "learning_rate": 2.5174093866847826e-07, "loss": 0.5319, "step": 14231 }, { "epoch": 0.9, "grad_norm": 0.8983326554298401, "learning_rate": 2.5141958737392947e-07, "loss": 0.5889, "step": 14232 }, { "epoch": 0.9, "grad_norm": 0.9140615463256836, "learning_rate": 2.5109843602558247e-07, "loss": 0.5825, "step": 14233 }, { "epoch": 0.9, "grad_norm": 0.837908148765564, "learning_rate": 2.507774846369615e-07, "loss": 0.5451, "step": 14234 }, { "epoch": 0.9, "grad_norm": 0.9031140804290771, "learning_rate": 2.5045673322157735e-07, "loss": 0.5876, "step": 14235 }, { "epoch": 0.9, "grad_norm": 0.8454420566558838, "learning_rate": 2.501361817929393e-07, "loss": 0.5165, "step": 14236 }, { "epoch": 0.9, "grad_norm": 0.8986586332321167, "learning_rate": 2.4981583036454203e-07, "loss": 0.554, "step": 14237 }, { "epoch": 0.9, "grad_norm": 0.8993757367134094, "learning_rate": 2.494956789498759e-07, "loss": 0.5715, "step": 14238 }, { "epoch": 0.9, "grad_norm": 0.9246693253517151, "learning_rate": 2.491757275624207e-07, "loss": 0.5542, "step": 14239 }, { "epoch": 0.9, "grad_norm": 0.961254894733429, "learning_rate": 2.4885597621564896e-07, "loss": 0.6091, "step": 14240 }, { "epoch": 0.9, "grad_norm": 0.9174337387084961, "learning_rate": 2.485364249230238e-07, "loss": 0.5664, "step": 14241 }, { "epoch": 0.9, "grad_norm": 0.824385941028595, "learning_rate": 2.4821707369800163e-07, "loss": 0.5478, "step": 14242 }, { "epoch": 0.9, "grad_norm": 0.868877649307251, "learning_rate": 2.478979225540268e-07, "loss": 0.5719, "step": 14243 }, { "epoch": 0.9, "grad_norm": 0.9093589782714844, "learning_rate": 2.475789715045401e-07, "loss": 0.5828, "step": 14244 }, { "epoch": 0.9, "grad_norm": 0.881280243396759, "learning_rate": 2.472602205629698e-07, "loss": 0.5403, "step": 14245 }, { "epoch": 0.9, "grad_norm": 0.9545583128929138, "learning_rate": 2.469416697427379e-07, "loss": 0.5959, "step": 14246 }, { "epoch": 0.9, "grad_norm": 0.9496628642082214, "learning_rate": 2.466233190572581e-07, "loss": 0.5762, "step": 14247 }, { "epoch": 0.9, "grad_norm": 0.8831350803375244, "learning_rate": 2.463051685199341e-07, "loss": 0.6048, "step": 14248 }, { "epoch": 0.9, "grad_norm": 0.9093460440635681, "learning_rate": 2.4598721814416306e-07, "loss": 0.5595, "step": 14249 }, { "epoch": 0.9, "grad_norm": 0.8506335616111755, "learning_rate": 2.4566946794333247e-07, "loss": 0.579, "step": 14250 }, { "epoch": 0.9, "grad_norm": 0.9103783369064331, "learning_rate": 2.4535191793082116e-07, "loss": 0.5803, "step": 14251 }, { "epoch": 0.9, "grad_norm": 0.8295513391494751, "learning_rate": 2.4503456812e-07, "loss": 0.5156, "step": 14252 }, { "epoch": 0.9, "grad_norm": 0.956263542175293, "learning_rate": 2.447174185242324e-07, "loss": 0.5824, "step": 14253 }, { "epoch": 0.9, "grad_norm": 0.8482615947723389, "learning_rate": 2.4440046915687135e-07, "loss": 0.531, "step": 14254 }, { "epoch": 0.9, "grad_norm": 0.8663813471794128, "learning_rate": 2.4408372003126345e-07, "loss": 0.6005, "step": 14255 }, { "epoch": 0.9, "grad_norm": 0.9090369939804077, "learning_rate": 2.4376717116074533e-07, "loss": 0.5589, "step": 14256 }, { "epoch": 0.9, "grad_norm": 0.803523600101471, "learning_rate": 2.434508225586457e-07, "loss": 0.5677, "step": 14257 }, { "epoch": 0.9, "grad_norm": 0.8888107538223267, "learning_rate": 2.431346742382856e-07, "loss": 0.5486, "step": 14258 }, { "epoch": 0.9, "grad_norm": 0.874443769454956, "learning_rate": 2.428187262129761e-07, "loss": 0.5408, "step": 14259 }, { "epoch": 0.9, "grad_norm": 0.8557073473930359, "learning_rate": 2.4250297849602145e-07, "loss": 0.6157, "step": 14260 }, { "epoch": 0.9, "grad_norm": 0.890663743019104, "learning_rate": 2.421874311007155e-07, "loss": 0.5452, "step": 14261 }, { "epoch": 0.9, "grad_norm": 0.9649395942687988, "learning_rate": 2.41872084040346e-07, "loss": 0.5703, "step": 14262 }, { "epoch": 0.9, "grad_norm": 0.8820181488990784, "learning_rate": 2.4155693732819065e-07, "loss": 0.5555, "step": 14263 }, { "epoch": 0.9, "grad_norm": 0.9557092189788818, "learning_rate": 2.412419909775199e-07, "loss": 0.5643, "step": 14264 }, { "epoch": 0.9, "grad_norm": 0.8582893013954163, "learning_rate": 2.4092724500159315e-07, "loss": 0.5908, "step": 14265 }, { "epoch": 0.9, "grad_norm": 0.8829105496406555, "learning_rate": 2.406126994136654e-07, "loss": 0.5481, "step": 14266 }, { "epoch": 0.9, "grad_norm": 0.8586880564689636, "learning_rate": 2.402983542269799e-07, "loss": 0.5469, "step": 14267 }, { "epoch": 0.9, "grad_norm": 0.9591865539550781, "learning_rate": 2.3998420945477276e-07, "loss": 0.5816, "step": 14268 }, { "epoch": 0.9, "grad_norm": 0.8931147456169128, "learning_rate": 2.3967026511027224e-07, "loss": 0.6198, "step": 14269 }, { "epoch": 0.9, "grad_norm": 0.8495928645133972, "learning_rate": 2.393565212066962e-07, "loss": 0.5475, "step": 14270 }, { "epoch": 0.9, "grad_norm": 0.8455556035041809, "learning_rate": 2.3904297775725614e-07, "loss": 0.5551, "step": 14271 }, { "epoch": 0.9, "grad_norm": 0.8738865256309509, "learning_rate": 2.3872963477515497e-07, "loss": 0.5653, "step": 14272 }, { "epoch": 0.9, "grad_norm": 0.89951092004776, "learning_rate": 2.3841649227358489e-07, "loss": 0.556, "step": 14273 }, { "epoch": 0.9, "grad_norm": 0.8624039888381958, "learning_rate": 2.3810355026573195e-07, "loss": 0.5726, "step": 14274 }, { "epoch": 0.9, "grad_norm": 0.9102184176445007, "learning_rate": 2.377908087647729e-07, "loss": 0.5875, "step": 14275 }, { "epoch": 0.9, "grad_norm": 0.8978198766708374, "learning_rate": 2.374782677838766e-07, "loss": 0.5379, "step": 14276 }, { "epoch": 0.9, "grad_norm": 0.8829779028892517, "learning_rate": 2.3716592733620315e-07, "loss": 0.6237, "step": 14277 }, { "epoch": 0.9, "grad_norm": 0.9458640217781067, "learning_rate": 2.3685378743490306e-07, "loss": 0.5471, "step": 14278 }, { "epoch": 0.9, "grad_norm": 0.9152674078941345, "learning_rate": 2.3654184809312032e-07, "loss": 0.563, "step": 14279 }, { "epoch": 0.9, "grad_norm": 0.7956060171127319, "learning_rate": 2.362301093239905e-07, "loss": 0.514, "step": 14280 }, { "epoch": 0.9, "grad_norm": 0.8902352452278137, "learning_rate": 2.359185711406381e-07, "loss": 0.5497, "step": 14281 }, { "epoch": 0.9, "grad_norm": 0.8263982534408569, "learning_rate": 2.3560723355618152e-07, "loss": 0.5835, "step": 14282 }, { "epoch": 0.9, "grad_norm": 0.8512941002845764, "learning_rate": 2.3529609658373032e-07, "loss": 0.5797, "step": 14283 }, { "epoch": 0.9, "grad_norm": 0.9477369785308838, "learning_rate": 2.3498516023638562e-07, "loss": 0.5849, "step": 14284 }, { "epoch": 0.91, "grad_norm": 0.9041728377342224, "learning_rate": 2.3467442452723976e-07, "loss": 0.5284, "step": 14285 }, { "epoch": 0.91, "grad_norm": 0.9231773614883423, "learning_rate": 2.3436388946937504e-07, "loss": 0.533, "step": 14286 }, { "epoch": 0.91, "grad_norm": 0.8928778171539307, "learning_rate": 2.3405355507586992e-07, "loss": 0.5975, "step": 14287 }, { "epoch": 0.91, "grad_norm": 0.8574181795120239, "learning_rate": 2.3374342135979e-07, "loss": 0.5832, "step": 14288 }, { "epoch": 0.91, "grad_norm": 0.900062620639801, "learning_rate": 2.3343348833419377e-07, "loss": 0.6058, "step": 14289 }, { "epoch": 0.91, "grad_norm": 0.9051018953323364, "learning_rate": 2.3312375601213134e-07, "loss": 0.5408, "step": 14290 }, { "epoch": 0.91, "grad_norm": 0.9527705311775208, "learning_rate": 2.3281422440664503e-07, "loss": 0.5875, "step": 14291 }, { "epoch": 0.91, "grad_norm": 0.9164147973060608, "learning_rate": 2.3250489353076777e-07, "loss": 0.5846, "step": 14292 }, { "epoch": 0.91, "grad_norm": 0.9179802536964417, "learning_rate": 2.3219576339752525e-07, "loss": 0.58, "step": 14293 }, { "epoch": 0.91, "grad_norm": 0.9129411578178406, "learning_rate": 2.3188683401993261e-07, "loss": 0.5996, "step": 14294 }, { "epoch": 0.91, "grad_norm": 0.8692997097969055, "learning_rate": 2.3157810541099724e-07, "loss": 0.5227, "step": 14295 }, { "epoch": 0.91, "grad_norm": 0.8740088939666748, "learning_rate": 2.3126957758372149e-07, "loss": 0.5473, "step": 14296 }, { "epoch": 0.91, "grad_norm": 0.908848762512207, "learning_rate": 2.3096125055109386e-07, "loss": 0.543, "step": 14297 }, { "epoch": 0.91, "grad_norm": 0.8550407290458679, "learning_rate": 2.3065312432609788e-07, "loss": 0.557, "step": 14298 }, { "epoch": 0.91, "grad_norm": 0.852614164352417, "learning_rate": 2.3034519892170705e-07, "loss": 0.5691, "step": 14299 }, { "epoch": 0.91, "grad_norm": 0.8640037775039673, "learning_rate": 2.3003747435088764e-07, "loss": 0.5388, "step": 14300 }, { "epoch": 0.91, "grad_norm": 0.8585585355758667, "learning_rate": 2.2972995062659764e-07, "loss": 0.5929, "step": 14301 }, { "epoch": 0.91, "grad_norm": 0.9112229943275452, "learning_rate": 2.2942262776178392e-07, "loss": 0.5954, "step": 14302 }, { "epoch": 0.91, "grad_norm": 0.8441083431243896, "learning_rate": 2.291155057693878e-07, "loss": 0.571, "step": 14303 }, { "epoch": 0.91, "grad_norm": 0.922584593296051, "learning_rate": 2.2880858466234114e-07, "loss": 0.5756, "step": 14304 }, { "epoch": 0.91, "grad_norm": 0.860567569732666, "learning_rate": 2.2850186445356693e-07, "loss": 0.5948, "step": 14305 }, { "epoch": 0.91, "grad_norm": 0.8519131541252136, "learning_rate": 2.281953451559804e-07, "loss": 0.5374, "step": 14306 }, { "epoch": 0.91, "grad_norm": 0.9286757111549377, "learning_rate": 2.2788902678248904e-07, "loss": 0.6002, "step": 14307 }, { "epoch": 0.91, "grad_norm": 0.8797339797019958, "learning_rate": 2.2758290934598805e-07, "loss": 0.5811, "step": 14308 }, { "epoch": 0.91, "grad_norm": 0.871191680431366, "learning_rate": 2.2727699285937043e-07, "loss": 0.5332, "step": 14309 }, { "epoch": 0.91, "grad_norm": 1.0105799436569214, "learning_rate": 2.2697127733551483e-07, "loss": 0.6011, "step": 14310 }, { "epoch": 0.91, "grad_norm": 0.8978657722473145, "learning_rate": 2.2666576278729424e-07, "loss": 0.6085, "step": 14311 }, { "epoch": 0.91, "grad_norm": 0.9255068302154541, "learning_rate": 2.2636044922757339e-07, "loss": 0.5758, "step": 14312 }, { "epoch": 0.91, "grad_norm": 0.8957815170288086, "learning_rate": 2.2605533666920753e-07, "loss": 0.5948, "step": 14313 }, { "epoch": 0.91, "grad_norm": 0.9146010279655457, "learning_rate": 2.257504251250442e-07, "loss": 0.5328, "step": 14314 }, { "epoch": 0.91, "grad_norm": 0.8640668392181396, "learning_rate": 2.2544571460792308e-07, "loss": 0.5836, "step": 14315 }, { "epoch": 0.91, "grad_norm": 0.8612959980964661, "learning_rate": 2.251412051306717e-07, "loss": 0.5623, "step": 14316 }, { "epoch": 0.91, "grad_norm": 0.8819116950035095, "learning_rate": 2.2483689670611542e-07, "loss": 0.6256, "step": 14317 }, { "epoch": 0.91, "grad_norm": 0.9524977207183838, "learning_rate": 2.2453278934706446e-07, "loss": 0.6195, "step": 14318 }, { "epoch": 0.91, "grad_norm": 0.912262499332428, "learning_rate": 2.2422888306632584e-07, "loss": 0.5568, "step": 14319 }, { "epoch": 0.91, "grad_norm": 0.8547895550727844, "learning_rate": 2.2392517787669487e-07, "loss": 0.5127, "step": 14320 }, { "epoch": 0.91, "grad_norm": 0.9063989520072937, "learning_rate": 2.2362167379096023e-07, "loss": 0.5975, "step": 14321 }, { "epoch": 0.91, "grad_norm": 0.8552689552307129, "learning_rate": 2.2331837082190056e-07, "loss": 0.5232, "step": 14322 }, { "epoch": 0.91, "grad_norm": 0.8747928142547607, "learning_rate": 2.2301526898228842e-07, "loss": 0.5471, "step": 14323 }, { "epoch": 0.91, "grad_norm": 0.879938006401062, "learning_rate": 2.2271236828488474e-07, "loss": 0.5942, "step": 14324 }, { "epoch": 0.91, "grad_norm": 0.9146292209625244, "learning_rate": 2.224096687424443e-07, "loss": 0.57, "step": 14325 }, { "epoch": 0.91, "grad_norm": 0.8963407874107361, "learning_rate": 2.2210717036771246e-07, "loss": 0.549, "step": 14326 }, { "epoch": 0.91, "grad_norm": 0.9316021800041199, "learning_rate": 2.218048731734268e-07, "loss": 0.6144, "step": 14327 }, { "epoch": 0.91, "grad_norm": 0.8399151563644409, "learning_rate": 2.215027771723155e-07, "loss": 0.5571, "step": 14328 }, { "epoch": 0.91, "grad_norm": 0.8416658043861389, "learning_rate": 2.2120088237709946e-07, "loss": 0.5699, "step": 14329 }, { "epoch": 0.91, "grad_norm": 0.9296332597732544, "learning_rate": 2.2089918880049023e-07, "loss": 0.5445, "step": 14330 }, { "epoch": 0.91, "grad_norm": 0.8599060773849487, "learning_rate": 2.205976964551909e-07, "loss": 0.5574, "step": 14331 }, { "epoch": 0.91, "grad_norm": 0.9314765334129333, "learning_rate": 2.2029640535389586e-07, "loss": 0.5455, "step": 14332 }, { "epoch": 0.91, "grad_norm": 0.8735009431838989, "learning_rate": 2.1999531550929098e-07, "loss": 0.5763, "step": 14333 }, { "epoch": 0.91, "grad_norm": 0.8349171280860901, "learning_rate": 2.1969442693405673e-07, "loss": 0.5455, "step": 14334 }, { "epoch": 0.91, "grad_norm": 0.8516371846199036, "learning_rate": 2.1939373964085964e-07, "loss": 0.6168, "step": 14335 }, { "epoch": 0.91, "grad_norm": 0.8744125366210938, "learning_rate": 2.190932536423618e-07, "loss": 0.5543, "step": 14336 }, { "epoch": 0.91, "grad_norm": 0.9350723028182983, "learning_rate": 2.1879296895121637e-07, "loss": 0.6037, "step": 14337 }, { "epoch": 0.91, "grad_norm": 0.8729871511459351, "learning_rate": 2.1849288558006442e-07, "loss": 0.5759, "step": 14338 }, { "epoch": 0.91, "grad_norm": 0.9231459498405457, "learning_rate": 2.1819300354154526e-07, "loss": 0.6121, "step": 14339 }, { "epoch": 0.91, "grad_norm": 0.941109299659729, "learning_rate": 2.1789332284828323e-07, "loss": 0.5852, "step": 14340 }, { "epoch": 0.91, "grad_norm": 0.7934569716453552, "learning_rate": 2.175938435128977e-07, "loss": 0.501, "step": 14341 }, { "epoch": 0.91, "grad_norm": 0.8977835774421692, "learning_rate": 2.1729456554799855e-07, "loss": 0.5864, "step": 14342 }, { "epoch": 0.91, "grad_norm": 0.9556468725204468, "learning_rate": 2.1699548896618795e-07, "loss": 0.6184, "step": 14343 }, { "epoch": 0.91, "grad_norm": 0.8871400952339172, "learning_rate": 2.1669661378005802e-07, "loss": 0.55, "step": 14344 }, { "epoch": 0.91, "grad_norm": 0.8647100329399109, "learning_rate": 2.1639794000219426e-07, "loss": 0.5605, "step": 14345 }, { "epoch": 0.91, "grad_norm": 0.9198769330978394, "learning_rate": 2.1609946764517108e-07, "loss": 0.5757, "step": 14346 }, { "epoch": 0.91, "grad_norm": 0.9258266687393188, "learning_rate": 2.1580119672155898e-07, "loss": 0.6241, "step": 14347 }, { "epoch": 0.91, "grad_norm": 0.8942254781723022, "learning_rate": 2.1550312724391452e-07, "loss": 0.5967, "step": 14348 }, { "epoch": 0.91, "grad_norm": 0.8659271597862244, "learning_rate": 2.152052592247894e-07, "loss": 0.5591, "step": 14349 }, { "epoch": 0.91, "grad_norm": 0.86356520652771, "learning_rate": 2.1490759267672634e-07, "loss": 0.5706, "step": 14350 }, { "epoch": 0.91, "grad_norm": 0.9083942770957947, "learning_rate": 2.1461012761225696e-07, "loss": 0.5906, "step": 14351 }, { "epoch": 0.91, "grad_norm": 0.8626084327697754, "learning_rate": 2.143128640439085e-07, "loss": 0.5377, "step": 14352 }, { "epoch": 0.91, "grad_norm": 0.8646213412284851, "learning_rate": 2.1401580198419812e-07, "loss": 0.5453, "step": 14353 }, { "epoch": 0.91, "grad_norm": 0.8685572147369385, "learning_rate": 2.1371894144563254e-07, "loss": 0.5698, "step": 14354 }, { "epoch": 0.91, "grad_norm": 0.8844971060752869, "learning_rate": 2.1342228244071173e-07, "loss": 0.5964, "step": 14355 }, { "epoch": 0.91, "grad_norm": 0.8692290186882019, "learning_rate": 2.1312582498192792e-07, "loss": 0.5788, "step": 14356 }, { "epoch": 0.91, "grad_norm": 0.9128808975219727, "learning_rate": 2.1282956908176277e-07, "loss": 0.5831, "step": 14357 }, { "epoch": 0.91, "grad_norm": 0.8476049304008484, "learning_rate": 2.125335147526919e-07, "loss": 0.5757, "step": 14358 }, { "epoch": 0.91, "grad_norm": 0.9030495882034302, "learning_rate": 2.122376620071792e-07, "loss": 0.6114, "step": 14359 }, { "epoch": 0.91, "grad_norm": 0.9279770851135254, "learning_rate": 2.1194201085768363e-07, "loss": 0.5573, "step": 14360 }, { "epoch": 0.91, "grad_norm": 0.915275514125824, "learning_rate": 2.1164656131665407e-07, "loss": 0.5517, "step": 14361 }, { "epoch": 0.91, "grad_norm": 0.8605398535728455, "learning_rate": 2.1135131339652947e-07, "loss": 0.5765, "step": 14362 }, { "epoch": 0.91, "grad_norm": 0.8572301864624023, "learning_rate": 2.1105626710974325e-07, "loss": 0.5418, "step": 14363 }, { "epoch": 0.91, "grad_norm": 1.0193010568618774, "learning_rate": 2.1076142246871766e-07, "loss": 0.523, "step": 14364 }, { "epoch": 0.91, "grad_norm": 0.8625369668006897, "learning_rate": 2.1046677948586836e-07, "loss": 0.5679, "step": 14365 }, { "epoch": 0.91, "grad_norm": 0.878371000289917, "learning_rate": 2.1017233817360149e-07, "loss": 0.5779, "step": 14366 }, { "epoch": 0.91, "grad_norm": 0.9470487833023071, "learning_rate": 2.098780985443144e-07, "loss": 0.6326, "step": 14367 }, { "epoch": 0.91, "grad_norm": 0.8446786999702454, "learning_rate": 2.095840606103966e-07, "loss": 0.5459, "step": 14368 }, { "epoch": 0.91, "grad_norm": 0.862938642501831, "learning_rate": 2.092902243842304e-07, "loss": 0.5463, "step": 14369 }, { "epoch": 0.91, "grad_norm": 0.9745242595672607, "learning_rate": 2.0899658987818705e-07, "loss": 0.5714, "step": 14370 }, { "epoch": 0.91, "grad_norm": 0.9046638607978821, "learning_rate": 2.0870315710462996e-07, "loss": 0.5915, "step": 14371 }, { "epoch": 0.91, "grad_norm": 0.9039925932884216, "learning_rate": 2.0840992607591593e-07, "loss": 0.5574, "step": 14372 }, { "epoch": 0.91, "grad_norm": 0.9303500056266785, "learning_rate": 2.081168968043906e-07, "loss": 0.5953, "step": 14373 }, { "epoch": 0.91, "grad_norm": 0.9302768707275391, "learning_rate": 2.0782406930239363e-07, "loss": 0.6172, "step": 14374 }, { "epoch": 0.91, "grad_norm": 0.8675611019134521, "learning_rate": 2.0753144358225397e-07, "loss": 0.5759, "step": 14375 }, { "epoch": 0.91, "grad_norm": 0.9458869695663452, "learning_rate": 2.072390196562929e-07, "loss": 0.5794, "step": 14376 }, { "epoch": 0.91, "grad_norm": 0.934730052947998, "learning_rate": 2.0694679753682445e-07, "loss": 0.5584, "step": 14377 }, { "epoch": 0.91, "grad_norm": 0.8245408535003662, "learning_rate": 2.0665477723615268e-07, "loss": 0.5508, "step": 14378 }, { "epoch": 0.91, "grad_norm": 0.8302925229072571, "learning_rate": 2.063629587665733e-07, "loss": 0.5543, "step": 14379 }, { "epoch": 0.91, "grad_norm": 0.8917962908744812, "learning_rate": 2.0607134214037373e-07, "loss": 0.5144, "step": 14380 }, { "epoch": 0.91, "grad_norm": 0.830355703830719, "learning_rate": 2.05779927369833e-07, "loss": 0.5366, "step": 14381 }, { "epoch": 0.91, "grad_norm": 0.8706912994384766, "learning_rate": 2.054887144672224e-07, "loss": 0.5151, "step": 14382 }, { "epoch": 0.91, "grad_norm": 0.8185093402862549, "learning_rate": 2.0519770344480272e-07, "loss": 0.5727, "step": 14383 }, { "epoch": 0.91, "grad_norm": 0.876105785369873, "learning_rate": 2.0490689431482746e-07, "loss": 0.541, "step": 14384 }, { "epoch": 0.91, "grad_norm": 0.899113118648529, "learning_rate": 2.0461628708954183e-07, "loss": 0.6337, "step": 14385 }, { "epoch": 0.91, "grad_norm": 0.8771045207977295, "learning_rate": 2.0432588178118274e-07, "loss": 0.5753, "step": 14386 }, { "epoch": 0.91, "grad_norm": 0.8254141211509705, "learning_rate": 2.0403567840197813e-07, "loss": 0.5234, "step": 14387 }, { "epoch": 0.91, "grad_norm": 0.9506052732467651, "learning_rate": 2.0374567696414716e-07, "loss": 0.5637, "step": 14388 }, { "epoch": 0.91, "grad_norm": 0.9225839972496033, "learning_rate": 2.0345587747990004e-07, "loss": 0.5918, "step": 14389 }, { "epoch": 0.91, "grad_norm": 0.9083296656608582, "learning_rate": 2.0316627996144035e-07, "loss": 0.5388, "step": 14390 }, { "epoch": 0.91, "grad_norm": 0.9312944412231445, "learning_rate": 2.028768844209622e-07, "loss": 0.5816, "step": 14391 }, { "epoch": 0.91, "grad_norm": 0.9078527092933655, "learning_rate": 2.0258769087065034e-07, "loss": 0.62, "step": 14392 }, { "epoch": 0.91, "grad_norm": 0.8647677898406982, "learning_rate": 2.022986993226811e-07, "loss": 0.5776, "step": 14393 }, { "epoch": 0.91, "grad_norm": 0.8233307600021362, "learning_rate": 2.020099097892242e-07, "loss": 0.5451, "step": 14394 }, { "epoch": 0.91, "grad_norm": 0.8663762211799622, "learning_rate": 2.0172132228243878e-07, "loss": 0.6064, "step": 14395 }, { "epoch": 0.91, "grad_norm": 0.8814685940742493, "learning_rate": 2.014329368144774e-07, "loss": 0.5361, "step": 14396 }, { "epoch": 0.91, "grad_norm": 0.9814503788948059, "learning_rate": 2.0114475339748085e-07, "loss": 0.6066, "step": 14397 }, { "epoch": 0.91, "grad_norm": 0.8616315126419067, "learning_rate": 2.0085677204358445e-07, "loss": 0.5469, "step": 14398 }, { "epoch": 0.91, "grad_norm": 0.8662075400352478, "learning_rate": 2.005689927649157e-07, "loss": 0.601, "step": 14399 }, { "epoch": 0.91, "grad_norm": 0.8765701055526733, "learning_rate": 2.0028141557358992e-07, "loss": 0.5845, "step": 14400 }, { "epoch": 0.91, "grad_norm": 0.9240060448646545, "learning_rate": 1.999940404817169e-07, "loss": 0.5803, "step": 14401 }, { "epoch": 0.91, "grad_norm": 0.8918695449829102, "learning_rate": 1.9970686750139633e-07, "loss": 0.5592, "step": 14402 }, { "epoch": 0.91, "grad_norm": 0.8836629986763, "learning_rate": 1.994198966447214e-07, "loss": 0.6042, "step": 14403 }, { "epoch": 0.91, "grad_norm": 0.8865856528282166, "learning_rate": 1.991331279237746e-07, "loss": 0.6085, "step": 14404 }, { "epoch": 0.91, "grad_norm": 0.8742964863777161, "learning_rate": 1.988465613506302e-07, "loss": 0.5309, "step": 14405 }, { "epoch": 0.91, "grad_norm": 0.9155290126800537, "learning_rate": 1.9856019693735463e-07, "loss": 0.6035, "step": 14406 }, { "epoch": 0.91, "grad_norm": 0.8845750093460083, "learning_rate": 1.982740346960077e-07, "loss": 0.5975, "step": 14407 }, { "epoch": 0.91, "grad_norm": 1.014074683189392, "learning_rate": 1.9798807463863589e-07, "loss": 0.615, "step": 14408 }, { "epoch": 0.91, "grad_norm": 0.8332728147506714, "learning_rate": 1.977023167772818e-07, "loss": 0.551, "step": 14409 }, { "epoch": 0.91, "grad_norm": 0.8898774981498718, "learning_rate": 1.9741676112397688e-07, "loss": 0.5352, "step": 14410 }, { "epoch": 0.91, "grad_norm": 0.8795101046562195, "learning_rate": 1.9713140769074546e-07, "loss": 0.5739, "step": 14411 }, { "epoch": 0.91, "grad_norm": 0.9184449315071106, "learning_rate": 1.9684625648960287e-07, "loss": 0.5652, "step": 14412 }, { "epoch": 0.91, "grad_norm": 0.837814450263977, "learning_rate": 1.965613075325551e-07, "loss": 0.5506, "step": 14413 }, { "epoch": 0.91, "grad_norm": 0.9577629566192627, "learning_rate": 1.9627656083160085e-07, "loss": 0.6132, "step": 14414 }, { "epoch": 0.91, "grad_norm": 0.8790633678436279, "learning_rate": 1.9599201639872943e-07, "loss": 0.5509, "step": 14415 }, { "epoch": 0.91, "grad_norm": 0.8492127060890198, "learning_rate": 1.9570767424592186e-07, "loss": 0.5252, "step": 14416 }, { "epoch": 0.91, "grad_norm": 0.9555572271347046, "learning_rate": 1.9542353438515183e-07, "loss": 0.5623, "step": 14417 }, { "epoch": 0.91, "grad_norm": 0.8835919499397278, "learning_rate": 1.9513959682838314e-07, "loss": 0.5849, "step": 14418 }, { "epoch": 0.91, "grad_norm": 0.882376492023468, "learning_rate": 1.9485586158757009e-07, "loss": 0.5733, "step": 14419 }, { "epoch": 0.91, "grad_norm": 0.944187343120575, "learning_rate": 1.9457232867466204e-07, "loss": 0.6102, "step": 14420 }, { "epoch": 0.91, "grad_norm": 0.9396758079528809, "learning_rate": 1.9428899810159606e-07, "loss": 0.5872, "step": 14421 }, { "epoch": 0.91, "grad_norm": 0.98106449842453, "learning_rate": 1.9400586988030212e-07, "loss": 0.5745, "step": 14422 }, { "epoch": 0.91, "grad_norm": 0.8788214921951294, "learning_rate": 1.937229440227023e-07, "loss": 0.5746, "step": 14423 }, { "epoch": 0.91, "grad_norm": 0.8514944911003113, "learning_rate": 1.9344022054070933e-07, "loss": 0.5369, "step": 14424 }, { "epoch": 0.91, "grad_norm": 0.8359341025352478, "learning_rate": 1.9315769944622808e-07, "loss": 0.5719, "step": 14425 }, { "epoch": 0.91, "grad_norm": 0.8570429682731628, "learning_rate": 1.9287538075115463e-07, "loss": 0.6033, "step": 14426 }, { "epoch": 0.91, "grad_norm": 0.8559786081314087, "learning_rate": 1.9259326446737503e-07, "loss": 0.5631, "step": 14427 }, { "epoch": 0.91, "grad_norm": 0.8701675534248352, "learning_rate": 1.9231135060677087e-07, "loss": 0.581, "step": 14428 }, { "epoch": 0.91, "grad_norm": 0.9724037051200867, "learning_rate": 1.9202963918120988e-07, "loss": 0.6107, "step": 14429 }, { "epoch": 0.91, "grad_norm": 0.8299076557159424, "learning_rate": 1.9174813020255533e-07, "loss": 0.5938, "step": 14430 }, { "epoch": 0.91, "grad_norm": 0.84014493227005, "learning_rate": 1.9146682368266112e-07, "loss": 0.5601, "step": 14431 }, { "epoch": 0.91, "grad_norm": 0.9042718410491943, "learning_rate": 1.9118571963336996e-07, "loss": 0.5843, "step": 14432 }, { "epoch": 0.91, "grad_norm": 0.8914376497268677, "learning_rate": 1.9090481806652017e-07, "loss": 0.5966, "step": 14433 }, { "epoch": 0.91, "grad_norm": 0.832227885723114, "learning_rate": 1.9062411899393896e-07, "loss": 0.5538, "step": 14434 }, { "epoch": 0.91, "grad_norm": 0.8363732695579529, "learning_rate": 1.9034362242744576e-07, "loss": 0.5605, "step": 14435 }, { "epoch": 0.91, "grad_norm": 0.9074519872665405, "learning_rate": 1.9006332837885054e-07, "loss": 0.5895, "step": 14436 }, { "epoch": 0.91, "grad_norm": 0.9413220882415771, "learning_rate": 1.8978323685995558e-07, "loss": 0.6647, "step": 14437 }, { "epoch": 0.91, "grad_norm": 0.9103296995162964, "learning_rate": 1.8950334788255586e-07, "loss": 0.5697, "step": 14438 }, { "epoch": 0.91, "grad_norm": 0.8581710457801819, "learning_rate": 1.8922366145843585e-07, "loss": 0.5329, "step": 14439 }, { "epoch": 0.91, "grad_norm": 0.8271042704582214, "learning_rate": 1.8894417759937055e-07, "loss": 0.5779, "step": 14440 }, { "epoch": 0.91, "grad_norm": 0.8599358797073364, "learning_rate": 1.886648963171306e-07, "loss": 0.5483, "step": 14441 }, { "epoch": 0.91, "grad_norm": 0.8909770250320435, "learning_rate": 1.8838581762347485e-07, "loss": 0.5755, "step": 14442 }, { "epoch": 0.92, "grad_norm": 0.9004727005958557, "learning_rate": 1.881069415301534e-07, "loss": 0.5401, "step": 14443 }, { "epoch": 0.92, "grad_norm": 0.926632821559906, "learning_rate": 1.8782826804890908e-07, "loss": 0.6182, "step": 14444 }, { "epoch": 0.92, "grad_norm": 0.9216212630271912, "learning_rate": 1.875497971914758e-07, "loss": 0.6631, "step": 14445 }, { "epoch": 0.92, "grad_norm": 0.9242258071899414, "learning_rate": 1.872715289695798e-07, "loss": 0.5645, "step": 14446 }, { "epoch": 0.92, "grad_norm": 0.8471218943595886, "learning_rate": 1.8699346339493774e-07, "loss": 0.5925, "step": 14447 }, { "epoch": 0.92, "grad_norm": 0.8889009952545166, "learning_rate": 1.867156004792575e-07, "loss": 0.5835, "step": 14448 }, { "epoch": 0.92, "grad_norm": 0.9420746564865112, "learning_rate": 1.864379402342381e-07, "loss": 0.5516, "step": 14449 }, { "epoch": 0.92, "grad_norm": 0.8928573131561279, "learning_rate": 1.8616048267157348e-07, "loss": 0.5829, "step": 14450 }, { "epoch": 0.92, "grad_norm": 0.9594048261642456, "learning_rate": 1.8588322780294377e-07, "loss": 0.5724, "step": 14451 }, { "epoch": 0.92, "grad_norm": 0.8984374403953552, "learning_rate": 1.8560617564002458e-07, "loss": 0.5723, "step": 14452 }, { "epoch": 0.92, "grad_norm": 0.9626867175102234, "learning_rate": 1.8532932619448106e-07, "loss": 0.5672, "step": 14453 }, { "epoch": 0.92, "grad_norm": 0.8767659068107605, "learning_rate": 1.8505267947797056e-07, "loss": 0.5572, "step": 14454 }, { "epoch": 0.92, "grad_norm": 0.8869348168373108, "learning_rate": 1.847762355021421e-07, "loss": 0.5568, "step": 14455 }, { "epoch": 0.92, "grad_norm": 0.9030115604400635, "learning_rate": 1.8449999427863575e-07, "loss": 0.5726, "step": 14456 }, { "epoch": 0.92, "grad_norm": 0.8998832106590271, "learning_rate": 1.842239558190817e-07, "loss": 0.5776, "step": 14457 }, { "epoch": 0.92, "grad_norm": 0.9000546932220459, "learning_rate": 1.839481201351051e-07, "loss": 0.5754, "step": 14458 }, { "epoch": 0.92, "grad_norm": 0.9226672053337097, "learning_rate": 1.8367248723831889e-07, "loss": 0.6349, "step": 14459 }, { "epoch": 0.92, "grad_norm": 0.8481553792953491, "learning_rate": 1.833970571403293e-07, "loss": 0.5471, "step": 14460 }, { "epoch": 0.92, "grad_norm": 0.8419100046157837, "learning_rate": 1.831218298527343e-07, "loss": 0.5638, "step": 14461 }, { "epoch": 0.92, "grad_norm": 0.9119687080383301, "learning_rate": 1.828468053871213e-07, "loss": 0.6084, "step": 14462 }, { "epoch": 0.92, "grad_norm": 0.8742016553878784, "learning_rate": 1.825719837550727e-07, "loss": 0.5708, "step": 14463 }, { "epoch": 0.92, "grad_norm": 0.8338335752487183, "learning_rate": 1.822973649681592e-07, "loss": 0.5689, "step": 14464 }, { "epoch": 0.92, "grad_norm": 0.8597403168678284, "learning_rate": 1.820229490379438e-07, "loss": 0.4852, "step": 14465 }, { "epoch": 0.92, "grad_norm": 0.8943460583686829, "learning_rate": 1.8174873597598176e-07, "loss": 0.5449, "step": 14466 }, { "epoch": 0.92, "grad_norm": 0.9471714496612549, "learning_rate": 1.814747257938182e-07, "loss": 0.6154, "step": 14467 }, { "epoch": 0.92, "grad_norm": 0.8958661556243896, "learning_rate": 1.8120091850299225e-07, "loss": 0.5881, "step": 14468 }, { "epoch": 0.92, "grad_norm": 0.8442410826683044, "learning_rate": 1.809273141150325e-07, "loss": 0.5285, "step": 14469 }, { "epoch": 0.92, "grad_norm": 0.8980113863945007, "learning_rate": 1.8065391264145805e-07, "loss": 0.5612, "step": 14470 }, { "epoch": 0.92, "grad_norm": 0.8895835876464844, "learning_rate": 1.8038071409378299e-07, "loss": 0.6386, "step": 14471 }, { "epoch": 0.92, "grad_norm": 0.872721791267395, "learning_rate": 1.8010771848350983e-07, "loss": 0.6137, "step": 14472 }, { "epoch": 0.92, "grad_norm": 0.8433144688606262, "learning_rate": 1.7983492582213324e-07, "loss": 0.5736, "step": 14473 }, { "epoch": 0.92, "grad_norm": 0.9304208159446716, "learning_rate": 1.7956233612114017e-07, "loss": 0.5614, "step": 14474 }, { "epoch": 0.92, "grad_norm": 0.8683229088783264, "learning_rate": 1.792899493920075e-07, "loss": 0.5202, "step": 14475 }, { "epoch": 0.92, "grad_norm": 0.9167693257331848, "learning_rate": 1.79017765646205e-07, "loss": 0.6225, "step": 14476 }, { "epoch": 0.92, "grad_norm": 0.8281375169754028, "learning_rate": 1.78745784895194e-07, "loss": 0.5476, "step": 14477 }, { "epoch": 0.92, "grad_norm": 0.8747629523277283, "learning_rate": 1.7847400715042594e-07, "loss": 0.5737, "step": 14478 }, { "epoch": 0.92, "grad_norm": 0.916631817817688, "learning_rate": 1.7820243242334334e-07, "loss": 0.6067, "step": 14479 }, { "epoch": 0.92, "grad_norm": 0.9027150869369507, "learning_rate": 1.7793106072538423e-07, "loss": 0.5696, "step": 14480 }, { "epoch": 0.92, "grad_norm": 0.917669415473938, "learning_rate": 1.7765989206797285e-07, "loss": 0.6068, "step": 14481 }, { "epoch": 0.92, "grad_norm": 1.0007344484329224, "learning_rate": 1.7738892646252726e-07, "loss": 0.5725, "step": 14482 }, { "epoch": 0.92, "grad_norm": 0.9161766767501831, "learning_rate": 1.7711816392045778e-07, "loss": 0.5761, "step": 14483 }, { "epoch": 0.92, "grad_norm": 0.892318606376648, "learning_rate": 1.7684760445316418e-07, "loss": 0.5412, "step": 14484 }, { "epoch": 0.92, "grad_norm": 0.9114658236503601, "learning_rate": 1.765772480720407e-07, "loss": 0.5503, "step": 14485 }, { "epoch": 0.92, "grad_norm": 0.9310790300369263, "learning_rate": 1.763070947884693e-07, "loss": 0.5859, "step": 14486 }, { "epoch": 0.92, "grad_norm": 0.898358941078186, "learning_rate": 1.7603714461382481e-07, "loss": 0.6073, "step": 14487 }, { "epoch": 0.92, "grad_norm": 0.846410870552063, "learning_rate": 1.7576739755947593e-07, "loss": 0.5229, "step": 14488 }, { "epoch": 0.92, "grad_norm": 0.8562573194503784, "learning_rate": 1.7549785363677906e-07, "loss": 0.5742, "step": 14489 }, { "epoch": 0.92, "grad_norm": 0.8590916395187378, "learning_rate": 1.7522851285708465e-07, "loss": 0.5965, "step": 14490 }, { "epoch": 0.92, "grad_norm": 0.8809557557106018, "learning_rate": 1.7495937523173356e-07, "loss": 0.5514, "step": 14491 }, { "epoch": 0.92, "grad_norm": 0.9356900453567505, "learning_rate": 1.7469044077205732e-07, "loss": 0.5705, "step": 14492 }, { "epoch": 0.92, "grad_norm": 0.8484225869178772, "learning_rate": 1.744217094893813e-07, "loss": 0.5891, "step": 14493 }, { "epoch": 0.92, "grad_norm": 0.8933218121528625, "learning_rate": 1.7415318139502036e-07, "loss": 0.55, "step": 14494 }, { "epoch": 0.92, "grad_norm": 0.9217604398727417, "learning_rate": 1.7388485650028043e-07, "loss": 0.5794, "step": 14495 }, { "epoch": 0.92, "grad_norm": 0.9160068035125732, "learning_rate": 1.7361673481646025e-07, "loss": 0.5694, "step": 14496 }, { "epoch": 0.92, "grad_norm": 1.0053678750991821, "learning_rate": 1.7334881635485023e-07, "loss": 0.5796, "step": 14497 }, { "epoch": 0.92, "grad_norm": 0.934262216091156, "learning_rate": 1.7308110112673027e-07, "loss": 0.5943, "step": 14498 }, { "epoch": 0.92, "grad_norm": 0.8574293255805969, "learning_rate": 1.7281358914337408e-07, "loss": 0.5807, "step": 14499 }, { "epoch": 0.92, "grad_norm": 0.9166094064712524, "learning_rate": 1.7254628041604437e-07, "loss": 0.5807, "step": 14500 }, { "epoch": 0.92, "grad_norm": 0.8386301398277283, "learning_rate": 1.7227917495599823e-07, "loss": 0.5858, "step": 14501 }, { "epoch": 0.92, "grad_norm": 1.001879096031189, "learning_rate": 1.7201227277448108e-07, "loss": 0.6831, "step": 14502 }, { "epoch": 0.92, "grad_norm": 0.887852668762207, "learning_rate": 1.7174557388273173e-07, "loss": 0.5888, "step": 14503 }, { "epoch": 0.92, "grad_norm": 0.9143977165222168, "learning_rate": 1.7147907829198008e-07, "loss": 0.5656, "step": 14504 }, { "epoch": 0.92, "grad_norm": 0.9383800029754639, "learning_rate": 1.7121278601344715e-07, "loss": 0.6177, "step": 14505 }, { "epoch": 0.92, "grad_norm": 0.918759822845459, "learning_rate": 1.7094669705834566e-07, "loss": 0.58, "step": 14506 }, { "epoch": 0.92, "grad_norm": 0.8827510476112366, "learning_rate": 1.706808114378805e-07, "loss": 0.5384, "step": 14507 }, { "epoch": 0.92, "grad_norm": 0.8679559230804443, "learning_rate": 1.7041512916324554e-07, "loss": 0.4867, "step": 14508 }, { "epoch": 0.92, "grad_norm": 0.9122892618179321, "learning_rate": 1.7014965024562846e-07, "loss": 0.5651, "step": 14509 }, { "epoch": 0.92, "grad_norm": 0.8981230854988098, "learning_rate": 1.698843746962081e-07, "loss": 0.5893, "step": 14510 }, { "epoch": 0.92, "grad_norm": 0.9092260599136353, "learning_rate": 1.6961930252615388e-07, "loss": 0.5357, "step": 14511 }, { "epoch": 0.92, "grad_norm": 0.8531707525253296, "learning_rate": 1.6935443374662741e-07, "loss": 0.5466, "step": 14512 }, { "epoch": 0.92, "grad_norm": 0.890770673751831, "learning_rate": 1.6908976836878088e-07, "loss": 0.5673, "step": 14513 }, { "epoch": 0.92, "grad_norm": 0.9549497961997986, "learning_rate": 1.6882530640375872e-07, "loss": 0.5602, "step": 14514 }, { "epoch": 0.92, "grad_norm": 0.89844810962677, "learning_rate": 1.68561047862697e-07, "loss": 0.5751, "step": 14515 }, { "epoch": 0.92, "grad_norm": 0.9014208912849426, "learning_rate": 1.6829699275672186e-07, "loss": 0.5715, "step": 14516 }, { "epoch": 0.92, "grad_norm": 0.9853465557098389, "learning_rate": 1.6803314109695157e-07, "loss": 0.5917, "step": 14517 }, { "epoch": 0.92, "grad_norm": 0.8697071075439453, "learning_rate": 1.677694928944973e-07, "loss": 0.5795, "step": 14518 }, { "epoch": 0.92, "grad_norm": 0.9067200422286987, "learning_rate": 1.6750604816045902e-07, "loss": 0.5587, "step": 14519 }, { "epoch": 0.92, "grad_norm": 0.899541437625885, "learning_rate": 1.6724280690593008e-07, "loss": 0.5799, "step": 14520 }, { "epoch": 0.92, "grad_norm": 0.8571711778640747, "learning_rate": 1.6697976914199497e-07, "loss": 0.5425, "step": 14521 }, { "epoch": 0.92, "grad_norm": 0.8938726782798767, "learning_rate": 1.6671693487972818e-07, "loss": 0.5316, "step": 14522 }, { "epoch": 0.92, "grad_norm": 0.9331005215644836, "learning_rate": 1.6645430413019858e-07, "loss": 0.6139, "step": 14523 }, { "epoch": 0.92, "grad_norm": 0.89864182472229, "learning_rate": 1.6619187690446293e-07, "loss": 0.5949, "step": 14524 }, { "epoch": 0.92, "grad_norm": 0.8560614585876465, "learning_rate": 1.659296532135718e-07, "loss": 0.5573, "step": 14525 }, { "epoch": 0.92, "grad_norm": 0.9034044742584229, "learning_rate": 1.6566763306856638e-07, "loss": 0.5997, "step": 14526 }, { "epoch": 0.92, "grad_norm": 0.8897153735160828, "learning_rate": 1.6540581648048003e-07, "loss": 0.5859, "step": 14527 }, { "epoch": 0.92, "grad_norm": 0.8635241985321045, "learning_rate": 1.6514420346033565e-07, "loss": 0.5781, "step": 14528 }, { "epoch": 0.92, "grad_norm": 0.9002516865730286, "learning_rate": 1.6488279401915052e-07, "loss": 0.6092, "step": 14529 }, { "epoch": 0.92, "grad_norm": 0.8698979020118713, "learning_rate": 1.6462158816792973e-07, "loss": 0.5981, "step": 14530 }, { "epoch": 0.92, "grad_norm": 0.9019778966903687, "learning_rate": 1.643605859176739e-07, "loss": 0.5418, "step": 14531 }, { "epoch": 0.92, "grad_norm": 0.8013015389442444, "learning_rate": 1.6409978727937094e-07, "loss": 0.5363, "step": 14532 }, { "epoch": 0.92, "grad_norm": 0.9132005572319031, "learning_rate": 1.6383919226400368e-07, "loss": 0.5711, "step": 14533 }, { "epoch": 0.92, "grad_norm": 0.8765537738800049, "learning_rate": 1.6357880088254396e-07, "loss": 0.5648, "step": 14534 }, { "epoch": 0.92, "grad_norm": 0.8809942007064819, "learning_rate": 1.633186131459563e-07, "loss": 0.529, "step": 14535 }, { "epoch": 0.92, "grad_norm": 0.9183542728424072, "learning_rate": 1.6305862906519587e-07, "loss": 0.5666, "step": 14536 }, { "epoch": 0.92, "grad_norm": 0.9470837712287903, "learning_rate": 1.6279884865121108e-07, "loss": 0.5908, "step": 14537 }, { "epoch": 0.92, "grad_norm": 0.9300570487976074, "learning_rate": 1.6253927191493879e-07, "loss": 0.6089, "step": 14538 }, { "epoch": 0.92, "grad_norm": 0.8753145337104797, "learning_rate": 1.622798988673091e-07, "loss": 0.5747, "step": 14539 }, { "epoch": 0.92, "grad_norm": 0.8983120918273926, "learning_rate": 1.6202072951924386e-07, "loss": 0.5561, "step": 14540 }, { "epoch": 0.92, "grad_norm": 0.8710740208625793, "learning_rate": 1.6176176388165598e-07, "loss": 0.5833, "step": 14541 }, { "epoch": 0.92, "grad_norm": 0.8573417067527771, "learning_rate": 1.6150300196544955e-07, "loss": 0.5251, "step": 14542 }, { "epoch": 0.92, "grad_norm": 0.8613002300262451, "learning_rate": 1.612444437815186e-07, "loss": 0.5635, "step": 14543 }, { "epoch": 0.92, "grad_norm": 0.7859262824058533, "learning_rate": 1.6098608934075166e-07, "loss": 0.5038, "step": 14544 }, { "epoch": 0.92, "grad_norm": 0.9234058260917664, "learning_rate": 1.607279386540278e-07, "loss": 0.6108, "step": 14545 }, { "epoch": 0.92, "grad_norm": 0.9104276895523071, "learning_rate": 1.60469991732215e-07, "loss": 0.5749, "step": 14546 }, { "epoch": 0.92, "grad_norm": 0.9492734670639038, "learning_rate": 1.6021224858617513e-07, "loss": 0.5761, "step": 14547 }, { "epoch": 0.92, "grad_norm": 0.8354452848434448, "learning_rate": 1.5995470922676116e-07, "loss": 0.636, "step": 14548 }, { "epoch": 0.92, "grad_norm": 0.8883960247039795, "learning_rate": 1.5969737366481774e-07, "loss": 0.5439, "step": 14549 }, { "epoch": 0.92, "grad_norm": 0.9534339308738708, "learning_rate": 1.5944024191117958e-07, "loss": 0.6199, "step": 14550 }, { "epoch": 0.92, "grad_norm": 0.9829249382019043, "learning_rate": 1.5918331397667298e-07, "loss": 0.5705, "step": 14551 }, { "epoch": 0.92, "grad_norm": 0.8952674865722656, "learning_rate": 1.589265898721176e-07, "loss": 0.5662, "step": 14552 }, { "epoch": 0.92, "grad_norm": 0.8737239241600037, "learning_rate": 1.586700696083232e-07, "loss": 0.5791, "step": 14553 }, { "epoch": 0.92, "grad_norm": 0.9135823249816895, "learning_rate": 1.5841375319608943e-07, "loss": 0.5703, "step": 14554 }, { "epoch": 0.92, "grad_norm": 0.8945186734199524, "learning_rate": 1.5815764064621043e-07, "loss": 0.5432, "step": 14555 }, { "epoch": 0.92, "grad_norm": 0.9179873466491699, "learning_rate": 1.5790173196946924e-07, "loss": 0.5429, "step": 14556 }, { "epoch": 0.92, "grad_norm": 0.9364351630210876, "learning_rate": 1.5764602717664224e-07, "loss": 0.5607, "step": 14557 }, { "epoch": 0.92, "grad_norm": 0.8686890602111816, "learning_rate": 1.5739052627849581e-07, "loss": 0.5407, "step": 14558 }, { "epoch": 0.92, "grad_norm": 0.9229563474655151, "learning_rate": 1.571352292857875e-07, "loss": 0.523, "step": 14559 }, { "epoch": 0.92, "grad_norm": 0.9978078603744507, "learning_rate": 1.5688013620926757e-07, "loss": 0.5824, "step": 14560 }, { "epoch": 0.92, "grad_norm": 0.8744204640388489, "learning_rate": 1.566252470596774e-07, "loss": 0.5485, "step": 14561 }, { "epoch": 0.92, "grad_norm": 0.8485409617424011, "learning_rate": 1.5637056184774958e-07, "loss": 0.588, "step": 14562 }, { "epoch": 0.92, "grad_norm": 0.8641082048416138, "learning_rate": 1.5611608058420714e-07, "loss": 0.5467, "step": 14563 }, { "epoch": 0.92, "grad_norm": 0.9121243357658386, "learning_rate": 1.5586180327976598e-07, "loss": 0.5439, "step": 14564 }, { "epoch": 0.92, "grad_norm": 0.9285200834274292, "learning_rate": 1.5560772994513251e-07, "loss": 0.5821, "step": 14565 }, { "epoch": 0.92, "grad_norm": 0.868303120136261, "learning_rate": 1.55353860591006e-07, "loss": 0.5052, "step": 14566 }, { "epoch": 0.92, "grad_norm": 0.8897990584373474, "learning_rate": 1.5510019522807397e-07, "loss": 0.545, "step": 14567 }, { "epoch": 0.92, "grad_norm": 0.8986243009567261, "learning_rate": 1.5484673386701953e-07, "loss": 0.6177, "step": 14568 }, { "epoch": 0.92, "grad_norm": 0.8620361685752869, "learning_rate": 1.545934765185131e-07, "loss": 0.5743, "step": 14569 }, { "epoch": 0.92, "grad_norm": 0.8731402158737183, "learning_rate": 1.5434042319321996e-07, "loss": 0.5466, "step": 14570 }, { "epoch": 0.92, "grad_norm": 0.8847206830978394, "learning_rate": 1.5408757390179496e-07, "loss": 0.548, "step": 14571 }, { "epoch": 0.92, "grad_norm": 0.8469605445861816, "learning_rate": 1.5383492865488459e-07, "loss": 0.5754, "step": 14572 }, { "epoch": 0.92, "grad_norm": 0.8870298266410828, "learning_rate": 1.5358248746312588e-07, "loss": 0.5896, "step": 14573 }, { "epoch": 0.92, "grad_norm": 0.9190220236778259, "learning_rate": 1.533302503371503e-07, "loss": 0.5394, "step": 14574 }, { "epoch": 0.92, "grad_norm": 0.9334941506385803, "learning_rate": 1.5307821728757722e-07, "loss": 0.5883, "step": 14575 }, { "epoch": 0.92, "grad_norm": 0.9348692297935486, "learning_rate": 1.5282638832501917e-07, "loss": 0.6218, "step": 14576 }, { "epoch": 0.92, "grad_norm": 0.8661801218986511, "learning_rate": 1.5257476346007938e-07, "loss": 0.5299, "step": 14577 }, { "epoch": 0.92, "grad_norm": 0.9223611354827881, "learning_rate": 1.523233427033538e-07, "loss": 0.5646, "step": 14578 }, { "epoch": 0.92, "grad_norm": 0.8899549245834351, "learning_rate": 1.5207212606542786e-07, "loss": 0.6004, "step": 14579 }, { "epoch": 0.92, "grad_norm": 0.8400233387947083, "learning_rate": 1.518211135568809e-07, "loss": 0.5215, "step": 14580 }, { "epoch": 0.92, "grad_norm": 0.8840736746788025, "learning_rate": 1.5157030518828054e-07, "loss": 0.6081, "step": 14581 }, { "epoch": 0.92, "grad_norm": 0.8895472884178162, "learning_rate": 1.513197009701889e-07, "loss": 0.5254, "step": 14582 }, { "epoch": 0.92, "grad_norm": 0.8849679827690125, "learning_rate": 1.510693009131564e-07, "loss": 0.5798, "step": 14583 }, { "epoch": 0.92, "grad_norm": 0.8696837425231934, "learning_rate": 1.50819105027728e-07, "loss": 0.4908, "step": 14584 }, { "epoch": 0.92, "grad_norm": 0.8720855116844177, "learning_rate": 1.5056911332443801e-07, "loss": 0.6337, "step": 14585 }, { "epoch": 0.92, "grad_norm": 0.9008244276046753, "learning_rate": 1.5031932581381247e-07, "loss": 0.6017, "step": 14586 }, { "epoch": 0.92, "grad_norm": 0.8799957036972046, "learning_rate": 1.5006974250636906e-07, "loss": 0.5997, "step": 14587 }, { "epoch": 0.92, "grad_norm": 0.8658244013786316, "learning_rate": 1.498203634126183e-07, "loss": 0.5255, "step": 14588 }, { "epoch": 0.92, "grad_norm": 0.876167893409729, "learning_rate": 1.4957118854305842e-07, "loss": 0.5822, "step": 14589 }, { "epoch": 0.92, "grad_norm": 0.926031768321991, "learning_rate": 1.4932221790818268e-07, "loss": 0.5881, "step": 14590 }, { "epoch": 0.92, "grad_norm": 1.0563932657241821, "learning_rate": 1.4907345151847387e-07, "loss": 0.5859, "step": 14591 }, { "epoch": 0.92, "grad_norm": 0.832943320274353, "learning_rate": 1.4882488938440688e-07, "loss": 0.5449, "step": 14592 }, { "epoch": 0.92, "grad_norm": 0.97652667760849, "learning_rate": 1.485765315164478e-07, "loss": 0.5737, "step": 14593 }, { "epoch": 0.92, "grad_norm": 0.8609157800674438, "learning_rate": 1.483283779250544e-07, "loss": 0.5534, "step": 14594 }, { "epoch": 0.92, "grad_norm": 1.0561258792877197, "learning_rate": 1.4808042862067496e-07, "loss": 0.5937, "step": 14595 }, { "epoch": 0.92, "grad_norm": 0.9125807881355286, "learning_rate": 1.4783268361375058e-07, "loss": 0.6365, "step": 14596 }, { "epoch": 0.92, "grad_norm": 0.8677931427955627, "learning_rate": 1.4758514291471238e-07, "loss": 0.5369, "step": 14597 }, { "epoch": 0.92, "grad_norm": 1.0120726823806763, "learning_rate": 1.4733780653398254e-07, "loss": 0.5515, "step": 14598 }, { "epoch": 0.92, "grad_norm": 0.9230685234069824, "learning_rate": 1.4709067448197722e-07, "loss": 0.6248, "step": 14599 }, { "epoch": 0.92, "grad_norm": 0.8900234699249268, "learning_rate": 1.4684374676910197e-07, "loss": 0.5869, "step": 14600 }, { "epoch": 0.93, "grad_norm": 0.8961969614028931, "learning_rate": 1.4659702340575287e-07, "loss": 0.5668, "step": 14601 }, { "epoch": 0.93, "grad_norm": 0.843304455280304, "learning_rate": 1.4635050440232002e-07, "loss": 0.5334, "step": 14602 }, { "epoch": 0.93, "grad_norm": 0.8992311358451843, "learning_rate": 1.4610418976918172e-07, "loss": 0.5863, "step": 14603 }, { "epoch": 0.93, "grad_norm": 0.9412532448768616, "learning_rate": 1.4585807951671194e-07, "loss": 0.5809, "step": 14604 }, { "epoch": 0.93, "grad_norm": 0.8952974677085876, "learning_rate": 1.4561217365527124e-07, "loss": 0.6129, "step": 14605 }, { "epoch": 0.93, "grad_norm": 0.9405317306518555, "learning_rate": 1.453664721952147e-07, "loss": 0.5574, "step": 14606 }, { "epoch": 0.93, "grad_norm": 0.8412303924560547, "learning_rate": 1.451209751468885e-07, "loss": 0.5746, "step": 14607 }, { "epoch": 0.93, "grad_norm": 0.896740734577179, "learning_rate": 1.448756825206288e-07, "loss": 0.5703, "step": 14608 }, { "epoch": 0.93, "grad_norm": 0.9362704753875732, "learning_rate": 1.4463059432676395e-07, "loss": 0.618, "step": 14609 }, { "epoch": 0.93, "grad_norm": 0.89310622215271, "learning_rate": 1.4438571057561523e-07, "loss": 0.5166, "step": 14610 }, { "epoch": 0.93, "grad_norm": 0.8676783442497253, "learning_rate": 1.4414103127749157e-07, "loss": 0.5502, "step": 14611 }, { "epoch": 0.93, "grad_norm": 0.9016738533973694, "learning_rate": 1.4389655644269752e-07, "loss": 0.6399, "step": 14612 }, { "epoch": 0.93, "grad_norm": 0.9519631266593933, "learning_rate": 1.4365228608152647e-07, "loss": 0.6178, "step": 14613 }, { "epoch": 0.93, "grad_norm": 0.9060442447662354, "learning_rate": 1.4340822020426304e-07, "loss": 0.6017, "step": 14614 }, { "epoch": 0.93, "grad_norm": 0.922366201877594, "learning_rate": 1.4316435882118563e-07, "loss": 0.6266, "step": 14615 }, { "epoch": 0.93, "grad_norm": 0.9243087768554688, "learning_rate": 1.429207019425599e-07, "loss": 0.5854, "step": 14616 }, { "epoch": 0.93, "grad_norm": 0.9128119945526123, "learning_rate": 1.426772495786477e-07, "loss": 0.5616, "step": 14617 }, { "epoch": 0.93, "grad_norm": 0.8875778913497925, "learning_rate": 1.4243400173969968e-07, "loss": 0.5322, "step": 14618 }, { "epoch": 0.93, "grad_norm": 0.875525712966919, "learning_rate": 1.4219095843595654e-07, "loss": 0.5701, "step": 14619 }, { "epoch": 0.93, "grad_norm": 0.8632004857063293, "learning_rate": 1.4194811967765344e-07, "loss": 0.555, "step": 14620 }, { "epoch": 0.93, "grad_norm": 0.9275280237197876, "learning_rate": 1.417054854750155e-07, "loss": 0.5548, "step": 14621 }, { "epoch": 0.93, "grad_norm": 0.893173098564148, "learning_rate": 1.414630558382579e-07, "loss": 0.5044, "step": 14622 }, { "epoch": 0.93, "grad_norm": 0.8877639174461365, "learning_rate": 1.4122083077759087e-07, "loss": 0.556, "step": 14623 }, { "epoch": 0.93, "grad_norm": 0.8409522771835327, "learning_rate": 1.409788103032106e-07, "loss": 0.5588, "step": 14624 }, { "epoch": 0.93, "grad_norm": 0.8505089282989502, "learning_rate": 1.4073699442531007e-07, "loss": 0.5673, "step": 14625 }, { "epoch": 0.93, "grad_norm": 0.865674614906311, "learning_rate": 1.4049538315407064e-07, "loss": 0.5732, "step": 14626 }, { "epoch": 0.93, "grad_norm": 0.8754248023033142, "learning_rate": 1.4025397649966577e-07, "loss": 0.5596, "step": 14627 }, { "epoch": 0.93, "grad_norm": 0.8468300104141235, "learning_rate": 1.400127744722596e-07, "loss": 0.5757, "step": 14628 }, { "epoch": 0.93, "grad_norm": 0.9094893336296082, "learning_rate": 1.3977177708200896e-07, "loss": 0.5904, "step": 14629 }, { "epoch": 0.93, "grad_norm": 0.9047295451164246, "learning_rate": 1.395309843390613e-07, "loss": 0.584, "step": 14630 }, { "epoch": 0.93, "grad_norm": 0.8569273948669434, "learning_rate": 1.3929039625355633e-07, "loss": 0.5859, "step": 14631 }, { "epoch": 0.93, "grad_norm": 0.9276108145713806, "learning_rate": 1.3905001283562257e-07, "loss": 0.5994, "step": 14632 }, { "epoch": 0.93, "grad_norm": 0.8770352602005005, "learning_rate": 1.3880983409538252e-07, "loss": 0.562, "step": 14633 }, { "epoch": 0.93, "grad_norm": 0.9754252433776855, "learning_rate": 1.3856986004295082e-07, "loss": 0.5636, "step": 14634 }, { "epoch": 0.93, "grad_norm": 0.8698052763938904, "learning_rate": 1.3833009068842995e-07, "loss": 0.567, "step": 14635 }, { "epoch": 0.93, "grad_norm": 0.9316123723983765, "learning_rate": 1.3809052604191632e-07, "loss": 0.5865, "step": 14636 }, { "epoch": 0.93, "grad_norm": 0.9362663626670837, "learning_rate": 1.3785116611349736e-07, "loss": 0.5783, "step": 14637 }, { "epoch": 0.93, "grad_norm": 0.91670161485672, "learning_rate": 1.3761201091325172e-07, "loss": 0.5896, "step": 14638 }, { "epoch": 0.93, "grad_norm": 0.856816828250885, "learning_rate": 1.3737306045124966e-07, "loss": 0.6125, "step": 14639 }, { "epoch": 0.93, "grad_norm": 0.9262798428535461, "learning_rate": 1.3713431473755147e-07, "loss": 0.5683, "step": 14640 }, { "epoch": 0.93, "grad_norm": 0.9130752086639404, "learning_rate": 1.3689577378221019e-07, "loss": 0.6457, "step": 14641 }, { "epoch": 0.93, "grad_norm": 0.8534045815467834, "learning_rate": 1.3665743759527173e-07, "loss": 0.5155, "step": 14642 }, { "epoch": 0.93, "grad_norm": 0.9429634213447571, "learning_rate": 1.3641930618676912e-07, "loss": 0.5988, "step": 14643 }, { "epoch": 0.93, "grad_norm": 0.9114389419555664, "learning_rate": 1.3618137956673105e-07, "loss": 0.5677, "step": 14644 }, { "epoch": 0.93, "grad_norm": 0.9275795221328735, "learning_rate": 1.3594365774517447e-07, "loss": 0.5971, "step": 14645 }, { "epoch": 0.93, "grad_norm": 0.9180171489715576, "learning_rate": 1.357061407321103e-07, "loss": 0.545, "step": 14646 }, { "epoch": 0.93, "grad_norm": 0.8974061608314514, "learning_rate": 1.3546882853753885e-07, "loss": 0.5988, "step": 14647 }, { "epoch": 0.93, "grad_norm": 0.8184780478477478, "learning_rate": 1.3523172117145212e-07, "loss": 0.606, "step": 14648 }, { "epoch": 0.93, "grad_norm": 0.8791584968566895, "learning_rate": 1.349948186438349e-07, "loss": 0.5674, "step": 14649 }, { "epoch": 0.93, "grad_norm": 0.900364100933075, "learning_rate": 1.347581209646609e-07, "loss": 0.5965, "step": 14650 }, { "epoch": 0.93, "grad_norm": 0.9039656519889832, "learning_rate": 1.3452162814389824e-07, "loss": 0.5437, "step": 14651 }, { "epoch": 0.93, "grad_norm": 0.8871658444404602, "learning_rate": 1.342853401915034e-07, "loss": 0.6313, "step": 14652 }, { "epoch": 0.93, "grad_norm": 0.8789704442024231, "learning_rate": 1.3404925711742734e-07, "loss": 0.6161, "step": 14653 }, { "epoch": 0.93, "grad_norm": 0.8579655885696411, "learning_rate": 1.3381337893160818e-07, "loss": 0.5503, "step": 14654 }, { "epoch": 0.93, "grad_norm": 0.9106957316398621, "learning_rate": 1.3357770564398075e-07, "loss": 0.5624, "step": 14655 }, { "epoch": 0.93, "grad_norm": 0.8811336159706116, "learning_rate": 1.333422372644666e-07, "loss": 0.5981, "step": 14656 }, { "epoch": 0.93, "grad_norm": 0.8811172842979431, "learning_rate": 1.331069738029811e-07, "loss": 0.5696, "step": 14657 }, { "epoch": 0.93, "grad_norm": 0.8447614908218384, "learning_rate": 1.3287191526942968e-07, "loss": 0.6139, "step": 14658 }, { "epoch": 0.93, "grad_norm": 0.8404377102851868, "learning_rate": 1.3263706167371104e-07, "loss": 0.5405, "step": 14659 }, { "epoch": 0.93, "grad_norm": 0.9460970163345337, "learning_rate": 1.324024130257129e-07, "loss": 0.6109, "step": 14660 }, { "epoch": 0.93, "grad_norm": 0.862629234790802, "learning_rate": 1.3216796933531672e-07, "loss": 0.5738, "step": 14661 }, { "epoch": 0.93, "grad_norm": 0.8235554695129395, "learning_rate": 1.31933730612393e-07, "loss": 0.598, "step": 14662 }, { "epoch": 0.93, "grad_norm": 0.8776896595954895, "learning_rate": 1.316996968668044e-07, "loss": 0.5908, "step": 14663 }, { "epoch": 0.93, "grad_norm": 0.8844308257102966, "learning_rate": 1.3146586810840745e-07, "loss": 0.5464, "step": 14664 }, { "epoch": 0.93, "grad_norm": 0.9336026310920715, "learning_rate": 1.312322443470454e-07, "loss": 0.5776, "step": 14665 }, { "epoch": 0.93, "grad_norm": 0.9121221303939819, "learning_rate": 1.309988255925565e-07, "loss": 0.5844, "step": 14666 }, { "epoch": 0.93, "grad_norm": 0.9228976964950562, "learning_rate": 1.30765611854769e-07, "loss": 0.6109, "step": 14667 }, { "epoch": 0.93, "grad_norm": 0.9101218581199646, "learning_rate": 1.305326031435028e-07, "loss": 0.5677, "step": 14668 }, { "epoch": 0.93, "grad_norm": 0.9400882720947266, "learning_rate": 1.3029979946856953e-07, "loss": 0.6526, "step": 14669 }, { "epoch": 0.93, "grad_norm": 0.8727139830589294, "learning_rate": 1.3006720083977076e-07, "loss": 0.5734, "step": 14670 }, { "epoch": 0.93, "grad_norm": 0.8939454555511475, "learning_rate": 1.2983480726690033e-07, "loss": 0.5467, "step": 14671 }, { "epoch": 0.93, "grad_norm": 0.962372899055481, "learning_rate": 1.296026187597449e-07, "loss": 0.5609, "step": 14672 }, { "epoch": 0.93, "grad_norm": 0.8936379551887512, "learning_rate": 1.2937063532807992e-07, "loss": 0.5464, "step": 14673 }, { "epoch": 0.93, "grad_norm": 0.8870679140090942, "learning_rate": 1.2913885698167427e-07, "loss": 0.5659, "step": 14674 }, { "epoch": 0.93, "grad_norm": 0.901860237121582, "learning_rate": 1.2890728373028626e-07, "loss": 0.5578, "step": 14675 }, { "epoch": 0.93, "grad_norm": 0.8647616505622864, "learning_rate": 1.2867591558366755e-07, "loss": 0.5257, "step": 14676 }, { "epoch": 0.93, "grad_norm": 0.8566569089889526, "learning_rate": 1.2844475255156087e-07, "loss": 0.5479, "step": 14677 }, { "epoch": 0.93, "grad_norm": 0.8648780584335327, "learning_rate": 1.2821379464369732e-07, "loss": 0.5547, "step": 14678 }, { "epoch": 0.93, "grad_norm": 0.9167495369911194, "learning_rate": 1.2798304186980358e-07, "loss": 0.5968, "step": 14679 }, { "epoch": 0.93, "grad_norm": 0.877565860748291, "learning_rate": 1.277524942395958e-07, "loss": 0.5279, "step": 14680 }, { "epoch": 0.93, "grad_norm": 0.9053774476051331, "learning_rate": 1.275221517627806e-07, "loss": 0.5837, "step": 14681 }, { "epoch": 0.93, "grad_norm": 0.8372228145599365, "learning_rate": 1.2729201444905803e-07, "loss": 0.5663, "step": 14682 }, { "epoch": 0.93, "grad_norm": 0.8383122086524963, "learning_rate": 1.2706208230811812e-07, "loss": 0.5338, "step": 14683 }, { "epoch": 0.93, "grad_norm": 0.8852533102035522, "learning_rate": 1.2683235534964088e-07, "loss": 0.6176, "step": 14684 }, { "epoch": 0.93, "grad_norm": 0.9276344180107117, "learning_rate": 1.2660283358330195e-07, "loss": 0.5446, "step": 14685 }, { "epoch": 0.93, "grad_norm": 0.9432269334793091, "learning_rate": 1.263735170187641e-07, "loss": 0.5843, "step": 14686 }, { "epoch": 0.93, "grad_norm": 0.8667259812355042, "learning_rate": 1.26144405665683e-07, "loss": 0.5467, "step": 14687 }, { "epoch": 0.93, "grad_norm": 0.8887820243835449, "learning_rate": 1.2591549953370586e-07, "loss": 0.5782, "step": 14688 }, { "epoch": 0.93, "grad_norm": 0.908986508846283, "learning_rate": 1.2568679863247168e-07, "loss": 0.5866, "step": 14689 }, { "epoch": 0.93, "grad_norm": 0.8384619951248169, "learning_rate": 1.2545830297161e-07, "loss": 0.5293, "step": 14690 }, { "epoch": 0.93, "grad_norm": 0.8378182053565979, "learning_rate": 1.2523001256074196e-07, "loss": 0.5163, "step": 14691 }, { "epoch": 0.93, "grad_norm": 0.9510812759399414, "learning_rate": 1.2500192740947936e-07, "loss": 0.5682, "step": 14692 }, { "epoch": 0.93, "grad_norm": 0.8728382587432861, "learning_rate": 1.2477404752742784e-07, "loss": 0.5734, "step": 14693 }, { "epoch": 0.93, "grad_norm": 0.9147626757621765, "learning_rate": 1.2454637292418082e-07, "loss": 0.573, "step": 14694 }, { "epoch": 0.93, "grad_norm": 0.9553124308586121, "learning_rate": 1.2431890360932507e-07, "loss": 0.5857, "step": 14695 }, { "epoch": 0.93, "grad_norm": 0.9232133626937866, "learning_rate": 1.2409163959244019e-07, "loss": 0.5843, "step": 14696 }, { "epoch": 0.93, "grad_norm": 0.8695604801177979, "learning_rate": 1.2386458088309296e-07, "loss": 0.5436, "step": 14697 }, { "epoch": 0.93, "grad_norm": 0.9066473245620728, "learning_rate": 1.2363772749084625e-07, "loss": 0.5383, "step": 14698 }, { "epoch": 0.93, "grad_norm": 0.9268561005592346, "learning_rate": 1.2341107942525132e-07, "loss": 0.5507, "step": 14699 }, { "epoch": 0.93, "grad_norm": 0.8040413856506348, "learning_rate": 1.2318463669585112e-07, "loss": 0.5478, "step": 14700 }, { "epoch": 0.93, "grad_norm": 0.8859974145889282, "learning_rate": 1.229583993121808e-07, "loss": 0.5666, "step": 14701 }, { "epoch": 0.93, "grad_norm": 0.9229069948196411, "learning_rate": 1.2273236728376604e-07, "loss": 0.5676, "step": 14702 }, { "epoch": 0.93, "grad_norm": 0.853150486946106, "learning_rate": 1.2250654062012478e-07, "loss": 0.5378, "step": 14703 }, { "epoch": 0.93, "grad_norm": 0.8615385890007019, "learning_rate": 1.2228091933076613e-07, "loss": 0.5285, "step": 14704 }, { "epoch": 0.93, "grad_norm": 0.8250787854194641, "learning_rate": 1.2205550342518803e-07, "loss": 0.5093, "step": 14705 }, { "epoch": 0.93, "grad_norm": 0.8962552547454834, "learning_rate": 1.2183029291288452e-07, "loss": 0.5964, "step": 14706 }, { "epoch": 0.93, "grad_norm": 0.8384519815444946, "learning_rate": 1.2160528780333803e-07, "loss": 0.5196, "step": 14707 }, { "epoch": 0.93, "grad_norm": 0.8676429986953735, "learning_rate": 1.2138048810602154e-07, "loss": 0.5945, "step": 14708 }, { "epoch": 0.93, "grad_norm": 0.8354253172874451, "learning_rate": 1.2115589383040083e-07, "loss": 0.604, "step": 14709 }, { "epoch": 0.93, "grad_norm": 0.9310884475708008, "learning_rate": 1.2093150498593387e-07, "loss": 0.5702, "step": 14710 }, { "epoch": 0.93, "grad_norm": 0.878158450126648, "learning_rate": 1.2070732158206754e-07, "loss": 0.6138, "step": 14711 }, { "epoch": 0.93, "grad_norm": 0.9077056050300598, "learning_rate": 1.2048334362824265e-07, "loss": 0.6086, "step": 14712 }, { "epoch": 0.93, "grad_norm": 0.8881044983863831, "learning_rate": 1.202595711338894e-07, "loss": 0.5935, "step": 14713 }, { "epoch": 0.93, "grad_norm": 0.8508563041687012, "learning_rate": 1.2003600410842974e-07, "loss": 0.5741, "step": 14714 }, { "epoch": 0.93, "grad_norm": 0.9420803785324097, "learning_rate": 1.1981264256127832e-07, "loss": 0.5942, "step": 14715 }, { "epoch": 0.93, "grad_norm": 0.8780478239059448, "learning_rate": 1.1958948650183988e-07, "loss": 0.5893, "step": 14716 }, { "epoch": 0.93, "grad_norm": 0.8164428472518921, "learning_rate": 1.1936653593950964e-07, "loss": 0.5472, "step": 14717 }, { "epoch": 0.93, "grad_norm": 0.901099681854248, "learning_rate": 1.1914379088367677e-07, "loss": 0.5937, "step": 14718 }, { "epoch": 0.93, "grad_norm": 0.8668814897537231, "learning_rate": 1.1892125134371935e-07, "loss": 0.5709, "step": 14719 }, { "epoch": 0.93, "grad_norm": 0.9474896788597107, "learning_rate": 1.1869891732900762e-07, "loss": 0.607, "step": 14720 }, { "epoch": 0.93, "grad_norm": 0.9030824303627014, "learning_rate": 1.1847678884890467e-07, "loss": 0.5238, "step": 14721 }, { "epoch": 0.93, "grad_norm": 0.81502366065979, "learning_rate": 1.1825486591276136e-07, "loss": 0.5206, "step": 14722 }, { "epoch": 0.93, "grad_norm": 0.9036549925804138, "learning_rate": 1.1803314852992409e-07, "loss": 0.6143, "step": 14723 }, { "epoch": 0.93, "grad_norm": 0.8753872513771057, "learning_rate": 1.1781163670972762e-07, "loss": 0.571, "step": 14724 }, { "epoch": 0.93, "grad_norm": 0.9172664284706116, "learning_rate": 1.1759033046149948e-07, "loss": 0.5988, "step": 14725 }, { "epoch": 0.93, "grad_norm": 0.8600670695304871, "learning_rate": 1.1736922979455778e-07, "loss": 0.6114, "step": 14726 }, { "epoch": 0.93, "grad_norm": 0.9102545380592346, "learning_rate": 1.1714833471821175e-07, "loss": 0.612, "step": 14727 }, { "epoch": 0.93, "grad_norm": 0.8528123497962952, "learning_rate": 1.1692764524176337e-07, "loss": 0.5978, "step": 14728 }, { "epoch": 0.93, "grad_norm": 0.8206274509429932, "learning_rate": 1.1670716137450577e-07, "loss": 0.5306, "step": 14729 }, { "epoch": 0.93, "grad_norm": 0.8492806553840637, "learning_rate": 1.1648688312572099e-07, "loss": 0.5431, "step": 14730 }, { "epoch": 0.93, "grad_norm": 0.9542714357376099, "learning_rate": 1.1626681050468492e-07, "loss": 0.5932, "step": 14731 }, { "epoch": 0.93, "grad_norm": 0.9290028810501099, "learning_rate": 1.1604694352066459e-07, "loss": 0.6256, "step": 14732 }, { "epoch": 0.93, "grad_norm": 0.8055222034454346, "learning_rate": 1.1582728218291761e-07, "loss": 0.5609, "step": 14733 }, { "epoch": 0.93, "grad_norm": 0.874623715877533, "learning_rate": 1.1560782650069269e-07, "loss": 0.5352, "step": 14734 }, { "epoch": 0.93, "grad_norm": 0.9114512205123901, "learning_rate": 1.153885764832302e-07, "loss": 0.5984, "step": 14735 }, { "epoch": 0.93, "grad_norm": 0.9033612608909607, "learning_rate": 1.1516953213976278e-07, "loss": 0.602, "step": 14736 }, { "epoch": 0.93, "grad_norm": 0.905761182308197, "learning_rate": 1.1495069347951416e-07, "loss": 0.573, "step": 14737 }, { "epoch": 0.93, "grad_norm": 0.9094721078872681, "learning_rate": 1.1473206051169694e-07, "loss": 0.6089, "step": 14738 }, { "epoch": 0.93, "grad_norm": 0.875142514705658, "learning_rate": 1.1451363324551822e-07, "loss": 0.5662, "step": 14739 }, { "epoch": 0.93, "grad_norm": 0.9052537679672241, "learning_rate": 1.1429541169017511e-07, "loss": 0.5498, "step": 14740 }, { "epoch": 0.93, "grad_norm": 0.9127451777458191, "learning_rate": 1.1407739585485633e-07, "loss": 0.5891, "step": 14741 }, { "epoch": 0.93, "grad_norm": 0.9252974390983582, "learning_rate": 1.1385958574874178e-07, "loss": 0.5736, "step": 14742 }, { "epoch": 0.93, "grad_norm": 0.9759803414344788, "learning_rate": 1.1364198138100191e-07, "loss": 0.64, "step": 14743 }, { "epoch": 0.93, "grad_norm": 1.012252926826477, "learning_rate": 1.1342458276079937e-07, "loss": 0.6271, "step": 14744 }, { "epoch": 0.93, "grad_norm": 0.8845311403274536, "learning_rate": 1.1320738989728963e-07, "loss": 0.5, "step": 14745 }, { "epoch": 0.93, "grad_norm": 0.7984757423400879, "learning_rate": 1.1299040279961593e-07, "loss": 0.516, "step": 14746 }, { "epoch": 0.93, "grad_norm": 0.8785191774368286, "learning_rate": 1.1277362147691595e-07, "loss": 0.5672, "step": 14747 }, { "epoch": 0.93, "grad_norm": 0.8724797368049622, "learning_rate": 1.125570459383174e-07, "loss": 0.5881, "step": 14748 }, { "epoch": 0.93, "grad_norm": 0.8874450325965881, "learning_rate": 1.1234067619293909e-07, "loss": 0.5522, "step": 14749 }, { "epoch": 0.93, "grad_norm": 0.9328641891479492, "learning_rate": 1.1212451224989262e-07, "loss": 0.5661, "step": 14750 }, { "epoch": 0.93, "grad_norm": 0.9419904351234436, "learning_rate": 1.1190855411827906e-07, "loss": 0.5788, "step": 14751 }, { "epoch": 0.93, "grad_norm": 0.945692241191864, "learning_rate": 1.1169280180719111e-07, "loss": 0.5635, "step": 14752 }, { "epoch": 0.93, "grad_norm": 0.8278078436851501, "learning_rate": 1.114772553257154e-07, "loss": 0.5664, "step": 14753 }, { "epoch": 0.93, "grad_norm": 0.8511500358581543, "learning_rate": 1.1126191468292579e-07, "loss": 0.5296, "step": 14754 }, { "epoch": 0.93, "grad_norm": 0.921492338180542, "learning_rate": 1.1104677988789004e-07, "loss": 0.6163, "step": 14755 }, { "epoch": 0.93, "grad_norm": 0.8180157542228699, "learning_rate": 1.1083185094966753e-07, "loss": 0.519, "step": 14756 }, { "epoch": 0.93, "grad_norm": 0.9290024638175964, "learning_rate": 1.1061712787730716e-07, "loss": 0.5699, "step": 14757 }, { "epoch": 0.93, "grad_norm": 0.8530245423316956, "learning_rate": 1.1040261067985114e-07, "loss": 0.5732, "step": 14758 }, { "epoch": 0.94, "grad_norm": 0.8908236026763916, "learning_rate": 1.1018829936633113e-07, "loss": 0.6248, "step": 14759 }, { "epoch": 0.94, "grad_norm": 0.9305959939956665, "learning_rate": 1.0997419394577158e-07, "loss": 0.6109, "step": 14760 }, { "epoch": 0.94, "grad_norm": 0.8903090357780457, "learning_rate": 1.0976029442718694e-07, "loss": 0.5836, "step": 14761 }, { "epoch": 0.94, "grad_norm": 0.9302195310592651, "learning_rate": 1.0954660081958502e-07, "loss": 0.5357, "step": 14762 }, { "epoch": 0.94, "grad_norm": 0.9230768084526062, "learning_rate": 1.0933311313196304e-07, "loss": 0.6302, "step": 14763 }, { "epoch": 0.94, "grad_norm": 0.876150906085968, "learning_rate": 1.091198313733105e-07, "loss": 0.5398, "step": 14764 }, { "epoch": 0.94, "grad_norm": 0.8719222545623779, "learning_rate": 1.0890675555260688e-07, "loss": 0.5634, "step": 14765 }, { "epoch": 0.94, "grad_norm": 0.8793936967849731, "learning_rate": 1.086938856788261e-07, "loss": 0.558, "step": 14766 }, { "epoch": 0.94, "grad_norm": 0.9724277257919312, "learning_rate": 1.0848122176092935e-07, "loss": 0.5401, "step": 14767 }, { "epoch": 0.94, "grad_norm": 0.8842143416404724, "learning_rate": 1.0826876380787221e-07, "loss": 0.5905, "step": 14768 }, { "epoch": 0.94, "grad_norm": 0.8512188196182251, "learning_rate": 1.0805651182860033e-07, "loss": 0.5681, "step": 14769 }, { "epoch": 0.94, "grad_norm": 0.8788979649543762, "learning_rate": 1.0784446583205099e-07, "loss": 0.5593, "step": 14770 }, { "epoch": 0.94, "grad_norm": 0.9469314217567444, "learning_rate": 1.0763262582715206e-07, "loss": 0.6131, "step": 14771 }, { "epoch": 0.94, "grad_norm": 0.8478160500526428, "learning_rate": 1.0742099182282529e-07, "loss": 0.5218, "step": 14772 }, { "epoch": 0.94, "grad_norm": 0.8808424472808838, "learning_rate": 1.0720956382797965e-07, "loss": 0.5471, "step": 14773 }, { "epoch": 0.94, "grad_norm": 0.9337725639343262, "learning_rate": 1.0699834185151802e-07, "loss": 0.5696, "step": 14774 }, { "epoch": 0.94, "grad_norm": 0.9188077449798584, "learning_rate": 1.0678732590233553e-07, "loss": 0.5836, "step": 14775 }, { "epoch": 0.94, "grad_norm": 0.8440690040588379, "learning_rate": 1.0657651598931563e-07, "loss": 0.5362, "step": 14776 }, { "epoch": 0.94, "grad_norm": 0.886298418045044, "learning_rate": 1.0636591212133673e-07, "loss": 0.5703, "step": 14777 }, { "epoch": 0.94, "grad_norm": 0.8677876591682434, "learning_rate": 1.0615551430726456e-07, "loss": 0.581, "step": 14778 }, { "epoch": 0.94, "grad_norm": 0.8505701422691345, "learning_rate": 1.0594532255595979e-07, "loss": 0.554, "step": 14779 }, { "epoch": 0.94, "grad_norm": 0.8900310397148132, "learning_rate": 1.0573533687627258e-07, "loss": 0.5679, "step": 14780 }, { "epoch": 0.94, "grad_norm": 0.7801492214202881, "learning_rate": 1.0552555727704417e-07, "loss": 0.5483, "step": 14781 }, { "epoch": 0.94, "grad_norm": 0.9094178676605225, "learning_rate": 1.053159837671075e-07, "loss": 0.5976, "step": 14782 }, { "epoch": 0.94, "grad_norm": 0.8431292772293091, "learning_rate": 1.0510661635528774e-07, "loss": 0.5384, "step": 14783 }, { "epoch": 0.94, "grad_norm": 0.9035899639129639, "learning_rate": 1.0489745505040006e-07, "loss": 0.5172, "step": 14784 }, { "epoch": 0.94, "grad_norm": 0.849152147769928, "learning_rate": 1.0468849986125185e-07, "loss": 0.5678, "step": 14785 }, { "epoch": 0.94, "grad_norm": 0.8912017941474915, "learning_rate": 1.0447975079664163e-07, "loss": 0.556, "step": 14786 }, { "epoch": 0.94, "grad_norm": 0.87160325050354, "learning_rate": 1.042712078653585e-07, "loss": 0.5773, "step": 14787 }, { "epoch": 0.94, "grad_norm": 0.8713656067848206, "learning_rate": 1.0406287107618429e-07, "loss": 0.5917, "step": 14788 }, { "epoch": 0.94, "grad_norm": 0.8525165319442749, "learning_rate": 1.0385474043789034e-07, "loss": 0.5386, "step": 14789 }, { "epoch": 0.94, "grad_norm": 0.8784705996513367, "learning_rate": 1.0364681595924131e-07, "loss": 0.5234, "step": 14790 }, { "epoch": 0.94, "grad_norm": 0.8679460287094116, "learning_rate": 1.034390976489913e-07, "loss": 0.5367, "step": 14791 }, { "epoch": 0.94, "grad_norm": 0.8518469929695129, "learning_rate": 1.0323158551588663e-07, "loss": 0.5427, "step": 14792 }, { "epoch": 0.94, "grad_norm": 0.8853098750114441, "learning_rate": 1.030242795686659e-07, "loss": 0.5349, "step": 14793 }, { "epoch": 0.94, "grad_norm": 0.9167693257331848, "learning_rate": 1.0281717981605765e-07, "loss": 0.5876, "step": 14794 }, { "epoch": 0.94, "grad_norm": 0.8666211366653442, "learning_rate": 1.0261028626678104e-07, "loss": 0.491, "step": 14795 }, { "epoch": 0.94, "grad_norm": 0.8543499708175659, "learning_rate": 1.024035989295491e-07, "loss": 0.5786, "step": 14796 }, { "epoch": 0.94, "grad_norm": 0.9377774000167847, "learning_rate": 1.0219711781306374e-07, "loss": 0.6073, "step": 14797 }, { "epoch": 0.94, "grad_norm": 0.9474400877952576, "learning_rate": 1.0199084292602024e-07, "loss": 0.5722, "step": 14798 }, { "epoch": 0.94, "grad_norm": 0.9373930096626282, "learning_rate": 1.0178477427710276e-07, "loss": 0.6303, "step": 14799 }, { "epoch": 0.94, "grad_norm": 0.9386447668075562, "learning_rate": 1.015789118749888e-07, "loss": 0.5895, "step": 14800 }, { "epoch": 0.94, "grad_norm": 0.8682166337966919, "learning_rate": 1.0137325572834644e-07, "loss": 0.5264, "step": 14801 }, { "epoch": 0.94, "grad_norm": 0.9619151949882507, "learning_rate": 1.0116780584583596e-07, "loss": 0.5927, "step": 14802 }, { "epoch": 0.94, "grad_norm": 0.8697635531425476, "learning_rate": 1.0096256223610657e-07, "loss": 0.5474, "step": 14803 }, { "epoch": 0.94, "grad_norm": 0.9294276833534241, "learning_rate": 1.0075752490780133e-07, "loss": 0.6086, "step": 14804 }, { "epoch": 0.94, "grad_norm": 0.8718865513801575, "learning_rate": 1.0055269386955391e-07, "loss": 0.5709, "step": 14805 }, { "epoch": 0.94, "grad_norm": 0.8805193305015564, "learning_rate": 1.0034806912998796e-07, "loss": 0.5771, "step": 14806 }, { "epoch": 0.94, "grad_norm": 0.8892708420753479, "learning_rate": 1.0014365069772102e-07, "loss": 0.5927, "step": 14807 }, { "epoch": 0.94, "grad_norm": 0.878617525100708, "learning_rate": 9.993943858135846e-08, "loss": 0.632, "step": 14808 }, { "epoch": 0.94, "grad_norm": 0.9322656393051147, "learning_rate": 9.973543278950115e-08, "loss": 0.5679, "step": 14809 }, { "epoch": 0.94, "grad_norm": 1.0550402402877808, "learning_rate": 9.953163333073779e-08, "loss": 0.6446, "step": 14810 }, { "epoch": 0.94, "grad_norm": 0.8860265016555786, "learning_rate": 9.932804021364928e-08, "loss": 0.5912, "step": 14811 }, { "epoch": 0.94, "grad_norm": 0.9326887726783752, "learning_rate": 9.912465344680933e-08, "loss": 0.5793, "step": 14812 }, { "epoch": 0.94, "grad_norm": 0.7893259525299072, "learning_rate": 9.892147303878108e-08, "loss": 0.4661, "step": 14813 }, { "epoch": 0.94, "grad_norm": 0.8492047786712646, "learning_rate": 9.871849899811991e-08, "loss": 0.5652, "step": 14814 }, { "epoch": 0.94, "grad_norm": 0.9137168526649475, "learning_rate": 9.851573133337288e-08, "loss": 0.6163, "step": 14815 }, { "epoch": 0.94, "grad_norm": 0.8527875542640686, "learning_rate": 9.83131700530765e-08, "loss": 0.5876, "step": 14816 }, { "epoch": 0.94, "grad_norm": 0.9483136534690857, "learning_rate": 9.81108151657617e-08, "loss": 0.5822, "step": 14817 }, { "epoch": 0.94, "grad_norm": 0.9227954149246216, "learning_rate": 9.790866667994781e-08, "loss": 0.589, "step": 14818 }, { "epoch": 0.94, "grad_norm": 0.9560403823852539, "learning_rate": 9.770672460414688e-08, "loss": 0.6438, "step": 14819 }, { "epoch": 0.94, "grad_norm": 0.9232782125473022, "learning_rate": 9.750498894686156e-08, "loss": 0.5775, "step": 14820 }, { "epoch": 0.94, "grad_norm": 0.9010851383209229, "learning_rate": 9.730345971658728e-08, "loss": 0.5361, "step": 14821 }, { "epoch": 0.94, "grad_norm": 0.8730600476264954, "learning_rate": 9.710213692180836e-08, "loss": 0.5925, "step": 14822 }, { "epoch": 0.94, "grad_norm": 0.9010719060897827, "learning_rate": 9.690102057100304e-08, "loss": 0.5806, "step": 14823 }, { "epoch": 0.94, "grad_norm": 0.8896587491035461, "learning_rate": 9.670011067263896e-08, "loss": 0.538, "step": 14824 }, { "epoch": 0.94, "grad_norm": 0.8645528554916382, "learning_rate": 9.649940723517549e-08, "loss": 0.5407, "step": 14825 }, { "epoch": 0.94, "grad_norm": 0.9243733286857605, "learning_rate": 9.629891026706472e-08, "loss": 0.5227, "step": 14826 }, { "epoch": 0.94, "grad_norm": 0.8660601377487183, "learning_rate": 9.609861977674773e-08, "loss": 0.5238, "step": 14827 }, { "epoch": 0.94, "grad_norm": 0.8025797605514526, "learning_rate": 9.589853577265829e-08, "loss": 0.5363, "step": 14828 }, { "epoch": 0.94, "grad_norm": 0.8957266807556152, "learning_rate": 9.569865826322133e-08, "loss": 0.5925, "step": 14829 }, { "epoch": 0.94, "grad_norm": 0.8849166035652161, "learning_rate": 9.549898725685291e-08, "loss": 0.5775, "step": 14830 }, { "epoch": 0.94, "grad_norm": 0.8966931700706482, "learning_rate": 9.52995227619613e-08, "loss": 0.5489, "step": 14831 }, { "epoch": 0.94, "grad_norm": 0.8650779724121094, "learning_rate": 9.510026478694423e-08, "loss": 0.5734, "step": 14832 }, { "epoch": 0.94, "grad_norm": 0.799005925655365, "learning_rate": 9.49012133401922e-08, "loss": 0.5848, "step": 14833 }, { "epoch": 0.94, "grad_norm": 0.8896704912185669, "learning_rate": 9.47023684300863e-08, "loss": 0.6051, "step": 14834 }, { "epoch": 0.94, "grad_norm": 0.9791715145111084, "learning_rate": 9.450373006499924e-08, "loss": 0.5622, "step": 14835 }, { "epoch": 0.94, "grad_norm": 0.9830961227416992, "learning_rate": 9.430529825329492e-08, "loss": 0.5459, "step": 14836 }, { "epoch": 0.94, "grad_norm": 0.8603661060333252, "learning_rate": 9.410707300333e-08, "loss": 0.6293, "step": 14837 }, { "epoch": 0.94, "grad_norm": 0.8368241190910339, "learning_rate": 9.390905432344833e-08, "loss": 0.5623, "step": 14838 }, { "epoch": 0.94, "grad_norm": 0.9664581418037415, "learning_rate": 9.371124222199046e-08, "loss": 0.5745, "step": 14839 }, { "epoch": 0.94, "grad_norm": 0.9014714360237122, "learning_rate": 9.35136367072842e-08, "loss": 0.5557, "step": 14840 }, { "epoch": 0.94, "grad_norm": 0.9360548257827759, "learning_rate": 9.331623778765009e-08, "loss": 0.5281, "step": 14841 }, { "epoch": 0.94, "grad_norm": 0.938177764415741, "learning_rate": 9.311904547139982e-08, "loss": 0.5604, "step": 14842 }, { "epoch": 0.94, "grad_norm": 0.9227628707885742, "learning_rate": 9.292205976683733e-08, "loss": 0.5932, "step": 14843 }, { "epoch": 0.94, "grad_norm": 0.8790847063064575, "learning_rate": 9.272528068225595e-08, "loss": 0.5774, "step": 14844 }, { "epoch": 0.94, "grad_norm": 0.8632011413574219, "learning_rate": 9.252870822594239e-08, "loss": 0.574, "step": 14845 }, { "epoch": 0.94, "grad_norm": 0.8468140959739685, "learning_rate": 9.233234240617228e-08, "loss": 0.5549, "step": 14846 }, { "epoch": 0.94, "grad_norm": 0.878075897693634, "learning_rate": 9.213618323121564e-08, "loss": 0.589, "step": 14847 }, { "epoch": 0.94, "grad_norm": 0.8827310800552368, "learning_rate": 9.19402307093309e-08, "loss": 0.5678, "step": 14848 }, { "epoch": 0.94, "grad_norm": 0.9109396934509277, "learning_rate": 9.174448484876864e-08, "loss": 0.5426, "step": 14849 }, { "epoch": 0.94, "grad_norm": 0.8124753832817078, "learning_rate": 9.154894565777173e-08, "loss": 0.486, "step": 14850 }, { "epoch": 0.94, "grad_norm": 0.9251282215118408, "learning_rate": 9.135361314457358e-08, "loss": 0.5975, "step": 14851 }, { "epoch": 0.94, "grad_norm": 0.8516286015510559, "learning_rate": 9.115848731739874e-08, "loss": 0.5802, "step": 14852 }, { "epoch": 0.94, "grad_norm": 0.9649109244346619, "learning_rate": 9.096356818446395e-08, "loss": 0.6226, "step": 14853 }, { "epoch": 0.94, "grad_norm": 0.8949142694473267, "learning_rate": 9.076885575397543e-08, "loss": 0.5577, "step": 14854 }, { "epoch": 0.94, "grad_norm": 0.8870638012886047, "learning_rate": 9.057435003413273e-08, "loss": 0.619, "step": 14855 }, { "epoch": 0.94, "grad_norm": 0.911157488822937, "learning_rate": 9.038005103312486e-08, "loss": 0.5763, "step": 14856 }, { "epoch": 0.94, "grad_norm": 0.9567490816116333, "learning_rate": 9.018595875913416e-08, "loss": 0.5394, "step": 14857 }, { "epoch": 0.94, "grad_norm": 0.9272708296775818, "learning_rate": 8.999207322033299e-08, "loss": 0.5666, "step": 14858 }, { "epoch": 0.94, "grad_norm": 0.8706281781196594, "learning_rate": 8.979839442488425e-08, "loss": 0.5775, "step": 14859 }, { "epoch": 0.94, "grad_norm": 0.8722688555717468, "learning_rate": 8.960492238094421e-08, "loss": 0.511, "step": 14860 }, { "epoch": 0.94, "grad_norm": 0.8723371624946594, "learning_rate": 8.941165709665966e-08, "loss": 0.5932, "step": 14861 }, { "epoch": 0.94, "grad_norm": 0.8604128360748291, "learning_rate": 8.921859858016635e-08, "loss": 0.5499, "step": 14862 }, { "epoch": 0.94, "grad_norm": 0.9421664476394653, "learning_rate": 8.902574683959442e-08, "loss": 0.5585, "step": 14863 }, { "epoch": 0.94, "grad_norm": 0.8716034889221191, "learning_rate": 8.883310188306515e-08, "loss": 0.5443, "step": 14864 }, { "epoch": 0.94, "grad_norm": 0.9441227316856384, "learning_rate": 8.864066371868873e-08, "loss": 0.6073, "step": 14865 }, { "epoch": 0.94, "grad_norm": 0.925212562084198, "learning_rate": 8.844843235456868e-08, "loss": 0.5865, "step": 14866 }, { "epoch": 0.94, "grad_norm": 0.8931495547294617, "learning_rate": 8.825640779879962e-08, "loss": 0.516, "step": 14867 }, { "epoch": 0.94, "grad_norm": 0.8108246326446533, "learning_rate": 8.806459005946565e-08, "loss": 0.5725, "step": 14868 }, { "epoch": 0.94, "grad_norm": 0.92397141456604, "learning_rate": 8.787297914464533e-08, "loss": 0.5473, "step": 14869 }, { "epoch": 0.94, "grad_norm": 0.9079901576042175, "learning_rate": 8.768157506240494e-08, "loss": 0.616, "step": 14870 }, { "epoch": 0.94, "grad_norm": 0.9255422353744507, "learning_rate": 8.749037782080528e-08, "loss": 0.6009, "step": 14871 }, { "epoch": 0.94, "grad_norm": 0.8876083493232727, "learning_rate": 8.729938742789601e-08, "loss": 0.5668, "step": 14872 }, { "epoch": 0.94, "grad_norm": 0.8934264183044434, "learning_rate": 8.71086038917196e-08, "loss": 0.6096, "step": 14873 }, { "epoch": 0.94, "grad_norm": 0.9331870079040527, "learning_rate": 8.691802722030906e-08, "loss": 0.5824, "step": 14874 }, { "epoch": 0.94, "grad_norm": 0.8458074927330017, "learning_rate": 8.672765742168964e-08, "loss": 0.5393, "step": 14875 }, { "epoch": 0.94, "grad_norm": 0.9242926836013794, "learning_rate": 8.65374945038755e-08, "loss": 0.5651, "step": 14876 }, { "epoch": 0.94, "grad_norm": 0.9075822830200195, "learning_rate": 8.634753847487575e-08, "loss": 0.6445, "step": 14877 }, { "epoch": 0.94, "grad_norm": 0.8882941603660583, "learning_rate": 8.615778934268793e-08, "loss": 0.6056, "step": 14878 }, { "epoch": 0.94, "grad_norm": 0.8262979984283447, "learning_rate": 8.59682471153006e-08, "loss": 0.5354, "step": 14879 }, { "epoch": 0.94, "grad_norm": 0.8280760645866394, "learning_rate": 8.577891180069687e-08, "loss": 0.5311, "step": 14880 }, { "epoch": 0.94, "grad_norm": 0.9035456776618958, "learning_rate": 8.558978340684642e-08, "loss": 0.572, "step": 14881 }, { "epoch": 0.94, "grad_norm": 0.8470786213874817, "learning_rate": 8.540086194171515e-08, "loss": 0.5889, "step": 14882 }, { "epoch": 0.94, "grad_norm": 0.8594496250152588, "learning_rate": 8.521214741325722e-08, "loss": 0.5709, "step": 14883 }, { "epoch": 0.94, "grad_norm": 0.8812367916107178, "learning_rate": 8.502363982941797e-08, "loss": 0.5249, "step": 14884 }, { "epoch": 0.94, "grad_norm": 0.8610761165618896, "learning_rate": 8.483533919813546e-08, "loss": 0.5788, "step": 14885 }, { "epoch": 0.94, "grad_norm": 0.882064700126648, "learning_rate": 8.464724552733782e-08, "loss": 0.6047, "step": 14886 }, { "epoch": 0.94, "grad_norm": 0.9064013361930847, "learning_rate": 8.445935882494593e-08, "loss": 0.5604, "step": 14887 }, { "epoch": 0.94, "grad_norm": 0.9299684166908264, "learning_rate": 8.427167909887069e-08, "loss": 0.5411, "step": 14888 }, { "epoch": 0.94, "grad_norm": 0.9375229477882385, "learning_rate": 8.408420635701353e-08, "loss": 0.5626, "step": 14889 }, { "epoch": 0.94, "grad_norm": 0.9137569665908813, "learning_rate": 8.389694060726927e-08, "loss": 0.6039, "step": 14890 }, { "epoch": 0.94, "grad_norm": 0.9540867209434509, "learning_rate": 8.370988185752383e-08, "loss": 0.631, "step": 14891 }, { "epoch": 0.94, "grad_norm": 0.8793188333511353, "learning_rate": 8.352303011565254e-08, "loss": 0.5763, "step": 14892 }, { "epoch": 0.94, "grad_norm": 0.880684494972229, "learning_rate": 8.333638538952305e-08, "loss": 0.5755, "step": 14893 }, { "epoch": 0.94, "grad_norm": 0.8908638954162598, "learning_rate": 8.314994768699458e-08, "loss": 0.5873, "step": 14894 }, { "epoch": 0.94, "grad_norm": 0.9388841986656189, "learning_rate": 8.296371701591699e-08, "loss": 0.5329, "step": 14895 }, { "epoch": 0.94, "grad_norm": 0.8881575465202332, "learning_rate": 8.277769338413288e-08, "loss": 0.6017, "step": 14896 }, { "epoch": 0.94, "grad_norm": 0.8779671788215637, "learning_rate": 8.259187679947434e-08, "loss": 0.5545, "step": 14897 }, { "epoch": 0.94, "grad_norm": 0.874380350112915, "learning_rate": 8.240626726976453e-08, "loss": 0.5587, "step": 14898 }, { "epoch": 0.94, "grad_norm": 0.9045870900154114, "learning_rate": 8.222086480282054e-08, "loss": 0.5667, "step": 14899 }, { "epoch": 0.94, "grad_norm": 0.9012387990951538, "learning_rate": 8.20356694064478e-08, "loss": 0.6325, "step": 14900 }, { "epoch": 0.94, "grad_norm": 0.8285881280899048, "learning_rate": 8.185068108844507e-08, "loss": 0.5491, "step": 14901 }, { "epoch": 0.94, "grad_norm": 0.8597615361213684, "learning_rate": 8.166589985660056e-08, "loss": 0.5429, "step": 14902 }, { "epoch": 0.94, "grad_norm": 0.9608265161514282, "learning_rate": 8.148132571869582e-08, "loss": 0.6108, "step": 14903 }, { "epoch": 0.94, "grad_norm": 0.8628665208816528, "learning_rate": 8.129695868250242e-08, "loss": 0.5564, "step": 14904 }, { "epoch": 0.94, "grad_norm": 0.8341482877731323, "learning_rate": 8.111279875578304e-08, "loss": 0.5658, "step": 14905 }, { "epoch": 0.94, "grad_norm": 0.8764296770095825, "learning_rate": 8.092884594629147e-08, "loss": 0.582, "step": 14906 }, { "epoch": 0.94, "grad_norm": 0.8712512254714966, "learning_rate": 8.074510026177485e-08, "loss": 0.5598, "step": 14907 }, { "epoch": 0.94, "grad_norm": 1.110312581062317, "learning_rate": 8.056156170996866e-08, "loss": 0.6074, "step": 14908 }, { "epoch": 0.94, "grad_norm": 0.8867812156677246, "learning_rate": 8.03782302986017e-08, "loss": 0.5814, "step": 14909 }, { "epoch": 0.94, "grad_norm": 0.9579918384552002, "learning_rate": 8.019510603539338e-08, "loss": 0.5672, "step": 14910 }, { "epoch": 0.94, "grad_norm": 0.8660980463027954, "learning_rate": 8.001218892805474e-08, "loss": 0.569, "step": 14911 }, { "epoch": 0.94, "grad_norm": 0.9106853604316711, "learning_rate": 7.982947898428739e-08, "loss": 0.577, "step": 14912 }, { "epoch": 0.94, "grad_norm": 0.8973606824874878, "learning_rate": 7.964697621178463e-08, "loss": 0.623, "step": 14913 }, { "epoch": 0.94, "grad_norm": 0.8993417024612427, "learning_rate": 7.946468061823031e-08, "loss": 0.5553, "step": 14914 }, { "epoch": 0.94, "grad_norm": 0.9079226851463318, "learning_rate": 7.928259221130163e-08, "loss": 0.5785, "step": 14915 }, { "epoch": 0.95, "grad_norm": 0.8791465759277344, "learning_rate": 7.910071099866523e-08, "loss": 0.6139, "step": 14916 }, { "epoch": 0.95, "grad_norm": 0.8370904326438904, "learning_rate": 7.891903698797886e-08, "loss": 0.5087, "step": 14917 }, { "epoch": 0.95, "grad_norm": 0.8958890438079834, "learning_rate": 7.87375701868931e-08, "loss": 0.5871, "step": 14918 }, { "epoch": 0.95, "grad_norm": 0.863865315914154, "learning_rate": 7.855631060304792e-08, "loss": 0.5092, "step": 14919 }, { "epoch": 0.95, "grad_norm": 0.8901463747024536, "learning_rate": 7.837525824407665e-08, "loss": 0.5633, "step": 14920 }, { "epoch": 0.95, "grad_norm": 0.8937858939170837, "learning_rate": 7.819441311760156e-08, "loss": 0.5461, "step": 14921 }, { "epoch": 0.95, "grad_norm": 0.9540120363235474, "learning_rate": 7.801377523123877e-08, "loss": 0.6248, "step": 14922 }, { "epoch": 0.95, "grad_norm": 0.8416619300842285, "learning_rate": 7.783334459259273e-08, "loss": 0.5367, "step": 14923 }, { "epoch": 0.95, "grad_norm": 0.9118484854698181, "learning_rate": 7.765312120926182e-08, "loss": 0.6225, "step": 14924 }, { "epoch": 0.95, "grad_norm": 0.8478346467018127, "learning_rate": 7.747310508883444e-08, "loss": 0.5574, "step": 14925 }, { "epoch": 0.95, "grad_norm": 0.8656757473945618, "learning_rate": 7.729329623889114e-08, "loss": 0.5149, "step": 14926 }, { "epoch": 0.95, "grad_norm": 0.9110966920852661, "learning_rate": 7.711369466700147e-08, "loss": 0.6383, "step": 14927 }, { "epoch": 0.95, "grad_norm": 0.8306471109390259, "learning_rate": 7.693430038072824e-08, "loss": 0.5397, "step": 14928 }, { "epoch": 0.95, "grad_norm": 0.8142772316932678, "learning_rate": 7.675511338762654e-08, "loss": 0.481, "step": 14929 }, { "epoch": 0.95, "grad_norm": 0.8677499890327454, "learning_rate": 7.657613369523975e-08, "loss": 0.5712, "step": 14930 }, { "epoch": 0.95, "grad_norm": 0.8763403296470642, "learning_rate": 7.639736131110465e-08, "loss": 0.581, "step": 14931 }, { "epoch": 0.95, "grad_norm": 1.208530068397522, "learning_rate": 7.621879624274853e-08, "loss": 0.5727, "step": 14932 }, { "epoch": 0.95, "grad_norm": 0.8692548274993896, "learning_rate": 7.604043849769094e-08, "loss": 0.5459, "step": 14933 }, { "epoch": 0.95, "grad_norm": 0.9086669087409973, "learning_rate": 7.586228808344087e-08, "loss": 0.6114, "step": 14934 }, { "epoch": 0.95, "grad_norm": 0.9117394089698792, "learning_rate": 7.56843450075001e-08, "loss": 0.5875, "step": 14935 }, { "epoch": 0.95, "grad_norm": 0.873921811580658, "learning_rate": 7.550660927736042e-08, "loss": 0.529, "step": 14936 }, { "epoch": 0.95, "grad_norm": 0.918420135974884, "learning_rate": 7.53290809005075e-08, "loss": 0.5835, "step": 14937 }, { "epoch": 0.95, "grad_norm": 0.8901522755622864, "learning_rate": 7.515175988441481e-08, "loss": 0.5254, "step": 14938 }, { "epoch": 0.95, "grad_norm": 0.9384918212890625, "learning_rate": 7.497464623654915e-08, "loss": 0.5693, "step": 14939 }, { "epoch": 0.95, "grad_norm": 0.9153959155082703, "learning_rate": 7.479773996436845e-08, "loss": 0.533, "step": 14940 }, { "epoch": 0.95, "grad_norm": 0.8577287793159485, "learning_rate": 7.46210410753212e-08, "loss": 0.5409, "step": 14941 }, { "epoch": 0.95, "grad_norm": 0.9197996854782104, "learning_rate": 7.44445495768481e-08, "loss": 0.5721, "step": 14942 }, { "epoch": 0.95, "grad_norm": 0.9248746633529663, "learning_rate": 7.426826547637989e-08, "loss": 0.6288, "step": 14943 }, { "epoch": 0.95, "grad_norm": 0.8270097374916077, "learning_rate": 7.40921887813395e-08, "loss": 0.565, "step": 14944 }, { "epoch": 0.95, "grad_norm": 0.8829072713851929, "learning_rate": 7.391631949914102e-08, "loss": 0.5745, "step": 14945 }, { "epoch": 0.95, "grad_norm": 0.9314706325531006, "learning_rate": 7.374065763719018e-08, "loss": 0.5642, "step": 14946 }, { "epoch": 0.95, "grad_norm": 0.9067994356155396, "learning_rate": 7.356520320288274e-08, "loss": 0.6136, "step": 14947 }, { "epoch": 0.95, "grad_norm": 0.8551090359687805, "learning_rate": 7.338995620360722e-08, "loss": 0.5157, "step": 14948 }, { "epoch": 0.95, "grad_norm": 0.8279046416282654, "learning_rate": 7.321491664674163e-08, "loss": 0.5155, "step": 14949 }, { "epoch": 0.95, "grad_norm": 0.9258044362068176, "learning_rate": 7.304008453965727e-08, "loss": 0.6114, "step": 14950 }, { "epoch": 0.95, "grad_norm": 0.9103056192398071, "learning_rate": 7.286545988971495e-08, "loss": 0.5794, "step": 14951 }, { "epoch": 0.95, "grad_norm": 0.885990560054779, "learning_rate": 7.269104270426818e-08, "loss": 0.5718, "step": 14952 }, { "epoch": 0.95, "grad_norm": 0.965684175491333, "learning_rate": 7.251683299066059e-08, "loss": 0.5777, "step": 14953 }, { "epoch": 0.95, "grad_norm": 0.9360918998718262, "learning_rate": 7.23428307562274e-08, "loss": 0.5961, "step": 14954 }, { "epoch": 0.95, "grad_norm": 0.8759440183639526, "learning_rate": 7.216903600829605e-08, "loss": 0.5533, "step": 14955 }, { "epoch": 0.95, "grad_norm": 0.92622309923172, "learning_rate": 7.199544875418407e-08, "loss": 0.6231, "step": 14956 }, { "epoch": 0.95, "grad_norm": 0.9205344319343567, "learning_rate": 7.182206900119948e-08, "loss": 0.5824, "step": 14957 }, { "epoch": 0.95, "grad_norm": 0.8723695874214172, "learning_rate": 7.164889675664477e-08, "loss": 0.6116, "step": 14958 }, { "epoch": 0.95, "grad_norm": 0.9015873074531555, "learning_rate": 7.147593202781022e-08, "loss": 0.5673, "step": 14959 }, { "epoch": 0.95, "grad_norm": 0.9231569170951843, "learning_rate": 7.13031748219789e-08, "loss": 0.5602, "step": 14960 }, { "epoch": 0.95, "grad_norm": 0.8926693797111511, "learning_rate": 7.113062514642555e-08, "loss": 0.569, "step": 14961 }, { "epoch": 0.95, "grad_norm": 0.9044926762580872, "learning_rate": 7.095828300841435e-08, "loss": 0.5192, "step": 14962 }, { "epoch": 0.95, "grad_norm": 0.9111180901527405, "learning_rate": 7.078614841520392e-08, "loss": 0.6293, "step": 14963 }, { "epoch": 0.95, "grad_norm": 0.8154220581054688, "learning_rate": 7.061422137404129e-08, "loss": 0.5807, "step": 14964 }, { "epoch": 0.95, "grad_norm": 0.8674167394638062, "learning_rate": 7.044250189216561e-08, "loss": 0.5303, "step": 14965 }, { "epoch": 0.95, "grad_norm": 0.9023363590240479, "learning_rate": 7.027098997680726e-08, "loss": 0.6174, "step": 14966 }, { "epoch": 0.95, "grad_norm": 0.9068924784660339, "learning_rate": 7.00996856351882e-08, "loss": 0.5724, "step": 14967 }, { "epoch": 0.95, "grad_norm": 0.9573983550071716, "learning_rate": 6.992858887452158e-08, "loss": 0.5502, "step": 14968 }, { "epoch": 0.95, "grad_norm": 0.9019178748130798, "learning_rate": 6.975769970201163e-08, "loss": 0.5813, "step": 14969 }, { "epoch": 0.95, "grad_norm": 0.8365936279296875, "learning_rate": 6.958701812485369e-08, "loss": 0.5851, "step": 14970 }, { "epoch": 0.95, "grad_norm": 0.9230535626411438, "learning_rate": 6.94165441502348e-08, "loss": 0.5764, "step": 14971 }, { "epoch": 0.95, "grad_norm": 0.9693920612335205, "learning_rate": 6.924627778533366e-08, "loss": 0.5714, "step": 14972 }, { "epoch": 0.95, "grad_norm": 0.8915910720825195, "learning_rate": 6.907621903731842e-08, "loss": 0.6017, "step": 14973 }, { "epoch": 0.95, "grad_norm": 0.8678449392318726, "learning_rate": 6.890636791335003e-08, "loss": 0.5304, "step": 14974 }, { "epoch": 0.95, "grad_norm": 0.8314898014068604, "learning_rate": 6.873672442058054e-08, "loss": 0.5401, "step": 14975 }, { "epoch": 0.95, "grad_norm": 0.893195629119873, "learning_rate": 6.856728856615314e-08, "loss": 0.5449, "step": 14976 }, { "epoch": 0.95, "grad_norm": 0.8856935501098633, "learning_rate": 6.839806035720209e-08, "loss": 0.5813, "step": 14977 }, { "epoch": 0.95, "grad_norm": 0.844600260257721, "learning_rate": 6.822903980085282e-08, "loss": 0.5913, "step": 14978 }, { "epoch": 0.95, "grad_norm": 0.8763694763183594, "learning_rate": 6.806022690422187e-08, "loss": 0.5783, "step": 14979 }, { "epoch": 0.95, "grad_norm": 0.8728997111320496, "learning_rate": 6.789162167441798e-08, "loss": 0.5942, "step": 14980 }, { "epoch": 0.95, "grad_norm": 0.9138465523719788, "learning_rate": 6.772322411854048e-08, "loss": 0.6402, "step": 14981 }, { "epoch": 0.95, "grad_norm": 0.9363642930984497, "learning_rate": 6.755503424368037e-08, "loss": 0.5904, "step": 14982 }, { "epoch": 0.95, "grad_norm": 0.9248054027557373, "learning_rate": 6.73870520569181e-08, "loss": 0.6464, "step": 14983 }, { "epoch": 0.95, "grad_norm": 0.9054921269416809, "learning_rate": 6.721927756532853e-08, "loss": 0.5367, "step": 14984 }, { "epoch": 0.95, "grad_norm": 0.8897875547409058, "learning_rate": 6.705171077597495e-08, "loss": 0.5691, "step": 14985 }, { "epoch": 0.95, "grad_norm": 0.9076294302940369, "learning_rate": 6.68843516959139e-08, "loss": 0.6368, "step": 14986 }, { "epoch": 0.95, "grad_norm": 1.060864806175232, "learning_rate": 6.67172003321903e-08, "loss": 0.6116, "step": 14987 }, { "epoch": 0.95, "grad_norm": 0.9097266793251038, "learning_rate": 6.655025669184522e-08, "loss": 0.6004, "step": 14988 }, { "epoch": 0.95, "grad_norm": 0.8445072174072266, "learning_rate": 6.638352078190636e-08, "loss": 0.5223, "step": 14989 }, { "epoch": 0.95, "grad_norm": 0.8733325600624084, "learning_rate": 6.621699260939418e-08, "loss": 0.5643, "step": 14990 }, { "epoch": 0.95, "grad_norm": 0.8759425282478333, "learning_rate": 6.605067218132145e-08, "loss": 0.6103, "step": 14991 }, { "epoch": 0.95, "grad_norm": 0.9013230800628662, "learning_rate": 6.58845595046903e-08, "loss": 0.643, "step": 14992 }, { "epoch": 0.95, "grad_norm": 0.9331822991371155, "learning_rate": 6.571865458649629e-08, "loss": 0.6525, "step": 14993 }, { "epoch": 0.95, "grad_norm": 0.8814842700958252, "learning_rate": 6.555295743372492e-08, "loss": 0.5572, "step": 14994 }, { "epoch": 0.95, "grad_norm": 0.8553088903427124, "learning_rate": 6.538746805335284e-08, "loss": 0.5265, "step": 14995 }, { "epoch": 0.95, "grad_norm": 0.9224663376808167, "learning_rate": 6.52221864523478e-08, "loss": 0.6197, "step": 14996 }, { "epoch": 0.95, "grad_norm": 0.8610914945602417, "learning_rate": 6.505711263766978e-08, "loss": 0.5687, "step": 14997 }, { "epoch": 0.95, "grad_norm": 0.8670297265052795, "learning_rate": 6.48922466162688e-08, "loss": 0.5878, "step": 14998 }, { "epoch": 0.95, "grad_norm": 0.8055164217948914, "learning_rate": 6.472758839508819e-08, "loss": 0.553, "step": 14999 }, { "epoch": 0.95, "grad_norm": 0.8382790088653564, "learning_rate": 6.456313798105962e-08, "loss": 0.5657, "step": 15000 }, { "epoch": 0.95, "grad_norm": 0.8944666385650635, "learning_rate": 6.439889538110867e-08, "loss": 0.6654, "step": 15001 }, { "epoch": 0.95, "grad_norm": 0.9049035310745239, "learning_rate": 6.423486060215034e-08, "loss": 0.5784, "step": 15002 }, { "epoch": 0.95, "grad_norm": 0.8876950144767761, "learning_rate": 6.40710336510919e-08, "loss": 0.5315, "step": 15003 }, { "epoch": 0.95, "grad_norm": 0.873111367225647, "learning_rate": 6.390741453483119e-08, "loss": 0.5567, "step": 15004 }, { "epoch": 0.95, "grad_norm": 0.8574492931365967, "learning_rate": 6.374400326025765e-08, "loss": 0.5751, "step": 15005 }, { "epoch": 0.95, "grad_norm": 0.9291654825210571, "learning_rate": 6.358079983425247e-08, "loss": 0.5736, "step": 15006 }, { "epoch": 0.95, "grad_norm": 0.880135715007782, "learning_rate": 6.341780426368737e-08, "loss": 0.5925, "step": 15007 }, { "epoch": 0.95, "grad_norm": 0.8919762372970581, "learning_rate": 6.32550165554252e-08, "loss": 0.5895, "step": 15008 }, { "epoch": 0.95, "grad_norm": 0.9122921824455261, "learning_rate": 6.309243671632048e-08, "loss": 0.6224, "step": 15009 }, { "epoch": 0.95, "grad_norm": 0.9773128628730774, "learning_rate": 6.293006475321939e-08, "loss": 0.5849, "step": 15010 }, { "epoch": 0.95, "grad_norm": 0.9223852157592773, "learning_rate": 6.276790067295813e-08, "loss": 0.593, "step": 15011 }, { "epoch": 0.95, "grad_norm": 0.8956741094589233, "learning_rate": 6.260594448236513e-08, "loss": 0.554, "step": 15012 }, { "epoch": 0.95, "grad_norm": 0.8495054244995117, "learning_rate": 6.244419618825992e-08, "loss": 0.5502, "step": 15013 }, { "epoch": 0.95, "grad_norm": 0.8928588628768921, "learning_rate": 6.228265579745318e-08, "loss": 0.5813, "step": 15014 }, { "epoch": 0.95, "grad_norm": 0.8524266481399536, "learning_rate": 6.212132331674725e-08, "loss": 0.5767, "step": 15015 }, { "epoch": 0.95, "grad_norm": 0.9605539441108704, "learning_rate": 6.196019875293391e-08, "loss": 0.635, "step": 15016 }, { "epoch": 0.95, "grad_norm": 0.8842973709106445, "learning_rate": 6.179928211279884e-08, "loss": 0.5695, "step": 15017 }, { "epoch": 0.95, "grad_norm": 0.9240403175354004, "learning_rate": 6.163857340311718e-08, "loss": 0.5168, "step": 15018 }, { "epoch": 0.95, "grad_norm": 0.8997433185577393, "learning_rate": 6.147807263065575e-08, "loss": 0.5261, "step": 15019 }, { "epoch": 0.95, "grad_norm": 0.9333794713020325, "learning_rate": 6.131777980217302e-08, "loss": 0.6007, "step": 15020 }, { "epoch": 0.95, "grad_norm": 0.8286879062652588, "learning_rate": 6.115769492441859e-08, "loss": 0.5741, "step": 15021 }, { "epoch": 0.95, "grad_norm": 0.9274932146072388, "learning_rate": 6.099781800413151e-08, "loss": 0.605, "step": 15022 }, { "epoch": 0.95, "grad_norm": 0.9669122099876404, "learning_rate": 6.083814904804586e-08, "loss": 0.6425, "step": 15023 }, { "epoch": 0.95, "grad_norm": 0.869624674320221, "learning_rate": 6.067868806288346e-08, "loss": 0.5806, "step": 15024 }, { "epoch": 0.95, "grad_norm": 0.8654236197471619, "learning_rate": 6.05194350553584e-08, "loss": 0.561, "step": 15025 }, { "epoch": 0.95, "grad_norm": 0.9452944397926331, "learning_rate": 6.036039003217697e-08, "loss": 0.5796, "step": 15026 }, { "epoch": 0.95, "grad_norm": 0.9010240435600281, "learning_rate": 6.02015530000355e-08, "loss": 0.6307, "step": 15027 }, { "epoch": 0.95, "grad_norm": 0.8794552683830261, "learning_rate": 6.00429239656225e-08, "loss": 0.5545, "step": 15028 }, { "epoch": 0.95, "grad_norm": 0.8755041360855103, "learning_rate": 5.988450293561765e-08, "loss": 0.5406, "step": 15029 }, { "epoch": 0.95, "grad_norm": 0.8471874594688416, "learning_rate": 5.972628991669006e-08, "loss": 0.5309, "step": 15030 }, { "epoch": 0.95, "grad_norm": 0.9170916080474854, "learning_rate": 5.956828491550326e-08, "loss": 0.6034, "step": 15031 }, { "epoch": 0.95, "grad_norm": 0.8616017699241638, "learning_rate": 5.941048793870918e-08, "loss": 0.5492, "step": 15032 }, { "epoch": 0.95, "grad_norm": 0.8741750717163086, "learning_rate": 5.92528989929525e-08, "loss": 0.5936, "step": 15033 }, { "epoch": 0.95, "grad_norm": 0.8802669048309326, "learning_rate": 5.9095518084868467e-08, "loss": 0.5961, "step": 15034 }, { "epoch": 0.95, "grad_norm": 0.8247042298316956, "learning_rate": 5.893834522108399e-08, "loss": 0.5909, "step": 15035 }, { "epoch": 0.95, "grad_norm": 0.9004446864128113, "learning_rate": 5.8781380408217124e-08, "loss": 0.5701, "step": 15036 }, { "epoch": 0.95, "grad_norm": 0.8959584832191467, "learning_rate": 5.862462365287702e-08, "loss": 0.5698, "step": 15037 }, { "epoch": 0.95, "grad_norm": 0.9272680878639221, "learning_rate": 5.846807496166451e-08, "loss": 0.5963, "step": 15038 }, { "epoch": 0.95, "grad_norm": 0.9091727137565613, "learning_rate": 5.831173434117043e-08, "loss": 0.5561, "step": 15039 }, { "epoch": 0.95, "grad_norm": 0.8189939260482788, "learning_rate": 5.815560179797897e-08, "loss": 0.5501, "step": 15040 }, { "epoch": 0.95, "grad_norm": 0.8391079306602478, "learning_rate": 5.7999677338663184e-08, "loss": 0.5442, "step": 15041 }, { "epoch": 0.95, "grad_norm": 0.8865288496017456, "learning_rate": 5.7843960969790056e-08, "loss": 0.5783, "step": 15042 }, { "epoch": 0.95, "grad_norm": 0.8449594378471375, "learning_rate": 5.768845269791379e-08, "loss": 0.5728, "step": 15043 }, { "epoch": 0.95, "grad_norm": 0.8410879373550415, "learning_rate": 5.7533152529584135e-08, "loss": 0.4878, "step": 15044 }, { "epoch": 0.95, "grad_norm": 0.9583869576454163, "learning_rate": 5.7378060471340866e-08, "loss": 0.5291, "step": 15045 }, { "epoch": 0.95, "grad_norm": 0.9091804623603821, "learning_rate": 5.7223176529712097e-08, "loss": 0.6207, "step": 15046 }, { "epoch": 0.95, "grad_norm": 0.8123113512992859, "learning_rate": 5.70685007112215e-08, "loss": 0.5242, "step": 15047 }, { "epoch": 0.95, "grad_norm": 0.8403564095497131, "learning_rate": 5.691403302238052e-08, "loss": 0.5305, "step": 15048 }, { "epoch": 0.95, "grad_norm": 0.8629001379013062, "learning_rate": 5.6759773469694523e-08, "loss": 0.5932, "step": 15049 }, { "epoch": 0.95, "grad_norm": 0.8440834283828735, "learning_rate": 5.660572205965775e-08, "loss": 0.5434, "step": 15050 }, { "epoch": 0.95, "grad_norm": 0.9365416169166565, "learning_rate": 5.645187879875724e-08, "loss": 0.5669, "step": 15051 }, { "epoch": 0.95, "grad_norm": 0.8103097677230835, "learning_rate": 5.6298243693470586e-08, "loss": 0.5747, "step": 15052 }, { "epoch": 0.95, "grad_norm": 0.9661427736282349, "learning_rate": 5.614481675026762e-08, "loss": 0.5651, "step": 15053 }, { "epoch": 0.95, "grad_norm": 0.8838450908660889, "learning_rate": 5.59915979756076e-08, "loss": 0.5523, "step": 15054 }, { "epoch": 0.95, "grad_norm": 0.8871442675590515, "learning_rate": 5.583858737594205e-08, "loss": 0.5693, "step": 15055 }, { "epoch": 0.95, "grad_norm": 0.9019421339035034, "learning_rate": 5.5685784957714707e-08, "loss": 0.5649, "step": 15056 }, { "epoch": 0.95, "grad_norm": 0.8939514756202698, "learning_rate": 5.5533190727358745e-08, "loss": 0.595, "step": 15057 }, { "epoch": 0.95, "grad_norm": 0.8858687281608582, "learning_rate": 5.538080469129958e-08, "loss": 0.5937, "step": 15058 }, { "epoch": 0.95, "grad_norm": 0.8240897059440613, "learning_rate": 5.522862685595376e-08, "loss": 0.5633, "step": 15059 }, { "epoch": 0.95, "grad_norm": 0.846260666847229, "learning_rate": 5.507665722772837e-08, "loss": 0.5337, "step": 15060 }, { "epoch": 0.95, "grad_norm": 0.8530579209327698, "learning_rate": 5.492489581302329e-08, "loss": 0.5521, "step": 15061 }, { "epoch": 0.95, "grad_norm": 0.9388294816017151, "learning_rate": 5.477334261822842e-08, "loss": 0.5776, "step": 15062 }, { "epoch": 0.95, "grad_norm": 0.9231818914413452, "learning_rate": 5.46219976497242e-08, "loss": 0.555, "step": 15063 }, { "epoch": 0.95, "grad_norm": 0.8340309262275696, "learning_rate": 5.447086091388443e-08, "loss": 0.5376, "step": 15064 }, { "epoch": 0.95, "grad_norm": 0.8696257472038269, "learning_rate": 5.4319932417072344e-08, "loss": 0.5702, "step": 15065 }, { "epoch": 0.95, "grad_norm": 0.929787814617157, "learning_rate": 5.416921216564286e-08, "loss": 0.6018, "step": 15066 }, { "epoch": 0.95, "grad_norm": 0.8924655914306641, "learning_rate": 5.401870016594313e-08, "loss": 0.5825, "step": 15067 }, { "epoch": 0.95, "grad_norm": 0.9113631248474121, "learning_rate": 5.38683964243103e-08, "loss": 0.5701, "step": 15068 }, { "epoch": 0.95, "grad_norm": 0.8545387387275696, "learning_rate": 5.3718300947072086e-08, "loss": 0.5573, "step": 15069 }, { "epoch": 0.95, "grad_norm": 0.9120925068855286, "learning_rate": 5.356841374055011e-08, "loss": 0.6162, "step": 15070 }, { "epoch": 0.95, "grad_norm": 0.843596875667572, "learning_rate": 5.341873481105431e-08, "loss": 0.6176, "step": 15071 }, { "epoch": 0.95, "grad_norm": 0.9411029815673828, "learning_rate": 5.3269264164887977e-08, "loss": 0.6389, "step": 15072 }, { "epoch": 0.95, "grad_norm": 0.9116719961166382, "learning_rate": 5.3120001808344425e-08, "loss": 0.5561, "step": 15073 }, { "epoch": 0.96, "grad_norm": 0.8383541703224182, "learning_rate": 5.297094774770861e-08, "loss": 0.5032, "step": 15074 }, { "epoch": 0.96, "grad_norm": 0.8465346097946167, "learning_rate": 5.282210198925664e-08, "loss": 0.598, "step": 15075 }, { "epoch": 0.96, "grad_norm": 0.9090423583984375, "learning_rate": 5.267346453925626e-08, "loss": 0.5912, "step": 15076 }, { "epoch": 0.96, "grad_norm": 0.9705894589424133, "learning_rate": 5.2525035403965805e-08, "loss": 0.5754, "step": 15077 }, { "epoch": 0.96, "grad_norm": 0.9257639646530151, "learning_rate": 5.237681458963473e-08, "loss": 0.6093, "step": 15078 }, { "epoch": 0.96, "grad_norm": 0.8317881226539612, "learning_rate": 5.222880210250469e-08, "loss": 0.5539, "step": 15079 }, { "epoch": 0.96, "grad_norm": 0.8732230067253113, "learning_rate": 5.2080997948807944e-08, "loss": 0.5984, "step": 15080 }, { "epoch": 0.96, "grad_norm": 0.9015724658966064, "learning_rate": 5.193340213476727e-08, "loss": 0.5556, "step": 15081 }, { "epoch": 0.96, "grad_norm": 0.912030041217804, "learning_rate": 5.178601466659827e-08, "loss": 0.5774, "step": 15082 }, { "epoch": 0.96, "grad_norm": 0.9283223748207092, "learning_rate": 5.163883555050708e-08, "loss": 0.5966, "step": 15083 }, { "epoch": 0.96, "grad_norm": 0.8436444997787476, "learning_rate": 5.149186479268986e-08, "loss": 0.5682, "step": 15084 }, { "epoch": 0.96, "grad_norm": 0.9397615194320679, "learning_rate": 5.134510239933554e-08, "loss": 0.6182, "step": 15085 }, { "epoch": 0.96, "grad_norm": 0.8505292534828186, "learning_rate": 5.119854837662419e-08, "loss": 0.5501, "step": 15086 }, { "epoch": 0.96, "grad_norm": 0.9102478623390198, "learning_rate": 5.1052202730725865e-08, "loss": 0.5655, "step": 15087 }, { "epoch": 0.96, "grad_norm": 0.8728495240211487, "learning_rate": 5.0906065467803965e-08, "loss": 0.5521, "step": 15088 }, { "epoch": 0.96, "grad_norm": 0.9245344400405884, "learning_rate": 5.0760136594010246e-08, "loss": 0.5207, "step": 15089 }, { "epoch": 0.96, "grad_norm": 0.8598072528839111, "learning_rate": 5.061441611549034e-08, "loss": 0.5593, "step": 15090 }, { "epoch": 0.96, "grad_norm": 0.8677455186843872, "learning_rate": 5.046890403837989e-08, "loss": 0.5772, "step": 15091 }, { "epoch": 0.96, "grad_norm": 0.8624927401542664, "learning_rate": 5.032360036880568e-08, "loss": 0.5621, "step": 15092 }, { "epoch": 0.96, "grad_norm": 0.8490076661109924, "learning_rate": 5.0178505112885576e-08, "loss": 0.5548, "step": 15093 }, { "epoch": 0.96, "grad_norm": 0.9380584359169006, "learning_rate": 5.00336182767297e-08, "loss": 0.5661, "step": 15094 }, { "epoch": 0.96, "grad_norm": 0.9134517908096313, "learning_rate": 4.988893986643817e-08, "loss": 0.5705, "step": 15095 }, { "epoch": 0.96, "grad_norm": 0.8528224229812622, "learning_rate": 4.9744469888103887e-08, "loss": 0.6008, "step": 15096 }, { "epoch": 0.96, "grad_norm": 0.8432444930076599, "learning_rate": 4.9600208347809206e-08, "loss": 0.5205, "step": 15097 }, { "epoch": 0.96, "grad_norm": 0.9236396551132202, "learning_rate": 4.945615525162761e-08, "loss": 0.594, "step": 15098 }, { "epoch": 0.96, "grad_norm": 0.8537143468856812, "learning_rate": 4.931231060562702e-08, "loss": 0.5326, "step": 15099 }, { "epoch": 0.96, "grad_norm": 0.929633378982544, "learning_rate": 4.916867441586204e-08, "loss": 0.637, "step": 15100 }, { "epoch": 0.96, "grad_norm": 0.8501139283180237, "learning_rate": 4.902524668838116e-08, "loss": 0.5492, "step": 15101 }, { "epoch": 0.96, "grad_norm": 0.8739571571350098, "learning_rate": 4.88820274292251e-08, "loss": 0.5558, "step": 15102 }, { "epoch": 0.96, "grad_norm": 0.8676999807357788, "learning_rate": 4.873901664442182e-08, "loss": 0.5524, "step": 15103 }, { "epoch": 0.96, "grad_norm": 0.864040195941925, "learning_rate": 4.8596214339995395e-08, "loss": 0.5512, "step": 15104 }, { "epoch": 0.96, "grad_norm": 0.9180824160575867, "learning_rate": 4.8453620521957124e-08, "loss": 0.571, "step": 15105 }, { "epoch": 0.96, "grad_norm": 0.9425962567329407, "learning_rate": 4.83112351963122e-08, "loss": 0.5272, "step": 15106 }, { "epoch": 0.96, "grad_norm": 0.9564583897590637, "learning_rate": 4.816905836905528e-08, "loss": 0.5995, "step": 15107 }, { "epoch": 0.96, "grad_norm": 0.8326453566551208, "learning_rate": 4.802709004617267e-08, "loss": 0.5695, "step": 15108 }, { "epoch": 0.96, "grad_norm": 0.8696445822715759, "learning_rate": 4.788533023364295e-08, "loss": 0.5204, "step": 15109 }, { "epoch": 0.96, "grad_norm": 0.9089856743812561, "learning_rate": 4.77437789374352e-08, "loss": 0.6211, "step": 15110 }, { "epoch": 0.96, "grad_norm": 0.9463760256767273, "learning_rate": 4.760243616350913e-08, "loss": 0.5978, "step": 15111 }, { "epoch": 0.96, "grad_norm": 0.8912383913993835, "learning_rate": 4.746130191781606e-08, "loss": 0.5724, "step": 15112 }, { "epoch": 0.96, "grad_norm": 0.8581725358963013, "learning_rate": 4.7320376206299034e-08, "loss": 0.5992, "step": 15113 }, { "epoch": 0.96, "grad_norm": 0.878282368183136, "learning_rate": 4.717965903489219e-08, "loss": 0.5798, "step": 15114 }, { "epoch": 0.96, "grad_norm": 0.866300642490387, "learning_rate": 4.7039150409519674e-08, "loss": 0.5509, "step": 15115 }, { "epoch": 0.96, "grad_norm": 0.8967102766036987, "learning_rate": 4.6898850336098975e-08, "loss": 0.5595, "step": 15116 }, { "epoch": 0.96, "grad_norm": 0.8599669337272644, "learning_rate": 4.675875882053704e-08, "loss": 0.5854, "step": 15117 }, { "epoch": 0.96, "grad_norm": 0.8142878413200378, "learning_rate": 4.6618875868733037e-08, "loss": 0.5502, "step": 15118 }, { "epoch": 0.96, "grad_norm": 0.8852924704551697, "learning_rate": 4.6479201486575585e-08, "loss": 0.54, "step": 15119 }, { "epoch": 0.96, "grad_norm": 0.8815605044364929, "learning_rate": 4.633973567994776e-08, "loss": 0.6029, "step": 15120 }, { "epoch": 0.96, "grad_norm": 0.8725368976593018, "learning_rate": 4.620047845472098e-08, "loss": 0.576, "step": 15121 }, { "epoch": 0.96, "grad_norm": 0.8406761288642883, "learning_rate": 4.606142981675887e-08, "loss": 0.5832, "step": 15122 }, { "epoch": 0.96, "grad_norm": 0.9023706316947937, "learning_rate": 4.592258977191622e-08, "loss": 0.5905, "step": 15123 }, { "epoch": 0.96, "grad_norm": 0.8814811706542969, "learning_rate": 4.578395832603999e-08, "loss": 0.567, "step": 15124 }, { "epoch": 0.96, "grad_norm": 0.9129397869110107, "learning_rate": 4.5645535484966085e-08, "loss": 0.5647, "step": 15125 }, { "epoch": 0.96, "grad_norm": 0.9053840637207031, "learning_rate": 4.5507321254524287e-08, "loss": 0.561, "step": 15126 }, { "epoch": 0.96, "grad_norm": 0.9199474453926086, "learning_rate": 4.536931564053382e-08, "loss": 0.5823, "step": 15127 }, { "epoch": 0.96, "grad_norm": 0.9460538625717163, "learning_rate": 4.523151864880504e-08, "loss": 0.5639, "step": 15128 }, { "epoch": 0.96, "grad_norm": 0.9503843188285828, "learning_rate": 4.5093930285141086e-08, "loss": 0.5857, "step": 15129 }, { "epoch": 0.96, "grad_norm": 0.8704271912574768, "learning_rate": 4.4956550555334546e-08, "loss": 0.5987, "step": 15130 }, { "epoch": 0.96, "grad_norm": 0.8602560758590698, "learning_rate": 4.4819379465170785e-08, "loss": 0.548, "step": 15131 }, { "epoch": 0.96, "grad_norm": 0.9668799638748169, "learning_rate": 4.4682417020425194e-08, "loss": 0.5793, "step": 15132 }, { "epoch": 0.96, "grad_norm": 0.9470771551132202, "learning_rate": 4.454566322686371e-08, "loss": 0.6333, "step": 15133 }, { "epoch": 0.96, "grad_norm": 0.8778170943260193, "learning_rate": 4.440911809024673e-08, "loss": 0.5797, "step": 15134 }, { "epoch": 0.96, "grad_norm": 0.9537930488586426, "learning_rate": 4.427278161632187e-08, "loss": 0.5665, "step": 15135 }, { "epoch": 0.96, "grad_norm": 0.860418975353241, "learning_rate": 4.413665381083065e-08, "loss": 0.5036, "step": 15136 }, { "epoch": 0.96, "grad_norm": 0.8717173933982849, "learning_rate": 4.4000734679504606e-08, "loss": 0.54, "step": 15137 }, { "epoch": 0.96, "grad_norm": 0.8939148187637329, "learning_rate": 4.386502422806749e-08, "loss": 0.5823, "step": 15138 }, { "epoch": 0.96, "grad_norm": 0.8451435565948486, "learning_rate": 4.37295224622325e-08, "loss": 0.5591, "step": 15139 }, { "epoch": 0.96, "grad_norm": 0.8331038951873779, "learning_rate": 4.359422938770619e-08, "loss": 0.5757, "step": 15140 }, { "epoch": 0.96, "grad_norm": 0.9195752739906311, "learning_rate": 4.3459145010184e-08, "loss": 0.5171, "step": 15141 }, { "epoch": 0.96, "grad_norm": 0.9011934995651245, "learning_rate": 4.3324269335355274e-08, "loss": 0.6086, "step": 15142 }, { "epoch": 0.96, "grad_norm": 0.929836094379425, "learning_rate": 4.31896023688988e-08, "loss": 0.5732, "step": 15143 }, { "epoch": 0.96, "grad_norm": 0.9297063946723938, "learning_rate": 4.305514411648393e-08, "loss": 0.5652, "step": 15144 }, { "epoch": 0.96, "grad_norm": 0.8522423505783081, "learning_rate": 4.2920894583773906e-08, "loss": 0.5228, "step": 15145 }, { "epoch": 0.96, "grad_norm": 0.8246431350708008, "learning_rate": 4.278685377641978e-08, "loss": 0.5246, "step": 15146 }, { "epoch": 0.96, "grad_norm": 0.918134868144989, "learning_rate": 4.2653021700066466e-08, "loss": 0.5447, "step": 15147 }, { "epoch": 0.96, "grad_norm": 0.9000471830368042, "learning_rate": 4.251939836034946e-08, "loss": 0.5654, "step": 15148 }, { "epoch": 0.96, "grad_norm": 0.8861368298530579, "learning_rate": 4.238598376289482e-08, "loss": 0.5441, "step": 15149 }, { "epoch": 0.96, "grad_norm": 0.8738230466842651, "learning_rate": 4.225277791331972e-08, "loss": 0.5582, "step": 15150 }, { "epoch": 0.96, "grad_norm": 0.8639594316482544, "learning_rate": 4.211978081723356e-08, "loss": 0.5699, "step": 15151 }, { "epoch": 0.96, "grad_norm": 0.855905294418335, "learning_rate": 4.19869924802363e-08, "loss": 0.5731, "step": 15152 }, { "epoch": 0.96, "grad_norm": 0.8283065557479858, "learning_rate": 4.185441290791903e-08, "loss": 0.5216, "step": 15153 }, { "epoch": 0.96, "grad_norm": 0.9064654111862183, "learning_rate": 4.1722042105863946e-08, "loss": 0.593, "step": 15154 }, { "epoch": 0.96, "grad_norm": 0.9007862210273743, "learning_rate": 4.158988007964548e-08, "loss": 0.5809, "step": 15155 }, { "epoch": 0.96, "grad_norm": 0.8880447149276733, "learning_rate": 4.145792683482808e-08, "loss": 0.5822, "step": 15156 }, { "epoch": 0.96, "grad_norm": 0.8769849538803101, "learning_rate": 4.132618237696784e-08, "loss": 0.6075, "step": 15157 }, { "epoch": 0.96, "grad_norm": 0.9147706627845764, "learning_rate": 4.1194646711612555e-08, "loss": 0.6183, "step": 15158 }, { "epoch": 0.96, "grad_norm": 0.9345024824142456, "learning_rate": 4.1063319844299454e-08, "loss": 0.5518, "step": 15159 }, { "epoch": 0.96, "grad_norm": 0.8593862652778625, "learning_rate": 4.0932201780559674e-08, "loss": 0.5756, "step": 15160 }, { "epoch": 0.96, "grad_norm": 0.8958163857460022, "learning_rate": 4.080129252591325e-08, "loss": 0.5601, "step": 15161 }, { "epoch": 0.96, "grad_norm": 0.8987419009208679, "learning_rate": 4.0670592085872984e-08, "loss": 0.5828, "step": 15162 }, { "epoch": 0.96, "grad_norm": 0.8477271795272827, "learning_rate": 4.054010046594115e-08, "loss": 0.5592, "step": 15163 }, { "epoch": 0.96, "grad_norm": 0.8878704309463501, "learning_rate": 4.040981767161334e-08, "loss": 0.5865, "step": 15164 }, { "epoch": 0.96, "grad_norm": 0.8833525776863098, "learning_rate": 4.027974370837518e-08, "loss": 0.5872, "step": 15165 }, { "epoch": 0.96, "grad_norm": 0.8798415064811707, "learning_rate": 4.014987858170283e-08, "loss": 0.5793, "step": 15166 }, { "epoch": 0.96, "grad_norm": 0.8760266900062561, "learning_rate": 4.0020222297065256e-08, "loss": 0.5665, "step": 15167 }, { "epoch": 0.96, "grad_norm": 0.9930770993232727, "learning_rate": 3.9890774859921987e-08, "loss": 0.591, "step": 15168 }, { "epoch": 0.96, "grad_norm": 0.9291293621063232, "learning_rate": 3.97615362757231e-08, "loss": 0.6082, "step": 15169 }, { "epoch": 0.96, "grad_norm": 0.9287815093994141, "learning_rate": 3.9632506549910356e-08, "loss": 0.5971, "step": 15170 }, { "epoch": 0.96, "grad_norm": 0.9018691778182983, "learning_rate": 3.9503685687916627e-08, "loss": 0.5316, "step": 15171 }, { "epoch": 0.96, "grad_norm": 0.8846923112869263, "learning_rate": 3.937507369516702e-08, "loss": 0.5959, "step": 15172 }, { "epoch": 0.96, "grad_norm": 0.9421688914299011, "learning_rate": 3.92466705770761e-08, "loss": 0.5618, "step": 15173 }, { "epoch": 0.96, "grad_norm": 0.8841253519058228, "learning_rate": 3.911847633905008e-08, "loss": 0.5895, "step": 15174 }, { "epoch": 0.96, "grad_norm": 0.9070528149604797, "learning_rate": 3.899049098648799e-08, "loss": 0.5664, "step": 15175 }, { "epoch": 0.96, "grad_norm": 0.8911782503128052, "learning_rate": 3.88627145247783e-08, "loss": 0.5772, "step": 15176 }, { "epoch": 0.96, "grad_norm": 0.9055638909339905, "learning_rate": 3.873514695930114e-08, "loss": 0.6169, "step": 15177 }, { "epoch": 0.96, "grad_norm": 0.915357232093811, "learning_rate": 3.860778829542777e-08, "loss": 0.6026, "step": 15178 }, { "epoch": 0.96, "grad_norm": 0.9006307721138, "learning_rate": 3.848063853852113e-08, "loss": 0.5744, "step": 15179 }, { "epoch": 0.96, "grad_norm": 0.845581591129303, "learning_rate": 3.835369769393471e-08, "loss": 0.5451, "step": 15180 }, { "epoch": 0.96, "grad_norm": 0.8819062113761902, "learning_rate": 3.822696576701368e-08, "loss": 0.5855, "step": 15181 }, { "epoch": 0.96, "grad_norm": 0.8674046993255615, "learning_rate": 3.8100442763094324e-08, "loss": 0.5511, "step": 15182 }, { "epoch": 0.96, "grad_norm": 0.8748277425765991, "learning_rate": 3.797412868750461e-08, "loss": 0.5703, "step": 15183 }, { "epoch": 0.96, "grad_norm": 0.9121760129928589, "learning_rate": 3.784802354556249e-08, "loss": 0.5536, "step": 15184 }, { "epoch": 0.96, "grad_norm": 0.9617106318473816, "learning_rate": 3.7722127342578183e-08, "loss": 0.5778, "step": 15185 }, { "epoch": 0.96, "grad_norm": 0.9024949073791504, "learning_rate": 3.759644008385244e-08, "loss": 0.5615, "step": 15186 }, { "epoch": 0.96, "grad_norm": 0.9101724028587341, "learning_rate": 3.747096177467768e-08, "loss": 0.5541, "step": 15187 }, { "epoch": 0.96, "grad_norm": 0.8800556659698486, "learning_rate": 3.7345692420337476e-08, "loss": 0.535, "step": 15188 }, { "epoch": 0.96, "grad_norm": 0.9029918909072876, "learning_rate": 3.722063202610593e-08, "loss": 0.591, "step": 15189 }, { "epoch": 0.96, "grad_norm": 0.9296280145645142, "learning_rate": 3.709578059724939e-08, "loss": 0.5728, "step": 15190 }, { "epoch": 0.96, "grad_norm": 0.865096390247345, "learning_rate": 3.697113813902531e-08, "loss": 0.564, "step": 15191 }, { "epoch": 0.96, "grad_norm": 0.9005051851272583, "learning_rate": 3.684670465668116e-08, "loss": 0.6006, "step": 15192 }, { "epoch": 0.96, "grad_norm": 0.8931210041046143, "learning_rate": 3.6722480155456655e-08, "loss": 0.5469, "step": 15193 }, { "epoch": 0.96, "grad_norm": 0.8682152032852173, "learning_rate": 3.6598464640582586e-08, "loss": 0.5758, "step": 15194 }, { "epoch": 0.96, "grad_norm": 0.9252963662147522, "learning_rate": 3.64746581172809e-08, "loss": 0.6333, "step": 15195 }, { "epoch": 0.96, "grad_norm": 0.9037860035896301, "learning_rate": 3.6351060590764656e-08, "loss": 0.567, "step": 15196 }, { "epoch": 0.96, "grad_norm": 0.827499270439148, "learning_rate": 3.6227672066237454e-08, "loss": 0.5443, "step": 15197 }, { "epoch": 0.96, "grad_norm": 0.8574694395065308, "learning_rate": 3.6104492548895695e-08, "loss": 0.6125, "step": 15198 }, { "epoch": 0.96, "grad_norm": 0.8817412853240967, "learning_rate": 3.5981522043925796e-08, "loss": 0.5583, "step": 15199 }, { "epoch": 0.96, "grad_norm": 0.8701195120811462, "learning_rate": 3.585876055650528e-08, "loss": 0.5599, "step": 15200 }, { "epoch": 0.96, "grad_norm": 0.8706973195075989, "learning_rate": 3.5736208091802784e-08, "loss": 0.5572, "step": 15201 }, { "epoch": 0.96, "grad_norm": 0.9477795958518982, "learning_rate": 3.5613864654979734e-08, "loss": 0.6371, "step": 15202 }, { "epoch": 0.96, "grad_norm": 0.9767326712608337, "learning_rate": 3.5491730251187016e-08, "loss": 0.5926, "step": 15203 }, { "epoch": 0.96, "grad_norm": 0.8724082708358765, "learning_rate": 3.5369804885567185e-08, "loss": 0.5583, "step": 15204 }, { "epoch": 0.96, "grad_norm": 0.868426501750946, "learning_rate": 3.52480885632539e-08, "loss": 0.5689, "step": 15205 }, { "epoch": 0.96, "grad_norm": 0.9158200621604919, "learning_rate": 3.512658128937252e-08, "loss": 0.5782, "step": 15206 }, { "epoch": 0.96, "grad_norm": 0.889900267124176, "learning_rate": 3.500528306904005e-08, "loss": 0.5747, "step": 15207 }, { "epoch": 0.96, "grad_norm": 0.9287896752357483, "learning_rate": 3.488419390736242e-08, "loss": 0.5598, "step": 15208 }, { "epoch": 0.96, "grad_norm": 0.9180722236633301, "learning_rate": 3.476331380943887e-08, "loss": 0.6264, "step": 15209 }, { "epoch": 0.96, "grad_norm": 0.9249047040939331, "learning_rate": 3.464264278035978e-08, "loss": 0.5909, "step": 15210 }, { "epoch": 0.96, "grad_norm": 0.8158274292945862, "learning_rate": 3.452218082520553e-08, "loss": 0.5317, "step": 15211 }, { "epoch": 0.96, "grad_norm": 0.8706426620483398, "learning_rate": 3.440192794904873e-08, "loss": 0.547, "step": 15212 }, { "epoch": 0.96, "grad_norm": 0.8472093343734741, "learning_rate": 3.4281884156953106e-08, "loss": 0.5676, "step": 15213 }, { "epoch": 0.96, "grad_norm": 0.8854379057884216, "learning_rate": 3.416204945397239e-08, "loss": 0.5735, "step": 15214 }, { "epoch": 0.96, "grad_norm": 0.8368361592292786, "learning_rate": 3.4042423845153104e-08, "loss": 0.5672, "step": 15215 }, { "epoch": 0.96, "grad_norm": 0.8709746599197388, "learning_rate": 3.392300733553178e-08, "loss": 0.573, "step": 15216 }, { "epoch": 0.96, "grad_norm": 0.9157810807228088, "learning_rate": 3.380379993013716e-08, "loss": 0.6188, "step": 15217 }, { "epoch": 0.96, "grad_norm": 0.9439373016357422, "learning_rate": 3.368480163398802e-08, "loss": 0.5568, "step": 15218 }, { "epoch": 0.96, "grad_norm": 0.8551452159881592, "learning_rate": 3.356601245209534e-08, "loss": 0.5708, "step": 15219 }, { "epoch": 0.96, "grad_norm": 0.9316923022270203, "learning_rate": 3.344743238946124e-08, "loss": 0.5438, "step": 15220 }, { "epoch": 0.96, "grad_norm": 0.912805438041687, "learning_rate": 3.332906145107839e-08, "loss": 0.5485, "step": 15221 }, { "epoch": 0.96, "grad_norm": 0.8899156451225281, "learning_rate": 3.3210899641930586e-08, "loss": 0.5284, "step": 15222 }, { "epoch": 0.96, "grad_norm": 0.9470401406288147, "learning_rate": 3.3092946966994385e-08, "loss": 0.6202, "step": 15223 }, { "epoch": 0.96, "grad_norm": 0.8981837630271912, "learning_rate": 3.297520343123473e-08, "loss": 0.5932, "step": 15224 }, { "epoch": 0.96, "grad_norm": 0.8313995003700256, "learning_rate": 3.285766903961096e-08, "loss": 0.5378, "step": 15225 }, { "epoch": 0.96, "grad_norm": 0.9461470246315002, "learning_rate": 3.274034379707081e-08, "loss": 0.6186, "step": 15226 }, { "epoch": 0.96, "grad_norm": 0.8666161298751831, "learning_rate": 3.262322770855475e-08, "loss": 0.5918, "step": 15227 }, { "epoch": 0.96, "grad_norm": 0.9340410828590393, "learning_rate": 3.250632077899496e-08, "loss": 0.6198, "step": 15228 }, { "epoch": 0.96, "grad_norm": 0.9431737661361694, "learning_rate": 3.238962301331305e-08, "loss": 0.6251, "step": 15229 }, { "epoch": 0.96, "grad_norm": 0.9066559076309204, "learning_rate": 3.227313441642288e-08, "loss": 0.6061, "step": 15230 }, { "epoch": 0.96, "grad_norm": 0.9017807245254517, "learning_rate": 3.2156854993229955e-08, "loss": 0.5255, "step": 15231 }, { "epoch": 0.97, "grad_norm": 0.8806298971176147, "learning_rate": 3.2040784748629814e-08, "loss": 0.5691, "step": 15232 }, { "epoch": 0.97, "grad_norm": 0.9558776617050171, "learning_rate": 3.192492368750966e-08, "loss": 0.5747, "step": 15233 }, { "epoch": 0.97, "grad_norm": 0.9031001329421997, "learning_rate": 3.180927181474891e-08, "loss": 0.6138, "step": 15234 }, { "epoch": 0.97, "grad_norm": 0.9222975373268127, "learning_rate": 3.16938291352159e-08, "loss": 0.5767, "step": 15235 }, { "epoch": 0.97, "grad_norm": 0.8207947611808777, "learning_rate": 3.157859565377286e-08, "loss": 0.5334, "step": 15236 }, { "epoch": 0.97, "grad_norm": 0.8401099443435669, "learning_rate": 3.146357137527145e-08, "loss": 0.5246, "step": 15237 }, { "epoch": 0.97, "grad_norm": 0.8946247100830078, "learning_rate": 3.1348756304554475e-08, "loss": 0.5931, "step": 15238 }, { "epoch": 0.97, "grad_norm": 0.877619743347168, "learning_rate": 3.123415044645639e-08, "loss": 0.5706, "step": 15239 }, { "epoch": 0.97, "grad_norm": 0.865767240524292, "learning_rate": 3.111975380580334e-08, "loss": 0.5536, "step": 15240 }, { "epoch": 0.97, "grad_norm": 0.8900549411773682, "learning_rate": 3.100556638741203e-08, "loss": 0.6122, "step": 15241 }, { "epoch": 0.97, "grad_norm": 0.8810964822769165, "learning_rate": 3.089158819609084e-08, "loss": 0.5664, "step": 15242 }, { "epoch": 0.97, "grad_norm": 0.8617457747459412, "learning_rate": 3.077781923663814e-08, "loss": 0.604, "step": 15243 }, { "epoch": 0.97, "grad_norm": 0.8632597923278809, "learning_rate": 3.066425951384455e-08, "loss": 0.4994, "step": 15244 }, { "epoch": 0.97, "grad_norm": 0.8872633576393127, "learning_rate": 3.055090903249236e-08, "loss": 0.6185, "step": 15245 }, { "epoch": 0.97, "grad_norm": 0.8761091828346252, "learning_rate": 3.0437767797353856e-08, "loss": 0.5448, "step": 15246 }, { "epoch": 0.97, "grad_norm": 0.8925213813781738, "learning_rate": 3.032483581319301e-08, "loss": 0.5856, "step": 15247 }, { "epoch": 0.97, "grad_norm": 0.9047413468360901, "learning_rate": 3.021211308476546e-08, "loss": 0.5834, "step": 15248 }, { "epoch": 0.97, "grad_norm": 0.8721828460693359, "learning_rate": 3.0099599616816856e-08, "loss": 0.571, "step": 15249 }, { "epoch": 0.97, "grad_norm": 0.9577370882034302, "learning_rate": 2.998729541408507e-08, "loss": 0.5948, "step": 15250 }, { "epoch": 0.97, "grad_norm": 0.9254369139671326, "learning_rate": 2.987520048129911e-08, "loss": 0.5841, "step": 15251 }, { "epoch": 0.97, "grad_norm": 0.8802624940872192, "learning_rate": 2.976331482317796e-08, "loss": 0.5672, "step": 15252 }, { "epoch": 0.97, "grad_norm": 0.8794838786125183, "learning_rate": 2.9651638444434528e-08, "loss": 0.6076, "step": 15253 }, { "epoch": 0.97, "grad_norm": 0.8821896910667419, "learning_rate": 2.9540171349769497e-08, "loss": 0.5937, "step": 15254 }, { "epoch": 0.97, "grad_norm": 0.8732861876487732, "learning_rate": 2.942891354387689e-08, "loss": 0.5617, "step": 15255 }, { "epoch": 0.97, "grad_norm": 0.9027414321899414, "learning_rate": 2.9317865031441295e-08, "loss": 0.5962, "step": 15256 }, { "epoch": 0.97, "grad_norm": 0.9479333162307739, "learning_rate": 2.920702581713841e-08, "loss": 0.5659, "step": 15257 }, { "epoch": 0.97, "grad_norm": 0.9096110463142395, "learning_rate": 2.909639590563562e-08, "loss": 0.6119, "step": 15258 }, { "epoch": 0.97, "grad_norm": 0.8687134981155396, "learning_rate": 2.8985975301591975e-08, "loss": 0.5714, "step": 15259 }, { "epoch": 0.97, "grad_norm": 0.8929232358932495, "learning_rate": 2.887576400965486e-08, "loss": 0.5406, "step": 15260 }, { "epoch": 0.97, "grad_norm": 0.8353814482688904, "learning_rate": 2.8765762034466682e-08, "loss": 0.5534, "step": 15261 }, { "epoch": 0.97, "grad_norm": 0.8765125274658203, "learning_rate": 2.8655969380658177e-08, "loss": 0.5562, "step": 15262 }, { "epoch": 0.97, "grad_norm": 0.9010085463523865, "learning_rate": 2.8546386052853427e-08, "loss": 0.5473, "step": 15263 }, { "epoch": 0.97, "grad_norm": 0.9564641118049622, "learning_rate": 2.8437012055665403e-08, "loss": 0.6325, "step": 15264 }, { "epoch": 0.97, "grad_norm": 0.8980580568313599, "learning_rate": 2.832784739369987e-08, "loss": 0.5999, "step": 15265 }, { "epoch": 0.97, "grad_norm": 0.8257995843887329, "learning_rate": 2.8218892071553705e-08, "loss": 0.5208, "step": 15266 }, { "epoch": 0.97, "grad_norm": 0.9016112685203552, "learning_rate": 2.8110146093814906e-08, "loss": 0.555, "step": 15267 }, { "epoch": 0.97, "grad_norm": 0.8790071606636047, "learning_rate": 2.8001609465061474e-08, "loss": 0.5744, "step": 15268 }, { "epoch": 0.97, "grad_norm": 0.8602889180183411, "learning_rate": 2.7893282189863647e-08, "loss": 0.5734, "step": 15269 }, { "epoch": 0.97, "grad_norm": 0.8754189014434814, "learning_rate": 2.7785164272783327e-08, "loss": 0.5627, "step": 15270 }, { "epoch": 0.97, "grad_norm": 0.9688771367073059, "learning_rate": 2.7677255718372986e-08, "loss": 0.5618, "step": 15271 }, { "epoch": 0.97, "grad_norm": 0.8828610181808472, "learning_rate": 2.7569556531175657e-08, "loss": 0.5808, "step": 15272 }, { "epoch": 0.97, "grad_norm": 0.9202200174331665, "learning_rate": 2.7462066715726045e-08, "loss": 0.6168, "step": 15273 }, { "epoch": 0.97, "grad_norm": 0.9173966646194458, "learning_rate": 2.7354786276551083e-08, "loss": 0.6054, "step": 15274 }, { "epoch": 0.97, "grad_norm": 0.9728456139564514, "learning_rate": 2.7247715218167714e-08, "loss": 0.6074, "step": 15275 }, { "epoch": 0.97, "grad_norm": 0.905205249786377, "learning_rate": 2.7140853545083447e-08, "loss": 0.5679, "step": 15276 }, { "epoch": 0.97, "grad_norm": 0.9280872344970703, "learning_rate": 2.703420126179912e-08, "loss": 0.5779, "step": 15277 }, { "epoch": 0.97, "grad_norm": 0.9241009950637817, "learning_rate": 2.692775837280448e-08, "loss": 0.6027, "step": 15278 }, { "epoch": 0.97, "grad_norm": 0.8840800523757935, "learning_rate": 2.682152488258205e-08, "loss": 0.5911, "step": 15279 }, { "epoch": 0.97, "grad_norm": 0.8568680286407471, "learning_rate": 2.671550079560492e-08, "loss": 0.5292, "step": 15280 }, { "epoch": 0.97, "grad_norm": 0.8979047536849976, "learning_rate": 2.6609686116337296e-08, "loss": 0.5474, "step": 15281 }, { "epoch": 0.97, "grad_norm": 0.9321437478065491, "learning_rate": 2.6504080849234504e-08, "loss": 0.6161, "step": 15282 }, { "epoch": 0.97, "grad_norm": 0.8534190654754639, "learning_rate": 2.6398684998742986e-08, "loss": 0.5428, "step": 15283 }, { "epoch": 0.97, "grad_norm": 0.855660080909729, "learning_rate": 2.629349856930141e-08, "loss": 0.5282, "step": 15284 }, { "epoch": 0.97, "grad_norm": 0.9074161052703857, "learning_rate": 2.6188521565338466e-08, "loss": 0.5735, "step": 15285 }, { "epoch": 0.97, "grad_norm": 0.8709417581558228, "learning_rate": 2.608375399127394e-08, "loss": 0.558, "step": 15286 }, { "epoch": 0.97, "grad_norm": 0.8391317129135132, "learning_rate": 2.5979195851519313e-08, "loss": 0.5302, "step": 15287 }, { "epoch": 0.97, "grad_norm": 0.9150146842002869, "learning_rate": 2.5874847150477722e-08, "loss": 0.5882, "step": 15288 }, { "epoch": 0.97, "grad_norm": 0.8675903081893921, "learning_rate": 2.5770707892542878e-08, "loss": 0.5531, "step": 15289 }, { "epoch": 0.97, "grad_norm": 0.9216609597206116, "learning_rate": 2.5666778082099052e-08, "loss": 0.5489, "step": 15290 }, { "epoch": 0.97, "grad_norm": 0.9866342544555664, "learning_rate": 2.5563057723522742e-08, "loss": 0.5831, "step": 15291 }, { "epoch": 0.97, "grad_norm": 0.9153217673301697, "learning_rate": 2.5459546821181014e-08, "loss": 0.5968, "step": 15292 }, { "epoch": 0.97, "grad_norm": 0.8692642450332642, "learning_rate": 2.5356245379433154e-08, "loss": 0.5611, "step": 15293 }, { "epoch": 0.97, "grad_norm": 0.8365652561187744, "learning_rate": 2.525315340262846e-08, "loss": 0.5344, "step": 15294 }, { "epoch": 0.97, "grad_norm": 0.8992009162902832, "learning_rate": 2.51502708951068e-08, "loss": 0.6268, "step": 15295 }, { "epoch": 0.97, "grad_norm": 0.9119232892990112, "learning_rate": 2.5047597861201368e-08, "loss": 0.5838, "step": 15296 }, { "epoch": 0.97, "grad_norm": 0.9377126097679138, "learning_rate": 2.494513430523482e-08, "loss": 0.5693, "step": 15297 }, { "epoch": 0.97, "grad_norm": 0.9767472147941589, "learning_rate": 2.4842880231522038e-08, "loss": 0.5736, "step": 15298 }, { "epoch": 0.97, "grad_norm": 0.8412918448448181, "learning_rate": 2.474083564436791e-08, "loss": 0.5326, "step": 15299 }, { "epoch": 0.97, "grad_norm": 0.8873780965805054, "learning_rate": 2.4639000548070114e-08, "loss": 0.6094, "step": 15300 }, { "epoch": 0.97, "grad_norm": 0.8979631066322327, "learning_rate": 2.4537374946915215e-08, "loss": 0.5258, "step": 15301 }, { "epoch": 0.97, "grad_norm": 0.8989670872688293, "learning_rate": 2.4435958845183684e-08, "loss": 0.5554, "step": 15302 }, { "epoch": 0.97, "grad_norm": 0.8940199017524719, "learning_rate": 2.4334752247145433e-08, "loss": 0.5619, "step": 15303 }, { "epoch": 0.97, "grad_norm": 0.8079856038093567, "learning_rate": 2.4233755157060944e-08, "loss": 0.5733, "step": 15304 }, { "epoch": 0.97, "grad_norm": 0.9142255187034607, "learning_rate": 2.413296757918404e-08, "loss": 0.5756, "step": 15305 }, { "epoch": 0.97, "grad_norm": 0.8849722146987915, "learning_rate": 2.4032389517757993e-08, "loss": 0.5724, "step": 15306 }, { "epoch": 0.97, "grad_norm": 0.8250989317893982, "learning_rate": 2.3932020977017745e-08, "loss": 0.5244, "step": 15307 }, { "epoch": 0.97, "grad_norm": 0.8586926460266113, "learning_rate": 2.3831861961189917e-08, "loss": 0.5474, "step": 15308 }, { "epoch": 0.97, "grad_norm": 0.8741357326507568, "learning_rate": 2.3731912474491137e-08, "loss": 0.5359, "step": 15309 }, { "epoch": 0.97, "grad_norm": 0.8332794308662415, "learning_rate": 2.3632172521130815e-08, "loss": 0.51, "step": 15310 }, { "epoch": 0.97, "grad_norm": 0.8357459902763367, "learning_rate": 2.3532642105307813e-08, "loss": 0.5387, "step": 15311 }, { "epoch": 0.97, "grad_norm": 0.900471568107605, "learning_rate": 2.3433321231213778e-08, "loss": 0.5677, "step": 15312 }, { "epoch": 0.97, "grad_norm": 0.9303227066993713, "learning_rate": 2.3334209903029815e-08, "loss": 0.5955, "step": 15313 }, { "epoch": 0.97, "grad_norm": 0.8947436809539795, "learning_rate": 2.3235308124930357e-08, "loss": 0.5752, "step": 15314 }, { "epoch": 0.97, "grad_norm": 0.9085913896560669, "learning_rate": 2.3136615901078742e-08, "loss": 0.5911, "step": 15315 }, { "epoch": 0.97, "grad_norm": 0.8889240026473999, "learning_rate": 2.303813323563109e-08, "loss": 0.5651, "step": 15316 }, { "epoch": 0.97, "grad_norm": 0.9324416518211365, "learning_rate": 2.2939860132734084e-08, "loss": 0.5499, "step": 15317 }, { "epoch": 0.97, "grad_norm": 0.8463728427886963, "learning_rate": 2.2841796596525522e-08, "loss": 0.5775, "step": 15318 }, { "epoch": 0.97, "grad_norm": 0.9454851150512695, "learning_rate": 2.2743942631134886e-08, "loss": 0.5532, "step": 15319 }, { "epoch": 0.97, "grad_norm": 0.9939208030700684, "learning_rate": 2.264629824068165e-08, "loss": 0.661, "step": 15320 }, { "epoch": 0.97, "grad_norm": 0.952218234539032, "learning_rate": 2.2548863429278645e-08, "loss": 0.6229, "step": 15321 }, { "epoch": 0.97, "grad_norm": 0.8898659348487854, "learning_rate": 2.2451638201027026e-08, "loss": 0.5843, "step": 15322 }, { "epoch": 0.97, "grad_norm": 0.9616308212280273, "learning_rate": 2.2354622560021854e-08, "loss": 0.5664, "step": 15323 }, { "epoch": 0.97, "grad_norm": 0.8662042021751404, "learning_rate": 2.2257816510347086e-08, "loss": 0.55, "step": 15324 }, { "epoch": 0.97, "grad_norm": 0.9076823592185974, "learning_rate": 2.2161220056079457e-08, "loss": 0.5385, "step": 15325 }, { "epoch": 0.97, "grad_norm": 0.9138240218162537, "learning_rate": 2.2064833201286827e-08, "loss": 0.5659, "step": 15326 }, { "epoch": 0.97, "grad_norm": 0.8747329115867615, "learning_rate": 2.1968655950026508e-08, "loss": 0.6079, "step": 15327 }, { "epoch": 0.97, "grad_norm": 0.8825821876525879, "learning_rate": 2.187268830634859e-08, "loss": 0.5695, "step": 15328 }, { "epoch": 0.97, "grad_norm": 0.8999965190887451, "learning_rate": 2.1776930274294283e-08, "loss": 0.5606, "step": 15329 }, { "epoch": 0.97, "grad_norm": 0.8908900022506714, "learning_rate": 2.1681381857895923e-08, "loss": 0.5693, "step": 15330 }, { "epoch": 0.97, "grad_norm": 0.87883061170578, "learning_rate": 2.1586043061175842e-08, "loss": 0.5597, "step": 15331 }, { "epoch": 0.97, "grad_norm": 0.8488723635673523, "learning_rate": 2.1490913888149166e-08, "loss": 0.6041, "step": 15332 }, { "epoch": 0.97, "grad_norm": 0.9183140993118286, "learning_rate": 2.139599434282047e-08, "loss": 0.5741, "step": 15333 }, { "epoch": 0.97, "grad_norm": 0.9319660663604736, "learning_rate": 2.130128442918766e-08, "loss": 0.641, "step": 15334 }, { "epoch": 0.97, "grad_norm": 0.9717698097229004, "learning_rate": 2.1206784151238113e-08, "loss": 0.5749, "step": 15335 }, { "epoch": 0.97, "grad_norm": 0.8720336556434631, "learning_rate": 2.111249351295086e-08, "loss": 0.6203, "step": 15336 }, { "epoch": 0.97, "grad_norm": 0.8387833833694458, "learning_rate": 2.1018412518296617e-08, "loss": 0.5874, "step": 15337 }, { "epoch": 0.97, "grad_norm": 0.8112475872039795, "learning_rate": 2.0924541171235545e-08, "loss": 0.5653, "step": 15338 }, { "epoch": 0.97, "grad_norm": 0.9344534277915955, "learning_rate": 2.083087947572171e-08, "loss": 0.567, "step": 15339 }, { "epoch": 0.97, "grad_norm": 0.9421919584274292, "learning_rate": 2.073742743569862e-08, "loss": 0.5728, "step": 15340 }, { "epoch": 0.97, "grad_norm": 0.9264227151870728, "learning_rate": 2.0644185055100352e-08, "loss": 0.6343, "step": 15341 }, { "epoch": 0.97, "grad_norm": 0.8339887857437134, "learning_rate": 2.0551152337853208e-08, "loss": 0.5588, "step": 15342 }, { "epoch": 0.97, "grad_norm": 0.9266855120658875, "learning_rate": 2.0458329287875168e-08, "loss": 0.5816, "step": 15343 }, { "epoch": 0.97, "grad_norm": 0.9328429698944092, "learning_rate": 2.0365715909074213e-08, "loss": 0.5793, "step": 15344 }, { "epoch": 0.97, "grad_norm": 0.8851380944252014, "learning_rate": 2.027331220535056e-08, "loss": 0.6006, "step": 15345 }, { "epoch": 0.97, "grad_norm": 0.9010560512542725, "learning_rate": 2.018111818059387e-08, "loss": 0.4876, "step": 15346 }, { "epoch": 0.97, "grad_norm": 0.9649263620376587, "learning_rate": 2.008913383868716e-08, "loss": 0.6252, "step": 15347 }, { "epoch": 0.97, "grad_norm": 0.9451420903205872, "learning_rate": 1.999735918350343e-08, "loss": 0.5907, "step": 15348 }, { "epoch": 0.97, "grad_norm": 0.9090909361839294, "learning_rate": 1.990579421890626e-08, "loss": 0.5909, "step": 15349 }, { "epoch": 0.97, "grad_norm": 0.8864248991012573, "learning_rate": 1.9814438948751458e-08, "loss": 0.5775, "step": 15350 }, { "epoch": 0.97, "grad_norm": 0.8502189517021179, "learning_rate": 1.9723293376886497e-08, "loss": 0.5513, "step": 15351 }, { "epoch": 0.97, "grad_norm": 0.8676384687423706, "learning_rate": 1.963235750714776e-08, "loss": 0.6178, "step": 15352 }, { "epoch": 0.97, "grad_norm": 0.9123603701591492, "learning_rate": 1.9541631343365507e-08, "loss": 0.5994, "step": 15353 }, { "epoch": 0.97, "grad_norm": 1.0008543729782104, "learning_rate": 1.9451114889359468e-08, "loss": 0.6402, "step": 15354 }, { "epoch": 0.97, "grad_norm": 0.925025999546051, "learning_rate": 1.936080814894048e-08, "loss": 0.5693, "step": 15355 }, { "epoch": 0.97, "grad_norm": 0.9011825323104858, "learning_rate": 1.9270711125912167e-08, "loss": 0.5558, "step": 15356 }, { "epoch": 0.97, "grad_norm": 0.8734168410301208, "learning_rate": 1.9180823824067053e-08, "loss": 0.5738, "step": 15357 }, { "epoch": 0.97, "grad_norm": 0.858608067035675, "learning_rate": 1.909114624719044e-08, "loss": 0.578, "step": 15358 }, { "epoch": 0.97, "grad_norm": 0.8652524352073669, "learning_rate": 1.90016783990582e-08, "loss": 0.5556, "step": 15359 }, { "epoch": 0.97, "grad_norm": 0.9070523977279663, "learning_rate": 1.891242028343787e-08, "loss": 0.6098, "step": 15360 }, { "epoch": 0.97, "grad_norm": 0.8430723547935486, "learning_rate": 1.8823371904087563e-08, "loss": 0.5671, "step": 15361 }, { "epoch": 0.97, "grad_norm": 0.8682308197021484, "learning_rate": 1.8734533264757047e-08, "loss": 0.5375, "step": 15362 }, { "epoch": 0.97, "grad_norm": 0.9168040156364441, "learning_rate": 1.864590436918612e-08, "loss": 0.611, "step": 15363 }, { "epoch": 0.97, "grad_norm": 0.9281341433525085, "learning_rate": 1.8557485221107897e-08, "loss": 0.5621, "step": 15364 }, { "epoch": 0.97, "grad_norm": 0.9351321458816528, "learning_rate": 1.8469275824244958e-08, "loss": 0.5595, "step": 15365 }, { "epoch": 0.97, "grad_norm": 0.9267570376396179, "learning_rate": 1.8381276182311004e-08, "loss": 0.5362, "step": 15366 }, { "epoch": 0.97, "grad_norm": 0.9369710087776184, "learning_rate": 1.8293486299011398e-08, "loss": 0.5719, "step": 15367 }, { "epoch": 0.97, "grad_norm": 0.9404371380805969, "learning_rate": 1.8205906178043186e-08, "loss": 0.5835, "step": 15368 }, { "epoch": 0.97, "grad_norm": 0.8816442489624023, "learning_rate": 1.811853582309453e-08, "loss": 0.6092, "step": 15369 }, { "epoch": 0.97, "grad_norm": 0.8771417737007141, "learning_rate": 1.803137523784304e-08, "loss": 0.5125, "step": 15370 }, { "epoch": 0.97, "grad_norm": 0.8539003133773804, "learning_rate": 1.7944424425959116e-08, "loss": 0.5784, "step": 15371 }, { "epoch": 0.97, "grad_norm": 0.876171886920929, "learning_rate": 1.7857683391104273e-08, "loss": 0.5423, "step": 15372 }, { "epoch": 0.97, "grad_norm": 0.8787450790405273, "learning_rate": 1.7771152136931147e-08, "loss": 0.5901, "step": 15373 }, { "epoch": 0.97, "grad_norm": 0.9184008836746216, "learning_rate": 1.7684830667082377e-08, "loss": 0.5802, "step": 15374 }, { "epoch": 0.97, "grad_norm": 0.887069046497345, "learning_rate": 1.759871898519394e-08, "loss": 0.5638, "step": 15375 }, { "epoch": 0.97, "grad_norm": 0.8621271848678589, "learning_rate": 1.7512817094890167e-08, "loss": 0.5408, "step": 15376 }, { "epoch": 0.97, "grad_norm": 0.8775637745857239, "learning_rate": 1.742712499978927e-08, "loss": 0.5448, "step": 15377 }, { "epoch": 0.97, "grad_norm": 0.9085080027580261, "learning_rate": 1.734164270349892e-08, "loss": 0.5373, "step": 15378 }, { "epoch": 0.97, "grad_norm": 0.8680553436279297, "learning_rate": 1.7256370209618458e-08, "loss": 0.6011, "step": 15379 }, { "epoch": 0.97, "grad_norm": 0.9011398553848267, "learning_rate": 1.71713075217389e-08, "loss": 0.5927, "step": 15380 }, { "epoch": 0.97, "grad_norm": 0.9100791215896606, "learning_rate": 1.7086454643441273e-08, "loss": 0.592, "step": 15381 }, { "epoch": 0.97, "grad_norm": 0.8486478924751282, "learning_rate": 1.7001811578298832e-08, "loss": 0.5193, "step": 15382 }, { "epoch": 0.97, "grad_norm": 0.8972152471542358, "learning_rate": 1.6917378329875946e-08, "loss": 0.5594, "step": 15383 }, { "epoch": 0.97, "grad_norm": 0.8258572220802307, "learning_rate": 1.6833154901726988e-08, "loss": 0.5933, "step": 15384 }, { "epoch": 0.97, "grad_norm": 0.9576346278190613, "learning_rate": 1.6749141297398574e-08, "loss": 0.5544, "step": 15385 }, { "epoch": 0.97, "grad_norm": 0.8871638178825378, "learning_rate": 1.6665337520428427e-08, "loss": 0.5285, "step": 15386 }, { "epoch": 0.97, "grad_norm": 0.8549116253852844, "learning_rate": 1.658174357434483e-08, "loss": 0.5918, "step": 15387 }, { "epoch": 0.97, "grad_norm": 0.9582047462463379, "learning_rate": 1.649835946266831e-08, "loss": 0.6273, "step": 15388 }, { "epoch": 0.97, "grad_norm": 0.8410069346427917, "learning_rate": 1.6415185188909944e-08, "loss": 0.5405, "step": 15389 }, { "epoch": 0.98, "grad_norm": 0.8835951685905457, "learning_rate": 1.6332220756570815e-08, "loss": 0.5562, "step": 15390 }, { "epoch": 0.98, "grad_norm": 0.8853712677955627, "learning_rate": 1.6249466169145354e-08, "loss": 0.6191, "step": 15391 }, { "epoch": 0.98, "grad_norm": 0.887049674987793, "learning_rate": 1.6166921430118e-08, "loss": 0.5563, "step": 15392 }, { "epoch": 0.98, "grad_norm": 0.9243970513343811, "learning_rate": 1.608458654296319e-08, "loss": 0.5632, "step": 15393 }, { "epoch": 0.98, "grad_norm": 0.9056985378265381, "learning_rate": 1.600246151114926e-08, "loss": 0.5555, "step": 15394 }, { "epoch": 0.98, "grad_norm": 0.8855133056640625, "learning_rate": 1.5920546338133447e-08, "loss": 0.5699, "step": 15395 }, { "epoch": 0.98, "grad_norm": 0.8974730968475342, "learning_rate": 1.5838841027365215e-08, "loss": 0.5759, "step": 15396 }, { "epoch": 0.98, "grad_norm": 0.876063883304596, "learning_rate": 1.5757345582285144e-08, "loss": 0.5996, "step": 15397 }, { "epoch": 0.98, "grad_norm": 0.8951111435890198, "learning_rate": 1.5676060006323267e-08, "loss": 0.6397, "step": 15398 }, { "epoch": 0.98, "grad_norm": 0.9184896349906921, "learning_rate": 1.559498430290407e-08, "loss": 0.5522, "step": 15399 }, { "epoch": 0.98, "grad_norm": 0.9113056063652039, "learning_rate": 1.5514118475440378e-08, "loss": 0.5542, "step": 15400 }, { "epoch": 0.98, "grad_norm": 0.8028354048728943, "learning_rate": 1.5433462527337793e-08, "loss": 0.5046, "step": 15401 }, { "epoch": 0.98, "grad_norm": 0.8966811299324036, "learning_rate": 1.5353016461991387e-08, "loss": 0.6253, "step": 15402 }, { "epoch": 0.98, "grad_norm": 0.8664458394050598, "learning_rate": 1.5272780282789556e-08, "loss": 0.6081, "step": 15403 }, { "epoch": 0.98, "grad_norm": 0.8263579607009888, "learning_rate": 1.5192753993110155e-08, "loss": 0.535, "step": 15404 }, { "epoch": 0.98, "grad_norm": 0.908085286617279, "learning_rate": 1.5112937596323263e-08, "loss": 0.6077, "step": 15405 }, { "epoch": 0.98, "grad_norm": 0.895283579826355, "learning_rate": 1.5033331095788973e-08, "loss": 0.5622, "step": 15406 }, { "epoch": 0.98, "grad_norm": 0.84937584400177, "learning_rate": 1.4953934494860155e-08, "loss": 0.4815, "step": 15407 }, { "epoch": 0.98, "grad_norm": 0.8702456951141357, "learning_rate": 1.4874747796879142e-08, "loss": 0.5375, "step": 15408 }, { "epoch": 0.98, "grad_norm": 0.9346665740013123, "learning_rate": 1.4795771005181036e-08, "loss": 0.633, "step": 15409 }, { "epoch": 0.98, "grad_norm": 0.8707761764526367, "learning_rate": 1.4717004123090406e-08, "loss": 0.5525, "step": 15410 }, { "epoch": 0.98, "grad_norm": 0.9167184829711914, "learning_rate": 1.463844715392404e-08, "loss": 0.6256, "step": 15411 }, { "epoch": 0.98, "grad_norm": 0.8158385157585144, "learning_rate": 1.4560100100989849e-08, "loss": 0.514, "step": 15412 }, { "epoch": 0.98, "grad_norm": 0.8651106953620911, "learning_rate": 1.448196296758686e-08, "loss": 0.5446, "step": 15413 }, { "epoch": 0.98, "grad_norm": 0.8702985644340515, "learning_rate": 1.4404035757005219e-08, "loss": 0.5671, "step": 15414 }, { "epoch": 0.98, "grad_norm": 0.9393275380134583, "learning_rate": 1.4326318472525635e-08, "loss": 0.567, "step": 15415 }, { "epoch": 0.98, "grad_norm": 0.9015846252441406, "learning_rate": 1.4248811117421046e-08, "loss": 0.5923, "step": 15416 }, { "epoch": 0.98, "grad_norm": 0.9426272511482239, "learning_rate": 1.4171513694954953e-08, "loss": 0.5628, "step": 15417 }, { "epoch": 0.98, "grad_norm": 0.9212367534637451, "learning_rate": 1.4094426208381972e-08, "loss": 0.6307, "step": 15418 }, { "epoch": 0.98, "grad_norm": 0.9438113570213318, "learning_rate": 1.4017548660947844e-08, "loss": 0.5934, "step": 15419 }, { "epoch": 0.98, "grad_norm": 0.8640725016593933, "learning_rate": 1.3940881055889976e-08, "loss": 0.594, "step": 15420 }, { "epoch": 0.98, "grad_norm": 0.9187299609184265, "learning_rate": 1.3864423396436344e-08, "loss": 0.55, "step": 15421 }, { "epoch": 0.98, "grad_norm": 0.8524268865585327, "learning_rate": 1.3788175685806594e-08, "loss": 0.5912, "step": 15422 }, { "epoch": 0.98, "grad_norm": 0.9235708713531494, "learning_rate": 1.3712137927210377e-08, "loss": 0.5946, "step": 15423 }, { "epoch": 0.98, "grad_norm": 0.911343514919281, "learning_rate": 1.3636310123850694e-08, "loss": 0.6099, "step": 15424 }, { "epoch": 0.98, "grad_norm": 0.8266077637672424, "learning_rate": 1.3560692278919429e-08, "loss": 0.554, "step": 15425 }, { "epoch": 0.98, "grad_norm": 0.8780014514923096, "learning_rate": 1.3485284395600707e-08, "loss": 0.5294, "step": 15426 }, { "epoch": 0.98, "grad_norm": 0.9280437231063843, "learning_rate": 1.3410086477069761e-08, "loss": 0.6171, "step": 15427 }, { "epoch": 0.98, "grad_norm": 0.8806030750274658, "learning_rate": 1.333509852649295e-08, "loss": 0.551, "step": 15428 }, { "epoch": 0.98, "grad_norm": 0.8715260028839111, "learning_rate": 1.3260320547028305e-08, "loss": 0.5821, "step": 15429 }, { "epoch": 0.98, "grad_norm": 0.8022364377975464, "learning_rate": 1.3185752541823304e-08, "loss": 0.5608, "step": 15430 }, { "epoch": 0.98, "grad_norm": 0.9580459594726562, "learning_rate": 1.3111394514018772e-08, "loss": 0.5416, "step": 15431 }, { "epoch": 0.98, "grad_norm": 0.8772706985473633, "learning_rate": 1.3037246466745535e-08, "loss": 0.5591, "step": 15432 }, { "epoch": 0.98, "grad_norm": 0.8983436822891235, "learning_rate": 1.2963308403124985e-08, "loss": 0.5797, "step": 15433 }, { "epoch": 0.98, "grad_norm": 0.9700096249580383, "learning_rate": 1.2889580326271301e-08, "loss": 0.5816, "step": 15434 }, { "epoch": 0.98, "grad_norm": 0.8904829621315002, "learning_rate": 1.2816062239288107e-08, "loss": 0.5378, "step": 15435 }, { "epoch": 0.98, "grad_norm": 0.9526095390319824, "learning_rate": 1.2742754145271264e-08, "loss": 0.5637, "step": 15436 }, { "epoch": 0.98, "grad_norm": 0.9104148745536804, "learning_rate": 1.2669656047308299e-08, "loss": 0.5773, "step": 15437 }, { "epoch": 0.98, "grad_norm": 0.9310768246650696, "learning_rate": 1.2596767948476196e-08, "loss": 0.603, "step": 15438 }, { "epoch": 0.98, "grad_norm": 0.9144603610038757, "learning_rate": 1.2524089851844168e-08, "loss": 0.5422, "step": 15439 }, { "epoch": 0.98, "grad_norm": 0.856395959854126, "learning_rate": 1.2451621760472544e-08, "loss": 0.5526, "step": 15440 }, { "epoch": 0.98, "grad_norm": 0.9461926817893982, "learning_rate": 1.237936367741277e-08, "loss": 0.5741, "step": 15441 }, { "epoch": 0.98, "grad_norm": 0.9388793706893921, "learning_rate": 1.2307315605707416e-08, "loss": 0.5818, "step": 15442 }, { "epoch": 0.98, "grad_norm": 0.9289106726646423, "learning_rate": 1.2235477548390162e-08, "loss": 0.6053, "step": 15443 }, { "epoch": 0.98, "grad_norm": 0.821549654006958, "learning_rate": 1.2163849508485259e-08, "loss": 0.5101, "step": 15444 }, { "epoch": 0.98, "grad_norm": 0.8986890316009521, "learning_rate": 1.2092431489009738e-08, "loss": 0.5866, "step": 15445 }, { "epoch": 0.98, "grad_norm": 0.8315547704696655, "learning_rate": 1.202122349297008e-08, "loss": 0.5505, "step": 15446 }, { "epoch": 0.98, "grad_norm": 0.9459112882614136, "learning_rate": 1.1950225523365e-08, "loss": 0.6082, "step": 15447 }, { "epoch": 0.98, "grad_norm": 0.8490333557128906, "learning_rate": 1.1879437583183217e-08, "loss": 0.5561, "step": 15448 }, { "epoch": 0.98, "grad_norm": 0.8695975542068481, "learning_rate": 1.1808859675406236e-08, "loss": 0.5987, "step": 15449 }, { "epoch": 0.98, "grad_norm": 0.9123320579528809, "learning_rate": 1.173849180300557e-08, "loss": 0.5599, "step": 15450 }, { "epoch": 0.98, "grad_norm": 0.9430971145629883, "learning_rate": 1.16683339689444e-08, "loss": 0.6193, "step": 15451 }, { "epoch": 0.98, "grad_norm": 0.871895968914032, "learning_rate": 1.1598386176175924e-08, "loss": 0.567, "step": 15452 }, { "epoch": 0.98, "grad_norm": 0.852607786655426, "learning_rate": 1.1528648427646671e-08, "loss": 0.5368, "step": 15453 }, { "epoch": 0.98, "grad_norm": 0.887416422367096, "learning_rate": 1.1459120726292072e-08, "loss": 0.5469, "step": 15454 }, { "epoch": 0.98, "grad_norm": 0.9012024998664856, "learning_rate": 1.1389803075039785e-08, "loss": 0.5667, "step": 15455 }, { "epoch": 0.98, "grad_norm": 0.8867619633674622, "learning_rate": 1.1320695476809141e-08, "loss": 0.6016, "step": 15456 }, { "epoch": 0.98, "grad_norm": 0.896775484085083, "learning_rate": 1.1251797934509478e-08, "loss": 0.5845, "step": 15457 }, { "epoch": 0.98, "grad_norm": 0.9201370477676392, "learning_rate": 1.1183110451042368e-08, "loss": 0.5236, "step": 15458 }, { "epoch": 0.98, "grad_norm": 0.877086877822876, "learning_rate": 1.1114633029299382e-08, "loss": 0.5334, "step": 15459 }, { "epoch": 0.98, "grad_norm": 0.8296651840209961, "learning_rate": 1.1046365672163772e-08, "loss": 0.5453, "step": 15460 }, { "epoch": 0.98, "grad_norm": 0.8853237628936768, "learning_rate": 1.0978308382511016e-08, "loss": 0.5937, "step": 15461 }, { "epoch": 0.98, "grad_norm": 0.9216740131378174, "learning_rate": 1.0910461163206043e-08, "loss": 0.5611, "step": 15462 }, { "epoch": 0.98, "grad_norm": 0.9715553522109985, "learning_rate": 1.0842824017105458e-08, "loss": 0.5791, "step": 15463 }, { "epoch": 0.98, "grad_norm": 0.8290508985519409, "learning_rate": 1.0775396947057537e-08, "loss": 0.5391, "step": 15464 }, { "epoch": 0.98, "grad_norm": 0.8305854201316833, "learning_rate": 1.0708179955901677e-08, "loss": 0.5389, "step": 15465 }, { "epoch": 0.98, "grad_norm": 0.9410961866378784, "learning_rate": 1.0641173046467833e-08, "loss": 0.5963, "step": 15466 }, { "epoch": 0.98, "grad_norm": 0.9235133528709412, "learning_rate": 1.0574376221577642e-08, "loss": 0.5421, "step": 15467 }, { "epoch": 0.98, "grad_norm": 0.8532764315605164, "learning_rate": 1.0507789484043295e-08, "loss": 0.4974, "step": 15468 }, { "epoch": 0.98, "grad_norm": 0.874849796295166, "learning_rate": 1.0441412836668663e-08, "loss": 0.5337, "step": 15469 }, { "epoch": 0.98, "grad_norm": 0.8603571057319641, "learning_rate": 1.037524628224873e-08, "loss": 0.5523, "step": 15470 }, { "epoch": 0.98, "grad_norm": 0.871017575263977, "learning_rate": 1.0309289823569601e-08, "loss": 0.5627, "step": 15471 }, { "epoch": 0.98, "grad_norm": 0.9409274458885193, "learning_rate": 1.02435434634085e-08, "loss": 0.5904, "step": 15472 }, { "epoch": 0.98, "grad_norm": 1.0156452655792236, "learning_rate": 1.0178007204533768e-08, "loss": 0.5892, "step": 15473 }, { "epoch": 0.98, "grad_norm": 0.8976706266403198, "learning_rate": 1.0112681049704865e-08, "loss": 0.5797, "step": 15474 }, { "epoch": 0.98, "grad_norm": 0.8561591506004333, "learning_rate": 1.004756500167181e-08, "loss": 0.5468, "step": 15475 }, { "epoch": 0.98, "grad_norm": 0.8820354342460632, "learning_rate": 9.982659063177413e-09, "loss": 0.57, "step": 15476 }, { "epoch": 0.98, "grad_norm": 0.8407560586929321, "learning_rate": 9.917963236954487e-09, "loss": 0.5503, "step": 15477 }, { "epoch": 0.98, "grad_norm": 0.9282391667366028, "learning_rate": 9.853477525726962e-09, "loss": 0.5349, "step": 15478 }, { "epoch": 0.98, "grad_norm": 0.8840251564979553, "learning_rate": 9.789201932209335e-09, "loss": 0.6081, "step": 15479 }, { "epoch": 0.98, "grad_norm": 0.9751169681549072, "learning_rate": 9.725136459109441e-09, "loss": 0.6371, "step": 15480 }, { "epoch": 0.98, "grad_norm": 0.8782668709754944, "learning_rate": 9.66128110912401e-09, "loss": 0.5463, "step": 15481 }, { "epoch": 0.98, "grad_norm": 0.7854354381561279, "learning_rate": 9.597635884941447e-09, "loss": 0.5376, "step": 15482 }, { "epoch": 0.98, "grad_norm": 0.847726047039032, "learning_rate": 9.534200789242388e-09, "loss": 0.5804, "step": 15483 }, { "epoch": 0.98, "grad_norm": 0.8897963166236877, "learning_rate": 9.470975824698025e-09, "loss": 0.6062, "step": 15484 }, { "epoch": 0.98, "grad_norm": 0.930458128452301, "learning_rate": 9.407960993969567e-09, "loss": 0.6237, "step": 15485 }, { "epoch": 0.98, "grad_norm": 0.8038657903671265, "learning_rate": 9.345156299711e-09, "loss": 0.5404, "step": 15486 }, { "epoch": 0.98, "grad_norm": 0.8747665286064148, "learning_rate": 9.282561744566321e-09, "loss": 0.551, "step": 15487 }, { "epoch": 0.98, "grad_norm": 0.8687538504600525, "learning_rate": 9.220177331172309e-09, "loss": 0.5515, "step": 15488 }, { "epoch": 0.98, "grad_norm": 0.8491008877754211, "learning_rate": 9.158003062154642e-09, "loss": 0.5227, "step": 15489 }, { "epoch": 0.98, "grad_norm": 0.9173932671546936, "learning_rate": 9.096038940131225e-09, "loss": 0.6004, "step": 15490 }, { "epoch": 0.98, "grad_norm": 0.817330539226532, "learning_rate": 9.034284967711637e-09, "loss": 0.5088, "step": 15491 }, { "epoch": 0.98, "grad_norm": 0.8443012833595276, "learning_rate": 8.972741147496023e-09, "loss": 0.5755, "step": 15492 }, { "epoch": 0.98, "grad_norm": 0.804356575012207, "learning_rate": 8.911407482076196e-09, "loss": 0.5244, "step": 15493 }, { "epoch": 0.98, "grad_norm": 0.865323007106781, "learning_rate": 8.85028397403398e-09, "loss": 0.5702, "step": 15494 }, { "epoch": 0.98, "grad_norm": 0.9057186245918274, "learning_rate": 8.789370625943427e-09, "loss": 0.5933, "step": 15495 }, { "epoch": 0.98, "grad_norm": 0.9095432162284851, "learning_rate": 8.728667440369153e-09, "loss": 0.5645, "step": 15496 }, { "epoch": 0.98, "grad_norm": 0.8884914517402649, "learning_rate": 8.668174419867449e-09, "loss": 0.6086, "step": 15497 }, { "epoch": 0.98, "grad_norm": 0.9319071173667908, "learning_rate": 8.60789156698516e-09, "loss": 0.5349, "step": 15498 }, { "epoch": 0.98, "grad_norm": 0.9238869547843933, "learning_rate": 8.547818884260816e-09, "loss": 0.5856, "step": 15499 }, { "epoch": 0.98, "grad_norm": 0.8431046009063721, "learning_rate": 8.48795637422406e-09, "loss": 0.5275, "step": 15500 }, { "epoch": 0.98, "grad_norm": 0.884147584438324, "learning_rate": 8.428304039395096e-09, "loss": 0.5912, "step": 15501 }, { "epoch": 0.98, "grad_norm": 0.8942022919654846, "learning_rate": 8.368861882285806e-09, "loss": 0.5818, "step": 15502 }, { "epoch": 0.98, "grad_norm": 0.8837722539901733, "learning_rate": 8.309629905399186e-09, "loss": 0.5234, "step": 15503 }, { "epoch": 0.98, "grad_norm": 0.8658926486968994, "learning_rate": 8.250608111229352e-09, "loss": 0.599, "step": 15504 }, { "epoch": 0.98, "grad_norm": 0.9732296466827393, "learning_rate": 8.191796502260985e-09, "loss": 0.6188, "step": 15505 }, { "epoch": 0.98, "grad_norm": 0.9690650701522827, "learning_rate": 8.13319508097099e-09, "loss": 0.6361, "step": 15506 }, { "epoch": 0.98, "grad_norm": 0.9391032457351685, "learning_rate": 8.074803849827395e-09, "loss": 0.5799, "step": 15507 }, { "epoch": 0.98, "grad_norm": 0.8532130122184753, "learning_rate": 8.016622811287123e-09, "loss": 0.5294, "step": 15508 }, { "epoch": 0.98, "grad_norm": 0.8348953127861023, "learning_rate": 7.958651967801545e-09, "loss": 0.5303, "step": 15509 }, { "epoch": 0.98, "grad_norm": 0.865597128868103, "learning_rate": 7.900891321810932e-09, "loss": 0.5601, "step": 15510 }, { "epoch": 0.98, "grad_norm": 0.9278403520584106, "learning_rate": 7.843340875747785e-09, "loss": 0.6282, "step": 15511 }, { "epoch": 0.98, "grad_norm": 0.8793516159057617, "learning_rate": 7.786000632035163e-09, "loss": 0.5598, "step": 15512 }, { "epoch": 0.98, "grad_norm": 0.8610735535621643, "learning_rate": 7.728870593087246e-09, "loss": 0.5739, "step": 15513 }, { "epoch": 0.98, "grad_norm": 0.9375894665718079, "learning_rate": 7.671950761309333e-09, "loss": 0.5678, "step": 15514 }, { "epoch": 0.98, "grad_norm": 0.8614829182624817, "learning_rate": 7.61524113909895e-09, "loss": 0.5117, "step": 15515 }, { "epoch": 0.98, "grad_norm": 0.8722830414772034, "learning_rate": 7.558741728843633e-09, "loss": 0.5368, "step": 15516 }, { "epoch": 0.98, "grad_norm": 0.9089422821998596, "learning_rate": 7.502452532922033e-09, "loss": 0.5445, "step": 15517 }, { "epoch": 0.98, "grad_norm": 0.9493569135665894, "learning_rate": 7.446373553705033e-09, "loss": 0.601, "step": 15518 }, { "epoch": 0.98, "grad_norm": 0.8956559896469116, "learning_rate": 7.390504793552966e-09, "loss": 0.6217, "step": 15519 }, { "epoch": 0.98, "grad_norm": 0.8692125082015991, "learning_rate": 7.3348462548183955e-09, "loss": 0.587, "step": 15520 }, { "epoch": 0.98, "grad_norm": 0.9651332497596741, "learning_rate": 7.279397939845556e-09, "loss": 0.5755, "step": 15521 }, { "epoch": 0.98, "grad_norm": 0.9648064374923706, "learning_rate": 7.2241598509686926e-09, "loss": 0.5921, "step": 15522 }, { "epoch": 0.98, "grad_norm": 0.8698114156723022, "learning_rate": 7.169131990514278e-09, "loss": 0.6096, "step": 15523 }, { "epoch": 0.98, "grad_norm": 0.8747323751449585, "learning_rate": 7.114314360798791e-09, "loss": 0.6015, "step": 15524 }, { "epoch": 0.98, "grad_norm": 0.8900646567344666, "learning_rate": 7.0597069641303865e-09, "loss": 0.5879, "step": 15525 }, { "epoch": 0.98, "grad_norm": 0.9312129020690918, "learning_rate": 7.00530980280889e-09, "loss": 0.5577, "step": 15526 }, { "epoch": 0.98, "grad_norm": 0.8599275946617126, "learning_rate": 6.951122879124139e-09, "loss": 0.541, "step": 15527 }, { "epoch": 0.98, "grad_norm": 0.8398301601409912, "learning_rate": 6.89714619535764e-09, "loss": 0.5422, "step": 15528 }, { "epoch": 0.98, "grad_norm": 0.8573556542396545, "learning_rate": 6.84337975378313e-09, "loss": 0.5242, "step": 15529 }, { "epoch": 0.98, "grad_norm": 0.8811535239219666, "learning_rate": 6.789823556663799e-09, "loss": 0.571, "step": 15530 }, { "epoch": 0.98, "grad_norm": 0.8871817588806152, "learning_rate": 6.736477606255065e-09, "loss": 0.5309, "step": 15531 }, { "epoch": 0.98, "grad_norm": 0.8854000568389893, "learning_rate": 6.683341904802909e-09, "loss": 0.5643, "step": 15532 }, { "epoch": 0.98, "grad_norm": 0.9325771927833557, "learning_rate": 6.63041645454443e-09, "loss": 0.5453, "step": 15533 }, { "epoch": 0.98, "grad_norm": 0.9529849290847778, "learning_rate": 6.577701257708957e-09, "loss": 0.6482, "step": 15534 }, { "epoch": 0.98, "grad_norm": 0.8886032700538635, "learning_rate": 6.52519631651527e-09, "loss": 0.5402, "step": 15535 }, { "epoch": 0.98, "grad_norm": 0.933830201625824, "learning_rate": 6.4729016331749325e-09, "loss": 0.5783, "step": 15536 }, { "epoch": 0.98, "grad_norm": 0.8807794451713562, "learning_rate": 6.420817209888963e-09, "loss": 0.5386, "step": 15537 }, { "epoch": 0.98, "grad_norm": 0.8794984221458435, "learning_rate": 6.368943048851162e-09, "loss": 0.5568, "step": 15538 }, { "epoch": 0.98, "grad_norm": 0.9522714018821716, "learning_rate": 6.317279152245892e-09, "loss": 0.555, "step": 15539 }, { "epoch": 0.98, "grad_norm": 0.9179518222808838, "learning_rate": 6.265825522248082e-09, "loss": 0.5825, "step": 15540 }, { "epoch": 0.98, "grad_norm": 0.8840945959091187, "learning_rate": 6.2145821610243296e-09, "loss": 0.5798, "step": 15541 }, { "epoch": 0.98, "grad_norm": 0.9103140830993652, "learning_rate": 6.163549070732356e-09, "loss": 0.6138, "step": 15542 }, { "epoch": 0.98, "grad_norm": 0.9227690696716309, "learning_rate": 6.1127262535209955e-09, "loss": 0.6251, "step": 15543 }, { "epoch": 0.98, "grad_norm": 0.9462999105453491, "learning_rate": 6.062113711530204e-09, "loss": 0.5747, "step": 15544 }, { "epoch": 0.98, "grad_norm": 0.9513913989067078, "learning_rate": 6.01171144689161e-09, "loss": 0.5616, "step": 15545 }, { "epoch": 0.98, "grad_norm": 0.9170199036598206, "learning_rate": 5.96151946172685e-09, "loss": 0.6263, "step": 15546 }, { "epoch": 0.98, "grad_norm": 0.8427810668945312, "learning_rate": 5.911537758149233e-09, "loss": 0.538, "step": 15547 }, { "epoch": 0.99, "grad_norm": 0.8685246109962463, "learning_rate": 5.861766338263741e-09, "loss": 0.5725, "step": 15548 }, { "epoch": 0.99, "grad_norm": 0.9405071139335632, "learning_rate": 5.812205204165922e-09, "loss": 0.5458, "step": 15549 }, { "epoch": 0.99, "grad_norm": 0.933382511138916, "learning_rate": 5.762854357942993e-09, "loss": 0.6187, "step": 15550 }, { "epoch": 0.99, "grad_norm": 0.8507725596427917, "learning_rate": 5.7137138016721825e-09, "loss": 0.529, "step": 15551 }, { "epoch": 0.99, "grad_norm": 0.827217698097229, "learning_rate": 5.6647835374229465e-09, "loss": 0.5867, "step": 15552 }, { "epoch": 0.99, "grad_norm": 0.8920066356658936, "learning_rate": 5.616063567255859e-09, "loss": 0.6224, "step": 15553 }, { "epoch": 0.99, "grad_norm": 0.9211912751197815, "learning_rate": 5.5675538932220555e-09, "loss": 0.6406, "step": 15554 }, { "epoch": 0.99, "grad_norm": 0.8937119245529175, "learning_rate": 5.519254517364347e-09, "loss": 0.526, "step": 15555 }, { "epoch": 0.99, "grad_norm": 0.9318338632583618, "learning_rate": 5.471165441716108e-09, "loss": 0.5555, "step": 15556 }, { "epoch": 0.99, "grad_norm": 0.8785502910614014, "learning_rate": 5.4232866683023856e-09, "loss": 0.5905, "step": 15557 }, { "epoch": 0.99, "grad_norm": 0.870749831199646, "learning_rate": 5.375618199139343e-09, "loss": 0.539, "step": 15558 }, { "epoch": 0.99, "grad_norm": 0.923859179019928, "learning_rate": 5.328160036234264e-09, "loss": 0.6001, "step": 15559 }, { "epoch": 0.99, "grad_norm": 0.9950880408287048, "learning_rate": 5.280912181584441e-09, "loss": 0.6232, "step": 15560 }, { "epoch": 0.99, "grad_norm": 0.887122392654419, "learning_rate": 5.233874637180503e-09, "loss": 0.6034, "step": 15561 }, { "epoch": 0.99, "grad_norm": 0.8819499015808105, "learning_rate": 5.1870474050025325e-09, "loss": 0.5857, "step": 15562 }, { "epoch": 0.99, "grad_norm": 0.859145998954773, "learning_rate": 5.1404304870222856e-09, "loss": 0.5484, "step": 15563 }, { "epoch": 0.99, "grad_norm": 0.8710299134254456, "learning_rate": 5.094023885203192e-09, "loss": 0.5355, "step": 15564 }, { "epoch": 0.99, "grad_norm": 0.8982166647911072, "learning_rate": 5.0478276014981345e-09, "loss": 0.6073, "step": 15565 }, { "epoch": 0.99, "grad_norm": 0.981425404548645, "learning_rate": 5.001841637852778e-09, "loss": 0.5632, "step": 15566 }, { "epoch": 0.99, "grad_norm": 0.865263044834137, "learning_rate": 4.956065996203907e-09, "loss": 0.5613, "step": 15567 }, { "epoch": 0.99, "grad_norm": 0.9212016463279724, "learning_rate": 4.910500678478314e-09, "loss": 0.5628, "step": 15568 }, { "epoch": 0.99, "grad_norm": 0.9121674299240112, "learning_rate": 4.865145686595019e-09, "loss": 0.5908, "step": 15569 }, { "epoch": 0.99, "grad_norm": 0.8445576429367065, "learning_rate": 4.820001022463605e-09, "loss": 0.5616, "step": 15570 }, { "epoch": 0.99, "grad_norm": 0.9339314699172974, "learning_rate": 4.77506668798533e-09, "loss": 0.5892, "step": 15571 }, { "epoch": 0.99, "grad_norm": 0.8746134638786316, "learning_rate": 4.730342685051459e-09, "loss": 0.574, "step": 15572 }, { "epoch": 0.99, "grad_norm": 0.9173433780670166, "learning_rate": 4.685829015545485e-09, "loss": 0.5736, "step": 15573 }, { "epoch": 0.99, "grad_norm": 0.9090994596481323, "learning_rate": 4.641525681342019e-09, "loss": 0.5846, "step": 15574 }, { "epoch": 0.99, "grad_norm": 0.9413917064666748, "learning_rate": 4.597432684306236e-09, "loss": 0.6241, "step": 15575 }, { "epoch": 0.99, "grad_norm": 0.9874243140220642, "learning_rate": 4.553550026294984e-09, "loss": 0.6424, "step": 15576 }, { "epoch": 0.99, "grad_norm": 0.8794564604759216, "learning_rate": 4.5098777091556745e-09, "loss": 0.5624, "step": 15577 }, { "epoch": 0.99, "grad_norm": 0.8835657238960266, "learning_rate": 4.4664157347273916e-09, "loss": 0.5964, "step": 15578 }, { "epoch": 0.99, "grad_norm": 0.8630079627037048, "learning_rate": 4.423164104840339e-09, "loss": 0.5759, "step": 15579 }, { "epoch": 0.99, "grad_norm": 0.8491309285163879, "learning_rate": 4.38012282131528e-09, "loss": 0.539, "step": 15580 }, { "epoch": 0.99, "grad_norm": 0.8870819807052612, "learning_rate": 4.3372918859652115e-09, "loss": 0.5754, "step": 15581 }, { "epoch": 0.99, "grad_norm": 0.9265652894973755, "learning_rate": 4.294671300592579e-09, "loss": 0.5735, "step": 15582 }, { "epoch": 0.99, "grad_norm": 0.8699434995651245, "learning_rate": 4.252261066993169e-09, "loss": 0.5775, "step": 15583 }, { "epoch": 0.99, "grad_norm": 0.9221080541610718, "learning_rate": 4.210061186951664e-09, "loss": 0.6277, "step": 15584 }, { "epoch": 0.99, "grad_norm": 0.9161962866783142, "learning_rate": 4.168071662245532e-09, "loss": 0.5884, "step": 15585 }, { "epoch": 0.99, "grad_norm": 0.9073721170425415, "learning_rate": 4.1262924946422476e-09, "loss": 0.5669, "step": 15586 }, { "epoch": 0.99, "grad_norm": 0.8920649290084839, "learning_rate": 4.084723685901515e-09, "loss": 0.608, "step": 15587 }, { "epoch": 0.99, "grad_norm": 0.8832874298095703, "learning_rate": 4.043365237774155e-09, "loss": 0.5981, "step": 15588 }, { "epoch": 0.99, "grad_norm": 0.9273045063018799, "learning_rate": 4.002217152000443e-09, "loss": 0.574, "step": 15589 }, { "epoch": 0.99, "grad_norm": 0.8001242280006409, "learning_rate": 3.961279430313991e-09, "loss": 0.465, "step": 15590 }, { "epoch": 0.99, "grad_norm": 0.900482714176178, "learning_rate": 3.920552074437867e-09, "loss": 0.5724, "step": 15591 }, { "epoch": 0.99, "grad_norm": 0.9371671080589294, "learning_rate": 3.880035086086808e-09, "loss": 0.5378, "step": 15592 }, { "epoch": 0.99, "grad_norm": 0.8328535556793213, "learning_rate": 3.839728466967785e-09, "loss": 0.5461, "step": 15593 }, { "epoch": 0.99, "grad_norm": 0.9578930139541626, "learning_rate": 3.799632218777216e-09, "loss": 0.5931, "step": 15594 }, { "epoch": 0.99, "grad_norm": 0.9057374596595764, "learning_rate": 3.759746343203751e-09, "loss": 0.5542, "step": 15595 }, { "epoch": 0.99, "grad_norm": 0.9067438840866089, "learning_rate": 3.720070841926604e-09, "loss": 0.5555, "step": 15596 }, { "epoch": 0.99, "grad_norm": 0.8708641529083252, "learning_rate": 3.6806057166166585e-09, "loss": 0.5649, "step": 15597 }, { "epoch": 0.99, "grad_norm": 0.857673168182373, "learning_rate": 3.6413509689353644e-09, "loss": 0.5904, "step": 15598 }, { "epoch": 0.99, "grad_norm": 0.9187641739845276, "learning_rate": 3.602306600535843e-09, "loss": 0.573, "step": 15599 }, { "epoch": 0.99, "grad_norm": 0.9030482172966003, "learning_rate": 3.5634726130617802e-09, "loss": 0.6042, "step": 15600 }, { "epoch": 0.99, "grad_norm": 0.814935028553009, "learning_rate": 3.5248490081485343e-09, "loss": 0.5411, "step": 15601 }, { "epoch": 0.99, "grad_norm": 0.8016582131385803, "learning_rate": 3.486435787422582e-09, "loss": 0.5426, "step": 15602 }, { "epoch": 0.99, "grad_norm": 0.8617429733276367, "learning_rate": 3.4482329525009627e-09, "loss": 0.5523, "step": 15603 }, { "epoch": 0.99, "grad_norm": 0.8524816632270813, "learning_rate": 3.4102405049929455e-09, "loss": 0.6124, "step": 15604 }, { "epoch": 0.99, "grad_norm": 0.8613601922988892, "learning_rate": 3.372458446497251e-09, "loss": 0.5662, "step": 15605 }, { "epoch": 0.99, "grad_norm": 0.9535294771194458, "learning_rate": 3.3348867786059393e-09, "loss": 0.5969, "step": 15606 }, { "epoch": 0.99, "grad_norm": 0.8776538372039795, "learning_rate": 3.2975255028999675e-09, "loss": 0.5582, "step": 15607 }, { "epoch": 0.99, "grad_norm": 0.9301447868347168, "learning_rate": 3.2603746209530774e-09, "loss": 0.5439, "step": 15608 }, { "epoch": 0.99, "grad_norm": 0.8789377212524414, "learning_rate": 3.223434134329573e-09, "loss": 0.5597, "step": 15609 }, { "epoch": 0.99, "grad_norm": 0.9617857336997986, "learning_rate": 3.1867040445848764e-09, "loss": 0.5971, "step": 15610 }, { "epoch": 0.99, "grad_norm": 0.9199445843696594, "learning_rate": 3.1501843532649734e-09, "loss": 0.51, "step": 15611 }, { "epoch": 0.99, "grad_norm": 0.9115186929702759, "learning_rate": 3.113875061908078e-09, "loss": 0.5553, "step": 15612 }, { "epoch": 0.99, "grad_norm": 0.9086104035377502, "learning_rate": 3.077776172043523e-09, "loss": 0.5724, "step": 15613 }, { "epoch": 0.99, "grad_norm": 0.9141691327095032, "learning_rate": 3.0418876851900924e-09, "loss": 0.5002, "step": 15614 }, { "epoch": 0.99, "grad_norm": 0.8640308380126953, "learning_rate": 3.0062096028599108e-09, "loss": 0.511, "step": 15615 }, { "epoch": 0.99, "grad_norm": 0.8536925911903381, "learning_rate": 2.9707419265551097e-09, "loss": 0.5577, "step": 15616 }, { "epoch": 0.99, "grad_norm": 0.9029596447944641, "learning_rate": 2.9354846577689387e-09, "loss": 0.6127, "step": 15617 }, { "epoch": 0.99, "grad_norm": 0.877974271774292, "learning_rate": 2.900437797986322e-09, "loss": 0.5218, "step": 15618 }, { "epoch": 0.99, "grad_norm": 0.8660022616386414, "learning_rate": 2.8656013486821897e-09, "loss": 0.58, "step": 15619 }, { "epoch": 0.99, "grad_norm": 0.8560828566551208, "learning_rate": 2.8309753113237025e-09, "loss": 0.5314, "step": 15620 }, { "epoch": 0.99, "grad_norm": 0.8698206543922424, "learning_rate": 2.796559687369138e-09, "loss": 0.5735, "step": 15621 }, { "epoch": 0.99, "grad_norm": 0.8277180790901184, "learning_rate": 2.7623544782673372e-09, "loss": 0.5116, "step": 15622 }, { "epoch": 0.99, "grad_norm": 0.8713210225105286, "learning_rate": 2.7283596854588148e-09, "loss": 0.583, "step": 15623 }, { "epoch": 0.99, "grad_norm": 0.931766927242279, "learning_rate": 2.6945753103746475e-09, "loss": 0.5452, "step": 15624 }, { "epoch": 0.99, "grad_norm": 0.9144072532653809, "learning_rate": 2.661001354437587e-09, "loss": 0.5662, "step": 15625 }, { "epoch": 0.99, "grad_norm": 0.9093576073646545, "learning_rate": 2.6276378190615016e-09, "loss": 0.56, "step": 15626 }, { "epoch": 0.99, "grad_norm": 0.9563875198364258, "learning_rate": 2.5944847056508237e-09, "loss": 0.5346, "step": 15627 }, { "epoch": 0.99, "grad_norm": 0.9016739726066589, "learning_rate": 2.561542015601659e-09, "loss": 0.6267, "step": 15628 }, { "epoch": 0.99, "grad_norm": 0.9251307249069214, "learning_rate": 2.528809750301231e-09, "loss": 0.5576, "step": 15629 }, { "epoch": 0.99, "grad_norm": 0.9503340721130371, "learning_rate": 2.4962879111278813e-09, "loss": 0.5327, "step": 15630 }, { "epoch": 0.99, "grad_norm": 0.8992822766304016, "learning_rate": 2.4639764994505156e-09, "loss": 0.5629, "step": 15631 }, { "epoch": 0.99, "grad_norm": 0.8664157390594482, "learning_rate": 2.4318755166302668e-09, "loss": 0.5529, "step": 15632 }, { "epoch": 0.99, "grad_norm": 0.837928831577301, "learning_rate": 2.399984964018276e-09, "loss": 0.5679, "step": 15633 }, { "epoch": 0.99, "grad_norm": 0.8134336471557617, "learning_rate": 2.3683048429573587e-09, "loss": 0.5256, "step": 15634 }, { "epoch": 0.99, "grad_norm": 0.9103096127510071, "learning_rate": 2.3368351547820023e-09, "loss": 0.58, "step": 15635 }, { "epoch": 0.99, "grad_norm": 0.8657124638557434, "learning_rate": 2.3055759008167033e-09, "loss": 0.5196, "step": 15636 }, { "epoch": 0.99, "grad_norm": 0.8632974028587341, "learning_rate": 2.2745270823776312e-09, "loss": 0.5559, "step": 15637 }, { "epoch": 0.99, "grad_norm": 0.9439393877983093, "learning_rate": 2.243688700772628e-09, "loss": 0.6166, "step": 15638 }, { "epoch": 0.99, "grad_norm": 0.8731262683868408, "learning_rate": 2.2130607573001006e-09, "loss": 0.6254, "step": 15639 }, { "epoch": 0.99, "grad_norm": 0.867850124835968, "learning_rate": 2.1826432532495724e-09, "loss": 0.5505, "step": 15640 }, { "epoch": 0.99, "grad_norm": 0.9074274897575378, "learning_rate": 2.1524361899016853e-09, "loss": 0.581, "step": 15641 }, { "epoch": 0.99, "grad_norm": 0.8568212389945984, "learning_rate": 2.1224395685282008e-09, "loss": 0.6037, "step": 15642 }, { "epoch": 0.99, "grad_norm": 0.8557693362236023, "learning_rate": 2.0926533903925516e-09, "loss": 0.5362, "step": 15643 }, { "epoch": 0.99, "grad_norm": 0.8828796744346619, "learning_rate": 2.0630776567492904e-09, "loss": 0.573, "step": 15644 }, { "epoch": 0.99, "grad_norm": 0.8889597058296204, "learning_rate": 2.033712368842977e-09, "loss": 0.5788, "step": 15645 }, { "epoch": 0.99, "grad_norm": 0.8992346525192261, "learning_rate": 2.004557527909845e-09, "loss": 0.5368, "step": 15646 }, { "epoch": 0.99, "grad_norm": 0.8839378356933594, "learning_rate": 1.975613135178911e-09, "loss": 0.5774, "step": 15647 }, { "epoch": 0.99, "grad_norm": 0.9571453928947449, "learning_rate": 1.9468791918675345e-09, "loss": 0.5711, "step": 15648 }, { "epoch": 0.99, "grad_norm": 0.8286476731300354, "learning_rate": 1.918355699186414e-09, "loss": 0.5746, "step": 15649 }, { "epoch": 0.99, "grad_norm": 0.8622028827667236, "learning_rate": 1.8900426583357003e-09, "loss": 0.4927, "step": 15650 }, { "epoch": 0.99, "grad_norm": 0.818505048751831, "learning_rate": 1.861940070508883e-09, "loss": 0.5347, "step": 15651 }, { "epoch": 0.99, "grad_norm": 0.9037706851959229, "learning_rate": 1.8340479368883502e-09, "loss": 0.5652, "step": 15652 }, { "epoch": 0.99, "grad_norm": 0.8619953989982605, "learning_rate": 1.8063662586481622e-09, "loss": 0.5376, "step": 15653 }, { "epoch": 0.99, "grad_norm": 0.8764271140098572, "learning_rate": 1.7788950369551638e-09, "loss": 0.544, "step": 15654 }, { "epoch": 0.99, "grad_norm": 0.9021615386009216, "learning_rate": 1.751634272964542e-09, "loss": 0.5352, "step": 15655 }, { "epoch": 0.99, "grad_norm": 0.9306265115737915, "learning_rate": 1.7245839678259323e-09, "loss": 0.549, "step": 15656 }, { "epoch": 0.99, "grad_norm": 0.9229673743247986, "learning_rate": 1.6977441226767589e-09, "loss": 0.6132, "step": 15657 }, { "epoch": 0.99, "grad_norm": 0.8322945237159729, "learning_rate": 1.6711147386477833e-09, "loss": 0.4952, "step": 15658 }, { "epoch": 0.99, "grad_norm": 0.9024680852890015, "learning_rate": 1.644695816860331e-09, "loss": 0.5401, "step": 15659 }, { "epoch": 0.99, "grad_norm": 0.86787348985672, "learning_rate": 1.6184873584268457e-09, "loss": 0.6113, "step": 15660 }, { "epoch": 0.99, "grad_norm": 0.9406611323356628, "learning_rate": 1.5924893644503336e-09, "loss": 0.6352, "step": 15661 }, { "epoch": 0.99, "grad_norm": 0.9413818120956421, "learning_rate": 1.566701836026585e-09, "loss": 0.5955, "step": 15662 }, { "epoch": 0.99, "grad_norm": 0.8670917749404907, "learning_rate": 1.541124774240288e-09, "loss": 0.5796, "step": 15663 }, { "epoch": 0.99, "grad_norm": 0.850266695022583, "learning_rate": 1.5157581801689137e-09, "loss": 0.5393, "step": 15664 }, { "epoch": 0.99, "grad_norm": 0.9164510369300842, "learning_rate": 1.4906020548804968e-09, "loss": 0.5778, "step": 15665 }, { "epoch": 0.99, "grad_norm": 0.8657138347625732, "learning_rate": 1.4656563994341898e-09, "loss": 0.5405, "step": 15666 }, { "epoch": 0.99, "grad_norm": 0.925816535949707, "learning_rate": 1.4409212148802643e-09, "loss": 0.6178, "step": 15667 }, { "epoch": 0.99, "grad_norm": 0.8860337138175964, "learning_rate": 1.416396502260664e-09, "loss": 0.5587, "step": 15668 }, { "epoch": 0.99, "grad_norm": 0.9135521054267883, "learning_rate": 1.3920822626078967e-09, "loss": 0.5695, "step": 15669 }, { "epoch": 0.99, "grad_norm": 0.8713152408599854, "learning_rate": 1.3679784969461429e-09, "loss": 0.601, "step": 15670 }, { "epoch": 0.99, "grad_norm": 0.8884409666061401, "learning_rate": 1.3440852062890364e-09, "loss": 0.5773, "step": 15671 }, { "epoch": 0.99, "grad_norm": 0.8426517248153687, "learning_rate": 1.3204023916435494e-09, "loss": 0.5539, "step": 15672 }, { "epoch": 0.99, "grad_norm": 0.9166735410690308, "learning_rate": 1.2969300540072171e-09, "loss": 0.5526, "step": 15673 }, { "epoch": 0.99, "grad_norm": 0.8987283706665039, "learning_rate": 1.2736681943675833e-09, "loss": 0.562, "step": 15674 }, { "epoch": 0.99, "grad_norm": 0.8762261867523193, "learning_rate": 1.2506168137049747e-09, "loss": 0.5946, "step": 15675 }, { "epoch": 0.99, "grad_norm": 0.8147190809249878, "learning_rate": 1.2277759129886158e-09, "loss": 0.5149, "step": 15676 }, { "epoch": 0.99, "grad_norm": 0.8669753670692444, "learning_rate": 1.2051454931816254e-09, "loss": 0.544, "step": 15677 }, { "epoch": 0.99, "grad_norm": 0.9210073351860046, "learning_rate": 1.1827255552365745e-09, "loss": 0.5321, "step": 15678 }, { "epoch": 0.99, "grad_norm": 0.9330329895019531, "learning_rate": 1.1605161000971532e-09, "loss": 0.5983, "step": 15679 }, { "epoch": 0.99, "grad_norm": 0.8832536935806274, "learning_rate": 1.1385171286992791e-09, "loss": 0.5729, "step": 15680 }, { "epoch": 0.99, "grad_norm": 0.8871389627456665, "learning_rate": 1.116728641967768e-09, "loss": 0.6509, "step": 15681 }, { "epoch": 0.99, "grad_norm": 0.9270039200782776, "learning_rate": 1.0951506408213298e-09, "loss": 0.588, "step": 15682 }, { "epoch": 0.99, "grad_norm": 0.8575807213783264, "learning_rate": 1.0737831261686815e-09, "loss": 0.6011, "step": 15683 }, { "epoch": 0.99, "grad_norm": 0.8653765320777893, "learning_rate": 1.052626098907994e-09, "loss": 0.546, "step": 15684 }, { "epoch": 0.99, "grad_norm": 0.9024002552032471, "learning_rate": 1.0316795599318862e-09, "loss": 0.5587, "step": 15685 }, { "epoch": 0.99, "grad_norm": 0.9174841046333313, "learning_rate": 1.0109435101218757e-09, "loss": 0.5712, "step": 15686 }, { "epoch": 0.99, "grad_norm": 0.9049075841903687, "learning_rate": 9.90417950350042e-10, "loss": 0.5595, "step": 15687 }, { "epoch": 0.99, "grad_norm": 0.873509407043457, "learning_rate": 9.701028814818047e-10, "loss": 0.5713, "step": 15688 }, { "epoch": 0.99, "grad_norm": 0.9006879329681396, "learning_rate": 9.499983043720351e-10, "loss": 0.5857, "step": 15689 }, { "epoch": 0.99, "grad_norm": 0.9213382601737976, "learning_rate": 9.301042198678334e-10, "loss": 0.5795, "step": 15690 }, { "epoch": 0.99, "grad_norm": 0.9829197525978088, "learning_rate": 9.104206288057527e-10, "loss": 0.5673, "step": 15691 }, { "epoch": 0.99, "grad_norm": 0.8557707071304321, "learning_rate": 8.909475320156846e-10, "loss": 0.5127, "step": 15692 }, { "epoch": 0.99, "grad_norm": 0.9053747057914734, "learning_rate": 8.71684930317529e-10, "loss": 0.5696, "step": 15693 }, { "epoch": 0.99, "grad_norm": 0.8902594447135925, "learning_rate": 8.526328245217485e-10, "loss": 0.5673, "step": 15694 }, { "epoch": 0.99, "grad_norm": 0.86614990234375, "learning_rate": 8.337912154304795e-10, "loss": 0.5927, "step": 15695 }, { "epoch": 0.99, "grad_norm": 0.8675297498703003, "learning_rate": 8.151601038375312e-10, "loss": 0.5788, "step": 15696 }, { "epoch": 0.99, "grad_norm": 0.8987656831741333, "learning_rate": 7.967394905278314e-10, "loss": 0.5593, "step": 15697 }, { "epoch": 0.99, "grad_norm": 0.8395030498504639, "learning_rate": 7.785293762757607e-10, "loss": 0.5587, "step": 15698 }, { "epoch": 0.99, "grad_norm": 0.8946781754493713, "learning_rate": 7.605297618495932e-10, "loss": 0.6318, "step": 15699 }, { "epoch": 0.99, "grad_norm": 0.917945384979248, "learning_rate": 7.427406480059463e-10, "loss": 0.6166, "step": 15700 }, { "epoch": 0.99, "grad_norm": 0.8553743362426758, "learning_rate": 7.251620354942201e-10, "loss": 0.5548, "step": 15701 }, { "epoch": 0.99, "grad_norm": 0.9134872555732727, "learning_rate": 7.077939250549337e-10, "loss": 0.5561, "step": 15702 }, { "epoch": 0.99, "grad_norm": 0.8560099601745605, "learning_rate": 6.906363174191688e-10, "loss": 0.5588, "step": 15703 }, { "epoch": 0.99, "grad_norm": 0.8506429195404053, "learning_rate": 6.736892133091255e-10, "loss": 0.496, "step": 15704 }, { "epoch": 0.99, "grad_norm": 0.8329866528511047, "learning_rate": 6.569526134392324e-10, "loss": 0.4726, "step": 15705 }, { "epoch": 1.0, "grad_norm": 0.8823480606079102, "learning_rate": 6.404265185128155e-10, "loss": 0.6387, "step": 15706 }, { "epoch": 1.0, "grad_norm": 0.8894028067588806, "learning_rate": 6.241109292270953e-10, "loss": 0.5589, "step": 15707 }, { "epoch": 1.0, "grad_norm": 0.9294013977050781, "learning_rate": 6.080058462687444e-10, "loss": 0.5855, "step": 15708 }, { "epoch": 1.0, "grad_norm": 0.8650762438774109, "learning_rate": 5.92111270314999e-10, "loss": 0.5452, "step": 15709 }, { "epoch": 1.0, "grad_norm": 0.8934659361839294, "learning_rate": 5.764272020358785e-10, "loss": 0.5798, "step": 15710 }, { "epoch": 1.0, "grad_norm": 1.0057034492492676, "learning_rate": 5.609536420919659e-10, "loss": 0.5901, "step": 15711 }, { "epoch": 1.0, "grad_norm": 0.9346972703933716, "learning_rate": 5.456905911344068e-10, "loss": 0.5695, "step": 15712 }, { "epoch": 1.0, "grad_norm": 0.9616386294364929, "learning_rate": 5.306380498060204e-10, "loss": 0.5871, "step": 15713 }, { "epoch": 1.0, "grad_norm": 0.8900029063224792, "learning_rate": 5.15796018740744e-10, "loss": 0.5773, "step": 15714 }, { "epoch": 1.0, "grad_norm": 0.9126656651496887, "learning_rate": 5.011644985630781e-10, "loss": 0.5591, "step": 15715 }, { "epoch": 1.0, "grad_norm": 0.9090896248817444, "learning_rate": 4.867434898891965e-10, "loss": 0.5804, "step": 15716 }, { "epoch": 1.0, "grad_norm": 0.8257348537445068, "learning_rate": 4.725329933269463e-10, "loss": 0.5875, "step": 15717 }, { "epoch": 1.0, "grad_norm": 0.9159669876098633, "learning_rate": 4.5853300947418247e-10, "loss": 0.5699, "step": 15718 }, { "epoch": 1.0, "grad_norm": 0.8885412812232971, "learning_rate": 4.4474353892043356e-10, "loss": 0.615, "step": 15719 }, { "epoch": 1.0, "grad_norm": 0.8214197158813477, "learning_rate": 4.311645822463462e-10, "loss": 0.5229, "step": 15720 }, { "epoch": 1.0, "grad_norm": 0.9046826362609863, "learning_rate": 4.177961400236852e-10, "loss": 0.5779, "step": 15721 }, { "epoch": 1.0, "grad_norm": 0.8636698722839355, "learning_rate": 4.046382128147786e-10, "loss": 0.5556, "step": 15722 }, { "epoch": 1.0, "grad_norm": 1.039339542388916, "learning_rate": 3.916908011747378e-10, "loss": 0.6415, "step": 15723 }, { "epoch": 1.0, "grad_norm": 0.8799046874046326, "learning_rate": 3.7895390564868237e-10, "loss": 0.5939, "step": 15724 }, { "epoch": 1.0, "grad_norm": 0.855849027633667, "learning_rate": 3.664275267717399e-10, "loss": 0.5676, "step": 15725 }, { "epoch": 1.0, "grad_norm": 0.8732782602310181, "learning_rate": 3.541116650723764e-10, "loss": 0.5287, "step": 15726 }, { "epoch": 1.0, "grad_norm": 0.8243375420570374, "learning_rate": 3.4200632106906605e-10, "loss": 0.5535, "step": 15727 }, { "epoch": 1.0, "grad_norm": 0.8324832320213318, "learning_rate": 3.301114952708462e-10, "loss": 0.5476, "step": 15728 }, { "epoch": 1.0, "grad_norm": 0.9179962873458862, "learning_rate": 3.1842718817953755e-10, "loss": 0.6169, "step": 15729 }, { "epoch": 1.0, "grad_norm": 0.9185804724693298, "learning_rate": 3.0695340028641383e-10, "loss": 0.6479, "step": 15730 }, { "epoch": 1.0, "grad_norm": 0.9088355302810669, "learning_rate": 2.956901320744221e-10, "loss": 0.5929, "step": 15731 }, { "epoch": 1.0, "grad_norm": 0.8863728642463684, "learning_rate": 2.8463738401873776e-10, "loss": 0.5875, "step": 15732 }, { "epoch": 1.0, "grad_norm": 0.9010648727416992, "learning_rate": 2.7379515658398915e-10, "loss": 0.5564, "step": 15733 }, { "epoch": 1.0, "grad_norm": 0.9122373461723328, "learning_rate": 2.6316345022703307e-10, "loss": 0.5589, "step": 15734 }, { "epoch": 1.0, "grad_norm": 0.9271931052207947, "learning_rate": 2.5274226539584445e-10, "loss": 0.5492, "step": 15735 }, { "epoch": 1.0, "grad_norm": 0.8589327931404114, "learning_rate": 2.4253160252840636e-10, "loss": 0.4886, "step": 15736 }, { "epoch": 1.0, "grad_norm": 0.9020355343818665, "learning_rate": 2.3253146205493014e-10, "loss": 0.5461, "step": 15737 }, { "epoch": 1.0, "grad_norm": 0.8690382838249207, "learning_rate": 2.227418443967455e-10, "loss": 0.5784, "step": 15738 }, { "epoch": 1.0, "grad_norm": 0.9112160801887512, "learning_rate": 2.1316274996630026e-10, "loss": 0.6373, "step": 15739 }, { "epoch": 1.0, "grad_norm": 0.8418201208114624, "learning_rate": 2.0379417916605027e-10, "loss": 0.5148, "step": 15740 }, { "epoch": 1.0, "grad_norm": 0.9191485047340393, "learning_rate": 1.946361323912349e-10, "loss": 0.5662, "step": 15741 }, { "epoch": 1.0, "grad_norm": 0.9604305028915405, "learning_rate": 1.856886100276567e-10, "loss": 0.5844, "step": 15742 }, { "epoch": 1.0, "grad_norm": 0.8791788816452026, "learning_rate": 1.7695161245112613e-10, "loss": 0.5593, "step": 15743 }, { "epoch": 1.0, "grad_norm": 0.9725036025047302, "learning_rate": 1.6842514003023724e-10, "loss": 0.5887, "step": 15744 }, { "epoch": 1.0, "grad_norm": 0.8569048047065735, "learning_rate": 1.6010919312359208e-10, "loss": 0.5369, "step": 15745 }, { "epoch": 1.0, "grad_norm": 0.8856031894683838, "learning_rate": 1.520037720820211e-10, "loss": 0.5598, "step": 15746 }, { "epoch": 1.0, "grad_norm": 0.8477560877799988, "learning_rate": 1.4410887724580768e-10, "loss": 0.5036, "step": 15747 }, { "epoch": 1.0, "grad_norm": 0.8959560394287109, "learning_rate": 1.3642450894801872e-10, "loss": 0.5094, "step": 15748 }, { "epoch": 1.0, "grad_norm": 0.896802544593811, "learning_rate": 1.2895066751283935e-10, "loss": 0.5458, "step": 15749 }, { "epoch": 1.0, "grad_norm": 0.8456780910491943, "learning_rate": 1.2168735325335246e-10, "loss": 0.5843, "step": 15750 }, { "epoch": 1.0, "grad_norm": 0.8784763813018799, "learning_rate": 1.1463456647708982e-10, "loss": 0.5836, "step": 15751 }, { "epoch": 1.0, "grad_norm": 0.900086522102356, "learning_rate": 1.0779230747992587e-10, "loss": 0.583, "step": 15752 }, { "epoch": 1.0, "grad_norm": 0.9559805393218994, "learning_rate": 1.0116057654996348e-10, "loss": 0.6462, "step": 15753 }, { "epoch": 1.0, "grad_norm": 0.9030122756958008, "learning_rate": 9.473937396697885e-11, "loss": 0.5079, "step": 15754 }, { "epoch": 1.0, "grad_norm": 0.8667322993278503, "learning_rate": 8.85287000013113e-11, "loss": 0.5711, "step": 15755 }, { "epoch": 1.0, "grad_norm": 0.9083278179168701, "learning_rate": 8.252855491386325e-11, "loss": 0.5124, "step": 15756 }, { "epoch": 1.0, "grad_norm": 0.8988203406333923, "learning_rate": 7.673893895776551e-11, "loss": 0.5766, "step": 15757 }, { "epoch": 1.0, "grad_norm": 0.9265889525413513, "learning_rate": 7.115985237726719e-11, "loss": 0.605, "step": 15758 }, { "epoch": 1.0, "grad_norm": 0.8614688515663147, "learning_rate": 6.57912954060702e-11, "loss": 0.5763, "step": 15759 }, { "epoch": 1.0, "grad_norm": 0.8665549755096436, "learning_rate": 6.063326827121518e-11, "loss": 0.5447, "step": 15760 }, { "epoch": 1.0, "grad_norm": 0.9184945225715637, "learning_rate": 5.5685771189750714e-11, "loss": 0.5477, "step": 15761 }, { "epoch": 1.0, "grad_norm": 0.9115839004516602, "learning_rate": 5.0948804369843616e-11, "loss": 0.5807, "step": 15762 }, { "epoch": 1.0, "grad_norm": 0.865247905254364, "learning_rate": 4.642236801022382e-11, "loss": 0.59, "step": 15763 }, { "epoch": 1.0, "grad_norm": 0.9395208358764648, "learning_rate": 4.210646230295989e-11, "loss": 0.553, "step": 15764 }, { "epoch": 1.0, "grad_norm": 0.8607648611068726, "learning_rate": 3.800108742846309e-11, "loss": 0.5839, "step": 15765 }, { "epoch": 1.0, "grad_norm": 0.9533546566963196, "learning_rate": 3.410624356048331e-11, "loss": 0.6237, "step": 15766 }, { "epoch": 1.0, "grad_norm": 0.8995264768600464, "learning_rate": 3.0421930862778446e-11, "loss": 0.5904, "step": 15767 }, { "epoch": 1.0, "grad_norm": 0.8825034499168396, "learning_rate": 2.6948149490224618e-11, "loss": 0.5683, "step": 15768 }, { "epoch": 1.0, "grad_norm": 0.8862566351890564, "learning_rate": 2.3684899589371256e-11, "loss": 0.5178, "step": 15769 }, { "epoch": 1.0, "grad_norm": 0.9058730006217957, "learning_rate": 2.063218129733091e-11, "loss": 0.5956, "step": 15770 }, { "epoch": 1.0, "grad_norm": 0.9475806355476379, "learning_rate": 1.7789994742889448e-11, "loss": 0.6292, "step": 15771 }, { "epoch": 1.0, "grad_norm": 0.8897664546966553, "learning_rate": 1.5158340045395847e-11, "loss": 0.5825, "step": 15772 }, { "epoch": 1.0, "grad_norm": 0.8877468705177307, "learning_rate": 1.2737217315872407e-11, "loss": 0.572, "step": 15773 }, { "epoch": 1.0, "grad_norm": 0.9111242294311523, "learning_rate": 1.0526626656459648e-11, "loss": 0.557, "step": 15774 }, { "epoch": 1.0, "grad_norm": 0.8383316397666931, "learning_rate": 8.526568160416304e-12, "loss": 0.5977, "step": 15775 }, { "epoch": 1.0, "grad_norm": 0.9318212270736694, "learning_rate": 6.737041911564213e-12, "loss": 0.608, "step": 15776 }, { "epoch": 1.0, "grad_norm": 0.901352047920227, "learning_rate": 5.158047984843428e-12, "loss": 0.5461, "step": 15777 }, { "epoch": 1.0, "grad_norm": 0.8835100531578064, "learning_rate": 3.789586447422444e-12, "loss": 0.5323, "step": 15778 }, { "epoch": 1.0, "grad_norm": 0.8877159953117371, "learning_rate": 2.631657356477746e-12, "loss": 0.5907, "step": 15779 }, { "epoch": 1.0, "grad_norm": 0.8495984077453613, "learning_rate": 1.6842607614142582e-12, "loss": 0.5492, "step": 15780 }, { "epoch": 1.0, "grad_norm": 0.9034891724586487, "learning_rate": 9.473967016448982e-13, "loss": 0.6355, "step": 15781 }, { "epoch": 1.0, "grad_norm": 0.9179747104644775, "learning_rate": 4.210652082559108e-13, "loss": 0.5649, "step": 15782 }, { "epoch": 1.0, "grad_norm": 0.877388060092926, "learning_rate": 1.0526630289664496e-13, "loss": 0.5585, "step": 15783 }, { "epoch": 1.0, "grad_norm": 0.8421609997749329, "learning_rate": 0.0, "loss": 0.5535, "step": 15784 }, { "epoch": 1.0, "step": 15784, "total_flos": 8.263869225865576e+18, "train_loss": 0.6036004031589656, "train_runtime": 161757.4173, "train_samples_per_second": 24.981, "train_steps_per_second": 0.098 } ], "logging_steps": 1.0, "max_steps": 15784, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8000, "total_flos": 8.263869225865576e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }