{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 6906, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9230769230769234e-07, "loss": 3.2464, "step": 2 }, { "epoch": 0.0, "learning_rate": 3.846153846153847e-07, "loss": 3.1627, "step": 4 }, { "epoch": 0.0, "learning_rate": 5.76923076923077e-07, "loss": 3.1188, "step": 6 }, { "epoch": 0.0, "learning_rate": 7.692307692307694e-07, "loss": 3.2139, "step": 8 }, { "epoch": 0.0, "learning_rate": 9.615384615384617e-07, "loss": 3.0034, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.153846153846154e-06, "loss": 3.1531, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.3461538461538462e-06, "loss": 3.1241, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.5384615384615387e-06, "loss": 3.0655, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.7307692307692308e-06, "loss": 3.0326, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.9230769230769234e-06, "loss": 3.0302, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.1153846153846155e-06, "loss": 2.9642, "step": 22 }, { "epoch": 0.01, "learning_rate": 2.307692307692308e-06, "loss": 2.8971, "step": 24 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 2.9549, "step": 26 }, { "epoch": 0.01, "learning_rate": 2.6923076923076923e-06, "loss": 3.0163, "step": 28 }, { "epoch": 0.01, "learning_rate": 2.8846153846153845e-06, "loss": 2.8992, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.0769230769230774e-06, "loss": 2.9378, "step": 32 }, { "epoch": 0.01, "learning_rate": 3.2692307692307696e-06, "loss": 2.9469, "step": 34 }, { "epoch": 0.01, "learning_rate": 3.4615384615384617e-06, "loss": 2.8905, "step": 36 }, { "epoch": 0.01, "learning_rate": 3.653846153846154e-06, "loss": 2.9661, "step": 38 }, { "epoch": 0.01, "learning_rate": 3.846153846153847e-06, "loss": 3.003, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.0384615384615385e-06, "loss": 2.9465, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.230769230769231e-06, "loss": 2.8778, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.423076923076924e-06, "loss": 2.8636, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.615384615384616e-06, "loss": 2.8111, "step": 48 }, { "epoch": 0.01, "learning_rate": 4.807692307692308e-06, "loss": 2.8998, "step": 50 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 2.7667, "step": 52 }, { "epoch": 0.02, "learning_rate": 5.192307692307693e-06, "loss": 2.8239, "step": 54 }, { "epoch": 0.02, "learning_rate": 5.384615384615385e-06, "loss": 2.82, "step": 56 }, { "epoch": 0.02, "learning_rate": 5.576923076923077e-06, "loss": 2.8308, "step": 58 }, { "epoch": 0.02, "learning_rate": 5.769230769230769e-06, "loss": 2.8225, "step": 60 }, { "epoch": 0.02, "learning_rate": 5.961538461538462e-06, "loss": 2.771, "step": 62 }, { "epoch": 0.02, "learning_rate": 6.153846153846155e-06, "loss": 2.7791, "step": 64 }, { "epoch": 0.02, "learning_rate": 6.3461538461538466e-06, "loss": 2.81, "step": 66 }, { "epoch": 0.02, "learning_rate": 6.538461538461539e-06, "loss": 2.698, "step": 68 }, { "epoch": 0.02, "learning_rate": 6.730769230769232e-06, "loss": 2.7116, "step": 70 }, { "epoch": 0.02, "learning_rate": 6.923076923076923e-06, "loss": 2.7821, "step": 72 }, { "epoch": 0.02, "learning_rate": 7.115384615384616e-06, "loss": 2.7261, "step": 74 }, { "epoch": 0.02, "learning_rate": 7.307692307692308e-06, "loss": 2.6107, "step": 76 }, { "epoch": 0.02, "learning_rate": 7.500000000000001e-06, "loss": 2.7644, "step": 78 }, { "epoch": 0.02, "learning_rate": 7.692307692307694e-06, "loss": 2.6608, "step": 80 }, { "epoch": 0.02, "learning_rate": 7.884615384615384e-06, "loss": 2.7882, "step": 82 }, { "epoch": 0.02, "learning_rate": 8.076923076923077e-06, "loss": 2.7254, "step": 84 }, { "epoch": 0.02, "learning_rate": 8.26923076923077e-06, "loss": 2.6863, "step": 86 }, { "epoch": 0.03, "learning_rate": 8.461538461538462e-06, "loss": 2.7382, "step": 88 }, { "epoch": 0.03, "learning_rate": 8.653846153846155e-06, "loss": 2.7564, "step": 90 }, { "epoch": 0.03, "learning_rate": 8.846153846153847e-06, "loss": 2.7662, "step": 92 }, { "epoch": 0.03, "learning_rate": 9.03846153846154e-06, "loss": 2.6772, "step": 94 }, { "epoch": 0.03, "learning_rate": 9.230769230769232e-06, "loss": 2.6868, "step": 96 }, { "epoch": 0.03, "learning_rate": 9.423076923076923e-06, "loss": 2.7084, "step": 98 }, { "epoch": 0.03, "learning_rate": 9.615384615384616e-06, "loss": 2.7353, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.807692307692308e-06, "loss": 2.7682, "step": 102 }, { "epoch": 0.03, "learning_rate": 1e-05, "loss": 2.7172, "step": 104 }, { "epoch": 0.03, "learning_rate": 1.0192307692307692e-05, "loss": 2.6715, "step": 106 }, { "epoch": 0.03, "learning_rate": 1.0384615384615386e-05, "loss": 2.8149, "step": 108 }, { "epoch": 0.03, "learning_rate": 1.0576923076923078e-05, "loss": 2.72, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.076923076923077e-05, "loss": 2.6134, "step": 112 }, { "epoch": 0.03, "learning_rate": 1.0961538461538464e-05, "loss": 2.7016, "step": 114 }, { "epoch": 0.03, "learning_rate": 1.1153846153846154e-05, "loss": 2.7647, "step": 116 }, { "epoch": 0.03, "learning_rate": 1.1346153846153847e-05, "loss": 2.6968, "step": 118 }, { "epoch": 0.03, "learning_rate": 1.1538461538461538e-05, "loss": 2.7275, "step": 120 }, { "epoch": 0.04, "learning_rate": 1.1730769230769232e-05, "loss": 2.6384, "step": 122 }, { "epoch": 0.04, "learning_rate": 1.1923076923076925e-05, "loss": 2.6941, "step": 124 }, { "epoch": 0.04, "learning_rate": 1.2115384615384615e-05, "loss": 2.704, "step": 126 }, { "epoch": 0.04, "learning_rate": 1.230769230769231e-05, "loss": 2.6528, "step": 128 }, { "epoch": 0.04, "learning_rate": 1.25e-05, "loss": 2.6703, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.2692307692307693e-05, "loss": 2.6419, "step": 132 }, { "epoch": 0.04, "learning_rate": 1.2884615384615386e-05, "loss": 2.546, "step": 134 }, { "epoch": 0.04, "learning_rate": 1.3076923076923078e-05, "loss": 2.6544, "step": 136 }, { "epoch": 0.04, "learning_rate": 1.3269230769230769e-05, "loss": 2.6557, "step": 138 }, { "epoch": 0.04, "learning_rate": 1.3461538461538463e-05, "loss": 2.6297, "step": 140 }, { "epoch": 0.04, "learning_rate": 1.3653846153846156e-05, "loss": 2.5931, "step": 142 }, { "epoch": 0.04, "learning_rate": 1.3846153846153847e-05, "loss": 2.6423, "step": 144 }, { "epoch": 0.04, "learning_rate": 1.403846153846154e-05, "loss": 2.5605, "step": 146 }, { "epoch": 0.04, "learning_rate": 1.4230769230769232e-05, "loss": 2.6118, "step": 148 }, { "epoch": 0.04, "learning_rate": 1.4423076923076924e-05, "loss": 2.6806, "step": 150 }, { "epoch": 0.04, "learning_rate": 1.4615384615384615e-05, "loss": 2.6614, "step": 152 }, { "epoch": 0.04, "learning_rate": 1.480769230769231e-05, "loss": 2.6027, "step": 154 }, { "epoch": 0.05, "learning_rate": 1.5000000000000002e-05, "loss": 2.6462, "step": 156 }, { "epoch": 0.05, "learning_rate": 1.5192307692307693e-05, "loss": 2.646, "step": 158 }, { "epoch": 0.05, "learning_rate": 1.5384615384615387e-05, "loss": 2.5606, "step": 160 }, { "epoch": 0.05, "learning_rate": 1.557692307692308e-05, "loss": 2.5469, "step": 162 }, { "epoch": 0.05, "learning_rate": 1.576923076923077e-05, "loss": 2.5603, "step": 164 }, { "epoch": 0.05, "learning_rate": 1.5961538461538465e-05, "loss": 2.6135, "step": 166 }, { "epoch": 0.05, "learning_rate": 1.6153846153846154e-05, "loss": 2.6702, "step": 168 }, { "epoch": 0.05, "learning_rate": 1.6346153846153847e-05, "loss": 2.6701, "step": 170 }, { "epoch": 0.05, "learning_rate": 1.653846153846154e-05, "loss": 2.6189, "step": 172 }, { "epoch": 0.05, "learning_rate": 1.673076923076923e-05, "loss": 2.5349, "step": 174 }, { "epoch": 0.05, "learning_rate": 1.6923076923076924e-05, "loss": 2.5305, "step": 176 }, { "epoch": 0.05, "learning_rate": 1.7115384615384617e-05, "loss": 2.6062, "step": 178 }, { "epoch": 0.05, "learning_rate": 1.730769230769231e-05, "loss": 2.5689, "step": 180 }, { "epoch": 0.05, "learning_rate": 1.7500000000000002e-05, "loss": 2.5936, "step": 182 }, { "epoch": 0.05, "learning_rate": 1.7692307692307694e-05, "loss": 2.4798, "step": 184 }, { "epoch": 0.05, "learning_rate": 1.7884615384615387e-05, "loss": 2.6349, "step": 186 }, { "epoch": 0.05, "learning_rate": 1.807692307692308e-05, "loss": 2.4817, "step": 188 }, { "epoch": 0.06, "learning_rate": 1.826923076923077e-05, "loss": 2.5275, "step": 190 }, { "epoch": 0.06, "learning_rate": 1.8461538461538465e-05, "loss": 2.5682, "step": 192 }, { "epoch": 0.06, "learning_rate": 1.8653846153846157e-05, "loss": 2.5253, "step": 194 }, { "epoch": 0.06, "learning_rate": 1.8846153846153846e-05, "loss": 2.4439, "step": 196 }, { "epoch": 0.06, "learning_rate": 1.903846153846154e-05, "loss": 2.5073, "step": 198 }, { "epoch": 0.06, "learning_rate": 1.923076923076923e-05, "loss": 2.6221, "step": 200 }, { "epoch": 0.06, "learning_rate": 1.9423076923076924e-05, "loss": 2.6206, "step": 202 }, { "epoch": 0.06, "learning_rate": 1.9615384615384617e-05, "loss": 2.5148, "step": 204 }, { "epoch": 0.06, "learning_rate": 1.980769230769231e-05, "loss": 2.5907, "step": 206 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 2.5529, "step": 208 }, { "epoch": 0.06, "learning_rate": 1.9999995600134276e-05, "loss": 2.575, "step": 210 }, { "epoch": 0.06, "learning_rate": 1.999998240054098e-05, "loss": 2.5796, "step": 212 }, { "epoch": 0.06, "learning_rate": 1.9999960401231717e-05, "loss": 2.5095, "step": 214 }, { "epoch": 0.06, "learning_rate": 1.9999929602225854e-05, "loss": 2.6128, "step": 216 }, { "epoch": 0.06, "learning_rate": 1.9999890003550492e-05, "loss": 2.4593, "step": 218 }, { "epoch": 0.06, "learning_rate": 1.9999841605240474e-05, "loss": 2.5003, "step": 220 }, { "epoch": 0.06, "learning_rate": 1.9999784407338392e-05, "loss": 2.4851, "step": 222 }, { "epoch": 0.06, "learning_rate": 1.9999718409894578e-05, "loss": 2.5124, "step": 224 }, { "epoch": 0.07, "learning_rate": 1.9999643612967107e-05, "loss": 2.5252, "step": 226 }, { "epoch": 0.07, "learning_rate": 1.9999560016621803e-05, "loss": 2.5795, "step": 228 }, { "epoch": 0.07, "learning_rate": 1.999946762093222e-05, "loss": 2.5298, "step": 230 }, { "epoch": 0.07, "learning_rate": 1.999936642597967e-05, "loss": 2.5648, "step": 232 }, { "epoch": 0.07, "learning_rate": 1.99992564318532e-05, "loss": 2.4464, "step": 234 }, { "epoch": 0.07, "learning_rate": 1.9999137638649602e-05, "loss": 2.4772, "step": 236 }, { "epoch": 0.07, "learning_rate": 1.999901004647341e-05, "loss": 2.5088, "step": 238 }, { "epoch": 0.07, "learning_rate": 1.9998873655436905e-05, "loss": 2.5665, "step": 240 }, { "epoch": 0.07, "learning_rate": 1.9998728465660105e-05, "loss": 2.5304, "step": 242 }, { "epoch": 0.07, "learning_rate": 1.999857447727077e-05, "loss": 2.4762, "step": 244 }, { "epoch": 0.07, "learning_rate": 1.9998411690404414e-05, "loss": 2.4636, "step": 246 }, { "epoch": 0.07, "learning_rate": 1.9998240105204278e-05, "loss": 2.5426, "step": 248 }, { "epoch": 0.07, "learning_rate": 1.999805972182135e-05, "loss": 2.5622, "step": 250 }, { "epoch": 0.07, "learning_rate": 1.9997870540414368e-05, "loss": 2.4804, "step": 252 }, { "epoch": 0.07, "learning_rate": 1.9997672561149807e-05, "loss": 2.4921, "step": 254 }, { "epoch": 0.07, "learning_rate": 1.9997465784201883e-05, "loss": 2.4792, "step": 256 }, { "epoch": 0.07, "learning_rate": 1.9997250209752553e-05, "loss": 2.4961, "step": 258 }, { "epoch": 0.08, "learning_rate": 1.9997025837991514e-05, "loss": 2.5079, "step": 260 }, { "epoch": 0.08, "learning_rate": 1.9996910353180503e-05, "loss": 2.5027, "step": 262 }, { "epoch": 0.08, "learning_rate": 1.999667278582453e-05, "loss": 2.5219, "step": 264 }, { "epoch": 0.08, "learning_rate": 1.9996426421664963e-05, "loss": 2.5153, "step": 266 }, { "epoch": 0.08, "learning_rate": 1.9996171260918603e-05, "loss": 2.4486, "step": 268 }, { "epoch": 0.08, "learning_rate": 1.999590730380998e-05, "loss": 2.473, "step": 270 }, { "epoch": 0.08, "learning_rate": 1.9995634550571377e-05, "loss": 2.415, "step": 272 }, { "epoch": 0.08, "learning_rate": 1.9995353001442802e-05, "loss": 2.4859, "step": 274 }, { "epoch": 0.08, "learning_rate": 1.9995062656672013e-05, "loss": 2.437, "step": 276 }, { "epoch": 0.08, "learning_rate": 1.9994763516514507e-05, "loss": 2.403, "step": 278 }, { "epoch": 0.08, "learning_rate": 1.9994455581233514e-05, "loss": 2.4603, "step": 280 }, { "epoch": 0.08, "learning_rate": 1.9994138851100016e-05, "loss": 2.4633, "step": 282 }, { "epoch": 0.08, "learning_rate": 1.9993813326392725e-05, "loss": 2.3986, "step": 284 }, { "epoch": 0.08, "learning_rate": 1.9993479007398088e-05, "loss": 2.481, "step": 286 }, { "epoch": 0.08, "learning_rate": 1.9993135894410304e-05, "loss": 2.4863, "step": 288 }, { "epoch": 0.08, "learning_rate": 1.9992783987731303e-05, "loss": 2.3833, "step": 290 }, { "epoch": 0.08, "learning_rate": 1.9992423287670747e-05, "loss": 2.3884, "step": 292 }, { "epoch": 0.09, "learning_rate": 1.999205379454605e-05, "loss": 2.4209, "step": 294 }, { "epoch": 0.09, "learning_rate": 1.999167550868235e-05, "loss": 2.4955, "step": 296 }, { "epoch": 0.09, "learning_rate": 1.999128843041253e-05, "loss": 2.4296, "step": 298 }, { "epoch": 0.09, "learning_rate": 1.999089256007721e-05, "loss": 2.3886, "step": 300 }, { "epoch": 0.09, "learning_rate": 1.999048789802475e-05, "loss": 2.3798, "step": 302 }, { "epoch": 0.09, "learning_rate": 1.999007444461123e-05, "loss": 2.3672, "step": 304 }, { "epoch": 0.09, "learning_rate": 1.9989652200200487e-05, "loss": 2.4881, "step": 306 }, { "epoch": 0.09, "learning_rate": 1.9989221165164082e-05, "loss": 2.4644, "step": 308 }, { "epoch": 0.09, "learning_rate": 1.998878133988131e-05, "loss": 2.4128, "step": 310 }, { "epoch": 0.09, "learning_rate": 1.9988332724739215e-05, "loss": 2.3347, "step": 312 }, { "epoch": 0.09, "learning_rate": 1.998787532013256e-05, "loss": 2.4759, "step": 314 }, { "epoch": 0.09, "learning_rate": 1.9987409126463844e-05, "loss": 2.4638, "step": 316 }, { "epoch": 0.09, "learning_rate": 1.9986934144143316e-05, "loss": 2.413, "step": 318 }, { "epoch": 0.09, "learning_rate": 1.9986450373588946e-05, "loss": 2.4634, "step": 320 }, { "epoch": 0.09, "learning_rate": 1.998595781522643e-05, "loss": 2.3494, "step": 322 }, { "epoch": 0.09, "learning_rate": 1.9985456469489214e-05, "loss": 2.4045, "step": 324 }, { "epoch": 0.09, "learning_rate": 1.9984946336818462e-05, "loss": 2.3463, "step": 326 }, { "epoch": 0.09, "learning_rate": 1.9984427417663085e-05, "loss": 2.3993, "step": 328 }, { "epoch": 0.1, "learning_rate": 1.9983899712479715e-05, "loss": 2.4054, "step": 330 }, { "epoch": 0.1, "learning_rate": 1.9983363221732715e-05, "loss": 2.4889, "step": 332 }, { "epoch": 0.1, "learning_rate": 1.998281794589419e-05, "loss": 2.3572, "step": 334 }, { "epoch": 0.1, "learning_rate": 1.998226388544396e-05, "loss": 2.3883, "step": 336 }, { "epoch": 0.1, "learning_rate": 1.9981701040869585e-05, "loss": 2.4365, "step": 338 }, { "epoch": 0.1, "learning_rate": 1.9981129412666358e-05, "loss": 2.452, "step": 340 }, { "epoch": 0.1, "learning_rate": 1.998054900133729e-05, "loss": 2.3221, "step": 342 }, { "epoch": 0.1, "learning_rate": 1.9979959807393133e-05, "loss": 2.355, "step": 344 }, { "epoch": 0.1, "learning_rate": 1.997936183135236e-05, "loss": 2.3635, "step": 346 }, { "epoch": 0.1, "learning_rate": 1.997875507374117e-05, "loss": 2.3158, "step": 348 }, { "epoch": 0.1, "learning_rate": 1.9978139535093496e-05, "loss": 2.4037, "step": 350 }, { "epoch": 0.1, "learning_rate": 1.9977515215951e-05, "loss": 2.4657, "step": 352 }, { "epoch": 0.1, "learning_rate": 1.997688211686306e-05, "loss": 2.4044, "step": 354 }, { "epoch": 0.1, "learning_rate": 1.9976240238386787e-05, "loss": 2.4106, "step": 356 }, { "epoch": 0.1, "learning_rate": 1.9975589581087023e-05, "loss": 2.3555, "step": 358 }, { "epoch": 0.1, "learning_rate": 1.9974930145536323e-05, "loss": 2.349, "step": 360 }, { "epoch": 0.1, "learning_rate": 1.997426193231497e-05, "loss": 2.339, "step": 362 }, { "epoch": 0.11, "learning_rate": 1.997358494201098e-05, "loss": 2.3412, "step": 364 }, { "epoch": 0.11, "learning_rate": 1.9972899175220082e-05, "loss": 2.3466, "step": 366 }, { "epoch": 0.11, "learning_rate": 1.9972204632545736e-05, "loss": 2.3652, "step": 368 }, { "epoch": 0.11, "learning_rate": 1.997150131459912e-05, "loss": 2.4157, "step": 370 }, { "epoch": 0.11, "learning_rate": 1.997078922199913e-05, "loss": 2.3888, "step": 372 }, { "epoch": 0.11, "learning_rate": 1.9970068355372395e-05, "loss": 2.4248, "step": 374 }, { "epoch": 0.11, "learning_rate": 1.9969338715353255e-05, "loss": 2.4166, "step": 376 }, { "epoch": 0.11, "learning_rate": 1.9968600302583774e-05, "loss": 2.3273, "step": 378 }, { "epoch": 0.11, "learning_rate": 1.9967853117713738e-05, "loss": 2.4131, "step": 380 }, { "epoch": 0.11, "learning_rate": 1.9967097161400643e-05, "loss": 2.4415, "step": 382 }, { "epoch": 0.11, "learning_rate": 1.9966332434309716e-05, "loss": 2.3656, "step": 384 }, { "epoch": 0.11, "learning_rate": 1.9965558937113897e-05, "loss": 2.3778, "step": 386 }, { "epoch": 0.11, "learning_rate": 1.996477667049384e-05, "loss": 2.3751, "step": 388 }, { "epoch": 0.11, "learning_rate": 1.9963985635137914e-05, "loss": 2.3504, "step": 390 }, { "epoch": 0.11, "learning_rate": 1.996318583174222e-05, "loss": 2.3476, "step": 392 }, { "epoch": 0.11, "learning_rate": 1.9962377261010554e-05, "loss": 2.3413, "step": 394 }, { "epoch": 0.11, "learning_rate": 1.996155992365444e-05, "loss": 2.3356, "step": 396 }, { "epoch": 0.12, "learning_rate": 1.9960733820393114e-05, "loss": 2.27, "step": 398 }, { "epoch": 0.12, "learning_rate": 1.9959898951953526e-05, "loss": 2.296, "step": 400 }, { "epoch": 0.12, "learning_rate": 1.9959055319070332e-05, "loss": 2.4112, "step": 402 }, { "epoch": 0.12, "learning_rate": 1.995820292248591e-05, "loss": 2.4603, "step": 404 }, { "epoch": 0.12, "learning_rate": 1.9957341762950346e-05, "loss": 2.3315, "step": 406 }, { "epoch": 0.12, "learning_rate": 1.995647184122144e-05, "loss": 2.3084, "step": 408 }, { "epoch": 0.12, "learning_rate": 1.9955593158064693e-05, "loss": 2.315, "step": 410 }, { "epoch": 0.12, "learning_rate": 1.9954705714253327e-05, "loss": 2.4334, "step": 412 }, { "epoch": 0.12, "learning_rate": 1.9953809510568272e-05, "loss": 2.4083, "step": 414 }, { "epoch": 0.12, "learning_rate": 1.9952904547798156e-05, "loss": 2.3217, "step": 416 }, { "epoch": 0.12, "learning_rate": 1.9951990826739326e-05, "loss": 2.3572, "step": 418 }, { "epoch": 0.12, "learning_rate": 1.995106834819583e-05, "loss": 2.2781, "step": 420 }, { "epoch": 0.12, "learning_rate": 1.9950137112979425e-05, "loss": 2.3916, "step": 422 }, { "epoch": 0.12, "learning_rate": 1.9949197121909577e-05, "loss": 2.3118, "step": 424 }, { "epoch": 0.12, "learning_rate": 1.9948248375813446e-05, "loss": 2.3829, "step": 426 }, { "epoch": 0.12, "learning_rate": 1.994729087552591e-05, "loss": 2.31, "step": 428 }, { "epoch": 0.12, "learning_rate": 1.9946324621889538e-05, "loss": 2.3093, "step": 430 }, { "epoch": 0.13, "learning_rate": 1.9945349615754607e-05, "loss": 2.3738, "step": 432 }, { "epoch": 0.13, "learning_rate": 1.9944365857979102e-05, "loss": 2.2934, "step": 434 }, { "epoch": 0.13, "learning_rate": 1.99433733494287e-05, "loss": 2.3326, "step": 436 }, { "epoch": 0.13, "learning_rate": 1.994237209097678e-05, "loss": 2.3131, "step": 438 }, { "epoch": 0.13, "learning_rate": 1.9941362083504426e-05, "loss": 2.2382, "step": 440 }, { "epoch": 0.13, "learning_rate": 1.9940343327900417e-05, "loss": 2.3515, "step": 442 }, { "epoch": 0.13, "learning_rate": 1.9939315825061226e-05, "loss": 2.3921, "step": 444 }, { "epoch": 0.13, "learning_rate": 1.9938279575891034e-05, "loss": 2.3123, "step": 446 }, { "epoch": 0.13, "learning_rate": 1.993723458130171e-05, "loss": 2.244, "step": 448 }, { "epoch": 0.13, "learning_rate": 1.9936180842212817e-05, "loss": 2.2958, "step": 450 }, { "epoch": 0.13, "learning_rate": 1.9935118359551623e-05, "loss": 2.3451, "step": 452 }, { "epoch": 0.13, "learning_rate": 1.9934047134253083e-05, "loss": 2.3387, "step": 454 }, { "epoch": 0.13, "learning_rate": 1.9932967167259846e-05, "loss": 2.2762, "step": 456 }, { "epoch": 0.13, "learning_rate": 1.9931878459522253e-05, "loss": 2.3059, "step": 458 }, { "epoch": 0.13, "learning_rate": 1.993078101199834e-05, "loss": 2.2627, "step": 460 }, { "epoch": 0.13, "learning_rate": 1.9929674825653828e-05, "loss": 2.3369, "step": 462 }, { "epoch": 0.13, "learning_rate": 1.992855990146213e-05, "loss": 2.3144, "step": 464 }, { "epoch": 0.13, "learning_rate": 1.992743624040436e-05, "loss": 2.3968, "step": 466 }, { "epoch": 0.14, "learning_rate": 1.9926303843469298e-05, "loss": 2.3522, "step": 468 }, { "epoch": 0.14, "learning_rate": 1.9925162711653426e-05, "loss": 2.3348, "step": 470 }, { "epoch": 0.14, "learning_rate": 1.992401284596091e-05, "loss": 2.2975, "step": 472 }, { "epoch": 0.14, "learning_rate": 1.99228542474036e-05, "loss": 2.3592, "step": 474 }, { "epoch": 0.14, "learning_rate": 1.9921686917001034e-05, "loss": 2.2853, "step": 476 }, { "epoch": 0.14, "learning_rate": 1.9920510855780427e-05, "loss": 2.3495, "step": 478 }, { "epoch": 0.14, "learning_rate": 1.991932606477669e-05, "loss": 2.3033, "step": 480 }, { "epoch": 0.14, "learning_rate": 1.9918132545032395e-05, "loss": 2.311, "step": 482 }, { "epoch": 0.14, "learning_rate": 1.9916930297597817e-05, "loss": 2.3419, "step": 484 }, { "epoch": 0.14, "learning_rate": 1.99157193235309e-05, "loss": 2.3111, "step": 486 }, { "epoch": 0.14, "learning_rate": 1.9914499623897267e-05, "loss": 2.2323, "step": 488 }, { "epoch": 0.14, "learning_rate": 1.991327119977022e-05, "loss": 2.2785, "step": 490 }, { "epoch": 0.14, "learning_rate": 1.991203405223074e-05, "loss": 2.2417, "step": 492 }, { "epoch": 0.14, "learning_rate": 1.991078818236748e-05, "loss": 2.2489, "step": 494 }, { "epoch": 0.14, "learning_rate": 1.9909533591276783e-05, "loss": 2.3443, "step": 496 }, { "epoch": 0.14, "learning_rate": 1.9908270280062643e-05, "loss": 2.2609, "step": 498 }, { "epoch": 0.14, "learning_rate": 1.9906998249836747e-05, "loss": 2.2537, "step": 500 }, { "epoch": 0.15, "learning_rate": 1.9905717501718443e-05, "loss": 2.2982, "step": 502 }, { "epoch": 0.15, "learning_rate": 1.990442803683476e-05, "loss": 2.3378, "step": 504 }, { "epoch": 0.15, "learning_rate": 1.990312985632039e-05, "loss": 2.3748, "step": 506 }, { "epoch": 0.15, "learning_rate": 1.99018229613177e-05, "loss": 2.3103, "step": 508 }, { "epoch": 0.15, "learning_rate": 1.9900507352976714e-05, "loss": 2.2708, "step": 510 }, { "epoch": 0.15, "learning_rate": 1.989918303245514e-05, "loss": 2.266, "step": 512 }, { "epoch": 0.15, "learning_rate": 1.989785000091834e-05, "loss": 2.2582, "step": 514 }, { "epoch": 0.15, "learning_rate": 1.9896508259539352e-05, "loss": 2.3013, "step": 516 }, { "epoch": 0.15, "learning_rate": 1.989515780949887e-05, "loss": 2.3412, "step": 518 }, { "epoch": 0.15, "learning_rate": 1.9893798651985244e-05, "loss": 2.2531, "step": 520 }, { "epoch": 0.15, "learning_rate": 1.9892430788194513e-05, "loss": 2.3357, "step": 522 }, { "epoch": 0.15, "learning_rate": 1.9891054219330346e-05, "loss": 2.3023, "step": 524 }, { "epoch": 0.15, "learning_rate": 1.9889668946604096e-05, "loss": 2.2433, "step": 526 }, { "epoch": 0.15, "learning_rate": 1.9888274971234757e-05, "loss": 2.3815, "step": 528 }, { "epoch": 0.15, "learning_rate": 1.9886872294449e-05, "loss": 2.3272, "step": 530 }, { "epoch": 0.15, "learning_rate": 1.9885460917481137e-05, "loss": 2.3754, "step": 532 }, { "epoch": 0.15, "learning_rate": 1.988404084157314e-05, "loss": 2.2655, "step": 534 }, { "epoch": 0.16, "learning_rate": 1.9882612067974643e-05, "loss": 2.2984, "step": 536 }, { "epoch": 0.16, "learning_rate": 1.9881174597942923e-05, "loss": 2.3155, "step": 538 }, { "epoch": 0.16, "learning_rate": 1.987972843274292e-05, "loss": 2.2577, "step": 540 }, { "epoch": 0.16, "learning_rate": 1.9878273573647218e-05, "loss": 2.2707, "step": 542 }, { "epoch": 0.16, "learning_rate": 1.9876810021936053e-05, "loss": 2.3636, "step": 544 }, { "epoch": 0.16, "learning_rate": 1.9875337778897316e-05, "loss": 2.3111, "step": 546 }, { "epoch": 0.16, "learning_rate": 1.9873856845826534e-05, "loss": 2.2515, "step": 548 }, { "epoch": 0.16, "learning_rate": 1.9872367224026895e-05, "loss": 2.2661, "step": 550 }, { "epoch": 0.16, "learning_rate": 1.9870868914809223e-05, "loss": 2.3141, "step": 552 }, { "epoch": 0.16, "learning_rate": 1.9869361919491988e-05, "loss": 2.2279, "step": 554 }, { "epoch": 0.16, "learning_rate": 1.986784623940131e-05, "loss": 2.2689, "step": 556 }, { "epoch": 0.16, "learning_rate": 1.9866321875870944e-05, "loss": 2.3534, "step": 558 }, { "epoch": 0.16, "learning_rate": 1.9864788830242285e-05, "loss": 2.3154, "step": 560 }, { "epoch": 0.16, "learning_rate": 1.986324710386438e-05, "loss": 2.1851, "step": 562 }, { "epoch": 0.16, "learning_rate": 1.98616966980939e-05, "loss": 2.3048, "step": 564 }, { "epoch": 0.16, "learning_rate": 1.986013761429517e-05, "loss": 2.2319, "step": 566 }, { "epoch": 0.16, "learning_rate": 1.9858569853840133e-05, "loss": 2.2685, "step": 568 }, { "epoch": 0.17, "learning_rate": 1.9856993418108376e-05, "loss": 2.2988, "step": 570 }, { "epoch": 0.17, "learning_rate": 1.9855408308487124e-05, "loss": 2.3465, "step": 572 }, { "epoch": 0.17, "learning_rate": 1.9853814526371228e-05, "loss": 2.2297, "step": 574 }, { "epoch": 0.17, "learning_rate": 1.985221207316318e-05, "loss": 2.303, "step": 576 }, { "epoch": 0.17, "learning_rate": 1.9850600950273086e-05, "loss": 2.3906, "step": 578 }, { "epoch": 0.17, "learning_rate": 1.9848981159118697e-05, "loss": 2.2367, "step": 580 }, { "epoch": 0.17, "learning_rate": 1.9847352701125387e-05, "loss": 2.2816, "step": 582 }, { "epoch": 0.17, "learning_rate": 1.9845715577726147e-05, "loss": 2.2639, "step": 584 }, { "epoch": 0.17, "learning_rate": 1.9844069790361612e-05, "loss": 2.2174, "step": 586 }, { "epoch": 0.17, "learning_rate": 1.9842415340480027e-05, "loss": 2.3075, "step": 588 }, { "epoch": 0.17, "learning_rate": 1.984075222953726e-05, "loss": 2.2894, "step": 590 }, { "epoch": 0.17, "learning_rate": 1.9839080458996807e-05, "loss": 2.242, "step": 592 }, { "epoch": 0.17, "learning_rate": 1.983740003032978e-05, "loss": 2.3078, "step": 594 }, { "epoch": 0.17, "learning_rate": 1.983571094501491e-05, "loss": 2.2462, "step": 596 }, { "epoch": 0.17, "learning_rate": 1.983401320453855e-05, "loss": 2.2419, "step": 598 }, { "epoch": 0.17, "learning_rate": 1.9832306810394665e-05, "loss": 2.2563, "step": 600 }, { "epoch": 0.17, "learning_rate": 1.983059176408484e-05, "loss": 2.2591, "step": 602 }, { "epoch": 0.17, "learning_rate": 1.9828868067118255e-05, "loss": 2.2378, "step": 604 }, { "epoch": 0.18, "learning_rate": 1.9827135721011735e-05, "loss": 2.2947, "step": 606 }, { "epoch": 0.18, "learning_rate": 1.982539472728969e-05, "loss": 2.2471, "step": 608 }, { "epoch": 0.18, "learning_rate": 1.9823645087484145e-05, "loss": 2.2437, "step": 610 }, { "epoch": 0.18, "learning_rate": 1.982188680313474e-05, "loss": 2.1749, "step": 612 }, { "epoch": 0.18, "learning_rate": 1.982011987578872e-05, "loss": 2.2623, "step": 614 }, { "epoch": 0.18, "learning_rate": 1.9818344307000925e-05, "loss": 2.1988, "step": 616 }, { "epoch": 0.18, "learning_rate": 1.9816560098333817e-05, "loss": 2.1493, "step": 618 }, { "epoch": 0.18, "learning_rate": 1.981476725135745e-05, "loss": 2.2578, "step": 620 }, { "epoch": 0.18, "learning_rate": 1.9812965767649476e-05, "loss": 2.258, "step": 622 }, { "epoch": 0.18, "learning_rate": 1.9811155648795157e-05, "loss": 2.3665, "step": 624 }, { "epoch": 0.18, "learning_rate": 1.9809336896387347e-05, "loss": 2.2792, "step": 626 }, { "epoch": 0.18, "learning_rate": 1.9807509512026498e-05, "loss": 2.1625, "step": 628 }, { "epoch": 0.18, "learning_rate": 1.9805673497320662e-05, "loss": 2.2143, "step": 630 }, { "epoch": 0.18, "learning_rate": 1.9803828853885486e-05, "loss": 2.3148, "step": 632 }, { "epoch": 0.18, "learning_rate": 1.98019755833442e-05, "loss": 2.1424, "step": 634 }, { "epoch": 0.18, "learning_rate": 1.9800113687327632e-05, "loss": 2.3551, "step": 636 }, { "epoch": 0.18, "learning_rate": 1.9798243167474206e-05, "loss": 2.1862, "step": 638 }, { "epoch": 0.19, "learning_rate": 1.9796364025429925e-05, "loss": 2.2895, "step": 640 }, { "epoch": 0.19, "learning_rate": 1.9794476262848387e-05, "loss": 2.2618, "step": 642 }, { "epoch": 0.19, "learning_rate": 1.979257988139077e-05, "loss": 2.2236, "step": 644 }, { "epoch": 0.19, "learning_rate": 1.979067488272584e-05, "loss": 2.2903, "step": 646 }, { "epoch": 0.19, "learning_rate": 1.9788761268529942e-05, "loss": 2.2946, "step": 648 }, { "epoch": 0.19, "learning_rate": 1.9786839040487008e-05, "loss": 2.2217, "step": 650 }, { "epoch": 0.19, "learning_rate": 1.9784908200288546e-05, "loss": 2.2633, "step": 652 }, { "epoch": 0.19, "learning_rate": 1.978296874963364e-05, "loss": 2.2014, "step": 654 }, { "epoch": 0.19, "learning_rate": 1.9781020690228962e-05, "loss": 2.1605, "step": 656 }, { "epoch": 0.19, "learning_rate": 1.9779064023788747e-05, "loss": 2.1578, "step": 658 }, { "epoch": 0.19, "learning_rate": 1.977709875203481e-05, "loss": 2.2553, "step": 660 }, { "epoch": 0.19, "learning_rate": 1.9775124876696538e-05, "loss": 2.2279, "step": 662 }, { "epoch": 0.19, "learning_rate": 1.9773142399510886e-05, "loss": 2.2431, "step": 664 }, { "epoch": 0.19, "learning_rate": 1.9771151322222383e-05, "loss": 2.2859, "step": 666 }, { "epoch": 0.19, "learning_rate": 1.9769151646583122e-05, "loss": 2.2062, "step": 668 }, { "epoch": 0.19, "learning_rate": 1.9767143374352764e-05, "loss": 2.2711, "step": 670 }, { "epoch": 0.19, "learning_rate": 1.9765126507298535e-05, "loss": 2.2322, "step": 672 }, { "epoch": 0.2, "learning_rate": 1.9763101047195226e-05, "loss": 2.2385, "step": 674 }, { "epoch": 0.2, "learning_rate": 1.9761066995825184e-05, "loss": 2.1595, "step": 676 }, { "epoch": 0.2, "learning_rate": 1.9759024354978322e-05, "loss": 2.2198, "step": 678 }, { "epoch": 0.2, "learning_rate": 1.975697312645211e-05, "loss": 2.3006, "step": 680 }, { "epoch": 0.2, "learning_rate": 1.9754913312051568e-05, "loss": 2.2337, "step": 682 }, { "epoch": 0.2, "learning_rate": 1.9752844913589285e-05, "loss": 2.1449, "step": 684 }, { "epoch": 0.2, "learning_rate": 1.975076793288539e-05, "loss": 2.3081, "step": 686 }, { "epoch": 0.2, "learning_rate": 1.9748682371767575e-05, "loss": 2.192, "step": 688 }, { "epoch": 0.2, "learning_rate": 1.974658823207108e-05, "loss": 2.1878, "step": 690 }, { "epoch": 0.2, "learning_rate": 1.974448551563868e-05, "loss": 2.1583, "step": 692 }, { "epoch": 0.2, "learning_rate": 1.9742374224320716e-05, "loss": 2.2768, "step": 694 }, { "epoch": 0.2, "learning_rate": 1.9740254359975073e-05, "loss": 2.1645, "step": 696 }, { "epoch": 0.2, "learning_rate": 1.973812592446717e-05, "loss": 2.2509, "step": 698 }, { "epoch": 0.2, "learning_rate": 1.973598891966997e-05, "loss": 2.1763, "step": 700 }, { "epoch": 0.2, "learning_rate": 1.9733843347463982e-05, "loss": 2.2404, "step": 702 }, { "epoch": 0.2, "learning_rate": 1.9731689209737256e-05, "loss": 2.2998, "step": 704 }, { "epoch": 0.2, "learning_rate": 1.972952650838537e-05, "loss": 2.1632, "step": 706 }, { "epoch": 0.21, "learning_rate": 1.9727355245311445e-05, "loss": 2.2153, "step": 708 }, { "epoch": 0.21, "learning_rate": 1.9725175422426136e-05, "loss": 2.2428, "step": 710 }, { "epoch": 0.21, "learning_rate": 1.9722987041647623e-05, "loss": 2.2517, "step": 712 }, { "epoch": 0.21, "learning_rate": 1.972079010490163e-05, "loss": 2.1908, "step": 714 }, { "epoch": 0.21, "learning_rate": 1.9718584614121395e-05, "loss": 2.2404, "step": 716 }, { "epoch": 0.21, "learning_rate": 1.9716370571247698e-05, "loss": 2.2089, "step": 718 }, { "epoch": 0.21, "learning_rate": 1.9714147978228834e-05, "loss": 2.1798, "step": 720 }, { "epoch": 0.21, "learning_rate": 1.971191683702062e-05, "loss": 2.2076, "step": 722 }, { "epoch": 0.21, "learning_rate": 1.9709677149586407e-05, "loss": 2.2341, "step": 724 }, { "epoch": 0.21, "learning_rate": 1.970742891789706e-05, "loss": 2.1853, "step": 726 }, { "epoch": 0.21, "learning_rate": 1.9705172143930955e-05, "loss": 2.1699, "step": 728 }, { "epoch": 0.21, "learning_rate": 1.9702906829674004e-05, "loss": 2.2334, "step": 730 }, { "epoch": 0.21, "learning_rate": 1.970063297711961e-05, "loss": 2.2236, "step": 732 }, { "epoch": 0.21, "learning_rate": 1.9698350588268713e-05, "loss": 2.2117, "step": 734 }, { "epoch": 0.21, "learning_rate": 1.969605966512975e-05, "loss": 2.1755, "step": 736 }, { "epoch": 0.21, "learning_rate": 1.969376020971867e-05, "loss": 2.2764, "step": 738 }, { "epoch": 0.21, "learning_rate": 1.9691452224058932e-05, "loss": 2.1923, "step": 740 }, { "epoch": 0.21, "learning_rate": 1.9689135710181503e-05, "loss": 2.2982, "step": 742 }, { "epoch": 0.22, "learning_rate": 1.9686810670124858e-05, "loss": 2.244, "step": 744 }, { "epoch": 0.22, "learning_rate": 1.968447710593496e-05, "loss": 2.2573, "step": 746 }, { "epoch": 0.22, "learning_rate": 1.9682135019665288e-05, "loss": 2.1907, "step": 748 }, { "epoch": 0.22, "learning_rate": 1.9679784413376812e-05, "loss": 2.2591, "step": 750 }, { "epoch": 0.22, "learning_rate": 1.9677425289138007e-05, "loss": 2.2396, "step": 752 }, { "epoch": 0.22, "learning_rate": 1.9675057649024837e-05, "loss": 2.2264, "step": 754 }, { "epoch": 0.22, "learning_rate": 1.9672681495120763e-05, "loss": 2.2089, "step": 756 }, { "epoch": 0.22, "learning_rate": 1.9670296829516732e-05, "loss": 2.2241, "step": 758 }, { "epoch": 0.22, "learning_rate": 1.9667903654311193e-05, "loss": 2.2048, "step": 760 }, { "epoch": 0.22, "learning_rate": 1.966550197161007e-05, "loss": 2.2203, "step": 762 }, { "epoch": 0.22, "learning_rate": 1.966309178352678e-05, "loss": 2.2343, "step": 764 }, { "epoch": 0.22, "learning_rate": 1.9660673092182225e-05, "loss": 2.1855, "step": 766 }, { "epoch": 0.22, "learning_rate": 1.9658245899704787e-05, "loss": 2.1854, "step": 768 }, { "epoch": 0.22, "learning_rate": 1.9655810208230334e-05, "loss": 2.1718, "step": 770 }, { "epoch": 0.22, "learning_rate": 1.9653366019902206e-05, "loss": 2.2035, "step": 772 }, { "epoch": 0.22, "learning_rate": 1.965091333687122e-05, "loss": 2.0251, "step": 774 }, { "epoch": 0.22, "learning_rate": 1.9648452161295682e-05, "loss": 2.1677, "step": 776 }, { "epoch": 0.23, "learning_rate": 1.964598249534135e-05, "loss": 2.2013, "step": 778 }, { "epoch": 0.23, "learning_rate": 1.9643504341181466e-05, "loss": 2.1793, "step": 780 }, { "epoch": 0.23, "learning_rate": 1.964101770099674e-05, "loss": 2.1444, "step": 782 }, { "epoch": 0.23, "learning_rate": 1.963852257697535e-05, "loss": 2.2216, "step": 784 }, { "epoch": 0.23, "learning_rate": 1.9636018971312934e-05, "loss": 2.1375, "step": 786 }, { "epoch": 0.23, "learning_rate": 1.9633506886212603e-05, "loss": 2.2523, "step": 788 }, { "epoch": 0.23, "learning_rate": 1.963098632388492e-05, "loss": 2.1865, "step": 790 }, { "epoch": 0.23, "learning_rate": 1.9628457286547915e-05, "loss": 2.154, "step": 792 }, { "epoch": 0.23, "learning_rate": 1.9625919776427072e-05, "loss": 2.1641, "step": 794 }, { "epoch": 0.23, "learning_rate": 1.9623373795755333e-05, "loss": 2.2271, "step": 796 }, { "epoch": 0.23, "learning_rate": 1.962081934677309e-05, "loss": 2.1814, "step": 798 }, { "epoch": 0.23, "learning_rate": 1.961825643172819e-05, "loss": 2.2112, "step": 800 }, { "epoch": 0.23, "learning_rate": 1.9615685052875935e-05, "loss": 2.227, "step": 802 }, { "epoch": 0.23, "learning_rate": 1.961310521247906e-05, "loss": 2.1405, "step": 804 }, { "epoch": 0.23, "learning_rate": 1.961051691280776e-05, "loss": 2.219, "step": 806 }, { "epoch": 0.23, "learning_rate": 1.9607920156139672e-05, "loss": 2.2736, "step": 808 }, { "epoch": 0.23, "learning_rate": 1.960531494475987e-05, "loss": 2.1828, "step": 810 }, { "epoch": 0.24, "learning_rate": 1.9602701280960866e-05, "loss": 2.2382, "step": 812 }, { "epoch": 0.24, "learning_rate": 1.960007916704262e-05, "loss": 2.2536, "step": 814 }, { "epoch": 0.24, "learning_rate": 1.9597448605312517e-05, "loss": 2.2281, "step": 816 }, { "epoch": 0.24, "learning_rate": 1.9594809598085382e-05, "loss": 2.1406, "step": 818 }, { "epoch": 0.24, "learning_rate": 1.9592162147683472e-05, "loss": 2.2269, "step": 820 }, { "epoch": 0.24, "learning_rate": 1.9589506256436476e-05, "loss": 2.179, "step": 822 }, { "epoch": 0.24, "learning_rate": 1.9586841926681497e-05, "loss": 2.1538, "step": 824 }, { "epoch": 0.24, "learning_rate": 1.958416916076308e-05, "loss": 2.1434, "step": 826 }, { "epoch": 0.24, "learning_rate": 1.958148796103319e-05, "loss": 2.2194, "step": 828 }, { "epoch": 0.24, "learning_rate": 1.9578798329851202e-05, "loss": 2.0885, "step": 830 }, { "epoch": 0.24, "learning_rate": 1.957610026958393e-05, "loss": 2.228, "step": 832 }, { "epoch": 0.24, "learning_rate": 1.9573393782605584e-05, "loss": 2.2085, "step": 834 }, { "epoch": 0.24, "learning_rate": 1.9570678871297807e-05, "loss": 2.1842, "step": 836 }, { "epoch": 0.24, "learning_rate": 1.9567955538049643e-05, "loss": 2.1945, "step": 838 }, { "epoch": 0.24, "learning_rate": 1.956522378525756e-05, "loss": 2.0916, "step": 840 }, { "epoch": 0.24, "learning_rate": 1.956248361532542e-05, "loss": 2.2378, "step": 842 }, { "epoch": 0.24, "learning_rate": 1.9559735030664498e-05, "loss": 2.2107, "step": 844 }, { "epoch": 0.25, "learning_rate": 1.9556978033693482e-05, "loss": 2.2197, "step": 846 }, { "epoch": 0.25, "learning_rate": 1.9554212626838448e-05, "loss": 2.3519, "step": 848 }, { "epoch": 0.25, "learning_rate": 1.9551438812532883e-05, "loss": 2.1944, "step": 850 }, { "epoch": 0.25, "learning_rate": 1.954865659321767e-05, "loss": 2.1517, "step": 852 }, { "epoch": 0.25, "learning_rate": 1.9545865971341086e-05, "loss": 2.283, "step": 854 }, { "epoch": 0.25, "learning_rate": 1.9543066949358803e-05, "loss": 2.1676, "step": 856 }, { "epoch": 0.25, "learning_rate": 1.9540259529733885e-05, "loss": 2.2481, "step": 858 }, { "epoch": 0.25, "learning_rate": 1.9537443714936788e-05, "loss": 2.1801, "step": 860 }, { "epoch": 0.25, "learning_rate": 1.9534619507445353e-05, "loss": 2.1778, "step": 862 }, { "epoch": 0.25, "learning_rate": 1.9531786909744803e-05, "loss": 2.184, "step": 864 }, { "epoch": 0.25, "learning_rate": 1.952894592432775e-05, "loss": 2.1607, "step": 866 }, { "epoch": 0.25, "learning_rate": 1.9526096553694187e-05, "loss": 2.1119, "step": 868 }, { "epoch": 0.25, "learning_rate": 1.952323880035148e-05, "loss": 2.1492, "step": 870 }, { "epoch": 0.25, "learning_rate": 1.952037266681438e-05, "loss": 2.1458, "step": 872 }, { "epoch": 0.25, "learning_rate": 1.9517498155605003e-05, "loss": 2.1422, "step": 874 }, { "epoch": 0.25, "learning_rate": 1.9514615269252847e-05, "loss": 2.2262, "step": 876 }, { "epoch": 0.25, "learning_rate": 1.9511724010294764e-05, "loss": 2.1291, "step": 878 }, { "epoch": 0.25, "learning_rate": 1.9508824381275e-05, "loss": 2.2675, "step": 880 }, { "epoch": 0.26, "learning_rate": 1.9505916384745136e-05, "loss": 2.2227, "step": 882 }, { "epoch": 0.26, "learning_rate": 1.950300002326414e-05, "loss": 2.2097, "step": 884 }, { "epoch": 0.26, "learning_rate": 1.9500075299398325e-05, "loss": 2.2184, "step": 886 }, { "epoch": 0.26, "learning_rate": 1.9497142215721373e-05, "loss": 2.1813, "step": 888 }, { "epoch": 0.26, "learning_rate": 1.949420077481432e-05, "loss": 2.1859, "step": 890 }, { "epoch": 0.26, "learning_rate": 1.9491250979265555e-05, "loss": 2.2414, "step": 892 }, { "epoch": 0.26, "learning_rate": 1.9488292831670818e-05, "loss": 2.1782, "step": 894 }, { "epoch": 0.26, "learning_rate": 1.94853263346332e-05, "loss": 2.1823, "step": 896 }, { "epoch": 0.26, "learning_rate": 1.9482351490763135e-05, "loss": 2.3021, "step": 898 }, { "epoch": 0.26, "learning_rate": 1.9479368302678412e-05, "loss": 2.1153, "step": 900 }, { "epoch": 0.26, "learning_rate": 1.9476376773004148e-05, "loss": 2.185, "step": 902 }, { "epoch": 0.26, "learning_rate": 1.947337690437282e-05, "loss": 2.1874, "step": 904 }, { "epoch": 0.26, "learning_rate": 1.947036869942422e-05, "loss": 2.0753, "step": 906 }, { "epoch": 0.26, "learning_rate": 1.9467352160805496e-05, "loss": 2.161, "step": 908 }, { "epoch": 0.26, "learning_rate": 1.946432729117112e-05, "loss": 2.1843, "step": 910 }, { "epoch": 0.26, "learning_rate": 1.946129409318289e-05, "loss": 2.125, "step": 912 }, { "epoch": 0.26, "learning_rate": 1.9458252569509946e-05, "loss": 2.1769, "step": 914 }, { "epoch": 0.27, "learning_rate": 1.9455202722828744e-05, "loss": 2.1779, "step": 916 }, { "epoch": 0.27, "learning_rate": 1.9452144555823067e-05, "loss": 2.1165, "step": 918 }, { "epoch": 0.27, "learning_rate": 1.944907807118402e-05, "loss": 2.0941, "step": 920 }, { "epoch": 0.27, "learning_rate": 1.9446003271610027e-05, "loss": 2.209, "step": 922 }, { "epoch": 0.27, "learning_rate": 1.944292015980683e-05, "loss": 2.1753, "step": 924 }, { "epoch": 0.27, "learning_rate": 1.9439828738487484e-05, "loss": 2.2126, "step": 926 }, { "epoch": 0.27, "learning_rate": 1.9436729010372355e-05, "loss": 2.1462, "step": 928 }, { "epoch": 0.27, "learning_rate": 1.9433620978189122e-05, "loss": 2.1645, "step": 930 }, { "epoch": 0.27, "learning_rate": 1.9430504644672772e-05, "loss": 2.1526, "step": 932 }, { "epoch": 0.27, "learning_rate": 1.9427380012565593e-05, "loss": 2.2185, "step": 934 }, { "epoch": 0.27, "learning_rate": 1.9424247084617177e-05, "loss": 2.1899, "step": 936 }, { "epoch": 0.27, "learning_rate": 1.9421105863584415e-05, "loss": 2.1072, "step": 938 }, { "epoch": 0.27, "learning_rate": 1.94179563522315e-05, "loss": 2.2343, "step": 940 }, { "epoch": 0.27, "learning_rate": 1.9414798553329918e-05, "loss": 2.1361, "step": 942 }, { "epoch": 0.27, "learning_rate": 1.9411632469658444e-05, "loss": 2.2046, "step": 944 }, { "epoch": 0.27, "learning_rate": 1.9408458104003144e-05, "loss": 2.1055, "step": 946 }, { "epoch": 0.27, "learning_rate": 1.9405275459157384e-05, "loss": 2.1547, "step": 948 }, { "epoch": 0.28, "learning_rate": 1.94020845379218e-05, "loss": 2.2143, "step": 950 }, { "epoch": 0.28, "learning_rate": 1.9398885343104316e-05, "loss": 2.2045, "step": 952 }, { "epoch": 0.28, "learning_rate": 1.9395677877520138e-05, "loss": 2.1654, "step": 954 }, { "epoch": 0.28, "learning_rate": 1.9392462143991753e-05, "loss": 2.1592, "step": 956 }, { "epoch": 0.28, "learning_rate": 1.9389238145348917e-05, "loss": 2.1205, "step": 958 }, { "epoch": 0.28, "learning_rate": 1.9386005884428663e-05, "loss": 2.0877, "step": 960 }, { "epoch": 0.28, "learning_rate": 1.9382765364075294e-05, "loss": 2.1262, "step": 962 }, { "epoch": 0.28, "learning_rate": 1.9379516587140383e-05, "loss": 2.1199, "step": 964 }, { "epoch": 0.28, "learning_rate": 1.9376259556482758e-05, "loss": 2.1212, "step": 966 }, { "epoch": 0.28, "learning_rate": 1.9372994274968527e-05, "loss": 2.1992, "step": 968 }, { "epoch": 0.28, "learning_rate": 1.936972074547105e-05, "loss": 2.1475, "step": 970 }, { "epoch": 0.28, "learning_rate": 1.936643897087094e-05, "loss": 2.2204, "step": 972 }, { "epoch": 0.28, "learning_rate": 1.9363148954056077e-05, "loss": 2.1961, "step": 974 }, { "epoch": 0.28, "learning_rate": 1.9359850697921583e-05, "loss": 2.1684, "step": 976 }, { "epoch": 0.28, "learning_rate": 1.9356544205369833e-05, "loss": 2.1464, "step": 978 }, { "epoch": 0.28, "learning_rate": 1.9353229479310455e-05, "loss": 2.1356, "step": 980 }, { "epoch": 0.28, "learning_rate": 1.934990652266032e-05, "loss": 2.1415, "step": 982 }, { "epoch": 0.28, "learning_rate": 1.9346575338343537e-05, "loss": 2.1802, "step": 984 }, { "epoch": 0.29, "learning_rate": 1.934323592929146e-05, "loss": 2.1934, "step": 986 }, { "epoch": 0.29, "learning_rate": 1.9339888298442682e-05, "loss": 2.174, "step": 988 }, { "epoch": 0.29, "learning_rate": 1.9336532448743023e-05, "loss": 2.2394, "step": 990 }, { "epoch": 0.29, "learning_rate": 1.9333168383145547e-05, "loss": 2.1015, "step": 992 }, { "epoch": 0.29, "learning_rate": 1.9329796104610536e-05, "loss": 2.1216, "step": 994 }, { "epoch": 0.29, "learning_rate": 1.9326415616105507e-05, "loss": 2.0873, "step": 996 }, { "epoch": 0.29, "learning_rate": 1.9323026920605198e-05, "loss": 2.0841, "step": 998 }, { "epoch": 0.29, "learning_rate": 1.931963002109157e-05, "loss": 2.0564, "step": 1000 }, { "epoch": 0.29, "learning_rate": 1.9316224920553807e-05, "loss": 2.1921, "step": 1002 }, { "epoch": 0.29, "learning_rate": 1.93128116219883e-05, "loss": 2.1366, "step": 1004 }, { "epoch": 0.29, "learning_rate": 1.9309390128398666e-05, "loss": 2.2012, "step": 1006 }, { "epoch": 0.29, "learning_rate": 1.930596044279572e-05, "loss": 2.094, "step": 1008 }, { "epoch": 0.29, "learning_rate": 1.9302522568197504e-05, "loss": 2.1234, "step": 1010 }, { "epoch": 0.29, "learning_rate": 1.9299076507629245e-05, "loss": 2.1094, "step": 1012 }, { "epoch": 0.29, "learning_rate": 1.929562226412339e-05, "loss": 2.1095, "step": 1014 }, { "epoch": 0.29, "learning_rate": 1.9292159840719576e-05, "loss": 2.1506, "step": 1016 }, { "epoch": 0.29, "learning_rate": 1.9288689240464648e-05, "loss": 2.1083, "step": 1018 }, { "epoch": 0.3, "learning_rate": 1.9285210466412635e-05, "loss": 2.2128, "step": 1020 }, { "epoch": 0.3, "learning_rate": 1.928172352162477e-05, "loss": 2.115, "step": 1022 }, { "epoch": 0.3, "learning_rate": 1.9278228409169468e-05, "loss": 2.1797, "step": 1024 }, { "epoch": 0.3, "learning_rate": 1.9274725132122336e-05, "loss": 2.1258, "step": 1026 }, { "epoch": 0.3, "learning_rate": 1.927121369356616e-05, "loss": 2.2018, "step": 1028 }, { "epoch": 0.3, "learning_rate": 1.9267694096590913e-05, "loss": 2.1427, "step": 1030 }, { "epoch": 0.3, "learning_rate": 1.926416634429375e-05, "loss": 2.1718, "step": 1032 }, { "epoch": 0.3, "learning_rate": 1.9260630439778993e-05, "loss": 2.1325, "step": 1034 }, { "epoch": 0.3, "learning_rate": 1.9257086386158147e-05, "loss": 2.1242, "step": 1036 }, { "epoch": 0.3, "learning_rate": 1.9253534186549883e-05, "loss": 2.1654, "step": 1038 }, { "epoch": 0.3, "learning_rate": 1.9249973844080038e-05, "loss": 2.0984, "step": 1040 }, { "epoch": 0.3, "learning_rate": 1.9248190620250808e-05, "loss": 2.1219, "step": 1042 }, { "epoch": 0.3, "learning_rate": 1.924461806936522e-05, "loss": 2.151, "step": 1044 }, { "epoch": 0.3, "learning_rate": 1.9241037383463998e-05, "loss": 2.166, "step": 1046 }, { "epoch": 0.3, "learning_rate": 1.9237448565698046e-05, "loss": 2.0673, "step": 1048 }, { "epoch": 0.3, "learning_rate": 1.9233851619225433e-05, "loss": 2.0821, "step": 1050 }, { "epoch": 0.3, "learning_rate": 1.9230246547211368e-05, "loss": 2.1562, "step": 1052 }, { "epoch": 0.31, "learning_rate": 1.9226633352828227e-05, "loss": 2.0773, "step": 1054 }, { "epoch": 0.31, "learning_rate": 1.9223012039255517e-05, "loss": 2.1818, "step": 1056 }, { "epoch": 0.31, "learning_rate": 1.9219382609679894e-05, "loss": 2.0929, "step": 1058 }, { "epoch": 0.31, "learning_rate": 1.9215745067295168e-05, "loss": 2.1862, "step": 1060 }, { "epoch": 0.31, "learning_rate": 1.921209941530227e-05, "loss": 2.1384, "step": 1062 }, { "epoch": 0.31, "learning_rate": 1.9208445656909284e-05, "loss": 2.1086, "step": 1064 }, { "epoch": 0.31, "learning_rate": 1.920478379533141e-05, "loss": 2.0518, "step": 1066 }, { "epoch": 0.31, "learning_rate": 1.920111383379099e-05, "loss": 2.2252, "step": 1068 }, { "epoch": 0.31, "learning_rate": 1.91974357755175e-05, "loss": 2.234, "step": 1070 }, { "epoch": 0.31, "learning_rate": 1.9193749623747527e-05, "loss": 2.2704, "step": 1072 }, { "epoch": 0.31, "learning_rate": 1.919005538172478e-05, "loss": 2.078, "step": 1074 }, { "epoch": 0.31, "learning_rate": 1.9186353052700104e-05, "loss": 2.137, "step": 1076 }, { "epoch": 0.31, "learning_rate": 1.918264263993144e-05, "loss": 2.128, "step": 1078 }, { "epoch": 0.31, "learning_rate": 1.9178924146683852e-05, "loss": 2.1612, "step": 1080 }, { "epoch": 0.31, "learning_rate": 1.9175197576229522e-05, "loss": 2.1085, "step": 1082 }, { "epoch": 0.31, "learning_rate": 1.9171462931847725e-05, "loss": 2.0825, "step": 1084 }, { "epoch": 0.31, "learning_rate": 1.9167720216824847e-05, "loss": 2.1981, "step": 1086 }, { "epoch": 0.32, "learning_rate": 1.9163969434454383e-05, "loss": 2.0534, "step": 1088 }, { "epoch": 0.32, "learning_rate": 1.916021058803691e-05, "loss": 2.1402, "step": 1090 }, { "epoch": 0.32, "learning_rate": 1.9156443680880126e-05, "loss": 2.0727, "step": 1092 }, { "epoch": 0.32, "learning_rate": 1.91526687162988e-05, "loss": 2.2486, "step": 1094 }, { "epoch": 0.32, "learning_rate": 1.91488856976148e-05, "loss": 2.1329, "step": 1096 }, { "epoch": 0.32, "learning_rate": 1.914509462815708e-05, "loss": 2.1496, "step": 1098 }, { "epoch": 0.32, "learning_rate": 1.914129551126168e-05, "loss": 2.0646, "step": 1100 }, { "epoch": 0.32, "learning_rate": 1.9137488350271724e-05, "loss": 2.1083, "step": 1102 }, { "epoch": 0.32, "learning_rate": 1.9133673148537408e-05, "loss": 2.1117, "step": 1104 }, { "epoch": 0.32, "learning_rate": 1.9129849909416004e-05, "loss": 2.1663, "step": 1106 }, { "epoch": 0.32, "learning_rate": 1.9126018636271867e-05, "loss": 2.093, "step": 1108 }, { "epoch": 0.32, "learning_rate": 1.912217933247641e-05, "loss": 2.0716, "step": 1110 }, { "epoch": 0.32, "learning_rate": 1.911833200140812e-05, "loss": 2.1382, "step": 1112 }, { "epoch": 0.32, "learning_rate": 1.9114476646452546e-05, "loss": 2.1569, "step": 1114 }, { "epoch": 0.32, "learning_rate": 1.911061327100229e-05, "loss": 2.1898, "step": 1116 }, { "epoch": 0.32, "learning_rate": 1.9106741878457026e-05, "loss": 2.0581, "step": 1118 }, { "epoch": 0.32, "learning_rate": 1.9102862472223473e-05, "loss": 2.1661, "step": 1120 }, { "epoch": 0.32, "learning_rate": 1.9098975055715398e-05, "loss": 2.0368, "step": 1122 }, { "epoch": 0.33, "learning_rate": 1.9095079632353635e-05, "loss": 2.0793, "step": 1124 }, { "epoch": 0.33, "learning_rate": 1.9091176205566042e-05, "loss": 2.1038, "step": 1126 }, { "epoch": 0.33, "learning_rate": 1.9087264778787534e-05, "loss": 2.1274, "step": 1128 }, { "epoch": 0.33, "learning_rate": 1.9083345355460064e-05, "loss": 2.1018, "step": 1130 }, { "epoch": 0.33, "learning_rate": 1.9079417939032613e-05, "loss": 2.1023, "step": 1132 }, { "epoch": 0.33, "learning_rate": 1.9075482532961206e-05, "loss": 2.0996, "step": 1134 }, { "epoch": 0.33, "learning_rate": 1.9071539140708895e-05, "loss": 2.1565, "step": 1136 }, { "epoch": 0.33, "learning_rate": 1.9067587765745758e-05, "loss": 2.1128, "step": 1138 }, { "epoch": 0.33, "learning_rate": 1.90636284115489e-05, "loss": 2.1066, "step": 1140 }, { "epoch": 0.33, "learning_rate": 1.9059661081602442e-05, "loss": 2.0893, "step": 1142 }, { "epoch": 0.33, "learning_rate": 1.9055685779397533e-05, "loss": 2.2314, "step": 1144 }, { "epoch": 0.33, "learning_rate": 1.905170250843233e-05, "loss": 2.1511, "step": 1146 }, { "epoch": 0.33, "learning_rate": 1.9047711272212006e-05, "loss": 2.0671, "step": 1148 }, { "epoch": 0.33, "learning_rate": 1.904371207424874e-05, "loss": 2.2326, "step": 1150 }, { "epoch": 0.33, "learning_rate": 1.9039704918061723e-05, "loss": 2.1766, "step": 1152 }, { "epoch": 0.33, "learning_rate": 1.9035689807177138e-05, "loss": 2.0524, "step": 1154 }, { "epoch": 0.33, "learning_rate": 1.903166674512818e-05, "loss": 2.0692, "step": 1156 }, { "epoch": 0.34, "learning_rate": 1.9027635735455028e-05, "loss": 2.0253, "step": 1158 }, { "epoch": 0.34, "learning_rate": 1.902359678170487e-05, "loss": 2.1337, "step": 1160 }, { "epoch": 0.34, "learning_rate": 1.901954988743188e-05, "loss": 2.1115, "step": 1162 }, { "epoch": 0.34, "learning_rate": 1.9015495056197205e-05, "loss": 2.1642, "step": 1164 }, { "epoch": 0.34, "learning_rate": 1.9011432291568998e-05, "loss": 2.0982, "step": 1166 }, { "epoch": 0.34, "learning_rate": 1.9007361597122375e-05, "loss": 2.1366, "step": 1168 }, { "epoch": 0.34, "learning_rate": 1.900328297643944e-05, "loss": 2.1517, "step": 1170 }, { "epoch": 0.34, "learning_rate": 1.8999196433109276e-05, "loss": 2.1013, "step": 1172 }, { "epoch": 0.34, "learning_rate": 1.899510197072792e-05, "loss": 2.1294, "step": 1174 }, { "epoch": 0.34, "learning_rate": 1.89909995928984e-05, "loss": 2.0912, "step": 1176 }, { "epoch": 0.34, "learning_rate": 1.898688930323069e-05, "loss": 2.2012, "step": 1178 }, { "epoch": 0.34, "learning_rate": 1.8982771105341738e-05, "loss": 2.14, "step": 1180 }, { "epoch": 0.34, "learning_rate": 1.8978645002855448e-05, "loss": 2.1074, "step": 1182 }, { "epoch": 0.34, "learning_rate": 1.897451099940268e-05, "loss": 2.1168, "step": 1184 }, { "epoch": 0.34, "learning_rate": 1.8970369098621248e-05, "loss": 2.113, "step": 1186 }, { "epoch": 0.34, "learning_rate": 1.8966219304155908e-05, "loss": 2.1364, "step": 1188 }, { "epoch": 0.34, "learning_rate": 1.8962061619658366e-05, "loss": 2.1987, "step": 1190 }, { "epoch": 0.35, "learning_rate": 1.8957896048787278e-05, "loss": 2.1314, "step": 1192 }, { "epoch": 0.35, "learning_rate": 1.8953722595208237e-05, "loss": 2.0733, "step": 1194 }, { "epoch": 0.35, "learning_rate": 1.8949541262593764e-05, "loss": 2.1458, "step": 1196 }, { "epoch": 0.35, "learning_rate": 1.894535205462332e-05, "loss": 2.0809, "step": 1198 }, { "epoch": 0.35, "learning_rate": 1.89411549749833e-05, "loss": 2.1807, "step": 1200 }, { "epoch": 0.35, "learning_rate": 1.893695002736702e-05, "loss": 2.0415, "step": 1202 }, { "epoch": 0.35, "learning_rate": 1.8932737215474716e-05, "loss": 2.1279, "step": 1204 }, { "epoch": 0.35, "learning_rate": 1.8928516543013555e-05, "loss": 2.1188, "step": 1206 }, { "epoch": 0.35, "learning_rate": 1.8924288013697612e-05, "loss": 2.1261, "step": 1208 }, { "epoch": 0.35, "learning_rate": 1.892005163124788e-05, "loss": 2.0941, "step": 1210 }, { "epoch": 0.35, "learning_rate": 1.8915807399392262e-05, "loss": 2.0453, "step": 1212 }, { "epoch": 0.35, "learning_rate": 1.891155532186557e-05, "loss": 2.0584, "step": 1214 }, { "epoch": 0.35, "learning_rate": 1.8907295402409514e-05, "loss": 2.1231, "step": 1216 }, { "epoch": 0.35, "learning_rate": 1.8903027644772717e-05, "loss": 2.1021, "step": 1218 }, { "epoch": 0.35, "learning_rate": 1.889875205271068e-05, "loss": 2.1612, "step": 1220 }, { "epoch": 0.35, "learning_rate": 1.8894468629985816e-05, "loss": 2.1443, "step": 1222 }, { "epoch": 0.35, "learning_rate": 1.889017738036742e-05, "loss": 2.0796, "step": 1224 }, { "epoch": 0.36, "learning_rate": 1.8885878307631678e-05, "loss": 2.1064, "step": 1226 }, { "epoch": 0.36, "learning_rate": 1.8881571415561658e-05, "loss": 2.1199, "step": 1228 }, { "epoch": 0.36, "learning_rate": 1.8877256707947308e-05, "loss": 2.105, "step": 1230 }, { "epoch": 0.36, "learning_rate": 1.8872934188585455e-05, "loss": 2.128, "step": 1232 }, { "epoch": 0.36, "learning_rate": 1.88686038612798e-05, "loss": 2.178, "step": 1234 }, { "epoch": 0.36, "learning_rate": 1.8864265729840914e-05, "loss": 2.188, "step": 1236 }, { "epoch": 0.36, "learning_rate": 1.8859919798086238e-05, "loss": 2.0585, "step": 1238 }, { "epoch": 0.36, "learning_rate": 1.885556606984008e-05, "loss": 2.1773, "step": 1240 }, { "epoch": 0.36, "learning_rate": 1.8851204548933594e-05, "loss": 2.1241, "step": 1242 }, { "epoch": 0.36, "learning_rate": 1.8846835239204804e-05, "loss": 2.1258, "step": 1244 }, { "epoch": 0.36, "learning_rate": 1.884245814449859e-05, "loss": 2.1719, "step": 1246 }, { "epoch": 0.36, "learning_rate": 1.8838073268666672e-05, "loss": 2.0894, "step": 1248 }, { "epoch": 0.36, "learning_rate": 1.883368061556763e-05, "loss": 2.1982, "step": 1250 }, { "epoch": 0.36, "learning_rate": 1.8829280189066873e-05, "loss": 2.0839, "step": 1252 }, { "epoch": 0.36, "learning_rate": 1.8824871993036664e-05, "loss": 2.0998, "step": 1254 }, { "epoch": 0.36, "learning_rate": 1.8820456031356093e-05, "loss": 2.0763, "step": 1256 }, { "epoch": 0.36, "learning_rate": 1.881603230791109e-05, "loss": 2.1414, "step": 1258 }, { "epoch": 0.36, "learning_rate": 1.8811600826594416e-05, "loss": 2.0851, "step": 1260 }, { "epoch": 0.37, "learning_rate": 1.880716159130565e-05, "loss": 2.0538, "step": 1262 }, { "epoch": 0.37, "learning_rate": 1.88027146059512e-05, "loss": 2.0488, "step": 1264 }, { "epoch": 0.37, "learning_rate": 1.87982598744443e-05, "loss": 2.1024, "step": 1266 }, { "epoch": 0.37, "learning_rate": 1.8793797400704987e-05, "loss": 2.1035, "step": 1268 }, { "epoch": 0.37, "learning_rate": 1.8789327188660116e-05, "loss": 2.066, "step": 1270 }, { "epoch": 0.37, "learning_rate": 1.878484924224336e-05, "loss": 2.1352, "step": 1272 }, { "epoch": 0.37, "learning_rate": 1.8780363565395193e-05, "loss": 2.0745, "step": 1274 }, { "epoch": 0.37, "learning_rate": 1.8775870162062887e-05, "loss": 2.0843, "step": 1276 }, { "epoch": 0.37, "learning_rate": 1.8771369036200514e-05, "loss": 2.0283, "step": 1278 }, { "epoch": 0.37, "learning_rate": 1.8766860191768946e-05, "loss": 2.0932, "step": 1280 }, { "epoch": 0.37, "learning_rate": 1.876234363273585e-05, "loss": 2.0362, "step": 1282 }, { "epoch": 0.37, "learning_rate": 1.8757819363075666e-05, "loss": 2.1168, "step": 1284 }, { "epoch": 0.37, "learning_rate": 1.875328738676964e-05, "loss": 2.0719, "step": 1286 }, { "epoch": 0.37, "learning_rate": 1.874874770780578e-05, "loss": 2.0959, "step": 1288 }, { "epoch": 0.37, "learning_rate": 1.8744200330178887e-05, "loss": 2.071, "step": 1290 }, { "epoch": 0.37, "learning_rate": 1.8739645257890537e-05, "loss": 2.0578, "step": 1292 }, { "epoch": 0.37, "learning_rate": 1.873508249494906e-05, "loss": 1.9623, "step": 1294 }, { "epoch": 0.38, "learning_rate": 1.8730512045369567e-05, "loss": 2.1095, "step": 1296 }, { "epoch": 0.38, "learning_rate": 1.872593391317394e-05, "loss": 2.1088, "step": 1298 }, { "epoch": 0.38, "learning_rate": 1.8721348102390804e-05, "loss": 2.1002, "step": 1300 }, { "epoch": 0.38, "learning_rate": 1.871675461705555e-05, "loss": 2.1879, "step": 1302 }, { "epoch": 0.38, "learning_rate": 1.8712153461210326e-05, "loss": 2.0976, "step": 1304 }, { "epoch": 0.38, "learning_rate": 1.870754463890402e-05, "loss": 2.0386, "step": 1306 }, { "epoch": 0.38, "learning_rate": 1.8702928154192275e-05, "loss": 2.0671, "step": 1308 }, { "epoch": 0.38, "learning_rate": 1.8698304011137475e-05, "loss": 2.1133, "step": 1310 }, { "epoch": 0.38, "learning_rate": 1.8693672213808738e-05, "loss": 2.0988, "step": 1312 }, { "epoch": 0.38, "learning_rate": 1.8689032766281926e-05, "loss": 2.0859, "step": 1314 }, { "epoch": 0.38, "learning_rate": 1.8684385672639623e-05, "loss": 2.0666, "step": 1316 }, { "epoch": 0.38, "learning_rate": 1.867973093697115e-05, "loss": 2.0783, "step": 1318 }, { "epoch": 0.38, "learning_rate": 1.8675068563372546e-05, "loss": 2.0884, "step": 1320 }, { "epoch": 0.38, "learning_rate": 1.867039855594658e-05, "loss": 2.1157, "step": 1322 }, { "epoch": 0.38, "learning_rate": 1.866572091880273e-05, "loss": 2.047, "step": 1324 }, { "epoch": 0.38, "learning_rate": 1.866103565605719e-05, "loss": 2.1038, "step": 1326 }, { "epoch": 0.38, "learning_rate": 1.865634277183287e-05, "loss": 2.0949, "step": 1328 }, { "epoch": 0.39, "learning_rate": 1.8651642270259375e-05, "loss": 2.1952, "step": 1330 }, { "epoch": 0.39, "learning_rate": 1.8646934155473025e-05, "loss": 2.034, "step": 1332 }, { "epoch": 0.39, "learning_rate": 1.864221843161683e-05, "loss": 2.1085, "step": 1334 }, { "epoch": 0.39, "learning_rate": 1.863749510284051e-05, "loss": 1.962, "step": 1336 }, { "epoch": 0.39, "learning_rate": 1.8632764173300458e-05, "loss": 2.0325, "step": 1338 }, { "epoch": 0.39, "learning_rate": 1.8628025647159765e-05, "loss": 2.0462, "step": 1340 }, { "epoch": 0.39, "learning_rate": 1.8623279528588215e-05, "loss": 2.1436, "step": 1342 }, { "epoch": 0.39, "learning_rate": 1.8618525821762258e-05, "loss": 2.0869, "step": 1344 }, { "epoch": 0.39, "learning_rate": 1.8613764530865028e-05, "loss": 2.0867, "step": 1346 }, { "epoch": 0.39, "learning_rate": 1.860899566008634e-05, "loss": 2.0902, "step": 1348 }, { "epoch": 0.39, "learning_rate": 1.8604219213622664e-05, "loss": 2.0804, "step": 1350 }, { "epoch": 0.39, "learning_rate": 1.8599435195677146e-05, "loss": 2.0009, "step": 1352 }, { "epoch": 0.39, "learning_rate": 1.85946436104596e-05, "loss": 2.0423, "step": 1354 }, { "epoch": 0.39, "learning_rate": 1.8589844462186485e-05, "loss": 2.0722, "step": 1356 }, { "epoch": 0.39, "learning_rate": 1.8585037755080928e-05, "loss": 2.0408, "step": 1358 }, { "epoch": 0.39, "learning_rate": 1.8580223493372695e-05, "loss": 2.0429, "step": 1360 }, { "epoch": 0.39, "learning_rate": 1.8575401681298216e-05, "loss": 2.0491, "step": 1362 }, { "epoch": 0.4, "learning_rate": 1.857057232310055e-05, "loss": 2.0458, "step": 1364 }, { "epoch": 0.4, "learning_rate": 1.8565735423029406e-05, "loss": 2.0703, "step": 1366 }, { "epoch": 0.4, "learning_rate": 1.856089098534112e-05, "loss": 2.099, "step": 1368 }, { "epoch": 0.4, "learning_rate": 1.855603901429867e-05, "loss": 2.0961, "step": 1370 }, { "epoch": 0.4, "learning_rate": 1.8551179514171668e-05, "loss": 2.0984, "step": 1372 }, { "epoch": 0.4, "learning_rate": 1.8546312489236335e-05, "loss": 2.0821, "step": 1374 }, { "epoch": 0.4, "learning_rate": 1.8541437943775524e-05, "loss": 2.1224, "step": 1376 }, { "epoch": 0.4, "learning_rate": 1.8536555882078707e-05, "loss": 2.0928, "step": 1378 }, { "epoch": 0.4, "learning_rate": 1.8531666308441963e-05, "loss": 2.0941, "step": 1380 }, { "epoch": 0.4, "learning_rate": 1.8526769227167984e-05, "loss": 2.0791, "step": 1382 }, { "epoch": 0.4, "learning_rate": 1.8521864642566076e-05, "loss": 2.1329, "step": 1384 }, { "epoch": 0.4, "learning_rate": 1.851695255895214e-05, "loss": 2.0662, "step": 1386 }, { "epoch": 0.4, "learning_rate": 1.8512032980648677e-05, "loss": 1.9956, "step": 1388 }, { "epoch": 0.4, "learning_rate": 1.850710591198478e-05, "loss": 2.0167, "step": 1390 }, { "epoch": 0.4, "learning_rate": 1.8502171357296144e-05, "loss": 2.1732, "step": 1392 }, { "epoch": 0.4, "learning_rate": 1.8497229320925038e-05, "loss": 2.1371, "step": 1394 }, { "epoch": 0.4, "learning_rate": 1.8492279807220327e-05, "loss": 2.1178, "step": 1396 }, { "epoch": 0.4, "learning_rate": 1.8487322820537447e-05, "loss": 1.969, "step": 1398 }, { "epoch": 0.41, "learning_rate": 1.8482358365238414e-05, "loss": 2.0843, "step": 1400 }, { "epoch": 0.41, "learning_rate": 1.8477386445691815e-05, "loss": 2.0195, "step": 1402 }, { "epoch": 0.41, "learning_rate": 1.8472407066272805e-05, "loss": 2.0972, "step": 1404 }, { "epoch": 0.41, "learning_rate": 1.8467420231363106e-05, "loss": 1.9727, "step": 1406 }, { "epoch": 0.41, "learning_rate": 1.8462425945350995e-05, "loss": 2.1001, "step": 1408 }, { "epoch": 0.41, "learning_rate": 1.8457424212631314e-05, "loss": 2.0428, "step": 1410 }, { "epoch": 0.41, "learning_rate": 1.845241503760545e-05, "loss": 2.1113, "step": 1412 }, { "epoch": 0.41, "learning_rate": 1.8447398424681347e-05, "loss": 2.0645, "step": 1414 }, { "epoch": 0.41, "learning_rate": 1.844237437827349e-05, "loss": 2.1457, "step": 1416 }, { "epoch": 0.41, "learning_rate": 1.8437342902802896e-05, "loss": 1.9936, "step": 1418 }, { "epoch": 0.41, "learning_rate": 1.8432304002697137e-05, "loss": 2.0743, "step": 1420 }, { "epoch": 0.41, "learning_rate": 1.8427257682390306e-05, "loss": 2.075, "step": 1422 }, { "epoch": 0.41, "learning_rate": 1.842220394632303e-05, "loss": 1.9954, "step": 1424 }, { "epoch": 0.41, "learning_rate": 1.8417142798942464e-05, "loss": 2.0575, "step": 1426 }, { "epoch": 0.41, "learning_rate": 1.8412074244702274e-05, "loss": 2.0298, "step": 1428 }, { "epoch": 0.41, "learning_rate": 1.8406998288062657e-05, "loss": 2.1156, "step": 1430 }, { "epoch": 0.41, "learning_rate": 1.8401914933490324e-05, "loss": 2.0947, "step": 1432 }, { "epoch": 0.42, "learning_rate": 1.839682418545848e-05, "loss": 2.0166, "step": 1434 }, { "epoch": 0.42, "learning_rate": 1.8391726048446852e-05, "loss": 2.1002, "step": 1436 }, { "epoch": 0.42, "learning_rate": 1.8386620526941662e-05, "loss": 2.0844, "step": 1438 }, { "epoch": 0.42, "learning_rate": 1.8381507625435636e-05, "loss": 2.0448, "step": 1440 }, { "epoch": 0.42, "learning_rate": 1.8376387348427986e-05, "loss": 2.0582, "step": 1442 }, { "epoch": 0.42, "learning_rate": 1.8371259700424415e-05, "loss": 2.0872, "step": 1444 }, { "epoch": 0.42, "learning_rate": 1.8366124685937122e-05, "loss": 2.0711, "step": 1446 }, { "epoch": 0.42, "learning_rate": 1.836098230948478e-05, "loss": 2.1061, "step": 1448 }, { "epoch": 0.42, "learning_rate": 1.8355832575592544e-05, "loss": 2.0725, "step": 1450 }, { "epoch": 0.42, "learning_rate": 1.8350675488792036e-05, "loss": 2.0661, "step": 1452 }, { "epoch": 0.42, "learning_rate": 1.8345511053621356e-05, "loss": 2.0892, "step": 1454 }, { "epoch": 0.42, "learning_rate": 1.834033927462507e-05, "loss": 2.0469, "step": 1456 }, { "epoch": 0.42, "learning_rate": 1.8335160156354206e-05, "loss": 2.2153, "step": 1458 }, { "epoch": 0.42, "learning_rate": 1.8329973703366244e-05, "loss": 2.0874, "step": 1460 }, { "epoch": 0.42, "learning_rate": 1.8324779920225126e-05, "loss": 2.1474, "step": 1462 }, { "epoch": 0.42, "learning_rate": 1.8319578811501246e-05, "loss": 2.1195, "step": 1464 }, { "epoch": 0.42, "learning_rate": 1.831437038177143e-05, "loss": 2.0972, "step": 1466 }, { "epoch": 0.43, "learning_rate": 1.8309154635618967e-05, "loss": 1.9928, "step": 1468 }, { "epoch": 0.43, "learning_rate": 1.8303931577633567e-05, "loss": 2.0548, "step": 1470 }, { "epoch": 0.43, "learning_rate": 1.829870121241138e-05, "loss": 2.1038, "step": 1472 }, { "epoch": 0.43, "learning_rate": 1.8293463544554994e-05, "loss": 2.1347, "step": 1474 }, { "epoch": 0.43, "learning_rate": 1.8288218578673412e-05, "loss": 2.1166, "step": 1476 }, { "epoch": 0.43, "learning_rate": 1.8282966319382058e-05, "loss": 2.1109, "step": 1478 }, { "epoch": 0.43, "learning_rate": 1.8277706771302787e-05, "loss": 2.0491, "step": 1480 }, { "epoch": 0.43, "learning_rate": 1.8272439939063854e-05, "loss": 2.0205, "step": 1482 }, { "epoch": 0.43, "learning_rate": 1.8267165827299937e-05, "loss": 2.1435, "step": 1484 }, { "epoch": 0.43, "learning_rate": 1.8261884440652108e-05, "loss": 2.1956, "step": 1486 }, { "epoch": 0.43, "learning_rate": 1.8256595783767843e-05, "loss": 2.0148, "step": 1488 }, { "epoch": 0.43, "learning_rate": 1.825129986130102e-05, "loss": 2.0617, "step": 1490 }, { "epoch": 0.43, "learning_rate": 1.824599667791191e-05, "loss": 2.091, "step": 1492 }, { "epoch": 0.43, "learning_rate": 1.8240686238267174e-05, "loss": 2.0486, "step": 1494 }, { "epoch": 0.43, "learning_rate": 1.823536854703985e-05, "loss": 2.0421, "step": 1496 }, { "epoch": 0.43, "learning_rate": 1.8230043608909368e-05, "loss": 2.1232, "step": 1498 }, { "epoch": 0.43, "learning_rate": 1.8224711428561536e-05, "loss": 2.0997, "step": 1500 }, { "epoch": 0.43, "learning_rate": 1.8219372010688516e-05, "loss": 2.0716, "step": 1502 }, { "epoch": 0.44, "learning_rate": 1.821402535998886e-05, "loss": 2.1129, "step": 1504 }, { "epoch": 0.44, "learning_rate": 1.820867148116748e-05, "loss": 2.0452, "step": 1506 }, { "epoch": 0.44, "learning_rate": 1.820331037893564e-05, "loss": 2.0412, "step": 1508 }, { "epoch": 0.44, "learning_rate": 1.8197942058010968e-05, "loss": 2.0624, "step": 1510 }, { "epoch": 0.44, "learning_rate": 1.819256652311744e-05, "loss": 2.0284, "step": 1512 }, { "epoch": 0.44, "learning_rate": 1.8187183778985388e-05, "loss": 2.0125, "step": 1514 }, { "epoch": 0.44, "learning_rate": 1.818179383035148e-05, "loss": 2.0311, "step": 1516 }, { "epoch": 0.44, "learning_rate": 1.8176396681958725e-05, "loss": 2.0649, "step": 1518 }, { "epoch": 0.44, "learning_rate": 1.8170992338556468e-05, "loss": 2.0264, "step": 1520 }, { "epoch": 0.44, "learning_rate": 1.8165580804900383e-05, "loss": 2.0805, "step": 1522 }, { "epoch": 0.44, "learning_rate": 1.8160162085752483e-05, "loss": 2.0125, "step": 1524 }, { "epoch": 0.44, "learning_rate": 1.8154736185881088e-05, "loss": 2.0489, "step": 1526 }, { "epoch": 0.44, "learning_rate": 1.8149303110060846e-05, "loss": 2.126, "step": 1528 }, { "epoch": 0.44, "learning_rate": 1.8143862863072723e-05, "loss": 2.0846, "step": 1530 }, { "epoch": 0.44, "learning_rate": 1.8138415449703978e-05, "loss": 2.0037, "step": 1532 }, { "epoch": 0.44, "learning_rate": 1.8132960874748202e-05, "loss": 2.1824, "step": 1534 }, { "epoch": 0.44, "learning_rate": 1.8127499143005266e-05, "loss": 2.0413, "step": 1536 }, { "epoch": 0.45, "learning_rate": 1.812203025928135e-05, "loss": 2.1124, "step": 1538 }, { "epoch": 0.45, "learning_rate": 1.8116554228388925e-05, "loss": 2.0868, "step": 1540 }, { "epoch": 0.45, "learning_rate": 1.811107105514675e-05, "loss": 2.0494, "step": 1542 }, { "epoch": 0.45, "learning_rate": 1.8105580744379873e-05, "loss": 2.0931, "step": 1544 }, { "epoch": 0.45, "learning_rate": 1.8100083300919618e-05, "loss": 1.9948, "step": 1546 }, { "epoch": 0.45, "learning_rate": 1.8094578729603584e-05, "loss": 2.0586, "step": 1548 }, { "epoch": 0.45, "learning_rate": 1.8089067035275655e-05, "loss": 2.02, "step": 1550 }, { "epoch": 0.45, "learning_rate": 1.8083548222785964e-05, "loss": 2.0878, "step": 1552 }, { "epoch": 0.45, "learning_rate": 1.8078022296990926e-05, "loss": 2.1095, "step": 1554 }, { "epoch": 0.45, "learning_rate": 1.80724892627532e-05, "loss": 2.0944, "step": 1556 }, { "epoch": 0.45, "learning_rate": 1.8066949124941712e-05, "loss": 2.0507, "step": 1558 }, { "epoch": 0.45, "learning_rate": 1.8061401888431634e-05, "loss": 2.0588, "step": 1560 }, { "epoch": 0.45, "learning_rate": 1.8055847558104384e-05, "loss": 2.071, "step": 1562 }, { "epoch": 0.45, "learning_rate": 1.8050286138847627e-05, "loss": 2.0435, "step": 1564 }, { "epoch": 0.45, "learning_rate": 1.804471763555526e-05, "loss": 2.016, "step": 1566 }, { "epoch": 0.45, "learning_rate": 1.8039142053127414e-05, "loss": 2.1168, "step": 1568 }, { "epoch": 0.45, "learning_rate": 1.8033559396470455e-05, "loss": 2.0811, "step": 1570 }, { "epoch": 0.46, "learning_rate": 1.802796967049697e-05, "loss": 2.0482, "step": 1572 }, { "epoch": 0.46, "learning_rate": 1.8022372880125764e-05, "loss": 2.0646, "step": 1574 }, { "epoch": 0.46, "learning_rate": 1.801676903028187e-05, "loss": 2.0257, "step": 1576 }, { "epoch": 0.46, "learning_rate": 1.8011158125896523e-05, "loss": 2.1262, "step": 1578 }, { "epoch": 0.46, "learning_rate": 1.8005540171907165e-05, "loss": 2.0215, "step": 1580 }, { "epoch": 0.46, "learning_rate": 1.7999915173257446e-05, "loss": 2.0566, "step": 1582 }, { "epoch": 0.46, "learning_rate": 1.7994283134897212e-05, "loss": 2.0351, "step": 1584 }, { "epoch": 0.46, "learning_rate": 1.7988644061782508e-05, "loss": 2.0187, "step": 1586 }, { "epoch": 0.46, "learning_rate": 1.7982997958875566e-05, "loss": 1.97, "step": 1588 }, { "epoch": 0.46, "learning_rate": 1.7977344831144807e-05, "loss": 2.0563, "step": 1590 }, { "epoch": 0.46, "learning_rate": 1.797168468356483e-05, "loss": 2.0436, "step": 1592 }, { "epoch": 0.46, "learning_rate": 1.796601752111641e-05, "loss": 2.087, "step": 1594 }, { "epoch": 0.46, "learning_rate": 1.7960343348786503e-05, "loss": 2.0485, "step": 1596 }, { "epoch": 0.46, "learning_rate": 1.7954662171568226e-05, "loss": 2.0845, "step": 1598 }, { "epoch": 0.46, "learning_rate": 1.794897399446086e-05, "loss": 2.0078, "step": 1600 }, { "epoch": 0.46, "learning_rate": 1.794327882246985e-05, "loss": 2.039, "step": 1602 }, { "epoch": 0.46, "learning_rate": 1.79375766606068e-05, "loss": 2.0217, "step": 1604 }, { "epoch": 0.47, "learning_rate": 1.7931867513889448e-05, "loss": 2.1352, "step": 1606 }, { "epoch": 0.47, "learning_rate": 1.7926151387341696e-05, "loss": 2.0311, "step": 1608 }, { "epoch": 0.47, "learning_rate": 1.7920428285993585e-05, "loss": 2.0342, "step": 1610 }, { "epoch": 0.47, "learning_rate": 1.7914698214881286e-05, "loss": 2.0341, "step": 1612 }, { "epoch": 0.47, "learning_rate": 1.7908961179047106e-05, "loss": 2.082, "step": 1614 }, { "epoch": 0.47, "learning_rate": 1.7903217183539488e-05, "loss": 2.1386, "step": 1616 }, { "epoch": 0.47, "learning_rate": 1.7897466233412993e-05, "loss": 2.021, "step": 1618 }, { "epoch": 0.47, "learning_rate": 1.7891708333728296e-05, "loss": 2.0533, "step": 1620 }, { "epoch": 0.47, "learning_rate": 1.78859434895522e-05, "loss": 2.0906, "step": 1622 }, { "epoch": 0.47, "learning_rate": 1.7880171705957614e-05, "loss": 2.0136, "step": 1624 }, { "epoch": 0.47, "learning_rate": 1.787439298802355e-05, "loss": 2.1187, "step": 1626 }, { "epoch": 0.47, "learning_rate": 1.7868607340835124e-05, "loss": 2.0702, "step": 1628 }, { "epoch": 0.47, "learning_rate": 1.786281476948355e-05, "loss": 2.0357, "step": 1630 }, { "epoch": 0.47, "learning_rate": 1.7857015279066138e-05, "loss": 2.1559, "step": 1632 }, { "epoch": 0.47, "learning_rate": 1.785120887468628e-05, "loss": 2.031, "step": 1634 }, { "epoch": 0.47, "learning_rate": 1.784539556145346e-05, "loss": 1.9995, "step": 1636 }, { "epoch": 0.47, "learning_rate": 1.7839575344483237e-05, "loss": 2.0429, "step": 1638 }, { "epoch": 0.47, "learning_rate": 1.7833748228897243e-05, "loss": 2.0846, "step": 1640 }, { "epoch": 0.48, "learning_rate": 1.7827914219823184e-05, "loss": 2.0386, "step": 1642 }, { "epoch": 0.48, "learning_rate": 1.7822073322394834e-05, "loss": 2.1606, "step": 1644 }, { "epoch": 0.48, "learning_rate": 1.781622554175202e-05, "loss": 2.0079, "step": 1646 }, { "epoch": 0.48, "learning_rate": 1.781037088304064e-05, "loss": 2.0579, "step": 1648 }, { "epoch": 0.48, "learning_rate": 1.7804509351412628e-05, "loss": 2.0415, "step": 1650 }, { "epoch": 0.48, "learning_rate": 1.779864095202598e-05, "loss": 2.0523, "step": 1652 }, { "epoch": 0.48, "learning_rate": 1.779276569004473e-05, "loss": 2.0272, "step": 1654 }, { "epoch": 0.48, "learning_rate": 1.7786883570638945e-05, "loss": 1.9306, "step": 1656 }, { "epoch": 0.48, "learning_rate": 1.7780994598984737e-05, "loss": 2.0384, "step": 1658 }, { "epoch": 0.48, "learning_rate": 1.7775098780264243e-05, "loss": 2.0764, "step": 1660 }, { "epoch": 0.48, "learning_rate": 1.7769196119665628e-05, "loss": 2.0837, "step": 1662 }, { "epoch": 0.48, "learning_rate": 1.7763286622383067e-05, "loss": 1.9946, "step": 1664 }, { "epoch": 0.48, "learning_rate": 1.7757370293616763e-05, "loss": 2.0448, "step": 1666 }, { "epoch": 0.48, "learning_rate": 1.775144713857293e-05, "loss": 2.0251, "step": 1668 }, { "epoch": 0.48, "learning_rate": 1.7745517162463777e-05, "loss": 2.041, "step": 1670 }, { "epoch": 0.48, "learning_rate": 1.7739580370507533e-05, "loss": 2.1176, "step": 1672 }, { "epoch": 0.48, "learning_rate": 1.773363676792841e-05, "loss": 2.0606, "step": 1674 }, { "epoch": 0.49, "learning_rate": 1.7727686359956613e-05, "loss": 2.0368, "step": 1676 }, { "epoch": 0.49, "learning_rate": 1.7721729151828355e-05, "loss": 2.103, "step": 1678 }, { "epoch": 0.49, "learning_rate": 1.771576514878581e-05, "loss": 2.0597, "step": 1680 }, { "epoch": 0.49, "learning_rate": 1.7709794356077145e-05, "loss": 1.9877, "step": 1682 }, { "epoch": 0.49, "learning_rate": 1.770381677895649e-05, "loss": 2.1252, "step": 1684 }, { "epoch": 0.49, "learning_rate": 1.769783242268396e-05, "loss": 2.0272, "step": 1686 }, { "epoch": 0.49, "learning_rate": 1.7691841292525625e-05, "loss": 1.9746, "step": 1688 }, { "epoch": 0.49, "learning_rate": 1.7685843393753518e-05, "loss": 2.0328, "step": 1690 }, { "epoch": 0.49, "learning_rate": 1.767983873164563e-05, "loss": 2.0766, "step": 1692 }, { "epoch": 0.49, "learning_rate": 1.76738273114859e-05, "loss": 1.9991, "step": 1694 }, { "epoch": 0.49, "learning_rate": 1.766780913856422e-05, "loss": 2.0019, "step": 1696 }, { "epoch": 0.49, "learning_rate": 1.766178421817642e-05, "loss": 2.0277, "step": 1698 }, { "epoch": 0.49, "learning_rate": 1.7655752555624265e-05, "loss": 2.0549, "step": 1700 }, { "epoch": 0.49, "learning_rate": 1.7649714156215457e-05, "loss": 2.0357, "step": 1702 }, { "epoch": 0.49, "learning_rate": 1.7643669025263625e-05, "loss": 2.0472, "step": 1704 }, { "epoch": 0.49, "learning_rate": 1.7637617168088327e-05, "loss": 2.0235, "step": 1706 }, { "epoch": 0.49, "learning_rate": 1.7631558590015027e-05, "loss": 2.0642, "step": 1708 }, { "epoch": 0.5, "learning_rate": 1.7625493296375117e-05, "loss": 2.0306, "step": 1710 }, { "epoch": 0.5, "learning_rate": 1.761942129250589e-05, "loss": 2.0909, "step": 1712 }, { "epoch": 0.5, "learning_rate": 1.761334258375055e-05, "loss": 2.0607, "step": 1714 }, { "epoch": 0.5, "learning_rate": 1.760725717545819e-05, "loss": 2.0383, "step": 1716 }, { "epoch": 0.5, "learning_rate": 1.760116507298381e-05, "loss": 2.0077, "step": 1718 }, { "epoch": 0.5, "learning_rate": 1.75950662816883e-05, "loss": 2.0952, "step": 1720 }, { "epoch": 0.5, "learning_rate": 1.7588960806938424e-05, "loss": 2.0552, "step": 1722 }, { "epoch": 0.5, "learning_rate": 1.7582848654106847e-05, "loss": 2.0309, "step": 1724 }, { "epoch": 0.5, "learning_rate": 1.757672982857209e-05, "loss": 2.096, "step": 1726 }, { "epoch": 0.5, "learning_rate": 1.757060433571856e-05, "loss": 1.9698, "step": 1728 }, { "epoch": 0.5, "learning_rate": 1.756447218093652e-05, "loss": 1.9976, "step": 1730 }, { "epoch": 0.5, "learning_rate": 1.7558333369622113e-05, "loss": 2.1042, "step": 1732 }, { "epoch": 0.5, "learning_rate": 1.7552187907177317e-05, "loss": 2.0519, "step": 1734 }, { "epoch": 0.5, "learning_rate": 1.754603579900998e-05, "loss": 2.0431, "step": 1736 }, { "epoch": 0.5, "learning_rate": 1.7539877050533787e-05, "loss": 2.1122, "step": 1738 }, { "epoch": 0.5, "learning_rate": 1.753371166716828e-05, "loss": 2.0859, "step": 1740 }, { "epoch": 0.5, "learning_rate": 1.752753965433882e-05, "loss": 2.0125, "step": 1742 }, { "epoch": 0.51, "learning_rate": 1.7521361017476622e-05, "loss": 2.0359, "step": 1744 }, { "epoch": 0.51, "learning_rate": 1.7515175762018713e-05, "loss": 2.0384, "step": 1746 }, { "epoch": 0.51, "learning_rate": 1.750898389340796e-05, "loss": 2.0546, "step": 1748 }, { "epoch": 0.51, "learning_rate": 1.750278541709303e-05, "loss": 1.9671, "step": 1750 }, { "epoch": 0.51, "learning_rate": 1.7496580338528427e-05, "loss": 2.1202, "step": 1752 }, { "epoch": 0.51, "learning_rate": 1.7490368663174445e-05, "loss": 2.0299, "step": 1754 }, { "epoch": 0.51, "learning_rate": 1.7484150396497194e-05, "loss": 2.0596, "step": 1756 }, { "epoch": 0.51, "learning_rate": 1.7477925543968586e-05, "loss": 2.0507, "step": 1758 }, { "epoch": 0.51, "learning_rate": 1.747169411106632e-05, "loss": 2.1019, "step": 1760 }, { "epoch": 0.51, "learning_rate": 1.7465456103273886e-05, "loss": 2.0797, "step": 1762 }, { "epoch": 0.51, "learning_rate": 1.745921152608057e-05, "loss": 2.0419, "step": 1764 }, { "epoch": 0.51, "learning_rate": 1.7452960384981432e-05, "loss": 2.0454, "step": 1766 }, { "epoch": 0.51, "learning_rate": 1.7446702685477305e-05, "loss": 2.076, "step": 1768 }, { "epoch": 0.51, "learning_rate": 1.7440438433074797e-05, "loss": 2.0108, "step": 1770 }, { "epoch": 0.51, "learning_rate": 1.743416763328628e-05, "loss": 1.9531, "step": 1772 }, { "epoch": 0.51, "learning_rate": 1.7427890291629895e-05, "loss": 2.0058, "step": 1774 }, { "epoch": 0.51, "learning_rate": 1.7421606413629528e-05, "loss": 1.9916, "step": 1776 }, { "epoch": 0.51, "learning_rate": 1.741531600481483e-05, "loss": 1.9424, "step": 1778 }, { "epoch": 0.52, "learning_rate": 1.7409019070721184e-05, "loss": 2.0645, "step": 1780 }, { "epoch": 0.52, "learning_rate": 1.7402715616889728e-05, "loss": 2.1062, "step": 1782 }, { "epoch": 0.52, "learning_rate": 1.7396405648867328e-05, "loss": 2.0967, "step": 1784 }, { "epoch": 0.52, "learning_rate": 1.7390089172206594e-05, "loss": 2.0825, "step": 1786 }, { "epoch": 0.52, "learning_rate": 1.7383766192465847e-05, "loss": 1.9999, "step": 1788 }, { "epoch": 0.52, "learning_rate": 1.7377436715209144e-05, "loss": 2.058, "step": 1790 }, { "epoch": 0.52, "learning_rate": 1.7371100746006254e-05, "loss": 2.0412, "step": 1792 }, { "epoch": 0.52, "learning_rate": 1.736475829043266e-05, "loss": 2.1359, "step": 1794 }, { "epoch": 0.52, "learning_rate": 1.7358409354069554e-05, "loss": 1.9961, "step": 1796 }, { "epoch": 0.52, "learning_rate": 1.7352053942503827e-05, "loss": 2.0994, "step": 1798 }, { "epoch": 0.52, "learning_rate": 1.7345692061328072e-05, "loss": 1.9647, "step": 1800 }, { "epoch": 0.52, "learning_rate": 1.733932371614057e-05, "loss": 2.0731, "step": 1802 }, { "epoch": 0.52, "learning_rate": 1.73329489125453e-05, "loss": 2.0636, "step": 1804 }, { "epoch": 0.52, "learning_rate": 1.7326567656151915e-05, "loss": 2.0053, "step": 1806 }, { "epoch": 0.52, "learning_rate": 1.732017995257575e-05, "loss": 2.0001, "step": 1808 }, { "epoch": 0.52, "learning_rate": 1.7313785807437812e-05, "loss": 1.9585, "step": 1810 }, { "epoch": 0.52, "learning_rate": 1.7307385226364773e-05, "loss": 2.0508, "step": 1812 }, { "epoch": 0.53, "learning_rate": 1.730097821498898e-05, "loss": 2.0681, "step": 1814 }, { "epoch": 0.53, "learning_rate": 1.7294564778948422e-05, "loss": 1.997, "step": 1816 }, { "epoch": 0.53, "learning_rate": 1.728814492388676e-05, "loss": 1.9991, "step": 1818 }, { "epoch": 0.53, "learning_rate": 1.7281718655453285e-05, "loss": 2.0365, "step": 1820 }, { "epoch": 0.53, "learning_rate": 1.7275285979302952e-05, "loss": 2.0506, "step": 1822 }, { "epoch": 0.53, "learning_rate": 1.7268846901096332e-05, "loss": 2.03, "step": 1824 }, { "epoch": 0.53, "learning_rate": 1.7262401426499648e-05, "loss": 2.0482, "step": 1826 }, { "epoch": 0.53, "learning_rate": 1.7255949561184738e-05, "loss": 1.9595, "step": 1828 }, { "epoch": 0.53, "learning_rate": 1.7249491310829077e-05, "loss": 2.0894, "step": 1830 }, { "epoch": 0.53, "learning_rate": 1.724302668111575e-05, "loss": 2.0724, "step": 1832 }, { "epoch": 0.53, "learning_rate": 1.7236555677733454e-05, "loss": 2.0303, "step": 1834 }, { "epoch": 0.53, "learning_rate": 1.7230078306376502e-05, "loss": 2.1064, "step": 1836 }, { "epoch": 0.53, "learning_rate": 1.7223594572744806e-05, "loss": 1.9888, "step": 1838 }, { "epoch": 0.53, "learning_rate": 1.7217104482543875e-05, "loss": 2.0056, "step": 1840 }, { "epoch": 0.53, "learning_rate": 1.721060804148482e-05, "loss": 2.0771, "step": 1842 }, { "epoch": 0.53, "learning_rate": 1.720410525528433e-05, "loss": 2.0223, "step": 1844 }, { "epoch": 0.53, "learning_rate": 1.7197596129664683e-05, "loss": 1.9976, "step": 1846 }, { "epoch": 0.54, "learning_rate": 1.7191080670353734e-05, "loss": 2.0212, "step": 1848 }, { "epoch": 0.54, "learning_rate": 1.718455888308491e-05, "loss": 2.0439, "step": 1850 }, { "epoch": 0.54, "learning_rate": 1.7178030773597213e-05, "loss": 2.0355, "step": 1852 }, { "epoch": 0.54, "learning_rate": 1.7171496347635205e-05, "loss": 2.0823, "step": 1854 }, { "epoch": 0.54, "learning_rate": 1.7164955610949004e-05, "loss": 2.0116, "step": 1856 }, { "epoch": 0.54, "learning_rate": 1.715840856929428e-05, "loss": 2.0371, "step": 1858 }, { "epoch": 0.54, "learning_rate": 1.7151855228432254e-05, "loss": 2.0431, "step": 1860 }, { "epoch": 0.54, "learning_rate": 1.7145295594129695e-05, "loss": 1.9825, "step": 1862 }, { "epoch": 0.54, "learning_rate": 1.71387296721589e-05, "loss": 2.0695, "step": 1864 }, { "epoch": 0.54, "learning_rate": 1.7132157468297707e-05, "loss": 1.9384, "step": 1866 }, { "epoch": 0.54, "learning_rate": 1.712557898832947e-05, "loss": 2.0299, "step": 1868 }, { "epoch": 0.54, "learning_rate": 1.711899423804309e-05, "loss": 1.9555, "step": 1870 }, { "epoch": 0.54, "learning_rate": 1.7112403223232962e-05, "loss": 2.0391, "step": 1872 }, { "epoch": 0.54, "learning_rate": 1.7105805949699e-05, "loss": 2.0386, "step": 1874 }, { "epoch": 0.54, "learning_rate": 1.7099202423246632e-05, "loss": 2.0401, "step": 1876 }, { "epoch": 0.54, "learning_rate": 1.709259264968678e-05, "loss": 2.044, "step": 1878 }, { "epoch": 0.54, "learning_rate": 1.7085976634835873e-05, "loss": 2.0466, "step": 1880 }, { "epoch": 0.55, "learning_rate": 1.707935438451582e-05, "loss": 2.1137, "step": 1882 }, { "epoch": 0.55, "learning_rate": 1.7072725904554025e-05, "loss": 1.9271, "step": 1884 }, { "epoch": 0.55, "learning_rate": 1.7066091200783378e-05, "loss": 1.9648, "step": 1886 }, { "epoch": 0.55, "learning_rate": 1.7059450279042233e-05, "loss": 2.0104, "step": 1888 }, { "epoch": 0.55, "learning_rate": 1.7052803145174422e-05, "loss": 2.0178, "step": 1890 }, { "epoch": 0.55, "learning_rate": 1.7046149805029254e-05, "loss": 1.9954, "step": 1892 }, { "epoch": 0.55, "learning_rate": 1.7039490264461478e-05, "loss": 2.065, "step": 1894 }, { "epoch": 0.55, "learning_rate": 1.703282452933132e-05, "loss": 1.9824, "step": 1896 }, { "epoch": 0.55, "learning_rate": 1.702615260550444e-05, "loss": 2.0759, "step": 1898 }, { "epoch": 0.55, "learning_rate": 1.701947449885196e-05, "loss": 2.0015, "step": 1900 }, { "epoch": 0.55, "learning_rate": 1.7012790215250432e-05, "loss": 2.0361, "step": 1902 }, { "epoch": 0.55, "learning_rate": 1.700609976058184e-05, "loss": 2.0795, "step": 1904 }, { "epoch": 0.55, "learning_rate": 1.699940314073361e-05, "loss": 2.0514, "step": 1906 }, { "epoch": 0.55, "learning_rate": 1.6992700361598586e-05, "loss": 2.0818, "step": 1908 }, { "epoch": 0.55, "learning_rate": 1.6985991429075038e-05, "loss": 2.0405, "step": 1910 }, { "epoch": 0.55, "learning_rate": 1.6979276349066638e-05, "loss": 1.9782, "step": 1912 }, { "epoch": 0.55, "learning_rate": 1.6972555127482485e-05, "loss": 1.9911, "step": 1914 }, { "epoch": 0.55, "learning_rate": 1.6965827770237068e-05, "loss": 2.0631, "step": 1916 }, { "epoch": 0.56, "learning_rate": 1.695909428325028e-05, "loss": 2.0176, "step": 1918 }, { "epoch": 0.56, "learning_rate": 1.695235467244741e-05, "loss": 2.0008, "step": 1920 }, { "epoch": 0.56, "learning_rate": 1.6945608943759137e-05, "loss": 2.0284, "step": 1922 }, { "epoch": 0.56, "learning_rate": 1.693885710312152e-05, "loss": 1.9363, "step": 1924 }, { "epoch": 0.56, "learning_rate": 1.6932099156475995e-05, "loss": 2.0708, "step": 1926 }, { "epoch": 0.56, "learning_rate": 1.6925335109769372e-05, "loss": 2.0562, "step": 1928 }, { "epoch": 0.56, "learning_rate": 1.6918564968953842e-05, "loss": 1.9718, "step": 1930 }, { "epoch": 0.56, "learning_rate": 1.6911788739986933e-05, "loss": 1.9959, "step": 1932 }, { "epoch": 0.56, "learning_rate": 1.6905006428831553e-05, "loss": 2.09, "step": 1934 }, { "epoch": 0.56, "learning_rate": 1.689821804145595e-05, "loss": 2.0036, "step": 1936 }, { "epoch": 0.56, "learning_rate": 1.6891423583833724e-05, "loss": 2.0407, "step": 1938 }, { "epoch": 0.56, "learning_rate": 1.6884623061943815e-05, "loss": 2.0177, "step": 1940 }, { "epoch": 0.56, "learning_rate": 1.6877816481770497e-05, "loss": 2.023, "step": 1942 }, { "epoch": 0.56, "learning_rate": 1.687100384930338e-05, "loss": 2.0787, "step": 1944 }, { "epoch": 0.56, "learning_rate": 1.68641851705374e-05, "loss": 2.0041, "step": 1946 }, { "epoch": 0.56, "learning_rate": 1.685736045147281e-05, "loss": 1.9607, "step": 1948 }, { "epoch": 0.56, "learning_rate": 1.6850529698115177e-05, "loss": 1.9651, "step": 1950 }, { "epoch": 0.57, "learning_rate": 1.6843692916475386e-05, "loss": 1.9384, "step": 1952 }, { "epoch": 0.57, "learning_rate": 1.6836850112569613e-05, "loss": 2.0112, "step": 1954 }, { "epoch": 0.57, "learning_rate": 1.6830001292419344e-05, "loss": 2.0481, "step": 1956 }, { "epoch": 0.57, "learning_rate": 1.6823146462051364e-05, "loss": 1.9834, "step": 1958 }, { "epoch": 0.57, "learning_rate": 1.681628562749773e-05, "loss": 2.0328, "step": 1960 }, { "epoch": 0.57, "learning_rate": 1.6809418794795796e-05, "loss": 2.1084, "step": 1962 }, { "epoch": 0.57, "learning_rate": 1.6802545969988196e-05, "loss": 2.0001, "step": 1964 }, { "epoch": 0.57, "learning_rate": 1.6795667159122822e-05, "loss": 2.0554, "step": 1966 }, { "epoch": 0.57, "learning_rate": 1.678878236825285e-05, "loss": 2.0771, "step": 1968 }, { "epoch": 0.57, "learning_rate": 1.6781891603436705e-05, "loss": 2.0145, "step": 1970 }, { "epoch": 0.57, "learning_rate": 1.677499487073808e-05, "loss": 2.0167, "step": 1972 }, { "epoch": 0.57, "learning_rate": 1.6768092176225917e-05, "loss": 1.9822, "step": 1974 }, { "epoch": 0.57, "learning_rate": 1.676118352597439e-05, "loss": 2.0824, "step": 1976 }, { "epoch": 0.57, "learning_rate": 1.6754268926062936e-05, "loss": 2.0327, "step": 1978 }, { "epoch": 0.57, "learning_rate": 1.6747348382576213e-05, "loss": 2.0156, "step": 1980 }, { "epoch": 0.57, "learning_rate": 1.674042190160412e-05, "loss": 2.0499, "step": 1982 }, { "epoch": 0.57, "learning_rate": 1.673348948924176e-05, "loss": 1.9036, "step": 1984 }, { "epoch": 0.58, "learning_rate": 1.6726551151589485e-05, "loss": 1.9166, "step": 1986 }, { "epoch": 0.58, "learning_rate": 1.671960689475284e-05, "loss": 1.9551, "step": 1988 }, { "epoch": 0.58, "learning_rate": 1.6712656724842583e-05, "loss": 2.0117, "step": 1990 }, { "epoch": 0.58, "learning_rate": 1.6705700647974677e-05, "loss": 2.0047, "step": 1992 }, { "epoch": 0.58, "learning_rate": 1.6698738670270284e-05, "loss": 2.0296, "step": 1994 }, { "epoch": 0.58, "learning_rate": 1.669177079785576e-05, "loss": 2.0904, "step": 1996 }, { "epoch": 0.58, "learning_rate": 1.6684797036862635e-05, "loss": 2.0254, "step": 1998 }, { "epoch": 0.58, "learning_rate": 1.6677817393427645e-05, "loss": 2.0245, "step": 2000 }, { "epoch": 0.58, "learning_rate": 1.6670831873692684e-05, "loss": 2.0195, "step": 2002 }, { "epoch": 0.58, "learning_rate": 1.6663840483804817e-05, "loss": 2.0221, "step": 2004 }, { "epoch": 0.58, "learning_rate": 1.6656843229916286e-05, "loss": 1.9907, "step": 2006 }, { "epoch": 0.58, "learning_rate": 1.664984011818448e-05, "loss": 1.9537, "step": 2008 }, { "epoch": 0.58, "learning_rate": 1.6642831154771958e-05, "loss": 1.9859, "step": 2010 }, { "epoch": 0.58, "learning_rate": 1.6635816345846413e-05, "loss": 2.0092, "step": 2012 }, { "epoch": 0.58, "learning_rate": 1.6628795697580688e-05, "loss": 1.9905, "step": 2014 }, { "epoch": 0.58, "learning_rate": 1.6621769216152768e-05, "loss": 1.9855, "step": 2016 }, { "epoch": 0.58, "learning_rate": 1.661473690774577e-05, "loss": 1.9618, "step": 2018 }, { "epoch": 0.58, "learning_rate": 1.6607698778547926e-05, "loss": 2.0367, "step": 2020 }, { "epoch": 0.59, "learning_rate": 1.6600654834752612e-05, "loss": 2.0085, "step": 2022 }, { "epoch": 0.59, "learning_rate": 1.6593605082558303e-05, "loss": 2.0916, "step": 2024 }, { "epoch": 0.59, "learning_rate": 1.6586549528168596e-05, "loss": 2.0532, "step": 2026 }, { "epoch": 0.59, "learning_rate": 1.657948817779219e-05, "loss": 2.0195, "step": 2028 }, { "epoch": 0.59, "learning_rate": 1.6572421037642878e-05, "loss": 1.9261, "step": 2030 }, { "epoch": 0.59, "learning_rate": 1.6565348113939555e-05, "loss": 2.0061, "step": 2032 }, { "epoch": 0.59, "learning_rate": 1.6558269412906206e-05, "loss": 1.9216, "step": 2034 }, { "epoch": 0.59, "learning_rate": 1.65511849407719e-05, "loss": 1.9261, "step": 2036 }, { "epoch": 0.59, "learning_rate": 1.654409470377078e-05, "loss": 2.0259, "step": 2038 }, { "epoch": 0.59, "learning_rate": 1.6536998708142064e-05, "loss": 2.0108, "step": 2040 }, { "epoch": 0.59, "learning_rate": 1.652989696013003e-05, "loss": 2.0059, "step": 2042 }, { "epoch": 0.59, "learning_rate": 1.6522789465984043e-05, "loss": 2.0246, "step": 2044 }, { "epoch": 0.59, "learning_rate": 1.6515676231958488e-05, "loss": 1.9696, "step": 2046 }, { "epoch": 0.59, "learning_rate": 1.6508557264312835e-05, "loss": 1.9981, "step": 2048 }, { "epoch": 0.59, "learning_rate": 1.6501432569311574e-05, "loss": 1.9124, "step": 2050 }, { "epoch": 0.59, "learning_rate": 1.649430215322425e-05, "loss": 2.065, "step": 2052 }, { "epoch": 0.59, "learning_rate": 1.648716602232544e-05, "loss": 1.9602, "step": 2054 }, { "epoch": 0.6, "learning_rate": 1.6480024182894743e-05, "loss": 1.9818, "step": 2056 }, { "epoch": 0.6, "learning_rate": 1.6472876641216786e-05, "loss": 2.065, "step": 2058 }, { "epoch": 0.6, "learning_rate": 1.6465723403581218e-05, "loss": 1.9563, "step": 2060 }, { "epoch": 0.6, "learning_rate": 1.645856447628269e-05, "loss": 2.0489, "step": 2062 }, { "epoch": 0.6, "learning_rate": 1.6451399865620872e-05, "loss": 2.0287, "step": 2064 }, { "epoch": 0.6, "learning_rate": 1.6444229577900422e-05, "loss": 1.9528, "step": 2066 }, { "epoch": 0.6, "learning_rate": 1.6437053619431007e-05, "loss": 2.0533, "step": 2068 }, { "epoch": 0.6, "learning_rate": 1.6429871996527277e-05, "loss": 1.9974, "step": 2070 }, { "epoch": 0.6, "learning_rate": 1.642268471550886e-05, "loss": 2.015, "step": 2072 }, { "epoch": 0.6, "learning_rate": 1.6415491782700383e-05, "loss": 2.0061, "step": 2074 }, { "epoch": 0.6, "learning_rate": 1.6408293204431423e-05, "loss": 1.9186, "step": 2076 }, { "epoch": 0.6, "learning_rate": 1.640108898703654e-05, "loss": 2.0494, "step": 2078 }, { "epoch": 0.6, "learning_rate": 1.6393879136855247e-05, "loss": 1.9877, "step": 2080 }, { "epoch": 0.6, "learning_rate": 1.6386663660232027e-05, "loss": 1.9775, "step": 2082 }, { "epoch": 0.6, "learning_rate": 1.6379442563516304e-05, "loss": 1.9577, "step": 2084 }, { "epoch": 0.6, "learning_rate": 1.637221585306244e-05, "loss": 2.0617, "step": 2086 }, { "epoch": 0.6, "learning_rate": 1.6364983535229755e-05, "loss": 1.9878, "step": 2088 }, { "epoch": 0.61, "learning_rate": 1.6357745616382488e-05, "loss": 1.9319, "step": 2090 }, { "epoch": 0.61, "learning_rate": 1.6350502102889825e-05, "loss": 2.0001, "step": 2092 }, { "epoch": 0.61, "learning_rate": 1.634325300112585e-05, "loss": 1.9845, "step": 2094 }, { "epoch": 0.61, "learning_rate": 1.6335998317469587e-05, "loss": 1.9414, "step": 2096 }, { "epoch": 0.61, "learning_rate": 1.6328738058304956e-05, "loss": 1.981, "step": 2098 }, { "epoch": 0.61, "learning_rate": 1.6321472230020795e-05, "loss": 2.0057, "step": 2100 }, { "epoch": 0.61, "learning_rate": 1.6314200839010834e-05, "loss": 1.9318, "step": 2102 }, { "epoch": 0.61, "learning_rate": 1.6306923891673705e-05, "loss": 1.9895, "step": 2104 }, { "epoch": 0.61, "learning_rate": 1.6299641394412927e-05, "loss": 1.9849, "step": 2106 }, { "epoch": 0.61, "learning_rate": 1.6292353353636897e-05, "loss": 2.0422, "step": 2108 }, { "epoch": 0.61, "learning_rate": 1.62850597757589e-05, "loss": 2.0107, "step": 2110 }, { "epoch": 0.61, "learning_rate": 1.6277760667197088e-05, "loss": 1.9711, "step": 2112 }, { "epoch": 0.61, "learning_rate": 1.6270456034374477e-05, "loss": 1.9908, "step": 2114 }, { "epoch": 0.61, "learning_rate": 1.626314588371895e-05, "loss": 1.9522, "step": 2116 }, { "epoch": 0.61, "learning_rate": 1.6255830221663243e-05, "loss": 2.026, "step": 2118 }, { "epoch": 0.61, "learning_rate": 1.6248509054644945e-05, "loss": 1.9476, "step": 2120 }, { "epoch": 0.61, "learning_rate": 1.6241182389106486e-05, "loss": 1.9634, "step": 2122 }, { "epoch": 0.62, "learning_rate": 1.6233850231495128e-05, "loss": 1.9652, "step": 2124 }, { "epoch": 0.62, "learning_rate": 1.622651258826298e-05, "loss": 2.0089, "step": 2126 }, { "epoch": 0.62, "learning_rate": 1.6219169465866966e-05, "loss": 1.9047, "step": 2128 }, { "epoch": 0.62, "learning_rate": 1.621182087076884e-05, "loss": 1.9699, "step": 2130 }, { "epoch": 0.62, "learning_rate": 1.620446680943517e-05, "loss": 1.894, "step": 2132 }, { "epoch": 0.62, "learning_rate": 1.619710728833733e-05, "loss": 1.9674, "step": 2134 }, { "epoch": 0.62, "learning_rate": 1.6189742313951498e-05, "loss": 1.9691, "step": 2136 }, { "epoch": 0.62, "learning_rate": 1.618237189275866e-05, "loss": 1.9821, "step": 2138 }, { "epoch": 0.62, "learning_rate": 1.617499603124458e-05, "loss": 1.9817, "step": 2140 }, { "epoch": 0.62, "learning_rate": 1.6167614735899828e-05, "loss": 2.0404, "step": 2142 }, { "epoch": 0.62, "learning_rate": 1.616022801321974e-05, "loss": 1.9597, "step": 2144 }, { "epoch": 0.62, "learning_rate": 1.6152835869704433e-05, "loss": 1.8986, "step": 2146 }, { "epoch": 0.62, "learning_rate": 1.61454383118588e-05, "loss": 2.0412, "step": 2148 }, { "epoch": 0.62, "learning_rate": 1.6138035346192485e-05, "loss": 2.0049, "step": 2150 }, { "epoch": 0.62, "learning_rate": 1.6130626979219905e-05, "loss": 2.051, "step": 2152 }, { "epoch": 0.62, "learning_rate": 1.6123213217460226e-05, "loss": 1.9611, "step": 2154 }, { "epoch": 0.62, "learning_rate": 1.611579406743735e-05, "loss": 1.9976, "step": 2156 }, { "epoch": 0.62, "learning_rate": 1.610836953567994e-05, "loss": 1.985, "step": 2158 }, { "epoch": 0.63, "learning_rate": 1.6100939628721376e-05, "loss": 1.9573, "step": 2160 }, { "epoch": 0.63, "learning_rate": 1.6093504353099784e-05, "loss": 2.019, "step": 2162 }, { "epoch": 0.63, "learning_rate": 1.6086063715358003e-05, "loss": 1.9946, "step": 2164 }, { "epoch": 0.63, "learning_rate": 1.6078617722043595e-05, "loss": 1.9879, "step": 2166 }, { "epoch": 0.63, "learning_rate": 1.6071166379708834e-05, "loss": 2.0171, "step": 2168 }, { "epoch": 0.63, "learning_rate": 1.6063709694910703e-05, "loss": 2.0753, "step": 2170 }, { "epoch": 0.63, "learning_rate": 1.605624767421088e-05, "loss": 1.9535, "step": 2172 }, { "epoch": 0.63, "learning_rate": 1.6048780324175748e-05, "loss": 1.9711, "step": 2174 }, { "epoch": 0.63, "learning_rate": 1.604130765137637e-05, "loss": 1.951, "step": 2176 }, { "epoch": 0.63, "learning_rate": 1.6033829662388498e-05, "loss": 1.9996, "step": 2178 }, { "epoch": 0.63, "learning_rate": 1.6026346363792565e-05, "loss": 2.0537, "step": 2180 }, { "epoch": 0.63, "learning_rate": 1.6018857762173672e-05, "loss": 2.0046, "step": 2182 }, { "epoch": 0.63, "learning_rate": 1.6011363864121582e-05, "loss": 2.0638, "step": 2184 }, { "epoch": 0.63, "learning_rate": 1.600386467623073e-05, "loss": 2.0263, "step": 2186 }, { "epoch": 0.63, "learning_rate": 1.5996360205100197e-05, "loss": 2.01, "step": 2188 }, { "epoch": 0.63, "learning_rate": 1.598885045733372e-05, "loss": 2.0062, "step": 2190 }, { "epoch": 0.63, "learning_rate": 1.5981335439539672e-05, "loss": 2.0011, "step": 2192 }, { "epoch": 0.64, "learning_rate": 1.5973815158331065e-05, "loss": 1.9568, "step": 2194 }, { "epoch": 0.64, "learning_rate": 1.5966289620325548e-05, "loss": 1.9392, "step": 2196 }, { "epoch": 0.64, "learning_rate": 1.595875883214539e-05, "loss": 1.9656, "step": 2198 }, { "epoch": 0.64, "learning_rate": 1.5951222800417483e-05, "loss": 2.0052, "step": 2200 }, { "epoch": 0.64, "learning_rate": 1.5943681531773335e-05, "loss": 1.9748, "step": 2202 }, { "epoch": 0.64, "learning_rate": 1.593613503284906e-05, "loss": 1.9759, "step": 2204 }, { "epoch": 0.64, "learning_rate": 1.5928583310285365e-05, "loss": 2.0389, "step": 2206 }, { "epoch": 0.64, "learning_rate": 1.5921026370727574e-05, "loss": 1.9823, "step": 2208 }, { "epoch": 0.64, "learning_rate": 1.5913464220825586e-05, "loss": 1.9362, "step": 2210 }, { "epoch": 0.64, "learning_rate": 1.5905896867233896e-05, "loss": 2.0005, "step": 2212 }, { "epoch": 0.64, "learning_rate": 1.5898324316611563e-05, "loss": 2.0005, "step": 2214 }, { "epoch": 0.64, "learning_rate": 1.589074657562223e-05, "loss": 1.993, "step": 2216 }, { "epoch": 0.64, "learning_rate": 1.588316365093411e-05, "loss": 1.9885, "step": 2218 }, { "epoch": 0.64, "learning_rate": 1.5875575549219974e-05, "loss": 2.0116, "step": 2220 }, { "epoch": 0.64, "learning_rate": 1.5867982277157138e-05, "loss": 1.9939, "step": 2222 }, { "epoch": 0.64, "learning_rate": 1.5860383841427488e-05, "loss": 1.969, "step": 2224 }, { "epoch": 0.64, "learning_rate": 1.5852780248717442e-05, "loss": 1.9508, "step": 2226 }, { "epoch": 0.65, "learning_rate": 1.5845171505717947e-05, "loss": 2.0137, "step": 2228 }, { "epoch": 0.65, "learning_rate": 1.5837557619124507e-05, "loss": 1.9938, "step": 2230 }, { "epoch": 0.65, "learning_rate": 1.582993859563713e-05, "loss": 2.021, "step": 2232 }, { "epoch": 0.65, "learning_rate": 1.582231444196035e-05, "loss": 1.9529, "step": 2234 }, { "epoch": 0.65, "learning_rate": 1.581468516480322e-05, "loss": 2.1067, "step": 2236 }, { "epoch": 0.65, "learning_rate": 1.5807050770879305e-05, "loss": 2.033, "step": 2238 }, { "epoch": 0.65, "learning_rate": 1.5799411266906658e-05, "loss": 2.0098, "step": 2240 }, { "epoch": 0.65, "learning_rate": 1.579176665960784e-05, "loss": 2.0789, "step": 2242 }, { "epoch": 0.65, "learning_rate": 1.5784116955709897e-05, "loss": 1.892, "step": 2244 }, { "epoch": 0.65, "learning_rate": 1.577646216194437e-05, "loss": 1.9689, "step": 2246 }, { "epoch": 0.65, "learning_rate": 1.5768802285047274e-05, "loss": 1.9861, "step": 2248 }, { "epoch": 0.65, "learning_rate": 1.5761137331759084e-05, "loss": 1.963, "step": 2250 }, { "epoch": 0.65, "learning_rate": 1.575346730882476e-05, "loss": 1.9178, "step": 2252 }, { "epoch": 0.65, "learning_rate": 1.5745792222993715e-05, "loss": 1.9682, "step": 2254 }, { "epoch": 0.65, "learning_rate": 1.5738112081019817e-05, "loss": 1.9967, "step": 2256 }, { "epoch": 0.65, "learning_rate": 1.5730426889661387e-05, "loss": 1.9653, "step": 2258 }, { "epoch": 0.65, "learning_rate": 1.5722736655681187e-05, "loss": 2.0259, "step": 2260 }, { "epoch": 0.66, "learning_rate": 1.571504138584641e-05, "loss": 1.9872, "step": 2262 }, { "epoch": 0.66, "learning_rate": 1.5707341086928698e-05, "loss": 1.9854, "step": 2264 }, { "epoch": 0.66, "learning_rate": 1.5699635765704097e-05, "loss": 1.9848, "step": 2266 }, { "epoch": 0.66, "learning_rate": 1.569192542895309e-05, "loss": 2.0996, "step": 2268 }, { "epoch": 0.66, "learning_rate": 1.5684210083460564e-05, "loss": 1.999, "step": 2270 }, { "epoch": 0.66, "learning_rate": 1.567648973601581e-05, "loss": 2.0225, "step": 2272 }, { "epoch": 0.66, "learning_rate": 1.5668764393412536e-05, "loss": 2.0313, "step": 2274 }, { "epoch": 0.66, "learning_rate": 1.5661034062448833e-05, "loss": 2.0073, "step": 2276 }, { "epoch": 0.66, "learning_rate": 1.565329874992718e-05, "loss": 2.0248, "step": 2278 }, { "epoch": 0.66, "learning_rate": 1.56494292277091e-05, "loss": 1.985, "step": 2280 }, { "epoch": 0.66, "learning_rate": 1.564168645561477e-05, "loss": 1.9871, "step": 2282 }, { "epoch": 0.66, "learning_rate": 1.563393871898787e-05, "loss": 1.9695, "step": 2284 }, { "epoch": 0.66, "learning_rate": 1.5626186024646197e-05, "loss": 2.056, "step": 2286 }, { "epoch": 0.66, "learning_rate": 1.5618428379411918e-05, "loss": 2.0037, "step": 2288 }, { "epoch": 0.66, "learning_rate": 1.5610665790111544e-05, "loss": 2.0559, "step": 2290 }, { "epoch": 0.66, "learning_rate": 1.5602898263575956e-05, "loss": 2.0366, "step": 2292 }, { "epoch": 0.66, "learning_rate": 1.559512580664036e-05, "loss": 1.9951, "step": 2294 }, { "epoch": 0.66, "learning_rate": 1.5587348426144312e-05, "loss": 1.9355, "step": 2296 }, { "epoch": 0.67, "learning_rate": 1.55795661289317e-05, "loss": 2.0368, "step": 2298 }, { "epoch": 0.67, "learning_rate": 1.5571778921850733e-05, "loss": 1.9478, "step": 2300 }, { "epoch": 0.67, "learning_rate": 1.5563986811753948e-05, "loss": 2.0824, "step": 2302 }, { "epoch": 0.67, "learning_rate": 1.555618980549819e-05, "loss": 1.9443, "step": 2304 }, { "epoch": 0.67, "learning_rate": 1.5548387909944615e-05, "loss": 1.9034, "step": 2306 }, { "epoch": 0.67, "learning_rate": 1.5540581131958683e-05, "loss": 2.0561, "step": 2308 }, { "epoch": 0.67, "learning_rate": 1.553276947841015e-05, "loss": 1.9994, "step": 2310 }, { "epoch": 0.67, "learning_rate": 1.5524952956173058e-05, "loss": 2.0153, "step": 2312 }, { "epoch": 0.67, "learning_rate": 1.5517131572125737e-05, "loss": 1.9493, "step": 2314 }, { "epoch": 0.67, "learning_rate": 1.55093053331508e-05, "loss": 1.9212, "step": 2316 }, { "epoch": 0.67, "learning_rate": 1.550147424613512e-05, "loss": 1.9566, "step": 2318 }, { "epoch": 0.67, "learning_rate": 1.5493638317969844e-05, "loss": 2.0209, "step": 2320 }, { "epoch": 0.67, "learning_rate": 1.5485797555550386e-05, "loss": 1.9329, "step": 2322 }, { "epoch": 0.67, "learning_rate": 1.5477951965776398e-05, "loss": 2.0064, "step": 2324 }, { "epoch": 0.67, "learning_rate": 1.5470101555551796e-05, "loss": 1.8943, "step": 2326 }, { "epoch": 0.67, "learning_rate": 1.546224633178472e-05, "loss": 1.9599, "step": 2328 }, { "epoch": 0.67, "learning_rate": 1.5454386301387568e-05, "loss": 1.9675, "step": 2330 }, { "epoch": 0.68, "learning_rate": 1.5446521471276947e-05, "loss": 1.9887, "step": 2332 }, { "epoch": 0.68, "learning_rate": 1.5438651848373696e-05, "loss": 1.983, "step": 2334 }, { "epoch": 0.68, "learning_rate": 1.5430777439602875e-05, "loss": 1.9789, "step": 2336 }, { "epoch": 0.68, "learning_rate": 1.5422898251893757e-05, "loss": 2.0016, "step": 2338 }, { "epoch": 0.68, "learning_rate": 1.5415014292179806e-05, "loss": 1.9674, "step": 2340 }, { "epoch": 0.68, "learning_rate": 1.54071255673987e-05, "loss": 2.0096, "step": 2342 }, { "epoch": 0.68, "learning_rate": 1.5399232084492303e-05, "loss": 1.945, "step": 2344 }, { "epoch": 0.68, "learning_rate": 1.5391333850406672e-05, "loss": 2.0074, "step": 2346 }, { "epoch": 0.68, "learning_rate": 1.538343087209204e-05, "loss": 1.8748, "step": 2348 }, { "epoch": 0.68, "learning_rate": 1.5375523156502807e-05, "loss": 1.9764, "step": 2350 }, { "epoch": 0.68, "learning_rate": 1.5367610710597558e-05, "loss": 2.0477, "step": 2352 }, { "epoch": 0.68, "learning_rate": 1.5359693541339037e-05, "loss": 2.0067, "step": 2354 }, { "epoch": 0.68, "learning_rate": 1.535177165569413e-05, "loss": 1.9618, "step": 2356 }, { "epoch": 0.68, "learning_rate": 1.534384506063389e-05, "loss": 2.0083, "step": 2358 }, { "epoch": 0.68, "learning_rate": 1.5335913763133513e-05, "loss": 2.0253, "step": 2360 }, { "epoch": 0.68, "learning_rate": 1.5327977770172314e-05, "loss": 1.9232, "step": 2362 }, { "epoch": 0.68, "learning_rate": 1.5320037088733766e-05, "loss": 1.9809, "step": 2364 }, { "epoch": 0.69, "learning_rate": 1.5312091725805447e-05, "loss": 1.9472, "step": 2366 }, { "epoch": 0.69, "learning_rate": 1.5304141688379072e-05, "loss": 1.9832, "step": 2368 }, { "epoch": 0.69, "learning_rate": 1.529618698345045e-05, "loss": 2.0126, "step": 2370 }, { "epoch": 0.69, "learning_rate": 1.5288227618019512e-05, "loss": 1.8995, "step": 2372 }, { "epoch": 0.69, "learning_rate": 1.5280263599090292e-05, "loss": 2.0261, "step": 2374 }, { "epoch": 0.69, "learning_rate": 1.5272294933670902e-05, "loss": 1.9829, "step": 2376 }, { "epoch": 0.69, "learning_rate": 1.526432162877356e-05, "loss": 1.9512, "step": 2378 }, { "epoch": 0.69, "learning_rate": 1.525634369141456e-05, "loss": 1.9417, "step": 2380 }, { "epoch": 0.69, "learning_rate": 1.5248361128614273e-05, "loss": 1.9576, "step": 2382 }, { "epoch": 0.69, "learning_rate": 1.5240373947397134e-05, "loss": 1.973, "step": 2384 }, { "epoch": 0.69, "learning_rate": 1.5232382154791657e-05, "loss": 1.9344, "step": 2386 }, { "epoch": 0.69, "learning_rate": 1.5224385757830402e-05, "loss": 1.9534, "step": 2388 }, { "epoch": 0.69, "learning_rate": 1.5216384763549978e-05, "loss": 1.98, "step": 2390 }, { "epoch": 0.69, "learning_rate": 1.5208379178991051e-05, "loss": 1.9199, "step": 2392 }, { "epoch": 0.69, "learning_rate": 1.5200369011198319e-05, "loss": 1.9857, "step": 2394 }, { "epoch": 0.69, "learning_rate": 1.5192354267220512e-05, "loss": 1.9762, "step": 2396 }, { "epoch": 0.69, "learning_rate": 1.5184334954110393e-05, "loss": 1.9554, "step": 2398 }, { "epoch": 0.7, "learning_rate": 1.5176311078924739e-05, "loss": 1.9181, "step": 2400 }, { "epoch": 0.7, "learning_rate": 1.5168282648724351e-05, "loss": 1.9768, "step": 2402 }, { "epoch": 0.7, "learning_rate": 1.5160249670574026e-05, "loss": 1.9878, "step": 2404 }, { "epoch": 0.7, "learning_rate": 1.5152212151542566e-05, "loss": 1.9731, "step": 2406 }, { "epoch": 0.7, "learning_rate": 1.514417009870278e-05, "loss": 1.9857, "step": 2408 }, { "epoch": 0.7, "learning_rate": 1.5136123519131455e-05, "loss": 1.971, "step": 2410 }, { "epoch": 0.7, "learning_rate": 1.5128072419909367e-05, "loss": 2.064, "step": 2412 }, { "epoch": 0.7, "learning_rate": 1.5120016808121261e-05, "loss": 2.0137, "step": 2414 }, { "epoch": 0.7, "learning_rate": 1.5111956690855864e-05, "loss": 1.9766, "step": 2416 }, { "epoch": 0.7, "learning_rate": 1.5103892075205861e-05, "loss": 1.9576, "step": 2418 }, { "epoch": 0.7, "learning_rate": 1.5095822968267898e-05, "loss": 1.9388, "step": 2420 }, { "epoch": 0.7, "learning_rate": 1.5087749377142574e-05, "loss": 1.9452, "step": 2422 }, { "epoch": 0.7, "learning_rate": 1.507967130893443e-05, "loss": 1.9136, "step": 2424 }, { "epoch": 0.7, "learning_rate": 1.5071588770751949e-05, "loss": 1.8982, "step": 2426 }, { "epoch": 0.7, "learning_rate": 1.5063501769707548e-05, "loss": 2.0043, "step": 2428 }, { "epoch": 0.7, "learning_rate": 1.5055410312917573e-05, "loss": 1.9842, "step": 2430 }, { "epoch": 0.7, "learning_rate": 1.5047314407502285e-05, "loss": 1.9731, "step": 2432 }, { "epoch": 0.7, "learning_rate": 1.5039214060585864e-05, "loss": 1.9728, "step": 2434 }, { "epoch": 0.71, "learning_rate": 1.5031109279296403e-05, "loss": 1.9271, "step": 2436 }, { "epoch": 0.71, "learning_rate": 1.5023000070765886e-05, "loss": 1.9387, "step": 2438 }, { "epoch": 0.71, "learning_rate": 1.5014886442130196e-05, "loss": 1.9593, "step": 2440 }, { "epoch": 0.71, "learning_rate": 1.5006768400529117e-05, "loss": 1.9508, "step": 2442 }, { "epoch": 0.71, "learning_rate": 1.4998645953106303e-05, "loss": 1.9881, "step": 2444 }, { "epoch": 0.71, "learning_rate": 1.4990519107009291e-05, "loss": 1.9978, "step": 2446 }, { "epoch": 0.71, "learning_rate": 1.4982387869389486e-05, "loss": 2.0741, "step": 2448 }, { "epoch": 0.71, "learning_rate": 1.4974252247402156e-05, "loss": 1.9395, "step": 2450 }, { "epoch": 0.71, "learning_rate": 1.4966112248206437e-05, "loss": 1.884, "step": 2452 }, { "epoch": 0.71, "learning_rate": 1.4957967878965303e-05, "loss": 2.0, "step": 2454 }, { "epoch": 0.71, "learning_rate": 1.4949819146845581e-05, "loss": 1.98, "step": 2456 }, { "epoch": 0.71, "learning_rate": 1.494166605901794e-05, "loss": 1.9504, "step": 2458 }, { "epoch": 0.71, "learning_rate": 1.4933508622656878e-05, "loss": 2.0104, "step": 2460 }, { "epoch": 0.71, "learning_rate": 1.4925346844940718e-05, "loss": 1.8272, "step": 2462 }, { "epoch": 0.71, "learning_rate": 1.4917180733051603e-05, "loss": 1.9523, "step": 2464 }, { "epoch": 0.71, "learning_rate": 1.49090102941755e-05, "loss": 2.0104, "step": 2466 }, { "epoch": 0.71, "learning_rate": 1.4900835535502165e-05, "loss": 1.9371, "step": 2468 }, { "epoch": 0.72, "learning_rate": 1.4892656464225175e-05, "loss": 1.981, "step": 2470 }, { "epoch": 0.72, "learning_rate": 1.488447308754189e-05, "loss": 1.951, "step": 2472 }, { "epoch": 0.72, "learning_rate": 1.4876285412653456e-05, "loss": 1.9473, "step": 2474 }, { "epoch": 0.72, "learning_rate": 1.486809344676482e-05, "loss": 2.0068, "step": 2476 }, { "epoch": 0.72, "learning_rate": 1.485989719708468e-05, "loss": 1.9283, "step": 2478 }, { "epoch": 0.72, "learning_rate": 1.4851696670825525e-05, "loss": 2.0242, "step": 2480 }, { "epoch": 0.72, "learning_rate": 1.4843491875203586e-05, "loss": 2.0488, "step": 2482 }, { "epoch": 0.72, "learning_rate": 1.4835282817438874e-05, "loss": 2.0139, "step": 2484 }, { "epoch": 0.72, "learning_rate": 1.4827069504755133e-05, "loss": 1.9135, "step": 2486 }, { "epoch": 0.72, "learning_rate": 1.4818851944379861e-05, "loss": 1.9589, "step": 2488 }, { "epoch": 0.72, "learning_rate": 1.481063014354429e-05, "loss": 1.9311, "step": 2490 }, { "epoch": 0.72, "learning_rate": 1.4802404109483384e-05, "loss": 2.006, "step": 2492 }, { "epoch": 0.72, "learning_rate": 1.4794173849435826e-05, "loss": 1.9919, "step": 2494 }, { "epoch": 0.72, "learning_rate": 1.4785939370644035e-05, "loss": 1.9392, "step": 2496 }, { "epoch": 0.72, "learning_rate": 1.4777700680354125e-05, "loss": 2.012, "step": 2498 }, { "epoch": 0.72, "learning_rate": 1.476945778581592e-05, "loss": 1.9457, "step": 2500 }, { "epoch": 0.72, "learning_rate": 1.4761210694282948e-05, "loss": 1.8783, "step": 2502 }, { "epoch": 0.73, "learning_rate": 1.4752959413012429e-05, "loss": 1.8724, "step": 2504 }, { "epoch": 0.73, "learning_rate": 1.4744703949265268e-05, "loss": 2.0303, "step": 2506 }, { "epoch": 0.73, "learning_rate": 1.4736444310306053e-05, "loss": 1.957, "step": 2508 }, { "epoch": 0.73, "learning_rate": 1.4728180503403042e-05, "loss": 1.9355, "step": 2510 }, { "epoch": 0.73, "learning_rate": 1.4719912535828164e-05, "loss": 1.9528, "step": 2512 }, { "epoch": 0.73, "learning_rate": 1.471164041485701e-05, "loss": 1.9509, "step": 2514 }, { "epoch": 0.73, "learning_rate": 1.4703364147768822e-05, "loss": 1.9254, "step": 2516 }, { "epoch": 0.73, "learning_rate": 1.4695083741846491e-05, "loss": 1.9255, "step": 2518 }, { "epoch": 0.73, "learning_rate": 1.468679920437656e-05, "loss": 1.9073, "step": 2520 }, { "epoch": 0.73, "learning_rate": 1.4678510542649189e-05, "loss": 2.0394, "step": 2522 }, { "epoch": 0.73, "learning_rate": 1.4670217763958187e-05, "loss": 1.9334, "step": 2524 }, { "epoch": 0.73, "learning_rate": 1.4661920875600972e-05, "loss": 1.9317, "step": 2526 }, { "epoch": 0.73, "learning_rate": 1.4653619884878581e-05, "loss": 1.9352, "step": 2528 }, { "epoch": 0.73, "learning_rate": 1.4645314799095667e-05, "loss": 2.0355, "step": 2530 }, { "epoch": 0.73, "learning_rate": 1.4637005625560478e-05, "loss": 1.9861, "step": 2532 }, { "epoch": 0.73, "learning_rate": 1.4628692371584871e-05, "loss": 2.0131, "step": 2534 }, { "epoch": 0.73, "learning_rate": 1.4620375044484276e-05, "loss": 1.9616, "step": 2536 }, { "epoch": 0.74, "learning_rate": 1.4612053651577726e-05, "loss": 1.9854, "step": 2538 }, { "epoch": 0.74, "learning_rate": 1.4603728200187823e-05, "loss": 1.9612, "step": 2540 }, { "epoch": 0.74, "learning_rate": 1.4595398697640732e-05, "loss": 1.9678, "step": 2542 }, { "epoch": 0.74, "learning_rate": 1.4587065151266202e-05, "loss": 1.9023, "step": 2544 }, { "epoch": 0.74, "learning_rate": 1.4578727568397524e-05, "loss": 1.8887, "step": 2546 }, { "epoch": 0.74, "learning_rate": 1.4570385956371549e-05, "loss": 1.9515, "step": 2548 }, { "epoch": 0.74, "learning_rate": 1.456204032252867e-05, "loss": 1.9971, "step": 2550 }, { "epoch": 0.74, "learning_rate": 1.4553690674212822e-05, "loss": 1.8915, "step": 2552 }, { "epoch": 0.74, "learning_rate": 1.4545337018771472e-05, "loss": 1.9881, "step": 2554 }, { "epoch": 0.74, "learning_rate": 1.453697936355561e-05, "loss": 1.9338, "step": 2556 }, { "epoch": 0.74, "learning_rate": 1.452861771591975e-05, "loss": 2.0571, "step": 2558 }, { "epoch": 0.74, "learning_rate": 1.4520252083221918e-05, "loss": 1.864, "step": 2560 }, { "epoch": 0.74, "learning_rate": 1.4511882472823644e-05, "loss": 1.9669, "step": 2562 }, { "epoch": 0.74, "learning_rate": 1.4503508892089961e-05, "loss": 1.9696, "step": 2564 }, { "epoch": 0.74, "learning_rate": 1.4495131348389396e-05, "loss": 1.9083, "step": 2566 }, { "epoch": 0.74, "learning_rate": 1.4486749849093964e-05, "loss": 1.8773, "step": 2568 }, { "epoch": 0.74, "learning_rate": 1.4478364401579155e-05, "loss": 1.8741, "step": 2570 }, { "epoch": 0.74, "learning_rate": 1.446997501322394e-05, "loss": 1.9686, "step": 2572 }, { "epoch": 0.75, "learning_rate": 1.4465778843538058e-05, "loss": 2.0001, "step": 2574 }, { "epoch": 0.75, "learning_rate": 1.4457383557765385e-05, "loss": 1.9925, "step": 2576 }, { "epoch": 0.75, "learning_rate": 1.4448984349614885e-05, "loss": 2.0139, "step": 2578 }, { "epoch": 0.75, "learning_rate": 1.4440581226477635e-05, "loss": 1.976, "step": 2580 }, { "epoch": 0.75, "learning_rate": 1.4432174195748162e-05, "loss": 1.9307, "step": 2582 }, { "epoch": 0.75, "learning_rate": 1.4423763264824417e-05, "loss": 2.0125, "step": 2584 }, { "epoch": 0.75, "learning_rate": 1.4415348441107808e-05, "loss": 1.9153, "step": 2586 }, { "epoch": 0.75, "learning_rate": 1.440692973200314e-05, "loss": 1.9348, "step": 2588 }, { "epoch": 0.75, "learning_rate": 1.4398507144918657e-05, "loss": 1.9984, "step": 2590 }, { "epoch": 0.75, "learning_rate": 1.4390080687266013e-05, "loss": 2.0108, "step": 2592 }, { "epoch": 0.75, "learning_rate": 1.438165036646026e-05, "loss": 1.9391, "step": 2594 }, { "epoch": 0.75, "learning_rate": 1.4373216189919852e-05, "loss": 1.8911, "step": 2596 }, { "epoch": 0.75, "learning_rate": 1.4364778165066642e-05, "loss": 1.9758, "step": 2598 }, { "epoch": 0.75, "learning_rate": 1.4356336299325863e-05, "loss": 1.967, "step": 2600 }, { "epoch": 0.75, "learning_rate": 1.4347890600126136e-05, "loss": 1.8823, "step": 2602 }, { "epoch": 0.75, "learning_rate": 1.4339441074899438e-05, "loss": 1.9737, "step": 2604 }, { "epoch": 0.75, "learning_rate": 1.4330987731081132e-05, "loss": 2.0223, "step": 2606 }, { "epoch": 0.76, "learning_rate": 1.4322530576109933e-05, "loss": 1.9752, "step": 2608 }, { "epoch": 0.76, "learning_rate": 1.4314069617427908e-05, "loss": 2.0465, "step": 2610 }, { "epoch": 0.76, "learning_rate": 1.4305604862480479e-05, "loss": 1.9971, "step": 2612 }, { "epoch": 0.76, "learning_rate": 1.4297136318716396e-05, "loss": 2.0588, "step": 2614 }, { "epoch": 0.76, "learning_rate": 1.4288663993587753e-05, "loss": 2.0471, "step": 2616 }, { "epoch": 0.76, "learning_rate": 1.4280187894549968e-05, "loss": 1.9108, "step": 2618 }, { "epoch": 0.76, "learning_rate": 1.4271708029061785e-05, "loss": 1.8804, "step": 2620 }, { "epoch": 0.76, "learning_rate": 1.426322440458525e-05, "loss": 1.9669, "step": 2622 }, { "epoch": 0.76, "learning_rate": 1.425473702858573e-05, "loss": 1.9841, "step": 2624 }, { "epoch": 0.76, "learning_rate": 1.4246245908531883e-05, "loss": 1.9594, "step": 2626 }, { "epoch": 0.76, "learning_rate": 1.4237751051895676e-05, "loss": 2.0162, "step": 2628 }, { "epoch": 0.76, "learning_rate": 1.4229252466152346e-05, "loss": 1.9501, "step": 2630 }, { "epoch": 0.76, "learning_rate": 1.4220750158780425e-05, "loss": 1.94, "step": 2632 }, { "epoch": 0.76, "learning_rate": 1.421224413726171e-05, "loss": 1.9805, "step": 2634 }, { "epoch": 0.76, "learning_rate": 1.420373440908128e-05, "loss": 2.0047, "step": 2636 }, { "epoch": 0.76, "learning_rate": 1.419522098172746e-05, "loss": 2.0186, "step": 2638 }, { "epoch": 0.76, "learning_rate": 1.418670386269184e-05, "loss": 1.9707, "step": 2640 }, { "epoch": 0.77, "learning_rate": 1.4178183059469253e-05, "loss": 1.8998, "step": 2642 }, { "epoch": 0.77, "learning_rate": 1.4169658579557782e-05, "loss": 1.9192, "step": 2644 }, { "epoch": 0.77, "learning_rate": 1.4161130430458738e-05, "loss": 1.9512, "step": 2646 }, { "epoch": 0.77, "learning_rate": 1.415259861967666e-05, "loss": 1.9432, "step": 2648 }, { "epoch": 0.77, "learning_rate": 1.414406315471932e-05, "loss": 1.9215, "step": 2650 }, { "epoch": 0.77, "learning_rate": 1.4135524043097693e-05, "loss": 2.0081, "step": 2652 }, { "epoch": 0.77, "learning_rate": 1.4126981292325964e-05, "loss": 1.992, "step": 2654 }, { "epoch": 0.77, "learning_rate": 1.4118434909921535e-05, "loss": 1.9443, "step": 2656 }, { "epoch": 0.77, "learning_rate": 1.4109884903404982e-05, "loss": 1.8853, "step": 2658 }, { "epoch": 0.77, "learning_rate": 1.410133128030009e-05, "loss": 1.9692, "step": 2660 }, { "epoch": 0.77, "learning_rate": 1.4092774048133808e-05, "loss": 2.0239, "step": 2662 }, { "epoch": 0.77, "learning_rate": 1.4084213214436277e-05, "loss": 1.959, "step": 2664 }, { "epoch": 0.77, "learning_rate": 1.4075648786740806e-05, "loss": 2.1128, "step": 2666 }, { "epoch": 0.77, "learning_rate": 1.4067080772583848e-05, "loss": 1.9296, "step": 2668 }, { "epoch": 0.77, "learning_rate": 1.4058509179505035e-05, "loss": 1.9919, "step": 2670 }, { "epoch": 0.77, "learning_rate": 1.4049934015047138e-05, "loss": 1.9158, "step": 2672 }, { "epoch": 0.77, "learning_rate": 1.4041355286756066e-05, "loss": 1.9202, "step": 2674 }, { "epoch": 0.77, "learning_rate": 1.4032773002180874e-05, "loss": 1.8936, "step": 2676 }, { "epoch": 0.78, "learning_rate": 1.4024187168873741e-05, "loss": 1.9233, "step": 2678 }, { "epoch": 0.78, "learning_rate": 1.4015597794389972e-05, "loss": 1.9499, "step": 2680 }, { "epoch": 0.78, "learning_rate": 1.4007004886287982e-05, "loss": 2.0093, "step": 2682 }, { "epoch": 0.78, "learning_rate": 1.3998408452129302e-05, "loss": 2.0139, "step": 2684 }, { "epoch": 0.78, "learning_rate": 1.398980849947856e-05, "loss": 1.9925, "step": 2686 }, { "epoch": 0.78, "learning_rate": 1.3981205035903487e-05, "loss": 1.9188, "step": 2688 }, { "epoch": 0.78, "learning_rate": 1.3972598068974901e-05, "loss": 1.9061, "step": 2690 }, { "epoch": 0.78, "learning_rate": 1.3963987606266693e-05, "loss": 1.9636, "step": 2692 }, { "epoch": 0.78, "learning_rate": 1.3955373655355852e-05, "loss": 1.9085, "step": 2694 }, { "epoch": 0.78, "learning_rate": 1.3946756223822413e-05, "loss": 1.8942, "step": 2696 }, { "epoch": 0.78, "learning_rate": 1.3938135319249488e-05, "loss": 1.8769, "step": 2698 }, { "epoch": 0.78, "learning_rate": 1.3929510949223244e-05, "loss": 1.9469, "step": 2700 }, { "epoch": 0.78, "learning_rate": 1.3920883121332888e-05, "loss": 2.0262, "step": 2702 }, { "epoch": 0.78, "learning_rate": 1.3912251843170687e-05, "loss": 1.8104, "step": 2704 }, { "epoch": 0.78, "learning_rate": 1.3903617122331922e-05, "loss": 1.8825, "step": 2706 }, { "epoch": 0.78, "learning_rate": 1.3894978966414928e-05, "loss": 2.001, "step": 2708 }, { "epoch": 0.78, "learning_rate": 1.388633738302104e-05, "loss": 1.887, "step": 2710 }, { "epoch": 0.79, "learning_rate": 1.3877692379754626e-05, "loss": 1.947, "step": 2712 }, { "epoch": 0.79, "learning_rate": 1.3869043964223052e-05, "loss": 1.9008, "step": 2714 }, { "epoch": 0.79, "learning_rate": 1.3860392144036694e-05, "loss": 1.973, "step": 2716 }, { "epoch": 0.79, "learning_rate": 1.385173692680892e-05, "loss": 1.9786, "step": 2718 }, { "epoch": 0.79, "learning_rate": 1.3843078320156088e-05, "loss": 2.0037, "step": 2720 }, { "epoch": 0.79, "learning_rate": 1.3834416331697544e-05, "loss": 1.9352, "step": 2722 }, { "epoch": 0.79, "learning_rate": 1.3825750969055604e-05, "loss": 1.9707, "step": 2724 }, { "epoch": 0.79, "learning_rate": 1.381708223985555e-05, "loss": 2.0125, "step": 2726 }, { "epoch": 0.79, "learning_rate": 1.3808410151725633e-05, "loss": 1.8255, "step": 2728 }, { "epoch": 0.79, "learning_rate": 1.3799734712297059e-05, "loss": 2.0043, "step": 2730 }, { "epoch": 0.79, "learning_rate": 1.379105592920398e-05, "loss": 1.9295, "step": 2732 }, { "epoch": 0.79, "learning_rate": 1.3782373810083493e-05, "loss": 1.9526, "step": 2734 }, { "epoch": 0.79, "learning_rate": 1.377368836257563e-05, "loss": 1.8964, "step": 2736 }, { "epoch": 0.79, "learning_rate": 1.3764999594323354e-05, "loss": 1.9299, "step": 2738 }, { "epoch": 0.79, "learning_rate": 1.375630751297254e-05, "loss": 1.8939, "step": 2740 }, { "epoch": 0.79, "learning_rate": 1.3747612126171991e-05, "loss": 1.8837, "step": 2742 }, { "epoch": 0.79, "learning_rate": 1.3738913441573417e-05, "loss": 1.9059, "step": 2744 }, { "epoch": 0.8, "learning_rate": 1.3730211466831422e-05, "loss": 1.9827, "step": 2746 }, { "epoch": 0.8, "learning_rate": 1.3721506209603512e-05, "loss": 1.948, "step": 2748 }, { "epoch": 0.8, "learning_rate": 1.3712797677550081e-05, "loss": 2.0373, "step": 2750 }, { "epoch": 0.8, "learning_rate": 1.3704085878334401e-05, "loss": 2.0243, "step": 2752 }, { "epoch": 0.8, "learning_rate": 1.3695370819622621e-05, "loss": 1.966, "step": 2754 }, { "epoch": 0.8, "learning_rate": 1.368665250908376e-05, "loss": 1.8699, "step": 2756 }, { "epoch": 0.8, "learning_rate": 1.3677930954389698e-05, "loss": 2.0359, "step": 2758 }, { "epoch": 0.8, "learning_rate": 1.3669206163215166e-05, "loss": 1.9898, "step": 2760 }, { "epoch": 0.8, "learning_rate": 1.3660478143237748e-05, "loss": 1.9059, "step": 2762 }, { "epoch": 0.8, "learning_rate": 1.3651746902137868e-05, "loss": 1.9645, "step": 2764 }, { "epoch": 0.8, "learning_rate": 1.364301244759878e-05, "loss": 2.0743, "step": 2766 }, { "epoch": 0.8, "learning_rate": 1.3634274787306577e-05, "loss": 1.9403, "step": 2768 }, { "epoch": 0.8, "learning_rate": 1.3625533928950155e-05, "loss": 1.9435, "step": 2770 }, { "epoch": 0.8, "learning_rate": 1.3616789880221241e-05, "loss": 1.9299, "step": 2772 }, { "epoch": 0.8, "learning_rate": 1.3608042648814363e-05, "loss": 1.9786, "step": 2774 }, { "epoch": 0.8, "learning_rate": 1.3599292242426852e-05, "loss": 1.8888, "step": 2776 }, { "epoch": 0.8, "learning_rate": 1.3590538668758821e-05, "loss": 1.9119, "step": 2778 }, { "epoch": 0.81, "learning_rate": 1.358178193551319e-05, "loss": 1.933, "step": 2780 }, { "epoch": 0.81, "learning_rate": 1.3573022050395644e-05, "loss": 1.9602, "step": 2782 }, { "epoch": 0.81, "learning_rate": 1.3564259021114652e-05, "loss": 1.9295, "step": 2784 }, { "epoch": 0.81, "learning_rate": 1.3555492855381433e-05, "loss": 2.027, "step": 2786 }, { "epoch": 0.81, "learning_rate": 1.3546723560909991e-05, "loss": 1.9036, "step": 2788 }, { "epoch": 0.81, "learning_rate": 1.353795114541706e-05, "loss": 1.9909, "step": 2790 }, { "epoch": 0.81, "learning_rate": 1.3529175616622134e-05, "loss": 1.9025, "step": 2792 }, { "epoch": 0.81, "learning_rate": 1.3520396982247443e-05, "loss": 1.9257, "step": 2794 }, { "epoch": 0.81, "learning_rate": 1.3511615250017948e-05, "loss": 1.9279, "step": 2796 }, { "epoch": 0.81, "learning_rate": 1.350283042766134e-05, "loss": 1.9595, "step": 2798 }, { "epoch": 0.81, "learning_rate": 1.3494042522908022e-05, "loss": 1.8935, "step": 2800 }, { "epoch": 0.81, "learning_rate": 1.348525154349112e-05, "loss": 1.8789, "step": 2802 }, { "epoch": 0.81, "learning_rate": 1.3476457497146455e-05, "loss": 1.9998, "step": 2804 }, { "epoch": 0.81, "learning_rate": 1.3467660391612553e-05, "loss": 1.9369, "step": 2806 }, { "epoch": 0.81, "learning_rate": 1.3458860234630633e-05, "loss": 2.0087, "step": 2808 }, { "epoch": 0.81, "learning_rate": 1.3450057033944592e-05, "loss": 2.0431, "step": 2810 }, { "epoch": 0.81, "learning_rate": 1.3441250797301018e-05, "loss": 1.8894, "step": 2812 }, { "epoch": 0.81, "learning_rate": 1.3432441532449152e-05, "loss": 1.9354, "step": 2814 }, { "epoch": 0.82, "learning_rate": 1.342362924714092e-05, "loss": 1.9628, "step": 2816 }, { "epoch": 0.82, "learning_rate": 1.3414813949130893e-05, "loss": 1.9563, "step": 2818 }, { "epoch": 0.82, "learning_rate": 1.3405995646176294e-05, "loss": 1.9604, "step": 2820 }, { "epoch": 0.82, "learning_rate": 1.3397174346036996e-05, "loss": 1.9456, "step": 2822 }, { "epoch": 0.82, "learning_rate": 1.3388350056475505e-05, "loss": 1.9331, "step": 2824 }, { "epoch": 0.82, "learning_rate": 1.337952278525696e-05, "loss": 1.8984, "step": 2826 }, { "epoch": 0.82, "learning_rate": 1.3370692540149121e-05, "loss": 1.937, "step": 2828 }, { "epoch": 0.82, "learning_rate": 1.3361859328922368e-05, "loss": 1.9106, "step": 2830 }, { "epoch": 0.82, "learning_rate": 1.3353023159349691e-05, "loss": 2.0187, "step": 2832 }, { "epoch": 0.82, "learning_rate": 1.3344184039206678e-05, "loss": 1.933, "step": 2834 }, { "epoch": 0.82, "learning_rate": 1.3335341976271518e-05, "loss": 1.8913, "step": 2836 }, { "epoch": 0.82, "learning_rate": 1.332649697832499e-05, "loss": 1.9631, "step": 2838 }, { "epoch": 0.82, "learning_rate": 1.3317649053150457e-05, "loss": 2.0034, "step": 2840 }, { "epoch": 0.82, "learning_rate": 1.3308798208533853e-05, "loss": 1.8998, "step": 2842 }, { "epoch": 0.82, "learning_rate": 1.3299944452263682e-05, "loss": 1.908, "step": 2844 }, { "epoch": 0.82, "learning_rate": 1.3291087792131016e-05, "loss": 1.9703, "step": 2846 }, { "epoch": 0.82, "learning_rate": 1.3282228235929475e-05, "loss": 1.8216, "step": 2848 }, { "epoch": 0.83, "learning_rate": 1.3273365791455231e-05, "loss": 1.9709, "step": 2850 }, { "epoch": 0.83, "learning_rate": 1.3264500466506997e-05, "loss": 1.915, "step": 2852 }, { "epoch": 0.83, "learning_rate": 1.3255632268886023e-05, "loss": 1.9588, "step": 2854 }, { "epoch": 0.83, "learning_rate": 1.3246761206396081e-05, "loss": 1.8578, "step": 2856 }, { "epoch": 0.83, "learning_rate": 1.3237887286843471e-05, "loss": 1.872, "step": 2858 }, { "epoch": 0.83, "learning_rate": 1.3229010518037003e-05, "loss": 1.9372, "step": 2860 }, { "epoch": 0.83, "learning_rate": 1.3220130907787994e-05, "loss": 1.8965, "step": 2862 }, { "epoch": 0.83, "learning_rate": 1.3211248463910263e-05, "loss": 1.9854, "step": 2864 }, { "epoch": 0.83, "learning_rate": 1.3202363194220124e-05, "loss": 1.8744, "step": 2866 }, { "epoch": 0.83, "learning_rate": 1.3193475106536374e-05, "loss": 1.8864, "step": 2868 }, { "epoch": 0.83, "learning_rate": 1.318458420868029e-05, "loss": 1.9733, "step": 2870 }, { "epoch": 0.83, "learning_rate": 1.3175690508475627e-05, "loss": 1.9783, "step": 2872 }, { "epoch": 0.83, "learning_rate": 1.3166794013748598e-05, "loss": 1.9466, "step": 2874 }, { "epoch": 0.83, "learning_rate": 1.3157894732327885e-05, "loss": 1.9431, "step": 2876 }, { "epoch": 0.83, "learning_rate": 1.314899267204461e-05, "loss": 1.8476, "step": 2878 }, { "epoch": 0.83, "learning_rate": 1.3140087840732355e-05, "loss": 1.9083, "step": 2880 }, { "epoch": 0.83, "learning_rate": 1.3131180246227124e-05, "loss": 1.9107, "step": 2882 }, { "epoch": 0.84, "learning_rate": 1.3122269896367366e-05, "loss": 1.8815, "step": 2884 }, { "epoch": 0.84, "learning_rate": 1.3113356798993948e-05, "loss": 1.9069, "step": 2886 }, { "epoch": 0.84, "learning_rate": 1.3104440961950155e-05, "loss": 1.8625, "step": 2888 }, { "epoch": 0.84, "learning_rate": 1.3095522393081693e-05, "loss": 1.9843, "step": 2890 }, { "epoch": 0.84, "learning_rate": 1.3086601100236646e-05, "loss": 1.9848, "step": 2892 }, { "epoch": 0.84, "learning_rate": 1.3077677091265529e-05, "loss": 1.9664, "step": 2894 }, { "epoch": 0.84, "learning_rate": 1.3068750374021222e-05, "loss": 1.9192, "step": 2896 }, { "epoch": 0.84, "learning_rate": 1.3059820956358998e-05, "loss": 1.9552, "step": 2898 }, { "epoch": 0.84, "learning_rate": 1.3050888846136503e-05, "loss": 1.9939, "step": 2900 }, { "epoch": 0.84, "learning_rate": 1.3041954051213757e-05, "loss": 1.9717, "step": 2902 }, { "epoch": 0.84, "learning_rate": 1.3033016579453138e-05, "loss": 1.982, "step": 2904 }, { "epoch": 0.84, "learning_rate": 1.3024076438719382e-05, "loss": 1.9164, "step": 2906 }, { "epoch": 0.84, "learning_rate": 1.3015133636879567e-05, "loss": 1.9404, "step": 2908 }, { "epoch": 0.84, "learning_rate": 1.300618818180313e-05, "loss": 1.9509, "step": 2910 }, { "epoch": 0.84, "learning_rate": 1.2997240081361824e-05, "loss": 1.8706, "step": 2912 }, { "epoch": 0.84, "learning_rate": 1.2988289343429734e-05, "loss": 1.8876, "step": 2914 }, { "epoch": 0.84, "learning_rate": 1.2979335975883276e-05, "loss": 1.9373, "step": 2916 }, { "epoch": 0.85, "learning_rate": 1.297037998660117e-05, "loss": 1.948, "step": 2918 }, { "epoch": 0.85, "learning_rate": 1.2961421383464445e-05, "loss": 1.9704, "step": 2920 }, { "epoch": 0.85, "learning_rate": 1.2952460174356432e-05, "loss": 1.9516, "step": 2922 }, { "epoch": 0.85, "learning_rate": 1.2943496367162756e-05, "loss": 1.9013, "step": 2924 }, { "epoch": 0.85, "learning_rate": 1.2934529969771324e-05, "loss": 1.9702, "step": 2926 }, { "epoch": 0.85, "learning_rate": 1.2925560990072324e-05, "loss": 1.8756, "step": 2928 }, { "epoch": 0.85, "learning_rate": 1.2916589435958223e-05, "loss": 1.8191, "step": 2930 }, { "epoch": 0.85, "learning_rate": 1.290761531532374e-05, "loss": 1.923, "step": 2932 }, { "epoch": 0.85, "learning_rate": 1.2898638636065867e-05, "loss": 1.9531, "step": 2934 }, { "epoch": 0.85, "learning_rate": 1.2889659406083837e-05, "loss": 1.9431, "step": 2936 }, { "epoch": 0.85, "learning_rate": 1.2880677633279134e-05, "loss": 1.9007, "step": 2938 }, { "epoch": 0.85, "learning_rate": 1.2871693325555472e-05, "loss": 2.0625, "step": 2940 }, { "epoch": 0.85, "learning_rate": 1.2862706490818804e-05, "loss": 1.9437, "step": 2942 }, { "epoch": 0.85, "learning_rate": 1.2853717136977305e-05, "loss": 1.8938, "step": 2944 }, { "epoch": 0.85, "learning_rate": 1.284472527194136e-05, "loss": 1.9051, "step": 2946 }, { "epoch": 0.85, "learning_rate": 1.2835730903623571e-05, "loss": 1.8243, "step": 2948 }, { "epoch": 0.85, "learning_rate": 1.2826734039938742e-05, "loss": 1.904, "step": 2950 }, { "epoch": 0.85, "learning_rate": 1.281773468880387e-05, "loss": 1.9508, "step": 2952 }, { "epoch": 0.86, "learning_rate": 1.2808732858138146e-05, "loss": 2.0167, "step": 2954 }, { "epoch": 0.86, "learning_rate": 1.2799728555862934e-05, "loss": 1.9257, "step": 2956 }, { "epoch": 0.86, "learning_rate": 1.2790721789901776e-05, "loss": 1.9615, "step": 2958 }, { "epoch": 0.86, "learning_rate": 1.2781712568180393e-05, "loss": 1.9573, "step": 2960 }, { "epoch": 0.86, "learning_rate": 1.2772700898626651e-05, "loss": 1.9788, "step": 2962 }, { "epoch": 0.86, "learning_rate": 1.276368678917058e-05, "loss": 1.936, "step": 2964 }, { "epoch": 0.86, "learning_rate": 1.2754670247744353e-05, "loss": 1.9734, "step": 2966 }, { "epoch": 0.86, "learning_rate": 1.274565128228229e-05, "loss": 1.9328, "step": 2968 }, { "epoch": 0.86, "learning_rate": 1.2736629900720832e-05, "loss": 1.9136, "step": 2970 }, { "epoch": 0.86, "learning_rate": 1.272760611099855e-05, "loss": 1.9844, "step": 2972 }, { "epoch": 0.86, "learning_rate": 1.2718579921056144e-05, "loss": 1.8504, "step": 2974 }, { "epoch": 0.86, "learning_rate": 1.2709551338836415e-05, "loss": 1.944, "step": 2976 }, { "epoch": 0.86, "learning_rate": 1.2700520372284273e-05, "loss": 1.9139, "step": 2978 }, { "epoch": 0.86, "learning_rate": 1.2691487029346727e-05, "loss": 1.9631, "step": 2980 }, { "epoch": 0.86, "learning_rate": 1.2682451317972875e-05, "loss": 1.9897, "step": 2982 }, { "epoch": 0.86, "learning_rate": 1.2673413246113904e-05, "loss": 1.9006, "step": 2984 }, { "epoch": 0.86, "learning_rate": 1.2664372821723066e-05, "loss": 1.8943, "step": 2986 }, { "epoch": 0.87, "learning_rate": 1.2655330052755702e-05, "loss": 1.933, "step": 2988 }, { "epoch": 0.87, "learning_rate": 1.2646284947169197e-05, "loss": 1.8859, "step": 2990 }, { "epoch": 0.87, "learning_rate": 1.2637237512923006e-05, "loss": 1.9369, "step": 2992 }, { "epoch": 0.87, "learning_rate": 1.2628187757978629e-05, "loss": 2.0031, "step": 2994 }, { "epoch": 0.87, "learning_rate": 1.2619135690299604e-05, "loss": 1.9075, "step": 2996 }, { "epoch": 0.87, "learning_rate": 1.2610081317851512e-05, "loss": 1.8908, "step": 2998 }, { "epoch": 0.87, "learning_rate": 1.260102464860195e-05, "loss": 1.941, "step": 3000 }, { "epoch": 0.87, "learning_rate": 1.259196569052055e-05, "loss": 1.9289, "step": 3002 }, { "epoch": 0.87, "learning_rate": 1.2582904451578952e-05, "loss": 1.8926, "step": 3004 }, { "epoch": 0.87, "learning_rate": 1.25738409397508e-05, "loss": 1.9705, "step": 3006 }, { "epoch": 0.87, "learning_rate": 1.2564775163011742e-05, "loss": 1.9143, "step": 3008 }, { "epoch": 0.87, "learning_rate": 1.2555707129339417e-05, "loss": 1.8967, "step": 3010 }, { "epoch": 0.87, "learning_rate": 1.2546636846713453e-05, "loss": 1.9478, "step": 3012 }, { "epoch": 0.87, "learning_rate": 1.2537564323115454e-05, "loss": 1.9806, "step": 3014 }, { "epoch": 0.87, "learning_rate": 1.2528489566528997e-05, "loss": 1.9715, "step": 3016 }, { "epoch": 0.87, "learning_rate": 1.2519412584939627e-05, "loss": 1.8564, "step": 3018 }, { "epoch": 0.87, "learning_rate": 1.251033338633484e-05, "loss": 1.9019, "step": 3020 }, { "epoch": 0.88, "learning_rate": 1.2501251978704088e-05, "loss": 1.9909, "step": 3022 }, { "epoch": 0.88, "learning_rate": 1.2492168370038767e-05, "loss": 1.9509, "step": 3024 }, { "epoch": 0.88, "learning_rate": 1.2483082568332207e-05, "loss": 1.8935, "step": 3026 }, { "epoch": 0.88, "learning_rate": 1.2473994581579674e-05, "loss": 1.8978, "step": 3028 }, { "epoch": 0.88, "learning_rate": 1.2464904417778345e-05, "loss": 1.9197, "step": 3030 }, { "epoch": 0.88, "learning_rate": 1.2455812084927326e-05, "loss": 2.0019, "step": 3032 }, { "epoch": 0.88, "learning_rate": 1.2446717591027624e-05, "loss": 1.9177, "step": 3034 }, { "epoch": 0.88, "learning_rate": 1.2437620944082144e-05, "loss": 1.98, "step": 3036 }, { "epoch": 0.88, "learning_rate": 1.2428522152095704e-05, "loss": 1.985, "step": 3038 }, { "epoch": 0.88, "learning_rate": 1.2419421223074984e-05, "loss": 1.8888, "step": 3040 }, { "epoch": 0.88, "learning_rate": 1.241031816502856e-05, "loss": 1.9659, "step": 3042 }, { "epoch": 0.88, "learning_rate": 1.2401212985966881e-05, "loss": 1.978, "step": 3044 }, { "epoch": 0.88, "learning_rate": 1.2392105693902263e-05, "loss": 1.9162, "step": 3046 }, { "epoch": 0.88, "learning_rate": 1.2382996296848875e-05, "loss": 1.9194, "step": 3048 }, { "epoch": 0.88, "learning_rate": 1.2373884802822738e-05, "loss": 1.9796, "step": 3050 }, { "epoch": 0.88, "learning_rate": 1.2364771219841728e-05, "loss": 1.9386, "step": 3052 }, { "epoch": 0.88, "learning_rate": 1.235565555592555e-05, "loss": 1.9979, "step": 3054 }, { "epoch": 0.89, "learning_rate": 1.2346537819095743e-05, "loss": 1.9313, "step": 3056 }, { "epoch": 0.89, "learning_rate": 1.2337418017375675e-05, "loss": 1.9945, "step": 3058 }, { "epoch": 0.89, "learning_rate": 1.232829615879052e-05, "loss": 1.8867, "step": 3060 }, { "epoch": 0.89, "learning_rate": 1.2319172251367277e-05, "loss": 1.916, "step": 3062 }, { "epoch": 0.89, "learning_rate": 1.2310046303134733e-05, "loss": 1.9233, "step": 3064 }, { "epoch": 0.89, "learning_rate": 1.2300918322123476e-05, "loss": 1.9622, "step": 3066 }, { "epoch": 0.89, "learning_rate": 1.229178831636589e-05, "loss": 1.952, "step": 3068 }, { "epoch": 0.89, "learning_rate": 1.228265629389613e-05, "loss": 1.9031, "step": 3070 }, { "epoch": 0.89, "learning_rate": 1.2273522262750132e-05, "loss": 1.9861, "step": 3072 }, { "epoch": 0.89, "learning_rate": 1.22643862309656e-05, "loss": 1.8848, "step": 3074 }, { "epoch": 0.89, "learning_rate": 1.2255248206581997e-05, "loss": 1.8875, "step": 3076 }, { "epoch": 0.89, "learning_rate": 1.2246108197640539e-05, "loss": 1.9593, "step": 3078 }, { "epoch": 0.89, "learning_rate": 1.2236966212184181e-05, "loss": 1.9627, "step": 3080 }, { "epoch": 0.89, "learning_rate": 1.2227822258257635e-05, "loss": 1.9554, "step": 3082 }, { "epoch": 0.89, "learning_rate": 1.221867634390733e-05, "loss": 1.8427, "step": 3084 }, { "epoch": 0.89, "learning_rate": 1.2209528477181422e-05, "loss": 1.9162, "step": 3086 }, { "epoch": 0.89, "learning_rate": 1.2200378666129792e-05, "loss": 2.0176, "step": 3088 }, { "epoch": 0.89, "learning_rate": 1.2191226918804031e-05, "loss": 1.8565, "step": 3090 }, { "epoch": 0.9, "learning_rate": 1.2182073243257426e-05, "loss": 1.8597, "step": 3092 }, { "epoch": 0.9, "learning_rate": 1.2172917647544963e-05, "loss": 1.9395, "step": 3094 }, { "epoch": 0.9, "learning_rate": 1.216376013972333e-05, "loss": 1.8917, "step": 3096 }, { "epoch": 0.9, "learning_rate": 1.215460072785088e-05, "loss": 1.915, "step": 3098 }, { "epoch": 0.9, "learning_rate": 1.2145439419987652e-05, "loss": 1.8844, "step": 3100 }, { "epoch": 0.9, "learning_rate": 1.2136276224195349e-05, "loss": 1.8892, "step": 3102 }, { "epoch": 0.9, "learning_rate": 1.212711114853734e-05, "loss": 1.8983, "step": 3104 }, { "epoch": 0.9, "learning_rate": 1.2117944201078646e-05, "loss": 1.8224, "step": 3106 }, { "epoch": 0.9, "learning_rate": 1.210877538988593e-05, "loss": 1.992, "step": 3108 }, { "epoch": 0.9, "learning_rate": 1.2099604723027503e-05, "loss": 1.8978, "step": 3110 }, { "epoch": 0.9, "learning_rate": 1.2090432208573306e-05, "loss": 1.9284, "step": 3112 }, { "epoch": 0.9, "learning_rate": 1.2081257854594905e-05, "loss": 1.8658, "step": 3114 }, { "epoch": 0.9, "learning_rate": 1.2072081669165483e-05, "loss": 1.9074, "step": 3116 }, { "epoch": 0.9, "learning_rate": 1.206290366035984e-05, "loss": 1.9541, "step": 3118 }, { "epoch": 0.9, "learning_rate": 1.2053723836254374e-05, "loss": 1.948, "step": 3120 }, { "epoch": 0.9, "learning_rate": 1.2044542204927088e-05, "loss": 1.9998, "step": 3122 }, { "epoch": 0.9, "learning_rate": 1.2035358774457564e-05, "loss": 1.9138, "step": 3124 }, { "epoch": 0.91, "learning_rate": 1.202617355292698e-05, "loss": 1.9523, "step": 3126 }, { "epoch": 0.91, "learning_rate": 1.2016986548418084e-05, "loss": 1.9099, "step": 3128 }, { "epoch": 0.91, "learning_rate": 1.2007797769015193e-05, "loss": 1.939, "step": 3130 }, { "epoch": 0.91, "learning_rate": 1.1998607222804181e-05, "loss": 2.0317, "step": 3132 }, { "epoch": 0.91, "learning_rate": 1.1989414917872489e-05, "loss": 1.905, "step": 3134 }, { "epoch": 0.91, "learning_rate": 1.1980220862309097e-05, "loss": 2.0023, "step": 3136 }, { "epoch": 0.91, "learning_rate": 1.1971025064204521e-05, "loss": 1.9156, "step": 3138 }, { "epoch": 0.91, "learning_rate": 1.1961827531650824e-05, "loss": 1.9333, "step": 3140 }, { "epoch": 0.91, "learning_rate": 1.1952628272741585e-05, "loss": 1.8992, "step": 3142 }, { "epoch": 0.91, "learning_rate": 1.1943427295571898e-05, "loss": 1.8561, "step": 3144 }, { "epoch": 0.91, "learning_rate": 1.193422460823839e-05, "loss": 1.9071, "step": 3146 }, { "epoch": 0.91, "learning_rate": 1.1925020218839168e-05, "loss": 1.9316, "step": 3148 }, { "epoch": 0.91, "learning_rate": 1.191581413547385e-05, "loss": 1.9141, "step": 3150 }, { "epoch": 0.91, "learning_rate": 1.190660636624354e-05, "loss": 1.9172, "step": 3152 }, { "epoch": 0.91, "learning_rate": 1.1897396919250832e-05, "loss": 1.8921, "step": 3154 }, { "epoch": 0.91, "learning_rate": 1.1888185802599792e-05, "loss": 1.9056, "step": 3156 }, { "epoch": 0.91, "learning_rate": 1.1878973024395952e-05, "loss": 1.8935, "step": 3158 }, { "epoch": 0.92, "learning_rate": 1.186975859274631e-05, "loss": 1.9437, "step": 3160 }, { "epoch": 0.92, "learning_rate": 1.186054251575932e-05, "loss": 1.8435, "step": 3162 }, { "epoch": 0.92, "learning_rate": 1.185132480154488e-05, "loss": 1.8581, "step": 3164 }, { "epoch": 0.92, "learning_rate": 1.1842105458214333e-05, "loss": 1.9032, "step": 3166 }, { "epoch": 0.92, "learning_rate": 1.1832884493880452e-05, "loss": 1.9976, "step": 3168 }, { "epoch": 0.92, "learning_rate": 1.1823661916657441e-05, "loss": 1.9257, "step": 3170 }, { "epoch": 0.92, "learning_rate": 1.1814437734660918e-05, "loss": 1.985, "step": 3172 }, { "epoch": 0.92, "learning_rate": 1.1805211956007914e-05, "loss": 1.9244, "step": 3174 }, { "epoch": 0.92, "learning_rate": 1.1795984588816869e-05, "loss": 1.9198, "step": 3176 }, { "epoch": 0.92, "learning_rate": 1.1786755641207614e-05, "loss": 1.9748, "step": 3178 }, { "epoch": 0.92, "learning_rate": 1.1777525121301383e-05, "loss": 1.8587, "step": 3180 }, { "epoch": 0.92, "learning_rate": 1.1768293037220779e-05, "loss": 2.0632, "step": 3182 }, { "epoch": 0.92, "learning_rate": 1.1759059397089793e-05, "loss": 1.9882, "step": 3184 }, { "epoch": 0.92, "learning_rate": 1.1749824209033775e-05, "loss": 1.8632, "step": 3186 }, { "epoch": 0.92, "learning_rate": 1.1740587481179441e-05, "loss": 1.9311, "step": 3188 }, { "epoch": 0.92, "learning_rate": 1.1731349221654875e-05, "loss": 1.9695, "step": 3190 }, { "epoch": 0.92, "learning_rate": 1.1722109438589486e-05, "loss": 1.8336, "step": 3192 }, { "epoch": 0.92, "learning_rate": 1.1712868140114037e-05, "loss": 1.9044, "step": 3194 }, { "epoch": 0.93, "learning_rate": 1.1703625334360626e-05, "loss": 1.8727, "step": 3196 }, { "epoch": 0.93, "learning_rate": 1.1694381029462671e-05, "loss": 1.8876, "step": 3198 }, { "epoch": 0.93, "learning_rate": 1.1685135233554919e-05, "loss": 1.8582, "step": 3200 }, { "epoch": 0.93, "learning_rate": 1.1675887954773406e-05, "loss": 1.9086, "step": 3202 }, { "epoch": 0.93, "learning_rate": 1.1666639201255507e-05, "loss": 1.8991, "step": 3204 }, { "epoch": 0.93, "learning_rate": 1.1657388981139866e-05, "loss": 1.9048, "step": 3206 }, { "epoch": 0.93, "learning_rate": 1.164813730256643e-05, "loss": 1.9374, "step": 3208 }, { "epoch": 0.93, "learning_rate": 1.1638884173676431e-05, "loss": 1.9176, "step": 3210 }, { "epoch": 0.93, "learning_rate": 1.1629629602612368e-05, "loss": 1.9182, "step": 3212 }, { "epoch": 0.93, "learning_rate": 1.1620373597518025e-05, "loss": 1.8618, "step": 3214 }, { "epoch": 0.93, "learning_rate": 1.1611116166538426e-05, "loss": 1.8918, "step": 3216 }, { "epoch": 0.93, "learning_rate": 1.1601857317819871e-05, "loss": 1.9447, "step": 3218 }, { "epoch": 0.93, "learning_rate": 1.1592597059509894e-05, "loss": 1.8446, "step": 3220 }, { "epoch": 0.93, "learning_rate": 1.1583335399757271e-05, "loss": 1.9189, "step": 3222 }, { "epoch": 0.93, "learning_rate": 1.157407234671202e-05, "loss": 1.9267, "step": 3224 }, { "epoch": 0.93, "learning_rate": 1.1564807908525375e-05, "loss": 1.8746, "step": 3226 }, { "epoch": 0.93, "learning_rate": 1.1555542093349791e-05, "loss": 1.9279, "step": 3228 }, { "epoch": 0.94, "learning_rate": 1.1546274909338946e-05, "loss": 1.893, "step": 3230 }, { "epoch": 0.94, "learning_rate": 1.15370063646477e-05, "loss": 1.9026, "step": 3232 }, { "epoch": 0.94, "learning_rate": 1.1527736467432133e-05, "loss": 1.8533, "step": 3234 }, { "epoch": 0.94, "learning_rate": 1.1518465225849501e-05, "loss": 1.9139, "step": 3236 }, { "epoch": 0.94, "learning_rate": 1.150919264805825e-05, "loss": 1.9716, "step": 3238 }, { "epoch": 0.94, "learning_rate": 1.1499918742217998e-05, "loss": 1.9075, "step": 3240 }, { "epoch": 0.94, "learning_rate": 1.1490643516489533e-05, "loss": 1.887, "step": 3242 }, { "epoch": 0.94, "learning_rate": 1.1481366979034808e-05, "loss": 1.9076, "step": 3244 }, { "epoch": 0.94, "learning_rate": 1.1472089138016917e-05, "loss": 1.9684, "step": 3246 }, { "epoch": 0.94, "learning_rate": 1.1462810001600124e-05, "loss": 1.8651, "step": 3248 }, { "epoch": 0.94, "learning_rate": 1.1453529577949812e-05, "loss": 1.9507, "step": 3250 }, { "epoch": 0.94, "learning_rate": 1.1444247875232504e-05, "loss": 1.8451, "step": 3252 }, { "epoch": 0.94, "learning_rate": 1.1434964901615853e-05, "loss": 1.9231, "step": 3254 }, { "epoch": 0.94, "learning_rate": 1.1425680665268627e-05, "loss": 1.9228, "step": 3256 }, { "epoch": 0.94, "learning_rate": 1.1416395174360702e-05, "loss": 1.9154, "step": 3258 }, { "epoch": 0.94, "learning_rate": 1.1407108437063056e-05, "loss": 1.8836, "step": 3260 }, { "epoch": 0.94, "learning_rate": 1.1397820461547778e-05, "loss": 1.9147, "step": 3262 }, { "epoch": 0.95, "learning_rate": 1.1388531255988033e-05, "loss": 1.9416, "step": 3264 }, { "epoch": 0.95, "learning_rate": 1.1379240828558072e-05, "loss": 1.8345, "step": 3266 }, { "epoch": 0.95, "learning_rate": 1.1369949187433218e-05, "loss": 1.9673, "step": 3268 }, { "epoch": 0.95, "learning_rate": 1.1360656340789873e-05, "loss": 1.9845, "step": 3270 }, { "epoch": 0.95, "learning_rate": 1.1351362296805487e-05, "loss": 1.8903, "step": 3272 }, { "epoch": 0.95, "learning_rate": 1.134206706365857e-05, "loss": 1.9277, "step": 3274 }, { "epoch": 0.95, "learning_rate": 1.1332770649528681e-05, "loss": 1.9942, "step": 3276 }, { "epoch": 0.95, "learning_rate": 1.1323473062596413e-05, "loss": 1.8788, "step": 3278 }, { "epoch": 0.95, "learning_rate": 1.1314174311043391e-05, "loss": 1.9424, "step": 3280 }, { "epoch": 0.95, "learning_rate": 1.1304874403052266e-05, "loss": 1.8541, "step": 3282 }, { "epoch": 0.95, "learning_rate": 1.129557334680671e-05, "loss": 1.9275, "step": 3284 }, { "epoch": 0.95, "learning_rate": 1.1286271150491402e-05, "loss": 1.9019, "step": 3286 }, { "epoch": 0.95, "learning_rate": 1.1276967822292025e-05, "loss": 1.9654, "step": 3288 }, { "epoch": 0.95, "learning_rate": 1.1267663370395256e-05, "loss": 1.8772, "step": 3290 }, { "epoch": 0.95, "learning_rate": 1.1258357802988767e-05, "loss": 1.8976, "step": 3292 }, { "epoch": 0.95, "learning_rate": 1.1249051128261203e-05, "loss": 1.8976, "step": 3294 }, { "epoch": 0.95, "learning_rate": 1.1239743354402188e-05, "loss": 1.943, "step": 3296 }, { "epoch": 0.96, "learning_rate": 1.1230434489602318e-05, "loss": 1.856, "step": 3298 }, { "epoch": 0.96, "learning_rate": 1.122112454205314e-05, "loss": 1.9476, "step": 3300 }, { "epoch": 0.96, "learning_rate": 1.1211813519947159e-05, "loss": 1.8787, "step": 3302 }, { "epoch": 0.96, "learning_rate": 1.1202501431477822e-05, "loss": 1.9352, "step": 3304 }, { "epoch": 0.96, "learning_rate": 1.1193188284839518e-05, "loss": 1.8744, "step": 3306 }, { "epoch": 0.96, "learning_rate": 1.118387408822757e-05, "loss": 1.9171, "step": 3308 }, { "epoch": 0.96, "learning_rate": 1.1174558849838213e-05, "loss": 1.9127, "step": 3310 }, { "epoch": 0.96, "learning_rate": 1.1165242577868615e-05, "loss": 1.8441, "step": 3312 }, { "epoch": 0.96, "learning_rate": 1.1155925280516842e-05, "loss": 1.9041, "step": 3314 }, { "epoch": 0.96, "learning_rate": 1.1146606965981863e-05, "loss": 1.9095, "step": 3316 }, { "epoch": 0.96, "learning_rate": 1.1137287642463545e-05, "loss": 1.9346, "step": 3318 }, { "epoch": 0.96, "learning_rate": 1.1127967318162645e-05, "loss": 1.9397, "step": 3320 }, { "epoch": 0.96, "learning_rate": 1.11186460012808e-05, "loss": 1.9011, "step": 3322 }, { "epoch": 0.96, "learning_rate": 1.110932370002051e-05, "loss": 1.9664, "step": 3324 }, { "epoch": 0.96, "learning_rate": 1.1100000422585158e-05, "loss": 1.9451, "step": 3326 }, { "epoch": 0.96, "learning_rate": 1.1090676177178974e-05, "loss": 1.9446, "step": 3328 }, { "epoch": 0.96, "learning_rate": 1.1081350972007043e-05, "loss": 1.8684, "step": 3330 }, { "epoch": 0.96, "learning_rate": 1.10720248152753e-05, "loss": 1.8903, "step": 3332 }, { "epoch": 0.97, "learning_rate": 1.1062697715190507e-05, "loss": 1.9291, "step": 3334 }, { "epoch": 0.97, "learning_rate": 1.1053369679960264e-05, "loss": 1.9269, "step": 3336 }, { "epoch": 0.97, "learning_rate": 1.104404071779299e-05, "loss": 1.8306, "step": 3338 }, { "epoch": 0.97, "learning_rate": 1.1034710836897922e-05, "loss": 1.9942, "step": 3340 }, { "epoch": 0.97, "learning_rate": 1.1025380045485108e-05, "loss": 1.8568, "step": 3342 }, { "epoch": 0.97, "learning_rate": 1.1016048351765389e-05, "loss": 1.8373, "step": 3344 }, { "epoch": 0.97, "learning_rate": 1.1006715763950406e-05, "loss": 1.9411, "step": 3346 }, { "epoch": 0.97, "learning_rate": 1.0997382290252585e-05, "loss": 1.9802, "step": 3348 }, { "epoch": 0.97, "learning_rate": 1.0988047938885136e-05, "loss": 1.9519, "step": 3350 }, { "epoch": 0.97, "learning_rate": 1.0978712718062037e-05, "loss": 1.8652, "step": 3352 }, { "epoch": 0.97, "learning_rate": 1.0969376635998024e-05, "loss": 1.9404, "step": 3354 }, { "epoch": 0.97, "learning_rate": 1.0960039700908609e-05, "loss": 1.8751, "step": 3356 }, { "epoch": 0.97, "learning_rate": 1.0950701921010036e-05, "loss": 1.9, "step": 3358 }, { "epoch": 0.97, "learning_rate": 1.0941363304519306e-05, "loss": 2.0116, "step": 3360 }, { "epoch": 0.97, "learning_rate": 1.0932023859654146e-05, "loss": 1.9538, "step": 3362 }, { "epoch": 0.97, "learning_rate": 1.092268359463302e-05, "loss": 1.8656, "step": 3364 }, { "epoch": 0.97, "learning_rate": 1.0913342517675113e-05, "loss": 1.9182, "step": 3366 }, { "epoch": 0.98, "learning_rate": 1.0904000637000314e-05, "loss": 1.9302, "step": 3368 }, { "epoch": 0.98, "learning_rate": 1.0894657960829235e-05, "loss": 1.9833, "step": 3370 }, { "epoch": 0.98, "learning_rate": 1.0885314497383175e-05, "loss": 1.8833, "step": 3372 }, { "epoch": 0.98, "learning_rate": 1.087597025488413e-05, "loss": 1.9136, "step": 3374 }, { "epoch": 0.98, "learning_rate": 1.086662524155479e-05, "loss": 1.9007, "step": 3376 }, { "epoch": 0.98, "learning_rate": 1.0857279465618508e-05, "loss": 1.9113, "step": 3378 }, { "epoch": 0.98, "learning_rate": 1.0847932935299319e-05, "loss": 1.8357, "step": 3380 }, { "epoch": 0.98, "learning_rate": 1.0838585658821919e-05, "loss": 1.9059, "step": 3382 }, { "epoch": 0.98, "learning_rate": 1.0829237644411659e-05, "loss": 1.9287, "step": 3384 }, { "epoch": 0.98, "learning_rate": 1.0819888900294542e-05, "loss": 1.9187, "step": 3386 }, { "epoch": 0.98, "learning_rate": 1.0810539434697211e-05, "loss": 1.9122, "step": 3388 }, { "epoch": 0.98, "learning_rate": 1.0801189255846945e-05, "loss": 1.8249, "step": 3390 }, { "epoch": 0.98, "learning_rate": 1.0791838371971649e-05, "loss": 1.9303, "step": 3392 }, { "epoch": 0.98, "learning_rate": 1.0782486791299849e-05, "loss": 1.8269, "step": 3394 }, { "epoch": 0.98, "learning_rate": 1.0773134522060688e-05, "loss": 1.8758, "step": 3396 }, { "epoch": 0.98, "learning_rate": 1.076378157248391e-05, "loss": 1.9062, "step": 3398 }, { "epoch": 0.98, "learning_rate": 1.0754427950799862e-05, "loss": 1.8943, "step": 3400 }, { "epoch": 0.99, "learning_rate": 1.0745073665239476e-05, "loss": 1.848, "step": 3402 }, { "epoch": 0.99, "learning_rate": 1.0735718724034274e-05, "loss": 1.8915, "step": 3404 }, { "epoch": 0.99, "learning_rate": 1.0726363135416352e-05, "loss": 1.9751, "step": 3406 }, { "epoch": 0.99, "learning_rate": 1.0717006907618377e-05, "loss": 1.836, "step": 3408 }, { "epoch": 0.99, "learning_rate": 1.070765004887358e-05, "loss": 1.9257, "step": 3410 }, { "epoch": 0.99, "learning_rate": 1.0698292567415742e-05, "loss": 1.8981, "step": 3412 }, { "epoch": 0.99, "learning_rate": 1.06889344714792e-05, "loss": 1.9421, "step": 3414 }, { "epoch": 0.99, "learning_rate": 1.0679575769298825e-05, "loss": 1.8869, "step": 3416 }, { "epoch": 0.99, "learning_rate": 1.067021646911002e-05, "loss": 1.9248, "step": 3418 }, { "epoch": 0.99, "learning_rate": 1.0660856579148725e-05, "loss": 1.9205, "step": 3420 }, { "epoch": 0.99, "learning_rate": 1.0651496107651384e-05, "loss": 1.9331, "step": 3422 }, { "epoch": 0.99, "learning_rate": 1.0642135062854966e-05, "loss": 1.9035, "step": 3424 }, { "epoch": 0.99, "learning_rate": 1.0632773452996936e-05, "loss": 1.9482, "step": 3426 }, { "epoch": 0.99, "learning_rate": 1.0623411286315262e-05, "loss": 1.9078, "step": 3428 }, { "epoch": 0.99, "learning_rate": 1.0614048571048402e-05, "loss": 1.8669, "step": 3430 }, { "epoch": 0.99, "learning_rate": 1.060468531543528e-05, "loss": 1.8719, "step": 3432 }, { "epoch": 0.99, "learning_rate": 1.0595321527715327e-05, "loss": 1.8837, "step": 3434 }, { "epoch": 1.0, "learning_rate": 1.0585957216128416e-05, "loss": 1.8841, "step": 3436 }, { "epoch": 1.0, "learning_rate": 1.0576592388914891e-05, "loss": 1.877, "step": 3438 }, { "epoch": 1.0, "learning_rate": 1.0567227054315546e-05, "loss": 1.9342, "step": 3440 }, { "epoch": 1.0, "learning_rate": 1.0557861220571626e-05, "loss": 1.8984, "step": 3442 }, { "epoch": 1.0, "learning_rate": 1.0548494895924817e-05, "loss": 1.9915, "step": 3444 }, { "epoch": 1.0, "learning_rate": 1.0539128088617226e-05, "loss": 1.8994, "step": 3446 }, { "epoch": 1.0, "learning_rate": 1.0529760806891396e-05, "loss": 1.9034, "step": 3448 }, { "epoch": 1.0, "learning_rate": 1.0520393058990286e-05, "loss": 1.8539, "step": 3450 }, { "epoch": 1.0, "learning_rate": 1.0511024853157255e-05, "loss": 1.8294, "step": 3452 }, { "epoch": 1.0, "learning_rate": 1.050165619763608e-05, "loss": 1.8923, "step": 3454 }, { "epoch": 1.0, "learning_rate": 1.049228710067092e-05, "loss": 1.8856, "step": 3456 }, { "epoch": 1.0, "learning_rate": 1.0482917570506335e-05, "loss": 1.8989, "step": 3458 }, { "epoch": 1.0, "learning_rate": 1.0473547615387257e-05, "loss": 1.8003, "step": 3460 }, { "epoch": 1.0, "learning_rate": 1.046417724355899e-05, "loss": 1.8187, "step": 3462 }, { "epoch": 1.0, "learning_rate": 1.045480646326722e-05, "loss": 1.8065, "step": 3464 }, { "epoch": 1.0, "learning_rate": 1.0445435282757972e-05, "loss": 1.8697, "step": 3466 }, { "epoch": 1.0, "learning_rate": 1.043606371027764e-05, "loss": 1.8681, "step": 3468 }, { "epoch": 1.0, "learning_rate": 1.0426691754072953e-05, "loss": 1.8208, "step": 3470 }, { "epoch": 1.01, "learning_rate": 1.0417319422390978e-05, "loss": 1.8609, "step": 3472 }, { "epoch": 1.01, "learning_rate": 1.0407946723479126e-05, "loss": 1.8623, "step": 3474 }, { "epoch": 1.01, "learning_rate": 1.0398573665585105e-05, "loss": 2.0423, "step": 3476 }, { "epoch": 1.01, "learning_rate": 1.0389200256956969e-05, "loss": 1.8622, "step": 3478 }, { "epoch": 1.01, "learning_rate": 1.0379826505843054e-05, "loss": 1.9067, "step": 3480 }, { "epoch": 1.01, "learning_rate": 1.0370452420492012e-05, "loss": 1.8273, "step": 3482 }, { "epoch": 1.01, "learning_rate": 1.0361078009152794e-05, "loss": 1.8825, "step": 3484 }, { "epoch": 1.01, "learning_rate": 1.0351703280074623e-05, "loss": 1.8579, "step": 3486 }, { "epoch": 1.01, "learning_rate": 1.0342328241507012e-05, "loss": 1.861, "step": 3488 }, { "epoch": 1.01, "learning_rate": 1.0332952901699738e-05, "loss": 1.9544, "step": 3490 }, { "epoch": 1.01, "learning_rate": 1.0323577268902853e-05, "loss": 1.8467, "step": 3492 }, { "epoch": 1.01, "learning_rate": 1.0314201351366666e-05, "loss": 1.7477, "step": 3494 }, { "epoch": 1.01, "learning_rate": 1.0304825157341722e-05, "loss": 1.9904, "step": 3496 }, { "epoch": 1.01, "learning_rate": 1.0295448695078829e-05, "loss": 1.8608, "step": 3498 }, { "epoch": 1.01, "learning_rate": 1.0286071972829017e-05, "loss": 1.9715, "step": 3500 }, { "epoch": 1.01, "learning_rate": 1.027669499884355e-05, "loss": 1.8495, "step": 3502 }, { "epoch": 1.01, "learning_rate": 1.0267317781373918e-05, "loss": 1.8744, "step": 3504 }, { "epoch": 1.02, "learning_rate": 1.0257940328671814e-05, "loss": 1.9518, "step": 3506 }, { "epoch": 1.02, "learning_rate": 1.0248562648989153e-05, "loss": 1.8339, "step": 3508 }, { "epoch": 1.02, "learning_rate": 1.023918475057803e-05, "loss": 1.9109, "step": 3510 }, { "epoch": 1.02, "learning_rate": 1.0229806641690754e-05, "loss": 1.8809, "step": 3512 }, { "epoch": 1.02, "learning_rate": 1.0220428330579803e-05, "loss": 1.8781, "step": 3514 }, { "epoch": 1.02, "learning_rate": 1.0211049825497841e-05, "loss": 1.7796, "step": 3516 }, { "epoch": 1.02, "learning_rate": 1.02016711346977e-05, "loss": 1.8795, "step": 3518 }, { "epoch": 1.02, "learning_rate": 1.0192292266432378e-05, "loss": 1.8068, "step": 3520 }, { "epoch": 1.02, "learning_rate": 1.0182913228955024e-05, "loss": 1.8162, "step": 3522 }, { "epoch": 1.02, "learning_rate": 1.017353403051894e-05, "loss": 1.9043, "step": 3524 }, { "epoch": 1.02, "learning_rate": 1.0164154679377569e-05, "loss": 1.8815, "step": 3526 }, { "epoch": 1.02, "learning_rate": 1.0154775183784494e-05, "loss": 1.8816, "step": 3528 }, { "epoch": 1.02, "learning_rate": 1.0145395551993409e-05, "loss": 1.8947, "step": 3530 }, { "epoch": 1.02, "learning_rate": 1.0136015792258142e-05, "loss": 1.8829, "step": 3532 }, { "epoch": 1.02, "learning_rate": 1.0126635912832631e-05, "loss": 1.8559, "step": 3534 }, { "epoch": 1.02, "learning_rate": 1.011725592197092e-05, "loss": 1.8835, "step": 3536 }, { "epoch": 1.02, "learning_rate": 1.0107875827927147e-05, "loss": 1.8523, "step": 3538 }, { "epoch": 1.03, "learning_rate": 1.009849563895554e-05, "loss": 1.9074, "step": 3540 }, { "epoch": 1.03, "learning_rate": 1.0089115363310418e-05, "loss": 1.9133, "step": 3542 }, { "epoch": 1.03, "learning_rate": 1.0079735009246168e-05, "loss": 1.8633, "step": 3544 }, { "epoch": 1.03, "learning_rate": 1.007035458501725e-05, "loss": 1.8552, "step": 3546 }, { "epoch": 1.03, "learning_rate": 1.0060974098878189e-05, "loss": 1.8492, "step": 3548 }, { "epoch": 1.03, "learning_rate": 1.0051593559083556e-05, "loss": 1.8911, "step": 3550 }, { "epoch": 1.03, "learning_rate": 1.004221297388798e-05, "loss": 1.9004, "step": 3552 }, { "epoch": 1.03, "learning_rate": 1.0032832351546118e-05, "loss": 1.8149, "step": 3554 }, { "epoch": 1.03, "learning_rate": 1.0023451700312669e-05, "loss": 1.878, "step": 3556 }, { "epoch": 1.03, "learning_rate": 1.0014071028442354e-05, "loss": 1.8721, "step": 3558 }, { "epoch": 1.03, "learning_rate": 1.000469034418991e-05, "loss": 1.7944, "step": 3560 }, { "epoch": 1.03, "learning_rate": 9.995309655810094e-06, "loss": 1.8522, "step": 3562 }, { "epoch": 1.03, "learning_rate": 9.98592897155765e-06, "loss": 1.9749, "step": 3564 }, { "epoch": 1.03, "learning_rate": 9.976548299687333e-06, "loss": 1.9072, "step": 3566 }, { "epoch": 1.03, "learning_rate": 9.967167648453886e-06, "loss": 1.8782, "step": 3568 }, { "epoch": 1.03, "learning_rate": 9.957787026112021e-06, "loss": 1.8707, "step": 3570 }, { "epoch": 1.03, "learning_rate": 9.948406440916444e-06, "loss": 1.9308, "step": 3572 }, { "epoch": 1.04, "learning_rate": 9.939025901121814e-06, "loss": 1.9125, "step": 3574 }, { "epoch": 1.04, "learning_rate": 9.929645414982751e-06, "loss": 1.9055, "step": 3576 }, { "epoch": 1.04, "learning_rate": 9.920264990753837e-06, "loss": 1.7801, "step": 3578 }, { "epoch": 1.04, "learning_rate": 9.910884636689586e-06, "loss": 1.9178, "step": 3580 }, { "epoch": 1.04, "learning_rate": 9.901504361044462e-06, "loss": 1.8503, "step": 3582 }, { "epoch": 1.04, "learning_rate": 9.892124172072856e-06, "loss": 1.8698, "step": 3584 }, { "epoch": 1.04, "learning_rate": 9.882744078029081e-06, "loss": 1.8752, "step": 3586 }, { "epoch": 1.04, "learning_rate": 9.873364087167367e-06, "loss": 1.7909, "step": 3588 }, { "epoch": 1.04, "learning_rate": 9.86398420774186e-06, "loss": 1.861, "step": 3590 }, { "epoch": 1.04, "learning_rate": 9.854604448006595e-06, "loss": 1.9267, "step": 3592 }, { "epoch": 1.04, "learning_rate": 9.845224816215508e-06, "loss": 1.8784, "step": 3594 }, { "epoch": 1.04, "learning_rate": 9.835845320622433e-06, "loss": 1.8094, "step": 3596 }, { "epoch": 1.04, "learning_rate": 9.826465969481061e-06, "loss": 1.8903, "step": 3598 }, { "epoch": 1.04, "learning_rate": 9.81708677104498e-06, "loss": 1.8822, "step": 3600 }, { "epoch": 1.04, "learning_rate": 9.807707733567626e-06, "loss": 1.844, "step": 3602 }, { "epoch": 1.04, "learning_rate": 9.7983288653023e-06, "loss": 1.8766, "step": 3604 }, { "epoch": 1.04, "learning_rate": 9.788950174502164e-06, "loss": 1.9375, "step": 3606 }, { "epoch": 1.04, "learning_rate": 9.7795716694202e-06, "loss": 1.8979, "step": 3608 }, { "epoch": 1.05, "learning_rate": 9.770193358309249e-06, "loss": 1.959, "step": 3610 }, { "epoch": 1.05, "learning_rate": 9.760815249421973e-06, "loss": 1.8954, "step": 3612 }, { "epoch": 1.05, "learning_rate": 9.75143735101085e-06, "loss": 1.8315, "step": 3614 }, { "epoch": 1.05, "learning_rate": 9.742059671328186e-06, "loss": 1.8652, "step": 3616 }, { "epoch": 1.05, "learning_rate": 9.732682218626087e-06, "loss": 1.8517, "step": 3618 }, { "epoch": 1.05, "learning_rate": 9.723305001156452e-06, "loss": 1.7951, "step": 3620 }, { "epoch": 1.05, "learning_rate": 9.71392802717099e-06, "loss": 1.8744, "step": 3622 }, { "epoch": 1.05, "learning_rate": 9.704551304921174e-06, "loss": 1.8848, "step": 3624 }, { "epoch": 1.05, "learning_rate": 9.69517484265828e-06, "loss": 1.8857, "step": 3626 }, { "epoch": 1.05, "learning_rate": 9.68579864863334e-06, "loss": 1.8918, "step": 3628 }, { "epoch": 1.05, "learning_rate": 9.676422731097149e-06, "loss": 1.8541, "step": 3630 }, { "epoch": 1.05, "learning_rate": 9.667047098300263e-06, "loss": 1.9065, "step": 3632 }, { "epoch": 1.05, "learning_rate": 9.657671758492993e-06, "loss": 1.9483, "step": 3634 }, { "epoch": 1.05, "learning_rate": 9.64829671992538e-06, "loss": 1.8835, "step": 3636 }, { "epoch": 1.05, "learning_rate": 9.638921990847206e-06, "loss": 1.8462, "step": 3638 }, { "epoch": 1.05, "learning_rate": 9.629547579507991e-06, "loss": 1.8665, "step": 3640 }, { "epoch": 1.05, "learning_rate": 9.620173494156949e-06, "loss": 1.7901, "step": 3642 }, { "epoch": 1.06, "learning_rate": 9.610799743043036e-06, "loss": 1.8213, "step": 3644 }, { "epoch": 1.06, "learning_rate": 9.601426334414898e-06, "loss": 1.8831, "step": 3646 }, { "epoch": 1.06, "learning_rate": 9.592053276520877e-06, "loss": 1.8917, "step": 3648 }, { "epoch": 1.06, "learning_rate": 9.582680577609023e-06, "loss": 1.896, "step": 3650 }, { "epoch": 1.06, "learning_rate": 9.573308245927052e-06, "loss": 1.8749, "step": 3652 }, { "epoch": 1.06, "learning_rate": 9.563936289722363e-06, "loss": 1.8452, "step": 3654 }, { "epoch": 1.06, "learning_rate": 9.554564717242033e-06, "loss": 1.8268, "step": 3656 }, { "epoch": 1.06, "learning_rate": 9.545193536732783e-06, "loss": 1.8622, "step": 3658 }, { "epoch": 1.06, "learning_rate": 9.535822756441011e-06, "loss": 1.8376, "step": 3660 }, { "epoch": 1.06, "learning_rate": 9.526452384612748e-06, "loss": 1.8858, "step": 3662 }, { "epoch": 1.06, "learning_rate": 9.51708242949367e-06, "loss": 1.8157, "step": 3664 }, { "epoch": 1.06, "learning_rate": 9.50771289932908e-06, "loss": 1.8605, "step": 3666 }, { "epoch": 1.06, "learning_rate": 9.498343802363924e-06, "loss": 1.9124, "step": 3668 }, { "epoch": 1.06, "learning_rate": 9.488975146842746e-06, "loss": 1.8245, "step": 3670 }, { "epoch": 1.06, "learning_rate": 9.479606941009721e-06, "loss": 1.8854, "step": 3672 }, { "epoch": 1.06, "learning_rate": 9.470239193108607e-06, "loss": 1.8287, "step": 3674 }, { "epoch": 1.06, "learning_rate": 9.460871911382776e-06, "loss": 1.9268, "step": 3676 }, { "epoch": 1.07, "learning_rate": 9.451505104075189e-06, "loss": 1.8609, "step": 3678 }, { "epoch": 1.07, "learning_rate": 9.442138779428376e-06, "loss": 1.8599, "step": 3680 }, { "epoch": 1.07, "learning_rate": 9.432772945684454e-06, "loss": 1.922, "step": 3682 }, { "epoch": 1.07, "learning_rate": 9.423407611085114e-06, "loss": 1.841, "step": 3684 }, { "epoch": 1.07, "learning_rate": 9.414042783871586e-06, "loss": 1.9104, "step": 3686 }, { "epoch": 1.07, "learning_rate": 9.404678472284675e-06, "loss": 1.7912, "step": 3688 }, { "epoch": 1.07, "learning_rate": 9.395314684564721e-06, "loss": 1.9258, "step": 3690 }, { "epoch": 1.07, "learning_rate": 9.385951428951603e-06, "loss": 1.8405, "step": 3692 }, { "epoch": 1.07, "learning_rate": 9.37658871368474e-06, "loss": 1.8844, "step": 3694 }, { "epoch": 1.07, "learning_rate": 9.367226547003066e-06, "loss": 1.8473, "step": 3696 }, { "epoch": 1.07, "learning_rate": 9.357864937145037e-06, "loss": 1.8769, "step": 3698 }, { "epoch": 1.07, "learning_rate": 9.34850389234862e-06, "loss": 1.8635, "step": 3700 }, { "epoch": 1.07, "learning_rate": 9.339143420851279e-06, "loss": 1.9234, "step": 3702 }, { "epoch": 1.07, "learning_rate": 9.32978353088998e-06, "loss": 1.833, "step": 3704 }, { "epoch": 1.07, "learning_rate": 9.320424230701179e-06, "loss": 1.8364, "step": 3706 }, { "epoch": 1.07, "learning_rate": 9.311065528520802e-06, "loss": 1.8395, "step": 3708 }, { "epoch": 1.07, "learning_rate": 9.301707432584256e-06, "loss": 1.8766, "step": 3710 }, { "epoch": 1.08, "learning_rate": 9.292349951126423e-06, "loss": 1.8645, "step": 3712 }, { "epoch": 1.08, "learning_rate": 9.282993092381626e-06, "loss": 1.9087, "step": 3714 }, { "epoch": 1.08, "learning_rate": 9.273636864583648e-06, "loss": 1.8182, "step": 3716 }, { "epoch": 1.08, "learning_rate": 9.264281275965728e-06, "loss": 1.98, "step": 3718 }, { "epoch": 1.08, "learning_rate": 9.254926334760527e-06, "loss": 1.8414, "step": 3720 }, { "epoch": 1.08, "learning_rate": 9.245572049200143e-06, "loss": 1.9114, "step": 3722 }, { "epoch": 1.08, "learning_rate": 9.236218427516093e-06, "loss": 1.8829, "step": 3724 }, { "epoch": 1.08, "learning_rate": 9.226865477939314e-06, "loss": 1.8888, "step": 3726 }, { "epoch": 1.08, "learning_rate": 9.217513208700155e-06, "loss": 1.9061, "step": 3728 }, { "epoch": 1.08, "learning_rate": 9.208161628028355e-06, "loss": 1.8963, "step": 3730 }, { "epoch": 1.08, "learning_rate": 9.198810744153059e-06, "loss": 1.8713, "step": 3732 }, { "epoch": 1.08, "learning_rate": 9.189460565302794e-06, "loss": 1.8956, "step": 3734 }, { "epoch": 1.08, "learning_rate": 9.18011109970546e-06, "loss": 1.8209, "step": 3736 }, { "epoch": 1.08, "learning_rate": 9.170762355588343e-06, "loss": 1.9485, "step": 3738 }, { "epoch": 1.08, "learning_rate": 9.161414341178085e-06, "loss": 1.8799, "step": 3740 }, { "epoch": 1.08, "learning_rate": 9.152067064700684e-06, "loss": 1.7764, "step": 3742 }, { "epoch": 1.08, "learning_rate": 9.142720534381497e-06, "loss": 1.8038, "step": 3744 }, { "epoch": 1.08, "learning_rate": 9.133374758445212e-06, "loss": 1.9059, "step": 3746 }, { "epoch": 1.09, "learning_rate": 9.124029745115872e-06, "loss": 1.8583, "step": 3748 }, { "epoch": 1.09, "learning_rate": 9.11468550261683e-06, "loss": 1.9205, "step": 3750 }, { "epoch": 1.09, "learning_rate": 9.105342039170768e-06, "loss": 1.7956, "step": 3752 }, { "epoch": 1.09, "learning_rate": 9.095999362999688e-06, "loss": 1.8781, "step": 3754 }, { "epoch": 1.09, "learning_rate": 9.086657482324892e-06, "loss": 1.8449, "step": 3756 }, { "epoch": 1.09, "learning_rate": 9.07731640536698e-06, "loss": 1.9595, "step": 3758 }, { "epoch": 1.09, "learning_rate": 9.067976140345854e-06, "loss": 1.8879, "step": 3760 }, { "epoch": 1.09, "learning_rate": 9.058636695480698e-06, "loss": 1.8214, "step": 3762 }, { "epoch": 1.09, "learning_rate": 9.049298078989967e-06, "loss": 1.8042, "step": 3764 }, { "epoch": 1.09, "learning_rate": 9.039960299091396e-06, "loss": 1.8872, "step": 3766 }, { "epoch": 1.09, "learning_rate": 9.03062336400198e-06, "loss": 1.9007, "step": 3768 }, { "epoch": 1.09, "learning_rate": 9.021287281937966e-06, "loss": 1.9697, "step": 3770 }, { "epoch": 1.09, "learning_rate": 9.011952061114867e-06, "loss": 1.863, "step": 3772 }, { "epoch": 1.09, "learning_rate": 9.002617709747417e-06, "loss": 1.8501, "step": 3774 }, { "epoch": 1.09, "learning_rate": 8.993284236049597e-06, "loss": 1.9295, "step": 3776 }, { "epoch": 1.09, "learning_rate": 8.983951648234616e-06, "loss": 1.8398, "step": 3778 }, { "epoch": 1.09, "learning_rate": 8.974619954514895e-06, "loss": 1.9025, "step": 3780 }, { "epoch": 1.1, "learning_rate": 8.96528916310208e-06, "loss": 1.796, "step": 3782 }, { "epoch": 1.1, "learning_rate": 8.955959282207014e-06, "loss": 1.8343, "step": 3784 }, { "epoch": 1.1, "learning_rate": 8.946630320039738e-06, "loss": 1.7843, "step": 3786 }, { "epoch": 1.1, "learning_rate": 8.937302284809494e-06, "loss": 1.9336, "step": 3788 }, { "epoch": 1.1, "learning_rate": 8.927975184724704e-06, "loss": 1.8519, "step": 3790 }, { "epoch": 1.1, "learning_rate": 8.918649027992959e-06, "loss": 1.8276, "step": 3792 }, { "epoch": 1.1, "learning_rate": 8.909323822821031e-06, "loss": 1.8707, "step": 3794 }, { "epoch": 1.1, "learning_rate": 8.899999577414845e-06, "loss": 1.9015, "step": 3796 }, { "epoch": 1.1, "learning_rate": 8.890676299979492e-06, "loss": 1.8709, "step": 3798 }, { "epoch": 1.1, "learning_rate": 8.881353998719207e-06, "loss": 1.8555, "step": 3800 }, { "epoch": 1.1, "learning_rate": 8.872032681837358e-06, "loss": 1.8683, "step": 3802 }, { "epoch": 1.1, "learning_rate": 8.862712357536454e-06, "loss": 1.8833, "step": 3804 }, { "epoch": 1.1, "learning_rate": 8.85339303401814e-06, "loss": 1.8996, "step": 3806 }, { "epoch": 1.1, "learning_rate": 8.844074719483161e-06, "loss": 1.861, "step": 3808 }, { "epoch": 1.1, "learning_rate": 8.834757422131386e-06, "loss": 1.9126, "step": 3810 }, { "epoch": 1.1, "learning_rate": 8.825441150161788e-06, "loss": 1.8497, "step": 3812 }, { "epoch": 1.1, "learning_rate": 8.816125911772433e-06, "loss": 1.839, "step": 3814 }, { "epoch": 1.11, "learning_rate": 8.806811715160485e-06, "loss": 1.8571, "step": 3816 }, { "epoch": 1.11, "learning_rate": 8.797498568522183e-06, "loss": 1.8502, "step": 3818 }, { "epoch": 1.11, "learning_rate": 8.788186480052843e-06, "loss": 1.9115, "step": 3820 }, { "epoch": 1.11, "learning_rate": 8.778875457946865e-06, "loss": 1.8831, "step": 3822 }, { "epoch": 1.11, "learning_rate": 8.769565510397684e-06, "loss": 1.8182, "step": 3824 }, { "epoch": 1.11, "learning_rate": 8.760256645597814e-06, "loss": 1.9061, "step": 3826 }, { "epoch": 1.11, "learning_rate": 8.750948871738802e-06, "loss": 1.8385, "step": 3828 }, { "epoch": 1.11, "learning_rate": 8.741642197011235e-06, "loss": 1.8749, "step": 3830 }, { "epoch": 1.11, "learning_rate": 8.732336629604746e-06, "loss": 1.78, "step": 3832 }, { "epoch": 1.11, "learning_rate": 8.723032177707978e-06, "loss": 1.9931, "step": 3834 }, { "epoch": 1.11, "learning_rate": 8.713728849508602e-06, "loss": 1.8977, "step": 3836 }, { "epoch": 1.11, "learning_rate": 8.70442665319329e-06, "loss": 1.8488, "step": 3838 }, { "epoch": 1.11, "learning_rate": 8.695125596947736e-06, "loss": 1.9546, "step": 3840 }, { "epoch": 1.11, "learning_rate": 8.685825688956614e-06, "loss": 1.8658, "step": 3842 }, { "epoch": 1.11, "learning_rate": 8.67652693740359e-06, "loss": 1.8814, "step": 3844 }, { "epoch": 1.11, "learning_rate": 8.667229350471322e-06, "loss": 1.896, "step": 3846 }, { "epoch": 1.11, "learning_rate": 8.65793293634143e-06, "loss": 1.8368, "step": 3848 }, { "epoch": 1.11, "learning_rate": 8.648637703194515e-06, "loss": 1.8357, "step": 3850 }, { "epoch": 1.12, "learning_rate": 8.639343659210132e-06, "loss": 1.9066, "step": 3852 }, { "epoch": 1.12, "learning_rate": 8.63005081256678e-06, "loss": 1.9036, "step": 3854 }, { "epoch": 1.12, "learning_rate": 8.620759171441935e-06, "loss": 1.854, "step": 3856 }, { "epoch": 1.12, "learning_rate": 8.61146874401197e-06, "loss": 1.8337, "step": 3858 }, { "epoch": 1.12, "learning_rate": 8.602179538452224e-06, "loss": 1.8763, "step": 3860 }, { "epoch": 1.12, "learning_rate": 8.592891562936947e-06, "loss": 1.8127, "step": 3862 }, { "epoch": 1.12, "learning_rate": 8.583604825639302e-06, "loss": 1.9282, "step": 3864 }, { "epoch": 1.12, "learning_rate": 8.57431933473138e-06, "loss": 1.9205, "step": 3866 }, { "epoch": 1.12, "learning_rate": 8.565035098384148e-06, "loss": 1.815, "step": 3868 }, { "epoch": 1.12, "learning_rate": 8.555752124767498e-06, "loss": 1.9006, "step": 3870 }, { "epoch": 1.12, "learning_rate": 8.546470422050193e-06, "loss": 1.8515, "step": 3872 }, { "epoch": 1.12, "learning_rate": 8.53718999839988e-06, "loss": 1.8957, "step": 3874 }, { "epoch": 1.12, "learning_rate": 8.527910861983085e-06, "loss": 1.7864, "step": 3876 }, { "epoch": 1.12, "learning_rate": 8.518633020965199e-06, "loss": 1.8931, "step": 3878 }, { "epoch": 1.12, "learning_rate": 8.509356483510472e-06, "loss": 1.855, "step": 3880 }, { "epoch": 1.12, "learning_rate": 8.500081257782004e-06, "loss": 1.8623, "step": 3882 }, { "epoch": 1.12, "learning_rate": 8.490807351941753e-06, "loss": 1.8511, "step": 3884 }, { "epoch": 1.13, "learning_rate": 8.481534774150502e-06, "loss": 1.8626, "step": 3886 }, { "epoch": 1.13, "learning_rate": 8.472263532567872e-06, "loss": 1.8619, "step": 3888 }, { "epoch": 1.13, "learning_rate": 8.462993635352303e-06, "loss": 1.8555, "step": 3890 }, { "epoch": 1.13, "learning_rate": 8.453725090661059e-06, "loss": 1.8693, "step": 3892 }, { "epoch": 1.13, "learning_rate": 8.44445790665021e-06, "loss": 1.8888, "step": 3894 }, { "epoch": 1.13, "learning_rate": 8.435192091474628e-06, "loss": 1.7862, "step": 3896 }, { "epoch": 1.13, "learning_rate": 8.425927653287981e-06, "loss": 1.888, "step": 3898 }, { "epoch": 1.13, "learning_rate": 8.416664600242734e-06, "loss": 1.9538, "step": 3900 }, { "epoch": 1.13, "learning_rate": 8.40740294049011e-06, "loss": 1.8395, "step": 3902 }, { "epoch": 1.13, "learning_rate": 8.398142682180132e-06, "loss": 1.8668, "step": 3904 }, { "epoch": 1.13, "learning_rate": 8.388883833461577e-06, "loss": 1.8593, "step": 3906 }, { "epoch": 1.13, "learning_rate": 8.379626402481977e-06, "loss": 1.7894, "step": 3908 }, { "epoch": 1.13, "learning_rate": 8.37037039738763e-06, "loss": 1.8434, "step": 3910 }, { "epoch": 1.13, "learning_rate": 8.361115826323572e-06, "loss": 1.763, "step": 3912 }, { "epoch": 1.13, "learning_rate": 8.351862697433573e-06, "loss": 1.8845, "step": 3914 }, { "epoch": 1.13, "learning_rate": 8.34261101886014e-06, "loss": 1.8458, "step": 3916 }, { "epoch": 1.13, "learning_rate": 8.333360798744496e-06, "loss": 1.9263, "step": 3918 }, { "epoch": 1.14, "learning_rate": 8.324112045226594e-06, "loss": 1.8637, "step": 3920 }, { "epoch": 1.14, "learning_rate": 8.314864766445088e-06, "loss": 1.8701, "step": 3922 }, { "epoch": 1.14, "learning_rate": 8.30561897053733e-06, "loss": 1.8323, "step": 3924 }, { "epoch": 1.14, "learning_rate": 8.296374665639374e-06, "loss": 1.8901, "step": 3926 }, { "epoch": 1.14, "learning_rate": 8.287131859885965e-06, "loss": 1.8539, "step": 3928 }, { "epoch": 1.14, "learning_rate": 8.277890561410517e-06, "loss": 1.876, "step": 3930 }, { "epoch": 1.14, "learning_rate": 8.268650778345127e-06, "loss": 1.83, "step": 3932 }, { "epoch": 1.14, "learning_rate": 8.25941251882056e-06, "loss": 1.9112, "step": 3934 }, { "epoch": 1.14, "learning_rate": 8.250175790966228e-06, "loss": 1.9087, "step": 3936 }, { "epoch": 1.14, "learning_rate": 8.240940602910212e-06, "loss": 1.9338, "step": 3938 }, { "epoch": 1.14, "learning_rate": 8.231706962779223e-06, "loss": 1.8468, "step": 3940 }, { "epoch": 1.14, "learning_rate": 8.222474878698617e-06, "loss": 1.874, "step": 3942 }, { "epoch": 1.14, "learning_rate": 8.213244358792389e-06, "loss": 1.866, "step": 3944 }, { "epoch": 1.14, "learning_rate": 8.204015411183134e-06, "loss": 1.879, "step": 3946 }, { "epoch": 1.14, "learning_rate": 8.19478804399209e-06, "loss": 1.906, "step": 3948 }, { "epoch": 1.14, "learning_rate": 8.185562265339087e-06, "loss": 1.8017, "step": 3950 }, { "epoch": 1.14, "learning_rate": 8.17633808334256e-06, "loss": 1.9577, "step": 3952 }, { "epoch": 1.15, "learning_rate": 8.16711550611955e-06, "loss": 1.8804, "step": 3954 }, { "epoch": 1.15, "learning_rate": 8.157894541785672e-06, "loss": 1.8612, "step": 3956 }, { "epoch": 1.15, "learning_rate": 8.148675198455124e-06, "loss": 1.8441, "step": 3958 }, { "epoch": 1.15, "learning_rate": 8.139457484240687e-06, "loss": 1.7868, "step": 3960 }, { "epoch": 1.15, "learning_rate": 8.130241407253694e-06, "loss": 1.8622, "step": 3962 }, { "epoch": 1.15, "learning_rate": 8.121026975604051e-06, "loss": 1.9043, "step": 3964 }, { "epoch": 1.15, "learning_rate": 8.111814197400213e-06, "loss": 1.8585, "step": 3966 }, { "epoch": 1.15, "learning_rate": 8.102603080749171e-06, "loss": 1.8691, "step": 3968 }, { "epoch": 1.15, "learning_rate": 8.09339363375646e-06, "loss": 1.8442, "step": 3970 }, { "epoch": 1.15, "learning_rate": 8.084185864526153e-06, "loss": 1.8737, "step": 3972 }, { "epoch": 1.15, "learning_rate": 8.074979781160835e-06, "loss": 1.8284, "step": 3974 }, { "epoch": 1.15, "learning_rate": 8.065775391761611e-06, "loss": 1.8608, "step": 3976 }, { "epoch": 1.15, "learning_rate": 8.056572704428103e-06, "loss": 1.8451, "step": 3978 }, { "epoch": 1.15, "learning_rate": 8.04737172725842e-06, "loss": 1.9054, "step": 3980 }, { "epoch": 1.15, "learning_rate": 8.038172468349178e-06, "loss": 1.8424, "step": 3982 }, { "epoch": 1.15, "learning_rate": 8.02897493579548e-06, "loss": 1.8739, "step": 3984 }, { "epoch": 1.15, "learning_rate": 8.019779137690906e-06, "loss": 1.8487, "step": 3986 }, { "epoch": 1.15, "learning_rate": 8.010585082127513e-06, "loss": 1.8487, "step": 3988 }, { "epoch": 1.16, "learning_rate": 8.001392777195822e-06, "loss": 1.9227, "step": 3990 }, { "epoch": 1.16, "learning_rate": 7.99220223098481e-06, "loss": 1.8733, "step": 3992 }, { "epoch": 1.16, "learning_rate": 7.98301345158192e-06, "loss": 1.8971, "step": 3994 }, { "epoch": 1.16, "learning_rate": 7.973826447073022e-06, "loss": 1.8184, "step": 3996 }, { "epoch": 1.16, "learning_rate": 7.964641225542438e-06, "loss": 1.85, "step": 3998 }, { "epoch": 1.16, "learning_rate": 7.955457795072917e-06, "loss": 1.8748, "step": 4000 }, { "epoch": 1.16, "learning_rate": 7.94627616374563e-06, "loss": 1.902, "step": 4002 }, { "epoch": 1.16, "learning_rate": 7.93709633964016e-06, "loss": 1.9255, "step": 4004 }, { "epoch": 1.16, "learning_rate": 7.92791833083452e-06, "loss": 1.848, "step": 4006 }, { "epoch": 1.16, "learning_rate": 7.918742145405099e-06, "loss": 1.8807, "step": 4008 }, { "epoch": 1.16, "learning_rate": 7.909567791426697e-06, "loss": 1.9259, "step": 4010 }, { "epoch": 1.16, "learning_rate": 7.9003952769725e-06, "loss": 1.9212, "step": 4012 }, { "epoch": 1.16, "learning_rate": 7.891224610114072e-06, "loss": 1.8852, "step": 4014 }, { "epoch": 1.16, "learning_rate": 7.882055798921358e-06, "loss": 1.8676, "step": 4016 }, { "epoch": 1.16, "learning_rate": 7.872888851462663e-06, "loss": 1.8303, "step": 4018 }, { "epoch": 1.16, "learning_rate": 7.863723775804651e-06, "loss": 1.8096, "step": 4020 }, { "epoch": 1.16, "learning_rate": 7.854560580012351e-06, "loss": 1.808, "step": 4022 }, { "epoch": 1.17, "learning_rate": 7.845399272149123e-06, "loss": 1.7862, "step": 4024 }, { "epoch": 1.17, "learning_rate": 7.836239860276671e-06, "loss": 1.9057, "step": 4026 }, { "epoch": 1.17, "learning_rate": 7.827082352455038e-06, "loss": 1.8498, "step": 4028 }, { "epoch": 1.17, "learning_rate": 7.817926756742577e-06, "loss": 1.7707, "step": 4030 }, { "epoch": 1.17, "learning_rate": 7.808773081195974e-06, "loss": 1.8126, "step": 4032 }, { "epoch": 1.17, "learning_rate": 7.79962133387021e-06, "loss": 1.876, "step": 4034 }, { "epoch": 1.17, "learning_rate": 7.79047152281858e-06, "loss": 1.8454, "step": 4036 }, { "epoch": 1.17, "learning_rate": 7.781323656092677e-06, "loss": 1.8572, "step": 4038 }, { "epoch": 1.17, "learning_rate": 7.772177741742368e-06, "loss": 1.8203, "step": 4040 }, { "epoch": 1.17, "learning_rate": 7.76303378781582e-06, "loss": 1.8154, "step": 4042 }, { "epoch": 1.17, "learning_rate": 7.753891802359466e-06, "loss": 1.8494, "step": 4044 }, { "epoch": 1.17, "learning_rate": 7.744751793418005e-06, "loss": 1.7966, "step": 4046 }, { "epoch": 1.17, "learning_rate": 7.7356137690344e-06, "loss": 1.8256, "step": 4048 }, { "epoch": 1.17, "learning_rate": 7.72647773724987e-06, "loss": 1.9322, "step": 4050 }, { "epoch": 1.17, "learning_rate": 7.717343706103872e-06, "loss": 1.8329, "step": 4052 }, { "epoch": 1.17, "learning_rate": 7.708211683634112e-06, "loss": 1.7868, "step": 4054 }, { "epoch": 1.17, "learning_rate": 7.699081677876526e-06, "loss": 1.8211, "step": 4056 }, { "epoch": 1.18, "learning_rate": 7.68995369686527e-06, "loss": 1.8894, "step": 4058 }, { "epoch": 1.18, "learning_rate": 7.680827748632728e-06, "loss": 1.9159, "step": 4060 }, { "epoch": 1.18, "learning_rate": 7.671703841209482e-06, "loss": 1.805, "step": 4062 }, { "epoch": 1.18, "learning_rate": 7.662581982624326e-06, "loss": 1.9556, "step": 4064 }, { "epoch": 1.18, "learning_rate": 7.653462180904259e-06, "loss": 1.9501, "step": 4066 }, { "epoch": 1.18, "learning_rate": 7.644344444074452e-06, "loss": 1.7819, "step": 4068 }, { "epoch": 1.18, "learning_rate": 7.635228780158274e-06, "loss": 1.8371, "step": 4070 }, { "epoch": 1.18, "learning_rate": 7.626115197177266e-06, "loss": 1.8638, "step": 4072 }, { "epoch": 1.18, "learning_rate": 7.617003703151129e-06, "loss": 1.8107, "step": 4074 }, { "epoch": 1.18, "learning_rate": 7.607894306097738e-06, "loss": 1.7774, "step": 4076 }, { "epoch": 1.18, "learning_rate": 7.5987870140331195e-06, "loss": 1.8929, "step": 4078 }, { "epoch": 1.18, "learning_rate": 7.589681834971443e-06, "loss": 1.7603, "step": 4080 }, { "epoch": 1.18, "learning_rate": 7.580578776925023e-06, "loss": 1.8858, "step": 4082 }, { "epoch": 1.18, "learning_rate": 7.571477847904301e-06, "loss": 1.7761, "step": 4084 }, { "epoch": 1.18, "learning_rate": 7.5623790559178565e-06, "loss": 1.856, "step": 4086 }, { "epoch": 1.18, "learning_rate": 7.553282408972382e-06, "loss": 1.8772, "step": 4088 }, { "epoch": 1.18, "learning_rate": 7.544187915072676e-06, "loss": 1.8019, "step": 4090 }, { "epoch": 1.19, "learning_rate": 7.535095582221656e-06, "loss": 1.8706, "step": 4092 }, { "epoch": 1.19, "learning_rate": 7.526005418420331e-06, "loss": 1.9007, "step": 4094 }, { "epoch": 1.19, "learning_rate": 7.516917431667795e-06, "loss": 2.0064, "step": 4096 }, { "epoch": 1.19, "learning_rate": 7.507831629961234e-06, "loss": 1.8621, "step": 4098 }, { "epoch": 1.19, "learning_rate": 7.498748021295914e-06, "loss": 1.8124, "step": 4100 }, { "epoch": 1.19, "learning_rate": 7.489666613665163e-06, "loss": 1.8084, "step": 4102 }, { "epoch": 1.19, "learning_rate": 7.480587415060379e-06, "loss": 1.844, "step": 4104 }, { "epoch": 1.19, "learning_rate": 7.471510433471005e-06, "loss": 1.8879, "step": 4106 }, { "epoch": 1.19, "learning_rate": 7.462435676884547e-06, "loss": 1.882, "step": 4108 }, { "epoch": 1.19, "learning_rate": 7.4533631532865505e-06, "loss": 1.7715, "step": 4110 }, { "epoch": 1.19, "learning_rate": 7.4442928706605855e-06, "loss": 1.8891, "step": 4112 }, { "epoch": 1.19, "learning_rate": 7.43522483698826e-06, "loss": 1.8207, "step": 4114 }, { "epoch": 1.19, "learning_rate": 7.426159060249205e-06, "loss": 1.9177, "step": 4116 }, { "epoch": 1.19, "learning_rate": 7.417095548421051e-06, "loss": 1.8655, "step": 4118 }, { "epoch": 1.19, "learning_rate": 7.40803430947945e-06, "loss": 1.9036, "step": 4120 }, { "epoch": 1.19, "learning_rate": 7.398975351398054e-06, "loss": 1.8158, "step": 4122 }, { "epoch": 1.19, "learning_rate": 7.389918682148492e-06, "loss": 1.8805, "step": 4124 }, { "epoch": 1.19, "learning_rate": 7.380864309700396e-06, "loss": 1.8294, "step": 4126 }, { "epoch": 1.2, "learning_rate": 7.3718122420213745e-06, "loss": 1.9281, "step": 4128 }, { "epoch": 1.2, "learning_rate": 7.362762487076995e-06, "loss": 1.8852, "step": 4130 }, { "epoch": 1.2, "learning_rate": 7.3537150528308076e-06, "loss": 1.8023, "step": 4132 }, { "epoch": 1.2, "learning_rate": 7.344669947244303e-06, "loss": 1.7998, "step": 4134 }, { "epoch": 1.2, "learning_rate": 7.335627178276936e-06, "loss": 1.8682, "step": 4136 }, { "epoch": 1.2, "learning_rate": 7.326586753886102e-06, "loss": 1.8264, "step": 4138 }, { "epoch": 1.2, "learning_rate": 7.317548682027128e-06, "loss": 1.8193, "step": 4140 }, { "epoch": 1.2, "learning_rate": 7.308512970653274e-06, "loss": 1.8863, "step": 4142 }, { "epoch": 1.2, "learning_rate": 7.29947962771573e-06, "loss": 1.8779, "step": 4144 }, { "epoch": 1.2, "learning_rate": 7.290448661163587e-06, "loss": 1.833, "step": 4146 }, { "epoch": 1.2, "learning_rate": 7.281420078943856e-06, "loss": 1.8425, "step": 4148 }, { "epoch": 1.2, "learning_rate": 7.272393889001454e-06, "loss": 1.7858, "step": 4150 }, { "epoch": 1.2, "learning_rate": 7.263370099279173e-06, "loss": 1.8364, "step": 4152 }, { "epoch": 1.2, "learning_rate": 7.2543487177177145e-06, "loss": 1.8445, "step": 4154 }, { "epoch": 1.2, "learning_rate": 7.2453297522556475e-06, "loss": 1.8255, "step": 4156 }, { "epoch": 1.2, "learning_rate": 7.23631321082942e-06, "loss": 1.8634, "step": 4158 }, { "epoch": 1.2, "learning_rate": 7.227299101373353e-06, "loss": 1.8208, "step": 4160 }, { "epoch": 1.21, "learning_rate": 7.218287431819611e-06, "loss": 1.9443, "step": 4162 }, { "epoch": 1.21, "learning_rate": 7.2092782100982254e-06, "loss": 1.8503, "step": 4164 }, { "epoch": 1.21, "learning_rate": 7.2002714441370725e-06, "loss": 1.8886, "step": 4166 }, { "epoch": 1.21, "learning_rate": 7.191267141861857e-06, "loss": 1.8634, "step": 4168 }, { "epoch": 1.21, "learning_rate": 7.18226531119613e-06, "loss": 1.8512, "step": 4170 }, { "epoch": 1.21, "learning_rate": 7.1732659600612606e-06, "loss": 1.9031, "step": 4172 }, { "epoch": 1.21, "learning_rate": 7.1642690963764315e-06, "loss": 1.8531, "step": 4174 }, { "epoch": 1.21, "learning_rate": 7.155274728058646e-06, "loss": 1.82, "step": 4176 }, { "epoch": 1.21, "learning_rate": 7.146282863022699e-06, "loss": 1.8154, "step": 4178 }, { "epoch": 1.21, "learning_rate": 7.137293509181198e-06, "loss": 1.845, "step": 4180 }, { "epoch": 1.21, "learning_rate": 7.128306674444532e-06, "loss": 1.8577, "step": 4182 }, { "epoch": 1.21, "learning_rate": 7.119322366720871e-06, "loss": 1.7934, "step": 4184 }, { "epoch": 1.21, "learning_rate": 7.110340593916164e-06, "loss": 1.8266, "step": 4186 }, { "epoch": 1.21, "learning_rate": 7.101361363934136e-06, "loss": 1.8161, "step": 4188 }, { "epoch": 1.21, "learning_rate": 7.092384684676263e-06, "loss": 1.9875, "step": 4190 }, { "epoch": 1.21, "learning_rate": 7.0834105640417795e-06, "loss": 1.8514, "step": 4192 }, { "epoch": 1.21, "learning_rate": 7.0744390099276806e-06, "loss": 1.8243, "step": 4194 }, { "epoch": 1.22, "learning_rate": 7.06547003022868e-06, "loss": 1.9266, "step": 4196 }, { "epoch": 1.22, "learning_rate": 7.056503632837247e-06, "loss": 1.7795, "step": 4198 }, { "epoch": 1.22, "learning_rate": 7.047539825643572e-06, "loss": 1.795, "step": 4200 }, { "epoch": 1.22, "learning_rate": 7.038578616535556e-06, "loss": 1.8856, "step": 4202 }, { "epoch": 1.22, "learning_rate": 7.0296200133988365e-06, "loss": 1.9069, "step": 4204 }, { "epoch": 1.22, "learning_rate": 7.0206640241167266e-06, "loss": 1.8491, "step": 4206 }, { "epoch": 1.22, "learning_rate": 7.011710656570267e-06, "loss": 1.782, "step": 4208 }, { "epoch": 1.22, "learning_rate": 7.002759918638182e-06, "loss": 1.8662, "step": 4210 }, { "epoch": 1.22, "learning_rate": 6.993811818196872e-06, "loss": 1.8605, "step": 4212 }, { "epoch": 1.22, "learning_rate": 6.984866363120433e-06, "loss": 1.8423, "step": 4214 }, { "epoch": 1.22, "learning_rate": 6.975923561280624e-06, "loss": 1.8466, "step": 4216 }, { "epoch": 1.22, "learning_rate": 6.9669834205468664e-06, "loss": 1.8634, "step": 4218 }, { "epoch": 1.22, "learning_rate": 6.958045948786245e-06, "loss": 1.839, "step": 4220 }, { "epoch": 1.22, "learning_rate": 6.9491111538635e-06, "loss": 1.8284, "step": 4222 }, { "epoch": 1.22, "learning_rate": 6.940179043641005e-06, "loss": 1.8049, "step": 4224 }, { "epoch": 1.22, "learning_rate": 6.931249625978782e-06, "loss": 1.842, "step": 4226 }, { "epoch": 1.22, "learning_rate": 6.922322908734474e-06, "loss": 1.8196, "step": 4228 }, { "epoch": 1.23, "learning_rate": 6.913398899763353e-06, "loss": 1.8584, "step": 4230 }, { "epoch": 1.23, "learning_rate": 6.904477606918314e-06, "loss": 1.9206, "step": 4232 }, { "epoch": 1.23, "learning_rate": 6.895559038049846e-06, "loss": 1.7852, "step": 4234 }, { "epoch": 1.23, "learning_rate": 6.886643201006053e-06, "loss": 1.8948, "step": 4236 }, { "epoch": 1.23, "learning_rate": 6.877730103632639e-06, "loss": 1.8883, "step": 4238 }, { "epoch": 1.23, "learning_rate": 6.868819753772879e-06, "loss": 1.8834, "step": 4240 }, { "epoch": 1.23, "learning_rate": 6.859912159267648e-06, "loss": 1.8056, "step": 4242 }, { "epoch": 1.23, "learning_rate": 6.851007327955392e-06, "loss": 1.8051, "step": 4244 }, { "epoch": 1.23, "learning_rate": 6.842105267672117e-06, "loss": 1.9082, "step": 4246 }, { "epoch": 1.23, "learning_rate": 6.833205986251404e-06, "loss": 1.8168, "step": 4248 }, { "epoch": 1.23, "learning_rate": 6.824309491524377e-06, "loss": 1.8254, "step": 4250 }, { "epoch": 1.23, "learning_rate": 6.815415791319713e-06, "loss": 1.8147, "step": 4252 }, { "epoch": 1.23, "learning_rate": 6.806524893463631e-06, "loss": 1.9192, "step": 4254 }, { "epoch": 1.23, "learning_rate": 6.797636805779879e-06, "loss": 1.8637, "step": 4256 }, { "epoch": 1.23, "learning_rate": 6.788751536089739e-06, "loss": 1.9016, "step": 4258 }, { "epoch": 1.23, "learning_rate": 6.77986909221201e-06, "loss": 1.8474, "step": 4260 }, { "epoch": 1.23, "learning_rate": 6.770989481963001e-06, "loss": 1.8015, "step": 4262 }, { "epoch": 1.23, "learning_rate": 6.76211271315653e-06, "loss": 1.8725, "step": 4264 }, { "epoch": 1.24, "learning_rate": 6.753238793603922e-06, "loss": 1.9194, "step": 4266 }, { "epoch": 1.24, "learning_rate": 6.744367731113981e-06, "loss": 1.8647, "step": 4268 }, { "epoch": 1.24, "learning_rate": 6.735499533493003e-06, "loss": 1.9394, "step": 4270 }, { "epoch": 1.24, "learning_rate": 6.726634208544773e-06, "loss": 1.8945, "step": 4272 }, { "epoch": 1.24, "learning_rate": 6.717771764070527e-06, "loss": 1.8583, "step": 4274 }, { "epoch": 1.24, "learning_rate": 6.708912207868987e-06, "loss": 1.8555, "step": 4276 }, { "epoch": 1.24, "learning_rate": 6.700055547736321e-06, "loss": 1.8682, "step": 4278 }, { "epoch": 1.24, "learning_rate": 6.691201791466148e-06, "loss": 1.9106, "step": 4280 }, { "epoch": 1.24, "learning_rate": 6.682350946849546e-06, "loss": 1.8561, "step": 4282 }, { "epoch": 1.24, "learning_rate": 6.673503021675012e-06, "loss": 1.8179, "step": 4284 }, { "epoch": 1.24, "learning_rate": 6.664658023728485e-06, "loss": 1.8608, "step": 4286 }, { "epoch": 1.24, "learning_rate": 6.655815960793328e-06, "loss": 1.8553, "step": 4288 }, { "epoch": 1.24, "learning_rate": 6.646976840650312e-06, "loss": 1.788, "step": 4290 }, { "epoch": 1.24, "learning_rate": 6.638140671077633e-06, "loss": 1.8884, "step": 4292 }, { "epoch": 1.24, "learning_rate": 6.629307459850881e-06, "loss": 1.8631, "step": 4294 }, { "epoch": 1.24, "learning_rate": 6.620477214743042e-06, "loss": 1.7113, "step": 4296 }, { "epoch": 1.24, "learning_rate": 6.611649943524499e-06, "loss": 1.8712, "step": 4298 }, { "epoch": 1.25, "learning_rate": 6.6028256539630065e-06, "loss": 1.9877, "step": 4300 }, { "epoch": 1.25, "learning_rate": 6.594004353823709e-06, "loss": 1.8343, "step": 4302 }, { "epoch": 1.25, "learning_rate": 6.585186050869113e-06, "loss": 1.8212, "step": 4304 }, { "epoch": 1.25, "learning_rate": 6.5763707528590846e-06, "loss": 1.8561, "step": 4306 }, { "epoch": 1.25, "learning_rate": 6.56755846755085e-06, "loss": 1.831, "step": 4308 }, { "epoch": 1.25, "learning_rate": 6.558749202698988e-06, "loss": 1.8164, "step": 4310 }, { "epoch": 1.25, "learning_rate": 6.54994296605541e-06, "loss": 1.8104, "step": 4312 }, { "epoch": 1.25, "learning_rate": 6.541139765369368e-06, "loss": 1.8236, "step": 4314 }, { "epoch": 1.25, "learning_rate": 6.5323396083874505e-06, "loss": 1.8528, "step": 4316 }, { "epoch": 1.25, "learning_rate": 6.523542502853548e-06, "loss": 1.8057, "step": 4318 }, { "epoch": 1.25, "learning_rate": 6.514748456508885e-06, "loss": 1.8733, "step": 4320 }, { "epoch": 1.25, "learning_rate": 6.505957477091981e-06, "loss": 1.7709, "step": 4322 }, { "epoch": 1.25, "learning_rate": 6.497169572338662e-06, "loss": 1.818, "step": 4324 }, { "epoch": 1.25, "learning_rate": 6.488384749982054e-06, "loss": 1.8176, "step": 4326 }, { "epoch": 1.25, "learning_rate": 6.479603017752559e-06, "loss": 1.8369, "step": 4328 }, { "epoch": 1.25, "learning_rate": 6.470824383377867e-06, "loss": 1.8545, "step": 4330 }, { "epoch": 1.25, "learning_rate": 6.462048854582944e-06, "loss": 1.9237, "step": 4332 }, { "epoch": 1.26, "learning_rate": 6.453276439090012e-06, "loss": 1.8021, "step": 4334 }, { "epoch": 1.26, "learning_rate": 6.444507144618567e-06, "loss": 1.7937, "step": 4336 }, { "epoch": 1.26, "learning_rate": 6.435740978885354e-06, "loss": 1.7941, "step": 4338 }, { "epoch": 1.26, "learning_rate": 6.426977949604358e-06, "loss": 1.9108, "step": 4340 }, { "epoch": 1.26, "learning_rate": 6.4182180644868094e-06, "loss": 1.8568, "step": 4342 }, { "epoch": 1.26, "learning_rate": 6.409461331241181e-06, "loss": 1.8606, "step": 4344 }, { "epoch": 1.26, "learning_rate": 6.400707757573152e-06, "loss": 1.9083, "step": 4346 }, { "epoch": 1.26, "learning_rate": 6.39195735118564e-06, "loss": 1.9421, "step": 4348 }, { "epoch": 1.26, "learning_rate": 6.38321011977876e-06, "loss": 1.8829, "step": 4350 }, { "epoch": 1.26, "learning_rate": 6.374466071049847e-06, "loss": 1.8611, "step": 4352 }, { "epoch": 1.26, "learning_rate": 6.365725212693429e-06, "loss": 1.9434, "step": 4354 }, { "epoch": 1.26, "learning_rate": 6.356987552401223e-06, "loss": 1.8565, "step": 4356 }, { "epoch": 1.26, "learning_rate": 6.348253097862134e-06, "loss": 1.8576, "step": 4358 }, { "epoch": 1.26, "learning_rate": 6.339521856762254e-06, "loss": 1.8413, "step": 4360 }, { "epoch": 1.26, "learning_rate": 6.330793836784837e-06, "loss": 1.8909, "step": 4362 }, { "epoch": 1.26, "learning_rate": 6.322069045610305e-06, "loss": 1.9115, "step": 4364 }, { "epoch": 1.26, "learning_rate": 6.313347490916244e-06, "loss": 1.9097, "step": 4366 }, { "epoch": 1.26, "learning_rate": 6.30462918037738e-06, "loss": 1.8261, "step": 4368 }, { "epoch": 1.27, "learning_rate": 6.295914121665603e-06, "loss": 1.8183, "step": 4370 }, { "epoch": 1.27, "learning_rate": 6.287202322449922e-06, "loss": 1.8784, "step": 4372 }, { "epoch": 1.27, "learning_rate": 6.278493790396489e-06, "loss": 1.9024, "step": 4374 }, { "epoch": 1.27, "learning_rate": 6.269788533168582e-06, "loss": 1.8445, "step": 4376 }, { "epoch": 1.27, "learning_rate": 6.261086558426586e-06, "loss": 1.7719, "step": 4378 }, { "epoch": 1.27, "learning_rate": 6.2523878738280094e-06, "loss": 1.8415, "step": 4380 }, { "epoch": 1.27, "learning_rate": 6.243692487027464e-06, "loss": 1.9056, "step": 4382 }, { "epoch": 1.27, "learning_rate": 6.23500040567665e-06, "loss": 1.8552, "step": 4384 }, { "epoch": 1.27, "learning_rate": 6.22631163742437e-06, "loss": 1.8744, "step": 4386 }, { "epoch": 1.27, "learning_rate": 6.21762618991651e-06, "loss": 1.8766, "step": 4388 }, { "epoch": 1.27, "learning_rate": 6.208944070796023e-06, "loss": 1.7798, "step": 4390 }, { "epoch": 1.27, "learning_rate": 6.200265287702942e-06, "loss": 1.8106, "step": 4392 }, { "epoch": 1.27, "learning_rate": 6.191589848274369e-06, "loss": 1.8748, "step": 4394 }, { "epoch": 1.27, "learning_rate": 6.182917760144454e-06, "loss": 1.8433, "step": 4396 }, { "epoch": 1.27, "learning_rate": 6.1742490309444005e-06, "loss": 1.8979, "step": 4398 }, { "epoch": 1.27, "learning_rate": 6.165583668302457e-06, "loss": 1.8429, "step": 4400 }, { "epoch": 1.27, "learning_rate": 6.156921679843911e-06, "loss": 1.8654, "step": 4402 }, { "epoch": 1.28, "learning_rate": 6.148263073191083e-06, "loss": 2.0143, "step": 4404 }, { "epoch": 1.28, "learning_rate": 6.13960785596331e-06, "loss": 1.7783, "step": 4406 }, { "epoch": 1.28, "learning_rate": 6.130956035776951e-06, "loss": 1.8343, "step": 4408 }, { "epoch": 1.28, "learning_rate": 6.12230762024538e-06, "loss": 1.8569, "step": 4410 }, { "epoch": 1.28, "learning_rate": 6.113662616978961e-06, "loss": 1.7742, "step": 4412 }, { "epoch": 1.28, "learning_rate": 6.109341397322611e-06, "loss": 1.8934, "step": 4414 }, { "epoch": 1.28, "learning_rate": 6.10070152671681e-06, "loss": 1.8192, "step": 4416 }, { "epoch": 1.28, "learning_rate": 6.092065087388953e-06, "loss": 1.8519, "step": 4418 }, { "epoch": 1.28, "learning_rate": 6.08343208693887e-06, "loss": 1.9208, "step": 4420 }, { "epoch": 1.28, "learning_rate": 6.074802532963369e-06, "loss": 1.8049, "step": 4422 }, { "epoch": 1.28, "learning_rate": 6.066176433056228e-06, "loss": 1.838, "step": 4424 }, { "epoch": 1.28, "learning_rate": 6.05755379480818e-06, "loss": 1.8266, "step": 4426 }, { "epoch": 1.28, "learning_rate": 6.048934625806918e-06, "loss": 1.8739, "step": 4428 }, { "epoch": 1.28, "learning_rate": 6.040318933637082e-06, "loss": 1.8105, "step": 4430 }, { "epoch": 1.28, "learning_rate": 6.031706725880243e-06, "loss": 1.8149, "step": 4432 }, { "epoch": 1.28, "learning_rate": 6.023098010114914e-06, "loss": 1.8684, "step": 4434 }, { "epoch": 1.28, "learning_rate": 6.01449279391654e-06, "loss": 1.8255, "step": 4436 }, { "epoch": 1.29, "learning_rate": 6.005891084857473e-06, "loss": 1.8426, "step": 4438 }, { "epoch": 1.29, "learning_rate": 5.9972928905069894e-06, "loss": 1.7438, "step": 4440 }, { "epoch": 1.29, "learning_rate": 5.988698218431273e-06, "loss": 1.8171, "step": 4442 }, { "epoch": 1.29, "learning_rate": 5.980107076193393e-06, "loss": 1.7778, "step": 4444 }, { "epoch": 1.29, "learning_rate": 5.971519471353335e-06, "loss": 1.8779, "step": 4446 }, { "epoch": 1.29, "learning_rate": 5.962935411467956e-06, "loss": 1.7744, "step": 4448 }, { "epoch": 1.29, "learning_rate": 5.954354904090995e-06, "loss": 1.7961, "step": 4450 }, { "epoch": 1.29, "learning_rate": 5.945777956773078e-06, "loss": 1.9499, "step": 4452 }, { "epoch": 1.29, "learning_rate": 5.937204577061677e-06, "loss": 1.8151, "step": 4454 }, { "epoch": 1.29, "learning_rate": 5.928634772501143e-06, "loss": 1.8171, "step": 4456 }, { "epoch": 1.29, "learning_rate": 5.920068550632674e-06, "loss": 1.8733, "step": 4458 }, { "epoch": 1.29, "learning_rate": 5.911505918994309e-06, "loss": 1.8227, "step": 4460 }, { "epoch": 1.29, "learning_rate": 5.902946885120939e-06, "loss": 1.8332, "step": 4462 }, { "epoch": 1.29, "learning_rate": 5.894391456544286e-06, "loss": 1.821, "step": 4464 }, { "epoch": 1.29, "learning_rate": 5.885839640792892e-06, "loss": 1.8454, "step": 4466 }, { "epoch": 1.29, "learning_rate": 5.877291445392128e-06, "loss": 1.888, "step": 4468 }, { "epoch": 1.29, "learning_rate": 5.873018707674036e-06, "loss": 1.8578, "step": 4470 }, { "epoch": 1.3, "learning_rate": 5.864475956902313e-06, "loss": 1.8251, "step": 4472 }, { "epoch": 1.3, "learning_rate": 5.855936845280682e-06, "loss": 1.8705, "step": 4474 }, { "epoch": 1.3, "learning_rate": 5.84740138032334e-06, "loss": 1.8228, "step": 4476 }, { "epoch": 1.3, "learning_rate": 5.838869569541267e-06, "loss": 1.8675, "step": 4478 }, { "epoch": 1.3, "learning_rate": 5.830341420442221e-06, "loss": 1.8586, "step": 4480 }, { "epoch": 1.3, "learning_rate": 5.821816940530749e-06, "loss": 1.8221, "step": 4482 }, { "epoch": 1.3, "learning_rate": 5.813296137308164e-06, "loss": 1.8295, "step": 4484 }, { "epoch": 1.3, "learning_rate": 5.804779018272544e-06, "loss": 1.8872, "step": 4486 }, { "epoch": 1.3, "learning_rate": 5.796265590918723e-06, "loss": 1.8671, "step": 4488 }, { "epoch": 1.3, "learning_rate": 5.787755862738289e-06, "loss": 1.8271, "step": 4490 }, { "epoch": 1.3, "learning_rate": 5.779249841219577e-06, "loss": 1.819, "step": 4492 }, { "epoch": 1.3, "learning_rate": 5.770747533847658e-06, "loss": 1.8345, "step": 4494 }, { "epoch": 1.3, "learning_rate": 5.762248948104325e-06, "loss": 1.8886, "step": 4496 }, { "epoch": 1.3, "learning_rate": 5.753754091468116e-06, "loss": 1.8757, "step": 4498 }, { "epoch": 1.3, "learning_rate": 5.745262971414276e-06, "loss": 1.7844, "step": 4500 }, { "epoch": 1.3, "learning_rate": 5.736775595414756e-06, "loss": 1.7944, "step": 4502 }, { "epoch": 1.3, "learning_rate": 5.728291970938218e-06, "loss": 1.7978, "step": 4504 }, { "epoch": 1.3, "learning_rate": 5.719812105450035e-06, "loss": 1.8304, "step": 4506 }, { "epoch": 1.31, "learning_rate": 5.711336006412251e-06, "loss": 1.893, "step": 4508 }, { "epoch": 1.31, "learning_rate": 5.7028636812836035e-06, "loss": 1.9231, "step": 4510 }, { "epoch": 1.31, "learning_rate": 5.694395137519525e-06, "loss": 1.8476, "step": 4512 }, { "epoch": 1.31, "learning_rate": 5.685930382572093e-06, "loss": 1.7988, "step": 4514 }, { "epoch": 1.31, "learning_rate": 5.67746942389007e-06, "loss": 1.843, "step": 4516 }, { "epoch": 1.31, "learning_rate": 5.66901226891887e-06, "loss": 1.8643, "step": 4518 }, { "epoch": 1.31, "learning_rate": 5.660558925100565e-06, "loss": 1.7984, "step": 4520 }, { "epoch": 1.31, "learning_rate": 5.652109399873873e-06, "loss": 1.7553, "step": 4522 }, { "epoch": 1.31, "learning_rate": 5.643663700674138e-06, "loss": 1.8213, "step": 4524 }, { "epoch": 1.31, "learning_rate": 5.635221834933358e-06, "loss": 1.8554, "step": 4526 }, { "epoch": 1.31, "learning_rate": 5.626783810080152e-06, "loss": 1.8571, "step": 4528 }, { "epoch": 1.31, "learning_rate": 5.618349633539741e-06, "loss": 1.8442, "step": 4530 }, { "epoch": 1.31, "learning_rate": 5.609919312733987e-06, "loss": 1.8351, "step": 4532 }, { "epoch": 1.31, "learning_rate": 5.601492855081345e-06, "loss": 1.8745, "step": 4534 }, { "epoch": 1.31, "learning_rate": 5.593070267996864e-06, "loss": 1.7772, "step": 4536 }, { "epoch": 1.31, "learning_rate": 5.584651558892193e-06, "loss": 1.8456, "step": 4538 }, { "epoch": 1.31, "learning_rate": 5.576236735175584e-06, "loss": 1.8013, "step": 4540 }, { "epoch": 1.32, "learning_rate": 5.567825804251843e-06, "loss": 1.9065, "step": 4542 }, { "epoch": 1.32, "learning_rate": 5.559418773522367e-06, "loss": 1.8452, "step": 4544 }, { "epoch": 1.32, "learning_rate": 5.5510156503851185e-06, "loss": 1.8993, "step": 4546 }, { "epoch": 1.32, "learning_rate": 5.542616442234618e-06, "loss": 1.757, "step": 4548 }, { "epoch": 1.32, "learning_rate": 5.534221156461945e-06, "loss": 1.8117, "step": 4550 }, { "epoch": 1.32, "learning_rate": 5.525829800454725e-06, "loss": 1.8185, "step": 4552 }, { "epoch": 1.32, "learning_rate": 5.517442381597125e-06, "loss": 1.8251, "step": 4554 }, { "epoch": 1.32, "learning_rate": 5.509058907269854e-06, "loss": 1.8996, "step": 4556 }, { "epoch": 1.32, "learning_rate": 5.500679384850132e-06, "loss": 1.809, "step": 4558 }, { "epoch": 1.32, "learning_rate": 5.492303821711724e-06, "loss": 1.9024, "step": 4560 }, { "epoch": 1.32, "learning_rate": 5.483932225224901e-06, "loss": 1.897, "step": 4562 }, { "epoch": 1.32, "learning_rate": 5.475564602756433e-06, "loss": 1.8525, "step": 4564 }, { "epoch": 1.32, "learning_rate": 5.467200961669619e-06, "loss": 1.8612, "step": 4566 }, { "epoch": 1.32, "learning_rate": 5.458841309324225e-06, "loss": 1.8548, "step": 4568 }, { "epoch": 1.32, "learning_rate": 5.450485653076524e-06, "loss": 1.8248, "step": 4570 }, { "epoch": 1.32, "learning_rate": 5.44213400027927e-06, "loss": 1.8221, "step": 4572 }, { "epoch": 1.32, "learning_rate": 5.433786358281694e-06, "loss": 1.7902, "step": 4574 }, { "epoch": 1.33, "learning_rate": 5.425442734429496e-06, "loss": 1.9005, "step": 4576 }, { "epoch": 1.33, "learning_rate": 5.417103136064841e-06, "loss": 1.8317, "step": 4578 }, { "epoch": 1.33, "learning_rate": 5.408767570526352e-06, "loss": 1.8511, "step": 4580 }, { "epoch": 1.33, "learning_rate": 5.4004360451491026e-06, "loss": 1.8683, "step": 4582 }, { "epoch": 1.33, "learning_rate": 5.392108567264609e-06, "loss": 1.8525, "step": 4584 }, { "epoch": 1.33, "learning_rate": 5.383785144200833e-06, "loss": 1.819, "step": 4586 }, { "epoch": 1.33, "learning_rate": 5.375465783282163e-06, "loss": 1.7428, "step": 4588 }, { "epoch": 1.33, "learning_rate": 5.367150491829408e-06, "loss": 1.8835, "step": 4590 }, { "epoch": 1.33, "learning_rate": 5.3588392771598e-06, "loss": 1.8157, "step": 4592 }, { "epoch": 1.33, "learning_rate": 5.3505321465869966e-06, "loss": 1.8017, "step": 4594 }, { "epoch": 1.33, "learning_rate": 5.342229107421037e-06, "loss": 1.8544, "step": 4596 }, { "epoch": 1.33, "learning_rate": 5.333930166968374e-06, "loss": 1.8788, "step": 4598 }, { "epoch": 1.33, "learning_rate": 5.325635332531864e-06, "loss": 1.8826, "step": 4600 }, { "epoch": 1.33, "learning_rate": 5.317344611410726e-06, "loss": 1.8736, "step": 4602 }, { "epoch": 1.33, "learning_rate": 5.309058010900576e-06, "loss": 1.826, "step": 4604 }, { "epoch": 1.33, "learning_rate": 5.300775538293401e-06, "loss": 1.8035, "step": 4606 }, { "epoch": 1.33, "learning_rate": 5.292497200877553e-06, "loss": 1.8112, "step": 4608 }, { "epoch": 1.34, "learning_rate": 5.284223005937749e-06, "loss": 1.8861, "step": 4610 }, { "epoch": 1.34, "learning_rate": 5.275952960755055e-06, "loss": 1.7805, "step": 4612 }, { "epoch": 1.34, "learning_rate": 5.267687072606891e-06, "loss": 1.8031, "step": 4614 }, { "epoch": 1.34, "learning_rate": 5.259425348767018e-06, "loss": 1.7749, "step": 4616 }, { "epoch": 1.34, "learning_rate": 5.251167796505523e-06, "loss": 1.9018, "step": 4618 }, { "epoch": 1.34, "learning_rate": 5.242914423088838e-06, "loss": 1.8248, "step": 4620 }, { "epoch": 1.34, "learning_rate": 5.234665235779714e-06, "loss": 1.7913, "step": 4622 }, { "epoch": 1.34, "learning_rate": 5.2264202418372045e-06, "loss": 1.8212, "step": 4624 }, { "epoch": 1.34, "learning_rate": 5.218179448516683e-06, "loss": 1.825, "step": 4626 }, { "epoch": 1.34, "learning_rate": 5.209942863069837e-06, "loss": 1.7958, "step": 4628 }, { "epoch": 1.34, "learning_rate": 5.201710492744629e-06, "loss": 1.9027, "step": 4630 }, { "epoch": 1.34, "learning_rate": 5.193482344785326e-06, "loss": 1.7226, "step": 4632 }, { "epoch": 1.34, "learning_rate": 5.1852584264324864e-06, "loss": 1.8192, "step": 4634 }, { "epoch": 1.34, "learning_rate": 5.177038744922928e-06, "loss": 1.8859, "step": 4636 }, { "epoch": 1.34, "learning_rate": 5.1688233074897505e-06, "loss": 1.8999, "step": 4638 }, { "epoch": 1.34, "learning_rate": 5.160612121362321e-06, "loss": 1.8355, "step": 4640 }, { "epoch": 1.34, "learning_rate": 5.152405193766263e-06, "loss": 1.7714, "step": 4642 }, { "epoch": 1.34, "learning_rate": 5.14420253192345e-06, "loss": 1.8619, "step": 4644 }, { "epoch": 1.35, "learning_rate": 5.136004143052007e-06, "loss": 1.764, "step": 4646 }, { "epoch": 1.35, "learning_rate": 5.127810034366292e-06, "loss": 1.8031, "step": 4648 }, { "epoch": 1.35, "learning_rate": 5.119620213076909e-06, "loss": 1.908, "step": 4650 }, { "epoch": 1.35, "learning_rate": 5.111434686390666e-06, "loss": 1.8563, "step": 4652 }, { "epoch": 1.35, "learning_rate": 5.10325346151062e-06, "loss": 1.8796, "step": 4654 }, { "epoch": 1.35, "learning_rate": 5.095076545636028e-06, "loss": 1.7955, "step": 4656 }, { "epoch": 1.35, "learning_rate": 5.08690394596235e-06, "loss": 1.85, "step": 4658 }, { "epoch": 1.35, "learning_rate": 5.0787356696812575e-06, "loss": 1.9228, "step": 4660 }, { "epoch": 1.35, "learning_rate": 5.070571723980613e-06, "loss": 1.8955, "step": 4662 }, { "epoch": 1.35, "learning_rate": 5.062412116044472e-06, "loss": 1.8371, "step": 4664 }, { "epoch": 1.35, "learning_rate": 5.054256853053068e-06, "loss": 1.8172, "step": 4666 }, { "epoch": 1.35, "learning_rate": 5.046105942182815e-06, "loss": 1.8934, "step": 4668 }, { "epoch": 1.35, "learning_rate": 5.037959390606294e-06, "loss": 1.8283, "step": 4670 }, { "epoch": 1.35, "learning_rate": 5.029817205492253e-06, "loss": 1.7874, "step": 4672 }, { "epoch": 1.35, "learning_rate": 5.0216793940055965e-06, "loss": 1.8263, "step": 4674 }, { "epoch": 1.35, "learning_rate": 5.013545963307378e-06, "loss": 1.8499, "step": 4676 }, { "epoch": 1.35, "learning_rate": 5.005416920554802e-06, "loss": 1.8426, "step": 4678 }, { "epoch": 1.36, "learning_rate": 4.997292272901203e-06, "loss": 1.8559, "step": 4680 }, { "epoch": 1.36, "learning_rate": 4.989172027496056e-06, "loss": 1.8377, "step": 4682 }, { "epoch": 1.36, "learning_rate": 4.981056191484962e-06, "loss": 1.8703, "step": 4684 }, { "epoch": 1.36, "learning_rate": 4.972944772009627e-06, "loss": 1.8573, "step": 4686 }, { "epoch": 1.36, "learning_rate": 4.964837776207897e-06, "loss": 1.8107, "step": 4688 }, { "epoch": 1.36, "learning_rate": 4.956735211213702e-06, "loss": 1.9626, "step": 4690 }, { "epoch": 1.36, "learning_rate": 4.948637084157081e-06, "loss": 1.8285, "step": 4692 }, { "epoch": 1.36, "learning_rate": 4.94054340216417e-06, "loss": 1.8561, "step": 4694 }, { "epoch": 1.36, "learning_rate": 4.932454172357194e-06, "loss": 1.8988, "step": 4696 }, { "epoch": 1.36, "learning_rate": 4.924369401854456e-06, "loss": 1.8019, "step": 4698 }, { "epoch": 1.36, "learning_rate": 4.916289097770337e-06, "loss": 1.8735, "step": 4700 }, { "epoch": 1.36, "learning_rate": 4.908213267215287e-06, "loss": 1.8198, "step": 4702 }, { "epoch": 1.36, "learning_rate": 4.900141917295822e-06, "loss": 1.8997, "step": 4704 }, { "epoch": 1.36, "learning_rate": 4.892075055114511e-06, "loss": 1.8728, "step": 4706 }, { "epoch": 1.36, "learning_rate": 4.884012687769979e-06, "loss": 1.7945, "step": 4708 }, { "epoch": 1.36, "learning_rate": 4.875954822356893e-06, "loss": 1.8473, "step": 4710 }, { "epoch": 1.36, "learning_rate": 4.86790146596595e-06, "loss": 1.8486, "step": 4712 }, { "epoch": 1.37, "learning_rate": 4.8598526256838955e-06, "loss": 1.9244, "step": 4714 }, { "epoch": 1.37, "learning_rate": 4.851808308593496e-06, "loss": 1.837, "step": 4716 }, { "epoch": 1.37, "learning_rate": 4.843768521773523e-06, "loss": 1.8606, "step": 4718 }, { "epoch": 1.37, "learning_rate": 4.835733272298777e-06, "loss": 1.8465, "step": 4720 }, { "epoch": 1.37, "learning_rate": 4.827702567240072e-06, "loss": 1.8314, "step": 4722 }, { "epoch": 1.37, "learning_rate": 4.8196764136642005e-06, "loss": 1.8301, "step": 4724 }, { "epoch": 1.37, "learning_rate": 4.811654818633966e-06, "loss": 1.8206, "step": 4726 }, { "epoch": 1.37, "learning_rate": 4.8036377892081565e-06, "loss": 1.7604, "step": 4728 }, { "epoch": 1.37, "learning_rate": 4.795625332441545e-06, "loss": 1.8049, "step": 4730 }, { "epoch": 1.37, "learning_rate": 4.787617455384874e-06, "loss": 1.7645, "step": 4732 }, { "epoch": 1.37, "learning_rate": 4.779614165084864e-06, "loss": 1.876, "step": 4734 }, { "epoch": 1.37, "learning_rate": 4.771615468584194e-06, "loss": 1.8472, "step": 4736 }, { "epoch": 1.37, "learning_rate": 4.763621372921506e-06, "loss": 1.8385, "step": 4738 }, { "epoch": 1.37, "learning_rate": 4.755631885131378e-06, "loss": 1.7986, "step": 4740 }, { "epoch": 1.37, "learning_rate": 4.747647012244357e-06, "loss": 1.8153, "step": 4742 }, { "epoch": 1.37, "learning_rate": 4.739666761286915e-06, "loss": 1.8597, "step": 4744 }, { "epoch": 1.37, "learning_rate": 4.731691139281449e-06, "loss": 1.8029, "step": 4746 }, { "epoch": 1.38, "learning_rate": 4.723720153246303e-06, "loss": 1.9, "step": 4748 }, { "epoch": 1.38, "learning_rate": 4.715753810195729e-06, "loss": 1.8275, "step": 4750 }, { "epoch": 1.38, "learning_rate": 4.70779211713989e-06, "loss": 1.9079, "step": 4752 }, { "epoch": 1.38, "learning_rate": 4.69983508108486e-06, "loss": 1.781, "step": 4754 }, { "epoch": 1.38, "learning_rate": 4.691882709032629e-06, "loss": 1.844, "step": 4756 }, { "epoch": 1.38, "learning_rate": 4.6839350079810604e-06, "loss": 1.7992, "step": 4758 }, { "epoch": 1.38, "learning_rate": 4.675991984923921e-06, "loss": 1.7762, "step": 4760 }, { "epoch": 1.38, "learning_rate": 4.6680536468508565e-06, "loss": 1.8644, "step": 4762 }, { "epoch": 1.38, "learning_rate": 4.660120000747393e-06, "loss": 1.8002, "step": 4764 }, { "epoch": 1.38, "learning_rate": 4.652191053594924e-06, "loss": 1.8223, "step": 4766 }, { "epoch": 1.38, "learning_rate": 4.644266812370713e-06, "loss": 1.8264, "step": 4768 }, { "epoch": 1.38, "learning_rate": 4.636347284047878e-06, "loss": 1.8496, "step": 4770 }, { "epoch": 1.38, "learning_rate": 4.628432475595394e-06, "loss": 1.8444, "step": 4772 }, { "epoch": 1.38, "learning_rate": 4.620522393978068e-06, "loss": 1.8195, "step": 4774 }, { "epoch": 1.38, "learning_rate": 4.612617046156575e-06, "loss": 1.8064, "step": 4776 }, { "epoch": 1.38, "learning_rate": 4.604716439087403e-06, "loss": 1.9294, "step": 4778 }, { "epoch": 1.38, "learning_rate": 4.596820579722867e-06, "loss": 1.7976, "step": 4780 }, { "epoch": 1.38, "learning_rate": 4.588929475011125e-06, "loss": 1.778, "step": 4782 }, { "epoch": 1.39, "learning_rate": 4.581043131896126e-06, "loss": 1.9058, "step": 4784 }, { "epoch": 1.39, "learning_rate": 4.573161557317642e-06, "loss": 1.8332, "step": 4786 }, { "epoch": 1.39, "learning_rate": 4.56528475821125e-06, "loss": 1.8732, "step": 4788 }, { "epoch": 1.39, "learning_rate": 4.55741274150832e-06, "loss": 1.8441, "step": 4790 }, { "epoch": 1.39, "learning_rate": 4.5495455141360165e-06, "loss": 1.8808, "step": 4792 }, { "epoch": 1.39, "learning_rate": 4.541683083017288e-06, "loss": 1.845, "step": 4794 }, { "epoch": 1.39, "learning_rate": 4.5338254550708605e-06, "loss": 1.8463, "step": 4796 }, { "epoch": 1.39, "learning_rate": 4.525972637211237e-06, "loss": 1.8353, "step": 4798 }, { "epoch": 1.39, "learning_rate": 4.518124636348689e-06, "loss": 1.8861, "step": 4800 }, { "epoch": 1.39, "learning_rate": 4.510281459389242e-06, "loss": 1.8588, "step": 4802 }, { "epoch": 1.39, "learning_rate": 4.502443113234688e-06, "loss": 1.8414, "step": 4804 }, { "epoch": 1.39, "learning_rate": 4.4946096047825515e-06, "loss": 1.7202, "step": 4806 }, { "epoch": 1.39, "learning_rate": 4.486780940926112e-06, "loss": 1.8469, "step": 4808 }, { "epoch": 1.39, "learning_rate": 4.478957128554392e-06, "loss": 1.901, "step": 4810 }, { "epoch": 1.39, "learning_rate": 4.4711381745521275e-06, "loss": 1.878, "step": 4812 }, { "epoch": 1.39, "learning_rate": 4.4633240857997864e-06, "loss": 1.7875, "step": 4814 }, { "epoch": 1.39, "learning_rate": 4.4555148691735685e-06, "loss": 1.8632, "step": 4816 }, { "epoch": 1.4, "learning_rate": 4.447710531545364e-06, "loss": 1.8088, "step": 4818 }, { "epoch": 1.4, "learning_rate": 4.4399110797827835e-06, "loss": 1.8558, "step": 4820 }, { "epoch": 1.4, "learning_rate": 4.432116520749137e-06, "loss": 1.7338, "step": 4822 }, { "epoch": 1.4, "learning_rate": 4.424326861303424e-06, "loss": 1.8312, "step": 4824 }, { "epoch": 1.4, "learning_rate": 4.416542108300338e-06, "loss": 1.7786, "step": 4826 }, { "epoch": 1.4, "learning_rate": 4.408762268590251e-06, "loss": 1.853, "step": 4828 }, { "epoch": 1.4, "learning_rate": 4.400987349019214e-06, "loss": 1.8221, "step": 4830 }, { "epoch": 1.4, "learning_rate": 4.393217356428951e-06, "loss": 1.807, "step": 4832 }, { "epoch": 1.4, "learning_rate": 4.385452297656836e-06, "loss": 1.8089, "step": 4834 }, { "epoch": 1.4, "learning_rate": 4.377692179535922e-06, "loss": 1.8967, "step": 4836 }, { "epoch": 1.4, "learning_rate": 4.369937008894906e-06, "loss": 1.8494, "step": 4838 }, { "epoch": 1.4, "learning_rate": 4.362186792558122e-06, "loss": 1.8331, "step": 4840 }, { "epoch": 1.4, "learning_rate": 4.354441537345553e-06, "loss": 1.8123, "step": 4842 }, { "epoch": 1.4, "learning_rate": 4.346701250072825e-06, "loss": 1.7976, "step": 4844 }, { "epoch": 1.4, "learning_rate": 4.338965937551173e-06, "loss": 1.9006, "step": 4846 }, { "epoch": 1.4, "learning_rate": 4.331235606587464e-06, "loss": 1.9125, "step": 4848 }, { "epoch": 1.4, "learning_rate": 4.323510263984192e-06, "loss": 1.8015, "step": 4850 }, { "epoch": 1.41, "learning_rate": 4.315789916539441e-06, "loss": 1.8372, "step": 4852 }, { "epoch": 1.41, "learning_rate": 4.3080745710469126e-06, "loss": 1.8578, "step": 4854 }, { "epoch": 1.41, "learning_rate": 4.300364234295905e-06, "loss": 1.865, "step": 4856 }, { "epoch": 1.41, "learning_rate": 4.292658913071306e-06, "loss": 1.8639, "step": 4858 }, { "epoch": 1.41, "learning_rate": 4.284958614153595e-06, "loss": 1.8199, "step": 4860 }, { "epoch": 1.41, "learning_rate": 4.277263344318817e-06, "loss": 1.7949, "step": 4862 }, { "epoch": 1.41, "learning_rate": 4.269573110338615e-06, "loss": 1.8764, "step": 4864 }, { "epoch": 1.41, "learning_rate": 4.2618879189801885e-06, "loss": 1.8619, "step": 4866 }, { "epoch": 1.41, "learning_rate": 4.254207777006287e-06, "loss": 1.8521, "step": 4868 }, { "epoch": 1.41, "learning_rate": 4.246532691175241e-06, "loss": 1.925, "step": 4870 }, { "epoch": 1.41, "learning_rate": 4.23886266824092e-06, "loss": 1.8434, "step": 4872 }, { "epoch": 1.41, "learning_rate": 4.23119771495273e-06, "loss": 1.8595, "step": 4874 }, { "epoch": 1.41, "learning_rate": 4.223537838055626e-06, "loss": 1.8694, "step": 4876 }, { "epoch": 1.41, "learning_rate": 4.215883044290103e-06, "loss": 1.812, "step": 4878 }, { "epoch": 1.41, "learning_rate": 4.208233340392164e-06, "loss": 1.887, "step": 4880 }, { "epoch": 1.41, "learning_rate": 4.200588733093346e-06, "loss": 1.827, "step": 4882 }, { "epoch": 1.41, "learning_rate": 4.192949229120699e-06, "loss": 1.7933, "step": 4884 }, { "epoch": 1.42, "learning_rate": 4.185314835196781e-06, "loss": 1.8536, "step": 4886 }, { "epoch": 1.42, "learning_rate": 4.177685558039652e-06, "loss": 1.8466, "step": 4888 }, { "epoch": 1.42, "learning_rate": 4.170061404362874e-06, "loss": 1.8393, "step": 4890 }, { "epoch": 1.42, "learning_rate": 4.162442380875495e-06, "loss": 1.8296, "step": 4892 }, { "epoch": 1.42, "learning_rate": 4.154828494282056e-06, "loss": 1.8547, "step": 4894 }, { "epoch": 1.42, "learning_rate": 4.147219751282562e-06, "loss": 1.7765, "step": 4896 }, { "epoch": 1.42, "learning_rate": 4.139616158572513e-06, "loss": 1.8488, "step": 4898 }, { "epoch": 1.42, "learning_rate": 4.132017722842865e-06, "loss": 1.7794, "step": 4900 }, { "epoch": 1.42, "learning_rate": 4.124424450780029e-06, "loss": 1.8931, "step": 4902 }, { "epoch": 1.42, "learning_rate": 4.116836349065892e-06, "loss": 1.8797, "step": 4904 }, { "epoch": 1.42, "learning_rate": 4.109253424377773e-06, "loss": 1.8357, "step": 4906 }, { "epoch": 1.42, "learning_rate": 4.101675683388442e-06, "loss": 1.8462, "step": 4908 }, { "epoch": 1.42, "learning_rate": 4.0941031327661085e-06, "loss": 1.8551, "step": 4910 }, { "epoch": 1.42, "learning_rate": 4.086535779174415e-06, "loss": 1.8002, "step": 4912 }, { "epoch": 1.42, "learning_rate": 4.078973629272428e-06, "loss": 1.7824, "step": 4914 }, { "epoch": 1.42, "learning_rate": 4.071416689714637e-06, "loss": 1.8624, "step": 4916 }, { "epoch": 1.42, "learning_rate": 4.063864967150945e-06, "loss": 1.8226, "step": 4918 }, { "epoch": 1.42, "learning_rate": 4.056318468226668e-06, "loss": 1.8114, "step": 4920 }, { "epoch": 1.43, "learning_rate": 4.048777199582517e-06, "loss": 1.794, "step": 4922 }, { "epoch": 1.43, "learning_rate": 4.041241167854612e-06, "loss": 1.8537, "step": 4924 }, { "epoch": 1.43, "learning_rate": 4.033710379674458e-06, "loss": 1.8158, "step": 4926 }, { "epoch": 1.43, "learning_rate": 4.02618484166894e-06, "loss": 1.9308, "step": 4928 }, { "epoch": 1.43, "learning_rate": 4.0186645604603306e-06, "loss": 1.8484, "step": 4930 }, { "epoch": 1.43, "learning_rate": 4.011149542666284e-06, "loss": 1.8014, "step": 4932 }, { "epoch": 1.43, "learning_rate": 4.003639794899804e-06, "loss": 1.847, "step": 4934 }, { "epoch": 1.43, "learning_rate": 3.9961353237692695e-06, "loss": 1.834, "step": 4936 }, { "epoch": 1.43, "learning_rate": 3.988636135878421e-06, "loss": 1.7793, "step": 4938 }, { "epoch": 1.43, "learning_rate": 3.981142237826332e-06, "loss": 1.8501, "step": 4940 }, { "epoch": 1.43, "learning_rate": 3.973653636207437e-06, "loss": 1.9308, "step": 4942 }, { "epoch": 1.43, "learning_rate": 3.966170337611503e-06, "loss": 1.8229, "step": 4944 }, { "epoch": 1.43, "learning_rate": 3.958692348623634e-06, "loss": 1.7535, "step": 4946 }, { "epoch": 1.43, "learning_rate": 3.951219675824256e-06, "loss": 1.7924, "step": 4948 }, { "epoch": 1.43, "learning_rate": 3.943752325789122e-06, "loss": 1.8188, "step": 4950 }, { "epoch": 1.43, "learning_rate": 3.9362903050893e-06, "loss": 1.7527, "step": 4952 }, { "epoch": 1.43, "learning_rate": 3.92883362029117e-06, "loss": 1.8078, "step": 4954 }, { "epoch": 1.44, "learning_rate": 3.921382277956406e-06, "loss": 1.7784, "step": 4956 }, { "epoch": 1.44, "learning_rate": 3.9139362846419974e-06, "loss": 1.8255, "step": 4958 }, { "epoch": 1.44, "learning_rate": 3.906495646900219e-06, "loss": 1.8806, "step": 4960 }, { "epoch": 1.44, "learning_rate": 3.899060371278626e-06, "loss": 1.7968, "step": 4962 }, { "epoch": 1.44, "learning_rate": 3.891630464320061e-06, "loss": 1.8672, "step": 4964 }, { "epoch": 1.44, "learning_rate": 3.8842059325626534e-06, "loss": 1.8209, "step": 4966 }, { "epoch": 1.44, "learning_rate": 3.87678678253978e-06, "loss": 1.8731, "step": 4968 }, { "epoch": 1.44, "learning_rate": 3.869373020780094e-06, "loss": 1.8682, "step": 4970 }, { "epoch": 1.44, "learning_rate": 3.861964653807518e-06, "loss": 1.8386, "step": 4972 }, { "epoch": 1.44, "learning_rate": 3.854561688141205e-06, "loss": 1.7882, "step": 4974 }, { "epoch": 1.44, "learning_rate": 3.847164130295569e-06, "loss": 1.875, "step": 4976 }, { "epoch": 1.44, "learning_rate": 3.839771986780263e-06, "loss": 1.8734, "step": 4978 }, { "epoch": 1.44, "learning_rate": 3.832385264100173e-06, "loss": 1.8781, "step": 4980 }, { "epoch": 1.44, "learning_rate": 3.825003968755419e-06, "loss": 1.8001, "step": 4982 }, { "epoch": 1.44, "learning_rate": 3.8176281072413435e-06, "loss": 1.8319, "step": 4984 }, { "epoch": 1.44, "learning_rate": 3.810257686048503e-06, "loss": 1.8849, "step": 4986 }, { "epoch": 1.44, "learning_rate": 3.8028927116626758e-06, "loss": 1.9688, "step": 4988 }, { "epoch": 1.45, "learning_rate": 3.7955331905648306e-06, "loss": 1.7496, "step": 4990 }, { "epoch": 1.45, "learning_rate": 3.7881791292311587e-06, "loss": 1.8078, "step": 4992 }, { "epoch": 1.45, "learning_rate": 3.7808305341330374e-06, "loss": 1.8289, "step": 4994 }, { "epoch": 1.45, "learning_rate": 3.7734874117370247e-06, "loss": 1.8761, "step": 4996 }, { "epoch": 1.45, "learning_rate": 3.7661497685048765e-06, "loss": 1.7844, "step": 4998 }, { "epoch": 1.45, "learning_rate": 3.7588176108935194e-06, "loss": 1.8894, "step": 5000 }, { "epoch": 1.45, "learning_rate": 3.7514909453550572e-06, "loss": 1.7551, "step": 5002 }, { "epoch": 1.45, "learning_rate": 3.744169778336758e-06, "loss": 1.9197, "step": 5004 }, { "epoch": 1.45, "learning_rate": 3.7368541162810525e-06, "loss": 1.8378, "step": 5006 }, { "epoch": 1.45, "learning_rate": 3.729543965625526e-06, "loss": 1.903, "step": 5008 }, { "epoch": 1.45, "learning_rate": 3.722239332802916e-06, "loss": 1.8387, "step": 5010 }, { "epoch": 1.45, "learning_rate": 3.7149402242411024e-06, "loss": 1.9178, "step": 5012 }, { "epoch": 1.45, "learning_rate": 3.7076466463631043e-06, "loss": 1.8312, "step": 5014 }, { "epoch": 1.45, "learning_rate": 3.7003586055870754e-06, "loss": 1.7831, "step": 5016 }, { "epoch": 1.45, "learning_rate": 3.6930761083262957e-06, "loss": 1.837, "step": 5018 }, { "epoch": 1.45, "learning_rate": 3.6857991609891675e-06, "loss": 1.8062, "step": 5020 }, { "epoch": 1.45, "learning_rate": 3.678527769979211e-06, "loss": 1.8032, "step": 5022 }, { "epoch": 1.45, "learning_rate": 3.6712619416950445e-06, "loss": 1.7846, "step": 5024 }, { "epoch": 1.46, "learning_rate": 3.664001682530418e-06, "loss": 1.7588, "step": 5026 }, { "epoch": 1.46, "learning_rate": 3.6567469988741533e-06, "loss": 1.7735, "step": 5028 }, { "epoch": 1.46, "learning_rate": 3.649497897110179e-06, "loss": 1.8035, "step": 5030 }, { "epoch": 1.46, "learning_rate": 3.6422543836175116e-06, "loss": 1.8607, "step": 5032 }, { "epoch": 1.46, "learning_rate": 3.635016464770249e-06, "loss": 1.8282, "step": 5034 }, { "epoch": 1.46, "learning_rate": 3.627784146937563e-06, "loss": 1.8128, "step": 5036 }, { "epoch": 1.46, "learning_rate": 3.6205574364837014e-06, "loss": 1.9191, "step": 5038 }, { "epoch": 1.46, "learning_rate": 3.6133363397679743e-06, "loss": 1.7697, "step": 5040 }, { "epoch": 1.46, "learning_rate": 3.606120863144753e-06, "loss": 1.7398, "step": 5042 }, { "epoch": 1.46, "learning_rate": 3.598911012963463e-06, "loss": 1.8786, "step": 5044 }, { "epoch": 1.46, "learning_rate": 3.591706795568579e-06, "loss": 1.7464, "step": 5046 }, { "epoch": 1.46, "learning_rate": 3.5845082172996224e-06, "loss": 1.9403, "step": 5048 }, { "epoch": 1.46, "learning_rate": 3.5773152844911385e-06, "loss": 1.823, "step": 5050 }, { "epoch": 1.46, "learning_rate": 3.570128003472725e-06, "loss": 1.8352, "step": 5052 }, { "epoch": 1.46, "learning_rate": 3.562946380568997e-06, "loss": 1.767, "step": 5054 }, { "epoch": 1.46, "learning_rate": 3.5557704220995814e-06, "loss": 1.8623, "step": 5056 }, { "epoch": 1.46, "learning_rate": 3.54860013437913e-06, "loss": 1.7626, "step": 5058 }, { "epoch": 1.47, "learning_rate": 3.5414355237173135e-06, "loss": 1.8645, "step": 5060 }, { "epoch": 1.47, "learning_rate": 3.5342765964187862e-06, "loss": 1.8325, "step": 5062 }, { "epoch": 1.47, "learning_rate": 3.5271233587832165e-06, "loss": 1.8134, "step": 5064 }, { "epoch": 1.47, "learning_rate": 3.51997581710526e-06, "loss": 1.8882, "step": 5066 }, { "epoch": 1.47, "learning_rate": 3.512833977674562e-06, "loss": 1.8366, "step": 5068 }, { "epoch": 1.47, "learning_rate": 3.5056978467757507e-06, "loss": 1.8347, "step": 5070 }, { "epoch": 1.47, "learning_rate": 3.498567430688428e-06, "loss": 1.9284, "step": 5072 }, { "epoch": 1.47, "learning_rate": 3.491442735687167e-06, "loss": 1.874, "step": 5074 }, { "epoch": 1.47, "learning_rate": 3.4843237680415153e-06, "loss": 1.8108, "step": 5076 }, { "epoch": 1.47, "learning_rate": 3.4772105340159613e-06, "loss": 1.7961, "step": 5078 }, { "epoch": 1.47, "learning_rate": 3.4701030398699686e-06, "loss": 1.9675, "step": 5080 }, { "epoch": 1.47, "learning_rate": 3.463001291857944e-06, "loss": 1.8738, "step": 5082 }, { "epoch": 1.47, "learning_rate": 3.4559052962292215e-06, "loss": 1.8255, "step": 5084 }, { "epoch": 1.47, "learning_rate": 3.4488150592281e-06, "loss": 1.7547, "step": 5086 }, { "epoch": 1.47, "learning_rate": 3.4417305870937955e-06, "loss": 1.8438, "step": 5088 }, { "epoch": 1.47, "learning_rate": 3.434651886060448e-06, "loss": 1.7981, "step": 5090 }, { "epoch": 1.47, "learning_rate": 3.427578962357123e-06, "loss": 1.891, "step": 5092 }, { "epoch": 1.48, "learning_rate": 3.420511822207815e-06, "loss": 1.7536, "step": 5094 }, { "epoch": 1.48, "learning_rate": 3.4134504718314065e-06, "loss": 1.8043, "step": 5096 }, { "epoch": 1.48, "learning_rate": 3.4063949174416987e-06, "loss": 1.7858, "step": 5098 }, { "epoch": 1.48, "learning_rate": 3.3993451652473917e-06, "loss": 1.9199, "step": 5100 }, { "epoch": 1.48, "learning_rate": 3.392301221452077e-06, "loss": 1.8617, "step": 5102 }, { "epoch": 1.48, "learning_rate": 3.385263092254236e-06, "loss": 1.8255, "step": 5104 }, { "epoch": 1.48, "learning_rate": 3.3782307838472338e-06, "loss": 1.8227, "step": 5106 }, { "epoch": 1.48, "learning_rate": 3.3712043024193132e-06, "loss": 1.7919, "step": 5108 }, { "epoch": 1.48, "learning_rate": 3.364183654153592e-06, "loss": 1.8356, "step": 5110 }, { "epoch": 1.48, "learning_rate": 3.3571688452280426e-06, "loss": 1.8276, "step": 5112 }, { "epoch": 1.48, "learning_rate": 3.350159881815518e-06, "loss": 1.839, "step": 5114 }, { "epoch": 1.48, "learning_rate": 3.3431567700837175e-06, "loss": 1.8155, "step": 5116 }, { "epoch": 1.48, "learning_rate": 3.3361595161951823e-06, "loss": 1.8856, "step": 5118 }, { "epoch": 1.48, "learning_rate": 3.3291681263073193e-06, "loss": 1.8229, "step": 5120 }, { "epoch": 1.48, "learning_rate": 3.322182606572357e-06, "loss": 1.8331, "step": 5122 }, { "epoch": 1.48, "learning_rate": 3.315202963137366e-06, "loss": 1.9003, "step": 5124 }, { "epoch": 1.48, "learning_rate": 3.3082292021442453e-06, "loss": 1.8108, "step": 5126 }, { "epoch": 1.49, "learning_rate": 3.301261329729719e-06, "loss": 1.9059, "step": 5128 }, { "epoch": 1.49, "learning_rate": 3.294299352025325e-06, "loss": 1.8223, "step": 5130 }, { "epoch": 1.49, "learning_rate": 3.2873432751574196e-06, "loss": 1.7771, "step": 5132 }, { "epoch": 1.49, "learning_rate": 3.2803931052471615e-06, "loss": 1.7774, "step": 5134 }, { "epoch": 1.49, "learning_rate": 3.2734488484105154e-06, "loss": 1.8701, "step": 5136 }, { "epoch": 1.49, "learning_rate": 3.2665105107582383e-06, "loss": 1.9051, "step": 5138 }, { "epoch": 1.49, "learning_rate": 3.2595780983958838e-06, "loss": 1.7985, "step": 5140 }, { "epoch": 1.49, "learning_rate": 3.25265161742379e-06, "loss": 1.9291, "step": 5142 }, { "epoch": 1.49, "learning_rate": 3.2457310739370684e-06, "loss": 1.8253, "step": 5144 }, { "epoch": 1.49, "learning_rate": 3.23881647402561e-06, "loss": 1.7999, "step": 5146 }, { "epoch": 1.49, "learning_rate": 3.23190782377409e-06, "loss": 1.8503, "step": 5148 }, { "epoch": 1.49, "learning_rate": 3.2250051292619224e-06, "loss": 1.808, "step": 5150 }, { "epoch": 1.49, "learning_rate": 3.218108396563293e-06, "loss": 1.7477, "step": 5152 }, { "epoch": 1.49, "learning_rate": 3.2112176317471533e-06, "loss": 1.837, "step": 5154 }, { "epoch": 1.49, "learning_rate": 3.204332840877179e-06, "loss": 1.8397, "step": 5156 }, { "epoch": 1.49, "learning_rate": 3.197454030011806e-06, "loss": 1.8427, "step": 5158 }, { "epoch": 1.49, "learning_rate": 3.190581205204204e-06, "loss": 1.7963, "step": 5160 }, { "epoch": 1.49, "learning_rate": 3.1837143725022725e-06, "loss": 1.8643, "step": 5162 }, { "epoch": 1.5, "learning_rate": 3.17685353794864e-06, "loss": 1.9085, "step": 5164 }, { "epoch": 1.5, "learning_rate": 3.169998707580657e-06, "loss": 1.8862, "step": 5166 }, { "epoch": 1.5, "learning_rate": 3.16314988743039e-06, "loss": 1.7949, "step": 5168 }, { "epoch": 1.5, "learning_rate": 3.1563070835246214e-06, "loss": 1.8723, "step": 5170 }, { "epoch": 1.5, "learning_rate": 3.1494703018848227e-06, "loss": 1.8496, "step": 5172 }, { "epoch": 1.5, "learning_rate": 3.14263954852719e-06, "loss": 1.8218, "step": 5174 }, { "epoch": 1.5, "learning_rate": 3.1358148294626024e-06, "loss": 1.8276, "step": 5176 }, { "epoch": 1.5, "learning_rate": 3.1289961506966217e-06, "loss": 1.8426, "step": 5178 }, { "epoch": 1.5, "learning_rate": 3.1221835182295037e-06, "loss": 1.775, "step": 5180 }, { "epoch": 1.5, "learning_rate": 3.1153769380561903e-06, "loss": 1.8534, "step": 5182 }, { "epoch": 1.5, "learning_rate": 3.10857641616628e-06, "loss": 1.8217, "step": 5184 }, { "epoch": 1.5, "learning_rate": 3.10178195854405e-06, "loss": 1.8098, "step": 5186 }, { "epoch": 1.5, "learning_rate": 3.09499357116845e-06, "loss": 1.8339, "step": 5188 }, { "epoch": 1.5, "learning_rate": 3.0882112600130686e-06, "loss": 1.8711, "step": 5190 }, { "epoch": 1.5, "learning_rate": 3.0814350310461616e-06, "loss": 1.787, "step": 5192 }, { "epoch": 1.5, "learning_rate": 3.0746648902306266e-06, "loss": 1.8453, "step": 5194 }, { "epoch": 1.5, "learning_rate": 3.0679008435240077e-06, "loss": 1.9129, "step": 5196 }, { "epoch": 1.51, "learning_rate": 3.0611428968784862e-06, "loss": 1.8134, "step": 5198 }, { "epoch": 1.51, "learning_rate": 3.0543910562408653e-06, "loss": 1.9354, "step": 5200 }, { "epoch": 1.51, "learning_rate": 3.0476453275525908e-06, "loss": 1.81, "step": 5202 }, { "epoch": 1.51, "learning_rate": 3.0409057167497258e-06, "loss": 1.8519, "step": 5204 }, { "epoch": 1.51, "learning_rate": 3.034172229762935e-06, "loss": 1.9355, "step": 5206 }, { "epoch": 1.51, "learning_rate": 3.027444872517518e-06, "loss": 1.8063, "step": 5208 }, { "epoch": 1.51, "learning_rate": 3.020723650933365e-06, "loss": 1.8507, "step": 5210 }, { "epoch": 1.51, "learning_rate": 3.0140085709249666e-06, "loss": 1.9233, "step": 5212 }, { "epoch": 1.51, "learning_rate": 3.0072996384014163e-06, "loss": 1.8358, "step": 5214 }, { "epoch": 1.51, "learning_rate": 3.0005968592663926e-06, "loss": 1.8306, "step": 5216 }, { "epoch": 1.51, "learning_rate": 2.9939002394181636e-06, "loss": 1.8209, "step": 5218 }, { "epoch": 1.51, "learning_rate": 2.987209784749573e-06, "loss": 1.8599, "step": 5220 }, { "epoch": 1.51, "learning_rate": 2.9805255011480416e-06, "loss": 1.8171, "step": 5222 }, { "epoch": 1.51, "learning_rate": 2.9738473944955604e-06, "loss": 1.7705, "step": 5224 }, { "epoch": 1.51, "learning_rate": 2.967175470668683e-06, "loss": 1.8208, "step": 5226 }, { "epoch": 1.51, "learning_rate": 2.9605097355385225e-06, "loss": 1.8853, "step": 5228 }, { "epoch": 1.51, "learning_rate": 2.9538501949707486e-06, "loss": 1.8722, "step": 5230 }, { "epoch": 1.52, "learning_rate": 2.9471968548255816e-06, "loss": 1.8598, "step": 5232 }, { "epoch": 1.52, "learning_rate": 2.9405497209577706e-06, "loss": 1.8665, "step": 5234 }, { "epoch": 1.52, "learning_rate": 2.9339087992166247e-06, "loss": 1.7962, "step": 5236 }, { "epoch": 1.52, "learning_rate": 2.9272740954459777e-06, "loss": 1.7972, "step": 5238 }, { "epoch": 1.52, "learning_rate": 2.920645615484181e-06, "loss": 1.722, "step": 5240 }, { "epoch": 1.52, "learning_rate": 2.9140233651641316e-06, "loss": 1.8875, "step": 5242 }, { "epoch": 1.52, "learning_rate": 2.9074073503132214e-06, "loss": 1.751, "step": 5244 }, { "epoch": 1.52, "learning_rate": 2.9007975767533714e-06, "loss": 1.8122, "step": 5246 }, { "epoch": 1.52, "learning_rate": 2.8941940503010025e-06, "loss": 1.8473, "step": 5248 }, { "epoch": 1.52, "learning_rate": 2.887596776767041e-06, "loss": 1.8879, "step": 5250 }, { "epoch": 1.52, "learning_rate": 2.881005761956911e-06, "loss": 1.8411, "step": 5252 }, { "epoch": 1.52, "learning_rate": 2.8744210116705295e-06, "loss": 1.7775, "step": 5254 }, { "epoch": 1.52, "learning_rate": 2.867842531702297e-06, "loss": 1.7741, "step": 5256 }, { "epoch": 1.52, "learning_rate": 2.8612703278411025e-06, "loss": 1.9018, "step": 5258 }, { "epoch": 1.52, "learning_rate": 2.854704405870308e-06, "loss": 1.8088, "step": 5260 }, { "epoch": 1.52, "learning_rate": 2.848144771567747e-06, "loss": 1.7874, "step": 5262 }, { "epoch": 1.52, "learning_rate": 2.8415914307057246e-06, "loss": 1.789, "step": 5264 }, { "epoch": 1.53, "learning_rate": 2.8350443890510006e-06, "loss": 1.825, "step": 5266 }, { "epoch": 1.53, "learning_rate": 2.8285036523647944e-06, "loss": 1.8266, "step": 5268 }, { "epoch": 1.53, "learning_rate": 2.8219692264027885e-06, "loss": 1.8305, "step": 5270 }, { "epoch": 1.53, "learning_rate": 2.815441116915093e-06, "loss": 1.8785, "step": 5272 }, { "epoch": 1.53, "learning_rate": 2.8089193296462692e-06, "loss": 1.8515, "step": 5274 }, { "epoch": 1.53, "learning_rate": 2.802403870335323e-06, "loss": 1.845, "step": 5276 }, { "epoch": 1.53, "learning_rate": 2.795894744715675e-06, "loss": 1.7727, "step": 5278 }, { "epoch": 1.53, "learning_rate": 2.789391958515183e-06, "loss": 1.8568, "step": 5280 }, { "epoch": 1.53, "learning_rate": 2.7828955174561258e-06, "loss": 1.8616, "step": 5282 }, { "epoch": 1.53, "learning_rate": 2.7764054272551967e-06, "loss": 1.8141, "step": 5284 }, { "epoch": 1.53, "learning_rate": 2.7699216936234994e-06, "loss": 1.8363, "step": 5286 }, { "epoch": 1.53, "learning_rate": 2.7634443222665474e-06, "loss": 1.8376, "step": 5288 }, { "epoch": 1.53, "learning_rate": 2.756973318884253e-06, "loss": 1.8571, "step": 5290 }, { "epoch": 1.53, "learning_rate": 2.7505086891709277e-06, "loss": 1.8385, "step": 5292 }, { "epoch": 1.53, "learning_rate": 2.7440504388152634e-06, "loss": 1.8428, "step": 5294 }, { "epoch": 1.53, "learning_rate": 2.737598573500355e-06, "loss": 1.7946, "step": 5296 }, { "epoch": 1.53, "learning_rate": 2.731153098903673e-06, "loss": 1.8097, "step": 5298 }, { "epoch": 1.53, "learning_rate": 2.7247140206970535e-06, "loss": 1.7484, "step": 5300 }, { "epoch": 1.54, "learning_rate": 2.7182813445467136e-06, "loss": 1.9215, "step": 5302 }, { "epoch": 1.54, "learning_rate": 2.7118550761132444e-06, "loss": 1.8094, "step": 5304 }, { "epoch": 1.54, "learning_rate": 2.705435221051581e-06, "loss": 1.8405, "step": 5306 }, { "epoch": 1.54, "learning_rate": 2.6990217850110225e-06, "loss": 1.8467, "step": 5308 }, { "epoch": 1.54, "learning_rate": 2.6926147736352302e-06, "loss": 1.6925, "step": 5310 }, { "epoch": 1.54, "learning_rate": 2.686214192562193e-06, "loss": 1.8354, "step": 5312 }, { "epoch": 1.54, "learning_rate": 2.679820047424253e-06, "loss": 1.829, "step": 5314 }, { "epoch": 1.54, "learning_rate": 2.6734323438480856e-06, "loss": 1.9035, "step": 5316 }, { "epoch": 1.54, "learning_rate": 2.667051087454701e-06, "loss": 1.7608, "step": 5318 }, { "epoch": 1.54, "learning_rate": 2.660676283859429e-06, "loss": 1.8885, "step": 5320 }, { "epoch": 1.54, "learning_rate": 2.6543079386719296e-06, "loss": 1.7698, "step": 5322 }, { "epoch": 1.54, "learning_rate": 2.6479460574961747e-06, "loss": 1.7654, "step": 5324 }, { "epoch": 1.54, "learning_rate": 2.6415906459304497e-06, "loss": 1.8165, "step": 5326 }, { "epoch": 1.54, "learning_rate": 2.63524170956734e-06, "loss": 1.8435, "step": 5328 }, { "epoch": 1.54, "learning_rate": 2.6288992539937463e-06, "loss": 1.7945, "step": 5330 }, { "epoch": 1.54, "learning_rate": 2.62256328479086e-06, "loss": 1.8634, "step": 5332 }, { "epoch": 1.54, "learning_rate": 2.6162338075341563e-06, "loss": 1.8982, "step": 5334 }, { "epoch": 1.55, "learning_rate": 2.6099108277934105e-06, "loss": 1.7955, "step": 5336 }, { "epoch": 1.55, "learning_rate": 2.603594351132673e-06, "loss": 1.8325, "step": 5338 }, { "epoch": 1.55, "learning_rate": 2.5972843831102755e-06, "loss": 1.7891, "step": 5340 }, { "epoch": 1.55, "learning_rate": 2.590980929278819e-06, "loss": 1.769, "step": 5342 }, { "epoch": 1.55, "learning_rate": 2.584683995185174e-06, "loss": 1.9159, "step": 5344 }, { "epoch": 1.55, "learning_rate": 2.5783935863704736e-06, "loss": 1.8187, "step": 5346 }, { "epoch": 1.55, "learning_rate": 2.5721097083701085e-06, "loss": 1.8402, "step": 5348 }, { "epoch": 1.55, "learning_rate": 2.5658323667137218e-06, "loss": 1.7992, "step": 5350 }, { "epoch": 1.55, "learning_rate": 2.559561566925206e-06, "loss": 1.8383, "step": 5352 }, { "epoch": 1.55, "learning_rate": 2.553297314522698e-06, "loss": 1.8086, "step": 5354 }, { "epoch": 1.55, "learning_rate": 2.5470396150185692e-06, "loss": 1.7951, "step": 5356 }, { "epoch": 1.55, "learning_rate": 2.5407884739194334e-06, "loss": 1.7492, "step": 5358 }, { "epoch": 1.55, "learning_rate": 2.534543896726117e-06, "loss": 1.8509, "step": 5360 }, { "epoch": 1.55, "learning_rate": 2.528305888933683e-06, "loss": 1.8516, "step": 5362 }, { "epoch": 1.55, "learning_rate": 2.522074456031418e-06, "loss": 1.8307, "step": 5364 }, { "epoch": 1.55, "learning_rate": 2.515849603502808e-06, "loss": 1.864, "step": 5366 }, { "epoch": 1.55, "learning_rate": 2.509631336825559e-06, "loss": 1.857, "step": 5368 }, { "epoch": 1.56, "learning_rate": 2.5034196614715776e-06, "loss": 1.8301, "step": 5370 }, { "epoch": 1.56, "learning_rate": 2.4972145829069717e-06, "loss": 1.8399, "step": 5372 }, { "epoch": 1.56, "learning_rate": 2.491016106592045e-06, "loss": 1.9345, "step": 5374 }, { "epoch": 1.56, "learning_rate": 2.484824237981288e-06, "loss": 1.7854, "step": 5376 }, { "epoch": 1.56, "learning_rate": 2.4786389825233804e-06, "loss": 1.7948, "step": 5378 }, { "epoch": 1.56, "learning_rate": 2.4724603456611805e-06, "loss": 1.8809, "step": 5380 }, { "epoch": 1.56, "learning_rate": 2.466288332831722e-06, "loss": 1.8164, "step": 5382 }, { "epoch": 1.56, "learning_rate": 2.4601229494662125e-06, "loss": 1.8231, "step": 5384 }, { "epoch": 1.56, "learning_rate": 2.4539642009900255e-06, "loss": 1.8263, "step": 5386 }, { "epoch": 1.56, "learning_rate": 2.4478120928226845e-06, "loss": 1.8525, "step": 5388 }, { "epoch": 1.56, "learning_rate": 2.441666630377889e-06, "loss": 1.8702, "step": 5390 }, { "epoch": 1.56, "learning_rate": 2.435527819063482e-06, "loss": 1.8162, "step": 5392 }, { "epoch": 1.56, "learning_rate": 2.4293956642814453e-06, "loss": 1.7771, "step": 5394 }, { "epoch": 1.56, "learning_rate": 2.4232701714279118e-06, "loss": 1.8435, "step": 5396 }, { "epoch": 1.56, "learning_rate": 2.417151345893157e-06, "loss": 1.9168, "step": 5398 }, { "epoch": 1.56, "learning_rate": 2.4110391930615772e-06, "loss": 1.7897, "step": 5400 }, { "epoch": 1.56, "learning_rate": 2.404933718311704e-06, "loss": 1.8614, "step": 5402 }, { "epoch": 1.57, "learning_rate": 2.3988349270161914e-06, "loss": 1.769, "step": 5404 }, { "epoch": 1.57, "learning_rate": 2.3927428245418126e-06, "loss": 1.8057, "step": 5406 }, { "epoch": 1.57, "learning_rate": 2.386657416249454e-06, "loss": 1.8815, "step": 5408 }, { "epoch": 1.57, "learning_rate": 2.38057870749411e-06, "loss": 1.88, "step": 5410 }, { "epoch": 1.57, "learning_rate": 2.3745067036248835e-06, "loss": 1.8729, "step": 5412 }, { "epoch": 1.57, "learning_rate": 2.3684414099849763e-06, "loss": 1.8159, "step": 5414 }, { "epoch": 1.57, "learning_rate": 2.362382831911675e-06, "loss": 1.7876, "step": 5416 }, { "epoch": 1.57, "learning_rate": 2.356330974736375e-06, "loss": 1.8473, "step": 5418 }, { "epoch": 1.57, "learning_rate": 2.3502858437845475e-06, "loss": 1.8786, "step": 5420 }, { "epoch": 1.57, "learning_rate": 2.3442474443757367e-06, "loss": 1.8132, "step": 5422 }, { "epoch": 1.57, "learning_rate": 2.338215781823582e-06, "loss": 1.9074, "step": 5424 }, { "epoch": 1.57, "learning_rate": 2.3321908614357824e-06, "loss": 1.8044, "step": 5426 }, { "epoch": 1.57, "learning_rate": 2.3261726885141023e-06, "loss": 1.7455, "step": 5428 }, { "epoch": 1.57, "learning_rate": 2.3201612683543706e-06, "loss": 1.8745, "step": 5430 }, { "epoch": 1.57, "learning_rate": 2.314156606246485e-06, "loss": 1.7888, "step": 5432 }, { "epoch": 1.57, "learning_rate": 2.308158707474377e-06, "loss": 1.8665, "step": 5434 }, { "epoch": 1.57, "learning_rate": 2.302167577316042e-06, "loss": 1.8502, "step": 5436 }, { "epoch": 1.57, "learning_rate": 2.296183221043511e-06, "loss": 1.7957, "step": 5438 }, { "epoch": 1.58, "learning_rate": 2.2902056439228593e-06, "loss": 1.9009, "step": 5440 }, { "epoch": 1.58, "learning_rate": 2.284234851214191e-06, "loss": 1.8319, "step": 5442 }, { "epoch": 1.58, "learning_rate": 2.2782708481716466e-06, "loss": 1.8424, "step": 5444 }, { "epoch": 1.58, "learning_rate": 2.2723136400433865e-06, "loss": 1.8811, "step": 5446 }, { "epoch": 1.58, "learning_rate": 2.2663632320715976e-06, "loss": 1.9006, "step": 5448 }, { "epoch": 1.58, "learning_rate": 2.2604196294924696e-06, "loss": 1.8163, "step": 5450 }, { "epoch": 1.58, "learning_rate": 2.2544828375362236e-06, "loss": 1.8175, "step": 5452 }, { "epoch": 1.58, "learning_rate": 2.2485528614270757e-06, "loss": 1.8082, "step": 5454 }, { "epoch": 1.58, "learning_rate": 2.242629706383237e-06, "loss": 1.7571, "step": 5456 }, { "epoch": 1.58, "learning_rate": 2.236713377616936e-06, "loss": 1.7668, "step": 5458 }, { "epoch": 1.58, "learning_rate": 2.2308038803343756e-06, "loss": 1.9114, "step": 5460 }, { "epoch": 1.58, "learning_rate": 2.2249012197357578e-06, "loss": 1.8224, "step": 5462 }, { "epoch": 1.58, "learning_rate": 2.219005401015264e-06, "loss": 1.804, "step": 5464 }, { "epoch": 1.58, "learning_rate": 2.2131164293610573e-06, "loss": 1.7861, "step": 5466 }, { "epoch": 1.58, "learning_rate": 2.207234309955275e-06, "loss": 1.8188, "step": 5468 }, { "epoch": 1.58, "learning_rate": 2.201359047974023e-06, "loss": 1.8819, "step": 5470 }, { "epoch": 1.58, "learning_rate": 2.195490648587375e-06, "loss": 1.7782, "step": 5472 }, { "epoch": 1.59, "learning_rate": 2.1896291169593643e-06, "loss": 1.868, "step": 5474 }, { "epoch": 1.59, "learning_rate": 2.1837744582479812e-06, "loss": 1.8392, "step": 5476 }, { "epoch": 1.59, "learning_rate": 2.1779266776051687e-06, "loss": 1.787, "step": 5478 }, { "epoch": 1.59, "learning_rate": 2.1720857801768203e-06, "loss": 1.7397, "step": 5480 }, { "epoch": 1.59, "learning_rate": 2.1662517711027607e-06, "loss": 1.7952, "step": 5482 }, { "epoch": 1.59, "learning_rate": 2.160424655516764e-06, "loss": 1.7927, "step": 5484 }, { "epoch": 1.59, "learning_rate": 2.1546044385465424e-06, "loss": 1.8651, "step": 5486 }, { "epoch": 1.59, "learning_rate": 2.1487911253137215e-06, "loss": 1.8451, "step": 5488 }, { "epoch": 1.59, "learning_rate": 2.142984720933863e-06, "loss": 1.8428, "step": 5490 }, { "epoch": 1.59, "learning_rate": 2.137185230516453e-06, "loss": 1.7256, "step": 5492 }, { "epoch": 1.59, "learning_rate": 2.1313926591648794e-06, "loss": 1.7537, "step": 5494 }, { "epoch": 1.59, "learning_rate": 2.125607011976454e-06, "loss": 1.7836, "step": 5496 }, { "epoch": 1.59, "learning_rate": 2.119828294042389e-06, "loss": 1.8106, "step": 5498 }, { "epoch": 1.59, "learning_rate": 2.1140565104478007e-06, "loss": 1.8746, "step": 5500 }, { "epoch": 1.59, "learning_rate": 2.1082916662717056e-06, "loss": 1.8672, "step": 5502 }, { "epoch": 1.59, "learning_rate": 2.1025337665870104e-06, "loss": 1.7271, "step": 5504 }, { "epoch": 1.59, "learning_rate": 2.096782816460513e-06, "loss": 1.7552, "step": 5506 }, { "epoch": 1.6, "learning_rate": 2.0910388209528966e-06, "loss": 1.8254, "step": 5508 }, { "epoch": 1.6, "learning_rate": 2.085301785118716e-06, "loss": 1.8258, "step": 5510 }, { "epoch": 1.6, "learning_rate": 2.0795717140064163e-06, "loss": 1.7319, "step": 5512 }, { "epoch": 1.6, "learning_rate": 2.0738486126583056e-06, "loss": 1.8778, "step": 5514 }, { "epoch": 1.6, "learning_rate": 2.0681324861105556e-06, "loss": 1.8043, "step": 5516 }, { "epoch": 1.6, "learning_rate": 2.0624233393932024e-06, "loss": 1.7538, "step": 5518 }, { "epoch": 1.6, "learning_rate": 2.056721177530151e-06, "loss": 1.8766, "step": 5520 }, { "epoch": 1.6, "learning_rate": 2.0510260055391417e-06, "loss": 1.8054, "step": 5522 }, { "epoch": 1.6, "learning_rate": 2.0453378284317748e-06, "loss": 1.8081, "step": 5524 }, { "epoch": 1.6, "learning_rate": 2.0396566512135e-06, "loss": 1.8145, "step": 5526 }, { "epoch": 1.6, "learning_rate": 2.0339824788835914e-06, "loss": 1.8295, "step": 5528 }, { "epoch": 1.6, "learning_rate": 2.028315316435173e-06, "loss": 1.8263, "step": 5530 }, { "epoch": 1.6, "learning_rate": 2.0226551688551955e-06, "loss": 1.8278, "step": 5532 }, { "epoch": 1.6, "learning_rate": 2.017002041124435e-06, "loss": 1.7478, "step": 5534 }, { "epoch": 1.6, "learning_rate": 2.0113559382174964e-06, "loss": 1.8702, "step": 5536 }, { "epoch": 1.6, "learning_rate": 2.00571686510279e-06, "loss": 1.7817, "step": 5538 }, { "epoch": 1.6, "learning_rate": 2.0000848267425577e-06, "loss": 1.8449, "step": 5540 }, { "epoch": 1.6, "learning_rate": 1.994459828092841e-06, "loss": 1.8, "step": 5542 }, { "epoch": 1.61, "learning_rate": 1.9888418741034786e-06, "loss": 1.8687, "step": 5544 }, { "epoch": 1.61, "learning_rate": 1.9832309697181297e-06, "loss": 1.8668, "step": 5546 }, { "epoch": 1.61, "learning_rate": 1.977627119874237e-06, "loss": 1.8876, "step": 5548 }, { "epoch": 1.61, "learning_rate": 1.9720303295030352e-06, "loss": 1.7988, "step": 5550 }, { "epoch": 1.61, "learning_rate": 1.9664406035295493e-06, "loss": 1.7664, "step": 5552 }, { "epoch": 1.61, "learning_rate": 1.9608579468725895e-06, "loss": 1.8747, "step": 5554 }, { "epoch": 1.61, "learning_rate": 1.9552823644447437e-06, "loss": 1.8523, "step": 5556 }, { "epoch": 1.61, "learning_rate": 1.949713861152375e-06, "loss": 1.8145, "step": 5558 }, { "epoch": 1.61, "learning_rate": 1.944152441895616e-06, "loss": 1.837, "step": 5560 }, { "epoch": 1.61, "learning_rate": 1.938598111568367e-06, "loss": 1.826, "step": 5562 }, { "epoch": 1.61, "learning_rate": 1.93305087505829e-06, "loss": 1.8027, "step": 5564 }, { "epoch": 1.61, "learning_rate": 1.9275107372468017e-06, "loss": 1.8829, "step": 5566 }, { "epoch": 1.61, "learning_rate": 1.921977703009077e-06, "loss": 1.8581, "step": 5568 }, { "epoch": 1.61, "learning_rate": 1.916451777214039e-06, "loss": 1.8827, "step": 5570 }, { "epoch": 1.61, "learning_rate": 1.910932964724347e-06, "loss": 1.8069, "step": 5572 }, { "epoch": 1.61, "learning_rate": 1.9054212703964147e-06, "loss": 1.7724, "step": 5574 }, { "epoch": 1.61, "learning_rate": 1.8999166990803863e-06, "loss": 1.8488, "step": 5576 }, { "epoch": 1.62, "learning_rate": 1.8944192556201269e-06, "loss": 1.7872, "step": 5578 }, { "epoch": 1.62, "learning_rate": 1.888928944853251e-06, "loss": 1.8851, "step": 5580 }, { "epoch": 1.62, "learning_rate": 1.8834457716110777e-06, "loss": 1.8077, "step": 5582 }, { "epoch": 1.62, "learning_rate": 1.8779697407186526e-06, "loss": 1.8433, "step": 5584 }, { "epoch": 1.62, "learning_rate": 1.8725008569947366e-06, "loss": 1.836, "step": 5586 }, { "epoch": 1.62, "learning_rate": 1.8670391252518017e-06, "loss": 1.8421, "step": 5588 }, { "epoch": 1.62, "learning_rate": 1.8615845502960227e-06, "loss": 1.7945, "step": 5590 }, { "epoch": 1.62, "learning_rate": 1.8561371369272818e-06, "loss": 1.8385, "step": 5592 }, { "epoch": 1.62, "learning_rate": 1.8506968899391554e-06, "loss": 1.8468, "step": 5594 }, { "epoch": 1.62, "learning_rate": 1.8452638141189139e-06, "loss": 1.807, "step": 5596 }, { "epoch": 1.62, "learning_rate": 1.8398379142475198e-06, "loss": 1.8125, "step": 5598 }, { "epoch": 1.62, "learning_rate": 1.834419195099617e-06, "loss": 1.8089, "step": 5600 }, { "epoch": 1.62, "learning_rate": 1.8290076614435381e-06, "loss": 1.8355, "step": 5602 }, { "epoch": 1.62, "learning_rate": 1.8236033180412793e-06, "loss": 1.8614, "step": 5604 }, { "epoch": 1.62, "learning_rate": 1.8182061696485209e-06, "loss": 1.8707, "step": 5606 }, { "epoch": 1.62, "learning_rate": 1.8128162210146138e-06, "loss": 1.9556, "step": 5608 }, { "epoch": 1.62, "learning_rate": 1.8074334768825608e-06, "loss": 1.7945, "step": 5610 }, { "epoch": 1.63, "learning_rate": 1.8020579419890328e-06, "loss": 1.8511, "step": 5612 }, { "epoch": 1.63, "learning_rate": 1.7966896210643635e-06, "loss": 1.8637, "step": 5614 }, { "epoch": 1.63, "learning_rate": 1.7913285188325235e-06, "loss": 1.8081, "step": 5616 }, { "epoch": 1.63, "learning_rate": 1.7859746400111421e-06, "loss": 1.8801, "step": 5618 }, { "epoch": 1.63, "learning_rate": 1.7806279893114874e-06, "loss": 1.8429, "step": 5620 }, { "epoch": 1.63, "learning_rate": 1.7752885714384693e-06, "loss": 1.8177, "step": 5622 }, { "epoch": 1.63, "learning_rate": 1.7699563910906313e-06, "loss": 1.871, "step": 5624 }, { "epoch": 1.63, "learning_rate": 1.7646314529601506e-06, "loss": 1.8023, "step": 5626 }, { "epoch": 1.63, "learning_rate": 1.7593137617328282e-06, "loss": 1.9429, "step": 5628 }, { "epoch": 1.63, "learning_rate": 1.7540033220880936e-06, "loss": 1.8974, "step": 5630 }, { "epoch": 1.63, "learning_rate": 1.7487001386989821e-06, "loss": 1.8657, "step": 5632 }, { "epoch": 1.63, "learning_rate": 1.7434042162321596e-06, "loss": 1.8849, "step": 5634 }, { "epoch": 1.63, "learning_rate": 1.7381155593478982e-06, "loss": 1.7614, "step": 5636 }, { "epoch": 1.63, "learning_rate": 1.7328341727000664e-06, "loss": 1.7925, "step": 5638 }, { "epoch": 1.63, "learning_rate": 1.7275600609361444e-06, "loss": 1.8834, "step": 5640 }, { "epoch": 1.63, "learning_rate": 1.7222932286972161e-06, "loss": 1.7748, "step": 5642 }, { "epoch": 1.63, "learning_rate": 1.7170336806179434e-06, "loss": 1.9128, "step": 5644 }, { "epoch": 1.64, "learning_rate": 1.7117814213265893e-06, "loss": 1.8234, "step": 5646 }, { "epoch": 1.64, "learning_rate": 1.706536455445007e-06, "loss": 1.8123, "step": 5648 }, { "epoch": 1.64, "learning_rate": 1.701298787588619e-06, "loss": 1.8257, "step": 5650 }, { "epoch": 1.64, "learning_rate": 1.6960684223664336e-06, "loss": 1.8156, "step": 5652 }, { "epoch": 1.64, "learning_rate": 1.6908453643810342e-06, "loss": 1.8735, "step": 5654 }, { "epoch": 1.64, "learning_rate": 1.6856296182285692e-06, "loss": 1.7037, "step": 5656 }, { "epoch": 1.64, "learning_rate": 1.6804211884987565e-06, "loss": 1.8316, "step": 5658 }, { "epoch": 1.64, "learning_rate": 1.6752200797748742e-06, "loss": 1.8537, "step": 5660 }, { "epoch": 1.64, "learning_rate": 1.6700262966337577e-06, "loss": 1.8606, "step": 5662 }, { "epoch": 1.64, "learning_rate": 1.664839843645799e-06, "loss": 1.8887, "step": 5664 }, { "epoch": 1.64, "learning_rate": 1.6596607253749308e-06, "loss": 1.8245, "step": 5666 }, { "epoch": 1.64, "learning_rate": 1.654488946378645e-06, "loss": 1.8174, "step": 5668 }, { "epoch": 1.64, "learning_rate": 1.6493245112079691e-06, "loss": 1.7511, "step": 5670 }, { "epoch": 1.64, "learning_rate": 1.6441674244074612e-06, "loss": 1.845, "step": 5672 }, { "epoch": 1.64, "learning_rate": 1.639017690515222e-06, "loss": 1.8372, "step": 5674 }, { "epoch": 1.64, "learning_rate": 1.6338753140628794e-06, "loss": 1.7638, "step": 5676 }, { "epoch": 1.64, "learning_rate": 1.6287402995755863e-06, "loss": 1.8449, "step": 5678 }, { "epoch": 1.64, "learning_rate": 1.6236126515720186e-06, "loss": 1.8081, "step": 5680 }, { "epoch": 1.65, "learning_rate": 1.6184923745643665e-06, "loss": 1.8004, "step": 5682 }, { "epoch": 1.65, "learning_rate": 1.6133794730583386e-06, "loss": 1.7859, "step": 5684 }, { "epoch": 1.65, "learning_rate": 1.6082739515531498e-06, "loss": 1.8423, "step": 5686 }, { "epoch": 1.65, "learning_rate": 1.6031758145415222e-06, "loss": 1.7751, "step": 5688 }, { "epoch": 1.65, "learning_rate": 1.5980850665096792e-06, "loss": 1.7912, "step": 5690 }, { "epoch": 1.65, "learning_rate": 1.5930017119373432e-06, "loss": 1.8369, "step": 5692 }, { "epoch": 1.65, "learning_rate": 1.5879257552977279e-06, "loss": 1.8307, "step": 5694 }, { "epoch": 1.65, "learning_rate": 1.5828572010575427e-06, "loss": 1.8192, "step": 5696 }, { "epoch": 1.65, "learning_rate": 1.5777960536769743e-06, "loss": 1.8485, "step": 5698 }, { "epoch": 1.65, "learning_rate": 1.5727423176096957e-06, "loss": 1.8562, "step": 5700 }, { "epoch": 1.65, "learning_rate": 1.5676959973028672e-06, "loss": 1.7949, "step": 5702 }, { "epoch": 1.65, "learning_rate": 1.5626570971971067e-06, "loss": 1.7888, "step": 5704 }, { "epoch": 1.65, "learning_rate": 1.5576256217265152e-06, "loss": 1.813, "step": 5706 }, { "epoch": 1.65, "learning_rate": 1.552601575318654e-06, "loss": 1.8079, "step": 5708 }, { "epoch": 1.65, "learning_rate": 1.5475849623945504e-06, "loss": 1.7935, "step": 5710 }, { "epoch": 1.65, "learning_rate": 1.5425757873686876e-06, "loss": 1.7549, "step": 5712 }, { "epoch": 1.65, "learning_rate": 1.537574054649007e-06, "loss": 1.7857, "step": 5714 }, { "epoch": 1.66, "learning_rate": 1.532579768636898e-06, "loss": 1.7966, "step": 5716 }, { "epoch": 1.66, "learning_rate": 1.5275929337271978e-06, "loss": 1.7901, "step": 5718 }, { "epoch": 1.66, "learning_rate": 1.5226135543081877e-06, "loss": 1.7949, "step": 5720 }, { "epoch": 1.66, "learning_rate": 1.5176416347615886e-06, "loss": 1.8145, "step": 5722 }, { "epoch": 1.66, "learning_rate": 1.5126771794625571e-06, "loss": 1.7595, "step": 5724 }, { "epoch": 1.66, "learning_rate": 1.5077201927796747e-06, "loss": 1.8266, "step": 5726 }, { "epoch": 1.66, "learning_rate": 1.5027706790749619e-06, "loss": 1.8144, "step": 5728 }, { "epoch": 1.66, "learning_rate": 1.4978286427038602e-06, "loss": 1.8202, "step": 5730 }, { "epoch": 1.66, "learning_rate": 1.4928940880152232e-06, "loss": 1.8693, "step": 5732 }, { "epoch": 1.66, "learning_rate": 1.4879670193513252e-06, "loss": 1.7547, "step": 5734 }, { "epoch": 1.66, "learning_rate": 1.4830474410478623e-06, "loss": 1.8959, "step": 5736 }, { "epoch": 1.66, "learning_rate": 1.4781353574339252e-06, "loss": 1.8529, "step": 5738 }, { "epoch": 1.66, "learning_rate": 1.473230772832015e-06, "loss": 1.8747, "step": 5740 }, { "epoch": 1.66, "learning_rate": 1.4683336915580415e-06, "loss": 1.8312, "step": 5742 }, { "epoch": 1.66, "learning_rate": 1.4634441179212967e-06, "loss": 1.8286, "step": 5744 }, { "epoch": 1.66, "learning_rate": 1.4585620562244774e-06, "loss": 1.8396, "step": 5746 }, { "epoch": 1.66, "learning_rate": 1.453687510763666e-06, "loss": 1.8173, "step": 5748 }, { "epoch": 1.67, "learning_rate": 1.4488204858283329e-06, "loss": 1.8713, "step": 5750 }, { "epoch": 1.67, "learning_rate": 1.4439609857013305e-06, "loss": 1.7954, "step": 5752 }, { "epoch": 1.67, "learning_rate": 1.4391090146588815e-06, "loss": 1.8298, "step": 5754 }, { "epoch": 1.67, "learning_rate": 1.4342645769705977e-06, "loss": 1.8825, "step": 5756 }, { "epoch": 1.67, "learning_rate": 1.4294276768994542e-06, "loss": 1.818, "step": 5758 }, { "epoch": 1.67, "learning_rate": 1.4245983187017854e-06, "loss": 1.8724, "step": 5760 }, { "epoch": 1.67, "learning_rate": 1.419776506627305e-06, "loss": 1.8686, "step": 5762 }, { "epoch": 1.67, "learning_rate": 1.414962244919077e-06, "loss": 1.8533, "step": 5764 }, { "epoch": 1.67, "learning_rate": 1.4101555378135178e-06, "loss": 1.8157, "step": 5766 }, { "epoch": 1.67, "learning_rate": 1.4053563895404032e-06, "loss": 1.7849, "step": 5768 }, { "epoch": 1.67, "learning_rate": 1.400564804322856e-06, "loss": 1.7533, "step": 5770 }, { "epoch": 1.67, "learning_rate": 1.3957807863773399e-06, "loss": 1.8838, "step": 5772 }, { "epoch": 1.67, "learning_rate": 1.3910043399136653e-06, "loss": 1.8299, "step": 5774 }, { "epoch": 1.67, "learning_rate": 1.3862354691349734e-06, "loss": 1.8411, "step": 5776 }, { "epoch": 1.67, "learning_rate": 1.381474178237746e-06, "loss": 1.8531, "step": 5778 }, { "epoch": 1.67, "learning_rate": 1.3767204714117877e-06, "loss": 1.741, "step": 5780 }, { "epoch": 1.67, "learning_rate": 1.3719743528402362e-06, "loss": 1.8359, "step": 5782 }, { "epoch": 1.68, "learning_rate": 1.3672358266995456e-06, "loss": 1.8093, "step": 5784 }, { "epoch": 1.68, "learning_rate": 1.3625048971594956e-06, "loss": 1.8818, "step": 5786 }, { "epoch": 1.68, "learning_rate": 1.3577815683831708e-06, "loss": 1.7879, "step": 5788 }, { "epoch": 1.68, "learning_rate": 1.3530658445269784e-06, "loss": 1.8348, "step": 5790 }, { "epoch": 1.68, "learning_rate": 1.3483577297406303e-06, "loss": 1.8167, "step": 5792 }, { "epoch": 1.68, "learning_rate": 1.3436572281671334e-06, "loss": 1.7819, "step": 5794 }, { "epoch": 1.68, "learning_rate": 1.3389643439428124e-06, "loss": 1.7392, "step": 5796 }, { "epoch": 1.68, "learning_rate": 1.3342790811972728e-06, "loss": 1.8368, "step": 5798 }, { "epoch": 1.68, "learning_rate": 1.3296014440534223e-06, "loss": 1.8756, "step": 5800 }, { "epoch": 1.68, "learning_rate": 1.3249314366274546e-06, "loss": 1.8568, "step": 5802 }, { "epoch": 1.68, "learning_rate": 1.320269063028853e-06, "loss": 1.7863, "step": 5804 }, { "epoch": 1.68, "learning_rate": 1.3156143273603794e-06, "loss": 1.8009, "step": 5806 }, { "epoch": 1.68, "learning_rate": 1.3109672337180767e-06, "loss": 1.8055, "step": 5808 }, { "epoch": 1.68, "learning_rate": 1.3063277861912637e-06, "loss": 1.8465, "step": 5810 }, { "epoch": 1.68, "learning_rate": 1.301695988862527e-06, "loss": 1.7902, "step": 5812 }, { "epoch": 1.68, "learning_rate": 1.2970718458077258e-06, "loss": 1.8154, "step": 5814 }, { "epoch": 1.68, "learning_rate": 1.2924553610959823e-06, "loss": 1.8033, "step": 5816 }, { "epoch": 1.68, "learning_rate": 1.2878465387896789e-06, "loss": 1.7789, "step": 5818 }, { "epoch": 1.69, "learning_rate": 1.2832453829444535e-06, "loss": 1.8309, "step": 5820 }, { "epoch": 1.69, "learning_rate": 1.2786518976091978e-06, "loss": 1.8139, "step": 5822 }, { "epoch": 1.69, "learning_rate": 1.2740660868260634e-06, "loss": 1.7461, "step": 5824 }, { "epoch": 1.69, "learning_rate": 1.269487954630434e-06, "loss": 1.8488, "step": 5826 }, { "epoch": 1.69, "learning_rate": 1.264917505050942e-06, "loss": 1.9847, "step": 5828 }, { "epoch": 1.69, "learning_rate": 1.2603547421094675e-06, "loss": 1.7848, "step": 5830 }, { "epoch": 1.69, "learning_rate": 1.2557996698211138e-06, "loss": 1.8047, "step": 5832 }, { "epoch": 1.69, "learning_rate": 1.2512522921942227e-06, "loss": 1.8262, "step": 5834 }, { "epoch": 1.69, "learning_rate": 1.2467126132303641e-06, "loss": 1.8487, "step": 5836 }, { "epoch": 1.69, "learning_rate": 1.2421806369243361e-06, "loss": 1.833, "step": 5838 }, { "epoch": 1.69, "learning_rate": 1.2376563672641562e-06, "loss": 1.773, "step": 5840 }, { "epoch": 1.69, "learning_rate": 1.233139808231053e-06, "loss": 1.8821, "step": 5842 }, { "epoch": 1.69, "learning_rate": 1.2286309637994864e-06, "loss": 1.846, "step": 5844 }, { "epoch": 1.69, "learning_rate": 1.224129837937117e-06, "loss": 1.8719, "step": 5846 }, { "epoch": 1.69, "learning_rate": 1.2196364346048073e-06, "loss": 1.8041, "step": 5848 }, { "epoch": 1.69, "learning_rate": 1.215150757756639e-06, "loss": 1.8355, "step": 5850 }, { "epoch": 1.69, "learning_rate": 1.2106728113398869e-06, "loss": 1.7858, "step": 5852 }, { "epoch": 1.7, "learning_rate": 1.2062025992950188e-06, "loss": 1.7884, "step": 5854 }, { "epoch": 1.7, "learning_rate": 1.2017401255557038e-06, "loss": 1.8878, "step": 5856 }, { "epoch": 1.7, "learning_rate": 1.1972853940488017e-06, "loss": 1.7389, "step": 5858 }, { "epoch": 1.7, "learning_rate": 1.1928384086943534e-06, "loss": 1.8282, "step": 5860 }, { "epoch": 1.7, "learning_rate": 1.1883991734055844e-06, "loss": 1.852, "step": 5862 }, { "epoch": 1.7, "learning_rate": 1.1839676920889098e-06, "loss": 1.8416, "step": 5864 }, { "epoch": 1.7, "learning_rate": 1.179543968643907e-06, "loss": 1.7861, "step": 5866 }, { "epoch": 1.7, "learning_rate": 1.1751280069633375e-06, "loss": 1.8537, "step": 5868 }, { "epoch": 1.7, "learning_rate": 1.1707198109331274e-06, "loss": 1.857, "step": 5870 }, { "epoch": 1.7, "learning_rate": 1.1663193844323728e-06, "loss": 1.8286, "step": 5872 }, { "epoch": 1.7, "learning_rate": 1.1619267313333281e-06, "loss": 1.8221, "step": 5874 }, { "epoch": 1.7, "learning_rate": 1.1575418555014128e-06, "loss": 1.8341, "step": 5876 }, { "epoch": 1.7, "learning_rate": 1.1531647607951978e-06, "loss": 1.8179, "step": 5878 }, { "epoch": 1.7, "learning_rate": 1.1487954510664123e-06, "loss": 1.7206, "step": 5880 }, { "epoch": 1.7, "learning_rate": 1.144433930159924e-06, "loss": 1.7921, "step": 5882 }, { "epoch": 1.7, "learning_rate": 1.1400802019137613e-06, "loss": 1.8454, "step": 5884 }, { "epoch": 1.7, "learning_rate": 1.135734270159089e-06, "loss": 1.7938, "step": 5886 }, { "epoch": 1.71, "learning_rate": 1.1313961387202044e-06, "loss": 1.8295, "step": 5888 }, { "epoch": 1.71, "learning_rate": 1.1270658114145495e-06, "loss": 1.8364, "step": 5890 }, { "epoch": 1.71, "learning_rate": 1.122743292052697e-06, "loss": 1.8628, "step": 5892 }, { "epoch": 1.71, "learning_rate": 1.1184285844383458e-06, "loss": 1.802, "step": 5894 }, { "epoch": 1.71, "learning_rate": 1.114121692368324e-06, "loss": 1.8421, "step": 5896 }, { "epoch": 1.71, "learning_rate": 1.1098226196325813e-06, "loss": 1.8328, "step": 5898 }, { "epoch": 1.71, "learning_rate": 1.1055313700141856e-06, "loss": 1.8659, "step": 5900 }, { "epoch": 1.71, "learning_rate": 1.1012479472893212e-06, "loss": 1.7957, "step": 5902 }, { "epoch": 1.71, "learning_rate": 1.0969723552272859e-06, "loss": 1.8406, "step": 5904 }, { "epoch": 1.71, "learning_rate": 1.0927045975904849e-06, "loss": 1.808, "step": 5906 }, { "epoch": 1.71, "learning_rate": 1.0884446781344304e-06, "loss": 1.8698, "step": 5908 }, { "epoch": 1.71, "learning_rate": 1.0841926006077385e-06, "loss": 1.7862, "step": 5910 }, { "epoch": 1.71, "learning_rate": 1.0799483687521218e-06, "loss": 1.891, "step": 5912 }, { "epoch": 1.71, "learning_rate": 1.0757119863023924e-06, "loss": 1.8765, "step": 5914 }, { "epoch": 1.71, "learning_rate": 1.0714834569864473e-06, "loss": 1.8899, "step": 5916 }, { "epoch": 1.71, "learning_rate": 1.0672627845252881e-06, "loss": 1.8288, "step": 5918 }, { "epoch": 1.71, "learning_rate": 1.0630499726329846e-06, "loss": 1.9467, "step": 5920 }, { "epoch": 1.72, "learning_rate": 1.0588450250167025e-06, "loss": 1.8145, "step": 5922 }, { "epoch": 1.72, "learning_rate": 1.054647945376681e-06, "loss": 1.7282, "step": 5924 }, { "epoch": 1.72, "learning_rate": 1.0504587374062392e-06, "loss": 1.8403, "step": 5926 }, { "epoch": 1.72, "learning_rate": 1.0462774047917656e-06, "loss": 1.851, "step": 5928 }, { "epoch": 1.72, "learning_rate": 1.042103951212723e-06, "loss": 1.7849, "step": 5930 }, { "epoch": 1.72, "learning_rate": 1.0379383803416365e-06, "loss": 1.7733, "step": 5932 }, { "epoch": 1.72, "learning_rate": 1.0337806958440966e-06, "loss": 1.7451, "step": 5934 }, { "epoch": 1.72, "learning_rate": 1.0296309013787553e-06, "loss": 1.8616, "step": 5936 }, { "epoch": 1.72, "learning_rate": 1.0254890005973205e-06, "loss": 1.8507, "step": 5938 }, { "epoch": 1.72, "learning_rate": 1.0213549971445536e-06, "loss": 1.889, "step": 5940 }, { "epoch": 1.72, "learning_rate": 1.017228894658262e-06, "loss": 1.8583, "step": 5942 }, { "epoch": 1.72, "learning_rate": 1.0131106967693117e-06, "loss": 1.8485, "step": 5944 }, { "epoch": 1.72, "learning_rate": 1.0090004071016047e-06, "loss": 1.8138, "step": 5946 }, { "epoch": 1.72, "learning_rate": 1.0048980292720823e-06, "loss": 1.8337, "step": 5948 }, { "epoch": 1.72, "learning_rate": 1.0008035668907278e-06, "loss": 1.8109, "step": 5950 }, { "epoch": 1.72, "learning_rate": 9.967170235605617e-07, "loss": 1.7835, "step": 5952 }, { "epoch": 1.72, "learning_rate": 9.926384028776293e-07, "loss": 1.7696, "step": 5954 }, { "epoch": 1.72, "learning_rate": 9.885677084310063e-07, "loss": 1.8643, "step": 5956 }, { "epoch": 1.73, "learning_rate": 9.845049438027976e-07, "loss": 1.8149, "step": 5958 }, { "epoch": 1.73, "learning_rate": 9.804501125681243e-07, "loss": 1.8179, "step": 5960 }, { "epoch": 1.73, "learning_rate": 9.7640321829513e-07, "loss": 1.7643, "step": 5962 }, { "epoch": 1.73, "learning_rate": 9.723642645449738e-07, "loss": 1.8391, "step": 5964 }, { "epoch": 1.73, "learning_rate": 9.68333254871825e-07, "loss": 1.8627, "step": 5966 }, { "epoch": 1.73, "learning_rate": 9.64310192822867e-07, "loss": 1.8508, "step": 5968 }, { "epoch": 1.73, "learning_rate": 9.602950819382806e-07, "loss": 1.9322, "step": 5970 }, { "epoch": 1.73, "learning_rate": 9.562879257512602e-07, "loss": 1.8147, "step": 5972 }, { "epoch": 1.73, "learning_rate": 9.522887277879955e-07, "loss": 1.8193, "step": 5974 }, { "epoch": 1.73, "learning_rate": 9.482974915676701e-07, "loss": 1.7667, "step": 5976 }, { "epoch": 1.73, "learning_rate": 9.443142206024669e-07, "loss": 1.9172, "step": 5978 }, { "epoch": 1.73, "learning_rate": 9.403389183975609e-07, "loss": 1.8337, "step": 5980 }, { "epoch": 1.73, "learning_rate": 9.363715884511049e-07, "loss": 1.8693, "step": 5982 }, { "epoch": 1.73, "learning_rate": 9.32412234254243e-07, "loss": 1.8566, "step": 5984 }, { "epoch": 1.73, "learning_rate": 9.284608592911082e-07, "loss": 1.7987, "step": 5986 }, { "epoch": 1.73, "learning_rate": 9.245174670387957e-07, "loss": 1.8364, "step": 5988 }, { "epoch": 1.73, "learning_rate": 9.205820609673888e-07, "loss": 1.8101, "step": 5990 }, { "epoch": 1.74, "learning_rate": 9.166546445399393e-07, "loss": 1.8665, "step": 5992 }, { "epoch": 1.74, "learning_rate": 9.127352212124663e-07, "loss": 1.8683, "step": 5994 }, { "epoch": 1.74, "learning_rate": 9.088237944339595e-07, "loss": 1.7889, "step": 5996 }, { "epoch": 1.74, "learning_rate": 9.049203676463681e-07, "loss": 1.8646, "step": 5998 }, { "epoch": 1.74, "learning_rate": 9.01024944284602e-07, "loss": 1.8732, "step": 6000 }, { "epoch": 1.74, "learning_rate": 8.971375277765326e-07, "loss": 1.7928, "step": 6002 }, { "epoch": 1.74, "learning_rate": 8.932581215429748e-07, "loss": 1.8552, "step": 6004 }, { "epoch": 1.74, "learning_rate": 8.893867289977099e-07, "loss": 1.7768, "step": 6006 }, { "epoch": 1.74, "learning_rate": 8.855233535474572e-07, "loss": 1.7545, "step": 6008 }, { "epoch": 1.74, "learning_rate": 8.816679985918786e-07, "loss": 1.8201, "step": 6010 }, { "epoch": 1.74, "learning_rate": 8.778206675235901e-07, "loss": 1.7988, "step": 6012 }, { "epoch": 1.74, "learning_rate": 8.739813637281336e-07, "loss": 1.7736, "step": 6014 }, { "epoch": 1.74, "learning_rate": 8.701500905839966e-07, "loss": 1.7752, "step": 6016 }, { "epoch": 1.74, "learning_rate": 8.663268514625966e-07, "loss": 1.7626, "step": 6018 }, { "epoch": 1.74, "learning_rate": 8.625116497282804e-07, "loss": 1.7975, "step": 6020 }, { "epoch": 1.74, "learning_rate": 8.587044887383222e-07, "loss": 1.8777, "step": 6022 }, { "epoch": 1.74, "learning_rate": 8.549053718429234e-07, "loss": 1.7877, "step": 6024 }, { "epoch": 1.75, "learning_rate": 8.511143023852042e-07, "loss": 1.7681, "step": 6026 }, { "epoch": 1.75, "learning_rate": 8.473312837012027e-07, "loss": 1.8131, "step": 6028 }, { "epoch": 1.75, "learning_rate": 8.435563191198759e-07, "loss": 1.7893, "step": 6030 }, { "epoch": 1.75, "learning_rate": 8.39789411963089e-07, "loss": 1.8997, "step": 6032 }, { "epoch": 1.75, "learning_rate": 8.360305655456225e-07, "loss": 1.7596, "step": 6034 }, { "epoch": 1.75, "learning_rate": 8.322797831751561e-07, "loss": 1.8154, "step": 6036 }, { "epoch": 1.75, "learning_rate": 8.285370681522776e-07, "loss": 1.8544, "step": 6038 }, { "epoch": 1.75, "learning_rate": 8.248024237704822e-07, "loss": 1.8349, "step": 6040 }, { "epoch": 1.75, "learning_rate": 8.210758533161489e-07, "loss": 1.8777, "step": 6042 }, { "epoch": 1.75, "learning_rate": 8.173573600685614e-07, "loss": 1.7708, "step": 6044 }, { "epoch": 1.75, "learning_rate": 8.136469472998987e-07, "loss": 1.8132, "step": 6046 }, { "epoch": 1.75, "learning_rate": 8.099446182752202e-07, "loss": 1.8318, "step": 6048 }, { "epoch": 1.75, "learning_rate": 8.062503762524754e-07, "loss": 1.7707, "step": 6050 }, { "epoch": 1.75, "learning_rate": 8.025642244825004e-07, "loss": 1.7801, "step": 6052 }, { "epoch": 1.75, "learning_rate": 7.988861662090087e-07, "loss": 1.8214, "step": 6054 }, { "epoch": 1.75, "learning_rate": 7.952162046685929e-07, "loss": 1.7987, "step": 6056 }, { "epoch": 1.75, "learning_rate": 7.915543430907202e-07, "loss": 1.7793, "step": 6058 }, { "epoch": 1.75, "learning_rate": 7.879005846977306e-07, "loss": 1.8592, "step": 6060 }, { "epoch": 1.76, "learning_rate": 7.842549327048366e-07, "loss": 1.8224, "step": 6062 }, { "epoch": 1.76, "learning_rate": 7.806173903201064e-07, "loss": 1.7953, "step": 6064 }, { "epoch": 1.76, "learning_rate": 7.769879607444864e-07, "loss": 1.8166, "step": 6066 }, { "epoch": 1.76, "learning_rate": 7.733666471717771e-07, "loss": 1.7834, "step": 6068 }, { "epoch": 1.76, "learning_rate": 7.69753452788633e-07, "loss": 1.9058, "step": 6070 }, { "epoch": 1.76, "learning_rate": 7.661483807745684e-07, "loss": 1.8149, "step": 6072 }, { "epoch": 1.76, "learning_rate": 7.625514343019557e-07, "loss": 1.8123, "step": 6074 }, { "epoch": 1.76, "learning_rate": 7.589626165360054e-07, "loss": 1.9057, "step": 6076 }, { "epoch": 1.76, "learning_rate": 7.55381930634781e-07, "loss": 1.8547, "step": 6078 }, { "epoch": 1.76, "learning_rate": 7.518093797491944e-07, "loss": 1.8533, "step": 6080 }, { "epoch": 1.76, "learning_rate": 7.482449670229897e-07, "loss": 1.9252, "step": 6082 }, { "epoch": 1.76, "learning_rate": 7.446886955927568e-07, "loss": 1.7652, "step": 6084 }, { "epoch": 1.76, "learning_rate": 7.41140568587918e-07, "loss": 1.8668, "step": 6086 }, { "epoch": 1.76, "learning_rate": 7.376005891307303e-07, "loss": 1.7152, "step": 6088 }, { "epoch": 1.76, "learning_rate": 7.340687603362828e-07, "loss": 1.8373, "step": 6090 }, { "epoch": 1.76, "learning_rate": 7.30545085312484e-07, "loss": 1.8647, "step": 6092 }, { "epoch": 1.76, "learning_rate": 7.270295671600792e-07, "loss": 1.7849, "step": 6094 }, { "epoch": 1.77, "learning_rate": 7.23522208972628e-07, "loss": 1.8133, "step": 6096 }, { "epoch": 1.77, "learning_rate": 7.200230138365094e-07, "loss": 1.8732, "step": 6098 }, { "epoch": 1.77, "learning_rate": 7.165319848309238e-07, "loss": 1.7945, "step": 6100 }, { "epoch": 1.77, "learning_rate": 7.130491250278837e-07, "loss": 1.8295, "step": 6102 }, { "epoch": 1.77, "learning_rate": 7.095744374922087e-07, "loss": 1.7708, "step": 6104 }, { "epoch": 1.77, "learning_rate": 7.061079252815328e-07, "loss": 1.7973, "step": 6106 }, { "epoch": 1.77, "learning_rate": 7.026495914462939e-07, "loss": 1.7952, "step": 6108 }, { "epoch": 1.77, "learning_rate": 6.991994390297307e-07, "loss": 1.8234, "step": 6110 }, { "epoch": 1.77, "learning_rate": 6.957574710678871e-07, "loss": 1.7593, "step": 6112 }, { "epoch": 1.77, "learning_rate": 6.923236905896025e-07, "loss": 1.7826, "step": 6114 }, { "epoch": 1.77, "learning_rate": 6.888981006165096e-07, "loss": 1.7721, "step": 6116 }, { "epoch": 1.77, "learning_rate": 6.854807041630363e-07, "loss": 1.8287, "step": 6118 }, { "epoch": 1.77, "learning_rate": 6.820715042364001e-07, "loss": 1.8697, "step": 6120 }, { "epoch": 1.77, "learning_rate": 6.78670503836606e-07, "loss": 1.8301, "step": 6122 }, { "epoch": 1.77, "learning_rate": 6.752777059564431e-07, "loss": 1.8667, "step": 6124 }, { "epoch": 1.77, "learning_rate": 6.718931135814788e-07, "loss": 1.7833, "step": 6126 }, { "epoch": 1.77, "learning_rate": 6.685167296900686e-07, "loss": 1.7909, "step": 6128 }, { "epoch": 1.78, "learning_rate": 6.651485572533379e-07, "loss": 1.865, "step": 6130 }, { "epoch": 1.78, "learning_rate": 6.617885992351847e-07, "loss": 1.8656, "step": 6132 }, { "epoch": 1.78, "learning_rate": 6.584368585922874e-07, "loss": 1.8938, "step": 6134 }, { "epoch": 1.78, "learning_rate": 6.550933382740832e-07, "loss": 1.8311, "step": 6136 }, { "epoch": 1.78, "learning_rate": 6.517580412227831e-07, "loss": 1.7859, "step": 6138 }, { "epoch": 1.78, "learning_rate": 6.48430970373356e-07, "loss": 1.8806, "step": 6140 }, { "epoch": 1.78, "learning_rate": 6.451121286535378e-07, "loss": 1.8035, "step": 6142 }, { "epoch": 1.78, "learning_rate": 6.41801518983819e-07, "loss": 1.8361, "step": 6144 }, { "epoch": 1.78, "learning_rate": 6.384991442774469e-07, "loss": 1.7832, "step": 6146 }, { "epoch": 1.78, "learning_rate": 6.352050074404225e-07, "loss": 1.8369, "step": 6148 }, { "epoch": 1.78, "learning_rate": 6.319191113714984e-07, "loss": 1.8891, "step": 6150 }, { "epoch": 1.78, "learning_rate": 6.286414589621747e-07, "loss": 1.859, "step": 6152 }, { "epoch": 1.78, "learning_rate": 6.253720530966967e-07, "loss": 1.841, "step": 6154 }, { "epoch": 1.78, "learning_rate": 6.221108966520561e-07, "loss": 1.8298, "step": 6156 }, { "epoch": 1.78, "learning_rate": 6.188579924979799e-07, "loss": 1.8113, "step": 6158 }, { "epoch": 1.78, "learning_rate": 6.156133434969369e-07, "loss": 1.8446, "step": 6160 }, { "epoch": 1.78, "learning_rate": 6.123769525041345e-07, "loss": 1.9107, "step": 6162 }, { "epoch": 1.79, "learning_rate": 6.091488223675058e-07, "loss": 1.8217, "step": 6164 }, { "epoch": 1.79, "learning_rate": 6.059289559277181e-07, "loss": 1.8286, "step": 6166 }, { "epoch": 1.79, "learning_rate": 6.027173560181731e-07, "loss": 1.8096, "step": 6168 }, { "epoch": 1.79, "learning_rate": 5.995140254649878e-07, "loss": 1.8085, "step": 6170 }, { "epoch": 1.79, "learning_rate": 5.963189670870073e-07, "loss": 1.7921, "step": 6172 }, { "epoch": 1.79, "learning_rate": 5.931321836957981e-07, "loss": 1.7358, "step": 6174 }, { "epoch": 1.79, "learning_rate": 5.899536780956439e-07, "loss": 1.8095, "step": 6176 }, { "epoch": 1.79, "learning_rate": 5.867834530835437e-07, "loss": 1.7957, "step": 6178 }, { "epoch": 1.79, "learning_rate": 5.836215114492117e-07, "loss": 1.7599, "step": 6180 }, { "epoch": 1.79, "learning_rate": 5.804678559750709e-07, "loss": 1.8478, "step": 6182 }, { "epoch": 1.79, "learning_rate": 5.77322489436255e-07, "loss": 1.8281, "step": 6184 }, { "epoch": 1.79, "learning_rate": 5.741854146005965e-07, "loss": 1.8363, "step": 6186 }, { "epoch": 1.79, "learning_rate": 5.710566342286427e-07, "loss": 1.8452, "step": 6188 }, { "epoch": 1.79, "learning_rate": 5.67936151073637e-07, "loss": 1.8256, "step": 6190 }, { "epoch": 1.79, "learning_rate": 5.64823967881516e-07, "loss": 1.8277, "step": 6192 }, { "epoch": 1.79, "learning_rate": 5.617200873909179e-07, "loss": 1.7777, "step": 6194 }, { "epoch": 1.79, "learning_rate": 5.58624512333179e-07, "loss": 1.8587, "step": 6196 }, { "epoch": 1.79, "learning_rate": 5.555372454323182e-07, "loss": 1.924, "step": 6198 }, { "epoch": 1.8, "learning_rate": 5.524582894050467e-07, "loss": 1.8908, "step": 6200 }, { "epoch": 1.8, "learning_rate": 5.493876469607673e-07, "loss": 1.8409, "step": 6202 }, { "epoch": 1.8, "learning_rate": 5.463253208015596e-07, "loss": 1.8502, "step": 6204 }, { "epoch": 1.8, "learning_rate": 5.43271313622189e-07, "loss": 1.7905, "step": 6206 }, { "epoch": 1.8, "learning_rate": 5.402256281101003e-07, "loss": 1.8561, "step": 6208 }, { "epoch": 1.8, "learning_rate": 5.371882669454143e-07, "loss": 1.8667, "step": 6210 }, { "epoch": 1.8, "learning_rate": 5.341592328009282e-07, "loss": 1.7545, "step": 6212 }, { "epoch": 1.8, "learning_rate": 5.311385283421089e-07, "loss": 1.8544, "step": 6214 }, { "epoch": 1.8, "learning_rate": 5.281261562270979e-07, "loss": 1.7612, "step": 6216 }, { "epoch": 1.8, "learning_rate": 5.251221191067013e-07, "loss": 1.7687, "step": 6218 }, { "epoch": 1.8, "learning_rate": 5.221264196243869e-07, "loss": 1.8736, "step": 6220 }, { "epoch": 1.8, "learning_rate": 5.191390604162938e-07, "loss": 1.7513, "step": 6222 }, { "epoch": 1.8, "learning_rate": 5.161600441112191e-07, "loss": 1.7629, "step": 6224 }, { "epoch": 1.8, "learning_rate": 5.131893733306137e-07, "loss": 1.8306, "step": 6226 }, { "epoch": 1.8, "learning_rate": 5.102270506885898e-07, "loss": 1.8442, "step": 6228 }, { "epoch": 1.8, "learning_rate": 5.072730787919111e-07, "loss": 1.9108, "step": 6230 }, { "epoch": 1.8, "learning_rate": 5.043274602399939e-07, "loss": 1.8389, "step": 6232 }, { "epoch": 1.81, "learning_rate": 5.013901976249036e-07, "loss": 1.8408, "step": 6234 }, { "epoch": 1.81, "learning_rate": 4.984612935313516e-07, "loss": 1.8271, "step": 6236 }, { "epoch": 1.81, "learning_rate": 4.95540750536696e-07, "loss": 1.7429, "step": 6238 }, { "epoch": 1.81, "learning_rate": 4.926285712109358e-07, "loss": 1.8627, "step": 6240 }, { "epoch": 1.81, "learning_rate": 4.897247581167108e-07, "loss": 1.8469, "step": 6242 }, { "epoch": 1.81, "learning_rate": 4.868293138092972e-07, "loss": 1.8321, "step": 6244 }, { "epoch": 1.81, "learning_rate": 4.839422408366102e-07, "loss": 1.7976, "step": 6246 }, { "epoch": 1.81, "learning_rate": 4.810635417391951e-07, "loss": 1.827, "step": 6248 }, { "epoch": 1.81, "learning_rate": 4.781932190502325e-07, "loss": 1.8251, "step": 6250 }, { "epoch": 1.81, "learning_rate": 4.7533127529552527e-07, "loss": 1.8052, "step": 6252 }, { "epoch": 1.81, "learning_rate": 4.724777129935065e-07, "loss": 1.8734, "step": 6254 }, { "epoch": 1.81, "learning_rate": 4.6963253465523995e-07, "loss": 1.8509, "step": 6256 }, { "epoch": 1.81, "learning_rate": 4.667957427844028e-07, "loss": 1.9109, "step": 6258 }, { "epoch": 1.81, "learning_rate": 4.639673398772937e-07, "loss": 1.8621, "step": 6260 }, { "epoch": 1.81, "learning_rate": 4.6114732842283516e-07, "loss": 1.8675, "step": 6262 }, { "epoch": 1.81, "learning_rate": 4.583357109025588e-07, "loss": 1.8207, "step": 6264 }, { "epoch": 1.81, "learning_rate": 4.555324897906133e-07, "loss": 1.8058, "step": 6266 }, { "epoch": 1.82, "learning_rate": 4.527376675537587e-07, "loss": 1.7903, "step": 6268 }, { "epoch": 1.82, "learning_rate": 4.4995124665136336e-07, "loss": 1.7929, "step": 6270 }, { "epoch": 1.82, "learning_rate": 4.4717322953540144e-07, "loss": 1.7806, "step": 6272 }, { "epoch": 1.82, "learning_rate": 4.444036186504552e-07, "loss": 1.8055, "step": 6274 }, { "epoch": 1.82, "learning_rate": 4.416424164337063e-07, "loss": 1.8513, "step": 6276 }, { "epoch": 1.82, "learning_rate": 4.3888962531494104e-07, "loss": 1.8925, "step": 6278 }, { "epoch": 1.82, "learning_rate": 4.361452477165373e-07, "loss": 1.8175, "step": 6280 }, { "epoch": 1.82, "learning_rate": 4.3340928605347776e-07, "loss": 1.848, "step": 6282 }, { "epoch": 1.82, "learning_rate": 4.306817427333343e-07, "loss": 1.8465, "step": 6284 }, { "epoch": 1.82, "learning_rate": 4.2796262015627033e-07, "loss": 1.8637, "step": 6286 }, { "epoch": 1.82, "learning_rate": 4.2525192071504076e-07, "loss": 1.8615, "step": 6288 }, { "epoch": 1.82, "learning_rate": 4.225496467949908e-07, "loss": 1.736, "step": 6290 }, { "epoch": 1.82, "learning_rate": 4.198558007740461e-07, "loss": 1.8179, "step": 6292 }, { "epoch": 1.82, "learning_rate": 4.1717038502272043e-07, "loss": 1.8314, "step": 6294 }, { "epoch": 1.82, "learning_rate": 4.1449340190410693e-07, "loss": 1.8783, "step": 6296 }, { "epoch": 1.82, "learning_rate": 4.118248537738789e-07, "loss": 1.8085, "step": 6298 }, { "epoch": 1.82, "learning_rate": 4.0916474298028694e-07, "loss": 1.741, "step": 6300 }, { "epoch": 1.83, "learning_rate": 4.0651307186415744e-07, "loss": 1.8242, "step": 6302 }, { "epoch": 1.83, "learning_rate": 4.038698427588894e-07, "loss": 1.8195, "step": 6304 }, { "epoch": 1.83, "learning_rate": 4.012350579904556e-07, "loss": 1.8126, "step": 6306 }, { "epoch": 1.83, "learning_rate": 3.9860871987739136e-07, "loss": 1.915, "step": 6308 }, { "epoch": 1.83, "learning_rate": 3.9599083073080693e-07, "loss": 1.8039, "step": 6310 }, { "epoch": 1.83, "learning_rate": 3.9338139285437504e-07, "loss": 1.8686, "step": 6312 }, { "epoch": 1.83, "learning_rate": 3.9078040854432784e-07, "loss": 1.8078, "step": 6314 }, { "epoch": 1.83, "learning_rate": 3.881878800894645e-07, "loss": 1.8016, "step": 6316 }, { "epoch": 1.83, "learning_rate": 3.856038097711401e-07, "loss": 1.7836, "step": 6318 }, { "epoch": 1.83, "learning_rate": 3.830281998632657e-07, "loss": 1.8198, "step": 6320 }, { "epoch": 1.83, "learning_rate": 3.804610526323105e-07, "loss": 1.8466, "step": 6322 }, { "epoch": 1.83, "learning_rate": 3.7790237033729306e-07, "loss": 1.7226, "step": 6324 }, { "epoch": 1.83, "learning_rate": 3.753521552297867e-07, "loss": 1.8734, "step": 6326 }, { "epoch": 1.83, "learning_rate": 3.72810409553912e-07, "loss": 1.9176, "step": 6328 }, { "epoch": 1.83, "learning_rate": 3.702771355463364e-07, "loss": 1.7755, "step": 6330 }, { "epoch": 1.83, "learning_rate": 3.677523354362733e-07, "loss": 1.7815, "step": 6332 }, { "epoch": 1.83, "learning_rate": 3.6523601144548003e-07, "loss": 1.7896, "step": 6334 }, { "epoch": 1.83, "learning_rate": 3.6272816578825196e-07, "loss": 1.8287, "step": 6336 }, { "epoch": 1.84, "learning_rate": 3.6022880067142717e-07, "loss": 1.8439, "step": 6338 }, { "epoch": 1.84, "learning_rate": 3.5773791829438184e-07, "loss": 1.8294, "step": 6340 }, { "epoch": 1.84, "learning_rate": 3.552555208490205e-07, "loss": 1.7731, "step": 6342 }, { "epoch": 1.84, "learning_rate": 3.527816105197901e-07, "loss": 1.8362, "step": 6344 }, { "epoch": 1.84, "learning_rate": 3.5031618948366595e-07, "loss": 1.829, "step": 6346 }, { "epoch": 1.84, "learning_rate": 3.4785925991014824e-07, "loss": 1.8538, "step": 6348 }, { "epoch": 1.84, "learning_rate": 3.454108239612741e-07, "loss": 1.8663, "step": 6350 }, { "epoch": 1.84, "learning_rate": 3.4297088379159684e-07, "loss": 1.8467, "step": 6352 }, { "epoch": 1.84, "learning_rate": 3.405394415482011e-07, "loss": 1.8871, "step": 6354 }, { "epoch": 1.84, "learning_rate": 3.381164993706898e-07, "loss": 1.8141, "step": 6356 }, { "epoch": 1.84, "learning_rate": 3.357020593911875e-07, "loss": 1.8029, "step": 6358 }, { "epoch": 1.84, "learning_rate": 3.332961237343357e-07, "loss": 1.843, "step": 6360 }, { "epoch": 1.84, "learning_rate": 3.308986945172943e-07, "loss": 1.8996, "step": 6362 }, { "epoch": 1.84, "learning_rate": 3.285097738497356e-07, "loss": 1.7635, "step": 6364 }, { "epoch": 1.84, "learning_rate": 3.2612936383384585e-07, "loss": 1.8144, "step": 6366 }, { "epoch": 1.84, "learning_rate": 3.2375746656432284e-07, "loss": 1.8346, "step": 6368 }, { "epoch": 1.84, "learning_rate": 3.213940841283714e-07, "loss": 1.7476, "step": 6370 }, { "epoch": 1.85, "learning_rate": 3.1903921860570564e-07, "loss": 1.8214, "step": 6372 }, { "epoch": 1.85, "learning_rate": 3.166928720685425e-07, "loss": 1.8104, "step": 6374 }, { "epoch": 1.85, "learning_rate": 3.143550465816036e-07, "loss": 1.7765, "step": 6376 }, { "epoch": 1.85, "learning_rate": 3.120257442021168e-07, "loss": 1.8005, "step": 6378 }, { "epoch": 1.85, "learning_rate": 3.097049669798002e-07, "loss": 1.7421, "step": 6380 }, { "epoch": 1.85, "learning_rate": 3.073927169568769e-07, "loss": 1.8344, "step": 6382 }, { "epoch": 1.85, "learning_rate": 3.0508899616806806e-07, "loss": 1.8013, "step": 6384 }, { "epoch": 1.85, "learning_rate": 3.0279380664058335e-07, "loss": 1.7919, "step": 6386 }, { "epoch": 1.85, "learning_rate": 3.0050715039412704e-07, "loss": 1.86, "step": 6388 }, { "epoch": 1.85, "learning_rate": 2.982290294408974e-07, "loss": 1.868, "step": 6390 }, { "epoch": 1.85, "learning_rate": 2.9595944578557855e-07, "loss": 1.8721, "step": 6392 }, { "epoch": 1.85, "learning_rate": 2.936984014253441e-07, "loss": 1.8373, "step": 6394 }, { "epoch": 1.85, "learning_rate": 2.9144589834985026e-07, "loss": 1.8094, "step": 6396 }, { "epoch": 1.85, "learning_rate": 2.8920193854124146e-07, "loss": 1.7636, "step": 6398 }, { "epoch": 1.85, "learning_rate": 2.869665239741415e-07, "loss": 1.7922, "step": 6400 }, { "epoch": 1.85, "learning_rate": 2.8473965661565353e-07, "loss": 1.8191, "step": 6402 }, { "epoch": 1.85, "learning_rate": 2.825213384253633e-07, "loss": 1.778, "step": 6404 }, { "epoch": 1.86, "learning_rate": 2.8031157135533173e-07, "loss": 1.833, "step": 6406 }, { "epoch": 1.86, "learning_rate": 2.781103573500921e-07, "loss": 1.8031, "step": 6408 }, { "epoch": 1.86, "learning_rate": 2.759176983466527e-07, "loss": 1.9016, "step": 6410 }, { "epoch": 1.86, "learning_rate": 2.7373359627449916e-07, "loss": 1.8235, "step": 6412 }, { "epoch": 1.86, "learning_rate": 2.7155805305557945e-07, "loss": 1.8037, "step": 6414 }, { "epoch": 1.86, "learning_rate": 2.6939107060431234e-07, "loss": 1.8942, "step": 6416 }, { "epoch": 1.86, "learning_rate": 2.672326508275869e-07, "loss": 1.7577, "step": 6418 }, { "epoch": 1.86, "learning_rate": 2.650827956247537e-07, "loss": 1.9022, "step": 6420 }, { "epoch": 1.86, "learning_rate": 2.629415068876262e-07, "loss": 1.7616, "step": 6422 }, { "epoch": 1.86, "learning_rate": 2.608087865004816e-07, "loss": 1.9203, "step": 6424 }, { "epoch": 1.86, "learning_rate": 2.586846363400575e-07, "loss": 1.8079, "step": 6426 }, { "epoch": 1.86, "learning_rate": 2.5656905827554866e-07, "loss": 1.801, "step": 6428 }, { "epoch": 1.86, "learning_rate": 2.5446205416860604e-07, "loss": 1.8108, "step": 6430 }, { "epoch": 1.86, "learning_rate": 2.523636258733375e-07, "loss": 1.8255, "step": 6432 }, { "epoch": 1.86, "learning_rate": 2.502737752363038e-07, "loss": 1.6676, "step": 6434 }, { "epoch": 1.86, "learning_rate": 2.4819250409651605e-07, "loss": 1.7742, "step": 6436 }, { "epoch": 1.86, "learning_rate": 2.461198142854382e-07, "loss": 1.9389, "step": 6438 }, { "epoch": 1.87, "learning_rate": 2.440557076269823e-07, "loss": 1.7722, "step": 6440 }, { "epoch": 1.87, "learning_rate": 2.420001859375054e-07, "loss": 1.769, "step": 6442 }, { "epoch": 1.87, "learning_rate": 2.399532510258107e-07, "loss": 1.7552, "step": 6444 }, { "epoch": 1.87, "learning_rate": 2.3791490469314728e-07, "loss": 1.7424, "step": 6446 }, { "epoch": 1.87, "learning_rate": 2.3588514873320589e-07, "loss": 1.6863, "step": 6448 }, { "epoch": 1.87, "learning_rate": 2.3386398493211558e-07, "loss": 1.9086, "step": 6450 }, { "epoch": 1.87, "learning_rate": 2.3185141506844698e-07, "loss": 1.754, "step": 6452 }, { "epoch": 1.87, "learning_rate": 2.298474409132079e-07, "loss": 1.8507, "step": 6454 }, { "epoch": 1.87, "learning_rate": 2.2785206422984098e-07, "loss": 1.8896, "step": 6456 }, { "epoch": 1.87, "learning_rate": 2.2586528677422392e-07, "loss": 1.8315, "step": 6458 }, { "epoch": 1.87, "learning_rate": 2.2388711029466826e-07, "loss": 1.8136, "step": 6460 }, { "epoch": 1.87, "learning_rate": 2.2191753653191816e-07, "loss": 1.7839, "step": 6462 }, { "epoch": 1.87, "learning_rate": 2.199565672191406e-07, "loss": 1.8141, "step": 6464 }, { "epoch": 1.87, "learning_rate": 2.1800420408193966e-07, "loss": 1.869, "step": 6466 }, { "epoch": 1.87, "learning_rate": 2.160604488383422e-07, "loss": 1.7892, "step": 6468 }, { "epoch": 1.87, "learning_rate": 2.1509179971145766e-07, "loss": 1.8175, "step": 6470 }, { "epoch": 1.87, "learning_rate": 2.1316095951299465e-07, "loss": 1.7859, "step": 6472 }, { "epoch": 1.87, "learning_rate": 2.1123873147005925e-07, "loss": 1.8258, "step": 6474 }, { "epoch": 1.88, "learning_rate": 2.0932511727416173e-07, "loss": 1.8416, "step": 6476 }, { "epoch": 1.88, "learning_rate": 2.074201186092306e-07, "loss": 1.8237, "step": 6478 }, { "epoch": 1.88, "learning_rate": 2.055237371516128e-07, "loss": 1.8434, "step": 6480 }, { "epoch": 1.88, "learning_rate": 2.0363597457007445e-07, "loss": 1.8237, "step": 6482 }, { "epoch": 1.88, "learning_rate": 2.017568325257946e-07, "loss": 1.7998, "step": 6484 }, { "epoch": 1.88, "learning_rate": 1.9988631267236826e-07, "loss": 1.7782, "step": 6486 }, { "epoch": 1.88, "learning_rate": 1.9802441665580208e-07, "loss": 1.7983, "step": 6488 }, { "epoch": 1.88, "learning_rate": 1.9617114611451548e-07, "loss": 1.8632, "step": 6490 }, { "epoch": 1.88, "learning_rate": 1.9432650267933838e-07, "loss": 1.8024, "step": 6492 }, { "epoch": 1.88, "learning_rate": 1.9249048797350234e-07, "loss": 1.8115, "step": 6494 }, { "epoch": 1.88, "learning_rate": 1.9066310361265604e-07, "loss": 1.8764, "step": 6496 }, { "epoch": 1.88, "learning_rate": 1.8884435120484658e-07, "loss": 1.8268, "step": 6498 }, { "epoch": 1.88, "learning_rate": 1.870342323505281e-07, "loss": 1.8459, "step": 6500 }, { "epoch": 1.88, "learning_rate": 1.8523274864255426e-07, "loss": 1.7776, "step": 6502 }, { "epoch": 1.88, "learning_rate": 1.8343990166618476e-07, "loss": 1.7939, "step": 6504 }, { "epoch": 1.88, "learning_rate": 1.816556929990765e-07, "loss": 1.8892, "step": 6506 }, { "epoch": 1.88, "learning_rate": 1.7988012421128463e-07, "loss": 1.876, "step": 6508 }, { "epoch": 1.89, "learning_rate": 1.7811319686526162e-07, "loss": 1.8444, "step": 6510 }, { "epoch": 1.89, "learning_rate": 1.76354912515857e-07, "loss": 1.8453, "step": 6512 }, { "epoch": 1.89, "learning_rate": 1.7460527271031312e-07, "loss": 1.8595, "step": 6514 }, { "epoch": 1.89, "learning_rate": 1.728642789882662e-07, "loss": 1.7766, "step": 6516 }, { "epoch": 1.89, "learning_rate": 1.71131932881744e-07, "loss": 1.8138, "step": 6518 }, { "epoch": 1.89, "learning_rate": 1.6940823591516498e-07, "loss": 1.8142, "step": 6520 }, { "epoch": 1.89, "learning_rate": 1.6769318960533465e-07, "loss": 1.9061, "step": 6522 }, { "epoch": 1.89, "learning_rate": 1.6598679546144914e-07, "loss": 1.738, "step": 6524 }, { "epoch": 1.89, "learning_rate": 1.6428905498509062e-07, "loss": 1.7889, "step": 6526 }, { "epoch": 1.89, "learning_rate": 1.6259996967022184e-07, "loss": 1.8246, "step": 6528 }, { "epoch": 1.89, "learning_rate": 1.6091954100319495e-07, "loss": 1.8332, "step": 6530 }, { "epoch": 1.89, "learning_rate": 1.5924777046274263e-07, "loss": 1.8265, "step": 6532 }, { "epoch": 1.89, "learning_rate": 1.5758465951997592e-07, "loss": 1.7096, "step": 6534 }, { "epoch": 1.89, "learning_rate": 1.5593020963838857e-07, "loss": 1.8322, "step": 6536 }, { "epoch": 1.89, "learning_rate": 1.5428442227385266e-07, "loss": 1.8304, "step": 6538 }, { "epoch": 1.89, "learning_rate": 1.526472988746164e-07, "loss": 1.955, "step": 6540 }, { "epoch": 1.89, "learning_rate": 1.5101884088130402e-07, "loss": 1.9095, "step": 6542 }, { "epoch": 1.9, "learning_rate": 1.4939904972691488e-07, "loss": 1.8259, "step": 6544 }, { "epoch": 1.9, "learning_rate": 1.477879268368221e-07, "loss": 1.8619, "step": 6546 }, { "epoch": 1.9, "learning_rate": 1.4618547362877157e-07, "loss": 1.7997, "step": 6548 }, { "epoch": 1.9, "learning_rate": 1.4459169151287643e-07, "loss": 1.7927, "step": 6550 }, { "epoch": 1.9, "learning_rate": 1.4300658189162486e-07, "loss": 1.8344, "step": 6552 }, { "epoch": 1.9, "learning_rate": 1.4143014615986994e-07, "loss": 1.7652, "step": 6554 }, { "epoch": 1.9, "learning_rate": 1.39862385704832e-07, "loss": 1.8386, "step": 6556 }, { "epoch": 1.9, "learning_rate": 1.3830330190609864e-07, "loss": 1.8415, "step": 6558 }, { "epoch": 1.9, "learning_rate": 1.3675289613562236e-07, "loss": 1.8147, "step": 6560 }, { "epoch": 1.9, "learning_rate": 1.352111697577163e-07, "loss": 1.8047, "step": 6562 }, { "epoch": 1.9, "learning_rate": 1.3367812412906078e-07, "loss": 1.8013, "step": 6564 }, { "epoch": 1.9, "learning_rate": 1.3215376059869444e-07, "loss": 1.8902, "step": 6566 }, { "epoch": 1.9, "learning_rate": 1.306380805080143e-07, "loss": 1.8311, "step": 6568 }, { "epoch": 1.9, "learning_rate": 1.291310851907812e-07, "loss": 1.8208, "step": 6570 }, { "epoch": 1.9, "learning_rate": 1.2763277597310776e-07, "loss": 1.8184, "step": 6572 }, { "epoch": 1.9, "learning_rate": 1.26143154173467e-07, "loss": 1.8422, "step": 6574 }, { "epoch": 1.9, "learning_rate": 1.2466222110268712e-07, "loss": 1.8527, "step": 6576 }, { "epoch": 1.91, "learning_rate": 1.2318997806394784e-07, "loss": 1.8131, "step": 6578 }, { "epoch": 1.91, "learning_rate": 1.2172642635278398e-07, "loss": 1.7978, "step": 6580 }, { "epoch": 1.91, "learning_rate": 1.20271567257082e-07, "loss": 1.8525, "step": 6582 }, { "epoch": 1.91, "learning_rate": 1.1882540205707783e-07, "loss": 1.7503, "step": 6584 }, { "epoch": 1.91, "learning_rate": 1.1738793202536014e-07, "loss": 1.8239, "step": 6586 }, { "epoch": 1.91, "learning_rate": 1.1595915842686267e-07, "loss": 1.784, "step": 6588 }, { "epoch": 1.91, "learning_rate": 1.1453908251886636e-07, "loss": 1.8297, "step": 6590 }, { "epoch": 1.91, "learning_rate": 1.1312770555100272e-07, "loss": 1.852, "step": 6592 }, { "epoch": 1.91, "learning_rate": 1.1172502876524383e-07, "loss": 1.8351, "step": 6594 }, { "epoch": 1.91, "learning_rate": 1.1033105339590678e-07, "loss": 1.7458, "step": 6596 }, { "epoch": 1.91, "learning_rate": 1.0894578066965588e-07, "loss": 1.8377, "step": 6598 }, { "epoch": 1.91, "learning_rate": 1.0825640817892392e-07, "loss": 1.797, "step": 6600 }, { "epoch": 1.91, "learning_rate": 1.0688419170053477e-07, "loss": 1.8481, "step": 6602 }, { "epoch": 1.91, "learning_rate": 1.0552068089837442e-07, "loss": 1.8026, "step": 6604 }, { "epoch": 1.91, "learning_rate": 1.0416587697229752e-07, "loss": 1.8518, "step": 6606 }, { "epoch": 1.91, "learning_rate": 1.0281978111449375e-07, "loss": 1.7746, "step": 6608 }, { "epoch": 1.91, "learning_rate": 1.0148239450949116e-07, "loss": 1.7854, "step": 6610 }, { "epoch": 1.91, "learning_rate": 1.001537183341561e-07, "loss": 1.7759, "step": 6612 }, { "epoch": 1.92, "learning_rate": 9.883375375768556e-08, "loss": 1.779, "step": 6614 }, { "epoch": 1.92, "learning_rate": 9.752250194161373e-08, "loss": 1.7712, "step": 6616 }, { "epoch": 1.92, "learning_rate": 9.62199640398076e-08, "loss": 1.8373, "step": 6618 }, { "epoch": 1.92, "learning_rate": 9.492614119846478e-08, "loss": 1.9126, "step": 6620 }, { "epoch": 1.92, "learning_rate": 9.364103455611451e-08, "loss": 1.8238, "step": 6622 }, { "epoch": 1.92, "learning_rate": 9.236464524361777e-08, "loss": 1.8081, "step": 6624 }, { "epoch": 1.92, "learning_rate": 9.109697438416054e-08, "loss": 1.8599, "step": 6626 }, { "epoch": 1.92, "learning_rate": 8.98380230932605e-08, "loss": 1.8142, "step": 6628 }, { "epoch": 1.92, "learning_rate": 8.858779247876037e-08, "loss": 1.8294, "step": 6630 }, { "epoch": 1.92, "learning_rate": 8.734628364083008e-08, "loss": 1.8201, "step": 6632 }, { "epoch": 1.92, "learning_rate": 8.611349767196465e-08, "loss": 1.7541, "step": 6634 }, { "epoch": 1.92, "learning_rate": 8.488943565698071e-08, "loss": 1.8268, "step": 6636 }, { "epoch": 1.92, "learning_rate": 8.367409867302112e-08, "loss": 1.7823, "step": 6638 }, { "epoch": 1.92, "learning_rate": 8.246748778955038e-08, "loss": 1.7887, "step": 6640 }, { "epoch": 1.92, "learning_rate": 8.12696040683525e-08, "loss": 1.7934, "step": 6642 }, { "epoch": 1.92, "learning_rate": 8.008044856353315e-08, "loss": 1.8104, "step": 6644 }, { "epoch": 1.92, "learning_rate": 7.89000223215186e-08, "loss": 1.7692, "step": 6646 }, { "epoch": 1.93, "learning_rate": 7.772832638105021e-08, "loss": 1.7721, "step": 6648 }, { "epoch": 1.93, "learning_rate": 7.656536177318986e-08, "loss": 1.8268, "step": 6650 }, { "epoch": 1.93, "learning_rate": 7.541112952131669e-08, "loss": 1.8264, "step": 6652 }, { "epoch": 1.93, "learning_rate": 7.426563064112046e-08, "loss": 1.8181, "step": 6654 }, { "epoch": 1.93, "learning_rate": 7.312886614061265e-08, "loss": 1.9159, "step": 6656 }, { "epoch": 1.93, "learning_rate": 7.200083702011529e-08, "loss": 1.899, "step": 6658 }, { "epoch": 1.93, "learning_rate": 7.088154427226213e-08, "loss": 1.8248, "step": 6660 }, { "epoch": 1.93, "learning_rate": 6.977098888200195e-08, "loss": 1.8224, "step": 6662 }, { "epoch": 1.93, "learning_rate": 6.866917182659194e-08, "loss": 1.8505, "step": 6664 }, { "epoch": 1.93, "learning_rate": 6.757609407560429e-08, "loss": 1.8421, "step": 6666 }, { "epoch": 1.93, "learning_rate": 6.649175659091623e-08, "loss": 1.8119, "step": 6668 }, { "epoch": 1.93, "learning_rate": 6.54161603267145e-08, "loss": 1.7577, "step": 6670 }, { "epoch": 1.93, "learning_rate": 6.434930622949753e-08, "loss": 1.8412, "step": 6672 }, { "epoch": 1.93, "learning_rate": 6.32911952380677e-08, "loss": 1.837, "step": 6674 }, { "epoch": 1.93, "learning_rate": 6.224182828353243e-08, "loss": 1.8373, "step": 6676 }, { "epoch": 1.93, "learning_rate": 6.120120628930859e-08, "loss": 1.8321, "step": 6678 }, { "epoch": 1.93, "learning_rate": 6.016933017111481e-08, "loss": 1.8274, "step": 6680 }, { "epoch": 1.94, "learning_rate": 5.914620083697365e-08, "loss": 1.8095, "step": 6682 }, { "epoch": 1.94, "learning_rate": 5.813181918721267e-08, "loss": 1.8558, "step": 6684 }, { "epoch": 1.94, "learning_rate": 5.712618611446008e-08, "loss": 1.9687, "step": 6686 }, { "epoch": 1.94, "learning_rate": 5.6129302503644675e-08, "loss": 1.805, "step": 6688 }, { "epoch": 1.94, "learning_rate": 5.514116923199919e-08, "loss": 1.8037, "step": 6690 }, { "epoch": 1.94, "learning_rate": 5.416178716905252e-08, "loss": 1.8022, "step": 6692 }, { "epoch": 1.94, "learning_rate": 5.319115717663636e-08, "loss": 1.9205, "step": 6694 }, { "epoch": 1.94, "learning_rate": 5.222928010887862e-08, "loss": 1.8316, "step": 6696 }, { "epoch": 1.94, "learning_rate": 5.1276156812204434e-08, "loss": 1.801, "step": 6698 }, { "epoch": 1.94, "learning_rate": 5.033178812533735e-08, "loss": 1.7926, "step": 6700 }, { "epoch": 1.94, "learning_rate": 4.939617487929593e-08, "loss": 1.8503, "step": 6702 }, { "epoch": 1.94, "learning_rate": 4.8469317897396064e-08, "loss": 1.7448, "step": 6704 }, { "epoch": 1.94, "learning_rate": 4.7551217995245316e-08, "loss": 1.7845, "step": 6706 }, { "epoch": 1.94, "learning_rate": 4.664187598074743e-08, "loss": 1.8482, "step": 6708 }, { "epoch": 1.94, "learning_rate": 4.57412926541001e-08, "loss": 1.8219, "step": 6710 }, { "epoch": 1.94, "learning_rate": 4.4849468807791615e-08, "loss": 1.8874, "step": 6712 }, { "epoch": 1.94, "learning_rate": 4.3966405226602004e-08, "loss": 1.7444, "step": 6714 }, { "epoch": 1.94, "learning_rate": 4.309210268760411e-08, "loss": 1.858, "step": 6716 }, { "epoch": 1.95, "learning_rate": 4.22265619601625e-08, "loss": 1.7964, "step": 6718 }, { "epoch": 1.95, "learning_rate": 4.136978380592682e-08, "loss": 1.7859, "step": 6720 }, { "epoch": 1.95, "learning_rate": 4.0521768978840634e-08, "loss": 1.7779, "step": 6722 }, { "epoch": 1.95, "learning_rate": 3.9682518225134803e-08, "loss": 1.7899, "step": 6724 }, { "epoch": 1.95, "learning_rate": 3.8852032283326346e-08, "loss": 1.8346, "step": 6726 }, { "epoch": 1.95, "learning_rate": 3.803031188421957e-08, "loss": 1.8469, "step": 6728 }, { "epoch": 1.95, "learning_rate": 3.72173577509094e-08, "loss": 1.8854, "step": 6730 }, { "epoch": 1.95, "learning_rate": 3.6413170598770255e-08, "loss": 1.7783, "step": 6732 }, { "epoch": 1.95, "learning_rate": 3.561775113546828e-08, "loss": 1.8523, "step": 6734 }, { "epoch": 1.95, "learning_rate": 3.483110006094803e-08, "loss": 1.9493, "step": 6736 }, { "epoch": 1.95, "learning_rate": 3.4053218067443546e-08, "loss": 1.7974, "step": 6738 }, { "epoch": 1.95, "learning_rate": 3.3284105839470616e-08, "loss": 1.8033, "step": 6740 }, { "epoch": 1.95, "learning_rate": 3.2523764053825627e-08, "loss": 1.7993, "step": 6742 }, { "epoch": 1.95, "learning_rate": 3.177219337958892e-08, "loss": 1.7738, "step": 6744 }, { "epoch": 1.95, "learning_rate": 3.1029394478124806e-08, "loss": 1.7647, "step": 6746 }, { "epoch": 1.95, "learning_rate": 3.0295368003073756e-08, "loss": 1.8418, "step": 6748 }, { "epoch": 1.95, "learning_rate": 2.957011460036019e-08, "loss": 1.7791, "step": 6750 }, { "epoch": 1.96, "learning_rate": 2.8853634908188045e-08, "loss": 1.8262, "step": 6752 }, { "epoch": 1.96, "learning_rate": 2.8145929557040762e-08, "loss": 1.8625, "step": 6754 }, { "epoch": 1.96, "learning_rate": 2.7446999169677968e-08, "loss": 1.7885, "step": 6756 }, { "epoch": 1.96, "learning_rate": 2.675684436114212e-08, "loss": 1.7771, "step": 6758 }, { "epoch": 1.96, "learning_rate": 2.607546573874853e-08, "loss": 1.7315, "step": 6760 }, { "epoch": 1.96, "learning_rate": 2.5402863902094233e-08, "loss": 1.9044, "step": 6762 }, { "epoch": 1.96, "learning_rate": 2.4739039443049116e-08, "loss": 1.8263, "step": 6764 }, { "epoch": 1.96, "learning_rate": 2.408399294576258e-08, "loss": 1.7875, "step": 6766 }, { "epoch": 1.96, "learning_rate": 2.343772498665686e-08, "loss": 1.8623, "step": 6768 }, { "epoch": 1.96, "learning_rate": 2.2800236134429275e-08, "loss": 1.8514, "step": 6770 }, { "epoch": 1.96, "learning_rate": 2.2171526950054424e-08, "loss": 1.7741, "step": 6772 }, { "epoch": 1.96, "learning_rate": 2.1551597986780857e-08, "loss": 1.8347, "step": 6774 }, { "epoch": 1.96, "learning_rate": 2.094044979012555e-08, "loss": 1.7823, "step": 6776 }, { "epoch": 1.96, "learning_rate": 2.033808289788608e-08, "loss": 1.8601, "step": 6778 }, { "epoch": 1.96, "learning_rate": 1.974449784012733e-08, "loss": 1.8835, "step": 6780 }, { "epoch": 1.96, "learning_rate": 1.9159695139189248e-08, "loss": 1.9081, "step": 6782 }, { "epoch": 1.96, "learning_rate": 1.8583675309681302e-08, "loss": 1.7362, "step": 6784 }, { "epoch": 1.97, "learning_rate": 1.8016438858485807e-08, "loss": 1.8543, "step": 6786 }, { "epoch": 1.97, "learning_rate": 1.7457986284756812e-08, "loss": 1.7771, "step": 6788 }, { "epoch": 1.97, "learning_rate": 1.6908318079915663e-08, "loss": 1.8068, "step": 6790 }, { "epoch": 1.97, "learning_rate": 1.636743472765656e-08, "loss": 1.7731, "step": 6792 }, { "epoch": 1.97, "learning_rate": 1.5835336703943215e-08, "loss": 1.8231, "step": 6794 }, { "epoch": 1.97, "learning_rate": 1.5312024477006637e-08, "loss": 1.8414, "step": 6796 }, { "epoch": 1.97, "learning_rate": 1.4797498507347353e-08, "loss": 1.8044, "step": 6798 }, { "epoch": 1.97, "learning_rate": 1.4291759247734294e-08, "loss": 1.8366, "step": 6800 }, { "epoch": 1.97, "learning_rate": 1.3794807143205913e-08, "loss": 1.8627, "step": 6802 }, { "epoch": 1.97, "learning_rate": 1.3306642631064625e-08, "loss": 1.7892, "step": 6804 }, { "epoch": 1.97, "learning_rate": 1.2827266140883476e-08, "loss": 1.8368, "step": 6806 }, { "epoch": 1.97, "learning_rate": 1.2356678094500585e-08, "loss": 1.865, "step": 6808 }, { "epoch": 1.97, "learning_rate": 1.1894878906020258e-08, "loss": 1.8197, "step": 6810 }, { "epoch": 1.97, "learning_rate": 1.1441868981815207e-08, "loss": 1.8394, "step": 6812 }, { "epoch": 1.97, "learning_rate": 1.0997648720519893e-08, "loss": 1.9009, "step": 6814 }, { "epoch": 1.97, "learning_rate": 1.0562218513036071e-08, "loss": 1.8575, "step": 6816 }, { "epoch": 1.97, "learning_rate": 1.0135578742532792e-08, "loss": 1.8509, "step": 6818 }, { "epoch": 1.98, "learning_rate": 9.717729784439745e-09, "loss": 1.8586, "step": 6820 }, { "epoch": 1.98, "learning_rate": 9.308672006453912e-09, "loss": 1.7979, "step": 6822 }, { "epoch": 1.98, "learning_rate": 8.908405768534023e-09, "loss": 1.8079, "step": 6824 }, { "epoch": 1.98, "learning_rate": 8.516931422903884e-09, "loss": 1.7776, "step": 6826 }, { "epoch": 1.98, "learning_rate": 8.134249314051268e-09, "loss": 1.797, "step": 6828 }, { "epoch": 1.98, "learning_rate": 7.76035977872569e-09, "loss": 1.7872, "step": 6830 }, { "epoch": 1.98, "learning_rate": 7.395263145939524e-09, "loss": 1.7179, "step": 6832 }, { "epoch": 1.98, "learning_rate": 7.0389597369691084e-09, "loss": 1.8638, "step": 6834 }, { "epoch": 1.98, "learning_rate": 6.691449865350308e-09, "loss": 1.8992, "step": 6836 }, { "epoch": 1.98, "learning_rate": 6.3527338368840615e-09, "loss": 1.834, "step": 6838 }, { "epoch": 1.98, "learning_rate": 6.022811949629726e-09, "loss": 1.8284, "step": 6840 }, { "epoch": 1.98, "learning_rate": 5.70168449391062e-09, "loss": 1.7883, "step": 6842 }, { "epoch": 1.98, "learning_rate": 5.389351752310701e-09, "loss": 1.906, "step": 6844 }, { "epoch": 1.98, "learning_rate": 5.08581399967345e-09, "loss": 1.8207, "step": 6846 }, { "epoch": 1.98, "learning_rate": 4.791071503105205e-09, "loss": 1.8244, "step": 6848 }, { "epoch": 1.98, "learning_rate": 4.505124521969606e-09, "loss": 1.7818, "step": 6850 }, { "epoch": 1.98, "learning_rate": 4.2279733078931514e-09, "loss": 1.8198, "step": 6852 }, { "epoch": 1.98, "learning_rate": 3.959618104762975e-09, "loss": 1.718, "step": 6854 }, { "epoch": 1.99, "learning_rate": 3.7000591487224012e-09, "loss": 1.8369, "step": 6856 }, { "epoch": 1.99, "learning_rate": 3.449296668176505e-09, "loss": 1.8278, "step": 6858 }, { "epoch": 1.99, "learning_rate": 3.2073308837909933e-09, "loss": 1.8245, "step": 6860 }, { "epoch": 1.99, "learning_rate": 2.9741620084877687e-09, "loss": 1.7914, "step": 6862 }, { "epoch": 1.99, "learning_rate": 2.7497902474504788e-09, "loss": 1.858, "step": 6864 }, { "epoch": 1.99, "learning_rate": 2.5342157981189663e-09, "loss": 1.8593, "step": 6866 }, { "epoch": 1.99, "learning_rate": 2.3274388501937084e-09, "loss": 1.8576, "step": 6868 }, { "epoch": 1.99, "learning_rate": 2.1294595856324874e-09, "loss": 1.7837, "step": 6870 }, { "epoch": 1.99, "learning_rate": 1.9402781786515e-09, "loss": 1.7776, "step": 6872 }, { "epoch": 1.99, "learning_rate": 1.7598947957264689e-09, "loss": 1.8347, "step": 6874 }, { "epoch": 1.99, "learning_rate": 1.5883095955893102e-09, "loss": 1.7612, "step": 6876 }, { "epoch": 1.99, "learning_rate": 1.4255227292303552e-09, "loss": 1.8239, "step": 6878 }, { "epoch": 1.99, "learning_rate": 1.2715343398972401e-09, "loss": 1.8468, "step": 6880 }, { "epoch": 1.99, "learning_rate": 1.1263445630960158e-09, "loss": 1.8107, "step": 6882 }, { "epoch": 1.99, "learning_rate": 9.899535265900374e-10, "loss": 1.8042, "step": 6884 }, { "epoch": 1.99, "learning_rate": 8.623613503988548e-10, "loss": 1.7577, "step": 6886 }, { "epoch": 1.99, "learning_rate": 7.435681468015432e-10, "loss": 1.7495, "step": 6888 }, { "epoch": 2.0, "learning_rate": 6.335740203311514e-10, "loss": 1.8365, "step": 6890 }, { "epoch": 2.0, "learning_rate": 5.323790677813634e-10, "loss": 1.8331, "step": 6892 }, { "epoch": 2.0, "learning_rate": 4.399833781998375e-10, "loss": 1.7801, "step": 6894 }, { "epoch": 2.0, "learning_rate": 3.5638703289375685e-10, "loss": 1.818, "step": 6896 }, { "epoch": 2.0, "learning_rate": 2.8159010542316845e-10, "loss": 1.8007, "step": 6898 }, { "epoch": 2.0, "learning_rate": 2.1559266160986469e-10, "loss": 1.7711, "step": 6900 }, { "epoch": 2.0, "learning_rate": 1.583947595273916e-10, "loss": 1.8592, "step": 6902 }, { "epoch": 2.0, "learning_rate": 1.099964495110406e-10, "loss": 1.8752, "step": 6904 }, { "epoch": 2.0, "learning_rate": 7.039777414785676e-11, "loss": 1.8101, "step": 6906 }, { "epoch": 2.0, "step": 6906, "total_flos": 1.8527733254874726e+17, "train_loss": 1.973716755069376, "train_runtime": 36805.2625, "train_samples_per_second": 12.008, "train_steps_per_second": 0.188 } ], "logging_steps": 2, "max_steps": 6906, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "total_flos": 1.8527733254874726e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }