{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.996954314720812,
  "eval_steps": 500,
  "global_step": 2214,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.00676818950930626, "grad_norm": 7.503111624582946, "learning_rate": 4.504504504504504e-08, "loss": 0.7511, "step": 5 },
    { "epoch": 0.01353637901861252, "grad_norm": 7.290232887177767, "learning_rate": 9.009009009009008e-08, "loss": 0.7468, "step": 10 },
    { "epoch": 0.02030456852791878, "grad_norm": 6.95340550567438, "learning_rate": 1.3513513513513515e-07, "loss": 0.7048, "step": 15 },
    { "epoch": 0.02707275803722504, "grad_norm": 6.910978359875767, "learning_rate": 1.8018018018018017e-07, "loss": 0.734, "step": 20 },
    { "epoch": 0.0338409475465313, "grad_norm": 5.990453108110331, "learning_rate": 2.2522522522522522e-07, "loss": 0.7075, "step": 25 },
    { "epoch": 0.04060913705583756, "grad_norm": 6.315135953523189, "learning_rate": 2.702702702702703e-07, "loss": 0.7034, "step": 30 },
    { "epoch": 0.047377326565143825, "grad_norm": 4.938125726164199, "learning_rate": 3.153153153153153e-07, "loss": 0.6955, "step": 35 },
    { "epoch": 0.05414551607445008, "grad_norm": 4.514212008344891, "learning_rate": 3.6036036036036033e-07, "loss": 0.6675, "step": 40 },
    { "epoch": 0.06091370558375635, "grad_norm": 2.5175394937632234, "learning_rate": 4.054054054054054e-07, "loss": 0.6121, "step": 45 },
    { "epoch": 0.0676818950930626, "grad_norm": 2.044111131735211, "learning_rate": 4.5045045045045043e-07, "loss": 0.5689, "step": 50 },
    { "epoch": 0.07445008460236886, "grad_norm": 1.9607335603547595, "learning_rate": 4.954954954954955e-07, "loss": 0.5681, "step": 55 },
    { "epoch": 0.08121827411167512, "grad_norm": 1.7055910206402423, "learning_rate": 5.405405405405406e-07, "loss": 0.5632, "step": 60 },
    { "epoch": 0.08798646362098139, "grad_norm": 1.6356611418313087, "learning_rate": 5.855855855855856e-07, "loss": 0.5455, "step": 65 },
    { "epoch": 0.09475465313028765, "grad_norm": 1.5898009813340985, "learning_rate": 6.306306306306306e-07, "loss": 0.5124, "step": 70 },
    { "epoch": 0.10152284263959391, "grad_norm": 1.6241164657421379, "learning_rate": 6.756756756756756e-07, "loss": 0.5235, "step": 75 },
    { "epoch": 0.10829103214890017, "grad_norm": 1.5281172683623996, "learning_rate": 7.207207207207207e-07, "loss": 0.5274, "step": 80 },
    { "epoch": 0.11505922165820642, "grad_norm": 1.467101912720914, "learning_rate": 7.657657657657657e-07, "loss": 0.5152, "step": 85 },
    { "epoch": 0.1218274111675127, "grad_norm": 1.4144943645830494, "learning_rate": 8.108108108108108e-07, "loss": 0.4903, "step": 90 },
    { "epoch": 0.12859560067681894, "grad_norm": 1.5168733264951235, "learning_rate": 8.558558558558558e-07, "loss": 0.5235, "step": 95 },
    { "epoch": 0.1353637901861252, "grad_norm": 1.4719778866974211, "learning_rate": 9.009009009009009e-07, "loss": 0.4955, "step": 100 },
    { "epoch": 0.14213197969543148, "grad_norm": 1.5617750862395916, "learning_rate": 9.459459459459459e-07, "loss": 0.5023, "step": 105 },
    { "epoch": 0.14890016920473773, "grad_norm": 1.608427652038671, "learning_rate": 9.90990990990991e-07, "loss": 0.5161, "step": 110 },
    { "epoch": 0.155668358714044, "grad_norm": 1.470675404237294, "learning_rate": 1.0360360360360361e-06, "loss": 0.4629, "step": 115 },
    { "epoch": 0.16243654822335024, "grad_norm": 1.5907906676415646, "learning_rate": 1.0810810810810812e-06, "loss": 0.5272, "step": 120 },
    { "epoch": 0.1692047377326565, "grad_norm": 1.4139201116022069, "learning_rate": 1.1261261261261262e-06, "loss": 0.4828, "step": 125 },
    { "epoch": 0.17597292724196278, "grad_norm": 1.572210590864309, "learning_rate": 1.1711711711711712e-06, "loss": 0.4932, "step": 130 },
    { "epoch": 0.18274111675126903, "grad_norm": 1.8317582344702474, "learning_rate": 1.2162162162162162e-06, "loss": 0.4851, "step": 135 },
    { "epoch": 0.1895093062605753, "grad_norm": 1.4834369904658524, "learning_rate": 1.2612612612612613e-06, "loss": 0.4823, "step": 140 },
    { "epoch": 0.19627749576988154, "grad_norm": 1.487385439852692, "learning_rate": 1.3063063063063063e-06, "loss": 0.4835, "step": 145 },
    { "epoch": 0.20304568527918782, "grad_norm": 1.6282121013160264, "learning_rate": 1.3513513513513513e-06, "loss": 0.4845, "step": 150 },
    { "epoch": 0.2098138747884941, "grad_norm": 1.410710327011968, "learning_rate": 1.3963963963963963e-06, "loss": 0.4604, "step": 155 },
    { "epoch": 0.21658206429780033, "grad_norm": 1.5959737364732594, "learning_rate": 1.4414414414414413e-06, "loss": 0.49, "step": 160 },
    { "epoch": 0.2233502538071066, "grad_norm": 1.5229055220751309, "learning_rate": 1.4864864864864864e-06, "loss": 0.4907, "step": 165 },
    { "epoch": 0.23011844331641285, "grad_norm": 1.5262092330780643, "learning_rate": 1.5315315315315314e-06, "loss": 0.4603, "step": 170 },
    { "epoch": 0.23688663282571912, "grad_norm": 1.6342082189425051, "learning_rate": 1.5765765765765766e-06, "loss": 0.4872, "step": 175 },
    { "epoch": 0.2436548223350254, "grad_norm": 1.4374951298863459, "learning_rate": 1.6216216216216216e-06, "loss": 0.4648, "step": 180 },
    { "epoch": 0.25042301184433163, "grad_norm": 1.5280129895322156, "learning_rate": 1.6666666666666667e-06, "loss": 0.4467, "step": 185 },
    { "epoch": 0.2571912013536379, "grad_norm": 1.5525834339213638, "learning_rate": 1.7117117117117117e-06, "loss": 0.4738, "step": 190 },
    { "epoch": 0.2639593908629442, "grad_norm": 1.5093982724761799, "learning_rate": 1.7567567567567567e-06, "loss": 0.483, "step": 195 },
    { "epoch": 0.2707275803722504, "grad_norm": 1.6721932829256383, "learning_rate": 1.8018018018018017e-06, "loss": 0.463, "step": 200 },
    { "epoch": 0.27749576988155666, "grad_norm": 1.5493470193453733, "learning_rate": 1.8468468468468467e-06, "loss": 0.4432, "step": 205 },
    { "epoch": 0.28426395939086296, "grad_norm": 1.5282256717827059, "learning_rate": 1.8918918918918918e-06, "loss": 0.4659, "step": 210 },
    { "epoch": 0.2910321489001692, "grad_norm": 1.493666122191364, "learning_rate": 1.936936936936937e-06, "loss": 0.4789, "step": 215 },
    { "epoch": 0.29780033840947545, "grad_norm": 1.7222146450605926, "learning_rate": 1.981981981981982e-06, "loss": 0.4636, "step": 220 },
    { "epoch": 0.30456852791878175, "grad_norm": 1.6787579180811572, "learning_rate": 1.999988807353673e-06, "loss": 0.4697, "step": 225 },
    { "epoch": 0.311336717428088, "grad_norm": 1.5374929160562005, "learning_rate": 1.999920408755684e-06, "loss": 0.4702, "step": 230 },
    { "epoch": 0.31810490693739424, "grad_norm": 1.5486529898439687, "learning_rate": 1.9997898339445025e-06, "loss": 0.4545, "step": 235 },
    { "epoch": 0.3248730964467005, "grad_norm": 1.5982741872566966, "learning_rate": 1.9995970910394226e-06, "loss": 0.4769, "step": 240 },
    { "epoch": 0.3316412859560068, "grad_norm": 1.5356403333143016, "learning_rate": 1.999342192025422e-06, "loss": 0.456, "step": 245 },
    { "epoch": 0.338409475465313, "grad_norm": 1.5906824266272626, "learning_rate": 1.9990251527524177e-06, "loss": 0.4803, "step": 250 },
    { "epoch": 0.34517766497461927, "grad_norm": 1.5359520161986076, "learning_rate": 1.99864599293428e-06, "loss": 0.4475, "step": 255 },
    { "epoch": 0.35194585448392557, "grad_norm": 1.5036681203646087, "learning_rate": 1.9982047361476074e-06, "loss": 0.464, "step": 260 },
    { "epoch": 0.3587140439932318, "grad_norm": 1.461884922691461, "learning_rate": 1.9977014098302594e-06, "loss": 0.4593, "step": 265 },
    { "epoch": 0.36548223350253806, "grad_norm": 1.434636405242703, "learning_rate": 1.997136045279652e-06, "loss": 0.4524, "step": 270 },
    { "epoch": 0.37225042301184436, "grad_norm": 1.4806026186739891, "learning_rate": 1.996508677650809e-06, "loss": 0.455, "step": 275 },
    { "epoch": 0.3790186125211506, "grad_norm": 1.5577278096965192, "learning_rate": 1.9958193459541803e-06, "loss": 0.4715, "step": 280 },
    { "epoch": 0.38578680203045684, "grad_norm": 1.6997591630335047, "learning_rate": 1.9950680930532106e-06, "loss": 0.4613, "step": 285 },
    { "epoch": 0.3925549915397631, "grad_norm": 1.750274534222129, "learning_rate": 1.9942549656616785e-06, "loss": 0.4654, "step": 290 },
    { "epoch": 0.3993231810490694, "grad_norm": 1.8009950734484617, "learning_rate": 1.9933800143407914e-06, "loss": 0.449, "step": 295 },
    { "epoch": 0.40609137055837563, "grad_norm": 1.5530961439855866, "learning_rate": 1.992443293496038e-06, "loss": 0.4458, "step": 300 },
    { "epoch": 0.4128595600676819, "grad_norm": 1.5081709456372263, "learning_rate": 1.9914448613738106e-06, "loss": 0.4565, "step": 305 },
    { "epoch": 0.4196277495769882, "grad_norm": 1.6772043840290556, "learning_rate": 1.9903847800577777e-06, "loss": 0.4804, "step": 310 },
    { "epoch": 0.4263959390862944, "grad_norm": 1.727246671277696, "learning_rate": 1.9892631154650277e-06, "loss": 0.4576, "step": 315 },
    { "epoch": 0.43316412859560066, "grad_norm": 1.5355208962246418, "learning_rate": 1.9880799373419697e-06, "loss": 0.4453, "step": 320 },
    { "epoch": 0.43993231810490696, "grad_norm": 1.5063427073085303, "learning_rate": 1.986835319259994e-06, "loss": 0.4452, "step": 325 },
    { "epoch": 0.4467005076142132, "grad_norm": 1.5195960228713277, "learning_rate": 1.985529338610899e-06, "loss": 0.465, "step": 330 },
    { "epoch": 0.45346869712351945, "grad_norm": 1.5343721052947665, "learning_rate": 1.98416207660208e-06, "loss": 0.436, "step": 335 },
    { "epoch": 0.4602368866328257, "grad_norm": 1.5094089297695503, "learning_rate": 1.982733618251478e-06, "loss": 0.4583, "step": 340 },
    { "epoch": 0.467005076142132, "grad_norm": 1.3787923632280212, "learning_rate": 1.981244052382293e-06, "loss": 0.4512, "step": 345 },
    { "epoch": 0.47377326565143824, "grad_norm": 1.5868715153799644, "learning_rate": 1.9796934716174616e-06, "loss": 0.427, "step": 350 },
    { "epoch": 0.4805414551607445, "grad_norm": 1.5348278464191702, "learning_rate": 1.978081972373899e-06, "loss": 0.4593, "step": 355 },
    { "epoch": 0.4873096446700508, "grad_norm": 1.587810620443399, "learning_rate": 1.976409654856501e-06, "loss": 0.4453, "step": 360 },
    { "epoch": 0.494077834179357, "grad_norm": 1.6669439346571866, "learning_rate": 1.9746766230519137e-06, "loss": 0.4726, "step": 365 },
    { "epoch": 0.5008460236886633, "grad_norm": 1.6972318618519455, "learning_rate": 1.9728829847220696e-06, "loss": 0.454, "step": 370 },
    { "epoch": 0.5076142131979695, "grad_norm": 1.5739750068827298, "learning_rate": 1.9710288513974846e-06, "loss": 0.4592, "step": 375 },
    { "epoch": 0.5143824027072758, "grad_norm": 1.5240777641912773, "learning_rate": 1.969114338370324e-06, "loss": 0.4297, "step": 380 },
    { "epoch": 0.5211505922165821, "grad_norm": 1.6105894445618845, "learning_rate": 1.9671395646872323e-06, "loss": 0.447, "step": 385 },
    { "epoch": 0.5279187817258884, "grad_norm": 1.7397039341357436, "learning_rate": 1.965104653141933e-06, "loss": 0.4516, "step": 390 },
    { "epoch": 0.5346869712351946, "grad_norm": 1.667337409755205, "learning_rate": 1.9630097302675913e-06, "loss": 0.4497, "step": 395 },
    { "epoch": 0.5414551607445008, "grad_norm": 1.472174322913246, "learning_rate": 1.9608549263289456e-06, "loss": 0.4396, "step": 400 },
    { "epoch": 0.5482233502538071, "grad_norm": 1.390664891183549, "learning_rate": 1.95864037531421e-06, "loss": 0.4584, "step": 405 },
    { "epoch": 0.5549915397631133, "grad_norm": 1.7716596272823615, "learning_rate": 1.9563662149267405e-06, "loss": 0.4417, "step": 410 },
    { "epoch": 0.5617597292724196, "grad_norm": 1.7181597011497438, "learning_rate": 1.9540325865764725e-06, "loss": 0.4566, "step": 415 },
    { "epoch": 0.5685279187817259, "grad_norm": 1.5659915803916853, "learning_rate": 1.951639635371129e-06, "loss": 0.4636, "step": 420 },
    { "epoch": 0.5752961082910322, "grad_norm": 1.3420492760813085, "learning_rate": 1.9491875101071985e-06, "loss": 0.4392, "step": 425 },
    { "epoch": 0.5820642978003384, "grad_norm": 1.7068688103479133, "learning_rate": 1.946676363260679e-06, "loss": 0.4442, "step": 430 },
    { "epoch": 0.5888324873096447, "grad_norm": 1.5986442251841577, "learning_rate": 1.9441063509776e-06, "loss": 0.4461, "step": 435 },
    { "epoch": 0.5956006768189509, "grad_norm": 1.3829526404505894, "learning_rate": 1.9414776330643123e-06, "loss": 0.4342, "step": 440 },
    { "epoch": 0.6023688663282571, "grad_norm": 1.5703877223063394, "learning_rate": 1.9387903729775516e-06, "loss": 0.4508, "step": 445 },
    { "epoch": 0.6091370558375635, "grad_norm": 1.6558038588752109, "learning_rate": 1.9360447378142724e-06, "loss": 0.4223, "step": 450 },
    { "epoch": 0.6159052453468697, "grad_norm": 1.4868682565255178, "learning_rate": 1.9332408983012616e-06, "loss": 0.4452, "step": 455 },
    { "epoch": 0.622673434856176, "grad_norm": 1.7326589310573917, "learning_rate": 1.930379028784518e-06, "loss": 0.4488, "step": 460 },
    { "epoch": 0.6294416243654822, "grad_norm": 1.5873739274578285, "learning_rate": 1.9274593072184147e-06, "loss": 0.4605, "step": 465 },
    { "epoch": 0.6362098138747885, "grad_norm": 1.334324454741449, "learning_rate": 1.924481915154632e-06, "loss": 0.4098, "step": 470 },
    { "epoch": 0.6429780033840947, "grad_norm": 1.4711516821260495, "learning_rate": 1.9214470377308698e-06, "loss": 0.4512, "step": 475 },
    { "epoch": 0.649746192893401, "grad_norm": 1.5422978596049304, "learning_rate": 1.918354863659332e-06, "loss": 0.4434, "step": 480 },
    { "epoch": 0.6565143824027073, "grad_norm": 1.6734420314479854, "learning_rate": 1.915205585214998e-06, "loss": 0.4656, "step": 485 },
    { "epoch": 0.6632825719120136, "grad_norm": 1.4869130554051895, "learning_rate": 1.9119993982236605e-06, "loss": 0.4658, "step": 490 },
    { "epoch": 0.6700507614213198, "grad_norm": 1.4648439577812231, "learning_rate": 1.908736502049754e-06, "loss": 0.4485, "step": 495 },
    { "epoch": 0.676818950930626, "grad_norm": 1.4924798254155862, "learning_rate": 1.9054170995839543e-06, "loss": 0.4434, "step": 500 },
    { "epoch": 0.6835871404399323, "grad_norm": 1.5718324313516365, "learning_rate": 1.9020413972305652e-06, "loss": 0.4141, "step": 505 },
    { "epoch": 0.6903553299492385, "grad_norm": 1.6927420013265664, "learning_rate": 1.8986096048946822e-06, "loss": 0.4188, "step": 510 },
    { "epoch": 0.6971235194585449, "grad_norm": 1.9638689317654023, "learning_rate": 1.8951219359691416e-06, "loss": 0.4085, "step": 515 },
    { "epoch": 0.7038917089678511, "grad_norm": 1.7008350118895288, "learning_rate": 1.8915786073212506e-06, "loss": 0.425, "step": 520 },
    { "epoch": 0.7106598984771574, "grad_norm": 1.5729307770372762, "learning_rate": 1.887979839279303e-06, "loss": 0.4486, "step": 525 },
    { "epoch": 0.7174280879864636, "grad_norm": 1.6239899543583527, "learning_rate": 1.8843258556188783e-06, "loss": 0.4314, "step": 530 },
    { "epoch": 0.7241962774957699, "grad_norm": 1.5552777279734928, "learning_rate": 1.8806168835489277e-06, "loss": 0.426, "step": 535 },
    { "epoch": 0.7309644670050761, "grad_norm": 1.4332159863043232, "learning_rate": 1.876853153697645e-06, "loss": 0.4297, "step": 540 },
    { "epoch": 0.7377326565143824, "grad_norm": 1.3873749896369056, "learning_rate": 1.8730349000981267e-06, "loss": 0.445, "step": 545 },
    { "epoch": 0.7445008460236887, "grad_norm": 1.4780965073836605, "learning_rate": 1.8691623601738197e-06, "loss": 0.458, "step": 550 },
    { "epoch": 0.751269035532995, "grad_norm": 1.5268831705229313, "learning_rate": 1.8652357747237578e-06, "loss": 0.4354, "step": 555 },
    { "epoch": 0.7580372250423012, "grad_norm": 1.777684181196839, "learning_rate": 1.8612553879075873e-06, "loss": 0.4521, "step": 560 },
    { "epoch": 0.7648054145516074, "grad_norm": 1.5369674855501423, "learning_rate": 1.8572214472303868e-06, "loss": 0.4403, "step": 565 },
    { "epoch": 0.7715736040609137, "grad_norm": 1.4081724058322669, "learning_rate": 1.8531342035272765e-06, "loss": 0.432, "step": 570 },
    { "epoch": 0.7783417935702199, "grad_norm": 1.6328252561599401, "learning_rate": 1.8489939109478203e-06, "loss": 0.4447, "step": 575 },
    { "epoch": 0.7851099830795262, "grad_norm": 1.4953859268513063, "learning_rate": 1.8448008269402226e-06, "loss": 0.4239, "step": 580 },
    { "epoch": 0.7918781725888325, "grad_norm": 1.5304225811890608, "learning_rate": 1.840555212235321e-06, "loss": 0.4293, "step": 585 },
    { "epoch": 0.7986463620981388, "grad_norm": 1.4486153048909152, "learning_rate": 1.8362573308303717e-06, "loss": 0.4304, "step": 590 },
    { "epoch": 0.805414551607445, "grad_norm": 1.4602386588010192, "learning_rate": 1.831907449972636e-06, "loss": 0.4472, "step": 595 },
    { "epoch": 0.8121827411167513, "grad_norm": 1.4965634086484065, "learning_rate": 1.8275058401427618e-06, "loss": 0.4409, "step": 600 },
    { "epoch": 0.8189509306260575, "grad_norm": 1.5872590957255623, "learning_rate": 1.823052775037964e-06, "loss": 0.4522, "step": 605 },
    { "epoch": 0.8257191201353637, "grad_norm": 1.4920706655623288, "learning_rate": 1.818548531555006e-06, "loss": 0.4289, "step": 610 },
    { "epoch": 0.8324873096446701, "grad_norm": 1.4808293841541855, "learning_rate": 1.8139933897729832e-06, "loss": 0.4404, "step": 615 },
    { "epoch": 0.8392554991539763, "grad_norm": 1.788578348932924, "learning_rate": 1.8093876329359058e-06, "loss": 0.4199, "step": 620 },
    { "epoch": 0.8460236886632826, "grad_norm": 1.5479379288714532, "learning_rate": 1.8047315474350868e-06, "loss": 0.4199, "step": 625 },
    { "epoch": 0.8527918781725888, "grad_norm": 1.4758005354779442, "learning_rate": 1.8000254227913344e-06, "loss": 0.4503, "step": 630 },
    { "epoch": 0.8595600676818951, "grad_norm": 1.5709164106643387, "learning_rate": 1.7952695516369488e-06, "loss": 0.436, "step": 635 },
    { "epoch": 0.8663282571912013, "grad_norm": 1.4869937326587306, "learning_rate": 1.7904642296975261e-06, "loss": 0.4416, "step": 640 },
    { "epoch": 0.8730964467005076, "grad_norm": 1.5189251087228999, "learning_rate": 1.7856097557735694e-06, "loss": 0.4367, "step": 645 },
    { "epoch": 0.8798646362098139, "grad_norm": 1.5390825637238734, "learning_rate": 1.7807064317219093e-06, "loss": 0.4385, "step": 650 },
    { "epoch": 0.8866328257191202, "grad_norm": 1.500939175744208, "learning_rate": 1.7757545624369347e-06, "loss": 0.4112, "step": 655 },
    { "epoch": 0.8934010152284264, "grad_norm": 1.5891603689470157, "learning_rate": 1.770754455831633e-06, "loss": 0.4496, "step": 660 },
    { "epoch": 0.9001692047377327, "grad_norm": 1.6353444834776032, "learning_rate": 1.7657064228184444e-06, "loss": 0.4222, "step": 665 },
    { "epoch": 0.9069373942470389, "grad_norm": 1.6181070946717884, "learning_rate": 1.7606107772899285e-06, "loss": 0.4296, "step": 670 },
    { "epoch": 0.9137055837563451, "grad_norm": 1.633235008124872, "learning_rate": 1.7554678360992475e-06, "loss": 0.4308, "step": 675 },
    { "epoch": 0.9204737732656514, "grad_norm": 1.4850942271908985, "learning_rate": 1.7502779190404611e-06, "loss": 0.425, "step": 680 },
    { "epoch": 0.9272419627749577, "grad_norm": 1.553921180752237, "learning_rate": 1.745041348828645e-06, "loss": 0.4228, "step": 685 },
    { "epoch": 0.934010152284264, "grad_norm": 1.5091342123918963, "learning_rate": 1.7397584510798206e-06, "loss": 0.4447, "step": 690 },
    { "epoch": 0.9407783417935702, "grad_norm": 1.5470959471430534, "learning_rate": 1.7344295542907105e-06, "loss": 0.4515, "step": 695 },
    { "epoch": 0.9475465313028765, "grad_norm": 1.584257899917925, "learning_rate": 1.7290549898183109e-06, "loss": 0.4276, "step": 700 },
    { "epoch": 0.9543147208121827, "grad_norm": 1.5896926257834991, "learning_rate": 1.7236350918592866e-06, "loss": 0.4429, "step": 705 },
    { "epoch": 0.961082910321489, "grad_norm": 1.4741862947637046, "learning_rate": 1.7181701974291928e-06, "loss": 0.417, "step": 710 },
    { "epoch": 0.9678510998307953, "grad_norm": 1.5194416495298846, "learning_rate": 1.7126606463415164e-06, "loss": 0.4299, "step": 715 },
    { "epoch": 0.9746192893401016, "grad_norm": 1.6092727303865384, "learning_rate": 1.7071067811865474e-06, "loss": 0.4351, "step": 720 },
    { "epoch": 0.9813874788494078, "grad_norm": 1.4151798722540438, "learning_rate": 1.701508947310077e-06, "loss": 0.4194, "step": 725 },
    { "epoch": 0.988155668358714, "grad_norm": 1.5504577940513988, "learning_rate": 1.695867492791921e-06, "loss": 0.4269, "step": 730 },
    { "epoch": 0.9949238578680203, "grad_norm": 1.5420736691888262, "learning_rate": 1.690182768424279e-06, "loss": 0.4406, "step": 735 },
    { "epoch": 1.0016920473773265, "grad_norm": 1.7092059209864483, "learning_rate": 1.6844551276899184e-06, "loss": 0.3988, "step": 740 },
    { "epoch": 1.0084602368866329, "grad_norm": 1.5056006562387958, "learning_rate": 1.6786849267401978e-06, "loss": 0.3558, "step": 745 },
    { "epoch": 1.015228426395939, "grad_norm": 1.767638909765125, "learning_rate": 1.6728725243729187e-06, "loss": 0.3396, "step": 750 },
    { "epoch": 1.0219966159052454, "grad_norm": 1.6366144961909075, "learning_rate": 1.667018282010016e-06, "loss": 0.3238, "step": 755 },
    { "epoch": 1.0287648054145515, "grad_norm": 1.5321955881585025, "learning_rate": 1.6611225636750836e-06, "loss": 0.3422, "step": 760 },
    { "epoch": 1.0355329949238579, "grad_norm": 1.680806770640699, "learning_rate": 1.6551857359707405e-06, "loss": 0.3309, "step": 765 },
    { "epoch": 1.0423011844331642, "grad_norm": 1.5471253871545012, "learning_rate": 1.649208168055833e-06, "loss": 0.324, "step": 770 },
    { "epoch": 1.0490693739424704, "grad_norm": 1.4947975015447832, "learning_rate": 1.6431902316224818e-06, "loss": 0.3329, "step": 775 },
    { "epoch": 1.0558375634517767, "grad_norm": 1.6086982480144758, "learning_rate": 1.6371323008729687e-06, "loss": 0.3196, "step": 780 },
    { "epoch": 1.0626057529610828, "grad_norm": 1.580457843877164, "learning_rate": 1.6310347524964687e-06, "loss": 0.3262, "step": 785 },
    { "epoch": 1.0693739424703892, "grad_norm": 1.6566966310786062, "learning_rate": 1.6248979656456272e-06, "loss": 0.3438, "step": 790 },
    { "epoch": 1.0761421319796955, "grad_norm": 1.6737061189158637, "learning_rate": 1.6187223219129823e-06, "loss": 0.343, "step": 795 },
    { "epoch": 1.0829103214890017, "grad_norm": 1.7888201683292473, "learning_rate": 1.6125082053072405e-06, "loss": 0.3381, "step": 800 },
    { "epoch": 1.089678510998308, "grad_norm": 1.6140042434311925, "learning_rate": 1.6062560022293933e-06, "loss": 0.3299, "step": 805 },
    { "epoch": 1.0964467005076142, "grad_norm": 1.5841270947078938, "learning_rate": 1.5999661014486955e-06, "loss": 0.3312, "step": 810 },
    { "epoch": 1.1032148900169205, "grad_norm": 1.6193416591297873, "learning_rate": 1.5936388940784883e-06, "loss": 0.3523, "step": 815 },
    { "epoch": 1.1099830795262267, "grad_norm": 1.550619017130258, "learning_rate": 1.5872747735518798e-06, "loss": 0.3228, "step": 820 },
    { "epoch": 1.116751269035533, "grad_norm": 1.7508231230240185, "learning_rate": 1.5808741355972807e-06, "loss": 0.3324, "step": 825 },
    { "epoch": 1.1235194585448394, "grad_norm": 1.6682501120844164, "learning_rate": 1.574437378213799e-06, "loss": 0.3239, "step": 830 },
    { "epoch": 1.1302876480541455, "grad_norm": 1.8314883569950966, "learning_rate": 1.5679649016464895e-06, "loss": 0.3296, "step": 835 },
    { "epoch": 1.1370558375634519, "grad_norm": 1.59630852333473, "learning_rate": 1.561457108361468e-06, "loss": 0.3289, "step": 840 },
    { "epoch": 1.143824027072758, "grad_norm": 1.5574912791068813, "learning_rate": 1.5549144030208855e-06, "loss": 0.3346, "step": 845 },
    { "epoch": 1.1505922165820643, "grad_norm": 1.6059497129644689, "learning_rate": 1.5483371924577634e-06, "loss": 0.3381, "step": 850 },
    { "epoch": 1.1573604060913705, "grad_norm": 1.7201015765204164, "learning_rate": 1.5417258856506994e-06, "loss": 0.3271, "step": 855 },
    { "epoch": 1.1641285956006768, "grad_norm": 1.6165299860217694, "learning_rate": 1.535080893698435e-06, "loss": 0.3312, "step": 860 },
    { "epoch": 1.1708967851099832, "grad_norm": 1.6502865565791853, "learning_rate": 1.5284026297942926e-06, "loss": 0.3362, "step": 865 },
    { "epoch": 1.1776649746192893, "grad_norm": 1.7002581521681002, "learning_rate": 1.5216915092004844e-06, "loss": 0.3215, "step": 870 },
    { "epoch": 1.1844331641285957, "grad_norm": 1.7420409861182249, "learning_rate": 1.5149479492222886e-06, "loss": 0.3464, "step": 875 },
    { "epoch": 1.1912013536379018, "grad_norm": 1.707048857027911, "learning_rate": 1.5081723691821026e-06, "loss": 0.3455, "step": 880 },
    { "epoch": 1.1979695431472082, "grad_norm": 1.6420304279171187, "learning_rate": 1.5013651903933683e-06, "loss": 0.3332, "step": 885 },
    { "epoch": 1.2047377326565143, "grad_norm": 1.8125800875620734, "learning_rate": 1.4945268361343746e-06, "loss": 0.3382, "step": 890 },
    { "epoch": 1.2115059221658206, "grad_norm": 1.6640857688992343, "learning_rate": 1.4876577316219374e-06, "loss": 0.3369, "step": 895 },
    { "epoch": 1.218274111675127, "grad_norm": 1.6451257316850152, "learning_rate": 1.4807583039849586e-06, "loss": 0.3539, "step": 900 },
    { "epoch": 1.2250423011844331, "grad_norm": 1.6610764662131192, "learning_rate": 1.4738289822378683e-06, "loss": 0.3369, "step": 905 },
    { "epoch": 1.2318104906937395, "grad_norm": 1.74944774821556, "learning_rate": 1.4668701972539456e-06, "loss": 0.3414, "step": 910 },
    { "epoch": 1.2385786802030456, "grad_norm": 1.6889905704095276, "learning_rate": 1.4598823817385296e-06, "loss": 0.3462, "step": 915 },
    { "epoch": 1.245346869712352, "grad_norm": 1.87748003800123, "learning_rate": 1.4528659702021106e-06, "loss": 0.347, "step": 920 },
    { "epoch": 1.252115059221658, "grad_norm": 1.7676519334092846, "learning_rate": 1.4458213989333125e-06, "loss": 0.3344, "step": 925 },
    { "epoch": 1.2588832487309645, "grad_norm": 1.8625421673744915, "learning_rate": 1.4387491059717651e-06, "loss": 0.3259, "step": 930 },
    { "epoch": 1.2656514382402708, "grad_norm": 1.5243120020428504, "learning_rate": 1.431649531080864e-06, "loss": 0.3286, "step": 935 },
    { "epoch": 1.272419627749577, "grad_norm": 1.773494740626271, "learning_rate": 1.424523115720428e-06, "loss": 0.3366, "step": 940 },
    { "epoch": 1.2791878172588833, "grad_norm": 1.642323556539902, "learning_rate": 1.4173703030192466e-06, "loss": 0.3381, "step": 945 },
    { "epoch": 1.2859560067681894, "grad_norm": 1.8298130052806405, "learning_rate": 1.4101915377475273e-06, "loss": 0.3472, "step": 950 },
    { "epoch": 1.2927241962774958, "grad_norm": 1.5564171598002208, "learning_rate": 1.4029872662892382e-06, "loss": 0.3378, "step": 955 },
    { "epoch": 1.299492385786802, "grad_norm": 1.7850954669361399, "learning_rate": 1.3957579366143519e-06, "loss": 0.3363, "step": 960 },
    { "epoch": 1.3062605752961083, "grad_norm": 1.614939575319601, "learning_rate": 1.3885039982509905e-06, "loss": 0.3166, "step": 965 },
    { "epoch": 1.3130287648054146, "grad_norm": 1.7656883518798847, "learning_rate": 1.3812259022574715e-06, "loss": 0.3426, "step": 970 },
    { "epoch": 1.3197969543147208, "grad_norm": 1.4996842720105086, "learning_rate": 1.373924101194264e-06, "loss": 0.3377, "step": 975 },
    { "epoch": 1.3265651438240271, "grad_norm": 1.834478494924892, "learning_rate": 1.3665990490958437e-06, "loss": 0.3408, "step": 980 },
    { "epoch": 1.3333333333333333, "grad_norm": 1.687498482197505, "learning_rate": 1.3592512014424644e-06, "loss": 0.3341, "step": 985 },
    { "epoch": 1.3401015228426396, "grad_norm": 1.4779395904473713, "learning_rate": 1.351881015131833e-06, "loss": 0.3319, "step": 990 },
    { "epoch": 1.3468697123519457, "grad_norm": 1.5491258438326576, "learning_rate": 1.3444889484507009e-06, "loss": 0.3287, "step": 995 },
    { "epoch": 1.353637901861252, "grad_norm": 1.753194944328746, "learning_rate": 1.3370754610463652e-06, "loss": 0.3264, "step": 1000 },
    { "epoch": 1.3604060913705585, "grad_norm": 1.8046926820280387, "learning_rate": 1.32964101389809e-06, "loss": 0.3453, "step": 1005 },
    { "epoch": 1.3671742808798646, "grad_norm": 1.5582819679996394, "learning_rate": 1.3221860692884395e-06, "loss": 0.3185, "step": 1010 },
    { "epoch": 1.373942470389171, "grad_norm": 1.734615015555365, "learning_rate": 1.3147110907745336e-06, "loss": 0.3209, "step": 1015 },
    { "epoch": 1.380710659898477, "grad_norm": 1.8370614645263001, "learning_rate": 1.3072165431592248e-06, "loss": 0.3389, "step": 1020 },
    { "epoch": 1.3874788494077834, "grad_norm": 1.6416288844308489, "learning_rate": 1.2997028924621943e-06, "loss": 0.3465, "step": 1025 },
    { "epoch": 1.3942470389170896, "grad_norm": 1.7141835707827855, "learning_rate": 1.2921706058909756e-06, "loss": 0.3379, "step": 1030 },
    { "epoch": 1.401015228426396, "grad_norm": 1.7703336159956253, "learning_rate": 1.2846201518119017e-06, "loss": 0.3331, "step": 1035 },
    { "epoch": 1.4077834179357023, "grad_norm": 1.7164709273217806, "learning_rate": 1.2770519997209835e-06, "loss": 0.3316, "step": 1040 },
    { "epoch": 1.4145516074450084, "grad_norm": 1.698294459133158, "learning_rate": 1.2694666202147137e-06, "loss": 0.3407, "step": 1045 },
    { "epoch": 1.4213197969543148, "grad_norm": 1.7231395084021628, "learning_rate": 1.2618644849608067e-06, "loss": 0.3383, "step": 1050 },
    { "epoch": 1.4280879864636211, "grad_norm": 1.6225747755972384, "learning_rate": 1.2542460666688678e-06, "loss": 0.3272, "step": 1055 },
    { "epoch": 1.4348561759729273, "grad_norm": 1.6273808164138512, "learning_rate": 1.246611839061002e-06, "loss": 0.3307, "step": 1060 },
    { "epoch": 1.4416243654822334, "grad_norm": 1.640412382244569, "learning_rate": 1.2389622768423536e-06, "loss": 0.3326, "step": 1065 },
    { "epoch": 1.4483925549915397, "grad_norm": 1.5742322851792212, "learning_rate": 1.231297855671593e-06, "loss": 0.311, "step": 1070 },
    { "epoch": 1.455160744500846, "grad_norm": 1.6398609226586531, "learning_rate": 1.223619052131337e-06, "loss": 0.3417, "step": 1075 },
    { "epoch": 1.4619289340101522, "grad_norm": 1.5919173149091699, "learning_rate": 1.2159263436985136e-06, "loss": 0.3311, "step": 1080 },
    { "epoch": 1.4686971235194586, "grad_norm": 1.7663834242591079, "learning_rate": 1.2082202087146751e-06, "loss": 0.3404, "step": 1085 },
    { "epoch": 1.475465313028765, "grad_norm": 1.6365555810498733, "learning_rate": 1.2005011263562513e-06, "loss": 0.3211, "step": 1090 },
    { "epoch": 1.482233502538071, "grad_norm": 1.7543606709062083, "learning_rate": 1.1927695766047538e-06, "loss": 0.3345, "step": 1095 },
    { "epoch": 1.4890016920473772, "grad_norm": 1.6454656998875175, "learning_rate": 1.185026040216934e-06, "loss": 0.329, "step": 1100 },
    { "epoch": 1.4957698815566836, "grad_norm": 1.6242171627203073, "learning_rate": 1.1772709986948827e-06, "loss": 0.3274, "step": 1105 },
    { "epoch": 1.50253807106599, "grad_norm": 2.0678978985333596, "learning_rate": 1.1695049342560967e-06, "loss": 0.3544, "step": 1110 },
    { "epoch": 1.509306260575296, "grad_norm": 1.7262447342718426, "learning_rate": 1.161728329803488e-06, "loss": 0.341, "step": 1115 },
    { "epoch": 1.5160744500846024, "grad_norm": 1.6907890988982508, "learning_rate": 1.153941668895361e-06, "loss": 0.3292, "step": 1120 },
    { "epoch": 1.5228426395939088, "grad_norm": 1.6131818091865402, "learning_rate": 1.1461454357153406e-06, "loss": 0.3273, "step": 1125 },
    { "epoch": 1.5296108291032149, "grad_norm": 1.970023298749538, "learning_rate": 1.1383401150422668e-06, "loss": 0.3389, "step": 1130 },
    { "epoch": 1.536379018612521, "grad_norm": 1.7477654667475575, "learning_rate": 1.1305261922200517e-06, "loss": 0.336, "step": 1135 },
    { "epoch": 1.5431472081218274, "grad_norm": 1.8260233194529998, "learning_rate": 1.1227041531274977e-06, "loss": 0.3394, "step": 1140 },
    { "epoch": 1.5499153976311337, "grad_norm": 1.533061734694472, "learning_rate": 1.1148744841480873e-06, "loss": 0.3274, "step": 1145 },
    { "epoch": 1.5566835871404399, "grad_norm": 1.769403392681689, "learning_rate": 1.1070376721397372e-06, "loss": 0.3438, "step": 1150 },
    { "epoch": 1.5634517766497462, "grad_norm": 1.6263236224467823, "learning_rate": 1.0991942044045274e-06, "loss": 0.3437, "step": 1155 },
    { "epoch": 1.5702199661590526, "grad_norm": 1.9050438393576472, "learning_rate": 1.0913445686583974e-06, "loss": 0.3208, "step": 1160 },
    { "epoch": 1.5769881556683587, "grad_norm": 1.7107041476766611, "learning_rate": 1.0834892530008214e-06, "loss": 0.3192, "step": 1165 },
    { "epoch": 1.5837563451776648, "grad_norm": 1.5694513216338701, "learning_rate": 1.0756287458844569e-06, "loss": 0.3339, "step": 1170 },
    { "epoch": 1.5905245346869712, "grad_norm": 1.5469737030155013, "learning_rate": 1.0677635360847722e-06, "loss": 0.3323, "step": 1175 },
    { "epoch": 1.5972927241962775, "grad_norm": 1.652088656809816, "learning_rate": 1.0598941126696543e-06, "loss": 0.3331, "step": 1180 },
    { "epoch": 1.6040609137055837, "grad_norm": 1.696570545718917, "learning_rate": 1.0520209649689977e-06, "loss": 0.3258, "step": 1185 },
    { "epoch": 1.61082910321489, "grad_norm": 1.759912674332406, "learning_rate": 1.0441445825442771e-06, "loss": 0.3379, "step": 1190 },
    { "epoch": 1.6175972927241964, "grad_norm": 1.7089027285892213, "learning_rate": 1.0362654551581062e-06, "loss": 0.3449, "step": 1195 },
    { "epoch": 1.6243654822335025, "grad_norm": 1.5565566000298192, "learning_rate": 1.0283840727437832e-06, "loss": 0.338, "step": 1200 },
    { "epoch": 1.6311336717428087, "grad_norm": 1.6464360907618232, "learning_rate": 1.0205009253748272e-06, "loss": 0.3327, "step": 1205 },
    { "epoch": 1.637901861252115, "grad_norm": 1.6450744022431256, "learning_rate": 1.0126165032345037e-06, "loss": 0.3411, "step": 1210 },
    { "epoch": 1.6446700507614214, "grad_norm": 1.8547448481156448, "learning_rate": 1.0047312965853454e-06, "loss": 0.3383, "step": 1215 },
    { "epoch": 1.6514382402707275, "grad_norm": 1.6495177484989427, "learning_rate": 9.968457957386662e-07, "loss": 0.3263, "step": 1220 },
    { "epoch": 1.6582064297800339, "grad_norm": 1.6895491290871958, "learning_rate": 9.88960491024074e-07, "loss": 0.3325, "step": 1225 },
    { "epoch": 1.6649746192893402, "grad_norm": 1.627812788457914, "learning_rate": 9.810758727589813e-07, "loss": 0.3291, "step": 1230 },
    { "epoch": 1.6717428087986463, "grad_norm": 1.7996421239110534, "learning_rate": 9.731924312181148e-07, "loss": 0.3354, "step": 1235 },
    { "epoch": 1.6785109983079525, "grad_norm": 1.8622024185022505, "learning_rate": 9.653106566030328e-07, "loss": 0.3459, "step": 1240 },
    { "epoch": 1.6852791878172588, "grad_norm": 1.7964469673524814, "learning_rate": 9.574310390116418e-07, "loss": 0.3205, "step": 1245 },
    { "epoch": 1.6920473773265652, "grad_norm": 1.6712003471107053, "learning_rate": 9.495540684077214e-07, "loss": 0.3368, "step": 1250 },
    { "epoch": 1.6988155668358713, "grad_norm": 1.6956557822203489, "learning_rate": 9.41680234590459e-07, "loss": 0.3249, "step": 1255 },
    { "epoch": 1.7055837563451777, "grad_norm": 1.694408958921992, "learning_rate": 9.338100271639931e-07, "loss": 0.3498, "step": 1260 },
    { "epoch": 1.712351945854484, "grad_norm": 1.6701624264401975, "learning_rate": 9.25943935506969e-07, "loss": 0.3257, "step": 1265 },
    { "epoch": 1.7191201353637902, "grad_norm": 1.7930456020095138, "learning_rate": 9.180824487421076e-07, "loss": 0.3261, "step": 1270 },
    { "epoch": 1.7258883248730963, "grad_norm": 1.5441215575090625, "learning_rate": 9.102260557057935e-07, "loss": 0.336, "step": 1275 },
    { "epoch": 1.7326565143824029, "grad_norm": 1.64455257154265, "learning_rate": 9.023752449176772e-07, "loss": 0.3269, "step": 1280 },
    { "epoch": 1.739424703891709, "grad_norm": 1.5448960299784444, "learning_rate": 8.945305045502984e-07, "loss": 0.3288, "step": 1285 },
    { "epoch": 1.7461928934010151, "grad_norm": 1.6868399015307785, "learning_rate": 8.866923223987302e-07, "loss": 0.3196, "step": 1290 },
    { "epoch": 1.7529610829103215, "grad_norm": 1.6844238790169903, "learning_rate": 8.788611858502489e-07, "loss": 0.3524, "step": 1295 },
    { "epoch": 1.7597292724196278, "grad_norm": 1.536238947791522, "learning_rate": 8.710375818540279e-07, "loss": 0.323, "step": 1300 },
    { "epoch": 1.766497461928934, "grad_norm": 1.6594155852789063, "learning_rate": 8.632219968908555e-07, "loss": 0.3388, "step": 1305 },
    { "epoch": 1.77326565143824, "grad_norm": 1.8357910067878633, "learning_rate": 8.554149169428892e-07, "loss": 0.319, "step": 1310 },
    { "epoch": 1.7800338409475467, "grad_norm": 1.770060103182254, "learning_rate": 8.476168274634341e-07, "loss": 0.3533, "step": 1315 },
    { "epoch": 1.7868020304568528, "grad_norm": 1.6650622145152638, "learning_rate": 8.398282133467578e-07, "loss": 0.3313, "step": 1320 },
    { "epoch": 1.793570219966159, "grad_norm": 1.6934630286297077, "learning_rate": 8.320495588979377e-07, "loss": 0.3273, "step": 1325 },
    { "epoch": 1.8003384094754653, "grad_norm": 1.6802792500913968, "learning_rate": 8.242813478027491e-07, "loss": 0.3425, "step": 1330 },
    { "epoch": 1.8071065989847717, "grad_norm": 1.6829953200882048, "learning_rate": 8.165240630975861e-07, "loss": 0.351, "step": 1335 },
    { "epoch": 1.8138747884940778, "grad_norm": 1.7159022109793864, "learning_rate": 8.087781871394279e-07, "loss": 0.3211, "step": 1340 },
    { "epoch": 1.8206429780033841, "grad_norm": 1.6151668145042095, "learning_rate": 8.010442015758445e-07, "loss": 0.316, "step": 1345 },
    { "epoch": 1.8274111675126905, "grad_norm": 1.6174069637779651, "learning_rate": 7.93322587315047e-07, "loss": 0.332, "step": 1350 },
    { "epoch": 1.8341793570219966, "grad_norm": 1.6680877411690365, "learning_rate": 7.856138244959849e-07, "loss": 0.3243, "step": 1355 },
    { "epoch": 1.8409475465313028, "grad_norm": 1.7974753943714166, "learning_rate": 7.7791839245849e-07, "loss": 0.3294, "step": 1360 },
    { "epoch": 1.8477157360406091, "grad_norm": 1.646510776278098, "learning_rate": 7.702367697134701e-07, "loss": 0.3304, "step": 1365 },
    { "epoch": 1.8544839255499155, "grad_norm": 1.7474734525141256, "learning_rate": 7.625694339131563e-07, "loss": 0.3588, "step": 1370 },
    { "epoch": 1.8612521150592216, "grad_norm": 1.658323098173442, "learning_rate": 7.549168618213994e-07, "loss": 0.3362, "step": 1375 },
    { "epoch": 1.868020304568528, "grad_norm": 1.67431452331962, "learning_rate": 7.472795292840269e-07, "loss": 0.3427, "step": 1380 },
    { "epoch": 1.8747884940778343, "grad_norm": 1.6444897700367918, "learning_rate": 7.396579111992522e-07, "loss": 0.3552, "step": 1385 },
    { "epoch": 1.8815566835871405, "grad_norm": 1.8283646757854843, "learning_rate": 7.32052481488147e-07, "loss": 0.3312, "step": 1390 },
    { "epoch": 1.8883248730964466, "grad_norm": 1.755531433154494, "learning_rate": 7.244637130651693e-07, "loss": 0.3366, "step": 1395 },
    { "epoch": 1.895093062605753, "grad_norm": 1.634727248259316, "learning_rate": 7.168920778087601e-07, "loss": 0.3323, "step": 1400 },
    { "epoch": 1.9018612521150593, "grad_norm": 1.6187492591322967, "learning_rate": 7.093380465320008e-07, "loss": 0.345, "step": 1405 },
    { "epoch": 1.9086294416243654, "grad_norm": 2.046018137628728, "learning_rate": 7.018020889533347e-07, "loss": 0.3316, "step": 1410 },
    { "epoch": 1.9153976311336718, "grad_norm": 1.7585386765032196, "learning_rate": 6.942846736673633e-07, "loss": 0.3404, "step": 1415 },
    { "epoch": 1.9221658206429781, "grad_norm": 1.600209455454873, "learning_rate": 6.867862681157066e-07, "loss": 0.3319, "step": 1420 },
    { "epoch": 1.9289340101522843, "grad_norm": 1.6939356089504074, "learning_rate": 6.793073385579372e-07, "loss": 0.3353, "step": 1425 },
    { "epoch": 1.9357021996615904, "grad_norm": 1.730441931386039, "learning_rate": 6.718483500425866e-07, "loss": 0.3448, "step": 1430 },
    { "epoch": 1.9424703891708968, "grad_norm": 1.602246560839999, "learning_rate": 6.644097663782308e-07, "loss": 0.3207, "step": 1435 },
    { "epoch": 1.9492385786802031, "grad_norm": 1.5255973835779064, "learning_rate": 6.569920501046473e-07, "loss": 0.3211, "step": 1440 },
    { "epoch": 1.9560067681895092, "grad_norm": 1.787975436248423, "learning_rate": 6.495956624640558e-07, "loss": 0.3331, "step": 1445 },
    { "epoch": 1.9627749576988156, "grad_norm": 1.635182796509772, "learning_rate": 6.422210633724359e-07, "loss": 0.3151, "step": 1450 },
    { "epoch": 1.969543147208122, "grad_norm": 1.745570057757413, "learning_rate": 6.348687113909303e-07, "loss": 0.3166, "step": 1455 },
    { "epoch": 1.976311336717428, "grad_norm": 1.666654337456338, "learning_rate": 6.275390636973315e-07, "loss": 0.3287, "step": 1460 },
    { "epoch": 1.9830795262267342, "grad_norm": 1.7830502067885774, "learning_rate": 6.20232576057651e-07, "loss": 0.3374, "step": 1465 },
    { "epoch": 1.9898477157360406, "grad_norm": 1.6063864832196357, "learning_rate": 6.129497027977828e-07, "loss": 0.3333, "step": 1470 },
    { "epoch": 1.996615905245347, "grad_norm": 1.84518355863186, "learning_rate": 6.05690896775251e-07, "loss": 0.3338, "step": 1475 },
    { "epoch": 2.003384094754653, "grad_norm": 1.996243516154583, "learning_rate": 5.984566093510508e-07, "loss": 0.3076, "step": 1480 },
    { "epoch": 2.010152284263959, "grad_norm": 1.6441418715481781, "learning_rate": 5.91247290361582e-07, "loss": 0.2734, "step": 1485 },
    { "epoch": 2.0169204737732658, "grad_norm": 1.7461011995129512, "learning_rate": 5.840633880906787e-07, "loss": 0.2483, "step": 1490 },
    { "epoch": 2.023688663282572, "grad_norm": 1.780208702006572, "learning_rate": 5.769053492417341e-07, "loss": 0.2597, "step": 1495 },
    { "epoch": 2.030456852791878, "grad_norm": 1.6957870173539042, "learning_rate": 5.69773618909923e-07, "loss": 0.2534, "step": 1500 },
    { "epoch": 2.0372250423011846, "grad_norm": 1.7614796776092347, "learning_rate": 5.62668640554526e-07, "loss": 0.2684, "step": 1505 },
    { "epoch": 2.0439932318104908, "grad_norm": 1.8361067674268434, "learning_rate": 5.55590855971356e-07, "loss": 0.2645, "step": 1510 },
    { "epoch": 2.050761421319797, "grad_norm": 1.803442468077519, "learning_rate": 5.485407052652844e-07, "loss": 0.2637, "step": 1515 },
    { "epoch": 2.057529610829103, "grad_norm": 1.6146644628968327, "learning_rate": 5.415186268228762e-07, "loss": 0.2657, "step": 1520 },
    { "epoch": 2.0642978003384096, "grad_norm": 2.478325459542897, "learning_rate": 5.3452505728513e-07, "loss": 0.2528, "step": 1525 },
    { "epoch": 2.0710659898477157, "grad_norm": 1.902805376038303, "learning_rate": 5.275604315203292e-07, "loss": 0.2625, "step": 1530 },
    { "epoch": 2.077834179357022, "grad_norm": 1.707278374587984, "learning_rate": 5.206251825969973e-07, "loss": 0.2557, "step": 1535 },
    { "epoch": 2.0846023688663284, "grad_norm": 1.7063185136076124, "learning_rate": 5.137197417569738e-07, "loss": 0.2397, "step": 1540 },
    { "epoch": 2.0913705583756346, "grad_norm": 1.8210841183819555, "learning_rate": 5.068445383885961e-07, "loss": 0.2511, "step": 1545 },
    { "epoch": 2.0981387478849407, "grad_norm": 1.8554223864028092, "learning_rate": 5.000000000000002e-07, "loss": 0.2553, "step": 1550 },
    { "epoch": 2.104906937394247, "grad_norm": 1.7188069419320222, "learning_rate": 4.931865521925383e-07, "loss": 0.2454, "step": 1555 },
    { "epoch": 2.1116751269035534, "grad_norm": 1.8040405864378724, "learning_rate": 4.864046186343139e-07, "loss": 0.2608, "step": 1560 },
    { "epoch": 2.1184433164128595, "grad_norm": 1.9594461805438623, "learning_rate": 4.796546210338387e-07, "loss": 0.262, "step": 1565 },
    { "epoch": 2.1252115059221657, "grad_norm": 1.6763587503898223, "learning_rate": 4.7293697911380846e-07, "loss": 0.2622, "step": 1570 },
    { "epoch": 2.1319796954314723, "grad_norm": 1.966220531714013, "learning_rate": 4.662521105850055e-07, "loss": 0.2512, "step": 1575 },
    { "epoch": 2.1387478849407784, "grad_norm": 1.755764907373717, "learning_rate": 4.596004311203242e-07, "loss": 0.249, "step": 1580 },
    { "epoch": 2.1455160744500845, "grad_norm": 1.8223089125739613, "learning_rate": 4.5298235432892575e-07, "loss": 0.2465, "step": 1585 },
    { "epoch": 2.152284263959391, "grad_norm": 1.7206718832666286, "learning_rate": 4.463982917305155e-07, "loss": 0.2458, "step": 1590 },
    { "epoch": 2.1590524534686972, "grad_norm": 1.8397041856544176, "learning_rate": 4.398486527297595e-07, "loss": 0.2577, "step": 1595 },
    { "epoch": 2.1658206429780034, "grad_norm": 1.8789732250278144, "learning_rate": 4.3333384459082247e-07, "loss": 0.2547, "step": 1600 },
    { "epoch": 2.1725888324873095, "grad_norm": 1.9842491893922207, "learning_rate": 4.268542724120475e-07, "loss": 0.2407, "step": 1605 },
    { "epoch": 2.179357021996616, "grad_norm": 1.6840880785917902, "learning_rate": 4.204103391007623e-07, "loss": 0.2453, "step": 1610 },
    { "epoch": 2.186125211505922, "grad_norm": 1.7102283764854913, "learning_rate": 4.140024453482307e-07, "loss": 0.2531, "step": 1615 },
    { "epoch": 2.1928934010152283, "grad_norm": 1.9705580867409258, "learning_rate": 4.076309896047336e-07, "loss": 0.239, "step": 1620 },
    { "epoch": 2.199661590524535, "grad_norm": 1.793405155653999, "learning_rate": 4.012963680547946e-07, "loss": 0.2565, "step": 1625 },
    { "epoch": 2.206429780033841, "grad_norm": 1.720247219871775, "learning_rate": 3.949989745925437e-07, "loss": 0.2675, "step": 1630 },
    { "epoch": 2.213197969543147, "grad_norm": 1.6800247601578724, "learning_rate": 3.8873920079722644e-07, "loss": 0.2568, "step": 1635 },
    { "epoch": 2.2199661590524533, "grad_norm": 1.8843811057218098, "learning_rate": 3.8251743590885256e-07, "loss": 0.2431, "step": 1640 },
    { "epoch": 2.22673434856176, "grad_norm": 1.6769092970745525, "learning_rate": 3.7633406680399416e-07, "loss": 0.2513, "step": 1645 },
    { "epoch": 2.233502538071066, "grad_norm": 1.9149891397189558, "learning_rate": 3.701894779717286e-07, "loss": 0.2441, "step": 1650 },
    { "epoch": 2.240270727580372, "grad_norm": 1.8352949058879366, "learning_rate": 3.640840514897322e-07, "loss": 0.2512, "step": 1655 },
    { "epoch": 2.2470389170896787, "grad_norm": 1.7191969563079663, "learning_rate": 3.580181670005182e-07, "loss": 0.2514, "step": 1660 },
    { "epoch": 2.253807106598985, "grad_norm": 1.7862905463536183, "learning_rate": 3.519922016878356e-07, "loss": 0.2523, "step": 1665 },
    { "epoch": 2.260575296108291, "grad_norm": 1.7759093954687228, "learning_rate": 3.460065302532108e-07, "loss": 0.2455, "step": 1670 },
    { "epoch": 2.267343485617597, "grad_norm": 1.8251542509381542, "learning_rate": 3.400615248926506e-07, "loss": 0.2628, "step": 1675 },
    { "epoch": 2.2741116751269037, "grad_norm": 1.8854432012171292, "learning_rate": 3.341575552734978e-07, "loss": 0.2496, "step": 1680 },
    { "epoch": 2.28087986463621, "grad_norm": 1.7098687857977533, "learning_rate": 3.2829498851144577e-07, "loss": 0.2486, "step": 1685 },
    { "epoch": 2.287648054145516, "grad_norm": 1.9407175434129038, "learning_rate": 3.224741891477095e-07, "loss": 0.254, "step": 1690 },
    { "epoch": 2.2944162436548226, "grad_norm": 1.8048961105532955, "learning_rate": 3.166955191263587e-07, "loss": 0.2596, "step": 1695 },
    { "epoch": 2.3011844331641287, "grad_norm": 1.9500522244148442, "learning_rate": 3.109593377718116e-07, "loss": 0.2674, "step": 1700 },
    { "epoch": 2.307952622673435, "grad_norm": 1.7430933854279251, "learning_rate": 3.0526600176649153e-07, "loss": 0.2426, "step": 1705 },
    { "epoch": 2.314720812182741, "grad_norm": 1.7044330031218278, "learning_rate": 2.9961586512864944e-07, "loss": 0.2545, "step": 1710 },
    { "epoch": 2.3214890016920475, "grad_norm": 1.750375443426282, "learning_rate": 2.9400927919034726e-07, "loss": 0.2408, "step": 1715 },
    { "epoch": 2.3282571912013537, "grad_norm": 1.8950517301756609, "learning_rate": 2.884465925756159e-07, "loss": 0.2489, "step": 1720 },
    { "epoch": 2.33502538071066, "grad_norm": 1.8482803305522688, "learning_rate": 2.829281511787739e-07, "loss": 0.2625, "step": 1725 },
    { "epoch": 2.3417935702199664, "grad_norm": 1.9547518786899865, "learning_rate": 2.774542981429214e-07, "loss": 0.2539, "step": 1730 },
    { "epoch": 2.3485617597292725, "grad_norm": 1.681029190992801, "learning_rate": 2.7202537383860193e-07, "loss": 0.2569, "step": 1735 },
    { "epoch": 2.3553299492385786, "grad_norm": 1.8386556962717924, "learning_rate": 2.6664171584263927e-07, "loss": 0.2738, "step": 1740 },
    { "epoch": 2.3620981387478848, "grad_norm": 1.8580649687840483, "learning_rate": 2.613036589171443e-07, "loss": 0.2587, "step": 1745 },
    { "epoch": 2.3688663282571913, "grad_norm": 1.8932490915132734, "learning_rate": 2.560115349887013e-07, "loss": 0.2597, "step": 1750 },
    { "epoch": 2.3756345177664975, "grad_norm": 1.8291342664374226, "learning_rate": 2.5076567312772636e-07, "loss": 0.2514, "step": 1755 },
    { "epoch": 2.3824027072758036, "grad_norm": 1.8170289718176122, "learning_rate": 2.4556639952800784e-07, "loss": 0.2508, "step": 1760 },
    { "epoch": 2.38917089678511, "grad_norm": 1.770454255610108, "learning_rate": 2.4041403748642085e-07, "loss": 0.2607, "step": 1765 },
    { "epoch": 2.3959390862944163, "grad_norm": 1.920241090922087, "learning_rate": 2.353089073828255e-07, "loss": 0.2497, "step": 1770 },
    { "epoch": 2.4027072758037225, "grad_norm": 1.9199730244133009, "learning_rate": 2.302513266601449e-07, "loss": 0.2534, "step": 1775 },
    { "epoch": 2.4094754653130286, "grad_norm": 1.770783110899942, "learning_rate": 2.2524160980462747e-07, "loss": 0.2577, "step": 1780 },
    { "epoch": 2.416243654822335, "grad_norm": 1.779049283308491, "learning_rate": 2.2028006832628876e-07, "loss": 0.2648, "step": 1785 },
    { "epoch": 2.4230118443316413, "grad_norm": 1.8951806130885847, "learning_rate": 2.1536701073954556e-07, "loss": 0.2552, "step": 1790 },
    { "epoch": 2.4297800338409474, "grad_norm": 1.890797368455652, "learning_rate": 2.1050274254402812e-07, "loss": 0.2533, "step": 1795 },
    { "epoch": 2.436548223350254, "grad_norm": 1.8879449556370194, "learning_rate": 2.0568756620558736e-07, "loss": 0.2621, "step": 1800 },
    { "epoch": 2.44331641285956, "grad_norm": 1.9716199735140634, "learning_rate": 2.0092178113748348e-07, "loss": 0.251, "step": 1805 },
    { "epoch": 2.4500846023688663, "grad_norm": 1.664484183169836, "learning_rate": 1.962056836817718e-07, "loss": 0.2451, "step": 1810 },
    { "epoch": 2.4568527918781724, "grad_norm": 1.7827762385482862, "learning_rate": 1.9153956709087337e-07, "loss": 0.2561, "step": 1815 },
    { "epoch": 2.463620981387479, "grad_norm": 1.776234828523817, "learning_rate": 1.8692372150934111e-07, "loss": 0.2396, "step": 1820 },
    { "epoch": 2.470389170896785, "grad_norm": 1.745340346927932, "learning_rate": 1.8235843395581795e-07, "loss": 0.2494, "step": 1825 },
    { "epoch": 2.4771573604060912, "grad_norm": 1.8045126818551005, "learning_rate": 1.7784398830519e-07, "loss": 0.2522, "step": 1830 },
    { "epoch": 2.483925549915398, "grad_norm": 1.89151528655817, "learning_rate": 1.733806652709351e-07, "loss": 0.2528, "step": 1835 },
    { "epoch": 2.490693739424704, "grad_norm": 1.7633235435458878, "learning_rate": 1.68968742387667e-07, "loss": 0.2518, "step": 1840 },
    { "epoch": 2.49746192893401, "grad_norm": 1.7902731019901965, "learning_rate": 1.6460849399387845e-07,
|
"loss": 0.2552, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 2.504230118443316, |
|
"grad_norm": 1.8529889195491502, |
|
"learning_rate": 1.6030019121488226e-07, |
|
"loss": 0.2588, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.510998307952623, |
|
"grad_norm": 1.7334848080126557, |
|
"learning_rate": 1.5604410194595264e-07, |
|
"loss": 0.2495, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 2.517766497461929, |
|
"grad_norm": 1.8562652968231852, |
|
"learning_rate": 1.5184049083566687e-07, |
|
"loss": 0.2468, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.524534686971235, |
|
"grad_norm": 1.7483934506844119, |
|
"learning_rate": 1.476896192694499e-07, |
|
"loss": 0.2527, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 2.5313028764805416, |
|
"grad_norm": 1.8012786807901255, |
|
"learning_rate": 1.4359174535331998e-07, |
|
"loss": 0.2495, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.5380710659898478, |
|
"grad_norm": 1.882303438003816, |
|
"learning_rate": 1.3954712389783996e-07, |
|
"loss": 0.2633, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.544839255499154, |
|
"grad_norm": 1.9004686167123348, |
|
"learning_rate": 1.3555600640227283e-07, |
|
"loss": 0.2482, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.55160744500846, |
|
"grad_norm": 1.9008963364397549, |
|
"learning_rate": 1.3161864103894361e-07, |
|
"loss": 0.2601, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 2.5583756345177666, |
|
"grad_norm": 1.7910128571435278, |
|
"learning_rate": 1.2773527263780626e-07, |
|
"loss": 0.2483, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.5651438240270727, |
|
"grad_norm": 1.7108145043264886, |
|
"learning_rate": 1.23906142671222e-07, |
|
"loss": 0.2468, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 2.571912013536379, |
|
"grad_norm": 1.8225256818804154, |
|
"learning_rate": 1.2013148923894212e-07, |
|
"loss": 0.2543, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.5786802030456855, |
|
"grad_norm": 1.79411507183842, |
|
"learning_rate": 1.1641154705330502e-07, |
|
"loss": 0.2409, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 2.5854483925549916, |
|
"grad_norm": 1.7242811103863596, |
|
"learning_rate": 1.127465474246384e-07, |
|
"loss": 0.2571, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.5922165820642977, |
|
"grad_norm": 1.9407815471081673, |
|
"learning_rate": 1.0913671824687953e-07, |
|
"loss": 0.251, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 2.598984771573604, |
|
"grad_norm": 1.6296473201174055, |
|
"learning_rate": 1.0558228398340186e-07, |
|
"loss": 0.2388, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.6057529610829104, |
|
"grad_norm": 1.6835802138496847, |
|
"learning_rate": 1.020834656530597e-07, |
|
"loss": 0.2427, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.6125211505922166, |
|
"grad_norm": 1.8446991621937052, |
|
"learning_rate": 9.86404808164426e-08, |
|
"loss": 0.24, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.6192893401015227, |
|
"grad_norm": 1.762353539890661, |
|
"learning_rate": 9.525354356235004e-08, |
|
"loss": 0.24, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 2.6260575296108293, |
|
"grad_norm": 1.9831610169369156, |
|
"learning_rate": 9.192286449447684e-08, |
|
"loss": 0.2451, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.6328257191201354, |
|
"grad_norm": 1.6135822284867207, |
|
"learning_rate": 8.864865071831829e-08, |
|
"loss": 0.2534, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 2.6395939086294415, |
|
"grad_norm": 1.6037304176671419, |
|
"learning_rate": 8.543110582829272e-08, |
|
"loss": 0.243, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.6463620981387477, |
|
"grad_norm": 1.7531920017607536, |
|
"learning_rate": 8.227042989508104e-08, |
|
"loss": 0.2482, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 2.6531302876480543, |
|
"grad_norm": 1.723522478352686, |
|
"learning_rate": 7.916681945318648e-08, |
|
"loss": 0.2477, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.6598984771573604, |
|
"grad_norm": 1.7614779368425475, |
|
"learning_rate": 7.612046748871326e-08, |
|
"loss": 0.253, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 1.887871045430256, |
|
"learning_rate": 7.313156342736738e-08, |
|
"loss": 0.2508, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.673434856175973, |
|
"grad_norm": 1.9069055836407365, |
|
"learning_rate": 7.020029312267727e-08, |
|
"loss": 0.267, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.6802030456852792, |
|
"grad_norm": 1.8939133923811016, |
|
"learning_rate": 6.732683884443735e-08, |
|
"loss": 0.2692, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.6869712351945854, |
|
"grad_norm": 1.8130887567930416, |
|
"learning_rate": 6.451137926737415e-08, |
|
"loss": 0.2527, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 2.6937394247038915, |
|
"grad_norm": 2.101117018466412, |
|
"learning_rate": 6.175408946003702e-08, |
|
"loss": 0.2497, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.700507614213198, |
|
"grad_norm": 1.654995259770582, |
|
"learning_rate": 5.90551408739105e-08, |
|
"loss": 0.2306, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 2.707275803722504, |
|
"grad_norm": 1.83930610346023, |
|
"learning_rate": 5.641470133275472e-08, |
|
"loss": 0.2573, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.7140439932318103, |
|
"grad_norm": 1.810068940701123, |
|
"learning_rate": 5.3832935022169015e-08, |
|
"loss": 0.236, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 2.720812182741117, |
|
"grad_norm": 1.8885737097550355, |
|
"learning_rate": 5.1310002479383665e-08, |
|
"loss": 0.2543, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.727580372250423, |
|
"grad_norm": 1.722305340814142, |
|
"learning_rate": 4.884606058327612e-08, |
|
"loss": 0.2472, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 2.734348561759729, |
|
"grad_norm": 1.9391080536086678, |
|
"learning_rate": 4.644126254461755e-08, |
|
"loss": 0.259, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.7411167512690353, |
|
"grad_norm": 1.6499649417235132, |
|
"learning_rate": 4.409575789654474e-08, |
|
"loss": 0.2566, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.747884940778342, |
|
"grad_norm": 1.6917223527602334, |
|
"learning_rate": 4.180969248526334e-08, |
|
"loss": 0.2626, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.754653130287648, |
|
"grad_norm": 1.598065120848905, |
|
"learning_rate": 3.958320846097685e-08, |
|
"loss": 0.2428, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 2.761421319796954, |
|
"grad_norm": 1.9249388241762717, |
|
"learning_rate": 3.7416444269050326e-08, |
|
"loss": 0.2589, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.7681895093062607, |
|
"grad_norm": 1.7249000353236292, |
|
"learning_rate": 3.530953464139919e-08, |
|
"loss": 0.2381, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 2.774957698815567, |
|
"grad_norm": 1.8216343143692146, |
|
"learning_rate": 3.3262610588113305e-08, |
|
"loss": 0.2519, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.781725888324873, |
|
"grad_norm": 1.7835249600977168, |
|
"learning_rate": 3.127579938930891e-08, |
|
"loss": 0.2498, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 2.788494077834179, |
|
"grad_norm": 1.9602449026673896, |
|
"learning_rate": 2.934922458721578e-08, |
|
"loss": 0.2609, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.7952622673434857, |
|
"grad_norm": 1.7932782220063819, |
|
"learning_rate": 2.748300597849429e-08, |
|
"loss": 0.2463, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 2.802030456852792, |
|
"grad_norm": 1.8373837478135608, |
|
"learning_rate": 2.5677259606786682e-08, |
|
"loss": 0.2587, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.808798646362098, |
|
"grad_norm": 1.7579583567771166, |
|
"learning_rate": 2.393209775550087e-08, |
|
"loss": 0.2409, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.8155668358714045, |
|
"grad_norm": 1.7193835474755472, |
|
"learning_rate": 2.224762894082921e-08, |
|
"loss": 0.2558, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.8223350253807107, |
|
"grad_norm": 1.9046287715832801, |
|
"learning_rate": 2.06239579050006e-08, |
|
"loss": 0.2551, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 2.829103214890017, |
|
"grad_norm": 2.0268943381701585, |
|
"learning_rate": 1.9061185609766995e-08, |
|
"loss": 0.2412, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.835871404399323, |
|
"grad_norm": 1.9877508850932446, |
|
"learning_rate": 1.7559409230125997e-08, |
|
"loss": 0.2554, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 2.8426395939086295, |
|
"grad_norm": 1.8920868869430731, |
|
"learning_rate": 1.6118722148278584e-08, |
|
"loss": 0.2563, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.8494077834179357, |
|
"grad_norm": 2.075343149112532, |
|
"learning_rate": 1.4739213947821737e-08, |
|
"loss": 0.2524, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 2.8561759729272422, |
|
"grad_norm": 2.055837498560825, |
|
"learning_rate": 1.342097040817891e-08, |
|
"loss": 0.2502, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.8629441624365484, |
|
"grad_norm": 1.7560367998970217, |
|
"learning_rate": 1.2164073499265403e-08, |
|
"loss": 0.2619, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 2.8697123519458545, |
|
"grad_norm": 1.892609707767642, |
|
"learning_rate": 1.0968601376391995e-08, |
|
"loss": 0.2583, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.8764805414551606, |
|
"grad_norm": 1.990394034627918, |
|
"learning_rate": 9.834628375404718e-09, |
|
"loss": 0.2644, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 2.8832487309644668, |
|
"grad_norm": 1.828132568376944, |
|
"learning_rate": 8.762225008062673e-09, |
|
"loss": 0.2532, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.8900169204737733, |
|
"grad_norm": 1.912512170265383, |
|
"learning_rate": 7.75145795765375e-09, |
|
"loss": 0.2561, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 2.8967851099830795, |
|
"grad_norm": 1.8627242489328315, |
|
"learning_rate": 6.80239007484773e-09, |
|
"loss": 0.2524, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.903553299492386, |
|
"grad_norm": 1.532454933071209, |
|
"learning_rate": 5.915080373788961e-09, |
|
"loss": 0.2369, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 2.910321489001692, |
|
"grad_norm": 1.799514001407587, |
|
"learning_rate": 5.089584028425742e-09, |
|
"loss": 0.2561, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.9170896785109983, |
|
"grad_norm": 1.6517806725559891, |
|
"learning_rate": 4.325952369080288e-09, |
|
"loss": 0.2491, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 2.9238578680203045, |
|
"grad_norm": 1.8524564319779193, |
|
"learning_rate": 3.6242328792567278e-09, |
|
"loss": 0.2548, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.9306260575296106, |
|
"grad_norm": 1.7195279301746678, |
|
"learning_rate": 2.984469192688577e-09, |
|
"loss": 0.2588, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 2.937394247038917, |
|
"grad_norm": 1.9136832821313436, |
|
"learning_rate": 2.4067010906254628e-09, |
|
"loss": 0.2537, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.9441624365482233, |
|
"grad_norm": 2.0082222078514844, |
|
"learning_rate": 1.8909644993593267e-09, |
|
"loss": 0.2654, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.95093062605753, |
|
"grad_norm": 1.8775828520860633, |
|
"learning_rate": 1.4372914879909881e-09, |
|
"loss": 0.2486, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.957698815566836, |
|
"grad_norm": 1.8912270182535766, |
|
"learning_rate": 1.0457102664356288e-09, |
|
"loss": 0.2471, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 2.964467005076142, |
|
"grad_norm": 1.8579123732190646, |
|
"learning_rate": 7.162451836685291e-10, |
|
"loss": 0.2366, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.9712351945854483, |
|
"grad_norm": 1.7842037052182547, |
|
"learning_rate": 4.4891672621161226e-10, |
|
"loss": 0.2467, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 2.9780033840947544, |
|
"grad_norm": 1.875106833427518, |
|
"learning_rate": 2.4374151685913057e-10, |
|
"loss": 0.2626, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.984771573604061, |
|
"grad_norm": 1.7694776387255406, |
|
"learning_rate": 1.007323136438254e-10, |
|
"loss": 0.2515, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 2.991539763113367, |
|
"grad_norm": 1.8122490703088023, |
|
"learning_rate": 1.9898009044450048e-11, |
|
"loss": 0.2448, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.996954314720812, |
|
"step": 2214, |
|
"total_flos": 865276955983872.0, |
|
"train_loss": 0.3529874208604525, |
|
"train_runtime": 40097.4129, |
|
"train_samples_per_second": 7.073, |
|
"train_steps_per_second": 0.055 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2214, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10086, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 865276955983872.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|