{
  "best_metric": 1.2792030572891235,
  "best_model_checkpoint": "output/oxxxymiron/checkpoint-4807",
  "epoch": 19.0,
  "global_step": 4807,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.02, "learning_rate": 0.00013707387498762246, "loss": 2.6224, "step": 5},
    {"epoch": 0.04, "learning_rate": 0.00013669596372654658, "loss": 2.5403, "step": 10},
    {"epoch": 0.06, "learning_rate": 0.00013606765583958522, "loss": 2.4692, "step": 15},
    {"epoch": 0.08, "learning_rate": 0.00013519126168650572, "loss": 2.3553, "step": 20},
    {"epoch": 0.1, "learning_rate": 0.00013407000386857356, "loss": 2.2478, "step": 25},
    {"epoch": 0.12, "learning_rate": 0.0001327080053786798, "loss": 2.4781, "step": 30},
    {"epoch": 0.14, "learning_rate": 0.0001311102744406261, "loss": 2.2512, "step": 35},
    {"epoch": 0.15, "learning_rate": 0.00012928268609331444, "loss": 2.2522, "step": 40},
    {"epoch": 0.17, "learning_rate": 0.00012723196058755901, "loss": 2.4658, "step": 45},
    {"epoch": 0.19, "learning_rate": 0.00012496563867495734, "loss": 2.2714, "step": 50},
    {"epoch": 0.21, "learning_rate": 0.0001224920538796866, "loss": 2.2569, "step": 55},
    {"epoch": 0.23, "learning_rate": 0.00011982030185518478, "loss": 2.2897, "step": 60},
    {"epoch": 0.25, "learning_rate": 0.00011696020693839531, "loss": 2.1927, "step": 65},
    {"epoch": 0.27, "learning_rate": 0.00011392228602455959, "loss": 2.1867, "step": 70},
    {"epoch": 0.29, "learning_rate": 0.00011071770989539364, "loss": 2.1953, "step": 75},
    {"epoch": 0.31, "learning_rate": 0.00010735826214284955, "loss": 2.2485, "step": 80},
    {"epoch": 0.33, "learning_rate": 0.00010385629583950372, "loss": 2.1775, "step": 85},
    {"epoch": 0.35, "learning_rate": 0.00010022468811490008, "loss": 2.2653, "step": 90},
    {"epoch": 0.37, "learning_rate": 9.647679280487579e-05, "loss": 2.2253, "step": 95},
    {"epoch": 0.39, "learning_rate": 9.262639134798382e-05, "loss": 2.2196, "step": 100},
    {"epoch": 0.41, "learning_rate": 8.868764210957135e-05, "loss": 2.1454, "step": 105},
    {"epoch": 0.42, "learning_rate": 8.467502831985555e-05, "loss": 2.1683, "step": 110},
    {"epoch": 0.44, "learning_rate": 8.060330481743385e-05, "loss": 2.1135, "step": 115},
    {"epoch": 0.46, "learning_rate": 7.648744379405968e-05, "loss": 2.2169, "step": 120},
    {"epoch": 0.48, "learning_rate": 7.234257974018543e-05, "loss": 2.0504, "step": 125},
    {"epoch": 0.5, "learning_rate": 6.818395379371482e-05, "loss": 2.1956, "step": 130},
    {"epoch": 0.52, "learning_rate": 6.40268576966004e-05, "loss": 2.1556, "step": 135},
    {"epoch": 0.54, "learning_rate": 5.9886577565364115e-05, "loss": 2.1258, "step": 140},
    {"epoch": 0.56, "learning_rate": 5.577833768230335e-05, "loss": 2.0906, "step": 145},
    {"epoch": 0.58, "learning_rate": 5.171724451406823e-05, "loss": 2.0648, "step": 150},
    {"epoch": 0.6, "learning_rate": 4.7718231163460816e-05, "loss": 2.1031, "step": 155},
    {"epoch": 0.62, "learning_rate": 4.379600245871268e-05, "loss": 2.456, "step": 160},
    {"epoch": 0.64, "learning_rate": 3.99649808821543e-05, "loss": 2.1486, "step": 165},
    {"epoch": 0.66, "learning_rate": 3.6239253537102286e-05, "loss": 2.1327, "step": 170},
    {"epoch": 0.68, "learning_rate": 3.263252034797391e-05, "loss": 2.1147, "step": 175},
    {"epoch": 0.69, "learning_rate": 2.9158043684102103e-05, "loss": 2.0989, "step": 180},
    {"epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.082, "step": 185},
    {"epoch": 0.73, "learning_rate": 2.265643081883308e-05, "loss": 2.0394, "step": 190},
    {"epoch": 0.75, "learning_rate": 1.9653201789538038e-05, "loss": 2.0346, "step": 195},
    {"epoch": 0.77, "learning_rate": 1.682995572030529e-05, "loss": 2.1082, "step": 200},
    {"epoch": 0.79, "learning_rate": 1.419707400896067e-05, "loss": 2.1204, "step": 205},
    {"epoch": 0.81, "learning_rate": 1.1764238061872434e-05, "loss": 1.976, "step": 210},
    {"epoch": 0.83, "learning_rate": 9.540393694315812e-06, "loss": 1.9811, "step": 215},
    {"epoch": 0.85, "learning_rate": 7.533718235689727e-06, "loss": 2.0259, "step": 220},
    {"epoch": 0.87, "learning_rate": 5.751590460543944e-06, "loss": 2.0717, "step": 225},
    {"epoch": 0.89, "learning_rate": 4.2005634559834046e-06, "loss": 2.0042, "step": 230},
    {"epoch": 0.91, "learning_rate": 2.8863405252193965e-06, "loss": 2.0301, "step": 235},
    {"epoch": 0.93, "learning_rate": 1.8137542158731914e-06, "loss": 2.0554, "step": 240},
    {"epoch": 0.95, "learning_rate": 9.867485501471999e-07, "loss": 2.0167, "step": 245},
    {"epoch": 0.97, "learning_rate": 4.083645222054329e-07, "loss": 2.0252, "step": 250},
    {"epoch": 0.98, "learning_rate": 8.072891609113784e-08, "loss": 2.0128, "step": 255},
    {"epoch": 1.0, "learning_rate": 5.046485299251069e-09, "loss": 2.1545, "step": 260},
    {"epoch": 1.02, "learning_rate": 1.815955227603999e-07, "loss": 2.1436, "step": 265},
    {"epoch": 1.04, "learning_rate": 6.097268375260298e-07, "loss": 2.055, "step": 270},
    {"epoch": 1.06, "learning_rate": 1.2878661419176275e-06, "loss": 2.0446, "step": 275},
    {"epoch": 1.08, "learning_rate": 2.2135198403619775e-06, "loss": 2.0962, "step": 280},
    {"epoch": 1.1, "learning_rate": 3.3832841986266328e-06, "loss": 1.9858, "step": 285},
    {"epoch": 1.12, "learning_rate": 4.7928578597388414e-06, "loss": 2.0331, "step": 290},
    {"epoch": 1.14, "learning_rate": 6.437057660565834e-06, "loss": 2.0861, "step": 295},
    {"epoch": 1.16, "learning_rate": 8.309837690896773e-06, "loss": 2.0373, "step": 300},
    {"epoch": 1.18, "learning_rate": 1.0404311524944352e-05, "loss": 2.0409, "step": 305},
    {"epoch": 1.2, "learning_rate": 1.271277754351776e-05, "loss": 2.1014, "step": 310},
    {"epoch": 1.22, "learning_rate": 1.5226747253755048e-05, "loss": 2.0414, "step": 315},
    {"epoch": 1.24, "learning_rate": 1.79369765022795e-05, "loss": 2.1258, "step": 320},
    {"epoch": 1.25, "learning_rate": 2.083349946700612e-05, "loss": 2.0382, "step": 325},
    {"epoch": 1.27, "learning_rate": 2.3905665302606086e-05, "loss": 1.9499, "step": 330},
    {"epoch": 1.29, "learning_rate": 2.7142177304879985e-05, "loss": 2.1053, "step": 335},
    {"epoch": 1.31, "learning_rate": 3.05311344500276e-05, "loss": 2.0373, "step": 340},
    {"epoch": 1.33, "learning_rate": 3.4060075156069894e-05, "loss": 1.9227, "step": 345},
    {"epoch": 1.35, "learning_rate": 3.771602310550759e-05, "loss": 2.0504, "step": 350},
    {"epoch": 1.37, "learning_rate": 4.148553496072039e-05, "loss": 2.0583, "step": 355},
    {"epoch": 1.39, "learning_rate": 4.5354749796652995e-05, "loss": 2.0166, "step": 360},
    {"epoch": 1.41, "learning_rate": 4.930944006901758e-05, "loss": 2.0524, "step": 365},
    {"epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 2.0846, "step": 370},
    {"epoch": 1.45, "learning_rate": 5.741681870327513e-05, "loss": 2.0408, "step": 375},
    {"epoch": 1.47, "learning_rate": 6.153969530917408e-05, "loss": 1.9538, "step": 380},
    {"epoch": 1.49, "learning_rate": 6.56885334607442e-05, "loss": 2.1089, "step": 385},
    {"epoch": 1.51, "learning_rate": 6.984807740687121e-05, "loss": 2.0182, "step": 390},
    {"epoch": 1.53, "learning_rate": 7.400303203001308e-05, "loss": 1.9797, "step": 395},
    {"epoch": 1.54, "learning_rate": 7.813811908809188e-05, "loss": 2.0459, "step": 400},
    {"epoch": 1.56, "learning_rate": 8.22381333943327e-05, "loss": 2.0528, "step": 405},
    {"epoch": 1.58, "learning_rate": 8.628799872846947e-05, "loss": 2.0164, "step": 410},
    {"epoch": 1.6, "learning_rate": 9.027282327372695e-05, "loss": 1.973, "step": 415},
    {"epoch": 1.62, "learning_rate": 9.417795437572915e-05, "loss": 2.1012, "step": 420},
    {"epoch": 1.64, "learning_rate": 9.798903242198118e-05, "loss": 1.9342, "step": 425},
    {"epoch": 1.66, "learning_rate": 0.00010169204364380207, "loss": 1.9812, "step": 430},
    {"epoch": 1.68, "learning_rate": 0.0001052733716465509, "loss": 2.1037, "step": 435},
    {"epoch": 1.7, "learning_rate": 0.0001087198474786628, "loss": 2.0833, "step": 440},
    {"epoch": 1.72, "learning_rate": 0.00011201879805538593, "loss": 2.088, "step": 445},
    {"epoch": 1.74, "learning_rate": 0.00011515809275916009, "loss": 2.0211, "step": 450},
    {"epoch": 1.76, "learning_rate": 0.00011812618804528036, "loss": 1.9749, "step": 455},
    {"epoch": 1.78, "learning_rate": 0.00012091216988882859, "loss": 2.1269, "step": 460},
    {"epoch": 1.8, "learning_rate": 0.0001235057939167871, "loss": 2.1407, "step": 465},
    {"epoch": 1.81, "learning_rate": 0.0001258975230777674, "loss": 1.9911, "step": 470},
    {"epoch": 1.83, "learning_rate": 0.00012807856271083559, "loss": 1.8852, "step": 475},
    {"epoch": 1.85, "learning_rate": 0.00013004089288448385, "loss": 2.084, "step": 480},
    {"epoch": 1.87, "learning_rate": 0.00013177729788683344, "loss": 2.0223, "step": 485},
    {"epoch": 1.89, "learning_rate": 0.00013328139275863037, "loss": 1.8615, "step": 490},
    {"epoch": 1.91, "learning_rate": 0.00013454764677146868, "loss": 1.9669, "step": 495},
    {"epoch": 1.93, "learning_rate": 0.00013557140376490993, "loss": 1.997, "step": 500},
    {"epoch": 1.95, "learning_rate": 0.00013634889926771582, "loss": 2.0132, "step": 505},
    {"epoch": 1.97, "learning_rate": 0.00013687727434023874, "loss": 2.1001, "step": 510},
    {"epoch": 1.99, "learning_rate": 0.00013715458608706872, "loss": 1.9605, "step": 515},
    {"epoch": 2.01, "learning_rate": 0.00013717981480128154, "loss": 1.9686, "step": 520},
    {"epoch": 2.03, "learning_rate": 0.0001369528677140173, "loss": 1.9054, "step": 525},
    {"epoch": 2.05, "learning_rate": 0.00013647457933560234, "loss": 1.9621, "step": 530},
    {"epoch": 2.07, "learning_rate": 0.00013574670838695924, "loss": 1.9001, "step": 535},
    {"epoch": 2.08, "learning_rate": 0.00013477193133258972, "loss": 1.9553, "step": 540},
    {"epoch": 2.1, "learning_rate": 0.0001335538325389091, "loss": 1.9963, "step": 545},
    {"epoch": 2.12, "learning_rate": 0.00013209689109412243, "loss": 1.9554, "step": 550},
    {"epoch": 2.14, "learning_rate": 0.00013040646433810595, "loss": 2.0022, "step": 555},
    {"epoch": 2.16, "learning_rate": 0.00012848876816285752, "loss": 1.8702, "step": 560},
    {"epoch": 2.18, "learning_rate": 0.00012635085415595263, "loss": 1.9121, "step": 565},
    {"epoch": 2.2, "learning_rate": 0.00012400058367105258, "loss": 2.0007, "step": 570},
    {"epoch": 2.22, "learning_rate": 0.0001214465989208104, "loss": 2.0254, "step": 575},
    {"epoch": 2.24, "learning_rate": 0.00011869829119846914, "loss": 1.9571, "step": 580},
    {"epoch": 2.26, "learning_rate": 0.00011576576634500534, "loss": 1.9448, "step": 585},
    {"epoch": 2.28, "learning_rate": 0.00011265980758879933, "loss": 2.0852, "step": 590},
    {"epoch": 2.3, "learning_rate": 0.00010939183589447406, "loss": 1.9757, "step": 595},
    {"epoch": 2.32, "learning_rate": 0.00010597386796670587, "loss": 2.0051, "step": 600},
    {"epoch": 2.34, "learning_rate": 0.0001024184720634304, "loss": 2.015, "step": 605},
    {"epoch": 2.36, "learning_rate": 9.873872178092492e-05, "loss": 1.9478, "step": 610},
    {"epoch": 2.37, "learning_rate": 9.494814798070336e-05, "loss": 1.8666, "step": 615},
    {"epoch": 2.39, "learning_rate": 9.106068903499513e-05, "loss": 1.9268, "step": 620},
    {"epoch": 2.41, "learning_rate": 8.709063957376078e-05, "loss": 1.8729, "step": 625},
    {"epoch": 2.43, "learning_rate": 8.305259792170682e-05, "loss": 1.9046, "step": 630},
    {"epoch": 2.45, "learning_rate": 7.89614124185811e-05, "loss": 1.8255, "step": 635},
    {"epoch": 2.47, "learning_rate": 7.48321268201335e-05, "loss": 1.9063, "step": 640},
    {"epoch": 2.49, "learning_rate": 7.06799249805101e-05, "loss": 1.9487, "step": 645},
    {"epoch": 2.51, "learning_rate": 6.652007501948994e-05, "loss": 1.9612, "step": 650},
    {"epoch": 2.53, "learning_rate": 6.236787317986654e-05, "loss": 1.829, "step": 655},
    {"epoch": 2.55, "learning_rate": 5.8238587581418946e-05, "loss": 1.8833, "step": 660},
    {"epoch": 2.57, "learning_rate": 5.414740207829316e-05, "loss": 1.9743, "step": 665},
    {"epoch": 2.59, "learning_rate": 5.010936042623931e-05, "loss": 1.8592, "step": 670},
    {"epoch": 2.61, "learning_rate": 4.6139310965004967e-05, "loss": 1.8143, "step": 675},
    {"epoch": 2.63, "learning_rate": 4.225185201929667e-05, "loss": 1.9597, "step": 680},
    {"epoch": 2.64, "learning_rate": 3.8461278219075114e-05, "loss": 1.9673, "step": 685},
    {"epoch": 2.66, "learning_rate": 3.478152793656968e-05, "loss": 1.9703, "step": 690},
    {"epoch": 2.68, "learning_rate": 3.1226132033294165e-05, "loss": 1.8479, "step": 695},
    {"epoch": 2.7, "learning_rate": 2.7808164105525978e-05, "loss": 1.8916, "step": 700},
    {"epoch": 2.72, "learning_rate": 2.454019241120065e-05, "loss": 1.9233, "step": 705},
    {"epoch": 2.74, "learning_rate": 2.1434233654994646e-05, "loss": 1.8525, "step": 710},
    {"epoch": 2.76, "learning_rate": 1.850170880153093e-05, "loss": 1.9783, "step": 715},
    {"epoch": 2.78, "learning_rate": 1.5753401079189615e-05, "loss": 1.8898, "step": 720},
    {"epoch": 2.8, "learning_rate": 1.3199416328947464e-05, "loss": 1.9018, "step": 725},
    {"epoch": 2.82, "learning_rate": 1.0849145844047363e-05, "loss": 1.9754, "step": 730},
    {"epoch": 2.84, "learning_rate": 8.711231837142545e-06, "loss": 1.9263, "step": 735},
    {"epoch": 2.86, "learning_rate": 6.793535661894062e-06, "loss": 1.836, "step": 740},
    {"epoch": 2.88, "learning_rate": 5.103108905877591e-06, "loss": 1.9056, "step": 745},
    {"epoch": 2.9, "learning_rate": 3.6461674610908713e-06, "loss": 1.9471, "step": 750},
    {"epoch": 2.92, "learning_rate": 2.4280686674102973e-06, "loss": 1.7961, "step": 755},
    {"epoch": 2.93, "learning_rate": 1.453291613040777e-06, "loss": 1.9143, "step": 760},
    {"epoch": 2.95, "learning_rate": 7.254206643976737e-07, "loss": 1.9633, "step": 765},
    {"epoch": 2.97, "learning_rate": 2.4713228598268823e-07, "loss": 1.9096, "step": 770},
    {"epoch": 2.99, "learning_rate": 2.018519871846962e-08, "loss": 1.7972, "step": 775},
    {"epoch": 3.01, "learning_rate": 4.5413912931266996e-08, "loss": 1.8293, "step": 780},
    {"epoch": 3.03, "learning_rate": 3.2272565976124403e-07, "loss": 1.9095, "step": 785},
    {"epoch": 3.05, "learning_rate": 8.511007322841792e-07, "loss": 1.8653, "step": 790},
    {"epoch": 3.07, "learning_rate": 1.628596235090069e-06, "loss": 1.7876, "step": 795},
    {"epoch": 3.09, "learning_rate": 2.652353228531267e-06, "loss": 1.8415, "step": 800},
    {"epoch": 3.11, "learning_rate": 3.918607241369593e-06, "loss": 1.9383, "step": 805},
    {"epoch": 3.13, "learning_rate": 5.4227021131665505e-06, "loss": 1.8484, "step": 810},
    {"epoch": 3.15, "learning_rate": 7.159107115516193e-06, "loss": 1.8539, "step": 815},
    {"epoch": 3.17, "learning_rate": 9.121437289164363e-06, "loss": 1.8604, "step": 820},
    {"epoch": 3.19, "learning_rate": 1.1302476922232561e-05, "loss": 1.857, "step": 825},
    {"epoch": 3.2, "learning_rate": 1.3694206083212835e-05, "loss": 1.8542, "step": 830},
    {"epoch": 3.22, "learning_rate": 1.6287830111171433e-05, "loss": 1.8419, "step": 835},
    {"epoch": 3.24, "learning_rate": 1.90738119547196e-05, "loss": 1.853, "step": 840},
    {"epoch": 3.26, "learning_rate": 2.204190724083989e-05, "loss": 1.7492, "step": 845},
    {"epoch": 3.28, "learning_rate": 2.5181201944614038e-05, "loss": 1.8399, "step": 850},
    {"epoch": 3.3, "learning_rate": 2.8480152521337186e-05, "loss": 1.8052, "step": 855},
    {"epoch": 3.32, "learning_rate": 3.192662835344901e-05, "loss": 1.9257, "step": 860},
    {"epoch": 3.34, "learning_rate": 3.550795635619789e-05, "loss": 1.8094, "step": 865},
    {"epoch": 3.36, "learning_rate": 3.921096757801878e-05, "loss": 1.806, "step": 870},
    {"epoch": 3.38, "learning_rate": 4.302204562427086e-05, "loss": 1.8443, "step": 875},
    {"epoch": 3.4, "learning_rate": 4.692717672627302e-05, "loss": 1.892, "step": 880},
    {"epoch": 3.42, "learning_rate": 5.091200127153043e-05, "loss": 1.8859, "step": 885},
    {"epoch": 3.44, "learning_rate": 5.496186660566721e-05, "loss": 1.8386, "step": 890},
    {"epoch": 3.46, "learning_rate": 5.906188091190809e-05, "loss": 1.7457, "step": 895},
    {"epoch": 3.47, "learning_rate": 6.3196967969987e-05, "loss": 1.8338, "step": 900},
    {"epoch": 3.49, "learning_rate": 6.73519225931287e-05, "loss": 1.9389, "step": 905},
    {"epoch": 3.51, "learning_rate": 7.151146653925576e-05, "loss": 1.8629, "step": 910},
    {"epoch": 3.53, "learning_rate": 7.566030469082582e-05, "loss": 1.8406, "step": 915},
    {"epoch": 3.55, "learning_rate": 7.978318129672488e-05, "loss": 1.8205, "step": 920},
    {"epoch": 3.57, "learning_rate": 8.386493606940314e-05, "loss": 1.9223, "step": 925},
    {"epoch": 3.59, "learning_rate": 8.789055993098239e-05, "loss": 1.9075, "step": 930},
    {"epoch": 3.61, "learning_rate": 9.184525020334701e-05, "loss": 1.9361, "step": 935},
    {"epoch": 3.63, "learning_rate": 9.571446503927958e-05, "loss": 1.9117, "step": 940},
    {"epoch": 3.65, "learning_rate": 9.948397689449231e-05, "loss": 1.8154, "step": 945},
    {"epoch": 3.67, "learning_rate": 0.00010313992484393007, "loss": 1.7656, "step": 950},
    {"epoch": 3.69, "learning_rate": 0.00010666886554997237, "loss": 1.8084, "step": 955},
    {"epoch": 3.71, "learning_rate": 0.00011005782269512003, "loss": 1.8921, "step": 960},
    {"epoch": 3.73, "learning_rate": 0.00011329433469739388, "loss": 1.8615, "step": 965},
    {"epoch": 3.75, "learning_rate": 0.00011636650053299383, "loss": 1.9111, "step": 970},
    {"epoch": 3.76, "learning_rate": 0.00011926302349772043, "loss": 1.8464, "step": 975},
    {"epoch": 3.78, "learning_rate": 0.00012197325274624493, "loss": 1.8594, "step": 980},
    {"epoch": 3.8, "learning_rate": 0.00012448722245648227, "loss": 1.9001, "step": 985},
    {"epoch": 3.82, "learning_rate": 0.00012679568847505558, "loss": 1.9249, "step": 990},
    {"epoch": 3.84, "learning_rate": 0.00012889016230910322, "loss": 1.8819, "step": 995},
    {"epoch": 3.86, "learning_rate": 0.00013076294233943414, "loss": 1.8397, "step": 1000},
    {"epoch": 3.88, "learning_rate": 0.00013240714214026114, "loss": 1.8645, "step": 1005},
    {"epoch": 3.9, "learning_rate": 0.00013381671580137337, "loss": 1.8523, "step": 1010},
    {"epoch": 3.92, "learning_rate": 0.00013498648015963801, "loss": 1.8243, "step": 1015},
    {"epoch": 3.94, "learning_rate": 0.00013591213385808236, "loss": 1.7541, "step": 1020},
    {"epoch": 3.96, "learning_rate": 0.00013659027316247397, "loss": 1.8299, "step": 1025},
    {"epoch": 3.98, "learning_rate": 0.00013701840447723958, "loss": 1.8042, "step": 1030},
    {"epoch": 4.0, "learning_rate": 0.00013719495351470075, "loss": 1.8895, "step": 1035},
    {"epoch": 4.02, "learning_rate": 0.00013711927108390887, "loss": 1.7727, "step": 1040},
    {"epoch": 4.03, "learning_rate": 0.00013679163547779458, "loss": 1.7147, "step": 1045},
    {"epoch": 4.05, "learning_rate": 0.0001362132514498528, "loss": 1.7369, "step": 1050},
    {"epoch": 4.07, "learning_rate": 0.00013538624578412684, "loss": 1.8461, "step": 1055},
    {"epoch": 4.09, "learning_rate": 0.00013431365947478064, "loss": 1.8544, "step": 1060},
    {"epoch": 4.11, "learning_rate": 0.00013299943654401656, "loss": 1.7556, "step": 1065},
    {"epoch": 4.13, "learning_rate": 0.00013144840953945602, "loss": 1.8978, "step": 1070},
    {"epoch": 4.15, "learning_rate": 0.00012966628176431033, "loss": 1.7574, "step": 1075},
    {"epoch": 4.17, "learning_rate": 0.00012765960630568425, "loss": 1.8181, "step": 1080},
    {"epoch": 4.19, "learning_rate": 0.00012543576193812755, "loss": 1.8873, "step": 1085},
    {"epoch": 4.21, "learning_rate": 0.00012300292599103934, "loss": 1.8158, "step": 1090},
    {"epoch": 4.23, "learning_rate": 0.00012037004427969473, "loss": 1.7751, "step": 1095},
    {"epoch": 4.25, "learning_rate": 0.00011754679821046194, "loss": 1.8045, "step": 1100},
    {"epoch": 4.27, "learning_rate": 0.00011454356918116694, "loss": 1.8204, "step": 1105},
    {"epoch": 4.29, "learning_rate": 0.00011137140040750914, "loss": 1.7895, "step": 1110},
    {"epoch": 4.31, "learning_rate": 0.00010804195631589795, "loss": 1.8798, "step": 1115},
    {"epoch": 4.32, "learning_rate": 0.0001045674796520261, "loss": 1.8371, "step": 1120},
    {"epoch": 4.34, "learning_rate": 0.00010096074646289774, "loss": 1.8113, "step": 1125},
    {"epoch": 4.36, "learning_rate": 9.72350191178458e-05, "loss": 1.7679, "step": 1130},
    {"epoch": 4.38, "learning_rate": 9.340399754128733e-05, "loss": 1.8109, "step": 1135},
    {"epoch": 4.4, "learning_rate": 8.948176883653924e-05, "loss": 1.791, "step": 1140},
    {"epoch": 4.42, "learning_rate": 8.548275548593188e-05, "loss": 1.7472, "step": 1145},
    {"epoch": 4.44, "learning_rate": 8.14216623176968e-05, "loss": 1.7982, "step": 1150},
    {"epoch": 4.46, "learning_rate": 7.731342243463583e-05, "loss": 1.8122, "step": 1155},
    {"epoch": 4.48, "learning_rate": 7.317314230339972e-05, "loss": 1.8021, "step": 1160},
    {"epoch": 4.5, "learning_rate": 6.901604620628534e-05, "loss": 1.767, "step": 1165},
    {"epoch": 4.52, "learning_rate": 6.485742025981452e-05, "loss": 1.773, "step": 1170},
    {"epoch": 4.54, "learning_rate": 6.0712556205940305e-05, "loss": 1.7419, "step": 1175},
    {"epoch": 4.56, "learning_rate": 5.6596695182566174e-05, "loss": 1.7812, "step": 1180},
    {"epoch": 4.58, "learning_rate": 5.2524971680144414e-05, "loss": 1.7998, "step": 1185},
    {"epoch": 4.59, "learning_rate": 4.8512357890428636e-05, "loss": 1.8107, "step": 1190},
    {"epoch": 4.61, "learning_rate": 4.4573608652016233e-05, "loss": 1.8343, "step": 1195},
    {"epoch": 4.63, "learning_rate": 4.0723207195124294e-05, "loss": 1.7872, "step": 1200},
    {"epoch": 4.65, "learning_rate": 3.697531188509992e-05, "loss": 1.8365, "step": 1205},
    {"epoch": 4.67, "learning_rate": 3.334370416049629e-05, "loss": 1.7498, "step": 1210},
    {"epoch": 4.69, "learning_rate": 2.9841737857150516e-05, "loss": 1.7864, "step": 1215},
    {"epoch": 4.71, "learning_rate": 2.6482290104606358e-05, "loss": 1.7891, "step": 1220},
    {"epoch": 4.73, "learning_rate": 2.327771397544045e-05, "loss": 1.821, "step": 1225},
    {"epoch": 4.75, "learning_rate": 2.0239793061604753e-05, "loss": 1.6957, "step": 1230},
    {"epoch": 4.77, "learning_rate": 1.7379698144815295e-05, "loss": 1.7592, "step": 1235},
    {"epoch": 4.79, "learning_rate": 1.4707946120313422e-05, "loss": 1.7464, "step": 1240},
    {"epoch": 4.81, "learning_rate": 1.2234361325042733e-05, "loss": 1.7876, "step": 1245},
    {"epoch": 4.83, "learning_rate": 9.968039412441069e-06, "loss": 1.7211, "step": 1250},
    {"epoch": 4.85, "learning_rate": 7.917313906685515e-06, "loss": 1.7474, "step": 1255},
    {"epoch": 4.86, "learning_rate": 6.089725559373884e-06, "loss": 1.7813, "step": 1260},
    {"epoch": 4.88, "learning_rate": 4.491994621320209e-06, "loss": 1.7099, "step": 1265},
    {"epoch": 4.9, "learning_rate": 3.1299961314264275e-06, "loss": 1.697, "step": 1270},
    {"epoch": 4.92, "learning_rate": 2.0087383134942665e-06, "loss": 1.8274, "step": 1275},
    {"epoch": 4.94, "learning_rate": 1.1323441604147912e-06, "loss": 1.8383, "step": 1280},
    {"epoch": 4.96, "learning_rate": 5.040362734534388e-07, "loss": 1.8429, "step": 1285},
    {"epoch": 4.98, "learning_rate": 1.2612501237755182e-07, "loss": 1.7653, "step": 1290},
    {"epoch": 5.0, "learning_rate": 0.0, "loss": 1.7624, "step": 1295},
    {"epoch": 5.02, "learning_rate": 1.261250123775442e-07, "loss": 1.6889, "step": 1300},
    {"epoch": 5.04, "learning_rate": 5.040362734534236e-07, "loss": 1.6715, "step": 1305},
    {"epoch": 5.06, "learning_rate": 1.132344160414776e-06, "loss": 1.7252, "step": 1310},
    {"epoch": 5.08, "learning_rate": 2.0087383134942512e-06, "loss": 1.7355, "step": 1315},
    {"epoch": 5.1, "learning_rate": 3.1299961314264046e-06, "loss": 1.8099, "step": 1320},
    {"epoch": 5.12, "learning_rate": 4.491994621320179e-06, "loss": 1.7203, "step": 1325},
    {"epoch": 5.14, "learning_rate": 6.089725559373899e-06, "loss": 1.7262, "step": 1330},
    {"epoch": 5.15, "learning_rate": 7.917313906685478e-06, "loss": 1.721, "step": 1335},
    {"epoch": 5.17, "learning_rate": 9.968039412440962e-06, "loss": 1.6592, "step": 1340},
    {"epoch": 5.19, "learning_rate": 1.2234361325042687e-05, "loss": 1.6871, "step": 1345},
    {"epoch": 5.21, "learning_rate": 1.4707946120313293e-05, "loss": 1.7319, "step": 1350},
    {"epoch": 5.23, "learning_rate": 1.737969814481516e-05, "loss": 1.7102, "step": 1355},
    {"epoch": 5.25, "learning_rate": 2.0239793061604692e-05, "loss": 1.6969, "step": 1360},
    {"epoch": 5.27, "learning_rate": 2.3277713975440297e-05, "loss": 1.6891, "step": 1365},
    {"epoch": 5.29, "learning_rate": 2.648229010460629e-05, "loss": 1.7584, "step": 1370},
    {"epoch": 5.31, "learning_rate": 2.9841737857150448e-05, "loss": 1.7074, "step": 1375},
    {"epoch": 5.33, "learning_rate": 3.334370416049612e-05, "loss": 1.6984, "step": 1380},
    {"epoch": 5.35, "learning_rate": 3.697531188509984e-05, "loss": 1.6734, "step": 1385},
    {"epoch": 5.37, "learning_rate": 4.072320719512421e-05, "loss": 1.6728, "step": 1390},
    {"epoch": 5.39, "learning_rate": 4.457360865201626e-05, "loss": 1.7748, "step": 1395},
    {"epoch": 5.41, "learning_rate": 4.8512357890428555e-05, "loss": 1.6899, "step": 1400},
    {"epoch": 5.42, "learning_rate": 5.252497168014445e-05, "loss": 1.6741, "step": 1405},
    {"epoch": 5.44, "learning_rate": 5.659669518256621e-05, "loss": 1.6965, "step": 1410},
    {"epoch": 5.46, "learning_rate": 6.071255620594022e-05, "loss": 1.6641, "step": 1415},
    {"epoch": 5.48, "learning_rate": 6.485742025981456e-05, "loss": 1.7494, "step": 1420},
    {"epoch": 5.5, "learning_rate": 6.901604620628525e-05, "loss": 1.7469, "step": 1425},
    {"epoch": 5.52, "learning_rate": 7.31731423033995e-05, "loss": 1.7503, "step": 1430},
    {"epoch": 5.54, "learning_rate": 7.731342243463585e-05, "loss": 1.7359, "step": 1435},
    {"epoch": 5.56, "learning_rate": 8.14216623176967e-05, "loss": 1.7196, "step": 1440},
    {"epoch": 5.58, "learning_rate": 8.548275548593167e-05, "loss": 1.7352, "step": 1445},
    {"epoch": 5.6, "learning_rate": 8.948176883653917e-05, "loss": 1.7017, "step": 1450},
    {"epoch": 5.62, "learning_rate": 9.340399754128714e-05, "loss": 1.7402, "step": 1455},
    {"epoch": 5.64, "learning_rate": 9.723501911784583e-05, "loss": 1.7463, "step": 1460},
    {"epoch": 5.66, "learning_rate": 0.00010096074646289766, "loss": 1.6842, "step": 1465},
    {"epoch": 5.68, "learning_rate": 0.00010456747965202592, "loss": 1.7311, "step": 1470},
    {"epoch": 5.69, "learning_rate": 0.00010804195631589798, "loss": 1.7528, "step": 1475},
    {"epoch": 5.71, "learning_rate": 0.00011137140040750908, "loss": 1.7338, "step": 1480},
    {"epoch": 5.73, "learning_rate": 0.00011454356918116697, "loss": 1.8454, "step": 1485},
    {"epoch": 5.75, "learning_rate": 0.00011754679821046187, "loss": 1.6556, "step": 1490},
    {"epoch": 5.77, "learning_rate": 0.00012037004427969469, "loss": 1.7088, "step": 1495},
    {"epoch": 5.79, "learning_rate": 0.00012300292599103937, "loss": 1.7949, "step": 1500},
    {"epoch": 5.81, "learning_rate": 0.0001254357619381275, "loss": 1.7168, "step": 1505},
    {"epoch": 5.83, "learning_rate": 0.00012765960630568417, "loss": 1.8008, "step": 1510},
    {"epoch": 5.85, "learning_rate": 0.00012966628176431028, "loss": 1.7668, "step": 1515},
    {"epoch": 5.87, "learning_rate": 0.000131448409539456, "loss": 1.8123, "step": 1520},
    {"epoch": 5.89, "learning_rate": 0.00013299943654401658, "loss": 1.6967, "step": 1525},
    {"epoch": 5.91, "learning_rate": 0.0001343136594747806, "loss": 1.7767, "step": 1530},
    {"epoch": 5.93, "learning_rate": 0.00013538624578412676, "loss": 1.8144, "step": 1535},
    {"epoch": 5.95, "learning_rate": 0.0001362132514498528, "loss": 1.7673, "step": 1540},
    {"epoch": 5.97, "learning_rate": 0.00013679163547779458, "loss": 1.7239, "step": 1545},
    {"epoch": 5.98, "learning_rate": 0.00013711927108390887, "loss": 1.7237, "step": 1550},
    {"epoch": 6.0, "learning_rate": 0.00013719495351470075, "loss": 1.8186, "step": 1555},
    {"epoch": 6.02, "learning_rate": 0.0001370184044772396, "loss": 1.7571, "step": 1560},
    {"epoch": 6.04, "learning_rate": 0.00013659027316247397, "loss": 1.6908, "step": 1565},
    {"epoch": 6.06, "learning_rate": 0.00013591213385808238, "loss": 1.6378, "step": 1570},
    {"epoch": 6.08, "learning_rate": 0.0001349864801596381, "loss": 1.763, "step": 1575},
    {"epoch": 6.1, "learning_rate": 0.00013381671580137334, "loss": 1.8114, "step": 1580},
    {"epoch": 6.12, "learning_rate": 0.00013240714214026117, "loss": 1.7691, "step": 1585},
    {"epoch": 6.14, "learning_rate": 0.00013076294233943417, "loss": 1.7388, "step": 1590},
    {"epoch": 6.16, "learning_rate": 0.0001288901623091032, "loss": 1.6458, "step": 1595},
    {"epoch": 6.18, "learning_rate": 0.00012679568847505571, "loss": 1.6852, "step": 1600},
    {"epoch": 6.2, "learning_rate": 0.00012448722245648225, "loss": 1.7267, "step": 1605},
    {"epoch": 6.22, "learning_rate": 0.00012197325274624507, "loss": 1.7517, "step": 1610},
    {"epoch": 6.24, "learning_rate": 0.00011926302349772057, "loss": 1.7343, "step": 1615},
    {"epoch": 6.25, "learning_rate": 0.0001163665005329939, "loss": 1.6811, "step": 1620},
    {"epoch": 6.27, "learning_rate": 0.00011329433469739406, "loss": 1.7056, "step": 1625},
    {"epoch": 6.29, "learning_rate": 0.00011005782269511991, "loss": 1.7447, "step": 1630},
    {"epoch": 6.31, "learning_rate": 0.00010666886554997244, "loss": 1.6661, "step": 1635},
    {"epoch": 6.33, "learning_rate": 0.00010313992484393024, "loss": 1.723, "step": 1640},
    {"epoch": 6.35, "learning_rate": 9.948397689449228e-05, "loss": 1.6887, "step": 1645},
    {"epoch": 6.37, "learning_rate": 9.571446503927964e-05, "loss": 1.6767, "step": 1650},
    {"epoch": 6.39, "learning_rate": 9.184525020334699e-05, "loss": 1.6593, "step": 1655},
    {"epoch": 6.41, "learning_rate": 8.789055993098258e-05, "loss": 1.6807, "step": 1660},
    {"epoch": 6.43, "learning_rate": 8.386493606940322e-05, "loss": 1.7043, "step": 1665},
    {"epoch": 6.45, "learning_rate": 7.978318129672484e-05, "loss": 1.7188, "step": 1670},
    {"epoch": 6.47, "learning_rate": 7.566030469082603e-05, "loss": 1.6494, "step": 1675},
    {"epoch": 6.49, "learning_rate": 7.151146653925584e-05, "loss": 1.6752, "step": 1680},
    {"epoch": 6.51, "learning_rate": 6.735192259312878e-05, "loss": 1.6569, "step": 1685},
    {"epoch": 6.53, "learning_rate": 6.319696796998709e-05, "loss": 1.6728, "step": 1690},
    {"epoch": 6.54, "learning_rate": 5.906188091190817e-05, "loss": 1.6875, "step": 1695},
    {"epoch": 6.56, "learning_rate": 5.4961866605667284e-05, "loss": 1.6511, "step": 1700},
    {"epoch": 6.58, "learning_rate": 5.091200127153063e-05, "loss": 1.6906, "step": 1705},
    {"epoch": 6.6, "learning_rate": 4.6927176726273094e-05, "loss": 1.6586, "step": 1710},
    {"epoch": 6.62, "learning_rate": 4.302204562427082e-05, "loss": 1.6804, "step": 1715},
    {"epoch": 6.64, "learning_rate": 3.921096757801896e-05, "loss": 1.6353, "step": 1720},
    {"epoch": 6.66, "learning_rate": 3.550795635619796e-05, "loss": 1.6895, "step": 1725},
    {"epoch": 6.68, "learning_rate": 3.192662835344908e-05, "loss": 1.7331, "step": 1730},
    {"epoch": 6.7, "learning_rate": 2.8480152521337155e-05, "loss": 1.6892, "step": 1735},
    {"epoch": 6.72, "learning_rate": 2.51812019446141e-05, "loss": 1.6841, "step": 1740},
    {"epoch": 6.74, "learning_rate": 2.2041907240840133e-05, "loss": 1.7096, "step": 1745},
    {"epoch": 6.76, "learning_rate": 1.907381195471957e-05, "loss": 1.7284, "step": 1750},
    {"epoch": 6.78, "learning_rate": 1.6287830111171488e-05, "loss": 1.6272, "step": 1755},
    {"epoch": 6.8, "learning_rate": 1.3694206083212888e-05, "loss": 1.5783, "step": 1760},
    {"epoch": 6.81, "learning_rate": 1.1302476922232546e-05, "loss": 1.5607, "step": 1765},
    {"epoch": 6.83, "learning_rate": 9.121437289164463e-06, "loss": 1.6762, "step": 1770},
    {"epoch": 6.85, "learning_rate": 7.159107115516178e-06, "loss": 1.6488, "step": 1775},
    {"epoch": 6.87, "learning_rate": 5.422702113166627e-06, "loss": 1.6201, "step": 1780},
    {"epoch": 6.89, "learning_rate": 3.918607241369662e-06, "loss": 1.7022, "step": 1785},
    {"epoch": 6.91, "learning_rate": 2.65235322853129e-06, "loss": 1.632, "step": 1790},
    {"epoch": 6.93, "learning_rate": 1.6285962350901147e-06, "loss": 1.6661, "step": 1795},
    {"epoch": 6.95, "learning_rate": 8.511007322841488e-07, "loss": 1.6079, "step": 1800},
    {"epoch": 6.97, "learning_rate": 3.2272565976125165e-07, "loss": 1.6758, "step": 1805},
    {"epoch": 6.99, "learning_rate": 4.541391293127461e-08, "loss": 1.6987, "step": 1810},
    {"epoch": 7.01, "learning_rate": 2.018519871846962e-08, "loss": 1.5689, "step": 1815},
    {"epoch": 7.03, "learning_rate": 2.471322859826806e-07, "loss": 1.5376, "step": 1820},
    {"epoch": 7.05, "learning_rate": 7.254206643976737e-07, "loss": 1.6088, "step": 1825},
    {"epoch": 7.07, "learning_rate": 1.4532916130407314e-06, "loss": 1.6132, "step": 1830},
    {"epoch": 7.08, "learning_rate": 2.4280686674102744e-06, "loss": 1.6823, "step": 1835},
    {"epoch": 7.1, "learning_rate": 3.6461674610908866e-06, "loss": 1.5818, "step": 1840},
    {"epoch": 7.12, "learning_rate": 5.103108905877507e-06, "loss": 1.5745, "step": 1845},
    {"epoch": 7.14, "learning_rate": 6.793535661894024e-06, "loss": 1.5783, "step": 1850},
    {"epoch": 7.16, "learning_rate": 8.7112318371425e-06, "loss": 1.6224, "step": 1855},
    {"epoch": 7.18, "learning_rate": 1.0849145844047318e-05, "loss": 1.6016, "step": 1860},
    {"epoch": 7.2, "learning_rate": 1.3199416328947412e-05, "loss": 1.5826, "step": 1865},
    {"epoch": 7.22, "learning_rate": 1.5753401079189635e-05, "loss": 1.6424, "step": 1870},
    {"epoch": 7.24, "learning_rate": 1.8501708801530793e-05, "loss": 1.5944, "step": 1875},
    {"epoch": 7.26, "learning_rate": 2.1434233654994585e-05, "loss": 1.5702, "step": 1880},
    {"epoch": 7.28, "learning_rate": 2.454019241120068e-05, "loss": 1.5819, "step": 1885},
    {"epoch": 7.3, "learning_rate": 2.780816410552581e-05, "loss": 1.5461, "step": 1890},
    {"epoch": 7.32, "learning_rate": 3.12261320332941e-05, "loss": 1.6276, "step": 1895},
    {"epoch": 7.34, "learning_rate": 3.4781527936569615e-05, "loss": 1.6333, "step": 1900},
    {"epoch": 7.36, "learning_rate": 3.8461278219075155e-05, "loss": 1.5744, "step": 1905},
    {"epoch": 7.37, "learning_rate": 4.2251852019296586e-05, "loss": 1.601, "step": 1910},
    {"epoch": 7.39, "learning_rate": 4.6139310965004655e-05, "loss": 1.4994, "step": 1915},
    {"epoch": 7.41, "learning_rate": 5.010936042623934e-05, "loss": 1.5667, "step": 1920},
    {"epoch": 7.43, "learning_rate": 5.4147402078293086e-05, "loss": 1.6055, "step": 1925},
    {"epoch": 7.45, "learning_rate": 5.823858758141886e-05, "loss": 1.6403, "step": 1930},
    {"epoch": 7.47, "learning_rate": 6.236787317986658e-05, "loss": 1.5103, "step": 1935},
    {"epoch": 7.49, "learning_rate": 6.65200750194898e-05, "loss": 1.5918, "step": 1940},
    {"epoch": 7.51, "learning_rate": 7.067992498051008e-05, "loss": 1.5905, "step": 1945},
    {"epoch": 7.53, "learning_rate": 7.48321268201333e-05, "loss": 1.6157, "step": 1950},
    {"epoch": 7.55, "learning_rate": 7.896141241858101e-05, "loss": 1.5963, "step": 1955},
    {"epoch": 7.57, "learning_rate": 8.305259792170679e-05, "loss": 1.586, "step": 1960},
    {"epoch": 7.59, "learning_rate": 8.709063957376054e-05, "loss": 1.6324, "step": 1965},
    {"epoch": 7.61, "learning_rate": 9.106068903499522e-05, "loss": 1.6232, "step": 1970},
    {"epoch": 7.63, "learning_rate": 9.494814798070329e-05, "loss": 1.6404, "step": 1975},
    {"epoch": 7.64, "learning_rate": 9.873872178092473e-05, "loss": 1.6215, "step": 1980},
    {"epoch": 7.66, "learning_rate": 0.00010241847206343028, "loss": 1.617, "step": 1985},
    {"epoch": 7.68, "learning_rate": 0.0001059738679667058, "loss": 1.6565, "step": 1990},
    {"epoch": 7.7, "learning_rate": 0.0001093918358944741, "loss": 1.7342, "step": 1995},
    {"epoch": 7.72, "learning_rate": 0.00011265980758879924, "loss": 1.6063, "step": 2000},
    {"epoch": 7.74, "learning_rate": 0.00011576576634500532, "loss": 1.6993, "step": 2005},
    {"epoch": 7.76, "learning_rate": 0.00011869829119846911, "loss": 1.6355, "step": 2010},
    {"epoch": 7.78, "learning_rate": 0.00012144659892081027, "loss": 1.6087, "step": 2015},
    {"epoch": 7.8, "learning_rate": 0.00012400058367105252, "loss": 1.6668, "step": 2020},
    {"epoch": 7.82, "learning_rate": 0.00012635085415595263, "loss": 1.7275, "step": 2025},
    {"epoch": 7.84, "learning_rate": 0.00012848876816285744, "loss": 1.6637, "step": 2030},
    {"epoch": 7.86, "learning_rate": 0.00013040646433810593, "loss": 1.6713, "step": 2035},
    {"epoch": 7.88, "learning_rate": 0.00013209689109412246, "loss": 1.6358, "step": 2040},
    {"epoch": 7.9, "learning_rate": 0.00013355383253890908, "loss": 1.6572, "step": 2045},
    {"epoch": 7.92, "learning_rate": 0.0001347719313325897, "loss": 1.6781, "step": 2050},
    {"epoch": 7.93, "learning_rate": 0.00013574670838695924, "loss": 1.6401, "step": 2055},
    {"epoch": 7.95, "learning_rate": 0.0001364745793356023, "loss": 1.673, "step": 2060},
    {"epoch": 7.97, "learning_rate": 0.0001369528677140173, "loss": 1.7179, "step": 2065},
    {"epoch": 7.99, "learning_rate": 0.00013717981480128154, "loss": 1.7015, "step": 2070},
    {"epoch": 8.01, "learning_rate": 0.00013715458608706872, "loss": 1.6596, "step": 2075},
    {"epoch": 8.03, "learning_rate": 0.00013687727434023877, "loss": 1.6462, "step": 2080},
    {"epoch": 8.05, "learning_rate": 0.00013634889926771588, "loss": 1.6041, "step": 2085},
    {"epoch": 8.07, "learning_rate": 0.00013557140376490998, "loss": 1.5571, "step": 2090},
    {"epoch": 8.09, "learning_rate": 0.00013454764677146882, "loss": 1.5541, "step": 2095},
    {"epoch": 8.11, "learning_rate": 0.00013328139275863037, "loss": 1.6773, "step": 2100},
    {"epoch": 8.13, "learning_rate": 0.00013177729788683341, "loss": 1.6003, "step": 2105},
    {"epoch": 8.15, "learning_rate": 0.00013004089288448387, "loss": 1.5288, "step": 2110},
    {"epoch": 8.17, "learning_rate": 0.0001280785627108356, "loss": 1.6059, "step": 2115},
    {"epoch": 8.19, "learning_rate": 0.00012589752307776752, "loss": 1.6265, "step": 2120},
    {"epoch": 8.2, "learning_rate": 0.0001235057939167872, "loss": 1.6372, "step": 2125},
    {"epoch": 8.22, "learning_rate": 0.00012091216988882845, "loss": 1.7324, "step": 2130},
    {"epoch": 8.24, "learning_rate": 0.00011812618804528034, "loss": 1.5938, "step": 2135},
    {"epoch": 8.26, "learning_rate": 0.00011515809275915997, "loss": 1.5805, "step": 2140},
    {"epoch": 8.28, "learning_rate": 0.00011201879805538599, "loss": 1.62, "step": 2145},
    {"epoch": 8.3, "learning_rate": 0.00010871984747866294, "loss": 1.5884, "step": 2150},
    {"epoch": 8.32, "learning_rate": 0.00010527337164655102, "loss": 1.576, "step": 2155},
    {"epoch": 8.34, "learning_rate": 0.00010169204364380236, "loss": 1.5746, "step": 2160},
    {"epoch": 8.36, "learning_rate": 9.798903242198116e-05, "loss": 1.6259, "step": 2165},
    {"epoch": 8.38, "learning_rate": 9.417795437572906e-05, "loss": 1.6047, "step": 2170},
    {"epoch": 8.4, "learning_rate": 9.027282327372703e-05, "loss": 1.5615, "step": 2175},
    {"epoch": 8.42, "learning_rate": 8.628799872846948e-05, "loss": 1.6294, "step": 2180},
    {"epoch": 8.44, "learning_rate": 8.223813339433283e-05, "loss": 1.5485, "step": 2185},
    {"epoch": 8.46, "learning_rate": 7.813811908809194e-05, "loss": 1.5291, "step": 2190},
    {"epoch": 8.47, "learning_rate": 7.400303203001327e-05, "loss": 1.6178, "step": 2195},
    {"epoch": 8.49, "learning_rate": 6.98480774068711e-05, "loss": 1.6335, "step": 2200},
    {"epoch": 8.51, "learning_rate": 6.568853346074429e-05, "loss": 1.5607, "step": 2205},
    {"epoch": 8.53, "learning_rate": 6.15396953091741e-05, "loss": 1.552, "step": 2210},
    {"epoch": 8.55, "learning_rate": 5.741681870327528e-05, "loss": 1.6358, "step": 2215},
    {"epoch": 8.57, "learning_rate": 5.33350639305969e-05, "loss": 1.6499, "step": 2220},
    {"epoch": 8.59, "learning_rate": 4.930944006901777e-05, "loss": 1.5632, "step": 2225},
    {"epoch": 8.61, "learning_rate": 4.535474979665314e-05, "loss": 1.5825, "step": 2230},
    {"epoch": 8.63, "learning_rate": 4.148553496072023e-05, "loss": 1.6277, "step": 2235},
    {"epoch": 8.65, "learning_rate": 3.7716023105507615e-05, "loss": 1.5497, "step": 2240},
    {"epoch": 8.67, "learning_rate": 3.406007515606987e-05, "loss": 1.5159, "step": 2245},
    {"epoch": 8.69, "learning_rate": 3.0531134450027666e-05, "loss": 1.5683, "step": 2250},
    {"epoch": 8.71, "learning_rate": 2.7142177304880198e-05, "loss": 1.5193, "step": 2255},
    {"epoch": 8.73, "learning_rate": 2.390566530260624e-05, "loss": 1.6145, "step": 2260},
    {"epoch": 8.75, "learning_rate": 2.0833499467006378e-05, "loss": 1.5854, "step": 2265},
    {"epoch": 8.76, "learning_rate": 1.7936976502279525e-05, "loss": 1.5426, "step": 2270},
    {"epoch": 8.78, "learning_rate": 1.5226747253755011e-05, "loss": 1.5862, "step": 2275},
    {"epoch": 8.8, "learning_rate": 1.2712777543517822e-05, "loss": 1.5478, "step": 2280},
    {"epoch": 8.82, "learning_rate": 1.0404311524944368e-05, "loss": 1.6329, "step": 2285},
    {"epoch": 8.84, "learning_rate": 8.309837690896873e-06, "loss": 1.535, "step": 2290},
    {"epoch": 8.86, "learning_rate": 6.43705766056588e-06, "loss": 1.5849, "step": 2295},
    {"epoch": 8.88, "learning_rate": 4.792857859738948e-06, "loss": 1.6253, "step": 2300},
    {"epoch": 8.9, "learning_rate": 3.3832841986266175e-06, "loss": 1.537, "step": 2305},
    {"epoch": 8.92, "learning_rate": 2.213519840361947e-06, "loss": 1.5028, "step": 2310},
    {"epoch": 8.94, "learning_rate": 1.2878661419176351e-06, "loss": 1.5904, "step": 2315},
    {"epoch": 8.96, "learning_rate": 6.097268375260679e-07, "loss": 1.6009, "step": 2320},
    {"epoch": 8.98, "learning_rate": 1.8159552276040752e-07, "loss": 1.5465, "step": 2325},
    {"epoch": 9.0, "learning_rate": 5.046485299251069e-09, "loss": 1.5575, "step": 2330},
    {"epoch": 9.02, "learning_rate": 8.072891609113784e-08, "loss": 1.5791, "step": 2335},
    {"epoch": 9.03, "learning_rate": 4.0836452220544814e-07, "loss": 1.4865, "step": 2340},
    {"epoch": 9.05, "learning_rate": 9.867485501471922e-07, "loss": 1.5316, "step": 2345},
    {"epoch": 9.07, "learning_rate": 1.813754215873199e-06, "loss": 1.5403, "step": 2350},
    {"epoch": 9.09, "learning_rate": 2.8863405252193584e-06, "loss": 1.4183, "step": 2355},
    {"epoch": 9.11, "learning_rate": 4.200563455983382e-06, "loss": 1.5547, "step": 2360},
    {"epoch": 9.13, "learning_rate": 5.75159046054386e-06, "loss": 1.5414, "step": 2365},
    {"epoch": 9.15, "learning_rate": 7.5337182356897725e-06, "loss": 1.5445, "step": 2370},
    {"epoch": 9.17, "learning_rate": 9.540393694315775e-06, "loss": 1.4539, "step": 2375},
    {"epoch": 9.19, "learning_rate": 1.1764238061872442e-05, "loss": 1.4992, "step": 2380},
    {"epoch": 9.21, "learning_rate": 1.4197074008960564e-05, "loss": 1.5203, "step": 2385},
    {"epoch": 9.23, "learning_rate": 1.6829955720305234e-05, "loss": 1.4989, "step": 2390},
    {"epoch": 9.25, "learning_rate": 1.965320178953787e-05, "loss": 1.5128, "step": 2395},
    {"epoch": 9.27, "learning_rate": 2.265643081883295e-05, "loss": 1.5033, "step": 2400},
    {"epoch": 9.29, "learning_rate": 2.582859959249101e-05, "loss": 1.4938, "step": 2405},
    {"epoch": 9.31, "learning_rate": 2.915804368410211e-05, "loss": 1.5157, "step": 2410},
    {"epoch": 9.32, "learning_rate": 3.2632520347973973e-05, "loss": 1.4103, "step": 2415},
    {"epoch": 9.34, "learning_rate": 3.623925353710222e-05, "loss": 1.524, "step": 2420},
    {"epoch": 9.36, "learning_rate": 3.996498088215406e-05, "loss": 1.5389, "step": 2425},
    {"epoch": 9.38, "learning_rate": 4.3796002458712527e-05, "loss": 1.5645, "step": 2430},
    {"epoch": 9.4, "learning_rate": 4.7718231163460484e-05, "loss": 1.5511, "step": 2435},
    {"epoch": 9.42, "learning_rate": 5.1717244514068206e-05, "loss": 1.5406, "step": 2440},
    {"epoch": 9.44, "learning_rate": 5.57783376823034e-05, "loss": 1.567, "step": 2445},
    {"epoch": 9.46, "learning_rate": 5.988657756536402e-05, "loss": 1.602, "step": 2450},
    {"epoch": 9.48, "learning_rate": 6.402685769660036e-05, "loss": 1.4789, "step": 2455},
    {"epoch": 9.5, "learning_rate": 6.818395379371463e-05, "loss": 1.5673, "step": 2460},
    {"epoch": 9.52, "learning_rate": 7.234257974018531e-05, "loss": 1.5527, "step": 2465},
    {"epoch": 9.54, "learning_rate": 7.64874437940594e-05, "loss": 1.4721, "step": 2470},
    {"epoch": 9.56, "learning_rate": 8.060330481743391e-05, "loss": 1.4447, "step": 2475},
    {"epoch": 9.58, "learning_rate": 8.467502831985544e-05, "loss": 1.5768, "step": 2480},
    {"epoch": 9.59, "learning_rate": 8.868764210957132e-05, "loss": 1.4808, "step": 2485},
    {"epoch": 9.61, "learning_rate": 9.262639134798362e-05, "loss": 1.4197, "step": 2490},
    {"epoch": 9.63, "learning_rate": 9.647679280487567e-05, "loss": 1.6109, "step": 2495},
    {"epoch": 9.65, "learning_rate": 0.00010022468811489983, "loss": 1.5907, "step": 2500},
    {"epoch": 9.67, "learning_rate": 0.00010385629583950378, "loss": 1.5902, "step": 2505},
    {"epoch": 9.69, "learning_rate": 0.00010735826214284965, "loss": 1.6053, "step": 2510},
    {"epoch": 9.71, "learning_rate": 0.00011071770989539361, "loss": 1.552, "step": 2515},
    {"epoch": 9.73, "learning_rate": 0.00011392228602455961, "loss": 1.5787, "step": 2520},
    {"epoch": 9.75, "learning_rate": 0.00011696020693839523, "loss": 1.4997, "step": 2525},
    {"epoch": 9.77, "learning_rate": 0.00011982030185518476, "loss": 1.6354, "step": 2530},
    {"epoch": 9.79, "learning_rate": 0.00012249205387968647, "loss": 1.586, "step": 2535},
    {"epoch": 9.81, "learning_rate": 0.0001249656386749574, "loss": 1.511, "step": 2540},
    {"epoch": 9.83, "learning_rate": 0.000127231960587559, "loss": 1.5002, "step": 2545},
    {"epoch": 9.85, "learning_rate": 0.00012928268609331444, "loss": 1.5829, "step": 2550},
    {"epoch": 9.86, "learning_rate": 0.00013111027444062605, "loss": 1.6407, "step": 2555},
    {"epoch": 9.88, "learning_rate": 0.00013270800537867978, "loss": 1.5058, "step": 2560},
    {"epoch": 9.9, "learning_rate": 0.00013407000386857348, "loss": 1.4854, "step": 2565},
    {"epoch": 9.92, "learning_rate": 0.0001351912616865057, "loss": 1.4912, "step": 2570},
    {"epoch": 9.94, "learning_rate": 0.00013606765583958525, "loss": 1.5218, "step": 2575},
    {"epoch": 9.96, "learning_rate": 0.00013669596372654658, "loss": 1.5828, "step": 2580},
    {"epoch": 9.98, "learning_rate": 0.00013707387498762246, "loss": 1.5816, "step": 2585},
    {"epoch": 10.0, "learning_rate": 0.0001372, "loss": 1.4944, "step": 2590},
    {"epoch": 10.02, "learning_rate": 0.00013707387498762246, "loss": 1.5296, "step": 2595},
    {"epoch": 10.04, "learning_rate": 0.0001366959637265466, "loss": 1.5509, "step": 2600},
    {"epoch": 10.06, "learning_rate": 0.00013606765583958527, "loss": 1.5305, "step": 2605},
    {"epoch": 10.08, "learning_rate": 0.00013519126168650574, "loss": 1.4872, "step": 2610},
    {"epoch": 10.1, "learning_rate": 0.00013407000386857353, "loss": 1.544, "step": 2615},
    {"epoch": 10.12, "learning_rate": 0.00013270800537867983, "loss": 1.5421, "step": 2620},
    {"epoch": 10.14, "learning_rate": 0.0001311102744406261, "loss": 1.5468, "step": 2625},
    {"epoch": 10.15, "learning_rate": 0.00012928268609331455, "loss": 1.5529, "step": 2630},
    {"epoch": 10.17, "learning_rate": 0.00012723196058755907, "loss": 1.5357, "step": 2635},
    {"epoch": 10.19, "learning_rate": 0.00012496563867495748, "loss": 1.5077, "step": 2640},
    {"epoch": 10.21, "learning_rate": 0.00012249205387968658, "loss": 1.6099, "step": 2645},
    {"epoch": 10.23, "learning_rate": 0.00011982030185518488, "loss": 1.5701, "step": 2650},
    {"epoch": 10.25, "learning_rate": 0.0001169602069383955, "loss": 1.5479, "step": 2655},
    {"epoch": 10.27, "learning_rate": 0.00011392228602455956, "loss": 1.5486, "step": 2660},
    {"epoch": 10.29, "learning_rate": 0.00011071770989539373, "loss": 1.5367, "step": 2665},
    {"epoch": 10.31,
|
"learning_rate": 0.0001073582621428498, |
|
"loss": 1.5361, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 0.00010385629583950413, |
|
"loss": 1.3755, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 0.00010022468811490019, |
|
"loss": 1.464, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 9.64767928048756e-05, |
|
"loss": 1.5142, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 9.262639134798378e-05, |
|
"loss": 1.5207, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 8.868764210957149e-05, |
|
"loss": 1.5357, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 8.467502831985583e-05, |
|
"loss": 1.454, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 8.06033048174343e-05, |
|
"loss": 1.5096, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 7.648744379405981e-05, |
|
"loss": 1.5628, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 7.234257974018524e-05, |
|
"loss": 1.468, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 6.818395379371479e-05, |
|
"loss": 1.4858, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 6.402685769660054e-05, |
|
"loss": 1.4885, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 5.988657756536443e-05, |
|
"loss": 1.4577, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 5.577833768230333e-05, |
|
"loss": 1.5513, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 5.171724451406837e-05, |
|
"loss": 1.4957, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 4.7718231163460647e-05, |
|
"loss": 1.5075, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 4.379600245871268e-05, |
|
"loss": 1.448, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 3.996498088215443e-05, |
|
"loss": 1.53, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 3.623925353710258e-05, |
|
"loss": 1.5454, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 3.2632520347973906e-05, |
|
"loss": 1.5139, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 2.915804368410225e-05, |
|
"loss": 1.5848, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 2.5828599592491143e-05, |
|
"loss": 1.514, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 2.2656430818833073e-05, |
|
"loss": 1.4666, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 1.965320178953816e-05, |
|
"loss": 1.4546, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 1.682995572030518e-05, |
|
"loss": 1.4552, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 1.4197074008960664e-05, |
|
"loss": 1.482, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 1.1764238061872534e-05, |
|
"loss": 1.5422, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 9.54039369431598e-06, |
|
"loss": 1.492, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 7.5337182356897344e-06, |
|
"loss": 1.4219, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 5.75159046054383e-06, |
|
"loss": 1.4972, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 4.200563455983359e-06, |
|
"loss": 1.4525, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 2.886340525219404e-06, |
|
"loss": 1.4337, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 1.8137542158732371e-06, |
|
"loss": 1.5066, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 9.867485501472609e-07, |
|
"loss": 1.4053, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 4.083645222054405e-07, |
|
"loss": 1.4861, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 8.072891609114545e-08, |
|
"loss": 1.4625, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 5.046485299251069e-09, |
|
"loss": 1.4993, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 1.8159552276039227e-07, |
|
"loss": 1.4475, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 6.097268375260069e-07, |
|
"loss": 1.4133, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 1.2878661419176504e-06, |
|
"loss": 1.4672, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 2.21351984036197e-06, |
|
"loss": 1.4601, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 3.383284198626564e-06, |
|
"loss": 1.4559, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 4.792857859738887e-06, |
|
"loss": 1.3926, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 6.437057660565811e-06, |
|
"loss": 1.3658, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 8.309837690896675e-06, |
|
"loss": 1.3814, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 1.0404311524944405e-05, |
|
"loss": 1.4756, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 1.271277754351773e-05, |
|
"loss": 1.3856, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 1.5226747253754904e-05, |
|
"loss": 1.3775, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 1.7936976502279244e-05, |
|
"loss": 1.4091, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 2.083349946700608e-05, |
|
"loss": 1.4543, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 2.39056653026063e-05, |
|
"loss": 1.4107, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"learning_rate": 2.714217730488006e-05, |
|
"loss": 1.4381, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 3.053113445002753e-05, |
|
"loss": 1.4025, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 3.4060075156069725e-05, |
|
"loss": 1.3656, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 3.771602310550724e-05, |
|
"loss": 1.4167, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 4.148553496072031e-05, |
|
"loss": 1.4686, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"learning_rate": 4.5354749796653205e-05, |
|
"loss": 1.3998, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 4.9309440069017615e-05, |
|
"loss": 1.4714, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 5.333506393059674e-05, |
|
"loss": 1.3963, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 5.7416818703274866e-05, |
|
"loss": 1.5068, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 6.153969530917418e-05, |
|
"loss": 1.4338, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 6.568853346074412e-05, |
|
"loss": 1.3606, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 6.984807740687094e-05, |
|
"loss": 1.4016, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"learning_rate": 7.400303203001311e-05, |
|
"loss": 1.4269, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 7.813811908809178e-05, |
|
"loss": 1.44, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 8.223813339433243e-05, |
|
"loss": 1.4977, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 8.628799872846956e-05, |
|
"loss": 1.436, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 9.027282327372687e-05, |
|
"loss": 1.513, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 9.417795437572891e-05, |
|
"loss": 1.4691, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 9.798903242198079e-05, |
|
"loss": 1.5133, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 0.000101692043643802, |
|
"loss": 1.5047, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 0.00010527337164655109, |
|
"loss": 1.4302, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 0.00010871984747866282, |
|
"loss": 1.5886, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 0.00011201879805538586, |
|
"loss": 1.4925, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 0.00011515809275915985, |
|
"loss": 1.4445, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 0.00011812618804528006, |
|
"loss": 1.4577, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 0.00012091216988882848, |
|
"loss": 1.4792, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 0.00012350579391678723, |
|
"loss": 1.4425, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 0.00012589752307776744, |
|
"loss": 1.4294, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 0.0001280785627108355, |
|
"loss": 1.4138, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 0.0001300408928844837, |
|
"loss": 1.5429, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 0.00013177729788683344, |
|
"loss": 1.5001, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 0.00013328139275863032, |
|
"loss": 1.444, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 0.00013454764677146876, |
|
"loss": 1.4294, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 0.00013557140376490993, |
|
"loss": 1.5483, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 0.0001363488992677158, |
|
"loss": 1.5026, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 0.00013687727434023872, |
|
"loss": 1.5176, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 0.00013715458608706872, |
|
"loss": 1.4418, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.00013717981480128154, |
|
"loss": 1.441, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 0.00013695286771401734, |
|
"loss": 1.3854, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 0.00013647457933560234, |
|
"loss": 1.4397, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 0.00013574670838695926, |
|
"loss": 1.4672, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 0.0001347719313325897, |
|
"loss": 1.4525, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 0.00013355383253890914, |
|
"loss": 1.4068, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 0.0001320968910941225, |
|
"loss": 1.4855, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 0.0001304064643381061, |
|
"loss": 1.4212, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.00012848876816285777, |
|
"loss": 1.4849, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 0.00012635085415595244, |
|
"loss": 1.3912, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 0.00012400058367105247, |
|
"loss": 1.483, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 0.00012144659892081038, |
|
"loss": 1.3818, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 0.00011869829119846924, |
|
"loss": 1.4634, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 0.00011576576634500562, |
|
"loss": 1.4034, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 0.00011265980758879936, |
|
"loss": 1.4014, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 0.00010939183589447423, |
|
"loss": 1.4222, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 0.00010597386796670575, |
|
"loss": 1.4854, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 0.00010241847206343044, |
|
"loss": 1.4472, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 9.87387217809251e-05, |
|
"loss": 1.5271, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 9.494814798070321e-05, |
|
"loss": 1.401, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 9.106068903499514e-05, |
|
"loss": 1.5122, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 8.709063957376094e-05, |
|
"loss": 1.4755, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 8.30525979217072e-05, |
|
"loss": 1.4605, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 7.896141241858118e-05, |
|
"loss": 1.3958, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 7.48321268201337e-05, |
|
"loss": 1.4285, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 7.067992498051e-05, |
|
"loss": 1.4276, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 6.652007501948996e-05, |
|
"loss": 1.4174, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 6.236787317986674e-05, |
|
"loss": 1.4845, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 5.823858758141927e-05, |
|
"loss": 1.4357, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 5.414740207829325e-05, |
|
"loss": 1.4382, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 5.010936042623904e-05, |
|
"loss": 1.3395, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 4.6139310965004824e-05, |
|
"loss": 1.3898, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 4.225185201929675e-05, |
|
"loss": 1.4521, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 3.8461278219075304e-05, |
|
"loss": 1.4757, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 3.478152793656996e-05, |
|
"loss": 1.4128, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 3.122613203329423e-05, |
|
"loss": 1.4424, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 2.780816410552575e-05, |
|
"loss": 1.4287, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 2.454019241120062e-05, |
|
"loss": 1.3953, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 2.1434233654994707e-05, |
|
"loss": 1.3275, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 1.8501708801531077e-05, |
|
"loss": 1.3897, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 1.575340107918959e-05, |
|
"loss": 1.3657, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 1.319941632894751e-05, |
|
"loss": 1.3897, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 1.0849145844047538e-05, |
|
"loss": 1.4783, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 8.711231837142462e-06, |
|
"loss": 1.4102, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 6.793535661894092e-06, |
|
"loss": 1.4442, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 5.1031089058776675e-06, |
|
"loss": 1.3875, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.6461674610908637e-06, |
|
"loss": 1.4228, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 2.42806866741032e-06, |
|
"loss": 1.4015, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 1.453291613040815e-06, |
|
"loss": 1.3937, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 7.254206643977347e-07, |
|
"loss": 1.4905, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 2.4713228598269586e-07, |
|
"loss": 1.4419, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 2.0185198718462007e-08, |
|
"loss": 1.4331, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 4.5413912931266996e-08, |
|
"loss": 1.4014, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 3.227256597612364e-07, |
|
"loss": 1.3146, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 8.51100732284126e-07, |
|
"loss": 1.3623, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 1.62859623508997e-06, |
|
"loss": 1.3259, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 2.652353228531244e-06, |
|
"loss": 1.3975, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 3.9186072413696845e-06, |
|
"loss": 1.3585, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 5.422702113166566e-06, |
|
"loss": 1.3596, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 7.159107115516102e-06, |
|
"loss": 1.4021, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 9.121437289164265e-06, |
|
"loss": 1.4666, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 1.1302476922232583e-05, |
|
"loss": 1.327, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 1.3694206083212781e-05, |
|
"loss": 1.2798, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 1.628783011117153e-05, |
|
"loss": 1.3184, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 1.9073811954719624e-05, |
|
"loss": 1.3236, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 2.2041907240839828e-05, |
|
"loss": 1.3766, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 2.518120194461378e-05, |
|
"loss": 1.2779, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 2.8480152521337216e-05, |
|
"loss": 1.3743, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 3.1926628353448936e-05, |
|
"loss": 1.336, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 3.5507956356197615e-05, |
|
"loss": 1.3522, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"learning_rate": 3.9210967578018804e-05, |
|
"loss": 1.3693, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 4.302204562427067e-05, |
|
"loss": 1.3374, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 4.692717672627317e-05, |
|
"loss": 1.3881, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 13.42, |
|
"learning_rate": 5.091200127153047e-05, |
|
"loss": 1.2653, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 5.496186660566713e-05, |
|
"loss": 1.3907, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 5.906188091190777e-05, |
|
"loss": 1.3586, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 6.319696796998643e-05, |
|
"loss": 1.3102, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 6.735192259312862e-05, |
|
"loss": 1.3599, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 7.151146653925592e-05, |
|
"loss": 1.3715, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 7.566030469082585e-05, |
|
"loss": 1.4274, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 7.978318129672468e-05, |
|
"loss": 1.2634, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 8.386493606940281e-05, |
|
"loss": 1.3939, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 8.789055993098241e-05, |
|
"loss": 1.4498, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 9.184525020334682e-05, |
|
"loss": 1.4425, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 9.571446503927972e-05, |
|
"loss": 1.3688, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 9.948397689449235e-05, |
|
"loss": 1.3409, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.00010313992484392988, |
|
"loss": 1.4686, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 0.00010666886554997249, |
|
"loss": 1.3646, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 0.00011005782269511996, |
|
"loss": 1.411, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 0.00011329433469739373, |
|
"loss": 1.3738, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 0.0001163665005329936, |
|
"loss": 1.3912, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 0.00011926302349772045, |
|
"loss": 1.3728, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 0.00012197325274624481, |
|
"loss": 1.3925, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 0.0001244872224564823, |
|
"loss": 1.3735, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 0.0001267956884750556, |
|
"loss": 1.4361, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 0.0001288901623091031, |
|
"loss": 1.4661, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 0.000130762942339434, |
|
"loss": 1.4177, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 0.00013240714214026112, |
|
"loss": 1.4351, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 0.00013381671580137345, |
|
"loss": 1.4243, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 0.00013498648015963804, |
|
"loss": 1.3258, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.00013591213385808236, |
|
"loss": 1.3917, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 0.00013659027316247394, |
|
"loss": 1.3626, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 0.00013701840447723958, |
|
"loss": 1.505, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.00013719495351470075, |
|
"loss": 1.3238, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 0.00013711927108390887, |
|
"loss": 1.3589, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 0.00013679163547779456, |
|
"loss": 1.4241, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 0.00013621325144985282, |
|
"loss": 1.4179, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 0.00013538624578412686, |
|
"loss": 1.3404, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 0.00013431365947478058, |
|
"loss": 1.3758, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 0.00013299943654401664, |
|
"loss": 1.4247, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 0.00013144840953945616, |
|
"loss": 1.3701, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 0.00012966628176431025, |
|
"loss": 1.3553, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 0.00012765960630568425, |
|
"loss": 1.381, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 0.00012543576193812774, |
|
"loss": 1.442, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 0.0001230029259910393, |
|
"loss": 1.3873, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.0001203700442796948, |
|
"loss": 1.3884, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 0.00011754679821046217, |
|
"loss": 1.3278, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 0.00011454356918116728, |
|
"loss": 1.3606, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.00011137140040750922, |
|
"loss": 1.2409, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 0.00010804195631589772, |
|
"loss": 1.411, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 0.00010456747965202607, |
|
"loss": 1.38, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 0.00010096074646289782, |
|
"loss": 1.3982, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 9.723501911784598e-05, |
|
"loss": 1.3883, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 9.340399754128775e-05, |
|
"loss": 1.3611, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 8.948176883653932e-05, |
|
"loss": 1.4344, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 8.548275548593159e-05, |
|
"loss": 1.2783, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 8.142166231769664e-05, |
|
"loss": 1.335, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 7.731342243463601e-05, |
|
"loss": 1.3506, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 7.317314230339991e-05, |
|
"loss": 1.4243, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 6.901604620628517e-05, |
|
"loss": 1.3969, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 6.485742025981473e-05, |
|
"loss": 1.3597, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 6.071255620594063e-05, |
|
"loss": 1.4289, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 5.659669518256613e-05, |
|
"loss": 1.3466, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 5.252497168014461e-05, |
|
"loss": 1.279, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 4.8512357890428955e-05, |
|
"loss": 1.3786, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 4.457360865201619e-05, |
|
"loss": 1.2442, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 4.072320719512437e-05, |
|
"loss": 1.2467, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"learning_rate": 3.697531188510021e-05, |
|
"loss": 1.326, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 3.3343704160496265e-05, |
|
"loss": 1.3049, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 2.9841737857150583e-05, |
|
"loss": 1.3741, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 2.648229010460623e-05, |
|
"loss": 1.3036, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 2.3277713975440426e-05, |
|
"loss": 1.3118, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 2.0239793061604814e-05, |
|
"loss": 1.3541, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 1.7379698144815434e-05, |
|
"loss": 1.3646, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 1.4707946120313696e-05, |
|
"loss": 1.3313, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 1.2234361325042786e-05, |
|
"loss": 1.3923, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 9.968039412440925e-06, |
|
"loss": 1.2976, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 7.917313906685554e-06, |
|
"loss": 1.3127, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 6.089725559373968e-06, |
|
"loss": 1.3699, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 4.4919946213203235e-06, |
|
"loss": 1.2705, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 3.129996131426458e-06, |
|
"loss": 1.3474, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"learning_rate": 2.00873831349432e-06, |
|
"loss": 1.3704, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 1.1323441604147607e-06, |
|
"loss": 1.3555, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 5.040362734534312e-07, |
|
"loss": 1.3937, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 1.2612501237755945e-07, |
|
"loss": 1.425, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.42, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 1.261250123775442e-07, |
|
"loss": 1.4818, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 5.040362734534007e-07, |
|
"loss": 1.4719, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 1.132344160414715e-06, |
|
"loss": 1.3957, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 2.008738313494259e-06, |
|
"loss": 1.4262, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 3.1299961314263817e-06, |
|
"loss": 1.3789, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 4.491994621320232e-06, |
|
"loss": 1.4566, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 6.089725559373869e-06, |
|
"loss": 1.4144, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 7.91731390668544e-06, |
|
"loss": 1.4525, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 9.968039412440788e-06, |
|
"loss": 1.4557, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 1.2234361325042642e-05, |
|
"loss": 1.4918, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 1.4707946120313543e-05, |
|
"loss": 1.5974, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 1.7379698144815265e-05, |
|
"loss": 1.3931, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 2.0239793061604638e-05, |
|
"loss": 1.3826, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 2.3277713975440236e-05, |
|
"loss": 1.4445, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 2.6482290104606033e-05, |
|
"loss": 1.4149, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 2.984173785715038e-05, |
|
"loss": 1.3804, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 3.334370416049605e-05, |
|
"loss": 1.3937, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 15.35, |
|
"learning_rate": 3.697531188509998e-05, |
|
"loss": 1.4221, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 4.072320719512414e-05, |
|
"loss": 1.4485, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 4.4573608652015956e-05, |
|
"loss": 1.5171, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 4.851235789042871e-05, |
|
"loss": 1.4849, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 5.2524971680144367e-05, |
|
"loss": 1.4614, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"learning_rate": 5.659669518256589e-05, |
|
"loss": 1.413, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 6.071255620594038e-05, |
|
"loss": 1.4743, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 6.485742025981448e-05, |
|
"loss": 1.4832, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 6.901604620628492e-05, |
|
"loss": 1.5146, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 7.317314230339967e-05, |
|
"loss": 1.5513, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 7.731342243463577e-05, |
|
"loss": 1.5379, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 8.142166231769639e-05, |
|
"loss": 1.4753, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 8.548275548593135e-05, |
|
"loss": 1.5384, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 8.948176883653908e-05, |
|
"loss": 1.5967, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 9.340399754128752e-05, |
|
"loss": 1.4906, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 9.723501911784575e-05, |
|
"loss": 1.5322, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 0.0001009607464628976, |
|
"loss": 1.5223, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 0.00010456747965202585, |
|
"loss": 1.4992, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.00010804195631589752, |
|
"loss": 1.5217, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 0.00011137140040750902, |
|
"loss": 1.4526, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 0.00011454356918116707, |
|
"loss": 1.5553, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 0.00011754679821046198, |
|
"loss": 1.4297, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"learning_rate": 0.00012037004427969463, |
|
"loss": 1.4843, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 0.00012300292599103915, |
|
"loss": 1.4636, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 0.00012543576193812758, |
|
"loss": 1.4776, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 0.00012765960630568412, |
|
"loss": 1.514, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 0.00012966628176431014, |
|
"loss": 1.4759, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 0.00013144840953945605, |
|
"loss": 1.396, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 0.00013299943654401656, |
|
"loss": 1.5696, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 0.0001343136594747805, |
|
"loss": 1.5059, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 0.0001353862457841268, |
|
"loss": 1.4919, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 0.00013621325144985277, |
|
"loss": 1.515, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 0.00013679163547779453, |
|
"loss": 1.4448, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 0.00013711927108390882, |
|
"loss": 1.5143, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.3626197576522827, |
|
"eval_runtime": 22.0604, |
|
"eval_samples_per_second": 20.897, |
|
"eval_steps_per_second": 2.629, |
|
"step": 4144 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 4.923820788333643e-05, |
|
"loss": 1.4417, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 5.311198428226757e-05, |
|
"loss": 1.5224, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 5.7038557476801184e-05, |
|
"loss": 1.4984, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 6.100454224793001e-05, |
|
"loss": 1.4427, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 6.49964190272892e-05, |
|
"loss": 1.4789, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 6.900057998375254e-05, |
|
"loss": 1.5665, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 7.300337541089789e-05, |
|
"loss": 1.5002, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 7.699116025723293e-05, |
|
"loss": 1.4668, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 8.09503406405399e-05, |
|
"loss": 1.3757, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 8.48674201878012e-05, |
|
"loss": 1.4722, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 8.872904604271726e-05, |
|
"loss": 1.4961, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 9.252205438400528e-05, |
|
"loss": 1.4798, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 9.623351529928802e-05, |
|
"loss": 1.5392, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 9.985077686162523e-05, |
|
"loss": 1.5653, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 0.00010336150825841603, |
|
"loss": 1.4743, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.00010675374182567242, |
|
"loss": 1.4201, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 0.00011001591384435138, |
|
"loss": 1.3889, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 0.00011313690395969416, |
|
"loss": 1.4913, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 0.00011610607308918656, |
|
"loss": 1.3722, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 0.00011891329968992182, |
|
"loss": 1.4133, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 0.0001215490142617292, |
|
"loss": 1.36, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 0.00012400423196845864, |
|
"loss": 1.361, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 0.00012627058326621316, |
|
"loss": 1.542, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 0.0001283403424341258, |
|
"loss": 1.4983, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 0.00013020645391041629, |
|
"loss": 1.4985, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 0.00013186255634396195, |
|
"loss": 1.4767, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 0.00013330300427938103, |
|
"loss": 1.4258, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 0.00013452288740171763, |
|
"loss": 1.4773, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 0.00013551804727511717, |
|
"loss": 1.462, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 0.0001362850915184393, |
|
"loss": 1.4688, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 0.00013682140536947865, |
|
"loss": 1.5146, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.00013712516059837763, |
|
"loss": 1.5462, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.3411859273910522, |
|
"eval_runtime": 18.5136, |
|
"eval_samples_per_second": 20.85, |
|
"eval_steps_per_second": 2.647, |
|
"step": 4304 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.00013719532173984305, |
|
"loss": 1.3395, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 0.00013703164962292424, |
|
"loss": 1.3995, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 0.00013663470218631772, |
|
"loss": 1.4118, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 0.00013600583257642132, |
|
"loss": 1.3778, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 0.00013514718453461912, |
|
"loss": 1.4416, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 0.0001340616850895236, |
|
"loss": 1.4926, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 0.00013275303457908525, |
|
"loss": 1.4668, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 0.00013122569403658038, |
|
"loss": 1.3931, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 0.00012948486998348453, |
|
"loss": 1.403, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 0.0001275364966810606, |
|
"loss": 1.3802, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 0.00012538721590117088, |
|
"loss": 1.429, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 0.00012304435428527134, |
|
"loss": 1.4773, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"learning_rate": 0.00012051589836876666, |
|
"loss": 1.3717, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 0.00011781046735586077, |
|
"loss": 1.4166, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"learning_rate": 0.00011493728373772612, |
|
"loss": 1.432, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 0.00011190614185412497, |
|
"loss": 1.4722, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 0.00010872737450568259, |
|
"loss": 1.3411, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 0.00010541181773059928, |
|
"loss": 1.4268, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 0.00010197077386589103, |
|
"loss": 1.4257, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 9.841597301907411e-05, |
|
"loss": 1.4367, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 9.475953308163089e-05, |
|
"loss": 1.388, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 9.101391842055883e-05, |
|
"loss": 1.4486, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 8.719189738884117e-05, |
|
"loss": 1.3824, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 8.330649879965051e-05, |
|
"loss": 1.4313, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 7.937096751268169e-05, |
|
"loss": 1.3933, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 7.539871928400956e-05, |
|
"loss": 1.4352, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 7.140329503337758e-05, |
|
"loss": 1.4244, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 6.739831468481779e-05, |
|
"loss": 1.4062, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 6.33974307379626e-05, |
|
"loss": 1.3753, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 5.94142817282949e-05, |
|
"loss": 1.3918, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 16.56, |
|
"learning_rate": 5.546244573501996e-05, |
|
"loss": 1.423, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 5.155539409500841e-05, |
|
"loss": 1.4141, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 4.7706445480618974e-05, |
|
"loss": 1.4364, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 4.3928720497937174e-05, |
|
"loss": 1.405, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 4.02350969601972e-05, |
|
"loss": 1.4752, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 3.663816598884848e-05, |
|
"loss": 1.4515, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 3.315018909193563e-05, |
|
"loss": 1.4503, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 2.9783056366075814e-05, |
|
"loss": 1.3878, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 2.6548245964540616e-05, |
|
"loss": 1.3826, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 2.345678496960497e-05, |
|
"loss": 1.3709, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 2.051921180253764e-05, |
|
"loss": 1.4434, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 1.774554029938429e-05, |
|
"loss": 1.4217, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 1.5145225574996895e-05, |
|
"loss": 1.3259, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 1.272713179167218e-05, |
|
"loss": 1.3681, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 1.0499501942287456e-05, |
|
"loss": 1.3708, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 8.469929750918058e-06, |
|
"loss": 1.4352, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 6.6453337867398825e-06, |
|
"loss": 1.4355, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 5.031933879454651e-06, |
|
"loss": 1.4338, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 3.6352299166325223e-06, |
|
"loss": 1.3822, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 2.459983095251791e-06, |
|
"loss": 1.3442, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"learning_rate": 1.5101996913488535e-06, |
|
"loss": 1.356, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 7.891174030992353e-07, |
|
"loss": 1.3681, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 2.991943138937121e-07, |
|
"loss": 1.3964, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 4.2100513024036057e-08, |
|
"loss": 1.4004, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.3110859394073486, |
|
"eval_runtime": 18.5084, |
|
"eval_samples_per_second": 20.855, |
|
"eval_steps_per_second": 2.647, |
|
"step": 4573 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 0.000134880848712477, |
|
"loss": 1.3191, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 0.00013365575351388775, |
|
"loss": 1.4082, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 0.00013217996375537754, |
|
"loss": 1.381, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 0.0001304591664429994, |
|
"loss": 1.3937, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 0.00012849999272775362, |
|
"loss": 1.3955, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.000126309992352219, |
|
"loss": 1.3851, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"learning_rate": 0.00012389760455736593, |
|
"loss": 1.3328, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 0.00012127212556165209, |
|
"loss": 1.3809, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 0.00011844367273772787, |
|
"loss": 1.2981, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 0.00011542314562479984, |
|
"loss": 1.4094, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 0.00011222218392688052, |
|
"loss": 1.4044, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 0.0001088531226587985, |
|
"loss": 1.4849, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 0.00010532894461279404, |
|
"loss": 1.4488, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 0.00010166323032888931, |
|
"loss": 1.4335, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 9.78701057618181e-05, |
|
"loss": 1.3215, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 18.38, |
|
"learning_rate": 9.396418784617256e-05, |
|
"loss": 1.4931, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 8.996052816955526e-05, |
|
"loss": 1.4301, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"learning_rate": 8.587455497076757e-05, |
|
"loss": 1.3555, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 18.44, |
|
"learning_rate": 8.172201368657088e-05, |
|
"loss": 1.3862, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 7.751890627611039e-05, |
|
"loss": 1.3795, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 7.328142955681618e-05, |
|
"loss": 1.4168, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 18.5, |
|
"learning_rate": 6.902591278942331e-05, |
|
"loss": 1.4594, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 6.47687547526032e-05, |
|
"loss": 1.4803, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"learning_rate": 6.0526360549714816e-05, |
|
"loss": 1.4239, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 5.6315078391183605e-05, |
|
"loss": 1.3304, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 5.21511365961095e-05, |
|
"loss": 1.3828, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 4.80505810558948e-05, |
|
"loss": 1.3273, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 4.402921340084794e-05, |
|
"loss": 1.3661, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 4.0102530108070474e-05, |
|
"loss": 1.287, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 3.6285662785250574e-05, |
|
"loss": 1.3865, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"learning_rate": 3.2593319860498044e-05, |
|
"loss": 1.428, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 2.9039729902920295e-05, |
|
"loss": 1.2403, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 2.5638586792340877e-05, |
|
"loss": 1.4223, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 2.2402996949474048e-05, |
|
"loss": 1.3913, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 1.9345428829881034e-05, |
|
"loss": 1.3764, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"learning_rate": 1.647766487635479e-05, |
|
"loss": 1.4167, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 1.3810756114877466e-05, |
|
"loss": 1.3081, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 1.1354979569111334e-05, |
|
"loss": 1.3206, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 9.119798657542995e-06, |
|
"loss": 1.3369, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 7.113826725875128e-06, |
|
"loss": 1.3328, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 5.344793855206173e-06, |
|
"loss": 1.4008, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 3.819517073901737e-06, |
|
"loss": 1.41, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 2.5438740879408957e-06, |
|
"loss": 1.1899, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 1.522780630978951e-06, |
|
"loss": 1.3401, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"learning_rate": 7.601715213983543e-07, |
|
"loss": 1.3232, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 2.5898549935329754e-07, |
|
"loss": 1.3369, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 2.1153902234608112e-08, |
|
"loss": 1.4171, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.2792030572891235, |
|
"eval_runtime": 11.7078, |
|
"eval_samples_per_second": 43.902, |
|
"eval_steps_per_second": 5.552, |
|
"step": 4807 |
|
} |
|
], |
|
"max_steps": 6831, |
|
"num_train_epochs": 27, |
|
"total_flos": 5020465102848000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
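A usage note: the `log_history` array above can be inspected programmatically. Below is a minimal sketch in Python, assuming this state has been saved as `trainer_state.json` — that filename and the `loss_curve.png` output name are illustrative assumptions, not part of this file:

```python
import json

import matplotlib.pyplot as plt

# Load the trainer state (filename is an assumption; point it at your checkpoint dir).
with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss" instead,
# so the two kinds of log records are filtered separately.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

plt.plot([e["step"] for e in train_logs],
         [e["loss"] for e in train_logs], label="train loss")
if eval_logs:
    plt.plot([e["step"] for e in eval_logs],
             [e["eval_loss"] for e in eval_logs], "o-", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curve.png")
```

Plotting loss against `step` rather than `epoch` is the safer choice for a log like this one, where the epoch counter resets after training resumes (e.g. around steps 4144–4145) while `step` stays monotonic.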
|