{ "best_metric": null, "best_model_checkpoint": null, "epoch": 194.81998395783202, "global_step": 850000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 1.4325005730002295e-07, "loss": 1.5137, "step": 500 }, { "epoch": 0.11, "eval_loss": 1.2425005435943604, "eval_runtime": 8.7363, "eval_samples_per_second": 537.182, "eval_steps_per_second": 67.191, "step": 500 }, { "epoch": 0.23, "learning_rate": 2.865001146000459e-07, "loss": 1.4827, "step": 1000 }, { "epoch": 0.23, "eval_loss": 1.2392524480819702, "eval_runtime": 8.719, "eval_samples_per_second": 538.251, "eval_steps_per_second": 67.324, "step": 1000 }, { "epoch": 0.34, "learning_rate": 4.297501719000688e-07, "loss": 1.452, "step": 1500 }, { "epoch": 0.34, "eval_loss": 1.2370035648345947, "eval_runtime": 8.7165, "eval_samples_per_second": 538.406, "eval_steps_per_second": 67.344, "step": 1500 }, { "epoch": 0.46, "learning_rate": 5.730002292000918e-07, "loss": 1.4222, "step": 2000 }, { "epoch": 0.46, "eval_loss": 1.220065712928772, "eval_runtime": 8.7271, "eval_samples_per_second": 537.75, "eval_steps_per_second": 67.262, "step": 2000 }, { "epoch": 0.57, "learning_rate": 7.162502865001146e-07, "loss": 1.402, "step": 2500 }, { "epoch": 0.57, "eval_loss": 1.2126001119613647, "eval_runtime": 8.7272, "eval_samples_per_second": 537.746, "eval_steps_per_second": 67.261, "step": 2500 }, { "epoch": 0.69, "learning_rate": 8.595003438001376e-07, "loss": 1.3818, "step": 3000 }, { "epoch": 0.69, "eval_loss": 1.2094238996505737, "eval_runtime": 8.7245, "eval_samples_per_second": 537.913, "eval_steps_per_second": 67.282, "step": 3000 }, { "epoch": 0.8, "learning_rate": 1.0027504011001605e-06, "loss": 1.3602, "step": 3500 }, { "epoch": 0.8, "eval_loss": 1.2199134826660156, "eval_runtime": 8.7284, "eval_samples_per_second": 537.672, "eval_steps_per_second": 67.252, "step": 3500 }, { "epoch": 0.92, "learning_rate": 1.1460004584001836e-06, "loss": 1.3537, "step": 4000 }, { "epoch": 0.92, "eval_loss": 1.2085955142974854, "eval_runtime": 8.7221, "eval_samples_per_second": 538.057, "eval_steps_per_second": 67.3, "step": 4000 }, { "epoch": 1.03, "learning_rate": 1.2892505157002064e-06, "loss": 1.3417, "step": 4500 }, { "epoch": 1.03, "eval_loss": 1.2128750085830688, "eval_runtime": 8.7292, "eval_samples_per_second": 537.623, "eval_steps_per_second": 67.246, "step": 4500 }, { "epoch": 1.15, "learning_rate": 1.4325005730002292e-06, "loss": 1.3338, "step": 5000 }, { "epoch": 1.15, "eval_loss": 1.2038103342056274, "eval_runtime": 8.6862, "eval_samples_per_second": 540.282, "eval_steps_per_second": 67.578, "step": 5000 }, { "epoch": 1.26, "learning_rate": 1.575750630300252e-06, "loss": 1.3158, "step": 5500 }, { "epoch": 1.26, "eval_loss": 1.205370545387268, "eval_runtime": 8.6828, "eval_samples_per_second": 540.495, "eval_steps_per_second": 67.605, "step": 5500 }, { "epoch": 1.38, "learning_rate": 1.7190006876002751e-06, "loss": 1.3232, "step": 6000 }, { "epoch": 1.38, "eval_loss": 1.2093712091445923, "eval_runtime": 8.685, "eval_samples_per_second": 540.356, "eval_steps_per_second": 67.588, "step": 6000 }, { "epoch": 1.49, "learning_rate": 1.8622507449002982e-06, "loss": 1.3126, "step": 6500 }, { "epoch": 1.49, "eval_loss": 1.205424189567566, "eval_runtime": 8.6999, "eval_samples_per_second": 539.429, "eval_steps_per_second": 67.472, "step": 6500 }, { "epoch": 1.6, "learning_rate": 2.005500802200321e-06, "loss": 1.3084, "step": 7000 }, { "epoch": 1.6, "eval_loss": 1.1972569227218628, "eval_runtime": 8.6931, "eval_samples_per_second": 539.854, "eval_steps_per_second": 67.525, "step": 7000 }, { "epoch": 1.72, "learning_rate": 2.148750859500344e-06, "loss": 1.3125, "step": 7500 }, { "epoch": 1.72, "eval_loss": 1.2091785669326782, "eval_runtime": 8.6919, "eval_samples_per_second": 539.931, "eval_steps_per_second": 67.535, "step": 7500 }, { "epoch": 1.83, "learning_rate": 2.292000916800367e-06, "loss": 1.2954, "step": 8000 }, { "epoch": 1.83, "eval_loss": 1.204511284828186, "eval_runtime": 8.697, "eval_samples_per_second": 539.611, "eval_steps_per_second": 67.494, "step": 8000 }, { "epoch": 1.95, "learning_rate": 2.4352509741003895e-06, "loss": 1.2903, "step": 8500 }, { "epoch": 1.95, "eval_loss": 1.198663592338562, "eval_runtime": 8.7018, "eval_samples_per_second": 539.311, "eval_steps_per_second": 67.457, "step": 8500 }, { "epoch": 2.06, "learning_rate": 2.578501031400413e-06, "loss": 1.296, "step": 9000 }, { "epoch": 2.06, "eval_loss": 1.2023383378982544, "eval_runtime": 8.6958, "eval_samples_per_second": 539.689, "eval_steps_per_second": 67.504, "step": 9000 }, { "epoch": 2.18, "learning_rate": 2.7217510887004356e-06, "loss": 1.2828, "step": 9500 }, { "epoch": 2.18, "eval_loss": 1.1956695318222046, "eval_runtime": 8.6889, "eval_samples_per_second": 540.117, "eval_steps_per_second": 67.558, "step": 9500 }, { "epoch": 2.29, "learning_rate": 2.8650011460004585e-06, "loss": 1.283, "step": 10000 }, { "epoch": 2.29, "eval_loss": 1.2248542308807373, "eval_runtime": 8.6939, "eval_samples_per_second": 539.806, "eval_steps_per_second": 67.519, "step": 10000 }, { "epoch": 2.41, "learning_rate": 3.0082512033004813e-06, "loss": 1.2811, "step": 10500 }, { "epoch": 2.41, "eval_loss": 1.2097586393356323, "eval_runtime": 8.6844, "eval_samples_per_second": 540.393, "eval_steps_per_second": 67.592, "step": 10500 }, { "epoch": 2.52, "learning_rate": 3.151501260600504e-06, "loss": 1.2763, "step": 11000 }, { "epoch": 2.52, "eval_loss": 1.194442629814148, "eval_runtime": 8.685, "eval_samples_per_second": 540.358, "eval_steps_per_second": 67.588, "step": 11000 }, { "epoch": 2.64, "learning_rate": 3.2947513179005274e-06, "loss": 1.2655, "step": 11500 }, { "epoch": 2.64, "eval_loss": 1.1981009244918823, "eval_runtime": 8.6887, "eval_samples_per_second": 540.128, "eval_steps_per_second": 67.559, "step": 11500 }, { "epoch": 2.75, "learning_rate": 3.4380013752005503e-06, "loss": 1.273, "step": 12000 }, { "epoch": 2.75, "eval_loss": 1.192397117614746, "eval_runtime": 8.6873, "eval_samples_per_second": 540.216, "eval_steps_per_second": 67.57, "step": 12000 }, { "epoch": 2.86, "learning_rate": 3.581251432500573e-06, "loss": 1.2699, "step": 12500 }, { "epoch": 2.86, "eval_loss": 1.2010928392410278, "eval_runtime": 8.6826, "eval_samples_per_second": 540.509, "eval_steps_per_second": 67.607, "step": 12500 }, { "epoch": 2.98, "learning_rate": 3.7245014898005964e-06, "loss": 1.2674, "step": 13000 }, { "epoch": 2.98, "eval_loss": 1.193132996559143, "eval_runtime": 8.6878, "eval_samples_per_second": 540.183, "eval_steps_per_second": 67.566, "step": 13000 }, { "epoch": 3.09, "learning_rate": 3.867751547100619e-06, "loss": 1.2585, "step": 13500 }, { "epoch": 3.09, "eval_loss": 1.2156059741973877, "eval_runtime": 8.6849, "eval_samples_per_second": 540.366, "eval_steps_per_second": 67.589, "step": 13500 }, { "epoch": 3.21, "learning_rate": 4.011001604400642e-06, "loss": 1.2651, "step": 14000 }, { "epoch": 3.21, "eval_loss": 1.195020079612732, "eval_runtime": 8.6912, "eval_samples_per_second": 539.972, "eval_steps_per_second": 67.54, "step": 14000 }, { "epoch": 3.32, "learning_rate": 4.154251661700665e-06, "loss": 1.2511, "step": 14500 }, { "epoch": 3.32, "eval_loss": 1.1980160474777222, "eval_runtime": 8.6811, "eval_samples_per_second": 540.602, "eval_steps_per_second": 67.619, "step": 14500 }, { "epoch": 3.44, "learning_rate": 4.297501719000688e-06, "loss": 1.2533, "step": 15000 }, { "epoch": 3.44, "eval_loss": 1.1951391696929932, "eval_runtime": 8.6838, "eval_samples_per_second": 540.432, "eval_steps_per_second": 67.597, "step": 15000 }, { "epoch": 3.55, "learning_rate": 4.4407517763007106e-06, "loss": 1.2559, "step": 15500 }, { "epoch": 3.55, "eval_loss": 1.2001351118087769, "eval_runtime": 8.6862, "eval_samples_per_second": 540.284, "eval_steps_per_second": 67.579, "step": 15500 }, { "epoch": 3.67, "learning_rate": 4.584001833600734e-06, "loss": 1.2475, "step": 16000 }, { "epoch": 3.67, "eval_loss": 1.1855541467666626, "eval_runtime": 8.6822, "eval_samples_per_second": 540.532, "eval_steps_per_second": 67.61, "step": 16000 }, { "epoch": 3.78, "learning_rate": 4.727251890900756e-06, "loss": 1.25, "step": 16500 }, { "epoch": 3.78, "eval_loss": 1.201431393623352, "eval_runtime": 8.6884, "eval_samples_per_second": 540.147, "eval_steps_per_second": 67.562, "step": 16500 }, { "epoch": 3.9, "learning_rate": 4.870501948200779e-06, "loss": 1.2451, "step": 17000 }, { "epoch": 3.9, "eval_loss": 1.1962047815322876, "eval_runtime": 8.6895, "eval_samples_per_second": 540.075, "eval_steps_per_second": 67.553, "step": 17000 }, { "epoch": 4.01, "learning_rate": 5.013752005500803e-06, "loss": 1.2474, "step": 17500 }, { "epoch": 4.01, "eval_loss": 1.2016408443450928, "eval_runtime": 8.6871, "eval_samples_per_second": 540.225, "eval_steps_per_second": 67.571, "step": 17500 }, { "epoch": 4.13, "learning_rate": 5.157002062800826e-06, "loss": 1.2435, "step": 18000 }, { "epoch": 4.13, "eval_loss": 1.205292820930481, "eval_runtime": 8.6916, "eval_samples_per_second": 539.944, "eval_steps_per_second": 67.536, "step": 18000 }, { "epoch": 4.24, "learning_rate": 5.300252120100848e-06, "loss": 1.2425, "step": 18500 }, { "epoch": 4.24, "eval_loss": 1.1865408420562744, "eval_runtime": 8.6832, "eval_samples_per_second": 540.467, "eval_steps_per_second": 67.602, "step": 18500 }, { "epoch": 4.35, "learning_rate": 5.443502177400871e-06, "loss": 1.2353, "step": 19000 }, { "epoch": 4.35, "eval_loss": 1.1950767040252686, "eval_runtime": 8.6822, "eval_samples_per_second": 540.529, "eval_steps_per_second": 67.609, "step": 19000 }, { "epoch": 4.47, "learning_rate": 5.586752234700894e-06, "loss": 1.2351, "step": 19500 }, { "epoch": 4.47, "eval_loss": 1.186410903930664, "eval_runtime": 8.6917, "eval_samples_per_second": 539.942, "eval_steps_per_second": 67.536, "step": 19500 }, { "epoch": 4.58, "learning_rate": 5.730002292000917e-06, "loss": 1.2331, "step": 20000 }, { "epoch": 4.58, "eval_loss": 1.1984221935272217, "eval_runtime": 8.6873, "eval_samples_per_second": 540.212, "eval_steps_per_second": 67.57, "step": 20000 }, { "epoch": 4.7, "learning_rate": 5.87325234930094e-06, "loss": 1.227, "step": 20500 }, { "epoch": 4.7, "eval_loss": 1.1957892179489136, "eval_runtime": 8.6887, "eval_samples_per_second": 540.129, "eval_steps_per_second": 67.559, "step": 20500 }, { "epoch": 4.81, "learning_rate": 6.016502406600963e-06, "loss": 1.2354, "step": 21000 }, { "epoch": 4.81, "eval_loss": 1.1925157308578491, "eval_runtime": 8.6835, "eval_samples_per_second": 540.448, "eval_steps_per_second": 67.599, "step": 21000 }, { "epoch": 4.93, "learning_rate": 6.1597524639009855e-06, "loss": 1.2376, "step": 21500 }, { "epoch": 4.93, "eval_loss": 1.1724218130111694, "eval_runtime": 8.6877, "eval_samples_per_second": 540.191, "eval_steps_per_second": 67.567, "step": 21500 }, { "epoch": 5.04, "learning_rate": 6.303002521201008e-06, "loss": 1.2277, "step": 22000 }, { "epoch": 5.04, "eval_loss": 1.1979554891586304, "eval_runtime": 8.6805, "eval_samples_per_second": 540.635, "eval_steps_per_second": 67.623, "step": 22000 }, { "epoch": 5.16, "learning_rate": 6.446252578501032e-06, "loss": 1.2239, "step": 22500 }, { "epoch": 5.16, "eval_loss": 1.1964473724365234, "eval_runtime": 8.689, "eval_samples_per_second": 540.106, "eval_steps_per_second": 67.556, "step": 22500 }, { "epoch": 5.27, "learning_rate": 6.589502635801055e-06, "loss": 1.2249, "step": 23000 }, { "epoch": 5.27, "eval_loss": 1.1936452388763428, "eval_runtime": 8.6849, "eval_samples_per_second": 540.361, "eval_steps_per_second": 67.588, "step": 23000 }, { "epoch": 5.39, "learning_rate": 6.732752693101077e-06, "loss": 1.2231, "step": 23500 }, { "epoch": 5.39, "eval_loss": 1.2032266855239868, "eval_runtime": 8.688, "eval_samples_per_second": 540.17, "eval_steps_per_second": 67.564, "step": 23500 }, { "epoch": 5.5, "learning_rate": 6.8760027504011005e-06, "loss": 1.2193, "step": 24000 }, { "epoch": 5.5, "eval_loss": 1.1982700824737549, "eval_runtime": 8.6836, "eval_samples_per_second": 540.446, "eval_steps_per_second": 67.599, "step": 24000 }, { "epoch": 5.62, "learning_rate": 7.019252807701124e-06, "loss": 1.2311, "step": 24500 }, { "epoch": 5.62, "eval_loss": 1.1817071437835693, "eval_runtime": 8.6975, "eval_samples_per_second": 539.581, "eval_steps_per_second": 67.491, "step": 24500 }, { "epoch": 5.73, "learning_rate": 7.162502865001146e-06, "loss": 1.2197, "step": 25000 }, { "epoch": 5.73, "eval_loss": 1.2048555612564087, "eval_runtime": 8.6898, "eval_samples_per_second": 540.059, "eval_steps_per_second": 67.55, "step": 25000 }, { "epoch": 5.84, "learning_rate": 7.305752922301169e-06, "loss": 1.2133, "step": 25500 }, { "epoch": 5.84, "eval_loss": 1.2156994342803955, "eval_runtime": 8.6867, "eval_samples_per_second": 540.249, "eval_steps_per_second": 67.574, "step": 25500 }, { "epoch": 5.96, "learning_rate": 7.449002979601193e-06, "loss": 1.2143, "step": 26000 }, { "epoch": 5.96, "eval_loss": 1.2034798860549927, "eval_runtime": 8.6878, "eval_samples_per_second": 540.183, "eval_steps_per_second": 67.566, "step": 26000 }, { "epoch": 6.07, "learning_rate": 7.592253036901215e-06, "loss": 1.2187, "step": 26500 }, { "epoch": 6.07, "eval_loss": 1.183269739151001, "eval_runtime": 8.6846, "eval_samples_per_second": 540.384, "eval_steps_per_second": 67.591, "step": 26500 }, { "epoch": 6.19, "learning_rate": 7.735503094201238e-06, "loss": 1.2168, "step": 27000 }, { "epoch": 6.19, "eval_loss": 1.2140603065490723, "eval_runtime": 8.6768, "eval_samples_per_second": 540.865, "eval_steps_per_second": 67.651, "step": 27000 }, { "epoch": 6.3, "learning_rate": 7.878753151501261e-06, "loss": 1.1985, "step": 27500 }, { "epoch": 6.3, "eval_loss": 1.181829571723938, "eval_runtime": 8.6862, "eval_samples_per_second": 540.285, "eval_steps_per_second": 67.579, "step": 27500 }, { "epoch": 6.42, "learning_rate": 8.022003208801284e-06, "loss": 1.2131, "step": 28000 }, { "epoch": 6.42, "eval_loss": 1.2210400104522705, "eval_runtime": 8.6821, "eval_samples_per_second": 540.539, "eval_steps_per_second": 67.611, "step": 28000 }, { "epoch": 6.53, "learning_rate": 8.165253266101307e-06, "loss": 1.208, "step": 28500 }, { "epoch": 6.53, "eval_loss": 1.202985405921936, "eval_runtime": 8.6786, "eval_samples_per_second": 540.753, "eval_steps_per_second": 67.637, "step": 28500 }, { "epoch": 6.65, "learning_rate": 8.30850332340133e-06, "loss": 1.2084, "step": 29000 }, { "epoch": 6.65, "eval_loss": 1.201478362083435, "eval_runtime": 8.6873, "eval_samples_per_second": 540.215, "eval_steps_per_second": 67.57, "step": 29000 }, { "epoch": 6.76, "learning_rate": 8.451753380701353e-06, "loss": 1.2077, "step": 29500 }, { "epoch": 6.76, "eval_loss": 1.1944732666015625, "eval_runtime": 8.6877, "eval_samples_per_second": 540.19, "eval_steps_per_second": 67.567, "step": 29500 }, { "epoch": 6.88, "learning_rate": 8.595003438001375e-06, "loss": 1.1976, "step": 30000 }, { "epoch": 6.88, "eval_loss": 1.2092996835708618, "eval_runtime": 8.6875, "eval_samples_per_second": 540.203, "eval_steps_per_second": 67.569, "step": 30000 }, { "epoch": 6.99, "learning_rate": 8.738253495301398e-06, "loss": 1.2046, "step": 30500 }, { "epoch": 6.99, "eval_loss": 1.188598871231079, "eval_runtime": 8.6807, "eval_samples_per_second": 540.625, "eval_steps_per_second": 67.621, "step": 30500 }, { "epoch": 7.11, "learning_rate": 8.881503552601421e-06, "loss": 1.2027, "step": 31000 }, { "epoch": 7.11, "eval_loss": 1.2029366493225098, "eval_runtime": 8.678, "eval_samples_per_second": 540.793, "eval_steps_per_second": 67.642, "step": 31000 }, { "epoch": 7.22, "learning_rate": 9.024753609901444e-06, "loss": 1.2002, "step": 31500 }, { "epoch": 7.22, "eval_loss": 1.1990541219711304, "eval_runtime": 8.6864, "eval_samples_per_second": 540.268, "eval_steps_per_second": 67.577, "step": 31500 }, { "epoch": 7.33, "learning_rate": 9.168003667201468e-06, "loss": 1.2017, "step": 32000 }, { "epoch": 7.33, "eval_loss": 1.2063162326812744, "eval_runtime": 8.6883, "eval_samples_per_second": 540.153, "eval_steps_per_second": 67.562, "step": 32000 }, { "epoch": 7.45, "learning_rate": 9.31125372450149e-06, "loss": 1.1964, "step": 32500 }, { "epoch": 7.45, "eval_loss": 1.190079927444458, "eval_runtime": 8.6962, "eval_samples_per_second": 539.662, "eval_steps_per_second": 67.501, "step": 32500 }, { "epoch": 7.56, "learning_rate": 9.454503781801512e-06, "loss": 1.1987, "step": 33000 }, { "epoch": 7.56, "eval_loss": 1.2010529041290283, "eval_runtime": 8.6815, "eval_samples_per_second": 540.572, "eval_steps_per_second": 67.615, "step": 33000 }, { "epoch": 7.68, "learning_rate": 9.597753839101537e-06, "loss": 1.2031, "step": 33500 }, { "epoch": 7.68, "eval_loss": 1.199568510055542, "eval_runtime": 8.6878, "eval_samples_per_second": 540.183, "eval_steps_per_second": 67.566, "step": 33500 }, { "epoch": 7.79, "learning_rate": 9.741003896401558e-06, "loss": 1.1952, "step": 34000 }, { "epoch": 7.79, "eval_loss": 1.2089903354644775, "eval_runtime": 8.6865, "eval_samples_per_second": 540.264, "eval_steps_per_second": 67.576, "step": 34000 }, { "epoch": 7.91, "learning_rate": 9.884253953701583e-06, "loss": 1.1952, "step": 34500 }, { "epoch": 7.91, "eval_loss": 1.2042160034179688, "eval_runtime": 8.6843, "eval_samples_per_second": 540.401, "eval_steps_per_second": 67.593, "step": 34500 }, { "epoch": 8.02, "learning_rate": 1.0027504011001606e-05, "loss": 1.1973, "step": 35000 }, { "epoch": 8.02, "eval_loss": 1.1957743167877197, "eval_runtime": 8.6862, "eval_samples_per_second": 540.285, "eval_steps_per_second": 67.579, "step": 35000 }, { "epoch": 8.14, "learning_rate": 1.0170754068301627e-05, "loss": 1.1927, "step": 35500 }, { "epoch": 8.14, "eval_loss": 1.2100292444229126, "eval_runtime": 8.6804, "eval_samples_per_second": 540.646, "eval_steps_per_second": 67.624, "step": 35500 }, { "epoch": 8.25, "learning_rate": 1.0314004125601651e-05, "loss": 1.1882, "step": 36000 }, { "epoch": 8.25, "eval_loss": 1.197716474533081, "eval_runtime": 8.6889, "eval_samples_per_second": 540.114, "eval_steps_per_second": 67.557, "step": 36000 }, { "epoch": 8.37, "learning_rate": 1.0457254182901674e-05, "loss": 1.1884, "step": 36500 }, { "epoch": 8.37, "eval_loss": 1.210768699645996, "eval_runtime": 8.6859, "eval_samples_per_second": 540.304, "eval_steps_per_second": 67.581, "step": 36500 }, { "epoch": 8.48, "learning_rate": 1.0600504240201695e-05, "loss": 1.1885, "step": 37000 }, { "epoch": 8.48, "eval_loss": 1.2071216106414795, "eval_runtime": 8.6801, "eval_samples_per_second": 540.659, "eval_steps_per_second": 67.626, "step": 37000 }, { "epoch": 8.59, "learning_rate": 1.074375429750172e-05, "loss": 1.1925, "step": 37500 }, { "epoch": 8.59, "eval_loss": 1.2086641788482666, "eval_runtime": 8.6921, "eval_samples_per_second": 539.917, "eval_steps_per_second": 67.533, "step": 37500 }, { "epoch": 8.71, "learning_rate": 1.0887004354801743e-05, "loss": 1.1878, "step": 38000 }, { "epoch": 8.71, "eval_loss": 1.2121607065200806, "eval_runtime": 8.6859, "eval_samples_per_second": 540.302, "eval_steps_per_second": 67.581, "step": 38000 }, { "epoch": 8.82, "learning_rate": 1.1030254412101765e-05, "loss": 1.1885, "step": 38500 }, { "epoch": 8.82, "eval_loss": 1.2102335691452026, "eval_runtime": 8.6764, "eval_samples_per_second": 540.892, "eval_steps_per_second": 67.655, "step": 38500 }, { "epoch": 8.94, "learning_rate": 1.1173504469401788e-05, "loss": 1.1896, "step": 39000 }, { "epoch": 8.94, "eval_loss": 1.201532244682312, "eval_runtime": 8.6813, "eval_samples_per_second": 540.589, "eval_steps_per_second": 67.617, "step": 39000 }, { "epoch": 9.05, "learning_rate": 1.1316754526701811e-05, "loss": 1.188, "step": 39500 }, { "epoch": 9.05, "eval_loss": 1.2050158977508545, "eval_runtime": 8.6864, "eval_samples_per_second": 540.269, "eval_steps_per_second": 67.577, "step": 39500 }, { "epoch": 9.17, "learning_rate": 1.1460004584001834e-05, "loss": 1.1757, "step": 40000 }, { "epoch": 9.17, "eval_loss": 1.2065820693969727, "eval_runtime": 8.6876, "eval_samples_per_second": 540.198, "eval_steps_per_second": 67.568, "step": 40000 }, { "epoch": 9.28, "learning_rate": 1.1603254641301857e-05, "loss": 1.1822, "step": 40500 }, { "epoch": 9.28, "eval_loss": 1.2040373086929321, "eval_runtime": 8.6859, "eval_samples_per_second": 540.3, "eval_steps_per_second": 67.581, "step": 40500 }, { "epoch": 9.4, "learning_rate": 1.174650469860188e-05, "loss": 1.1903, "step": 41000 }, { "epoch": 9.4, "eval_loss": 1.1886615753173828, "eval_runtime": 8.6837, "eval_samples_per_second": 540.438, "eval_steps_per_second": 67.598, "step": 41000 }, { "epoch": 9.51, "learning_rate": 1.1889754755901902e-05, "loss": 1.1856, "step": 41500 }, { "epoch": 9.51, "eval_loss": 1.2003201246261597, "eval_runtime": 8.6835, "eval_samples_per_second": 540.448, "eval_steps_per_second": 67.599, "step": 41500 }, { "epoch": 9.63, "learning_rate": 1.2033004813201925e-05, "loss": 1.1804, "step": 42000 }, { "epoch": 9.63, "eval_loss": 1.225038766860962, "eval_runtime": 8.6823, "eval_samples_per_second": 540.526, "eval_steps_per_second": 67.609, "step": 42000 }, { "epoch": 9.74, "learning_rate": 1.217625487050195e-05, "loss": 1.1842, "step": 42500 }, { "epoch": 9.74, "eval_loss": 1.2077628374099731, "eval_runtime": 8.6848, "eval_samples_per_second": 540.372, "eval_steps_per_second": 67.59, "step": 42500 }, { "epoch": 9.86, "learning_rate": 1.2319504927801971e-05, "loss": 1.1813, "step": 43000 }, { "epoch": 9.86, "eval_loss": 1.2078194618225098, "eval_runtime": 8.682, "eval_samples_per_second": 540.546, "eval_steps_per_second": 67.611, "step": 43000 }, { "epoch": 9.97, "learning_rate": 1.2462754985101994e-05, "loss": 1.1719, "step": 43500 }, { "epoch": 9.97, "eval_loss": 1.21042799949646, "eval_runtime": 8.6812, "eval_samples_per_second": 540.593, "eval_steps_per_second": 67.617, "step": 43500 }, { "epoch": 10.08, "learning_rate": 1.2606005042402017e-05, "loss": 1.1728, "step": 44000 }, { "epoch": 10.08, "eval_loss": 1.2213494777679443, "eval_runtime": 8.6971, "eval_samples_per_second": 539.607, "eval_steps_per_second": 67.494, "step": 44000 }, { "epoch": 10.2, "learning_rate": 1.274925509970204e-05, "loss": 1.1711, "step": 44500 }, { "epoch": 10.2, "eval_loss": 1.207056999206543, "eval_runtime": 8.6868, "eval_samples_per_second": 540.245, "eval_steps_per_second": 67.574, "step": 44500 }, { "epoch": 10.31, "learning_rate": 1.2892505157002064e-05, "loss": 1.1736, "step": 45000 }, { "epoch": 10.31, "eval_loss": 1.2064313888549805, "eval_runtime": 8.6843, "eval_samples_per_second": 540.403, "eval_steps_per_second": 67.594, "step": 45000 }, { "epoch": 10.43, "learning_rate": 1.3035755214302087e-05, "loss": 1.1749, "step": 45500 }, { "epoch": 10.43, "eval_loss": 1.2050743103027344, "eval_runtime": 8.687, "eval_samples_per_second": 540.229, "eval_steps_per_second": 67.572, "step": 45500 }, { "epoch": 10.54, "learning_rate": 1.317900527160211e-05, "loss": 1.1759, "step": 46000 }, { "epoch": 10.54, "eval_loss": 1.2032729387283325, "eval_runtime": 8.6902, "eval_samples_per_second": 540.033, "eval_steps_per_second": 67.547, "step": 46000 }, { "epoch": 10.66, "learning_rate": 1.3322255328902134e-05, "loss": 1.1773, "step": 46500 }, { "epoch": 10.66, "eval_loss": 1.2188127040863037, "eval_runtime": 8.6885, "eval_samples_per_second": 540.138, "eval_steps_per_second": 67.56, "step": 46500 }, { "epoch": 10.77, "learning_rate": 1.3465505386202154e-05, "loss": 1.1725, "step": 47000 }, { "epoch": 10.77, "eval_loss": 1.213136076927185, "eval_runtime": 8.6865, "eval_samples_per_second": 540.266, "eval_steps_per_second": 67.576, "step": 47000 }, { "epoch": 10.89, "learning_rate": 1.3608755443502178e-05, "loss": 1.1764, "step": 47500 }, { "epoch": 10.89, "eval_loss": 1.2165940999984741, "eval_runtime": 8.684, "eval_samples_per_second": 540.417, "eval_steps_per_second": 67.595, "step": 47500 }, { "epoch": 11.0, "learning_rate": 1.3752005500802201e-05, "loss": 1.1872, "step": 48000 }, { "epoch": 11.0, "eval_loss": 1.205088496208191, "eval_runtime": 8.6849, "eval_samples_per_second": 540.361, "eval_steps_per_second": 67.588, "step": 48000 }, { "epoch": 11.12, "learning_rate": 1.3895255558102224e-05, "loss": 1.1674, "step": 48500 }, { "epoch": 11.12, "eval_loss": 1.2201519012451172, "eval_runtime": 8.6907, "eval_samples_per_second": 540.005, "eval_steps_per_second": 67.544, "step": 48500 }, { "epoch": 11.23, "learning_rate": 1.4038505615402248e-05, "loss": 1.1611, "step": 49000 }, { "epoch": 11.23, "eval_loss": 1.2200229167938232, "eval_runtime": 8.6898, "eval_samples_per_second": 540.061, "eval_steps_per_second": 67.551, "step": 49000 }, { "epoch": 11.35, "learning_rate": 1.4181755672702271e-05, "loss": 1.1704, "step": 49500 }, { "epoch": 11.35, "eval_loss": 1.2139869928359985, "eval_runtime": 8.687, "eval_samples_per_second": 540.234, "eval_steps_per_second": 67.572, "step": 49500 }, { "epoch": 11.46, "learning_rate": 1.4325005730002292e-05, "loss": 1.1744, "step": 50000 }, { "epoch": 11.46, "eval_loss": 1.2232825756072998, "eval_runtime": 8.6933, "eval_samples_per_second": 539.843, "eval_steps_per_second": 67.524, "step": 50000 }, { "epoch": 11.57, "learning_rate": 1.4468255787302315e-05, "loss": 1.1737, "step": 50500 }, { "epoch": 11.57, "eval_loss": 1.2176398038864136, "eval_runtime": 8.6892, "eval_samples_per_second": 540.093, "eval_steps_per_second": 67.555, "step": 50500 }, { "epoch": 11.69, "learning_rate": 1.4611505844602338e-05, "loss": 1.1717, "step": 51000 }, { "epoch": 11.69, "eval_loss": 1.2151219844818115, "eval_runtime": 8.6893, "eval_samples_per_second": 540.087, "eval_steps_per_second": 67.554, "step": 51000 }, { "epoch": 11.8, "learning_rate": 1.4754755901902363e-05, "loss": 1.1656, "step": 51500 }, { "epoch": 11.8, "eval_loss": 1.2284044027328491, "eval_runtime": 8.6852, "eval_samples_per_second": 540.344, "eval_steps_per_second": 67.586, "step": 51500 }, { "epoch": 11.92, "learning_rate": 1.4898005959202385e-05, "loss": 1.1687, "step": 52000 }, { "epoch": 11.92, "eval_loss": 1.2428866624832153, "eval_runtime": 8.6984, "eval_samples_per_second": 539.524, "eval_steps_per_second": 67.484, "step": 52000 }, { "epoch": 12.03, "learning_rate": 1.5041256016502408e-05, "loss": 1.1645, "step": 52500 }, { "epoch": 12.03, "eval_loss": 1.224706768989563, "eval_runtime": 8.6956, "eval_samples_per_second": 539.699, "eval_steps_per_second": 67.506, "step": 52500 }, { "epoch": 12.15, "learning_rate": 1.518450607380243e-05, "loss": 1.164, "step": 53000 }, { "epoch": 12.15, "eval_loss": 1.2165286540985107, "eval_runtime": 8.689, "eval_samples_per_second": 540.107, "eval_steps_per_second": 67.556, "step": 53000 }, { "epoch": 12.26, "learning_rate": 1.5327756131102454e-05, "loss": 1.1626, "step": 53500 }, { "epoch": 12.26, "eval_loss": 1.2338306903839111, "eval_runtime": 8.6841, "eval_samples_per_second": 540.415, "eval_steps_per_second": 67.595, "step": 53500 }, { "epoch": 12.38, "learning_rate": 1.5471006188402477e-05, "loss": 1.1547, "step": 54000 }, { "epoch": 12.38, "eval_loss": 1.2282565832138062, "eval_runtime": 8.6921, "eval_samples_per_second": 539.914, "eval_steps_per_second": 67.532, "step": 54000 }, { "epoch": 12.49, "learning_rate": 1.56142562457025e-05, "loss": 1.1586, "step": 54500 }, { "epoch": 12.49, "eval_loss": 1.2251629829406738, "eval_runtime": 8.6923, "eval_samples_per_second": 539.906, "eval_steps_per_second": 67.531, "step": 54500 }, { "epoch": 12.61, "learning_rate": 1.5757506303002522e-05, "loss": 1.1633, "step": 55000 }, { "epoch": 12.61, "eval_loss": 1.2295446395874023, "eval_runtime": 8.6919, "eval_samples_per_second": 539.925, "eval_steps_per_second": 67.534, "step": 55000 }, { "epoch": 12.72, "learning_rate": 1.5900756360302545e-05, "loss": 1.1661, "step": 55500 }, { "epoch": 12.72, "eval_loss": 1.2268338203430176, "eval_runtime": 8.6856, "eval_samples_per_second": 540.32, "eval_steps_per_second": 67.583, "step": 55500 }, { "epoch": 12.84, "learning_rate": 1.6044006417602568e-05, "loss": 1.1607, "step": 56000 }, { "epoch": 12.84, "eval_loss": 1.2090040445327759, "eval_runtime": 8.6861, "eval_samples_per_second": 540.286, "eval_steps_per_second": 67.579, "step": 56000 }, { "epoch": 12.95, "learning_rate": 1.618725647490259e-05, "loss": 1.1607, "step": 56500 }, { "epoch": 12.95, "eval_loss": 1.2241711616516113, "eval_runtime": 8.6923, "eval_samples_per_second": 539.906, "eval_steps_per_second": 67.531, "step": 56500 }, { "epoch": 13.06, "learning_rate": 1.6330506532202614e-05, "loss": 1.1575, "step": 57000 }, { "epoch": 13.06, "eval_loss": 1.2332508563995361, "eval_runtime": 8.6846, "eval_samples_per_second": 540.384, "eval_steps_per_second": 67.591, "step": 57000 }, { "epoch": 13.18, "learning_rate": 1.6473756589502637e-05, "loss": 1.1547, "step": 57500 }, { "epoch": 13.18, "eval_loss": 1.2229161262512207, "eval_runtime": 8.6869, "eval_samples_per_second": 540.238, "eval_steps_per_second": 67.573, "step": 57500 }, { "epoch": 13.29, "learning_rate": 1.661700664680266e-05, "loss": 1.1551, "step": 58000 }, { "epoch": 13.29, "eval_loss": 1.2160086631774902, "eval_runtime": 8.6889, "eval_samples_per_second": 540.116, "eval_steps_per_second": 67.558, "step": 58000 }, { "epoch": 13.41, "learning_rate": 1.6760256704102682e-05, "loss": 1.1594, "step": 58500 }, { "epoch": 13.41, "eval_loss": 1.2249845266342163, "eval_runtime": 8.6877, "eval_samples_per_second": 540.19, "eval_steps_per_second": 67.567, "step": 58500 }, { "epoch": 13.52, "learning_rate": 1.6903506761402705e-05, "loss": 1.1585, "step": 59000 }, { "epoch": 13.52, "eval_loss": 1.2205007076263428, "eval_runtime": 8.6785, "eval_samples_per_second": 540.761, "eval_steps_per_second": 67.638, "step": 59000 }, { "epoch": 13.64, "learning_rate": 1.7046756818702728e-05, "loss": 1.1518, "step": 59500 }, { "epoch": 13.64, "eval_loss": 1.2399922609329224, "eval_runtime": 8.6851, "eval_samples_per_second": 540.353, "eval_steps_per_second": 67.587, "step": 59500 }, { "epoch": 13.75, "learning_rate": 1.719000687600275e-05, "loss": 1.1541, "step": 60000 }, { "epoch": 13.75, "eval_loss": 1.2238965034484863, "eval_runtime": 8.7014, "eval_samples_per_second": 539.336, "eval_steps_per_second": 67.46, "step": 60000 }, { "epoch": 13.87, "learning_rate": 1.7333256933302774e-05, "loss": 1.1533, "step": 60500 }, { "epoch": 13.87, "eval_loss": 1.240592122077942, "eval_runtime": 8.6888, "eval_samples_per_second": 540.122, "eval_steps_per_second": 67.558, "step": 60500 }, { "epoch": 13.98, "learning_rate": 1.7476506990602797e-05, "loss": 1.1595, "step": 61000 }, { "epoch": 13.98, "eval_loss": 1.2328312397003174, "eval_runtime": 8.6869, "eval_samples_per_second": 540.241, "eval_steps_per_second": 67.573, "step": 61000 }, { "epoch": 14.1, "learning_rate": 1.7619757047902823e-05, "loss": 1.1481, "step": 61500 }, { "epoch": 14.1, "eval_loss": 1.2268218994140625, "eval_runtime": 8.6867, "eval_samples_per_second": 540.249, "eval_steps_per_second": 67.574, "step": 61500 }, { "epoch": 14.21, "learning_rate": 1.7763007105202842e-05, "loss": 1.1547, "step": 62000 }, { "epoch": 14.21, "eval_loss": 1.23140549659729, "eval_runtime": 8.6934, "eval_samples_per_second": 539.835, "eval_steps_per_second": 67.522, "step": 62000 }, { "epoch": 14.32, "learning_rate": 1.7906257162502865e-05, "loss": 1.1492, "step": 62500 }, { "epoch": 14.32, "eval_loss": 1.22868812084198, "eval_runtime": 8.6891, "eval_samples_per_second": 540.101, "eval_steps_per_second": 67.556, "step": 62500 }, { "epoch": 14.44, "learning_rate": 1.8049507219802888e-05, "loss": 1.1501, "step": 63000 }, { "epoch": 14.44, "eval_loss": 1.2400908470153809, "eval_runtime": 8.6867, "eval_samples_per_second": 540.249, "eval_steps_per_second": 67.574, "step": 63000 }, { "epoch": 14.55, "learning_rate": 1.819275727710291e-05, "loss": 1.1525, "step": 63500 }, { "epoch": 14.55, "eval_loss": 1.2201071977615356, "eval_runtime": 8.6897, "eval_samples_per_second": 540.064, "eval_steps_per_second": 67.551, "step": 63500 }, { "epoch": 14.67, "learning_rate": 1.8336007334402937e-05, "loss": 1.1548, "step": 64000 }, { "epoch": 14.67, "eval_loss": 1.2285242080688477, "eval_runtime": 8.6874, "eval_samples_per_second": 540.205, "eval_steps_per_second": 67.569, "step": 64000 }, { "epoch": 14.78, "learning_rate": 1.847925739170296e-05, "loss": 1.1529, "step": 64500 }, { "epoch": 14.78, "eval_loss": 1.2385305166244507, "eval_runtime": 8.6937, "eval_samples_per_second": 539.817, "eval_steps_per_second": 67.52, "step": 64500 }, { "epoch": 14.9, "learning_rate": 1.862250744900298e-05, "loss": 1.1544, "step": 65000 }, { "epoch": 14.9, "eval_loss": 1.2486348152160645, "eval_runtime": 8.6862, "eval_samples_per_second": 540.285, "eval_steps_per_second": 67.579, "step": 65000 }, { "epoch": 15.01, "learning_rate": 1.8765757506303002e-05, "loss": 1.1527, "step": 65500 }, { "epoch": 15.01, "eval_loss": 1.2322914600372314, "eval_runtime": 8.6861, "eval_samples_per_second": 540.286, "eval_steps_per_second": 67.579, "step": 65500 }, { "epoch": 15.13, "learning_rate": 1.8909007563603025e-05, "loss": 1.1409, "step": 66000 }, { "epoch": 15.13, "eval_loss": 1.2350742816925049, "eval_runtime": 8.6897, "eval_samples_per_second": 540.067, "eval_steps_per_second": 67.551, "step": 66000 }, { "epoch": 15.24, "learning_rate": 1.905225762090305e-05, "loss": 1.1393, "step": 66500 }, { "epoch": 15.24, "eval_loss": 1.246923804283142, "eval_runtime": 8.6874, "eval_samples_per_second": 540.209, "eval_steps_per_second": 67.569, "step": 66500 }, { "epoch": 15.36, "learning_rate": 1.9195507678203074e-05, "loss": 1.1475, "step": 67000 }, { "epoch": 15.36, "eval_loss": 1.2442528009414673, "eval_runtime": 8.6866, "eval_samples_per_second": 540.257, "eval_steps_per_second": 67.575, "step": 67000 }, { "epoch": 15.47, "learning_rate": 1.9338757735503097e-05, "loss": 1.1467, "step": 67500 }, { "epoch": 15.47, "eval_loss": 1.2357794046401978, "eval_runtime": 8.685, "eval_samples_per_second": 540.357, "eval_steps_per_second": 67.588, "step": 67500 }, { "epoch": 15.59, "learning_rate": 1.9482007792803116e-05, "loss": 1.1463, "step": 68000 }, { "epoch": 15.59, "eval_loss": 1.2265408039093018, "eval_runtime": 8.7017, "eval_samples_per_second": 539.318, "eval_steps_per_second": 67.458, "step": 68000 }, { "epoch": 15.7, "learning_rate": 1.962525785010314e-05, "loss": 1.1414, "step": 68500 }, { "epoch": 15.7, "eval_loss": 1.2458525896072388, "eval_runtime": 8.689, "eval_samples_per_second": 540.106, "eval_steps_per_second": 67.556, "step": 68500 }, { "epoch": 15.81, "learning_rate": 1.9768507907403165e-05, "loss": 1.147, "step": 69000 }, { "epoch": 15.81, "eval_loss": 1.251219391822815, "eval_runtime": 8.6883, "eval_samples_per_second": 540.153, "eval_steps_per_second": 67.562, "step": 69000 }, { "epoch": 15.93, "learning_rate": 1.9911757964703188e-05, "loss": 1.1424, "step": 69500 }, { "epoch": 15.93, "eval_loss": 1.2304646968841553, "eval_runtime": 8.6866, "eval_samples_per_second": 540.259, "eval_steps_per_second": 67.575, "step": 69500 }, { "epoch": 16.04, "learning_rate": 2.005500802200321e-05, "loss": 1.1385, "step": 70000 }, { "epoch": 16.04, "eval_loss": 1.2386082410812378, "eval_runtime": 8.6889, "eval_samples_per_second": 540.112, "eval_steps_per_second": 67.557, "step": 70000 }, { "epoch": 16.16, "learning_rate": 2.0198258079303234e-05, "loss": 1.1399, "step": 70500 }, { "epoch": 16.16, "eval_loss": 1.2419416904449463, "eval_runtime": 8.6911, "eval_samples_per_second": 539.98, "eval_steps_per_second": 67.541, "step": 70500 }, { "epoch": 16.27, "learning_rate": 2.0341508136603253e-05, "loss": 1.1346, "step": 71000 }, { "epoch": 16.27, "eval_loss": 1.2377504110336304, "eval_runtime": 8.6919, "eval_samples_per_second": 539.927, "eval_steps_per_second": 67.534, "step": 71000 }, { "epoch": 16.39, "learning_rate": 2.048475819390328e-05, "loss": 1.1475, "step": 71500 }, { "epoch": 16.39, "eval_loss": 1.2566027641296387, "eval_runtime": 8.6953, "eval_samples_per_second": 539.717, "eval_steps_per_second": 67.508, "step": 71500 }, { "epoch": 16.5, "learning_rate": 2.0628008251203302e-05, "loss": 1.1456, "step": 72000 }, { "epoch": 16.5, "eval_loss": 1.253233551979065, "eval_runtime": 8.695, "eval_samples_per_second": 539.735, "eval_steps_per_second": 67.51, "step": 72000 }, { "epoch": 16.62, "learning_rate": 2.0771258308503325e-05, "loss": 1.1362, "step": 72500 }, { "epoch": 16.62, "eval_loss": 1.2570106983184814, "eval_runtime": 8.6787, "eval_samples_per_second": 540.747, "eval_steps_per_second": 67.637, "step": 72500 }, { "epoch": 16.73, "learning_rate": 2.0914508365803348e-05, "loss": 1.1424, "step": 73000 }, { "epoch": 16.73, "eval_loss": 1.2544804811477661, "eval_runtime": 8.6862, "eval_samples_per_second": 540.28, "eval_steps_per_second": 67.578, "step": 73000 }, { "epoch": 16.85, "learning_rate": 2.105775842310337e-05, "loss": 1.1454, "step": 73500 }, { "epoch": 16.85, "eval_loss": 1.245435118675232, "eval_runtime": 8.6873, "eval_samples_per_second": 540.211, "eval_steps_per_second": 67.57, "step": 73500 }, { "epoch": 16.96, "learning_rate": 2.120100848040339e-05, "loss": 1.1474, "step": 74000 }, { "epoch": 16.96, "eval_loss": 1.254456639289856, "eval_runtime": 8.6886, "eval_samples_per_second": 540.131, "eval_steps_per_second": 67.56, "step": 74000 }, { "epoch": 17.08, "learning_rate": 2.1344258537703417e-05, "loss": 1.138, "step": 74500 }, { "epoch": 17.08, "eval_loss": 1.251043677330017, "eval_runtime": 8.6927, "eval_samples_per_second": 539.877, "eval_steps_per_second": 67.528, "step": 74500 }, { "epoch": 17.19, "learning_rate": 2.148750859500344e-05, "loss": 1.1319, "step": 75000 }, { "epoch": 17.19, "eval_loss": 1.2481544017791748, "eval_runtime": 8.6819, "eval_samples_per_second": 540.553, "eval_steps_per_second": 67.612, "step": 75000 }, { "epoch": 17.3, "learning_rate": 2.1630758652303462e-05, "loss": 1.134, "step": 75500 }, { "epoch": 17.3, "eval_loss": 1.2408442497253418, "eval_runtime": 8.6919, "eval_samples_per_second": 539.926, "eval_steps_per_second": 67.534, "step": 75500 }, { "epoch": 17.42, "learning_rate": 2.1774008709603485e-05, "loss": 1.1376, "step": 76000 }, { "epoch": 17.42, "eval_loss": 1.2328704595565796, "eval_runtime": 8.7031, "eval_samples_per_second": 539.235, "eval_steps_per_second": 67.448, "step": 76000 }, { "epoch": 17.53, "learning_rate": 2.1917258766903508e-05, "loss": 1.1349, "step": 76500 }, { "epoch": 17.53, "eval_loss": 1.269067645072937, "eval_runtime": 8.6864, "eval_samples_per_second": 540.271, "eval_steps_per_second": 67.577, "step": 76500 }, { "epoch": 17.65, "learning_rate": 2.206050882420353e-05, "loss": 1.1323, "step": 77000 }, { "epoch": 17.65, "eval_loss": 1.268683671951294, "eval_runtime": 8.6849, "eval_samples_per_second": 540.361, "eval_steps_per_second": 67.588, "step": 77000 }, { "epoch": 17.76, "learning_rate": 2.2203758881503554e-05, "loss": 1.1459, "step": 77500 }, { "epoch": 17.76, "eval_loss": 1.2635948657989502, "eval_runtime": 8.6882, "eval_samples_per_second": 540.156, "eval_steps_per_second": 67.563, "step": 77500 }, { "epoch": 17.88, "learning_rate": 2.2347008938803576e-05, "loss": 1.1341, "step": 78000 }, { "epoch": 17.88, "eval_loss": 1.2346091270446777, "eval_runtime": 8.6885, "eval_samples_per_second": 540.139, "eval_steps_per_second": 67.56, "step": 78000 }, { "epoch": 17.99, "learning_rate": 2.24902589961036e-05, "loss": 1.137, "step": 78500 }, { "epoch": 17.99, "eval_loss": 1.2556732892990112, "eval_runtime": 8.6844, "eval_samples_per_second": 540.396, "eval_steps_per_second": 67.593, "step": 78500 }, { "epoch": 18.11, "learning_rate": 2.2633509053403622e-05, "loss": 1.1256, "step": 79000 }, { "epoch": 18.11, "eval_loss": 1.244991421699524, "eval_runtime": 8.6884, "eval_samples_per_second": 540.148, "eval_steps_per_second": 67.562, "step": 79000 }, { "epoch": 18.22, "learning_rate": 2.2776759110703645e-05, "loss": 1.1413, "step": 79500 }, { "epoch": 18.22, "eval_loss": 1.2606823444366455, "eval_runtime": 8.689, "eval_samples_per_second": 540.106, "eval_steps_per_second": 67.556, "step": 79500 }, { "epoch": 18.34, "learning_rate": 2.2920009168003668e-05, "loss": 1.1291, "step": 80000 }, { "epoch": 18.34, "eval_loss": 1.2636730670928955, "eval_runtime": 8.6879, "eval_samples_per_second": 540.179, "eval_steps_per_second": 67.565, "step": 80000 }, { "epoch": 18.45, "learning_rate": 2.306325922530369e-05, "loss": 1.1397, "step": 80500 }, { "epoch": 18.45, "eval_loss": 1.265358567237854, "eval_runtime": 8.6887, "eval_samples_per_second": 540.129, "eval_steps_per_second": 67.559, "step": 80500 }, { "epoch": 18.57, "learning_rate": 2.3206509282603713e-05, "loss": 1.1394, "step": 81000 }, { "epoch": 18.57, "eval_loss": 1.272621750831604, "eval_runtime": 8.6861, "eval_samples_per_second": 540.29, "eval_steps_per_second": 67.579, "step": 81000 }, { "epoch": 18.68, "learning_rate": 2.3349759339903736e-05, "loss": 1.1281, "step": 81500 }, { "epoch": 18.68, "eval_loss": 1.279017448425293, "eval_runtime": 8.6871, "eval_samples_per_second": 540.225, "eval_steps_per_second": 67.571, "step": 81500 }, { "epoch": 18.79, "learning_rate": 2.349300939720376e-05, "loss": 1.1343, "step": 82000 }, { "epoch": 18.79, "eval_loss": 1.267573356628418, "eval_runtime": 8.6876, "eval_samples_per_second": 540.195, "eval_steps_per_second": 67.567, "step": 82000 }, { "epoch": 18.91, "learning_rate": 2.3636259454503785e-05, "loss": 1.1384, "step": 82500 }, { "epoch": 18.91, "eval_loss": 1.2643240690231323, "eval_runtime": 8.6913, "eval_samples_per_second": 539.965, "eval_steps_per_second": 67.539, "step": 82500 }, { "epoch": 19.02, "learning_rate": 2.3779509511803805e-05, "loss": 1.1368, "step": 83000 }, { "epoch": 19.02, "eval_loss": 1.256750226020813, "eval_runtime": 8.6863, "eval_samples_per_second": 540.273, "eval_steps_per_second": 67.577, "step": 83000 }, { "epoch": 19.14, "learning_rate": 2.3922759569103828e-05, "loss": 1.1257, "step": 83500 }, { "epoch": 19.14, "eval_loss": 1.2705979347229004, "eval_runtime": 8.6912, "eval_samples_per_second": 539.973, "eval_steps_per_second": 67.54, "step": 83500 }, { "epoch": 19.25, "learning_rate": 2.406600962640385e-05, "loss": 1.1253, "step": 84000 }, { "epoch": 19.25, "eval_loss": 1.2625964879989624, "eval_runtime": 8.7018, "eval_samples_per_second": 539.311, "eval_steps_per_second": 67.457, "step": 84000 }, { "epoch": 19.37, "learning_rate": 2.4209259683703873e-05, "loss": 1.1226, "step": 84500 }, { "epoch": 19.37, "eval_loss": 1.2630146741867065, "eval_runtime": 8.6847, "eval_samples_per_second": 540.377, "eval_steps_per_second": 67.59, "step": 84500 }, { "epoch": 19.48, "learning_rate": 2.43525097410039e-05, "loss": 1.1328, "step": 85000 }, { "epoch": 19.48, "eval_loss": 1.2618727684020996, "eval_runtime": 8.6899, "eval_samples_per_second": 540.05, "eval_steps_per_second": 67.549, "step": 85000 }, { "epoch": 19.6, "learning_rate": 2.4495759798303922e-05, "loss": 1.1317, "step": 85500 }, { "epoch": 19.6, "eval_loss": 1.2695611715316772, "eval_runtime": 8.6876, "eval_samples_per_second": 540.198, "eval_steps_per_second": 67.568, "step": 85500 }, { "epoch": 19.71, "learning_rate": 2.4639009855603942e-05, "loss": 1.1288, "step": 86000 }, { "epoch": 19.71, "eval_loss": 1.2572450637817383, "eval_runtime": 8.6868, "eval_samples_per_second": 540.246, "eval_steps_per_second": 67.574, "step": 86000 }, { "epoch": 19.83, "learning_rate": 2.4782259912903965e-05, "loss": 1.134, "step": 86500 }, { "epoch": 19.83, "eval_loss": 1.251129388809204, "eval_runtime": 8.683, "eval_samples_per_second": 540.482, "eval_steps_per_second": 67.603, "step": 86500 }, { "epoch": 19.94, "learning_rate": 2.4925509970203988e-05, "loss": 1.1314, "step": 87000 }, { "epoch": 19.94, "eval_loss": 1.2629115581512451, "eval_runtime": 8.6845, "eval_samples_per_second": 540.387, "eval_steps_per_second": 67.592, "step": 87000 }, { "epoch": 20.06, "learning_rate": 2.5068760027504014e-05, "loss": 1.1312, "step": 87500 }, { "epoch": 20.06, "eval_loss": 1.277274250984192, "eval_runtime": 8.6933, "eval_samples_per_second": 539.844, "eval_steps_per_second": 67.524, "step": 87500 }, { "epoch": 20.17, "learning_rate": 2.5212010084804033e-05, "loss": 1.1237, "step": 88000 }, { "epoch": 20.17, "eval_loss": 1.280105710029602, "eval_runtime": 8.6878, "eval_samples_per_second": 540.186, "eval_steps_per_second": 67.566, "step": 88000 }, { "epoch": 20.28, "learning_rate": 2.535526014210406e-05, "loss": 1.1253, "step": 88500 }, { "epoch": 20.28, "eval_loss": 1.262695074081421, "eval_runtime": 8.6895, "eval_samples_per_second": 540.08, "eval_steps_per_second": 67.553, "step": 88500 }, { "epoch": 20.4, "learning_rate": 2.549851019940408e-05, "loss": 1.1316, "step": 89000 }, { "epoch": 20.4, "eval_loss": 1.2758249044418335, "eval_runtime": 8.6897, "eval_samples_per_second": 540.064, "eval_steps_per_second": 67.551, "step": 89000 }, { "epoch": 20.51, "learning_rate": 2.5641760256704105e-05, "loss": 1.1273, "step": 89500 }, { "epoch": 20.51, "eval_loss": 1.2736468315124512, "eval_runtime": 8.6846, "eval_samples_per_second": 540.381, "eval_steps_per_second": 67.591, "step": 89500 }, { "epoch": 20.63, "learning_rate": 2.5785010314004128e-05, "loss": 1.1254, "step": 90000 }, { "epoch": 20.63, "eval_loss": 1.264664649963379, "eval_runtime": 8.686, "eval_samples_per_second": 540.296, "eval_steps_per_second": 67.58, "step": 90000 }, { "epoch": 20.74, "learning_rate": 2.5928260371304147e-05, "loss": 1.1347, "step": 90500 }, { "epoch": 20.74, "eval_loss": 1.2645220756530762, "eval_runtime": 8.6846, "eval_samples_per_second": 540.38, "eval_steps_per_second": 67.591, "step": 90500 }, { "epoch": 20.86, "learning_rate": 2.6071510428604174e-05, "loss": 1.1325, "step": 91000 }, { "epoch": 20.86, "eval_loss": 1.2732583284378052, "eval_runtime": 8.6899, "eval_samples_per_second": 540.049, "eval_steps_per_second": 67.549, "step": 91000 }, { "epoch": 20.97, "learning_rate": 2.6214760485904193e-05, "loss": 1.1267, "step": 91500 }, { "epoch": 20.97, "eval_loss": 1.2826173305511475, "eval_runtime": 8.6881, "eval_samples_per_second": 540.165, "eval_steps_per_second": 67.564, "step": 91500 }, { "epoch": 21.09, "learning_rate": 2.635801054320422e-05, "loss": 1.1188, "step": 92000 }, { "epoch": 21.09, "eval_loss": 1.2676165103912354, "eval_runtime": 8.6949, "eval_samples_per_second": 539.744, "eval_steps_per_second": 67.511, "step": 92000 }, { "epoch": 21.2, "learning_rate": 2.6501260600504242e-05, "loss": 1.115, "step": 92500 }, { "epoch": 21.2, "eval_loss": 1.2798190116882324, "eval_runtime": 8.6838, "eval_samples_per_second": 540.432, "eval_steps_per_second": 67.597, "step": 92500 }, { "epoch": 21.32, "learning_rate": 2.664451065780427e-05, "loss": 1.1208, "step": 93000 }, { "epoch": 21.32, "eval_loss": 1.2794955968856812, "eval_runtime": 8.6852, "eval_samples_per_second": 540.345, "eval_steps_per_second": 67.586, "step": 93000 }, { "epoch": 21.43, "learning_rate": 2.6787760715104288e-05, "loss": 1.1287, "step": 93500 }, { "epoch": 21.43, "eval_loss": 1.2773120403289795, "eval_runtime": 8.6837, "eval_samples_per_second": 540.44, "eval_steps_per_second": 67.598, "step": 93500 }, { "epoch": 21.54, "learning_rate": 2.6931010772404307e-05, "loss": 1.1215, "step": 94000 }, { "epoch": 21.54, "eval_loss": 1.281087040901184, "eval_runtime": 8.687, "eval_samples_per_second": 540.235, "eval_steps_per_second": 67.573, "step": 94000 }, { "epoch": 21.66, "learning_rate": 2.7074260829704334e-05, "loss": 1.1187, "step": 94500 }, { "epoch": 21.66, "eval_loss": 1.26114821434021, "eval_runtime": 8.692, "eval_samples_per_second": 539.921, "eval_steps_per_second": 67.533, "step": 94500 }, { "epoch": 21.77, "learning_rate": 2.7217510887004356e-05, "loss": 1.1236, "step": 95000 }, { "epoch": 21.77, "eval_loss": 1.268203854560852, "eval_runtime": 8.6912, "eval_samples_per_second": 539.968, "eval_steps_per_second": 67.539, "step": 95000 }, { "epoch": 21.89, "learning_rate": 2.7360760944304383e-05, "loss": 1.1293, "step": 95500 }, { "epoch": 21.89, "eval_loss": 1.265311598777771, "eval_runtime": 8.6974, "eval_samples_per_second": 539.584, "eval_steps_per_second": 67.491, "step": 95500 }, { "epoch": 22.0, "learning_rate": 2.7504011001604402e-05, "loss": 1.1321, "step": 96000 }, { "epoch": 22.0, "eval_loss": 1.2940133810043335, "eval_runtime": 8.6874, "eval_samples_per_second": 540.205, "eval_steps_per_second": 67.569, "step": 96000 }, { "epoch": 22.12, "learning_rate": 2.764726105890442e-05, "loss": 1.1154, "step": 96500 }, { "epoch": 22.12, "eval_loss": 1.2724597454071045, "eval_runtime": 8.6901, "eval_samples_per_second": 540.037, "eval_steps_per_second": 67.548, "step": 96500 }, { "epoch": 22.23, "learning_rate": 2.7790511116204448e-05, "loss": 1.1206, "step": 97000 }, { "epoch": 22.23, "eval_loss": 1.2833462953567505, "eval_runtime": 8.6889, "eval_samples_per_second": 540.112, "eval_steps_per_second": 67.557, "step": 97000 }, { "epoch": 22.35, "learning_rate": 2.793376117350447e-05, "loss": 1.1121, "step": 97500 }, { "epoch": 22.35, "eval_loss": 1.2885328531265259, "eval_runtime": 8.691, "eval_samples_per_second": 539.984, "eval_steps_per_second": 67.541, "step": 97500 }, { "epoch": 22.46, "learning_rate": 2.8077011230804497e-05, "loss": 1.1219, "step": 98000 }, { "epoch": 22.46, "eval_loss": 1.2743043899536133, "eval_runtime": 8.6862, "eval_samples_per_second": 540.284, "eval_steps_per_second": 67.579, "step": 98000 }, { "epoch": 22.58, "learning_rate": 2.8220261288104516e-05, "loss": 1.1154, "step": 98500 }, { "epoch": 22.58, "eval_loss": 1.2759796380996704, "eval_runtime": 8.6857, "eval_samples_per_second": 540.315, "eval_steps_per_second": 67.583, "step": 98500 }, { "epoch": 22.69, "learning_rate": 2.8363511345404542e-05, "loss": 1.1244, "step": 99000 }, { "epoch": 22.69, "eval_loss": 1.2829101085662842, "eval_runtime": 8.6841, "eval_samples_per_second": 540.413, "eval_steps_per_second": 67.595, "step": 99000 }, { "epoch": 22.81, "learning_rate": 2.8506761402704562e-05, "loss": 1.1224, "step": 99500 }, { "epoch": 22.81, "eval_loss": 1.2809455394744873, "eval_runtime": 8.6891, "eval_samples_per_second": 540.105, "eval_steps_per_second": 67.556, "step": 99500 }, { "epoch": 22.92, "learning_rate": 2.8650011460004585e-05, "loss": 1.1217, "step": 100000 }, { "epoch": 22.92, "eval_loss": 1.300841212272644, "eval_runtime": 8.6932, "eval_samples_per_second": 539.846, "eval_steps_per_second": 67.524, "step": 100000 }, { "epoch": 23.03, "learning_rate": 2.879326151730461e-05, "loss": 1.1159, "step": 100500 }, { "epoch": 23.03, "eval_loss": 1.286134123802185, "eval_runtime": 8.6871, "eval_samples_per_second": 540.229, "eval_steps_per_second": 67.572, "step": 100500 }, { "epoch": 23.15, "learning_rate": 2.893651157460463e-05, "loss": 1.1172, "step": 101000 }, { "epoch": 23.15, "eval_loss": 1.2989856004714966, "eval_runtime": 8.6892, "eval_samples_per_second": 540.093, "eval_steps_per_second": 67.555, "step": 101000 }, { "epoch": 23.26, "learning_rate": 2.9079761631904657e-05, "loss": 1.1095, "step": 101500 }, { "epoch": 23.26, "eval_loss": 1.2859405279159546, "eval_runtime": 8.6868, "eval_samples_per_second": 540.247, "eval_steps_per_second": 67.574, "step": 101500 }, { "epoch": 23.38, "learning_rate": 2.9223011689204676e-05, "loss": 1.1124, "step": 102000 }, { "epoch": 23.38, "eval_loss": 1.2853283882141113, "eval_runtime": 8.6838, "eval_samples_per_second": 540.43, "eval_steps_per_second": 67.597, "step": 102000 }, { "epoch": 23.49, "learning_rate": 2.93662617465047e-05, "loss": 1.1183, "step": 102500 }, { "epoch": 23.49, "eval_loss": 1.2964967489242554, "eval_runtime": 8.6843, "eval_samples_per_second": 540.399, "eval_steps_per_second": 67.593, "step": 102500 }, { "epoch": 23.61, "learning_rate": 2.9509511803804725e-05, "loss": 1.1267, "step": 103000 }, { "epoch": 23.61, "eval_loss": 1.276028037071228, "eval_runtime": 8.6888, "eval_samples_per_second": 540.119, "eval_steps_per_second": 67.558, "step": 103000 }, { "epoch": 23.72, "learning_rate": 2.9652761861104745e-05, "loss": 1.1227, "step": 103500 }, { "epoch": 23.72, "eval_loss": 1.2948238849639893, "eval_runtime": 8.7037, "eval_samples_per_second": 539.195, "eval_steps_per_second": 67.442, "step": 103500 }, { "epoch": 23.84, "learning_rate": 2.979601191840477e-05, "loss": 1.1191, "step": 104000 }, { "epoch": 23.84, "eval_loss": 1.2872439622879028, "eval_runtime": 8.6882, "eval_samples_per_second": 540.161, "eval_steps_per_second": 67.563, "step": 104000 }, { "epoch": 23.95, "learning_rate": 2.993926197570479e-05, "loss": 1.1217, "step": 104500 }, { "epoch": 23.95, "eval_loss": 1.3006843328475952, "eval_runtime": 8.6808, "eval_samples_per_second": 540.617, "eval_steps_per_second": 67.62, "step": 104500 }, { "epoch": 24.07, "learning_rate": 3.0082512033004817e-05, "loss": 1.1217, "step": 105000 }, { "epoch": 24.07, "eval_loss": 1.2849390506744385, "eval_runtime": 8.6883, "eval_samples_per_second": 540.15, "eval_steps_per_second": 67.562, "step": 105000 }, { "epoch": 24.18, "learning_rate": 3.022576209030484e-05, "loss": 1.1101, "step": 105500 }, { "epoch": 24.18, "eval_loss": 1.2957041263580322, "eval_runtime": 8.6892, "eval_samples_per_second": 540.094, "eval_steps_per_second": 67.555, "step": 105500 }, { "epoch": 24.3, "learning_rate": 3.036901214760486e-05, "loss": 1.1194, "step": 106000 }, { "epoch": 24.3, "eval_loss": 1.2955604791641235, "eval_runtime": 8.689, "eval_samples_per_second": 540.111, "eval_steps_per_second": 67.557, "step": 106000 }, { "epoch": 24.41, "learning_rate": 3.0512262204904885e-05, "loss": 1.1187, "step": 106500 }, { "epoch": 24.41, "eval_loss": 1.2868297100067139, "eval_runtime": 8.687, "eval_samples_per_second": 540.231, "eval_steps_per_second": 67.572, "step": 106500 }, { "epoch": 24.52, "learning_rate": 3.065551226220491e-05, "loss": 1.114, "step": 107000 }, { "epoch": 24.52, "eval_loss": 1.3014169931411743, "eval_runtime": 8.6849, "eval_samples_per_second": 540.366, "eval_steps_per_second": 67.589, "step": 107000 }, { "epoch": 24.64, "learning_rate": 3.079876231950493e-05, "loss": 1.1171, "step": 107500 }, { "epoch": 24.64, "eval_loss": 1.3098399639129639, "eval_runtime": 8.6886, "eval_samples_per_second": 540.131, "eval_steps_per_second": 67.56, "step": 107500 }, { "epoch": 24.75, "learning_rate": 3.0942012376804954e-05, "loss": 1.1154, "step": 108000 }, { "epoch": 24.75, "eval_loss": 1.2928967475891113, "eval_runtime": 8.6813, "eval_samples_per_second": 540.588, "eval_steps_per_second": 67.617, "step": 108000 }, { "epoch": 24.87, "learning_rate": 3.108526243410497e-05, "loss": 1.1175, "step": 108500 }, { "epoch": 24.87, "eval_loss": 1.2994951009750366, "eval_runtime": 8.685, "eval_samples_per_second": 540.358, "eval_steps_per_second": 67.588, "step": 108500 }, { "epoch": 24.98, "learning_rate": 3.1228512491405e-05, "loss": 1.1239, "step": 109000 }, { "epoch": 24.98, "eval_loss": 1.3164483308792114, "eval_runtime": 8.6845, "eval_samples_per_second": 540.388, "eval_steps_per_second": 67.592, "step": 109000 }, { "epoch": 25.1, "learning_rate": 3.137176254870502e-05, "loss": 1.115, "step": 109500 }, { "epoch": 25.1, "eval_loss": 1.3238645792007446, "eval_runtime": 8.6898, "eval_samples_per_second": 540.06, "eval_steps_per_second": 67.551, "step": 109500 }, { "epoch": 25.21, "learning_rate": 3.1515012606005045e-05, "loss": 1.1111, "step": 110000 }, { "epoch": 25.21, "eval_loss": 1.3043571710586548, "eval_runtime": 8.6922, "eval_samples_per_second": 539.91, "eval_steps_per_second": 67.532, "step": 110000 }, { "epoch": 25.33, "learning_rate": 3.165826266330507e-05, "loss": 1.1161, "step": 110500 }, { "epoch": 25.33, "eval_loss": 1.2957446575164795, "eval_runtime": 8.6925, "eval_samples_per_second": 539.888, "eval_steps_per_second": 67.529, "step": 110500 }, { "epoch": 25.44, "learning_rate": 3.180151272060509e-05, "loss": 1.1094, "step": 111000 }, { "epoch": 25.44, "eval_loss": 1.312481164932251, "eval_runtime": 8.6891, "eval_samples_per_second": 540.103, "eval_steps_per_second": 67.556, "step": 111000 }, { "epoch": 25.56, "learning_rate": 3.1944762777905113e-05, "loss": 1.1166, "step": 111500 }, { "epoch": 25.56, "eval_loss": 1.2944209575653076, "eval_runtime": 8.6996, "eval_samples_per_second": 539.447, "eval_steps_per_second": 67.474, "step": 111500 }, { "epoch": 25.67, "learning_rate": 3.2088012835205136e-05, "loss": 1.1187, "step": 112000 }, { "epoch": 25.67, "eval_loss": 1.3190549612045288, "eval_runtime": 8.6868, "eval_samples_per_second": 540.246, "eval_steps_per_second": 67.574, "step": 112000 }, { "epoch": 25.78, "learning_rate": 3.223126289250516e-05, "loss": 1.1111, "step": 112500 }, { "epoch": 25.78, "eval_loss": 1.2932227849960327, "eval_runtime": 8.6838, "eval_samples_per_second": 540.434, "eval_steps_per_second": 67.597, "step": 112500 }, { "epoch": 25.9, "learning_rate": 3.237451294980518e-05, "loss": 1.1212, "step": 113000 }, { "epoch": 25.9, "eval_loss": 1.30079185962677, "eval_runtime": 8.6873, "eval_samples_per_second": 540.212, "eval_steps_per_second": 67.57, "step": 113000 }, { "epoch": 26.01, "learning_rate": 3.2517763007105205e-05, "loss": 1.1099, "step": 113500 }, { "epoch": 26.01, "eval_loss": 1.3157790899276733, "eval_runtime": 8.683, "eval_samples_per_second": 540.483, "eval_steps_per_second": 67.604, "step": 113500 }, { "epoch": 26.13, "learning_rate": 3.266101306440523e-05, "loss": 1.1145, "step": 114000 }, { "epoch": 26.13, "eval_loss": 1.3033260107040405, "eval_runtime": 8.6836, "eval_samples_per_second": 540.441, "eval_steps_per_second": 67.598, "step": 114000 }, { "epoch": 26.24, "learning_rate": 3.280426312170525e-05, "loss": 1.1074, "step": 114500 }, { "epoch": 26.24, "eval_loss": 1.3248536586761475, "eval_runtime": 8.6833, "eval_samples_per_second": 540.463, "eval_steps_per_second": 67.601, "step": 114500 }, { "epoch": 26.36, "learning_rate": 3.294751317900527e-05, "loss": 1.1092, "step": 115000 }, { "epoch": 26.36, "eval_loss": 1.3140897750854492, "eval_runtime": 8.6842, "eval_samples_per_second": 540.407, "eval_steps_per_second": 67.594, "step": 115000 }, { "epoch": 26.47, "learning_rate": 3.3090763236305296e-05, "loss": 1.1131, "step": 115500 }, { "epoch": 26.47, "eval_loss": 1.325509786605835, "eval_runtime": 8.687, "eval_samples_per_second": 540.231, "eval_steps_per_second": 67.572, "step": 115500 }, { "epoch": 26.59, "learning_rate": 3.323401329360532e-05, "loss": 1.1133, "step": 116000 }, { "epoch": 26.59, "eval_loss": 1.3049628734588623, "eval_runtime": 8.6873, "eval_samples_per_second": 540.212, "eval_steps_per_second": 67.57, "step": 116000 }, { "epoch": 26.7, "learning_rate": 3.337726335090534e-05, "loss": 1.1162, "step": 116500 }, { "epoch": 26.7, "eval_loss": 1.3120251893997192, "eval_runtime": 8.6814, "eval_samples_per_second": 540.581, "eval_steps_per_second": 67.616, "step": 116500 }, { "epoch": 26.82, "learning_rate": 3.3520513408205365e-05, "loss": 1.1122, "step": 117000 }, { "epoch": 26.82, "eval_loss": 1.316186547279358, "eval_runtime": 8.6872, "eval_samples_per_second": 540.222, "eval_steps_per_second": 67.571, "step": 117000 }, { "epoch": 26.93, "learning_rate": 3.366376346550539e-05, "loss": 1.1086, "step": 117500 }, { "epoch": 26.93, "eval_loss": 1.3115354776382446, "eval_runtime": 8.6896, "eval_samples_per_second": 540.07, "eval_steps_per_second": 67.552, "step": 117500 }, { "epoch": 27.05, "learning_rate": 3.380701352280541e-05, "loss": 1.1034, "step": 118000 }, { "epoch": 27.05, "eval_loss": 1.3265858888626099, "eval_runtime": 8.6881, "eval_samples_per_second": 540.163, "eval_steps_per_second": 67.564, "step": 118000 }, { "epoch": 27.16, "learning_rate": 3.395026358010543e-05, "loss": 1.1047, "step": 118500 }, { "epoch": 27.16, "eval_loss": 1.3119142055511475, "eval_runtime": 8.6896, "eval_samples_per_second": 540.071, "eval_steps_per_second": 67.552, "step": 118500 }, { "epoch": 27.27, "learning_rate": 3.4093513637405456e-05, "loss": 1.1075, "step": 119000 }, { "epoch": 27.27, "eval_loss": 1.3273544311523438, "eval_runtime": 8.6869, "eval_samples_per_second": 540.24, "eval_steps_per_second": 67.573, "step": 119000 }, { "epoch": 27.39, "learning_rate": 3.423676369470548e-05, "loss": 1.1052, "step": 119500 }, { "epoch": 27.39, "eval_loss": 1.2995450496673584, "eval_runtime": 8.6944, "eval_samples_per_second": 539.771, "eval_steps_per_second": 67.514, "step": 119500 }, { "epoch": 27.5, "learning_rate": 3.43800137520055e-05, "loss": 1.1119, "step": 120000 }, { "epoch": 27.5, "eval_loss": 1.3373947143554688, "eval_runtime": 8.6898, "eval_samples_per_second": 540.056, "eval_steps_per_second": 67.55, "step": 120000 }, { "epoch": 27.62, "learning_rate": 3.4523263809305525e-05, "loss": 1.1131, "step": 120500 }, { "epoch": 27.62, "eval_loss": 1.3150440454483032, "eval_runtime": 8.6871, "eval_samples_per_second": 540.226, "eval_steps_per_second": 67.571, "step": 120500 }, { "epoch": 27.73, "learning_rate": 3.466651386660555e-05, "loss": 1.1145, "step": 121000 }, { "epoch": 27.73, "eval_loss": 1.3263864517211914, "eval_runtime": 8.6895, "eval_samples_per_second": 540.075, "eval_steps_per_second": 67.552, "step": 121000 }, { "epoch": 27.85, "learning_rate": 3.480976392390557e-05, "loss": 1.1147, "step": 121500 }, { "epoch": 27.85, "eval_loss": 1.331019401550293, "eval_runtime": 8.6887, "eval_samples_per_second": 540.127, "eval_steps_per_second": 67.559, "step": 121500 }, { "epoch": 27.96, "learning_rate": 3.495301398120559e-05, "loss": 1.1031, "step": 122000 }, { "epoch": 27.96, "eval_loss": 1.3226847648620605, "eval_runtime": 8.6807, "eval_samples_per_second": 540.623, "eval_steps_per_second": 67.621, "step": 122000 }, { "epoch": 28.08, "learning_rate": 3.5096264038505616e-05, "loss": 1.1017, "step": 122500 }, { "epoch": 28.08, "eval_loss": 1.3102643489837646, "eval_runtime": 8.6848, "eval_samples_per_second": 540.369, "eval_steps_per_second": 67.589, "step": 122500 }, { "epoch": 28.19, "learning_rate": 3.5239514095805646e-05, "loss": 1.1041, "step": 123000 }, { "epoch": 28.19, "eval_loss": 1.3246045112609863, "eval_runtime": 8.6905, "eval_samples_per_second": 540.015, "eval_steps_per_second": 67.545, "step": 123000 }, { "epoch": 28.31, "learning_rate": 3.538276415310566e-05, "loss": 1.1063, "step": 123500 }, { "epoch": 28.31, "eval_loss": 1.317310094833374, "eval_runtime": 8.6856, "eval_samples_per_second": 540.321, "eval_steps_per_second": 67.583, "step": 123500 }, { "epoch": 28.42, "learning_rate": 3.5526014210405684e-05, "loss": 1.1069, "step": 124000 }, { "epoch": 28.42, "eval_loss": 1.3285433053970337, "eval_runtime": 8.6876, "eval_samples_per_second": 540.195, "eval_steps_per_second": 67.567, "step": 124000 }, { "epoch": 28.54, "learning_rate": 3.566926426770571e-05, "loss": 1.1103, "step": 124500 }, { "epoch": 28.54, "eval_loss": 1.3307170867919922, "eval_runtime": 8.6827, "eval_samples_per_second": 540.502, "eval_steps_per_second": 67.606, "step": 124500 }, { "epoch": 28.65, "learning_rate": 3.581251432500573e-05, "loss": 1.113, "step": 125000 }, { "epoch": 28.65, "eval_loss": 1.32500159740448, "eval_runtime": 8.6901, "eval_samples_per_second": 540.037, "eval_steps_per_second": 67.548, "step": 125000 }, { "epoch": 28.76, "learning_rate": 3.595576438230576e-05, "loss": 1.1085, "step": 125500 }, { "epoch": 28.76, "eval_loss": 1.3362865447998047, "eval_runtime": 8.6855, "eval_samples_per_second": 540.324, "eval_steps_per_second": 67.584, "step": 125500 }, { "epoch": 28.88, "learning_rate": 3.6099014439605776e-05, "loss": 1.1092, "step": 126000 }, { "epoch": 28.88, "eval_loss": 1.3252229690551758, "eval_runtime": 8.6838, "eval_samples_per_second": 540.433, "eval_steps_per_second": 67.597, "step": 126000 }, { "epoch": 28.99, "learning_rate": 3.62422644969058e-05, "loss": 1.1091, "step": 126500 }, { "epoch": 28.99, "eval_loss": 1.3378815650939941, "eval_runtime": 8.6862, "eval_samples_per_second": 540.282, "eval_steps_per_second": 67.578, "step": 126500 }, { "epoch": 29.11, "learning_rate": 3.638551455420582e-05, "loss": 1.0941, "step": 127000 }, { "epoch": 29.11, "eval_loss": 1.3409087657928467, "eval_runtime": 8.6908, "eval_samples_per_second": 539.993, "eval_steps_per_second": 67.542, "step": 127000 }, { "epoch": 29.22, "learning_rate": 3.6528764611505844e-05, "loss": 1.1044, "step": 127500 }, { "epoch": 29.22, "eval_loss": 1.3344773054122925, "eval_runtime": 8.7013, "eval_samples_per_second": 539.347, "eval_steps_per_second": 67.461, "step": 127500 }, { "epoch": 29.34, "learning_rate": 3.6672014668805874e-05, "loss": 1.1013, "step": 128000 }, { "epoch": 29.34, "eval_loss": 1.3356950283050537, "eval_runtime": 8.685, "eval_samples_per_second": 540.359, "eval_steps_per_second": 67.588, "step": 128000 }, { "epoch": 29.45, "learning_rate": 3.681526472610589e-05, "loss": 1.1041, "step": 128500 }, { "epoch": 29.45, "eval_loss": 1.3382378816604614, "eval_runtime": 8.6868, "eval_samples_per_second": 540.246, "eval_steps_per_second": 67.574, "step": 128500 }, { "epoch": 29.57, "learning_rate": 3.695851478340592e-05, "loss": 1.1091, "step": 129000 }, { "epoch": 29.57, "eval_loss": 1.3558728694915771, "eval_runtime": 8.6886, "eval_samples_per_second": 540.132, "eval_steps_per_second": 67.56, "step": 129000 }, { "epoch": 29.68, "learning_rate": 3.7101764840705936e-05, "loss": 1.1065, "step": 129500 }, { "epoch": 29.68, "eval_loss": 1.3426029682159424, "eval_runtime": 8.6853, "eval_samples_per_second": 540.341, "eval_steps_per_second": 67.586, "step": 129500 }, { "epoch": 29.8, "learning_rate": 3.724501489800596e-05, "loss": 1.112, "step": 130000 }, { "epoch": 29.8, "eval_loss": 1.3417446613311768, "eval_runtime": 8.6906, "eval_samples_per_second": 540.011, "eval_steps_per_second": 67.545, "step": 130000 }, { "epoch": 29.91, "learning_rate": 3.738826495530599e-05, "loss": 1.1148, "step": 130500 }, { "epoch": 29.91, "eval_loss": 1.3420060873031616, "eval_runtime": 8.6896, "eval_samples_per_second": 540.07, "eval_steps_per_second": 67.552, "step": 130500 }, { "epoch": 30.03, "learning_rate": 3.7531515012606004e-05, "loss": 1.112, "step": 131000 }, { "epoch": 30.03, "eval_loss": 1.345941424369812, "eval_runtime": 8.6964, "eval_samples_per_second": 539.65, "eval_steps_per_second": 67.499, "step": 131000 }, { "epoch": 30.14, "learning_rate": 3.7674765069906034e-05, "loss": 1.1039, "step": 131500 }, { "epoch": 30.14, "eval_loss": 1.3430460691452026, "eval_runtime": 8.6845, "eval_samples_per_second": 540.391, "eval_steps_per_second": 67.592, "step": 131500 }, { "epoch": 30.25, "learning_rate": 3.781801512720605e-05, "loss": 1.1014, "step": 132000 }, { "epoch": 30.25, "eval_loss": 1.341183066368103, "eval_runtime": 8.6899, "eval_samples_per_second": 540.05, "eval_steps_per_second": 67.549, "step": 132000 }, { "epoch": 30.37, "learning_rate": 3.796126518450607e-05, "loss": 1.096, "step": 132500 }, { "epoch": 30.37, "eval_loss": 1.3385992050170898, "eval_runtime": 8.6881, "eval_samples_per_second": 540.166, "eval_steps_per_second": 67.564, "step": 132500 }, { "epoch": 30.48, "learning_rate": 3.81045152418061e-05, "loss": 1.1057, "step": 133000 }, { "epoch": 30.48, "eval_loss": 1.352662205696106, "eval_runtime": 8.687, "eval_samples_per_second": 540.231, "eval_steps_per_second": 67.572, "step": 133000 }, { "epoch": 30.6, "learning_rate": 3.824776529910612e-05, "loss": 1.0985, "step": 133500 }, { "epoch": 30.6, "eval_loss": 1.3324880599975586, "eval_runtime": 8.6861, "eval_samples_per_second": 540.285, "eval_steps_per_second": 67.579, "step": 133500 }, { "epoch": 30.71, "learning_rate": 3.839101535640615e-05, "loss": 1.1073, "step": 134000 }, { "epoch": 30.71, "eval_loss": 1.3327621221542358, "eval_runtime": 8.6887, "eval_samples_per_second": 540.125, "eval_steps_per_second": 67.559, "step": 134000 }, { "epoch": 30.83, "learning_rate": 3.8534265413706164e-05, "loss": 1.1073, "step": 134500 }, { "epoch": 30.83, "eval_loss": 1.3402713537216187, "eval_runtime": 8.6792, "eval_samples_per_second": 540.718, "eval_steps_per_second": 67.633, "step": 134500 }, { "epoch": 30.94, "learning_rate": 3.8677515471006194e-05, "loss": 1.1124, "step": 135000 }, { "epoch": 30.94, "eval_loss": 1.3610177040100098, "eval_runtime": 8.6883, "eval_samples_per_second": 540.152, "eval_steps_per_second": 67.562, "step": 135000 }, { "epoch": 31.06, "learning_rate": 3.8820765528306217e-05, "loss": 1.102, "step": 135500 }, { "epoch": 31.06, "eval_loss": 1.359560489654541, "eval_runtime": 8.6825, "eval_samples_per_second": 540.515, "eval_steps_per_second": 67.608, "step": 135500 }, { "epoch": 31.17, "learning_rate": 3.896401558560623e-05, "loss": 1.1, "step": 136000 }, { "epoch": 31.17, "eval_loss": 1.358182668685913, "eval_runtime": 8.6849, "eval_samples_per_second": 540.36, "eval_steps_per_second": 67.588, "step": 136000 }, { "epoch": 31.29, "learning_rate": 3.910726564290626e-05, "loss": 1.1016, "step": 136500 }, { "epoch": 31.29, "eval_loss": 1.3600519895553589, "eval_runtime": 8.6868, "eval_samples_per_second": 540.248, "eval_steps_per_second": 67.574, "step": 136500 }, { "epoch": 31.4, "learning_rate": 3.925051570020628e-05, "loss": 1.1079, "step": 137000 }, { "epoch": 31.4, "eval_loss": 1.3567662239074707, "eval_runtime": 8.6796, "eval_samples_per_second": 540.695, "eval_steps_per_second": 67.63, "step": 137000 }, { "epoch": 31.51, "learning_rate": 3.939376575750631e-05, "loss": 1.1158, "step": 137500 }, { "epoch": 31.51, "eval_loss": 1.3397294282913208, "eval_runtime": 8.6823, "eval_samples_per_second": 540.526, "eval_steps_per_second": 67.609, "step": 137500 }, { "epoch": 31.63, "learning_rate": 3.953701581480633e-05, "loss": 1.1043, "step": 138000 }, { "epoch": 31.63, "eval_loss": 1.3646483421325684, "eval_runtime": 8.6879, "eval_samples_per_second": 540.179, "eval_steps_per_second": 67.566, "step": 138000 }, { "epoch": 31.74, "learning_rate": 3.968026587210635e-05, "loss": 1.1122, "step": 138500 }, { "epoch": 31.74, "eval_loss": 1.34135901927948, "eval_runtime": 8.6837, "eval_samples_per_second": 540.439, "eval_steps_per_second": 67.598, "step": 138500 }, { "epoch": 31.86, "learning_rate": 3.9823515929406376e-05, "loss": 1.1135, "step": 139000 }, { "epoch": 31.86, "eval_loss": 1.367103934288025, "eval_runtime": 8.6964, "eval_samples_per_second": 539.649, "eval_steps_per_second": 67.499, "step": 139000 }, { "epoch": 31.97, "learning_rate": 3.996676598670639e-05, "loss": 1.106, "step": 139500 }, { "epoch": 31.97, "eval_loss": 1.3664454221725464, "eval_runtime": 8.6851, "eval_samples_per_second": 540.354, "eval_steps_per_second": 67.587, "step": 139500 }, { "epoch": 32.09, "learning_rate": 4.011001604400642e-05, "loss": 1.0969, "step": 140000 }, { "epoch": 32.09, "eval_loss": 1.3516037464141846, "eval_runtime": 8.6855, "eval_samples_per_second": 540.329, "eval_steps_per_second": 67.584, "step": 140000 }, { "epoch": 32.2, "learning_rate": 4.0253266101306445e-05, "loss": 1.0974, "step": 140500 }, { "epoch": 32.2, "eval_loss": 1.3694406747817993, "eval_runtime": 8.6872, "eval_samples_per_second": 540.217, "eval_steps_per_second": 67.57, "step": 140500 }, { "epoch": 32.32, "learning_rate": 4.039651615860647e-05, "loss": 1.0952, "step": 141000 }, { "epoch": 32.32, "eval_loss": 1.372841715812683, "eval_runtime": 8.6882, "eval_samples_per_second": 540.16, "eval_steps_per_second": 67.563, "step": 141000 }, { "epoch": 32.43, "learning_rate": 4.053976621590649e-05, "loss": 1.1049, "step": 141500 }, { "epoch": 32.43, "eval_loss": 1.3587039709091187, "eval_runtime": 8.6835, "eval_samples_per_second": 540.453, "eval_steps_per_second": 67.6, "step": 141500 }, { "epoch": 32.55, "learning_rate": 4.068301627320651e-05, "loss": 1.1057, "step": 142000 }, { "epoch": 32.55, "eval_loss": 1.3578588962554932, "eval_runtime": 8.6884, "eval_samples_per_second": 540.143, "eval_steps_per_second": 67.561, "step": 142000 }, { "epoch": 32.66, "learning_rate": 4.0826266330506536e-05, "loss": 1.0976, "step": 142500 }, { "epoch": 32.66, "eval_loss": 1.3490372896194458, "eval_runtime": 8.6846, "eval_samples_per_second": 540.381, "eval_steps_per_second": 67.591, "step": 142500 }, { "epoch": 32.78, "learning_rate": 4.096951638780656e-05, "loss": 1.1105, "step": 143000 }, { "epoch": 32.78, "eval_loss": 1.3702690601348877, "eval_runtime": 8.6911, "eval_samples_per_second": 539.981, "eval_steps_per_second": 67.541, "step": 143000 }, { "epoch": 32.89, "learning_rate": 4.111276644510658e-05, "loss": 1.1085, "step": 143500 }, { "epoch": 32.89, "eval_loss": 1.374622106552124, "eval_runtime": 8.6874, "eval_samples_per_second": 540.206, "eval_steps_per_second": 67.569, "step": 143500 }, { "epoch": 33.0, "learning_rate": 4.1256016502406605e-05, "loss": 1.1114, "step": 144000 }, { "epoch": 33.0, "eval_loss": 1.3575122356414795, "eval_runtime": 8.685, "eval_samples_per_second": 540.355, "eval_steps_per_second": 67.588, "step": 144000 }, { "epoch": 33.12, "learning_rate": 4.139926655970662e-05, "loss": 1.0923, "step": 144500 }, { "epoch": 33.12, "eval_loss": 1.3712995052337646, "eval_runtime": 8.6942, "eval_samples_per_second": 539.788, "eval_steps_per_second": 67.517, "step": 144500 }, { "epoch": 33.23, "learning_rate": 4.154251661700665e-05, "loss": 1.0997, "step": 145000 }, { "epoch": 33.23, "eval_loss": 1.3477163314819336, "eval_runtime": 8.6919, "eval_samples_per_second": 539.927, "eval_steps_per_second": 67.534, "step": 145000 }, { "epoch": 33.35, "learning_rate": 4.168576667430667e-05, "loss": 1.0984, "step": 145500 }, { "epoch": 33.35, "eval_loss": 1.3504836559295654, "eval_runtime": 8.6801, "eval_samples_per_second": 540.662, "eval_steps_per_second": 67.626, "step": 145500 }, { "epoch": 33.46, "learning_rate": 4.1829016731606696e-05, "loss": 1.0972, "step": 146000 }, { "epoch": 33.46, "eval_loss": 1.3811625242233276, "eval_runtime": 8.6904, "eval_samples_per_second": 540.019, "eval_steps_per_second": 67.546, "step": 146000 }, { "epoch": 33.58, "learning_rate": 4.197226678890672e-05, "loss": 1.1055, "step": 146500 }, { "epoch": 33.58, "eval_loss": 1.3556126356124878, "eval_runtime": 8.69, "eval_samples_per_second": 540.046, "eval_steps_per_second": 67.549, "step": 146500 }, { "epoch": 33.69, "learning_rate": 4.211551684620674e-05, "loss": 1.1041, "step": 147000 }, { "epoch": 33.69, "eval_loss": 1.3610211610794067, "eval_runtime": 8.7039, "eval_samples_per_second": 539.186, "eval_steps_per_second": 67.441, "step": 147000 }, { "epoch": 33.81, "learning_rate": 4.2258766903506765e-05, "loss": 1.1063, "step": 147500 }, { "epoch": 33.81, "eval_loss": 1.3715120553970337, "eval_runtime": 8.6807, "eval_samples_per_second": 540.622, "eval_steps_per_second": 67.621, "step": 147500 }, { "epoch": 33.92, "learning_rate": 4.240201696080678e-05, "loss": 1.1137, "step": 148000 }, { "epoch": 33.92, "eval_loss": 1.3836239576339722, "eval_runtime": 8.6907, "eval_samples_per_second": 540.002, "eval_steps_per_second": 67.543, "step": 148000 }, { "epoch": 34.04, "learning_rate": 4.254526701810681e-05, "loss": 1.1065, "step": 148500 }, { "epoch": 34.04, "eval_loss": 1.3737373352050781, "eval_runtime": 8.6852, "eval_samples_per_second": 540.342, "eval_steps_per_second": 67.586, "step": 148500 }, { "epoch": 34.15, "learning_rate": 4.268851707540683e-05, "loss": 1.0962, "step": 149000 }, { "epoch": 34.15, "eval_loss": 1.3693293333053589, "eval_runtime": 8.6819, "eval_samples_per_second": 540.549, "eval_steps_per_second": 67.612, "step": 149000 }, { "epoch": 34.27, "learning_rate": 4.2831767132706856e-05, "loss": 1.0961, "step": 149500 }, { "epoch": 34.27, "eval_loss": 1.369345784187317, "eval_runtime": 8.6834, "eval_samples_per_second": 540.458, "eval_steps_per_second": 67.6, "step": 149500 }, { "epoch": 34.38, "learning_rate": 4.297501719000688e-05, "loss": 1.0948, "step": 150000 }, { "epoch": 34.38, "eval_loss": 1.379374623298645, "eval_runtime": 8.687, "eval_samples_per_second": 540.234, "eval_steps_per_second": 67.572, "step": 150000 }, { "epoch": 34.49, "learning_rate": 4.3118267247306895e-05, "loss": 1.1029, "step": 150500 }, { "epoch": 34.49, "eval_loss": 1.3878742456436157, "eval_runtime": 8.6917, "eval_samples_per_second": 539.942, "eval_steps_per_second": 67.536, "step": 150500 }, { "epoch": 34.61, "learning_rate": 4.3261517304606925e-05, "loss": 1.1066, "step": 151000 }, { "epoch": 34.61, "eval_loss": 1.3788371086120605, "eval_runtime": 8.6842, "eval_samples_per_second": 540.409, "eval_steps_per_second": 67.594, "step": 151000 }, { "epoch": 34.72, "learning_rate": 4.340476736190695e-05, "loss": 1.1099, "step": 151500 }, { "epoch": 34.72, "eval_loss": 1.3727527856826782, "eval_runtime": 8.6747, "eval_samples_per_second": 540.997, "eval_steps_per_second": 67.668, "step": 151500 }, { "epoch": 34.84, "learning_rate": 4.354801741920697e-05, "loss": 1.102, "step": 152000 }, { "epoch": 34.84, "eval_loss": 1.3734774589538574, "eval_runtime": 8.6823, "eval_samples_per_second": 540.526, "eval_steps_per_second": 67.609, "step": 152000 }, { "epoch": 34.95, "learning_rate": 4.369126747650699e-05, "loss": 1.0985, "step": 152500 }, { "epoch": 34.95, "eval_loss": 1.3788182735443115, "eval_runtime": 8.6799, "eval_samples_per_second": 540.675, "eval_steps_per_second": 67.628, "step": 152500 }, { "epoch": 35.07, "learning_rate": 4.3834517533807016e-05, "loss": 1.0994, "step": 153000 }, { "epoch": 35.07, "eval_loss": 1.3690484762191772, "eval_runtime": 8.6854, "eval_samples_per_second": 540.333, "eval_steps_per_second": 67.585, "step": 153000 }, { "epoch": 35.18, "learning_rate": 4.397776759110704e-05, "loss": 1.0936, "step": 153500 }, { "epoch": 35.18, "eval_loss": 1.3826614618301392, "eval_runtime": 8.6936, "eval_samples_per_second": 539.82, "eval_steps_per_second": 67.521, "step": 153500 }, { "epoch": 35.3, "learning_rate": 4.412101764840706e-05, "loss": 1.0945, "step": 154000 }, { "epoch": 35.3, "eval_loss": 1.3975774049758911, "eval_runtime": 8.6879, "eval_samples_per_second": 540.177, "eval_steps_per_second": 67.565, "step": 154000 }, { "epoch": 35.41, "learning_rate": 4.4264267705707084e-05, "loss": 1.1016, "step": 154500 }, { "epoch": 35.41, "eval_loss": 1.3767386674880981, "eval_runtime": 8.6882, "eval_samples_per_second": 540.161, "eval_steps_per_second": 67.563, "step": 154500 }, { "epoch": 35.53, "learning_rate": 4.440751776300711e-05, "loss": 1.1006, "step": 155000 }, { "epoch": 35.53, "eval_loss": 1.3715084791183472, "eval_runtime": 8.6929, "eval_samples_per_second": 539.864, "eval_steps_per_second": 67.526, "step": 155000 }, { "epoch": 35.64, "learning_rate": 4.455076782030713e-05, "loss": 1.0947, "step": 155500 }, { "epoch": 35.64, "eval_loss": 1.3812638521194458, "eval_runtime": 8.6804, "eval_samples_per_second": 540.642, "eval_steps_per_second": 67.623, "step": 155500 }, { "epoch": 35.76, "learning_rate": 4.469401787760715e-05, "loss": 1.113, "step": 156000 }, { "epoch": 35.76, "eval_loss": 1.3810392618179321, "eval_runtime": 8.6828, "eval_samples_per_second": 540.491, "eval_steps_per_second": 67.605, "step": 156000 }, { "epoch": 35.87, "learning_rate": 4.4837267934907176e-05, "loss": 1.1084, "step": 156500 }, { "epoch": 35.87, "eval_loss": 1.383670687675476, "eval_runtime": 8.6877, "eval_samples_per_second": 540.189, "eval_steps_per_second": 67.567, "step": 156500 }, { "epoch": 35.98, "learning_rate": 4.49805179922072e-05, "loss": 1.1109, "step": 157000 }, { "epoch": 35.98, "eval_loss": 1.3883386850357056, "eval_runtime": 8.6828, "eval_samples_per_second": 540.495, "eval_steps_per_second": 67.605, "step": 157000 }, { "epoch": 36.1, "learning_rate": 4.512376804950722e-05, "loss": 1.1013, "step": 157500 }, { "epoch": 36.1, "eval_loss": 1.3823014497756958, "eval_runtime": 8.6857, "eval_samples_per_second": 540.313, "eval_steps_per_second": 67.582, "step": 157500 }, { "epoch": 36.21, "learning_rate": 4.5267018106807244e-05, "loss": 1.0946, "step": 158000 }, { "epoch": 36.21, "eval_loss": 1.393052577972412, "eval_runtime": 8.6814, "eval_samples_per_second": 540.58, "eval_steps_per_second": 67.616, "step": 158000 }, { "epoch": 36.33, "learning_rate": 4.541026816410727e-05, "loss": 1.0995, "step": 158500 }, { "epoch": 36.33, "eval_loss": 1.3945343494415283, "eval_runtime": 8.693, "eval_samples_per_second": 539.862, "eval_steps_per_second": 67.526, "step": 158500 }, { "epoch": 36.44, "learning_rate": 4.555351822140729e-05, "loss": 1.0964, "step": 159000 }, { "epoch": 36.44, "eval_loss": 1.4045376777648926, "eval_runtime": 8.6906, "eval_samples_per_second": 540.009, "eval_steps_per_second": 67.544, "step": 159000 }, { "epoch": 36.56, "learning_rate": 4.569676827870731e-05, "loss": 1.1052, "step": 159500 }, { "epoch": 36.56, "eval_loss": 1.3855903148651123, "eval_runtime": 8.6835, "eval_samples_per_second": 540.449, "eval_steps_per_second": 67.599, "step": 159500 }, { "epoch": 36.67, "learning_rate": 4.5840018336007336e-05, "loss": 1.1086, "step": 160000 }, { "epoch": 36.67, "eval_loss": 1.378783106803894, "eval_runtime": 8.6835, "eval_samples_per_second": 540.452, "eval_steps_per_second": 67.6, "step": 160000 }, { "epoch": 36.79, "learning_rate": 4.598326839330736e-05, "loss": 1.1032, "step": 160500 }, { "epoch": 36.79, "eval_loss": 1.3831554651260376, "eval_runtime": 8.6846, "eval_samples_per_second": 540.384, "eval_steps_per_second": 67.591, "step": 160500 }, { "epoch": 36.9, "learning_rate": 4.612651845060738e-05, "loss": 1.1022, "step": 161000 }, { "epoch": 36.9, "eval_loss": 1.402727723121643, "eval_runtime": 8.6805, "eval_samples_per_second": 540.639, "eval_steps_per_second": 67.623, "step": 161000 }, { "epoch": 37.02, "learning_rate": 4.6269768507907404e-05, "loss": 1.1123, "step": 161500 }, { "epoch": 37.02, "eval_loss": 1.4013468027114868, "eval_runtime": 8.6873, "eval_samples_per_second": 540.214, "eval_steps_per_second": 67.57, "step": 161500 }, { "epoch": 37.13, "learning_rate": 4.641301856520743e-05, "loss": 1.0964, "step": 162000 }, { "epoch": 37.13, "eval_loss": 1.394731879234314, "eval_runtime": 8.6864, "eval_samples_per_second": 540.268, "eval_steps_per_second": 67.577, "step": 162000 }, { "epoch": 37.24, "learning_rate": 4.655626862250745e-05, "loss": 1.0936, "step": 162500 }, { "epoch": 37.24, "eval_loss": 1.3878453969955444, "eval_runtime": 8.6805, "eval_samples_per_second": 540.638, "eval_steps_per_second": 67.623, "step": 162500 }, { "epoch": 37.36, "learning_rate": 4.669951867980747e-05, "loss": 1.0936, "step": 163000 }, { "epoch": 37.36, "eval_loss": 1.399857759475708, "eval_runtime": 8.6836, "eval_samples_per_second": 540.445, "eval_steps_per_second": 67.599, "step": 163000 }, { "epoch": 37.47, "learning_rate": 4.6842768737107495e-05, "loss": 1.1054, "step": 163500 }, { "epoch": 37.47, "eval_loss": 1.4086487293243408, "eval_runtime": 8.6835, "eval_samples_per_second": 540.45, "eval_steps_per_second": 67.599, "step": 163500 }, { "epoch": 37.59, "learning_rate": 4.698601879440752e-05, "loss": 1.1043, "step": 164000 }, { "epoch": 37.59, "eval_loss": 1.4070953130722046, "eval_runtime": 8.6861, "eval_samples_per_second": 540.288, "eval_steps_per_second": 67.579, "step": 164000 }, { "epoch": 37.7, "learning_rate": 4.712926885170754e-05, "loss": 1.1007, "step": 164500 }, { "epoch": 37.7, "eval_loss": 1.3754940032958984, "eval_runtime": 8.6894, "eval_samples_per_second": 540.08, "eval_steps_per_second": 67.553, "step": 164500 }, { "epoch": 37.82, "learning_rate": 4.727251890900757e-05, "loss": 1.1139, "step": 165000 }, { "epoch": 37.82, "eval_loss": 1.4017904996871948, "eval_runtime": 8.6819, "eval_samples_per_second": 540.553, "eval_steps_per_second": 67.612, "step": 165000 }, { "epoch": 37.93, "learning_rate": 4.741576896630759e-05, "loss": 1.1036, "step": 165500 }, { "epoch": 37.93, "eval_loss": 1.3944612741470337, "eval_runtime": 8.6826, "eval_samples_per_second": 540.507, "eval_steps_per_second": 67.607, "step": 165500 }, { "epoch": 38.05, "learning_rate": 4.755901902360761e-05, "loss": 1.0943, "step": 166000 }, { "epoch": 38.05, "eval_loss": 1.4136276245117188, "eval_runtime": 8.6849, "eval_samples_per_second": 540.366, "eval_steps_per_second": 67.589, "step": 166000 }, { "epoch": 38.16, "learning_rate": 4.770226908090763e-05, "loss": 1.0966, "step": 166500 }, { "epoch": 38.16, "eval_loss": 1.4286108016967773, "eval_runtime": 8.7012, "eval_samples_per_second": 539.352, "eval_steps_per_second": 67.462, "step": 166500 }, { "epoch": 38.28, "learning_rate": 4.7845519138207655e-05, "loss": 1.1014, "step": 167000 }, { "epoch": 38.28, "eval_loss": 1.3922996520996094, "eval_runtime": 8.6914, "eval_samples_per_second": 539.959, "eval_steps_per_second": 67.538, "step": 167000 }, { "epoch": 38.39, "learning_rate": 4.7988769195507685e-05, "loss": 1.0975, "step": 167500 }, { "epoch": 38.39, "eval_loss": 1.409221887588501, "eval_runtime": 8.6875, "eval_samples_per_second": 540.2, "eval_steps_per_second": 67.568, "step": 167500 }, { "epoch": 38.51, "learning_rate": 4.81320192528077e-05, "loss": 1.1047, "step": 168000 }, { "epoch": 38.51, "eval_loss": 1.4022152423858643, "eval_runtime": 8.6866, "eval_samples_per_second": 540.258, "eval_steps_per_second": 67.575, "step": 168000 }, { "epoch": 38.62, "learning_rate": 4.827526931010773e-05, "loss": 1.098, "step": 168500 }, { "epoch": 38.62, "eval_loss": 1.4087326526641846, "eval_runtime": 8.6845, "eval_samples_per_second": 540.388, "eval_steps_per_second": 67.592, "step": 168500 }, { "epoch": 38.73, "learning_rate": 4.841851936740775e-05, "loss": 1.1128, "step": 169000 }, { "epoch": 38.73, "eval_loss": 1.403940200805664, "eval_runtime": 8.6986, "eval_samples_per_second": 539.512, "eval_steps_per_second": 67.482, "step": 169000 }, { "epoch": 38.85, "learning_rate": 4.856176942470777e-05, "loss": 1.1053, "step": 169500 }, { "epoch": 38.85, "eval_loss": 1.4114054441452026, "eval_runtime": 8.6876, "eval_samples_per_second": 540.197, "eval_steps_per_second": 67.568, "step": 169500 }, { "epoch": 38.96, "learning_rate": 4.87050194820078e-05, "loss": 1.1043, "step": 170000 }, { "epoch": 38.96, "eval_loss": 1.4108600616455078, "eval_runtime": 8.6825, "eval_samples_per_second": 540.512, "eval_steps_per_second": 67.607, "step": 170000 }, { "epoch": 39.08, "learning_rate": 4.8848269539307815e-05, "loss": 1.1039, "step": 170500 }, { "epoch": 39.08, "eval_loss": 1.425398826599121, "eval_runtime": 8.6881, "eval_samples_per_second": 540.164, "eval_steps_per_second": 67.564, "step": 170500 }, { "epoch": 39.19, "learning_rate": 4.8991519596607845e-05, "loss": 1.0938, "step": 171000 }, { "epoch": 39.19, "eval_loss": 1.4083176851272583, "eval_runtime": 8.6855, "eval_samples_per_second": 540.328, "eval_steps_per_second": 67.584, "step": 171000 }, { "epoch": 39.31, "learning_rate": 4.913476965390786e-05, "loss": 1.0938, "step": 171500 }, { "epoch": 39.31, "eval_loss": 1.409211277961731, "eval_runtime": 8.6846, "eval_samples_per_second": 540.382, "eval_steps_per_second": 67.591, "step": 171500 }, { "epoch": 39.42, "learning_rate": 4.9278019711207884e-05, "loss": 1.0998, "step": 172000 }, { "epoch": 39.42, "eval_loss": 1.4043272733688354, "eval_runtime": 8.6835, "eval_samples_per_second": 540.453, "eval_steps_per_second": 67.6, "step": 172000 }, { "epoch": 39.54, "learning_rate": 4.942126976850791e-05, "loss": 1.1057, "step": 172500 }, { "epoch": 39.54, "eval_loss": 1.39749276638031, "eval_runtime": 8.6867, "eval_samples_per_second": 540.25, "eval_steps_per_second": 67.574, "step": 172500 }, { "epoch": 39.65, "learning_rate": 4.956451982580793e-05, "loss": 1.1, "step": 173000 }, { "epoch": 39.65, "eval_loss": 1.4416342973709106, "eval_runtime": 8.6839, "eval_samples_per_second": 540.423, "eval_steps_per_second": 67.596, "step": 173000 }, { "epoch": 39.77, "learning_rate": 4.970776988310796e-05, "loss": 1.1044, "step": 173500 }, { "epoch": 39.77, "eval_loss": 1.4263495206832886, "eval_runtime": 8.6782, "eval_samples_per_second": 540.779, "eval_steps_per_second": 67.641, "step": 173500 }, { "epoch": 39.88, "learning_rate": 4.9851019940407975e-05, "loss": 1.1101, "step": 174000 }, { "epoch": 39.88, "eval_loss": 1.433109164237976, "eval_runtime": 8.6877, "eval_samples_per_second": 540.191, "eval_steps_per_second": 67.567, "step": 174000 }, { "epoch": 40.0, "learning_rate": 4.9994269997708005e-05, "loss": 1.1054, "step": 174500 }, { "epoch": 40.0, "eval_loss": 1.4334523677825928, "eval_runtime": 8.7013, "eval_samples_per_second": 539.342, "eval_steps_per_second": 67.461, "step": 174500 }, { "epoch": 40.11, "learning_rate": 4.999998847829444e-05, "loss": 1.094, "step": 175000 }, { "epoch": 40.11, "eval_loss": 1.4351298809051514, "eval_runtime": 8.6871, "eval_samples_per_second": 540.227, "eval_steps_per_second": 67.572, "step": 175000 }, { "epoch": 40.22, "learning_rate": 4.999995197290226e-05, "loss": 1.0948, "step": 175500 }, { "epoch": 40.22, "eval_loss": 1.404687762260437, "eval_runtime": 8.6835, "eval_samples_per_second": 540.447, "eval_steps_per_second": 67.599, "step": 175500 }, { "epoch": 40.34, "learning_rate": 4.9999890463856975e-05, "loss": 1.095, "step": 176000 }, { "epoch": 40.34, "eval_loss": 1.4201388359069824, "eval_runtime": 8.6858, "eval_samples_per_second": 540.308, "eval_steps_per_second": 67.582, "step": 176000 }, { "epoch": 40.45, "learning_rate": 4.9999803951220124e-05, "loss": 1.1004, "step": 176500 }, { "epoch": 40.45, "eval_loss": 1.423895239830017, "eval_runtime": 8.6827, "eval_samples_per_second": 540.5, "eval_steps_per_second": 67.606, "step": 176500 }, { "epoch": 40.57, "learning_rate": 4.999969243507822e-05, "loss": 1.103, "step": 177000 }, { "epoch": 40.57, "eval_loss": 1.422035813331604, "eval_runtime": 8.6882, "eval_samples_per_second": 540.155, "eval_steps_per_second": 67.563, "step": 177000 }, { "epoch": 40.68, "learning_rate": 4.999955591554281e-05, "loss": 1.1082, "step": 177500 }, { "epoch": 40.68, "eval_loss": 1.4087498188018799, "eval_runtime": 8.686, "eval_samples_per_second": 540.297, "eval_steps_per_second": 67.58, "step": 177500 }, { "epoch": 40.8, "learning_rate": 4.999939439275042e-05, "loss": 1.104, "step": 178000 }, { "epoch": 40.8, "eval_loss": 1.4227778911590576, "eval_runtime": 8.6869, "eval_samples_per_second": 540.236, "eval_steps_per_second": 67.573, "step": 178000 }, { "epoch": 40.91, "learning_rate": 4.9999207866862605e-05, "loss": 1.1036, "step": 178500 }, { "epoch": 40.91, "eval_loss": 1.4217792749404907, "eval_runtime": 8.6905, "eval_samples_per_second": 540.013, "eval_steps_per_second": 67.545, "step": 178500 }, { "epoch": 41.03, "learning_rate": 4.9998996338065916e-05, "loss": 1.1053, "step": 179000 }, { "epoch": 41.03, "eval_loss": 1.4466702938079834, "eval_runtime": 8.6842, "eval_samples_per_second": 540.404, "eval_steps_per_second": 67.594, "step": 179000 }, { "epoch": 41.14, "learning_rate": 4.999875980657191e-05, "loss": 1.0858, "step": 179500 }, { "epoch": 41.14, "eval_loss": 1.4107167720794678, "eval_runtime": 8.6863, "eval_samples_per_second": 540.275, "eval_steps_per_second": 67.578, "step": 179500 }, { "epoch": 41.26, "learning_rate": 4.999849827261716e-05, "loss": 1.0887, "step": 180000 }, { "epoch": 41.26, "eval_loss": 1.4146836996078491, "eval_runtime": 8.6878, "eval_samples_per_second": 540.185, "eval_steps_per_second": 67.566, "step": 180000 }, { "epoch": 41.37, "learning_rate": 4.999821173646323e-05, "loss": 1.0962, "step": 180500 }, { "epoch": 41.37, "eval_loss": 1.4210665225982666, "eval_runtime": 8.6883, "eval_samples_per_second": 540.152, "eval_steps_per_second": 67.562, "step": 180500 }, { "epoch": 41.49, "learning_rate": 4.999790019839672e-05, "loss": 1.0969, "step": 181000 }, { "epoch": 41.49, "eval_loss": 1.432451844215393, "eval_runtime": 8.6817, "eval_samples_per_second": 540.564, "eval_steps_per_second": 67.614, "step": 181000 }, { "epoch": 41.6, "learning_rate": 4.9997563658729184e-05, "loss": 1.105, "step": 181500 }, { "epoch": 41.6, "eval_loss": 1.435603380203247, "eval_runtime": 8.6848, "eval_samples_per_second": 540.367, "eval_steps_per_second": 67.589, "step": 181500 }, { "epoch": 41.71, "learning_rate": 4.9997202117797226e-05, "loss": 1.0996, "step": 182000 }, { "epoch": 41.71, "eval_loss": 1.4452011585235596, "eval_runtime": 8.6869, "eval_samples_per_second": 540.242, "eval_steps_per_second": 67.573, "step": 182000 }, { "epoch": 41.83, "learning_rate": 4.9996815575962444e-05, "loss": 1.1037, "step": 182500 }, { "epoch": 41.83, "eval_loss": 1.4333503246307373, "eval_runtime": 8.6928, "eval_samples_per_second": 539.873, "eval_steps_per_second": 67.527, "step": 182500 }, { "epoch": 41.94, "learning_rate": 4.999640403361143e-05, "loss": 1.1026, "step": 183000 }, { "epoch": 41.94, "eval_loss": 1.4372317790985107, "eval_runtime": 8.6813, "eval_samples_per_second": 540.59, "eval_steps_per_second": 67.617, "step": 183000 }, { "epoch": 42.06, "learning_rate": 4.999596749115579e-05, "loss": 1.0992, "step": 183500 }, { "epoch": 42.06, "eval_loss": 1.4432549476623535, "eval_runtime": 8.6903, "eval_samples_per_second": 540.028, "eval_steps_per_second": 67.547, "step": 183500 }, { "epoch": 42.17, "learning_rate": 4.999550594903214e-05, "loss": 1.0908, "step": 184000 }, { "epoch": 42.17, "eval_loss": 1.4407187700271606, "eval_runtime": 8.6885, "eval_samples_per_second": 540.14, "eval_steps_per_second": 67.561, "step": 184000 }, { "epoch": 42.29, "learning_rate": 4.9995019407702074e-05, "loss": 1.0895, "step": 184500 }, { "epoch": 42.29, "eval_loss": 1.4171150922775269, "eval_runtime": 8.6801, "eval_samples_per_second": 540.659, "eval_steps_per_second": 67.626, "step": 184500 }, { "epoch": 42.4, "learning_rate": 4.999450786765222e-05, "loss": 1.0933, "step": 185000 }, { "epoch": 42.4, "eval_loss": 1.4316980838775635, "eval_runtime": 8.6849, "eval_samples_per_second": 540.363, "eval_steps_per_second": 67.589, "step": 185000 }, { "epoch": 42.52, "learning_rate": 4.9993971329394176e-05, "loss": 1.0972, "step": 185500 }, { "epoch": 42.52, "eval_loss": 1.433881402015686, "eval_runtime": 8.686, "eval_samples_per_second": 540.296, "eval_steps_per_second": 67.58, "step": 185500 }, { "epoch": 42.63, "learning_rate": 4.999340979346458e-05, "loss": 1.093, "step": 186000 }, { "epoch": 42.63, "eval_loss": 1.4354099035263062, "eval_runtime": 8.6952, "eval_samples_per_second": 539.722, "eval_steps_per_second": 67.508, "step": 186000 }, { "epoch": 42.75, "learning_rate": 4.999282326042504e-05, "loss": 1.1005, "step": 186500 }, { "epoch": 42.75, "eval_loss": 1.438645362854004, "eval_runtime": 8.6832, "eval_samples_per_second": 540.468, "eval_steps_per_second": 67.602, "step": 186500 }, { "epoch": 42.86, "learning_rate": 4.999221173086218e-05, "loss": 1.1039, "step": 187000 }, { "epoch": 42.86, "eval_loss": 1.425053358078003, "eval_runtime": 8.6805, "eval_samples_per_second": 540.636, "eval_steps_per_second": 67.623, "step": 187000 }, { "epoch": 42.97, "learning_rate": 4.999157520538761e-05, "loss": 1.1021, "step": 187500 }, { "epoch": 42.97, "eval_loss": 1.4326058626174927, "eval_runtime": 8.6895, "eval_samples_per_second": 540.076, "eval_steps_per_second": 67.553, "step": 187500 }, { "epoch": 43.09, "learning_rate": 4.9990913684637966e-05, "loss": 1.088, "step": 188000 }, { "epoch": 43.09, "eval_loss": 1.4348019361495972, "eval_runtime": 8.6879, "eval_samples_per_second": 540.177, "eval_steps_per_second": 67.565, "step": 188000 }, { "epoch": 43.2, "learning_rate": 4.999022716927485e-05, "loss": 1.0796, "step": 188500 }, { "epoch": 43.2, "eval_loss": 1.4338518381118774, "eval_runtime": 8.6896, "eval_samples_per_second": 540.07, "eval_steps_per_second": 67.552, "step": 188500 }, { "epoch": 43.32, "learning_rate": 4.99895156599849e-05, "loss": 1.0928, "step": 189000 }, { "epoch": 43.32, "eval_loss": 1.430570363998413, "eval_runtime": 8.6836, "eval_samples_per_second": 540.443, "eval_steps_per_second": 67.599, "step": 189000 }, { "epoch": 43.43, "learning_rate": 4.99887791574797e-05, "loss": 1.0883, "step": 189500 }, { "epoch": 43.43, "eval_loss": 1.4227776527404785, "eval_runtime": 8.6811, "eval_samples_per_second": 540.597, "eval_steps_per_second": 67.618, "step": 189500 }, { "epoch": 43.55, "learning_rate": 4.998801766249589e-05, "loss": 1.0901, "step": 190000 }, { "epoch": 43.55, "eval_loss": 1.4285221099853516, "eval_runtime": 8.6803, "eval_samples_per_second": 540.652, "eval_steps_per_second": 67.625, "step": 190000 }, { "epoch": 43.66, "learning_rate": 4.998723117579507e-05, "loss": 1.0958, "step": 190500 }, { "epoch": 43.66, "eval_loss": 1.422059416770935, "eval_runtime": 8.6809, "eval_samples_per_second": 540.614, "eval_steps_per_second": 67.62, "step": 190500 }, { "epoch": 43.78, "learning_rate": 4.9986419698163835e-05, "loss": 1.0934, "step": 191000 }, { "epoch": 43.78, "eval_loss": 1.434030294418335, "eval_runtime": 8.6857, "eval_samples_per_second": 540.315, "eval_steps_per_second": 67.583, "step": 191000 }, { "epoch": 43.89, "learning_rate": 4.998558323041379e-05, "loss": 1.0866, "step": 191500 }, { "epoch": 43.89, "eval_loss": 1.4399570226669312, "eval_runtime": 8.6837, "eval_samples_per_second": 540.436, "eval_steps_per_second": 67.598, "step": 191500 }, { "epoch": 44.01, "learning_rate": 4.998472177338153e-05, "loss": 1.1007, "step": 192000 }, { "epoch": 44.01, "eval_loss": 1.4674826860427856, "eval_runtime": 8.6835, "eval_samples_per_second": 540.451, "eval_steps_per_second": 67.6, "step": 192000 }, { "epoch": 44.12, "learning_rate": 4.9983835327928626e-05, "loss": 1.079, "step": 192500 }, { "epoch": 44.12, "eval_loss": 1.417790412902832, "eval_runtime": 8.6778, "eval_samples_per_second": 540.805, "eval_steps_per_second": 67.644, "step": 192500 }, { "epoch": 44.24, "learning_rate": 4.998292389494166e-05, "loss": 1.0867, "step": 193000 }, { "epoch": 44.24, "eval_loss": 1.4430800676345825, "eval_runtime": 8.6779, "eval_samples_per_second": 540.8, "eval_steps_per_second": 67.643, "step": 193000 }, { "epoch": 44.35, "learning_rate": 4.998198747533222e-05, "loss": 1.0829, "step": 193500 }, { "epoch": 44.35, "eval_loss": 1.4461450576782227, "eval_runtime": 8.6814, "eval_samples_per_second": 540.58, "eval_steps_per_second": 67.616, "step": 193500 }, { "epoch": 44.46, "learning_rate": 4.998102607003683e-05, "loss": 1.0838, "step": 194000 }, { "epoch": 44.46, "eval_loss": 1.4499789476394653, "eval_runtime": 8.6934, "eval_samples_per_second": 539.836, "eval_steps_per_second": 67.523, "step": 194000 }, { "epoch": 44.58, "learning_rate": 4.998003968001706e-05, "loss": 1.0852, "step": 194500 }, { "epoch": 44.58, "eval_loss": 1.4275366067886353, "eval_runtime": 8.681, "eval_samples_per_second": 540.603, "eval_steps_per_second": 67.619, "step": 194500 }, { "epoch": 44.69, "learning_rate": 4.997902830625943e-05, "loss": 1.0947, "step": 195000 }, { "epoch": 44.69, "eval_loss": 1.4414173364639282, "eval_runtime": 8.6822, "eval_samples_per_second": 540.534, "eval_steps_per_second": 67.61, "step": 195000 }, { "epoch": 44.81, "learning_rate": 4.997799194977549e-05, "loss": 1.0932, "step": 195500 }, { "epoch": 44.81, "eval_loss": 1.4369679689407349, "eval_runtime": 8.6776, "eval_samples_per_second": 540.819, "eval_steps_per_second": 67.646, "step": 195500 }, { "epoch": 44.92, "learning_rate": 4.997693061160173e-05, "loss": 1.0876, "step": 196000 }, { "epoch": 44.92, "eval_loss": 1.4421695470809937, "eval_runtime": 8.6786, "eval_samples_per_second": 540.753, "eval_steps_per_second": 67.637, "step": 196000 }, { "epoch": 45.04, "learning_rate": 4.997584429279964e-05, "loss": 1.0922, "step": 196500 }, { "epoch": 45.04, "eval_loss": 1.4423967599868774, "eval_runtime": 8.6795, "eval_samples_per_second": 540.699, "eval_steps_per_second": 67.631, "step": 196500 }, { "epoch": 45.15, "learning_rate": 4.997473299445573e-05, "loss": 1.0763, "step": 197000 }, { "epoch": 45.15, "eval_loss": 1.4209808111190796, "eval_runtime": 8.6832, "eval_samples_per_second": 540.467, "eval_steps_per_second": 67.602, "step": 197000 }, { "epoch": 45.27, "learning_rate": 4.997359671768143e-05, "loss": 1.0892, "step": 197500 }, { "epoch": 45.27, "eval_loss": 1.4599201679229736, "eval_runtime": 8.6824, "eval_samples_per_second": 540.521, "eval_steps_per_second": 67.608, "step": 197500 }, { "epoch": 45.38, "learning_rate": 4.997243546361319e-05, "loss": 1.0874, "step": 198000 }, { "epoch": 45.38, "eval_loss": 1.4576014280319214, "eval_runtime": 8.6757, "eval_samples_per_second": 540.937, "eval_steps_per_second": 67.66, "step": 198000 }, { "epoch": 45.5, "learning_rate": 4.997124923341245e-05, "loss": 1.078, "step": 198500 }, { "epoch": 45.5, "eval_loss": 1.4486885070800781, "eval_runtime": 8.6789, "eval_samples_per_second": 540.738, "eval_steps_per_second": 67.635, "step": 198500 }, { "epoch": 45.61, "learning_rate": 4.9970038028265606e-05, "loss": 1.0847, "step": 199000 }, { "epoch": 45.61, "eval_loss": 1.459747076034546, "eval_runtime": 8.6844, "eval_samples_per_second": 540.397, "eval_steps_per_second": 67.593, "step": 199000 }, { "epoch": 45.73, "learning_rate": 4.996880184938405e-05, "loss": 1.0925, "step": 199500 }, { "epoch": 45.73, "eval_loss": 1.44039785861969, "eval_runtime": 8.6856, "eval_samples_per_second": 540.321, "eval_steps_per_second": 67.583, "step": 199500 }, { "epoch": 45.84, "learning_rate": 4.9967540698004136e-05, "loss": 1.0781, "step": 200000 }, { "epoch": 45.84, "eval_loss": 1.4302361011505127, "eval_runtime": 8.684, "eval_samples_per_second": 540.42, "eval_steps_per_second": 67.596, "step": 200000 }, { "epoch": 45.95, "learning_rate": 4.996625457538721e-05, "loss": 1.0893, "step": 200500 }, { "epoch": 45.95, "eval_loss": 1.4594749212265015, "eval_runtime": 8.6778, "eval_samples_per_second": 540.804, "eval_steps_per_second": 67.644, "step": 200500 }, { "epoch": 46.07, "learning_rate": 4.996494348281957e-05, "loss": 1.0833, "step": 201000 }, { "epoch": 46.07, "eval_loss": 1.4473764896392822, "eval_runtime": 8.683, "eval_samples_per_second": 540.479, "eval_steps_per_second": 67.603, "step": 201000 }, { "epoch": 46.18, "learning_rate": 4.9963607421612516e-05, "loss": 1.0746, "step": 201500 }, { "epoch": 46.18, "eval_loss": 1.4554041624069214, "eval_runtime": 8.6783, "eval_samples_per_second": 540.771, "eval_steps_per_second": 67.64, "step": 201500 }, { "epoch": 46.3, "learning_rate": 4.9962246393102306e-05, "loss": 1.0776, "step": 202000 }, { "epoch": 46.3, "eval_loss": 1.454910159111023, "eval_runtime": 8.6952, "eval_samples_per_second": 539.726, "eval_steps_per_second": 67.509, "step": 202000 }, { "epoch": 46.41, "learning_rate": 4.996086039865017e-05, "loss": 1.0852, "step": 202500 }, { "epoch": 46.41, "eval_loss": 1.4584496021270752, "eval_runtime": 8.6844, "eval_samples_per_second": 540.395, "eval_steps_per_second": 67.593, "step": 202500 }, { "epoch": 46.53, "learning_rate": 4.99594494396423e-05, "loss": 1.0817, "step": 203000 }, { "epoch": 46.53, "eval_loss": 1.4448548555374146, "eval_runtime": 8.6792, "eval_samples_per_second": 540.719, "eval_steps_per_second": 67.633, "step": 203000 }, { "epoch": 46.64, "learning_rate": 4.9958013517489874e-05, "loss": 1.0846, "step": 203500 }, { "epoch": 46.64, "eval_loss": 1.4689854383468628, "eval_runtime": 8.6775, "eval_samples_per_second": 540.825, "eval_steps_per_second": 67.646, "step": 203500 }, { "epoch": 46.76, "learning_rate": 4.9956552633629024e-05, "loss": 1.089, "step": 204000 }, { "epoch": 46.76, "eval_loss": 1.449251413345337, "eval_runtime": 8.6829, "eval_samples_per_second": 540.49, "eval_steps_per_second": 67.604, "step": 204000 }, { "epoch": 46.87, "learning_rate": 4.9955066789520846e-05, "loss": 1.0841, "step": 204500 }, { "epoch": 46.87, "eval_loss": 1.457749605178833, "eval_runtime": 8.6833, "eval_samples_per_second": 540.465, "eval_steps_per_second": 67.601, "step": 204500 }, { "epoch": 46.99, "learning_rate": 4.99535559866514e-05, "loss": 1.092, "step": 205000 }, { "epoch": 46.99, "eval_loss": 1.4333823919296265, "eval_runtime": 8.688, "eval_samples_per_second": 540.17, "eval_steps_per_second": 67.564, "step": 205000 }, { "epoch": 47.1, "learning_rate": 4.9952020226531725e-05, "loss": 1.0685, "step": 205500 }, { "epoch": 47.1, "eval_loss": 1.4558241367340088, "eval_runtime": 8.6924, "eval_samples_per_second": 539.894, "eval_steps_per_second": 67.53, "step": 205500 }, { "epoch": 47.22, "learning_rate": 4.99504595106978e-05, "loss": 1.0785, "step": 206000 }, { "epoch": 47.22, "eval_loss": 1.4453771114349365, "eval_runtime": 8.6813, "eval_samples_per_second": 540.585, "eval_steps_per_second": 67.616, "step": 206000 }, { "epoch": 47.33, "learning_rate": 4.9948873840710576e-05, "loss": 1.0774, "step": 206500 }, { "epoch": 47.33, "eval_loss": 1.4458664655685425, "eval_runtime": 8.679, "eval_samples_per_second": 540.729, "eval_steps_per_second": 67.634, "step": 206500 }, { "epoch": 47.44, "learning_rate": 4.994726321815596e-05, "loss": 1.0813, "step": 207000 }, { "epoch": 47.44, "eval_loss": 1.463316798210144, "eval_runtime": 8.6832, "eval_samples_per_second": 540.471, "eval_steps_per_second": 67.602, "step": 207000 }, { "epoch": 47.56, "learning_rate": 4.99456276446448e-05, "loss": 1.0754, "step": 207500 }, { "epoch": 47.56, "eval_loss": 1.4418036937713623, "eval_runtime": 8.681, "eval_samples_per_second": 540.606, "eval_steps_per_second": 67.619, "step": 207500 }, { "epoch": 47.67, "learning_rate": 4.994396712181293e-05, "loss": 1.0809, "step": 208000 }, { "epoch": 47.67, "eval_loss": 1.4552536010742188, "eval_runtime": 8.6862, "eval_samples_per_second": 540.284, "eval_steps_per_second": 67.579, "step": 208000 }, { "epoch": 47.79, "learning_rate": 4.99422816513211e-05, "loss": 1.0833, "step": 208500 }, { "epoch": 47.79, "eval_loss": 1.4602766036987305, "eval_runtime": 8.6832, "eval_samples_per_second": 540.466, "eval_steps_per_second": 67.601, "step": 208500 }, { "epoch": 47.9, "learning_rate": 4.9940571234855045e-05, "loss": 1.0777, "step": 209000 }, { "epoch": 47.9, "eval_loss": 1.4406481981277466, "eval_runtime": 8.6833, "eval_samples_per_second": 540.462, "eval_steps_per_second": 67.601, "step": 209000 }, { "epoch": 48.02, "learning_rate": 4.993883587412543e-05, "loss": 1.0854, "step": 209500 }, { "epoch": 48.02, "eval_loss": 1.4569498300552368, "eval_runtime": 8.687, "eval_samples_per_second": 540.23, "eval_steps_per_second": 67.572, "step": 209500 }, { "epoch": 48.13, "learning_rate": 4.993707557086786e-05, "loss": 1.0714, "step": 210000 }, { "epoch": 48.13, "eval_loss": 1.4461029767990112, "eval_runtime": 8.6832, "eval_samples_per_second": 540.467, "eval_steps_per_second": 67.602, "step": 210000 }, { "epoch": 48.25, "learning_rate": 4.9935290326842925e-05, "loss": 1.0641, "step": 210500 }, { "epoch": 48.25, "eval_loss": 1.4587260484695435, "eval_runtime": 8.6765, "eval_samples_per_second": 540.888, "eval_steps_per_second": 67.654, "step": 210500 }, { "epoch": 48.36, "learning_rate": 4.993348014383611e-05, "loss": 1.0743, "step": 211000 }, { "epoch": 48.36, "eval_loss": 1.4684042930603027, "eval_runtime": 8.6894, "eval_samples_per_second": 540.085, "eval_steps_per_second": 67.554, "step": 211000 }, { "epoch": 48.48, "learning_rate": 4.993164502365788e-05, "loss": 1.0736, "step": 211500 }, { "epoch": 48.48, "eval_loss": 1.4414480924606323, "eval_runtime": 8.6744, "eval_samples_per_second": 541.019, "eval_steps_per_second": 67.671, "step": 211500 }, { "epoch": 48.59, "learning_rate": 4.992978496814362e-05, "loss": 1.0789, "step": 212000 }, { "epoch": 48.59, "eval_loss": 1.4585165977478027, "eval_runtime": 8.679, "eval_samples_per_second": 540.733, "eval_steps_per_second": 67.635, "step": 212000 }, { "epoch": 48.7, "learning_rate": 4.992789997915367e-05, "loss": 1.0781, "step": 212500 }, { "epoch": 48.7, "eval_loss": 1.457632064819336, "eval_runtime": 8.6855, "eval_samples_per_second": 540.327, "eval_steps_per_second": 67.584, "step": 212500 }, { "epoch": 48.82, "learning_rate": 4.992599005857328e-05, "loss": 1.0801, "step": 213000 }, { "epoch": 48.82, "eval_loss": 1.4507173299789429, "eval_runtime": 8.6821, "eval_samples_per_second": 540.536, "eval_steps_per_second": 67.61, "step": 213000 }, { "epoch": 48.93, "learning_rate": 4.992405520831267e-05, "loss": 1.0793, "step": 213500 }, { "epoch": 48.93, "eval_loss": 1.4515572786331177, "eval_runtime": 8.6891, "eval_samples_per_second": 540.102, "eval_steps_per_second": 67.556, "step": 213500 }, { "epoch": 49.05, "learning_rate": 4.992209543030696e-05, "loss": 1.0787, "step": 214000 }, { "epoch": 49.05, "eval_loss": 1.4577687978744507, "eval_runtime": 8.6829, "eval_samples_per_second": 540.485, "eval_steps_per_second": 67.604, "step": 214000 }, { "epoch": 49.16, "learning_rate": 4.992011072651624e-05, "loss": 1.0681, "step": 214500 }, { "epoch": 49.16, "eval_loss": 1.4672496318817139, "eval_runtime": 8.6826, "eval_samples_per_second": 540.509, "eval_steps_per_second": 67.607, "step": 214500 }, { "epoch": 49.28, "learning_rate": 4.9918101098925495e-05, "loss": 1.068, "step": 215000 }, { "epoch": 49.28, "eval_loss": 1.4596153497695923, "eval_runtime": 8.6761, "eval_samples_per_second": 540.914, "eval_steps_per_second": 67.657, "step": 215000 }, { "epoch": 49.39, "learning_rate": 4.991606654954465e-05, "loss": 1.0692, "step": 215500 }, { "epoch": 49.39, "eval_loss": 1.4536336660385132, "eval_runtime": 8.6818, "eval_samples_per_second": 540.556, "eval_steps_per_second": 67.613, "step": 215500 }, { "epoch": 49.51, "learning_rate": 4.9914007080408556e-05, "loss": 1.0702, "step": 216000 }, { "epoch": 49.51, "eval_loss": 1.45850670337677, "eval_runtime": 8.6842, "eval_samples_per_second": 540.409, "eval_steps_per_second": 67.594, "step": 216000 }, { "epoch": 49.62, "learning_rate": 4.9911922693576983e-05, "loss": 1.075, "step": 216500 }, { "epoch": 49.62, "eval_loss": 1.4590942859649658, "eval_runtime": 8.6845, "eval_samples_per_second": 540.391, "eval_steps_per_second": 67.592, "step": 216500 }, { "epoch": 49.74, "learning_rate": 4.990981339113464e-05, "loss": 1.0754, "step": 217000 }, { "epoch": 49.74, "eval_loss": 1.4532920122146606, "eval_runtime": 8.6811, "eval_samples_per_second": 540.598, "eval_steps_per_second": 67.618, "step": 217000 }, { "epoch": 49.85, "learning_rate": 4.990767917519113e-05, "loss": 1.0744, "step": 217500 }, { "epoch": 49.85, "eval_loss": 1.4531233310699463, "eval_runtime": 8.68, "eval_samples_per_second": 540.671, "eval_steps_per_second": 67.627, "step": 217500 }, { "epoch": 49.97, "learning_rate": 4.9905520047881e-05, "loss": 1.0794, "step": 218000 }, { "epoch": 49.97, "eval_loss": 1.4552360773086548, "eval_runtime": 8.6825, "eval_samples_per_second": 540.514, "eval_steps_per_second": 67.607, "step": 218000 }, { "epoch": 50.08, "learning_rate": 4.9903336011363664e-05, "loss": 1.0693, "step": 218500 }, { "epoch": 50.08, "eval_loss": 1.4694995880126953, "eval_runtime": 8.6862, "eval_samples_per_second": 540.281, "eval_steps_per_second": 67.578, "step": 218500 }, { "epoch": 50.19, "learning_rate": 4.990112706782352e-05, "loss": 1.0674, "step": 219000 }, { "epoch": 50.19, "eval_loss": 1.4630742073059082, "eval_runtime": 8.6848, "eval_samples_per_second": 540.369, "eval_steps_per_second": 67.589, "step": 219000 }, { "epoch": 50.31, "learning_rate": 4.989889321946981e-05, "loss": 1.0678, "step": 219500 }, { "epoch": 50.31, "eval_loss": 1.482413649559021, "eval_runtime": 8.6822, "eval_samples_per_second": 540.53, "eval_steps_per_second": 67.609, "step": 219500 }, { "epoch": 50.42, "learning_rate": 4.989663446853673e-05, "loss": 1.0703, "step": 220000 }, { "epoch": 50.42, "eval_loss": 1.479426383972168, "eval_runtime": 8.6889, "eval_samples_per_second": 540.116, "eval_steps_per_second": 67.558, "step": 220000 }, { "epoch": 50.54, "learning_rate": 4.989435081728335e-05, "loss": 1.0681, "step": 220500 }, { "epoch": 50.54, "eval_loss": 1.4596670866012573, "eval_runtime": 8.685, "eval_samples_per_second": 540.354, "eval_steps_per_second": 67.587, "step": 220500 }, { "epoch": 50.65, "learning_rate": 4.989204226799368e-05, "loss": 1.0784, "step": 221000 }, { "epoch": 50.65, "eval_loss": 1.4664379358291626, "eval_runtime": 8.6819, "eval_samples_per_second": 540.548, "eval_steps_per_second": 67.612, "step": 221000 }, { "epoch": 50.77, "learning_rate": 4.98897088229766e-05, "loss": 1.0675, "step": 221500 }, { "epoch": 50.77, "eval_loss": 1.459997534751892, "eval_runtime": 8.6977, "eval_samples_per_second": 539.569, "eval_steps_per_second": 67.489, "step": 221500 }, { "epoch": 50.88, "learning_rate": 4.9887350484565895e-05, "loss": 1.0777, "step": 222000 }, { "epoch": 50.88, "eval_loss": 1.4787524938583374, "eval_runtime": 8.6845, "eval_samples_per_second": 540.386, "eval_steps_per_second": 67.591, "step": 222000 }, { "epoch": 51.0, "learning_rate": 4.988496725512027e-05, "loss": 1.0667, "step": 222500 }, { "epoch": 51.0, "eval_loss": 1.4562236070632935, "eval_runtime": 8.6862, "eval_samples_per_second": 540.284, "eval_steps_per_second": 67.579, "step": 222500 }, { "epoch": 51.11, "learning_rate": 4.988255913702329e-05, "loss": 1.0567, "step": 223000 }, { "epoch": 51.11, "eval_loss": 1.4595696926116943, "eval_runtime": 8.6848, "eval_samples_per_second": 540.368, "eval_steps_per_second": 67.589, "step": 223000 }, { "epoch": 51.23, "learning_rate": 4.988012613268342e-05, "loss": 1.0612, "step": 223500 }, { "epoch": 51.23, "eval_loss": 1.4612880945205688, "eval_runtime": 8.6854, "eval_samples_per_second": 540.334, "eval_steps_per_second": 67.585, "step": 223500 }, { "epoch": 51.34, "learning_rate": 4.987766824453406e-05, "loss": 1.0621, "step": 224000 }, { "epoch": 51.34, "eval_loss": 1.4710408449172974, "eval_runtime": 8.684, "eval_samples_per_second": 540.417, "eval_steps_per_second": 67.595, "step": 224000 }, { "epoch": 51.46, "learning_rate": 4.9875185475033436e-05, "loss": 1.0631, "step": 224500 }, { "epoch": 51.46, "eval_loss": 1.4636404514312744, "eval_runtime": 8.6842, "eval_samples_per_second": 540.409, "eval_steps_per_second": 67.594, "step": 224500 }, { "epoch": 51.57, "learning_rate": 4.9872677826664696e-05, "loss": 1.0684, "step": 225000 }, { "epoch": 51.57, "eval_loss": 1.443265676498413, "eval_runtime": 8.6819, "eval_samples_per_second": 540.551, "eval_steps_per_second": 67.612, "step": 225000 }, { "epoch": 51.68, "learning_rate": 4.987014530193584e-05, "loss": 1.0721, "step": 225500 }, { "epoch": 51.68, "eval_loss": 1.4495238065719604, "eval_runtime": 8.6869, "eval_samples_per_second": 540.241, "eval_steps_per_second": 67.573, "step": 225500 }, { "epoch": 51.8, "learning_rate": 4.986758790337979e-05, "loss": 1.0705, "step": 226000 }, { "epoch": 51.8, "eval_loss": 1.4889479875564575, "eval_runtime": 8.6873, "eval_samples_per_second": 540.216, "eval_steps_per_second": 67.57, "step": 226000 }, { "epoch": 51.91, "learning_rate": 4.986500563355432e-05, "loss": 1.0691, "step": 226500 }, { "epoch": 51.91, "eval_loss": 1.4760422706604004, "eval_runtime": 8.6885, "eval_samples_per_second": 540.142, "eval_steps_per_second": 67.561, "step": 226500 }, { "epoch": 52.03, "learning_rate": 4.986239849504207e-05, "loss": 1.0765, "step": 227000 }, { "epoch": 52.03, "eval_loss": 1.4814748764038086, "eval_runtime": 8.6876, "eval_samples_per_second": 540.197, "eval_steps_per_second": 67.568, "step": 227000 }, { "epoch": 52.14, "learning_rate": 4.9859766490450576e-05, "loss": 1.0543, "step": 227500 }, { "epoch": 52.14, "eval_loss": 1.4592996835708618, "eval_runtime": 8.6845, "eval_samples_per_second": 540.39, "eval_steps_per_second": 67.592, "step": 227500 }, { "epoch": 52.26, "learning_rate": 4.985710962241222e-05, "loss": 1.0644, "step": 228000 }, { "epoch": 52.26, "eval_loss": 1.4622164964675903, "eval_runtime": 8.6858, "eval_samples_per_second": 540.309, "eval_steps_per_second": 67.582, "step": 228000 }, { "epoch": 52.37, "learning_rate": 4.985442789358428e-05, "loss": 1.0603, "step": 228500 }, { "epoch": 52.37, "eval_loss": 1.4636534452438354, "eval_runtime": 8.6881, "eval_samples_per_second": 540.163, "eval_steps_per_second": 67.564, "step": 228500 }, { "epoch": 52.49, "learning_rate": 4.985172130664887e-05, "loss": 1.0635, "step": 229000 }, { "epoch": 52.49, "eval_loss": 1.466815710067749, "eval_runtime": 8.6822, "eval_samples_per_second": 540.532, "eval_steps_per_second": 67.61, "step": 229000 }, { "epoch": 52.6, "learning_rate": 4.984898986431299e-05, "loss": 1.0599, "step": 229500 }, { "epoch": 52.6, "eval_loss": 1.4655401706695557, "eval_runtime": 8.691, "eval_samples_per_second": 539.983, "eval_steps_per_second": 67.541, "step": 229500 }, { "epoch": 52.72, "learning_rate": 4.984623356930846e-05, "loss": 1.0656, "step": 230000 }, { "epoch": 52.72, "eval_loss": 1.4735110998153687, "eval_runtime": 8.6839, "eval_samples_per_second": 540.423, "eval_steps_per_second": 67.596, "step": 230000 }, { "epoch": 52.83, "learning_rate": 4.9843452424392014e-05, "loss": 1.0651, "step": 230500 }, { "epoch": 52.83, "eval_loss": 1.4705512523651123, "eval_runtime": 8.6845, "eval_samples_per_second": 540.389, "eval_steps_per_second": 67.592, "step": 230500 }, { "epoch": 52.95, "learning_rate": 4.98406464323452e-05, "loss": 1.0617, "step": 231000 }, { "epoch": 52.95, "eval_loss": 1.4695470333099365, "eval_runtime": 8.6833, "eval_samples_per_second": 540.462, "eval_steps_per_second": 67.601, "step": 231000 }, { "epoch": 53.06, "learning_rate": 4.9837815595974415e-05, "loss": 1.06, "step": 231500 }, { "epoch": 53.06, "eval_loss": 1.4782512187957764, "eval_runtime": 8.684, "eval_samples_per_second": 540.418, "eval_steps_per_second": 67.595, "step": 231500 }, { "epoch": 53.17, "learning_rate": 4.983495991811091e-05, "loss": 1.0551, "step": 232000 }, { "epoch": 53.17, "eval_loss": 1.4736642837524414, "eval_runtime": 8.6866, "eval_samples_per_second": 540.255, "eval_steps_per_second": 67.575, "step": 232000 }, { "epoch": 53.29, "learning_rate": 4.983207940161081e-05, "loss": 1.0608, "step": 232500 }, { "epoch": 53.29, "eval_loss": 1.483808994293213, "eval_runtime": 8.6836, "eval_samples_per_second": 540.441, "eval_steps_per_second": 67.598, "step": 232500 }, { "epoch": 53.4, "learning_rate": 4.9829174049355034e-05, "loss": 1.0581, "step": 233000 }, { "epoch": 53.4, "eval_loss": 1.4854003190994263, "eval_runtime": 8.6933, "eval_samples_per_second": 539.84, "eval_steps_per_second": 67.523, "step": 233000 }, { "epoch": 53.52, "learning_rate": 4.982624386424938e-05, "loss": 1.062, "step": 233500 }, { "epoch": 53.52, "eval_loss": 1.4689066410064697, "eval_runtime": 8.6859, "eval_samples_per_second": 540.3, "eval_steps_per_second": 67.581, "step": 233500 }, { "epoch": 53.63, "learning_rate": 4.982328884922446e-05, "loss": 1.0582, "step": 234000 }, { "epoch": 53.63, "eval_loss": 1.4774550199508667, "eval_runtime": 8.6834, "eval_samples_per_second": 540.456, "eval_steps_per_second": 67.6, "step": 234000 }, { "epoch": 53.75, "learning_rate": 4.982030900723573e-05, "loss": 1.0628, "step": 234500 }, { "epoch": 53.75, "eval_loss": 1.4727394580841064, "eval_runtime": 8.6822, "eval_samples_per_second": 540.534, "eval_steps_per_second": 67.61, "step": 234500 }, { "epoch": 53.86, "learning_rate": 4.981730434126347e-05, "loss": 1.0636, "step": 235000 }, { "epoch": 53.86, "eval_loss": 1.455463171005249, "eval_runtime": 8.6858, "eval_samples_per_second": 540.31, "eval_steps_per_second": 67.582, "step": 235000 }, { "epoch": 53.98, "learning_rate": 4.9814274854312786e-05, "loss": 1.0586, "step": 235500 }, { "epoch": 53.98, "eval_loss": 1.4719891548156738, "eval_runtime": 8.6809, "eval_samples_per_second": 540.613, "eval_steps_per_second": 67.62, "step": 235500 }, { "epoch": 54.09, "learning_rate": 4.981122054941362e-05, "loss": 1.0514, "step": 236000 }, { "epoch": 54.09, "eval_loss": 1.4879354238510132, "eval_runtime": 8.6885, "eval_samples_per_second": 540.142, "eval_steps_per_second": 67.561, "step": 236000 }, { "epoch": 54.21, "learning_rate": 4.9808141429620736e-05, "loss": 1.0506, "step": 236500 }, { "epoch": 54.21, "eval_loss": 1.4788156747817993, "eval_runtime": 8.6811, "eval_samples_per_second": 540.603, "eval_steps_per_second": 67.619, "step": 236500 }, { "epoch": 54.32, "learning_rate": 4.9805037498013695e-05, "loss": 1.0552, "step": 237000 }, { "epoch": 54.32, "eval_loss": 1.4671096801757812, "eval_runtime": 8.6852, "eval_samples_per_second": 540.343, "eval_steps_per_second": 67.586, "step": 237000 }, { "epoch": 54.43, "learning_rate": 4.9801908757696894e-05, "loss": 1.0529, "step": 237500 }, { "epoch": 54.43, "eval_loss": 1.4780473709106445, "eval_runtime": 8.6906, "eval_samples_per_second": 540.011, "eval_steps_per_second": 67.545, "step": 237500 }, { "epoch": 54.55, "learning_rate": 4.9798755211799543e-05, "loss": 1.0578, "step": 238000 }, { "epoch": 54.55, "eval_loss": 1.480262279510498, "eval_runtime": 8.6801, "eval_samples_per_second": 540.665, "eval_steps_per_second": 67.626, "step": 238000 }, { "epoch": 54.66, "learning_rate": 4.979557686347565e-05, "loss": 1.0682, "step": 238500 }, { "epoch": 54.66, "eval_loss": 1.4741836786270142, "eval_runtime": 8.6819, "eval_samples_per_second": 540.548, "eval_steps_per_second": 67.612, "step": 238500 }, { "epoch": 54.78, "learning_rate": 4.979237371590403e-05, "loss": 1.0615, "step": 239000 }, { "epoch": 54.78, "eval_loss": 1.4946365356445312, "eval_runtime": 8.6775, "eval_samples_per_second": 540.826, "eval_steps_per_second": 67.646, "step": 239000 }, { "epoch": 54.89, "learning_rate": 4.9789145772288304e-05, "loss": 1.0642, "step": 239500 }, { "epoch": 54.89, "eval_loss": 1.486363172531128, "eval_runtime": 8.6834, "eval_samples_per_second": 540.455, "eval_steps_per_second": 67.6, "step": 239500 }, { "epoch": 55.01, "learning_rate": 4.97858930358569e-05, "loss": 1.0672, "step": 240000 }, { "epoch": 55.01, "eval_loss": 1.4866067171096802, "eval_runtime": 8.6851, "eval_samples_per_second": 540.348, "eval_steps_per_second": 67.587, "step": 240000 }, { "epoch": 55.12, "learning_rate": 4.9782615509863034e-05, "loss": 1.0445, "step": 240500 }, { "epoch": 55.12, "eval_loss": 1.4574443101882935, "eval_runtime": 8.6814, "eval_samples_per_second": 540.584, "eval_steps_per_second": 67.616, "step": 240500 }, { "epoch": 55.24, "learning_rate": 4.9779313197584714e-05, "loss": 1.0452, "step": 241000 }, { "epoch": 55.24, "eval_loss": 1.4819873571395874, "eval_runtime": 8.6917, "eval_samples_per_second": 539.942, "eval_steps_per_second": 67.536, "step": 241000 }, { "epoch": 55.35, "learning_rate": 4.9775986102324745e-05, "loss": 1.0494, "step": 241500 }, { "epoch": 55.35, "eval_loss": 1.4920480251312256, "eval_runtime": 8.6797, "eval_samples_per_second": 540.688, "eval_steps_per_second": 67.629, "step": 241500 }, { "epoch": 55.47, "learning_rate": 4.977263422741072e-05, "loss": 1.0518, "step": 242000 }, { "epoch": 55.47, "eval_loss": 1.5045416355133057, "eval_runtime": 8.6818, "eval_samples_per_second": 540.558, "eval_steps_per_second": 67.613, "step": 242000 }, { "epoch": 55.58, "learning_rate": 4.9769257576194996e-05, "loss": 1.0563, "step": 242500 }, { "epoch": 55.58, "eval_loss": 1.4656827449798584, "eval_runtime": 8.6805, "eval_samples_per_second": 540.639, "eval_steps_per_second": 67.623, "step": 242500 }, { "epoch": 55.7, "learning_rate": 4.976585615205474e-05, "loss": 1.0527, "step": 243000 }, { "epoch": 55.7, "eval_loss": 1.483404278755188, "eval_runtime": 8.6856, "eval_samples_per_second": 540.32, "eval_steps_per_second": 67.583, "step": 243000 }, { "epoch": 55.81, "learning_rate": 4.9762429958391856e-05, "loss": 1.0572, "step": 243500 }, { "epoch": 55.81, "eval_loss": 1.5015789270401, "eval_runtime": 8.6875, "eval_samples_per_second": 540.203, "eval_steps_per_second": 67.569, "step": 243500 }, { "epoch": 55.92, "learning_rate": 4.975897899863308e-05, "loss": 1.0552, "step": 244000 }, { "epoch": 55.92, "eval_loss": 1.474813461303711, "eval_runtime": 8.6863, "eval_samples_per_second": 540.273, "eval_steps_per_second": 67.577, "step": 244000 }, { "epoch": 56.04, "learning_rate": 4.975550327622984e-05, "loss": 1.0569, "step": 244500 }, { "epoch": 56.04, "eval_loss": 1.4723361730575562, "eval_runtime": 8.691, "eval_samples_per_second": 539.986, "eval_steps_per_second": 67.541, "step": 244500 }, { "epoch": 56.15, "learning_rate": 4.975200279465841e-05, "loss": 1.0413, "step": 245000 }, { "epoch": 56.15, "eval_loss": 1.481834053993225, "eval_runtime": 8.6837, "eval_samples_per_second": 540.439, "eval_steps_per_second": 67.598, "step": 245000 }, { "epoch": 56.27, "learning_rate": 4.974847755741978e-05, "loss": 1.0464, "step": 245500 }, { "epoch": 56.27, "eval_loss": 1.4892394542694092, "eval_runtime": 8.6898, "eval_samples_per_second": 540.059, "eval_steps_per_second": 67.551, "step": 245500 }, { "epoch": 56.38, "learning_rate": 4.97449275680397e-05, "loss": 1.0462, "step": 246000 }, { "epoch": 56.38, "eval_loss": 1.4946812391281128, "eval_runtime": 8.6804, "eval_samples_per_second": 540.642, "eval_steps_per_second": 67.623, "step": 246000 }, { "epoch": 56.5, "learning_rate": 4.9741352830068686e-05, "loss": 1.0443, "step": 246500 }, { "epoch": 56.5, "eval_loss": 1.4905681610107422, "eval_runtime": 8.6863, "eval_samples_per_second": 540.275, "eval_steps_per_second": 67.578, "step": 246500 }, { "epoch": 56.61, "learning_rate": 4.973775334708202e-05, "loss": 1.0525, "step": 247000 }, { "epoch": 56.61, "eval_loss": 1.4830855131149292, "eval_runtime": 8.6844, "eval_samples_per_second": 540.394, "eval_steps_per_second": 67.592, "step": 247000 }, { "epoch": 56.73, "learning_rate": 4.973412912267969e-05, "loss": 1.0554, "step": 247500 }, { "epoch": 56.73, "eval_loss": 1.498544454574585, "eval_runtime": 8.6838, "eval_samples_per_second": 540.432, "eval_steps_per_second": 67.597, "step": 247500 }, { "epoch": 56.84, "learning_rate": 4.9730480160486485e-05, "loss": 1.052, "step": 248000 }, { "epoch": 56.84, "eval_loss": 1.4856237173080444, "eval_runtime": 8.6891, "eval_samples_per_second": 540.103, "eval_steps_per_second": 67.556, "step": 248000 }, { "epoch": 56.96, "learning_rate": 4.97268064641519e-05, "loss": 1.0624, "step": 248500 }, { "epoch": 56.96, "eval_loss": 1.4825369119644165, "eval_runtime": 8.6841, "eval_samples_per_second": 540.41, "eval_steps_per_second": 67.594, "step": 248500 }, { "epoch": 57.07, "learning_rate": 4.972310803735016e-05, "loss": 1.0415, "step": 249000 }, { "epoch": 57.07, "eval_loss": 1.485124111175537, "eval_runtime": 8.6935, "eval_samples_per_second": 539.828, "eval_steps_per_second": 67.522, "step": 249000 }, { "epoch": 57.19, "learning_rate": 4.9719384883780265e-05, "loss": 1.0421, "step": 249500 }, { "epoch": 57.19, "eval_loss": 1.476237416267395, "eval_runtime": 8.685, "eval_samples_per_second": 540.356, "eval_steps_per_second": 67.588, "step": 249500 }, { "epoch": 57.3, "learning_rate": 4.9715637007165895e-05, "loss": 1.0448, "step": 250000 }, { "epoch": 57.3, "eval_loss": 1.476651906967163, "eval_runtime": 8.6799, "eval_samples_per_second": 540.675, "eval_steps_per_second": 67.628, "step": 250000 }, { "epoch": 57.41, "learning_rate": 4.971186441125549e-05, "loss": 1.0442, "step": 250500 }, { "epoch": 57.41, "eval_loss": 1.4990246295928955, "eval_runtime": 8.6872, "eval_samples_per_second": 540.219, "eval_steps_per_second": 67.571, "step": 250500 }, { "epoch": 57.53, "learning_rate": 4.970806709982221e-05, "loss": 1.0469, "step": 251000 }, { "epoch": 57.53, "eval_loss": 1.4902597665786743, "eval_runtime": 8.6858, "eval_samples_per_second": 540.306, "eval_steps_per_second": 67.581, "step": 251000 }, { "epoch": 57.64, "learning_rate": 4.970424507666392e-05, "loss": 1.0459, "step": 251500 }, { "epoch": 57.64, "eval_loss": 1.496476411819458, "eval_runtime": 8.6833, "eval_samples_per_second": 540.461, "eval_steps_per_second": 67.601, "step": 251500 }, { "epoch": 57.76, "learning_rate": 4.970039834560323e-05, "loss": 1.0516, "step": 252000 }, { "epoch": 57.76, "eval_loss": 1.4626026153564453, "eval_runtime": 8.6894, "eval_samples_per_second": 540.085, "eval_steps_per_second": 67.554, "step": 252000 }, { "epoch": 57.87, "learning_rate": 4.9696526910487416e-05, "loss": 1.0547, "step": 252500 }, { "epoch": 57.87, "eval_loss": 1.4667470455169678, "eval_runtime": 8.6855, "eval_samples_per_second": 540.324, "eval_steps_per_second": 67.584, "step": 252500 }, { "epoch": 57.99, "learning_rate": 4.9692630775188495e-05, "loss": 1.0545, "step": 253000 }, { "epoch": 57.99, "eval_loss": 1.4665850400924683, "eval_runtime": 8.6876, "eval_samples_per_second": 540.198, "eval_steps_per_second": 67.568, "step": 253000 }, { "epoch": 58.1, "learning_rate": 4.9688709943603176e-05, "loss": 1.0396, "step": 253500 }, { "epoch": 58.1, "eval_loss": 1.5017927885055542, "eval_runtime": 8.6951, "eval_samples_per_second": 539.727, "eval_steps_per_second": 67.509, "step": 253500 }, { "epoch": 58.22, "learning_rate": 4.9684764419652876e-05, "loss": 1.0396, "step": 254000 }, { "epoch": 58.22, "eval_loss": 1.5073521137237549, "eval_runtime": 8.6837, "eval_samples_per_second": 540.436, "eval_steps_per_second": 67.598, "step": 254000 }, { "epoch": 58.33, "learning_rate": 4.96807942072837e-05, "loss": 1.0491, "step": 254500 }, { "epoch": 58.33, "eval_loss": 1.4912420511245728, "eval_runtime": 8.6854, "eval_samples_per_second": 540.335, "eval_steps_per_second": 67.585, "step": 254500 }, { "epoch": 58.45, "learning_rate": 4.967679931046645e-05, "loss": 1.0437, "step": 255000 }, { "epoch": 58.45, "eval_loss": 1.5059596300125122, "eval_runtime": 8.6878, "eval_samples_per_second": 540.182, "eval_steps_per_second": 67.566, "step": 255000 }, { "epoch": 58.56, "learning_rate": 4.967277973319661e-05, "loss": 1.0468, "step": 255500 }, { "epoch": 58.56, "eval_loss": 1.4774737358093262, "eval_runtime": 8.6874, "eval_samples_per_second": 540.21, "eval_steps_per_second": 67.569, "step": 255500 }, { "epoch": 58.68, "learning_rate": 4.9668735479494364e-05, "loss": 1.0491, "step": 256000 }, { "epoch": 58.68, "eval_loss": 1.4895846843719482, "eval_runtime": 8.6765, "eval_samples_per_second": 540.888, "eval_steps_per_second": 67.654, "step": 256000 }, { "epoch": 58.79, "learning_rate": 4.966466655340455e-05, "loss": 1.0532, "step": 256500 }, { "epoch": 58.79, "eval_loss": 1.5017368793487549, "eval_runtime": 8.6791, "eval_samples_per_second": 540.725, "eval_steps_per_second": 67.634, "step": 256500 }, { "epoch": 58.9, "learning_rate": 4.9660572958996697e-05, "loss": 1.0504, "step": 257000 }, { "epoch": 58.9, "eval_loss": 1.4869369268417358, "eval_runtime": 8.6842, "eval_samples_per_second": 540.408, "eval_steps_per_second": 67.594, "step": 257000 }, { "epoch": 59.02, "learning_rate": 4.965645470036502e-05, "loss": 1.0473, "step": 257500 }, { "epoch": 59.02, "eval_loss": 1.483688473701477, "eval_runtime": 8.6826, "eval_samples_per_second": 540.508, "eval_steps_per_second": 67.607, "step": 257500 }, { "epoch": 59.13, "learning_rate": 4.965231178162838e-05, "loss": 1.0378, "step": 258000 }, { "epoch": 59.13, "eval_loss": 1.4915027618408203, "eval_runtime": 8.6835, "eval_samples_per_second": 540.451, "eval_steps_per_second": 67.6, "step": 258000 }, { "epoch": 59.25, "learning_rate": 4.96481442069303e-05, "loss": 1.0413, "step": 258500 }, { "epoch": 59.25, "eval_loss": 1.4818259477615356, "eval_runtime": 8.6782, "eval_samples_per_second": 540.782, "eval_steps_per_second": 67.641, "step": 258500 }, { "epoch": 59.36, "learning_rate": 4.964395198043898e-05, "loss": 1.0429, "step": 259000 }, { "epoch": 59.36, "eval_loss": 1.483168363571167, "eval_runtime": 8.6906, "eval_samples_per_second": 540.01, "eval_steps_per_second": 67.544, "step": 259000 }, { "epoch": 59.48, "learning_rate": 4.963973510634728e-05, "loss": 1.0462, "step": 259500 }, { "epoch": 59.48, "eval_loss": 1.4895223379135132, "eval_runtime": 8.6891, "eval_samples_per_second": 540.099, "eval_steps_per_second": 67.556, "step": 259500 }, { "epoch": 59.59, "learning_rate": 4.963549358887267e-05, "loss": 1.0477, "step": 260000 }, { "epoch": 59.59, "eval_loss": 1.4745076894760132, "eval_runtime": 8.685, "eval_samples_per_second": 540.356, "eval_steps_per_second": 67.588, "step": 260000 }, { "epoch": 59.71, "learning_rate": 4.963122743225732e-05, "loss": 1.0533, "step": 260500 }, { "epoch": 59.71, "eval_loss": 1.4741053581237793, "eval_runtime": 8.6875, "eval_samples_per_second": 540.202, "eval_steps_per_second": 67.568, "step": 260500 }, { "epoch": 59.82, "learning_rate": 4.9626936640768e-05, "loss": 1.0473, "step": 261000 }, { "epoch": 59.82, "eval_loss": 1.4787662029266357, "eval_runtime": 8.6835, "eval_samples_per_second": 540.451, "eval_steps_per_second": 67.6, "step": 261000 }, { "epoch": 59.94, "learning_rate": 4.9622621218696154e-05, "loss": 1.0524, "step": 261500 }, { "epoch": 59.94, "eval_loss": 1.4879391193389893, "eval_runtime": 8.687, "eval_samples_per_second": 540.23, "eval_steps_per_second": 67.572, "step": 261500 }, { "epoch": 60.05, "learning_rate": 4.961828117035783e-05, "loss": 1.0417, "step": 262000 }, { "epoch": 60.05, "eval_loss": 1.4918880462646484, "eval_runtime": 8.686, "eval_samples_per_second": 540.294, "eval_steps_per_second": 67.58, "step": 262000 }, { "epoch": 60.17, "learning_rate": 4.9613916500093714e-05, "loss": 1.0367, "step": 262500 }, { "epoch": 60.17, "eval_loss": 1.466500997543335, "eval_runtime": 8.6785, "eval_samples_per_second": 540.764, "eval_steps_per_second": 67.639, "step": 262500 }, { "epoch": 60.28, "learning_rate": 4.960952721226914e-05, "loss": 1.0328, "step": 263000 }, { "epoch": 60.28, "eval_loss": 1.497222900390625, "eval_runtime": 8.6795, "eval_samples_per_second": 540.698, "eval_steps_per_second": 67.63, "step": 263000 }, { "epoch": 60.39, "learning_rate": 4.9605113311274036e-05, "loss": 1.0372, "step": 263500 }, { "epoch": 60.39, "eval_loss": 1.4830553531646729, "eval_runtime": 8.682, "eval_samples_per_second": 540.544, "eval_steps_per_second": 67.611, "step": 263500 }, { "epoch": 60.51, "learning_rate": 4.960067480152296e-05, "loss": 1.042, "step": 264000 }, { "epoch": 60.51, "eval_loss": 1.4864578247070312, "eval_runtime": 8.6844, "eval_samples_per_second": 540.396, "eval_steps_per_second": 67.593, "step": 264000 }, { "epoch": 60.62, "learning_rate": 4.959621168745507e-05, "loss": 1.0488, "step": 264500 }, { "epoch": 60.62, "eval_loss": 1.495725154876709, "eval_runtime": 8.6774, "eval_samples_per_second": 540.828, "eval_steps_per_second": 67.647, "step": 264500 }, { "epoch": 60.74, "learning_rate": 4.959172397353415e-05, "loss": 1.0488, "step": 265000 }, { "epoch": 60.74, "eval_loss": 1.5080246925354004, "eval_runtime": 8.699, "eval_samples_per_second": 539.485, "eval_steps_per_second": 67.479, "step": 265000 }, { "epoch": 60.85, "learning_rate": 4.958721166424858e-05, "loss": 1.0394, "step": 265500 }, { "epoch": 60.85, "eval_loss": 1.4648449420928955, "eval_runtime": 8.6825, "eval_samples_per_second": 540.51, "eval_steps_per_second": 67.607, "step": 265500 }, { "epoch": 60.97, "learning_rate": 4.9582674764111326e-05, "loss": 1.0485, "step": 266000 }, { "epoch": 60.97, "eval_loss": 1.4943435192108154, "eval_runtime": 8.6849, "eval_samples_per_second": 540.366, "eval_steps_per_second": 67.589, "step": 266000 }, { "epoch": 61.08, "learning_rate": 4.957811327765997e-05, "loss": 1.037, "step": 266500 }, { "epoch": 61.08, "eval_loss": 1.4981658458709717, "eval_runtime": 8.6799, "eval_samples_per_second": 540.677, "eval_steps_per_second": 67.628, "step": 266500 }, { "epoch": 61.2, "learning_rate": 4.9573527209456675e-05, "loss": 1.0282, "step": 267000 }, { "epoch": 61.2, "eval_loss": 1.4907140731811523, "eval_runtime": 8.7202, "eval_samples_per_second": 538.178, "eval_steps_per_second": 67.315, "step": 267000 }, { "epoch": 61.31, "learning_rate": 4.9568916564088174e-05, "loss": 1.0344, "step": 267500 }, { "epoch": 61.31, "eval_loss": 1.4855756759643555, "eval_runtime": 8.7218, "eval_samples_per_second": 538.078, "eval_steps_per_second": 67.303, "step": 267500 }, { "epoch": 61.43, "learning_rate": 4.956428134616581e-05, "loss": 1.0369, "step": 268000 }, { "epoch": 61.43, "eval_loss": 1.4881483316421509, "eval_runtime": 8.7393, "eval_samples_per_second": 537.001, "eval_steps_per_second": 67.168, "step": 268000 }, { "epoch": 61.54, "learning_rate": 4.955962156032548e-05, "loss": 1.0416, "step": 268500 }, { "epoch": 61.54, "eval_loss": 1.498850703239441, "eval_runtime": 8.7174, "eval_samples_per_second": 538.351, "eval_steps_per_second": 67.337, "step": 268500 }, { "epoch": 61.65, "learning_rate": 4.955493721122766e-05, "loss": 1.0414, "step": 269000 }, { "epoch": 61.65, "eval_loss": 1.5085817575454712, "eval_runtime": 8.7081, "eval_samples_per_second": 538.925, "eval_steps_per_second": 67.409, "step": 269000 }, { "epoch": 61.77, "learning_rate": 4.9550228303557386e-05, "loss": 1.0419, "step": 269500 }, { "epoch": 61.77, "eval_loss": 1.5040541887283325, "eval_runtime": 8.7325, "eval_samples_per_second": 537.416, "eval_steps_per_second": 67.22, "step": 269500 }, { "epoch": 61.88, "learning_rate": 4.9545494842024284e-05, "loss": 1.04, "step": 270000 }, { "epoch": 61.88, "eval_loss": 1.5131479501724243, "eval_runtime": 8.7235, "eval_samples_per_second": 537.974, "eval_steps_per_second": 67.29, "step": 270000 }, { "epoch": 62.0, "learning_rate": 4.95407368313625e-05, "loss": 1.0454, "step": 270500 }, { "epoch": 62.0, "eval_loss": 1.49935781955719, "eval_runtime": 8.7251, "eval_samples_per_second": 537.876, "eval_steps_per_second": 67.277, "step": 270500 }, { "epoch": 62.11, "learning_rate": 4.9535954276330745e-05, "loss": 1.0341, "step": 271000 }, { "epoch": 62.11, "eval_loss": 1.5197975635528564, "eval_runtime": 8.7206, "eval_samples_per_second": 538.148, "eval_steps_per_second": 67.312, "step": 271000 }, { "epoch": 62.23, "learning_rate": 4.9531147181712304e-05, "loss": 1.0315, "step": 271500 }, { "epoch": 62.23, "eval_loss": 1.4866434335708618, "eval_runtime": 8.7238, "eval_samples_per_second": 537.951, "eval_steps_per_second": 67.287, "step": 271500 }, { "epoch": 62.34, "learning_rate": 4.952631555231495e-05, "loss": 1.0336, "step": 272000 }, { "epoch": 62.34, "eval_loss": 1.5171456336975098, "eval_runtime": 8.7098, "eval_samples_per_second": 538.816, "eval_steps_per_second": 67.395, "step": 272000 }, { "epoch": 62.46, "learning_rate": 4.952145939297106e-05, "loss": 1.0292, "step": 272500 }, { "epoch": 62.46, "eval_loss": 1.5139013528823853, "eval_runtime": 8.7111, "eval_samples_per_second": 538.738, "eval_steps_per_second": 67.385, "step": 272500 }, { "epoch": 62.57, "learning_rate": 4.951657870853749e-05, "loss": 1.0443, "step": 273000 }, { "epoch": 62.57, "eval_loss": 1.4994021654129028, "eval_runtime": 8.7136, "eval_samples_per_second": 538.584, "eval_steps_per_second": 67.366, "step": 273000 }, { "epoch": 62.69, "learning_rate": 4.9511673503895653e-05, "loss": 1.037, "step": 273500 }, { "epoch": 62.69, "eval_loss": 1.4990782737731934, "eval_runtime": 8.7211, "eval_samples_per_second": 538.12, "eval_steps_per_second": 67.308, "step": 273500 }, { "epoch": 62.8, "learning_rate": 4.950674378395149e-05, "loss": 1.0469, "step": 274000 }, { "epoch": 62.8, "eval_loss": 1.4761539697647095, "eval_runtime": 8.9452, "eval_samples_per_second": 524.641, "eval_steps_per_second": 65.622, "step": 274000 }, { "epoch": 62.92, "learning_rate": 4.9501789553635436e-05, "loss": 1.0472, "step": 274500 }, { "epoch": 62.92, "eval_loss": 1.4913647174835205, "eval_runtime": 8.7346, "eval_samples_per_second": 537.289, "eval_steps_per_second": 67.204, "step": 274500 }, { "epoch": 63.03, "learning_rate": 4.949681081790247e-05, "loss": 1.0416, "step": 275000 }, { "epoch": 63.03, "eval_loss": 1.5266327857971191, "eval_runtime": 8.73, "eval_samples_per_second": 537.57, "eval_steps_per_second": 67.239, "step": 275000 }, { "epoch": 63.14, "learning_rate": 4.9491807581732056e-05, "loss": 1.0283, "step": 275500 }, { "epoch": 63.14, "eval_loss": 1.5132968425750732, "eval_runtime": 8.7354, "eval_samples_per_second": 537.238, "eval_steps_per_second": 67.198, "step": 275500 }, { "epoch": 63.26, "learning_rate": 4.948677985012816e-05, "loss": 1.0321, "step": 276000 }, { "epoch": 63.26, "eval_loss": 1.5040504932403564, "eval_runtime": 8.7237, "eval_samples_per_second": 537.958, "eval_steps_per_second": 67.288, "step": 276000 }, { "epoch": 63.37, "learning_rate": 4.948172762811928e-05, "loss": 1.03, "step": 276500 }, { "epoch": 63.37, "eval_loss": 1.4970957040786743, "eval_runtime": 8.7245, "eval_samples_per_second": 537.91, "eval_steps_per_second": 67.282, "step": 276500 }, { "epoch": 63.49, "learning_rate": 4.947665092075837e-05, "loss": 1.0334, "step": 277000 }, { "epoch": 63.49, "eval_loss": 1.4944591522216797, "eval_runtime": 8.7256, "eval_samples_per_second": 537.84, "eval_steps_per_second": 67.273, "step": 277000 }, { "epoch": 63.6, "learning_rate": 4.9471549733122896e-05, "loss": 1.0339, "step": 277500 }, { "epoch": 63.6, "eval_loss": 1.4970756769180298, "eval_runtime": 8.7196, "eval_samples_per_second": 538.214, "eval_steps_per_second": 67.32, "step": 277500 }, { "epoch": 63.72, "learning_rate": 4.94664240703148e-05, "loss": 1.0379, "step": 278000 }, { "epoch": 63.72, "eval_loss": 1.4899705648422241, "eval_runtime": 8.7238, "eval_samples_per_second": 537.956, "eval_steps_per_second": 67.287, "step": 278000 }, { "epoch": 63.83, "learning_rate": 4.94612739374605e-05, "loss": 1.039, "step": 278500 }, { "epoch": 63.83, "eval_loss": 1.5068836212158203, "eval_runtime": 8.717, "eval_samples_per_second": 538.371, "eval_steps_per_second": 67.339, "step": 278500 }, { "epoch": 63.95, "learning_rate": 4.945609933971089e-05, "loss": 1.0378, "step": 279000 }, { "epoch": 63.95, "eval_loss": 1.50840425491333, "eval_runtime": 8.716, "eval_samples_per_second": 538.438, "eval_steps_per_second": 67.348, "step": 279000 }, { "epoch": 64.06, "learning_rate": 4.9450900282241344e-05, "loss": 1.0301, "step": 279500 }, { "epoch": 64.06, "eval_loss": 1.5302428007125854, "eval_runtime": 8.7137, "eval_samples_per_second": 538.578, "eval_steps_per_second": 67.365, "step": 279500 }, { "epoch": 64.18, "learning_rate": 4.944567677025168e-05, "loss": 1.0304, "step": 280000 }, { "epoch": 64.18, "eval_loss": 1.4791302680969238, "eval_runtime": 8.7276, "eval_samples_per_second": 537.717, "eval_steps_per_second": 67.258, "step": 280000 }, { "epoch": 64.29, "learning_rate": 4.9440428808966194e-05, "loss": 1.0229, "step": 280500 }, { "epoch": 64.29, "eval_loss": 1.499395489692688, "eval_runtime": 8.7259, "eval_samples_per_second": 537.825, "eval_steps_per_second": 67.271, "step": 280500 }, { "epoch": 64.41, "learning_rate": 4.943515640363361e-05, "loss": 1.0327, "step": 281000 }, { "epoch": 64.41, "eval_loss": 1.5016289949417114, "eval_runtime": 8.7215, "eval_samples_per_second": 538.097, "eval_steps_per_second": 67.305, "step": 281000 }, { "epoch": 64.52, "learning_rate": 4.942985955952712e-05, "loss": 1.0232, "step": 281500 }, { "epoch": 64.52, "eval_loss": 1.519260287284851, "eval_runtime": 8.7254, "eval_samples_per_second": 537.853, "eval_steps_per_second": 67.275, "step": 281500 }, { "epoch": 64.63, "learning_rate": 4.9424538281944354e-05, "loss": 1.0364, "step": 282000 }, { "epoch": 64.63, "eval_loss": 1.5095731019973755, "eval_runtime": 8.7284, "eval_samples_per_second": 537.673, "eval_steps_per_second": 67.252, "step": 282000 }, { "epoch": 64.75, "learning_rate": 4.941919257620738e-05, "loss": 1.0329, "step": 282500 }, { "epoch": 64.75, "eval_loss": 1.4907504320144653, "eval_runtime": 8.6585, "eval_samples_per_second": 542.008, "eval_steps_per_second": 67.794, "step": 282500 }, { "epoch": 64.86, "learning_rate": 4.9413822447662684e-05, "loss": 1.0395, "step": 283000 }, { "epoch": 64.86, "eval_loss": 1.4973968267440796, "eval_runtime": 8.7428, "eval_samples_per_second": 536.787, "eval_steps_per_second": 67.141, "step": 283000 }, { "epoch": 64.98, "learning_rate": 4.9408427901681196e-05, "loss": 1.043, "step": 283500 }, { "epoch": 64.98, "eval_loss": 1.527360439300537, "eval_runtime": 8.7319, "eval_samples_per_second": 537.456, "eval_steps_per_second": 67.225, "step": 283500 }, { "epoch": 65.09, "learning_rate": 4.940300894365827e-05, "loss": 1.0183, "step": 284000 }, { "epoch": 65.09, "eval_loss": 1.4810073375701904, "eval_runtime": 8.7362, "eval_samples_per_second": 537.191, "eval_steps_per_second": 67.192, "step": 284000 }, { "epoch": 65.21, "learning_rate": 4.939756557901365e-05, "loss": 1.0214, "step": 284500 }, { "epoch": 65.21, "eval_loss": 1.519277811050415, "eval_runtime": 8.7337, "eval_samples_per_second": 537.341, "eval_steps_per_second": 67.211, "step": 284500 }, { "epoch": 65.32, "learning_rate": 4.939209781319152e-05, "loss": 1.0275, "step": 285000 }, { "epoch": 65.32, "eval_loss": 1.5169512033462524, "eval_runtime": 8.7227, "eval_samples_per_second": 538.022, "eval_steps_per_second": 67.296, "step": 285000 }, { "epoch": 65.44, "learning_rate": 4.938660565166044e-05, "loss": 1.0305, "step": 285500 }, { "epoch": 65.44, "eval_loss": 1.5163440704345703, "eval_runtime": 8.7471, "eval_samples_per_second": 536.522, "eval_steps_per_second": 67.108, "step": 285500 }, { "epoch": 65.55, "learning_rate": 4.93810890999134e-05, "loss": 1.0289, "step": 286000 }, { "epoch": 65.55, "eval_loss": 1.5123355388641357, "eval_runtime": 8.7287, "eval_samples_per_second": 537.654, "eval_steps_per_second": 67.25, "step": 286000 }, { "epoch": 65.67, "learning_rate": 4.9375548163467763e-05, "loss": 1.0345, "step": 286500 }, { "epoch": 65.67, "eval_loss": 1.5163205862045288, "eval_runtime": 8.7289, "eval_samples_per_second": 537.639, "eval_steps_per_second": 67.248, "step": 286500 }, { "epoch": 65.78, "learning_rate": 4.936998284786528e-05, "loss": 1.0249, "step": 287000 }, { "epoch": 65.78, "eval_loss": 1.4969570636749268, "eval_runtime": 8.7429, "eval_samples_per_second": 536.781, "eval_steps_per_second": 67.14, "step": 287000 }, { "epoch": 65.89, "learning_rate": 4.93643931586721e-05, "loss": 1.0388, "step": 287500 }, { "epoch": 65.89, "eval_loss": 1.5167715549468994, "eval_runtime": 8.7456, "eval_samples_per_second": 536.611, "eval_steps_per_second": 67.119, "step": 287500 }, { "epoch": 66.01, "learning_rate": 4.9358779101478737e-05, "loss": 1.0301, "step": 288000 }, { "epoch": 66.01, "eval_loss": 1.4879167079925537, "eval_runtime": 8.7337, "eval_samples_per_second": 537.342, "eval_steps_per_second": 67.211, "step": 288000 }, { "epoch": 66.12, "learning_rate": 4.9353140681900076e-05, "loss": 1.0209, "step": 288500 }, { "epoch": 66.12, "eval_loss": 1.5130021572113037, "eval_runtime": 8.7292, "eval_samples_per_second": 537.622, "eval_steps_per_second": 67.246, "step": 288500 }, { "epoch": 66.24, "learning_rate": 4.934747790557537e-05, "loss": 1.0109, "step": 289000 }, { "epoch": 66.24, "eval_loss": 1.5301724672317505, "eval_runtime": 8.7346, "eval_samples_per_second": 537.286, "eval_steps_per_second": 67.204, "step": 289000 }, { "epoch": 66.35, "learning_rate": 4.934179077816824e-05, "loss": 1.0229, "step": 289500 }, { "epoch": 66.35, "eval_loss": 1.5247511863708496, "eval_runtime": 8.7283, "eval_samples_per_second": 537.675, "eval_steps_per_second": 67.252, "step": 289500 }, { "epoch": 66.47, "learning_rate": 4.933607930536665e-05, "loss": 1.0232, "step": 290000 }, { "epoch": 66.47, "eval_loss": 1.4976083040237427, "eval_runtime": 8.7256, "eval_samples_per_second": 537.845, "eval_steps_per_second": 67.274, "step": 290000 }, { "epoch": 66.58, "learning_rate": 4.9330343492882924e-05, "loss": 1.0235, "step": 290500 }, { "epoch": 66.58, "eval_loss": 1.515222191810608, "eval_runtime": 8.7419, "eval_samples_per_second": 536.837, "eval_steps_per_second": 67.148, "step": 290500 }, { "epoch": 66.7, "learning_rate": 4.932458334645373e-05, "loss": 1.0269, "step": 291000 }, { "epoch": 66.7, "eval_loss": 1.5157530307769775, "eval_runtime": 8.7313, "eval_samples_per_second": 537.491, "eval_steps_per_second": 67.229, "step": 291000 }, { "epoch": 66.81, "learning_rate": 4.9318798871840036e-05, "loss": 1.0332, "step": 291500 }, { "epoch": 66.81, "eval_loss": 1.504539966583252, "eval_runtime": 8.7464, "eval_samples_per_second": 536.565, "eval_steps_per_second": 67.113, "step": 291500 }, { "epoch": 66.93, "learning_rate": 4.93129900748272e-05, "loss": 1.0229, "step": 292000 }, { "epoch": 66.93, "eval_loss": 1.5006330013275146, "eval_runtime": 8.7291, "eval_samples_per_second": 537.63, "eval_steps_per_second": 67.247, "step": 292000 }, { "epoch": 67.04, "learning_rate": 4.930715696122489e-05, "loss": 1.0286, "step": 292500 }, { "epoch": 67.04, "eval_loss": 1.5082921981811523, "eval_runtime": 8.7375, "eval_samples_per_second": 537.107, "eval_steps_per_second": 67.181, "step": 292500 }, { "epoch": 67.16, "learning_rate": 4.930129953686705e-05, "loss": 1.0192, "step": 293000 }, { "epoch": 67.16, "eval_loss": 1.5427532196044922, "eval_runtime": 8.7318, "eval_samples_per_second": 537.461, "eval_steps_per_second": 67.226, "step": 293000 }, { "epoch": 67.27, "learning_rate": 4.929541780761199e-05, "loss": 1.0202, "step": 293500 }, { "epoch": 67.27, "eval_loss": 1.507314682006836, "eval_runtime": 8.7296, "eval_samples_per_second": 537.594, "eval_steps_per_second": 67.242, "step": 293500 }, { "epoch": 67.38, "learning_rate": 4.928951177934231e-05, "loss": 1.0189, "step": 294000 }, { "epoch": 67.38, "eval_loss": 1.5235850811004639, "eval_runtime": 8.7448, "eval_samples_per_second": 536.662, "eval_steps_per_second": 67.126, "step": 294000 }, { "epoch": 67.5, "learning_rate": 4.928358145796491e-05, "loss": 1.0254, "step": 294500 }, { "epoch": 67.5, "eval_loss": 1.5371549129486084, "eval_runtime": 8.7272, "eval_samples_per_second": 537.745, "eval_steps_per_second": 67.261, "step": 294500 }, { "epoch": 67.61, "learning_rate": 4.9277626849410984e-05, "loss": 1.0241, "step": 295000 }, { "epoch": 67.61, "eval_loss": 1.5227069854736328, "eval_runtime": 8.7454, "eval_samples_per_second": 536.623, "eval_steps_per_second": 67.121, "step": 295000 }, { "epoch": 67.73, "learning_rate": 4.9271647959636036e-05, "loss": 1.0209, "step": 295500 }, { "epoch": 67.73, "eval_loss": 1.504327416419983, "eval_runtime": 8.7276, "eval_samples_per_second": 537.718, "eval_steps_per_second": 67.258, "step": 295500 }, { "epoch": 67.84, "learning_rate": 4.9265644794619834e-05, "loss": 1.0297, "step": 296000 }, { "epoch": 67.84, "eval_loss": 1.5058881044387817, "eval_runtime": 8.7441, "eval_samples_per_second": 536.702, "eval_steps_per_second": 67.131, "step": 296000 }, { "epoch": 67.96, "learning_rate": 4.9259617360366425e-05, "loss": 1.0274, "step": 296500 }, { "epoch": 67.96, "eval_loss": 1.4919012784957886, "eval_runtime": 8.7291, "eval_samples_per_second": 537.63, "eval_steps_per_second": 67.247, "step": 296500 }, { "epoch": 68.07, "learning_rate": 4.925356566290414e-05, "loss": 1.0218, "step": 297000 }, { "epoch": 68.07, "eval_loss": 1.5248078107833862, "eval_runtime": 8.7314, "eval_samples_per_second": 537.483, "eval_steps_per_second": 67.228, "step": 297000 }, { "epoch": 68.19, "learning_rate": 4.9247489708285585e-05, "loss": 1.0127, "step": 297500 }, { "epoch": 68.19, "eval_loss": 1.5282312631607056, "eval_runtime": 8.723, "eval_samples_per_second": 538.006, "eval_steps_per_second": 67.294, "step": 297500 }, { "epoch": 68.3, "learning_rate": 4.92413895025876e-05, "loss": 1.0239, "step": 298000 }, { "epoch": 68.3, "eval_loss": 1.508757472038269, "eval_runtime": 8.7311, "eval_samples_per_second": 537.506, "eval_steps_per_second": 67.231, "step": 298000 }, { "epoch": 68.42, "learning_rate": 4.9235265051911285e-05, "loss": 1.0162, "step": 298500 }, { "epoch": 68.42, "eval_loss": 1.5122451782226562, "eval_runtime": 8.7281, "eval_samples_per_second": 537.686, "eval_steps_per_second": 67.254, "step": 298500 }, { "epoch": 68.53, "learning_rate": 4.922911636238202e-05, "loss": 1.0169, "step": 299000 }, { "epoch": 68.53, "eval_loss": 1.5228652954101562, "eval_runtime": 8.7227, "eval_samples_per_second": 538.022, "eval_steps_per_second": 67.296, "step": 299000 }, { "epoch": 68.65, "learning_rate": 4.9222943440149385e-05, "loss": 1.0172, "step": 299500 }, { "epoch": 68.65, "eval_loss": 1.5316460132598877, "eval_runtime": 8.7202, "eval_samples_per_second": 538.179, "eval_steps_per_second": 67.315, "step": 299500 }, { "epoch": 68.76, "learning_rate": 4.921674629138723e-05, "loss": 1.0297, "step": 300000 }, { "epoch": 68.76, "eval_loss": 1.514384388923645, "eval_runtime": 8.733, "eval_samples_per_second": 537.39, "eval_steps_per_second": 67.217, "step": 300000 }, { "epoch": 68.87, "learning_rate": 4.921052492229362e-05, "loss": 1.0227, "step": 300500 }, { "epoch": 68.87, "eval_loss": 1.5083518028259277, "eval_runtime": 8.7315, "eval_samples_per_second": 537.481, "eval_steps_per_second": 67.228, "step": 300500 }, { "epoch": 68.99, "learning_rate": 4.920427933909084e-05, "loss": 1.0248, "step": 301000 }, { "epoch": 68.99, "eval_loss": 1.5104906558990479, "eval_runtime": 8.7247, "eval_samples_per_second": 537.899, "eval_steps_per_second": 67.28, "step": 301000 }, { "epoch": 69.1, "learning_rate": 4.9198009548025394e-05, "loss": 1.0193, "step": 301500 }, { "epoch": 69.1, "eval_loss": 1.5191477537155151, "eval_runtime": 8.7245, "eval_samples_per_second": 537.91, "eval_steps_per_second": 67.282, "step": 301500 }, { "epoch": 69.22, "learning_rate": 4.919171555536801e-05, "loss": 1.0187, "step": 302000 }, { "epoch": 69.22, "eval_loss": 1.5113472938537598, "eval_runtime": 8.7227, "eval_samples_per_second": 538.023, "eval_steps_per_second": 67.296, "step": 302000 }, { "epoch": 69.33, "learning_rate": 4.91853973674136e-05, "loss": 1.0168, "step": 302500 }, { "epoch": 69.33, "eval_loss": 1.4888869524002075, "eval_runtime": 8.726, "eval_samples_per_second": 537.819, "eval_steps_per_second": 67.27, "step": 302500 }, { "epoch": 69.45, "learning_rate": 4.91790549904813e-05, "loss": 1.0137, "step": 303000 }, { "epoch": 69.45, "eval_loss": 1.5301419496536255, "eval_runtime": 8.721, "eval_samples_per_second": 538.125, "eval_steps_per_second": 67.309, "step": 303000 }, { "epoch": 69.56, "learning_rate": 4.917268843091443e-05, "loss": 1.0164, "step": 303500 }, { "epoch": 69.56, "eval_loss": 1.5449107885360718, "eval_runtime": 8.7298, "eval_samples_per_second": 537.586, "eval_steps_per_second": 67.241, "step": 303500 }, { "epoch": 69.68, "learning_rate": 4.916629769508048e-05, "loss": 1.0243, "step": 304000 }, { "epoch": 69.68, "eval_loss": 1.5276342630386353, "eval_runtime": 8.7249, "eval_samples_per_second": 537.887, "eval_steps_per_second": 67.279, "step": 304000 }, { "epoch": 69.79, "learning_rate": 4.915988278937114e-05, "loss": 1.0209, "step": 304500 }, { "epoch": 69.79, "eval_loss": 1.5213499069213867, "eval_runtime": 8.732, "eval_samples_per_second": 537.45, "eval_steps_per_second": 67.224, "step": 304500 }, { "epoch": 69.91, "learning_rate": 4.915344372020225e-05, "loss": 1.0198, "step": 305000 }, { "epoch": 69.91, "eval_loss": 1.515653371810913, "eval_runtime": 8.7248, "eval_samples_per_second": 537.889, "eval_steps_per_second": 67.279, "step": 305000 }, { "epoch": 70.02, "learning_rate": 4.914698049401385e-05, "loss": 1.0254, "step": 305500 }, { "epoch": 70.02, "eval_loss": 1.5187844038009644, "eval_runtime": 8.7272, "eval_samples_per_second": 537.747, "eval_steps_per_second": 67.261, "step": 305500 }, { "epoch": 70.14, "learning_rate": 4.914049311727012e-05, "loss": 1.0098, "step": 306000 }, { "epoch": 70.14, "eval_loss": 1.5185502767562866, "eval_runtime": 8.7252, "eval_samples_per_second": 537.869, "eval_steps_per_second": 67.277, "step": 306000 }, { "epoch": 70.25, "learning_rate": 4.913398159645939e-05, "loss": 1.0078, "step": 306500 }, { "epoch": 70.25, "eval_loss": 1.536851406097412, "eval_runtime": 8.7254, "eval_samples_per_second": 537.857, "eval_steps_per_second": 67.275, "step": 306500 }, { "epoch": 70.36, "learning_rate": 4.912744593809415e-05, "loss": 1.0158, "step": 307000 }, { "epoch": 70.36, "eval_loss": 1.5234601497650146, "eval_runtime": 8.7246, "eval_samples_per_second": 537.904, "eval_steps_per_second": 67.281, "step": 307000 }, { "epoch": 70.48, "learning_rate": 4.9120886148711034e-05, "loss": 1.0108, "step": 307500 }, { "epoch": 70.48, "eval_loss": 1.5269381999969482, "eval_runtime": 8.7247, "eval_samples_per_second": 537.899, "eval_steps_per_second": 67.28, "step": 307500 }, { "epoch": 70.59, "learning_rate": 4.911430223487078e-05, "loss": 1.0114, "step": 308000 }, { "epoch": 70.59, "eval_loss": 1.530612826347351, "eval_runtime": 8.7257, "eval_samples_per_second": 537.835, "eval_steps_per_second": 67.272, "step": 308000 }, { "epoch": 70.71, "learning_rate": 4.910769420315829e-05, "loss": 1.016, "step": 308500 }, { "epoch": 70.71, "eval_loss": 1.5187642574310303, "eval_runtime": 8.7163, "eval_samples_per_second": 538.418, "eval_steps_per_second": 67.345, "step": 308500 }, { "epoch": 70.82, "learning_rate": 4.910106206018258e-05, "loss": 1.016, "step": 309000 }, { "epoch": 70.82, "eval_loss": 1.5222249031066895, "eval_runtime": 8.7461, "eval_samples_per_second": 536.582, "eval_steps_per_second": 67.116, "step": 309000 }, { "epoch": 70.94, "learning_rate": 4.9094405812576774e-05, "loss": 1.0227, "step": 309500 }, { "epoch": 70.94, "eval_loss": 1.5198264122009277, "eval_runtime": 8.727, "eval_samples_per_second": 537.759, "eval_steps_per_second": 67.263, "step": 309500 }, { "epoch": 71.05, "learning_rate": 4.908772546699809e-05, "loss": 1.007, "step": 310000 }, { "epoch": 71.05, "eval_loss": 1.5210902690887451, "eval_runtime": 8.7174, "eval_samples_per_second": 538.35, "eval_steps_per_second": 67.337, "step": 310000 }, { "epoch": 71.17, "learning_rate": 4.9081021030127875e-05, "loss": 1.0035, "step": 310500 }, { "epoch": 71.17, "eval_loss": 1.52009117603302, "eval_runtime": 8.7285, "eval_samples_per_second": 537.666, "eval_steps_per_second": 67.251, "step": 310500 }, { "epoch": 71.28, "learning_rate": 4.907429250867156e-05, "loss": 1.0161, "step": 311000 }, { "epoch": 71.28, "eval_loss": 1.5110955238342285, "eval_runtime": 8.725, "eval_samples_per_second": 537.879, "eval_steps_per_second": 67.278, "step": 311000 }, { "epoch": 71.4, "learning_rate": 4.906753990935865e-05, "loss": 1.0058, "step": 311500 }, { "epoch": 71.4, "eval_loss": 1.5193021297454834, "eval_runtime": 8.7243, "eval_samples_per_second": 537.923, "eval_steps_per_second": 67.283, "step": 311500 }, { "epoch": 71.51, "learning_rate": 4.906076323894276e-05, "loss": 1.0132, "step": 312000 }, { "epoch": 71.51, "eval_loss": 1.5088608264923096, "eval_runtime": 8.7261, "eval_samples_per_second": 537.814, "eval_steps_per_second": 67.27, "step": 312000 }, { "epoch": 71.62, "learning_rate": 4.905396250420156e-05, "loss": 1.0082, "step": 312500 }, { "epoch": 71.62, "eval_loss": 1.5418381690979004, "eval_runtime": 8.7196, "eval_samples_per_second": 538.21, "eval_steps_per_second": 67.319, "step": 312500 }, { "epoch": 71.74, "learning_rate": 4.904713771193678e-05, "loss": 1.0212, "step": 313000 }, { "epoch": 71.74, "eval_loss": 1.5316030979156494, "eval_runtime": 8.7276, "eval_samples_per_second": 537.717, "eval_steps_per_second": 67.258, "step": 313000 }, { "epoch": 71.85, "learning_rate": 4.9040288868974224e-05, "loss": 1.0199, "step": 313500 }, { "epoch": 71.85, "eval_loss": 1.5061845779418945, "eval_runtime": 8.74, "eval_samples_per_second": 536.958, "eval_steps_per_second": 67.163, "step": 313500 }, { "epoch": 71.97, "learning_rate": 4.9033415982163764e-05, "loss": 1.0246, "step": 314000 }, { "epoch": 71.97, "eval_loss": 1.528902292251587, "eval_runtime": 8.72, "eval_samples_per_second": 538.19, "eval_steps_per_second": 67.317, "step": 314000 }, { "epoch": 72.08, "learning_rate": 4.9026519058379294e-05, "loss": 1.0075, "step": 314500 }, { "epoch": 72.08, "eval_loss": 1.5213245153427124, "eval_runtime": 8.7267, "eval_samples_per_second": 537.773, "eval_steps_per_second": 67.265, "step": 314500 }, { "epoch": 72.2, "learning_rate": 4.901959810451876e-05, "loss": 1.0077, "step": 315000 }, { "epoch": 72.2, "eval_loss": 1.5369867086410522, "eval_runtime": 8.73, "eval_samples_per_second": 537.572, "eval_steps_per_second": 67.24, "step": 315000 }, { "epoch": 72.31, "learning_rate": 4.901265312750414e-05, "loss": 1.0067, "step": 315500 }, { "epoch": 72.31, "eval_loss": 1.5198633670806885, "eval_runtime": 8.7216, "eval_samples_per_second": 538.087, "eval_steps_per_second": 67.304, "step": 315500 }, { "epoch": 72.43, "learning_rate": 4.9005684134281434e-05, "loss": 1.0163, "step": 316000 }, { "epoch": 72.43, "eval_loss": 1.5257244110107422, "eval_runtime": 8.7244, "eval_samples_per_second": 537.916, "eval_steps_per_second": 67.283, "step": 316000 }, { "epoch": 72.54, "learning_rate": 4.8998691131820686e-05, "loss": 1.0193, "step": 316500 }, { "epoch": 72.54, "eval_loss": 1.532248854637146, "eval_runtime": 8.7275, "eval_samples_per_second": 537.724, "eval_steps_per_second": 67.258, "step": 316500 }, { "epoch": 72.66, "learning_rate": 4.899167412711592e-05, "loss": 1.0092, "step": 317000 }, { "epoch": 72.66, "eval_loss": 1.5216959714889526, "eval_runtime": 8.7236, "eval_samples_per_second": 537.969, "eval_steps_per_second": 67.289, "step": 317000 }, { "epoch": 72.77, "learning_rate": 4.8984633127185164e-05, "loss": 1.0142, "step": 317500 }, { "epoch": 72.77, "eval_loss": 1.528602123260498, "eval_runtime": 8.7303, "eval_samples_per_second": 537.552, "eval_steps_per_second": 67.237, "step": 317500 }, { "epoch": 72.89, "learning_rate": 4.8977568139070487e-05, "loss": 1.0195, "step": 318000 }, { "epoch": 72.89, "eval_loss": 1.510827898979187, "eval_runtime": 8.7318, "eval_samples_per_second": 537.459, "eval_steps_per_second": 67.225, "step": 318000 }, { "epoch": 73.0, "learning_rate": 4.897047916983791e-05, "loss": 1.0148, "step": 318500 }, { "epoch": 73.0, "eval_loss": 1.5248165130615234, "eval_runtime": 8.7283, "eval_samples_per_second": 537.675, "eval_steps_per_second": 67.252, "step": 318500 }, { "epoch": 73.11, "learning_rate": 4.8963366226577464e-05, "loss": 1.0007, "step": 319000 }, { "epoch": 73.11, "eval_loss": 1.506242036819458, "eval_runtime": 8.7221, "eval_samples_per_second": 538.058, "eval_steps_per_second": 67.3, "step": 319000 }, { "epoch": 73.23, "learning_rate": 4.895622931640312e-05, "loss": 1.0007, "step": 319500 }, { "epoch": 73.23, "eval_loss": 1.52338445186615, "eval_runtime": 8.72, "eval_samples_per_second": 538.186, "eval_steps_per_second": 67.316, "step": 319500 }, { "epoch": 73.34, "learning_rate": 4.894906844645287e-05, "loss": 1.0035, "step": 320000 }, { "epoch": 73.34, "eval_loss": 1.5275533199310303, "eval_runtime": 8.7346, "eval_samples_per_second": 537.29, "eval_steps_per_second": 67.204, "step": 320000 }, { "epoch": 73.46, "learning_rate": 4.894188362388864e-05, "loss": 1.0058, "step": 320500 }, { "epoch": 73.46, "eval_loss": 1.5351226329803467, "eval_runtime": 8.7273, "eval_samples_per_second": 537.739, "eval_steps_per_second": 67.26, "step": 320500 }, { "epoch": 73.57, "learning_rate": 4.89346748558963e-05, "loss": 1.0094, "step": 321000 }, { "epoch": 73.57, "eval_loss": 1.5193467140197754, "eval_runtime": 8.7247, "eval_samples_per_second": 537.9, "eval_steps_per_second": 67.28, "step": 321000 }, { "epoch": 73.69, "learning_rate": 4.89274421496857e-05, "loss": 1.0175, "step": 321500 }, { "epoch": 73.69, "eval_loss": 1.5222405195236206, "eval_runtime": 8.7275, "eval_samples_per_second": 537.723, "eval_steps_per_second": 67.258, "step": 321500 }, { "epoch": 73.8, "learning_rate": 4.8920185512490615e-05, "loss": 1.0093, "step": 322000 }, { "epoch": 73.8, "eval_loss": 1.532472014427185, "eval_runtime": 8.7244, "eval_samples_per_second": 537.916, "eval_steps_per_second": 67.282, "step": 322000 }, { "epoch": 73.92, "learning_rate": 4.891290495156875e-05, "loss": 1.0111, "step": 322500 }, { "epoch": 73.92, "eval_loss": 1.5278300046920776, "eval_runtime": 8.716, "eval_samples_per_second": 538.434, "eval_steps_per_second": 67.347, "step": 322500 }, { "epoch": 74.03, "learning_rate": 4.8905600474201754e-05, "loss": 1.0139, "step": 323000 }, { "epoch": 74.03, "eval_loss": 1.5541963577270508, "eval_runtime": 8.7223, "eval_samples_per_second": 538.049, "eval_steps_per_second": 67.299, "step": 323000 }, { "epoch": 74.15, "learning_rate": 4.8898272087695173e-05, "loss": 1.0002, "step": 323500 }, { "epoch": 74.15, "eval_loss": 1.521405577659607, "eval_runtime": 8.7208, "eval_samples_per_second": 538.14, "eval_steps_per_second": 67.311, "step": 323500 }, { "epoch": 74.26, "learning_rate": 4.889091979937849e-05, "loss": 1.006, "step": 324000 }, { "epoch": 74.26, "eval_loss": 1.5382636785507202, "eval_runtime": 8.7235, "eval_samples_per_second": 537.97, "eval_steps_per_second": 67.289, "step": 324000 }, { "epoch": 74.38, "learning_rate": 4.888354361660507e-05, "loss": 1.006, "step": 324500 }, { "epoch": 74.38, "eval_loss": 1.549988031387329, "eval_runtime": 8.7162, "eval_samples_per_second": 538.425, "eval_steps_per_second": 67.346, "step": 324500 }, { "epoch": 74.49, "learning_rate": 4.8876143546752194e-05, "loss": 1.007, "step": 325000 }, { "epoch": 74.49, "eval_loss": 1.5310583114624023, "eval_runtime": 8.7307, "eval_samples_per_second": 537.525, "eval_steps_per_second": 67.234, "step": 325000 }, { "epoch": 74.6, "learning_rate": 4.8868719597221044e-05, "loss": 0.9995, "step": 325500 }, { "epoch": 74.6, "eval_loss": 1.5531303882598877, "eval_runtime": 8.7243, "eval_samples_per_second": 537.922, "eval_steps_per_second": 67.283, "step": 325500 }, { "epoch": 74.72, "learning_rate": 4.8861271775436633e-05, "loss": 1.0103, "step": 326000 }, { "epoch": 74.72, "eval_loss": 1.5349187850952148, "eval_runtime": 8.7329, "eval_samples_per_second": 537.395, "eval_steps_per_second": 67.217, "step": 326000 }, { "epoch": 74.83, "learning_rate": 4.8853800088847925e-05, "loss": 1.0068, "step": 326500 }, { "epoch": 74.83, "eval_loss": 1.5243899822235107, "eval_runtime": 8.7211, "eval_samples_per_second": 538.12, "eval_steps_per_second": 67.308, "step": 326500 }, { "epoch": 74.95, "learning_rate": 4.884630454492768e-05, "loss": 1.0051, "step": 327000 }, { "epoch": 74.95, "eval_loss": 1.5337499380111694, "eval_runtime": 8.7222, "eval_samples_per_second": 538.052, "eval_steps_per_second": 67.299, "step": 327000 }, { "epoch": 75.06, "learning_rate": 4.883878515117257e-05, "loss": 1.0111, "step": 327500 }, { "epoch": 75.06, "eval_loss": 1.5281380414962769, "eval_runtime": 8.724, "eval_samples_per_second": 537.942, "eval_steps_per_second": 67.286, "step": 327500 }, { "epoch": 75.18, "learning_rate": 4.883124191510309e-05, "loss": 0.9936, "step": 328000 }, { "epoch": 75.18, "eval_loss": 1.542115330696106, "eval_runtime": 8.7252, "eval_samples_per_second": 537.869, "eval_steps_per_second": 67.277, "step": 328000 }, { "epoch": 75.29, "learning_rate": 4.8823674844263604e-05, "loss": 1.0069, "step": 328500 }, { "epoch": 75.29, "eval_loss": 1.5279749631881714, "eval_runtime": 8.7232, "eval_samples_per_second": 537.994, "eval_steps_per_second": 67.292, "step": 328500 }, { "epoch": 75.41, "learning_rate": 4.8816083946222296e-05, "loss": 1.005, "step": 329000 }, { "epoch": 75.41, "eval_loss": 1.5133076906204224, "eval_runtime": 8.7256, "eval_samples_per_second": 537.842, "eval_steps_per_second": 67.273, "step": 329000 }, { "epoch": 75.52, "learning_rate": 4.88084692285712e-05, "loss": 0.9955, "step": 329500 }, { "epoch": 75.52, "eval_loss": 1.5204782485961914, "eval_runtime": 8.7261, "eval_samples_per_second": 537.81, "eval_steps_per_second": 67.269, "step": 329500 }, { "epoch": 75.64, "learning_rate": 4.880083069892614e-05, "loss": 1.0128, "step": 330000 }, { "epoch": 75.64, "eval_loss": 1.5347886085510254, "eval_runtime": 8.7241, "eval_samples_per_second": 537.938, "eval_steps_per_second": 67.285, "step": 330000 }, { "epoch": 75.75, "learning_rate": 4.879316836492679e-05, "loss": 1.0108, "step": 330500 }, { "epoch": 75.75, "eval_loss": 1.5235532522201538, "eval_runtime": 8.743, "eval_samples_per_second": 536.775, "eval_steps_per_second": 67.14, "step": 330500 }, { "epoch": 75.87, "learning_rate": 4.8785482234236614e-05, "loss": 1.0075, "step": 331000 }, { "epoch": 75.87, "eval_loss": 1.5258809328079224, "eval_runtime": 8.7279, "eval_samples_per_second": 537.703, "eval_steps_per_second": 67.256, "step": 331000 }, { "epoch": 75.98, "learning_rate": 4.877777231454289e-05, "loss": 1.0163, "step": 331500 }, { "epoch": 75.98, "eval_loss": 1.5331157445907593, "eval_runtime": 8.7125, "eval_samples_per_second": 538.653, "eval_steps_per_second": 67.375, "step": 331500 }, { "epoch": 76.09, "learning_rate": 4.877003861355667e-05, "loss": 1.0041, "step": 332000 }, { "epoch": 76.09, "eval_loss": 1.5348669290542603, "eval_runtime": 8.7186, "eval_samples_per_second": 538.272, "eval_steps_per_second": 67.327, "step": 332000 }, { "epoch": 76.21, "learning_rate": 4.876228113901279e-05, "loss": 0.9988, "step": 332500 }, { "epoch": 76.21, "eval_loss": 1.54334557056427, "eval_runtime": 8.7265, "eval_samples_per_second": 537.787, "eval_steps_per_second": 67.266, "step": 332500 }, { "epoch": 76.32, "learning_rate": 4.8754499898669905e-05, "loss": 1.0018, "step": 333000 }, { "epoch": 76.32, "eval_loss": 1.5342214107513428, "eval_runtime": 8.7275, "eval_samples_per_second": 537.728, "eval_steps_per_second": 67.259, "step": 333000 }, { "epoch": 76.44, "learning_rate": 4.874669490031037e-05, "loss": 0.9978, "step": 333500 }, { "epoch": 76.44, "eval_loss": 1.5391415357589722, "eval_runtime": 8.7201, "eval_samples_per_second": 538.181, "eval_steps_per_second": 67.316, "step": 333500 }, { "epoch": 76.55, "learning_rate": 4.8738866151740364e-05, "loss": 0.9968, "step": 334000 }, { "epoch": 76.55, "eval_loss": 1.5331487655639648, "eval_runtime": 8.7267, "eval_samples_per_second": 537.773, "eval_steps_per_second": 67.265, "step": 334000 }, { "epoch": 76.67, "learning_rate": 4.873101366078978e-05, "loss": 0.9992, "step": 334500 }, { "epoch": 76.67, "eval_loss": 1.5581574440002441, "eval_runtime": 8.7291, "eval_samples_per_second": 537.627, "eval_steps_per_second": 67.246, "step": 334500 }, { "epoch": 76.78, "learning_rate": 4.872313743531229e-05, "loss": 1.005, "step": 335000 }, { "epoch": 76.78, "eval_loss": 1.5357760190963745, "eval_runtime": 8.7355, "eval_samples_per_second": 537.233, "eval_steps_per_second": 67.197, "step": 335000 }, { "epoch": 76.9, "learning_rate": 4.871523748318526e-05, "loss": 1.0042, "step": 335500 }, { "epoch": 76.9, "eval_loss": 1.5200204849243164, "eval_runtime": 8.72, "eval_samples_per_second": 538.191, "eval_steps_per_second": 67.317, "step": 335500 }, { "epoch": 77.01, "learning_rate": 4.870731381230984e-05, "loss": 1.0025, "step": 336000 }, { "epoch": 77.01, "eval_loss": 1.5314589738845825, "eval_runtime": 8.7237, "eval_samples_per_second": 537.96, "eval_steps_per_second": 67.288, "step": 336000 }, { "epoch": 77.13, "learning_rate": 4.8699366430610846e-05, "loss": 0.9924, "step": 336500 }, { "epoch": 77.13, "eval_loss": 1.5618668794631958, "eval_runtime": 8.727, "eval_samples_per_second": 537.755, "eval_steps_per_second": 67.262, "step": 336500 }, { "epoch": 77.24, "learning_rate": 4.869139534603685e-05, "loss": 0.9942, "step": 337000 }, { "epoch": 77.24, "eval_loss": 1.5325990915298462, "eval_runtime": 8.726, "eval_samples_per_second": 537.816, "eval_steps_per_second": 67.27, "step": 337000 }, { "epoch": 77.35, "learning_rate": 4.868340056656012e-05, "loss": 0.9975, "step": 337500 }, { "epoch": 77.35, "eval_loss": 1.5281915664672852, "eval_runtime": 8.7301, "eval_samples_per_second": 537.566, "eval_steps_per_second": 67.239, "step": 337500 }, { "epoch": 77.47, "learning_rate": 4.867538210017662e-05, "loss": 1.0005, "step": 338000 }, { "epoch": 77.47, "eval_loss": 1.5396850109100342, "eval_runtime": 8.7315, "eval_samples_per_second": 537.478, "eval_steps_per_second": 67.228, "step": 338000 }, { "epoch": 77.58, "learning_rate": 4.866733995490599e-05, "loss": 1.0029, "step": 338500 }, { "epoch": 77.58, "eval_loss": 1.5361145734786987, "eval_runtime": 8.7235, "eval_samples_per_second": 537.972, "eval_steps_per_second": 67.289, "step": 338500 }, { "epoch": 77.7, "learning_rate": 4.865927413879158e-05, "loss": 1.009, "step": 339000 }, { "epoch": 77.7, "eval_loss": 1.5423623323440552, "eval_runtime": 8.7263, "eval_samples_per_second": 537.801, "eval_steps_per_second": 67.268, "step": 339000 }, { "epoch": 77.81, "learning_rate": 4.865118465990039e-05, "loss": 1.0052, "step": 339500 }, { "epoch": 77.81, "eval_loss": 1.5436949729919434, "eval_runtime": 8.7503, "eval_samples_per_second": 536.323, "eval_steps_per_second": 67.083, "step": 339500 }, { "epoch": 77.93, "learning_rate": 4.86430715263231e-05, "loss": 1.0093, "step": 340000 }, { "epoch": 77.93, "eval_loss": 1.5523157119750977, "eval_runtime": 8.7248, "eval_samples_per_second": 537.893, "eval_steps_per_second": 67.28, "step": 340000 }, { "epoch": 78.04, "learning_rate": 4.8634934746174055e-05, "loss": 1.0095, "step": 340500 }, { "epoch": 78.04, "eval_loss": 1.553124189376831, "eval_runtime": 8.7193, "eval_samples_per_second": 538.234, "eval_steps_per_second": 67.322, "step": 340500 }, { "epoch": 78.16, "learning_rate": 4.862677432759123e-05, "loss": 0.996, "step": 341000 }, { "epoch": 78.16, "eval_loss": 1.558811902999878, "eval_runtime": 8.7264, "eval_samples_per_second": 537.793, "eval_steps_per_second": 67.267, "step": 341000 }, { "epoch": 78.27, "learning_rate": 4.8618590278736244e-05, "loss": 0.9952, "step": 341500 }, { "epoch": 78.27, "eval_loss": 1.5298726558685303, "eval_runtime": 8.6888, "eval_samples_per_second": 540.124, "eval_steps_per_second": 67.559, "step": 341500 }, { "epoch": 78.39, "learning_rate": 4.861038260779437e-05, "loss": 0.9935, "step": 342000 }, { "epoch": 78.39, "eval_loss": 1.5340059995651245, "eval_runtime": 8.6888, "eval_samples_per_second": 540.12, "eval_steps_per_second": 67.558, "step": 342000 }, { "epoch": 78.5, "learning_rate": 4.860215132297449e-05, "loss": 0.9937, "step": 342500 }, { "epoch": 78.5, "eval_loss": 1.543186902999878, "eval_runtime": 8.6916, "eval_samples_per_second": 539.947, "eval_steps_per_second": 67.537, "step": 342500 }, { "epoch": 78.62, "learning_rate": 4.85938964325091e-05, "loss": 0.9977, "step": 343000 }, { "epoch": 78.62, "eval_loss": 1.5279232263565063, "eval_runtime": 8.6921, "eval_samples_per_second": 539.917, "eval_steps_per_second": 67.533, "step": 343000 }, { "epoch": 78.73, "learning_rate": 4.8585617944654326e-05, "loss": 1.0091, "step": 343500 }, { "epoch": 78.73, "eval_loss": 1.5354580879211426, "eval_runtime": 8.6886, "eval_samples_per_second": 540.132, "eval_steps_per_second": 67.56, "step": 343500 }, { "epoch": 78.84, "learning_rate": 4.857731586768988e-05, "loss": 1.0111, "step": 344000 }, { "epoch": 78.84, "eval_loss": 1.532023310661316, "eval_runtime": 8.6943, "eval_samples_per_second": 539.776, "eval_steps_per_second": 67.515, "step": 344000 }, { "epoch": 78.96, "learning_rate": 4.856899020991906e-05, "loss": 1.0082, "step": 344500 }, { "epoch": 78.96, "eval_loss": 1.5301722288131714, "eval_runtime": 8.6923, "eval_samples_per_second": 539.905, "eval_steps_per_second": 67.531, "step": 344500 }, { "epoch": 79.07, "learning_rate": 4.856064097966876e-05, "loss": 0.9974, "step": 345000 }, { "epoch": 79.07, "eval_loss": 1.535430669784546, "eval_runtime": 8.6823, "eval_samples_per_second": 540.522, "eval_steps_per_second": 67.608, "step": 345000 }, { "epoch": 79.19, "learning_rate": 4.855226818528944e-05, "loss": 0.9963, "step": 345500 }, { "epoch": 79.19, "eval_loss": 1.5655649900436401, "eval_runtime": 8.691, "eval_samples_per_second": 539.986, "eval_steps_per_second": 67.541, "step": 345500 }, { "epoch": 79.3, "learning_rate": 4.854387183515514e-05, "loss": 0.9917, "step": 346000 }, { "epoch": 79.3, "eval_loss": 1.5389798879623413, "eval_runtime": 8.6881, "eval_samples_per_second": 540.164, "eval_steps_per_second": 67.564, "step": 346000 }, { "epoch": 79.42, "learning_rate": 4.8535451937663456e-05, "loss": 0.9939, "step": 346500 }, { "epoch": 79.42, "eval_loss": 1.5460395812988281, "eval_runtime": 8.6952, "eval_samples_per_second": 539.723, "eval_steps_per_second": 67.509, "step": 346500 }, { "epoch": 79.53, "learning_rate": 4.8527008501235534e-05, "loss": 0.9947, "step": 347000 }, { "epoch": 79.53, "eval_loss": 1.5406036376953125, "eval_runtime": 8.6893, "eval_samples_per_second": 540.09, "eval_steps_per_second": 67.554, "step": 347000 }, { "epoch": 79.65, "learning_rate": 4.851854153431605e-05, "loss": 0.9955, "step": 347500 }, { "epoch": 79.65, "eval_loss": 1.5343595743179321, "eval_runtime": 8.696, "eval_samples_per_second": 539.672, "eval_steps_per_second": 67.502, "step": 347500 }, { "epoch": 79.76, "learning_rate": 4.8510051045373225e-05, "loss": 1.0085, "step": 348000 }, { "epoch": 79.76, "eval_loss": 1.5614354610443115, "eval_runtime": 8.6904, "eval_samples_per_second": 540.018, "eval_steps_per_second": 67.545, "step": 348000 }, { "epoch": 79.88, "learning_rate": 4.850153704289882e-05, "loss": 1.0092, "step": 348500 }, { "epoch": 79.88, "eval_loss": 1.530638575553894, "eval_runtime": 8.6876, "eval_samples_per_second": 540.196, "eval_steps_per_second": 67.568, "step": 348500 }, { "epoch": 79.99, "learning_rate": 4.8492999535408085e-05, "loss": 1.0161, "step": 349000 }, { "epoch": 79.99, "eval_loss": 1.5519975423812866, "eval_runtime": 8.693, "eval_samples_per_second": 539.862, "eval_steps_per_second": 67.526, "step": 349000 }, { "epoch": 80.11, "learning_rate": 4.848443853143979e-05, "loss": 0.9895, "step": 349500 }, { "epoch": 80.11, "eval_loss": 1.5461688041687012, "eval_runtime": 8.6863, "eval_samples_per_second": 540.273, "eval_steps_per_second": 67.577, "step": 349500 }, { "epoch": 80.22, "learning_rate": 4.847585403955621e-05, "loss": 0.9901, "step": 350000 }, { "epoch": 80.22, "eval_loss": 1.5434027910232544, "eval_runtime": 8.6898, "eval_samples_per_second": 540.058, "eval_steps_per_second": 67.55, "step": 350000 }, { "epoch": 80.33, "learning_rate": 4.846724606834312e-05, "loss": 0.9958, "step": 350500 }, { "epoch": 80.33, "eval_loss": 1.5450084209442139, "eval_runtime": 8.688, "eval_samples_per_second": 540.173, "eval_steps_per_second": 67.565, "step": 350500 }, { "epoch": 80.45, "learning_rate": 4.845861462640974e-05, "loss": 0.9929, "step": 351000 }, { "epoch": 80.45, "eval_loss": 1.551780343055725, "eval_runtime": 8.689, "eval_samples_per_second": 540.108, "eval_steps_per_second": 67.557, "step": 351000 }, { "epoch": 80.56, "learning_rate": 4.844995972238881e-05, "loss": 1.0044, "step": 351500 }, { "epoch": 80.56, "eval_loss": 1.55353844165802, "eval_runtime": 8.6929, "eval_samples_per_second": 539.868, "eval_steps_per_second": 67.527, "step": 351500 }, { "epoch": 80.68, "learning_rate": 4.844128136493651e-05, "loss": 0.9949, "step": 352000 }, { "epoch": 80.68, "eval_loss": 1.541213870048523, "eval_runtime": 8.6964, "eval_samples_per_second": 539.651, "eval_steps_per_second": 67.499, "step": 352000 }, { "epoch": 80.79, "learning_rate": 4.8432579562732475e-05, "loss": 1.0012, "step": 352500 }, { "epoch": 80.79, "eval_loss": 1.5379431247711182, "eval_runtime": 8.6901, "eval_samples_per_second": 540.04, "eval_steps_per_second": 67.548, "step": 352500 }, { "epoch": 80.91, "learning_rate": 4.842385432447981e-05, "loss": 1.0005, "step": 353000 }, { "epoch": 80.91, "eval_loss": 1.5450776815414429, "eval_runtime": 8.6888, "eval_samples_per_second": 540.123, "eval_steps_per_second": 67.558, "step": 353000 }, { "epoch": 81.02, "learning_rate": 4.8415105658905036e-05, "loss": 0.9994, "step": 353500 }, { "epoch": 81.02, "eval_loss": 1.5368366241455078, "eval_runtime": 8.6915, "eval_samples_per_second": 539.952, "eval_steps_per_second": 67.537, "step": 353500 }, { "epoch": 81.14, "learning_rate": 4.8406333574758104e-05, "loss": 0.9872, "step": 354000 }, { "epoch": 81.14, "eval_loss": 1.5522481203079224, "eval_runtime": 8.6862, "eval_samples_per_second": 540.282, "eval_steps_per_second": 67.578, "step": 354000 }, { "epoch": 81.25, "learning_rate": 4.8397538080812414e-05, "loss": 0.9869, "step": 354500 }, { "epoch": 81.25, "eval_loss": 1.5545992851257324, "eval_runtime": 8.6936, "eval_samples_per_second": 539.82, "eval_steps_per_second": 67.521, "step": 354500 }, { "epoch": 81.37, "learning_rate": 4.838871918586476e-05, "loss": 0.9856, "step": 355000 }, { "epoch": 81.37, "eval_loss": 1.5336921215057373, "eval_runtime": 8.6891, "eval_samples_per_second": 540.105, "eval_steps_per_second": 67.556, "step": 355000 }, { "epoch": 81.48, "learning_rate": 4.837987689873533e-05, "loss": 0.9953, "step": 355500 }, { "epoch": 81.48, "eval_loss": 1.5535376071929932, "eval_runtime": 8.6939, "eval_samples_per_second": 539.803, "eval_steps_per_second": 67.518, "step": 355500 }, { "epoch": 81.6, "learning_rate": 4.8371011228267734e-05, "loss": 0.9914, "step": 356000 }, { "epoch": 81.6, "eval_loss": 1.5657655000686646, "eval_runtime": 8.6905, "eval_samples_per_second": 540.013, "eval_steps_per_second": 67.545, "step": 356000 }, { "epoch": 81.71, "learning_rate": 4.836212218332895e-05, "loss": 0.995, "step": 356500 }, { "epoch": 81.71, "eval_loss": 1.517477035522461, "eval_runtime": 8.6908, "eval_samples_per_second": 539.996, "eval_steps_per_second": 67.543, "step": 356500 }, { "epoch": 81.82, "learning_rate": 4.835320977280933e-05, "loss": 0.9972, "step": 357000 }, { "epoch": 81.82, "eval_loss": 1.561906337738037, "eval_runtime": 8.6898, "eval_samples_per_second": 540.06, "eval_steps_per_second": 67.551, "step": 357000 }, { "epoch": 81.94, "learning_rate": 4.834427400562261e-05, "loss": 0.9974, "step": 357500 }, { "epoch": 81.94, "eval_loss": 1.5448452234268188, "eval_runtime": 8.6845, "eval_samples_per_second": 540.386, "eval_steps_per_second": 67.591, "step": 357500 }, { "epoch": 82.05, "learning_rate": 4.8335314890705894e-05, "loss": 0.9903, "step": 358000 }, { "epoch": 82.05, "eval_loss": 1.5431965589523315, "eval_runtime": 8.6931, "eval_samples_per_second": 539.851, "eval_steps_per_second": 67.525, "step": 358000 }, { "epoch": 82.17, "learning_rate": 4.832633243701961e-05, "loss": 0.9907, "step": 358500 }, { "epoch": 82.17, "eval_loss": 1.5373342037200928, "eval_runtime": 8.6896, "eval_samples_per_second": 540.07, "eval_steps_per_second": 67.552, "step": 358500 }, { "epoch": 82.28, "learning_rate": 4.831732665354753e-05, "loss": 0.9845, "step": 359000 }, { "epoch": 82.28, "eval_loss": 1.5416852235794067, "eval_runtime": 8.688, "eval_samples_per_second": 540.17, "eval_steps_per_second": 67.564, "step": 359000 }, { "epoch": 82.4, "learning_rate": 4.83082975492968e-05, "loss": 0.9981, "step": 359500 }, { "epoch": 82.4, "eval_loss": 1.5439175367355347, "eval_runtime": 8.6876, "eval_samples_per_second": 540.198, "eval_steps_per_second": 67.568, "step": 359500 }, { "epoch": 82.51, "learning_rate": 4.8299245133297836e-05, "loss": 0.994, "step": 360000 }, { "epoch": 82.51, "eval_loss": 1.559763789176941, "eval_runtime": 8.7014, "eval_samples_per_second": 539.339, "eval_steps_per_second": 67.46, "step": 360000 }, { "epoch": 82.63, "learning_rate": 4.82901694146044e-05, "loss": 0.99, "step": 360500 }, { "epoch": 82.63, "eval_loss": 1.5573593378067017, "eval_runtime": 8.6891, "eval_samples_per_second": 540.1, "eval_steps_per_second": 67.556, "step": 360500 }, { "epoch": 82.74, "learning_rate": 4.828107040229356e-05, "loss": 0.9968, "step": 361000 }, { "epoch": 82.74, "eval_loss": 1.5311838388442993, "eval_runtime": 8.6879, "eval_samples_per_second": 540.175, "eval_steps_per_second": 67.565, "step": 361000 }, { "epoch": 82.86, "learning_rate": 4.827194810546567e-05, "loss": 0.9962, "step": 361500 }, { "epoch": 82.86, "eval_loss": 1.5291633605957031, "eval_runtime": 8.6902, "eval_samples_per_second": 540.031, "eval_steps_per_second": 67.547, "step": 361500 }, { "epoch": 82.97, "learning_rate": 4.826280253324438e-05, "loss": 0.9928, "step": 362000 }, { "epoch": 82.97, "eval_loss": 1.5494097471237183, "eval_runtime": 8.6889, "eval_samples_per_second": 540.112, "eval_steps_per_second": 67.557, "step": 362000 }, { "epoch": 83.09, "learning_rate": 4.8253633694776625e-05, "loss": 0.9885, "step": 362500 }, { "epoch": 83.09, "eval_loss": 1.5534764528274536, "eval_runtime": 8.6935, "eval_samples_per_second": 539.828, "eval_steps_per_second": 67.522, "step": 362500 }, { "epoch": 83.2, "learning_rate": 4.824444159923258e-05, "loss": 0.9745, "step": 363000 }, { "epoch": 83.2, "eval_loss": 1.549716830253601, "eval_runtime": 8.6921, "eval_samples_per_second": 539.917, "eval_steps_per_second": 67.533, "step": 363000 }, { "epoch": 83.31, "learning_rate": 4.823522625580572e-05, "loss": 0.9819, "step": 363500 }, { "epoch": 83.31, "eval_loss": 1.5497312545776367, "eval_runtime": 8.6866, "eval_samples_per_second": 540.257, "eval_steps_per_second": 67.575, "step": 363500 }, { "epoch": 83.43, "learning_rate": 4.822598767371273e-05, "loss": 0.9886, "step": 364000 }, { "epoch": 83.43, "eval_loss": 1.5569432973861694, "eval_runtime": 8.6885, "eval_samples_per_second": 540.137, "eval_steps_per_second": 67.56, "step": 364000 }, { "epoch": 83.54, "learning_rate": 4.82167258621936e-05, "loss": 0.9918, "step": 364500 }, { "epoch": 83.54, "eval_loss": 1.5646703243255615, "eval_runtime": 8.6929, "eval_samples_per_second": 539.867, "eval_steps_per_second": 67.526, "step": 364500 }, { "epoch": 83.66, "learning_rate": 4.820744083051147e-05, "loss": 0.9883, "step": 365000 }, { "epoch": 83.66, "eval_loss": 1.543001651763916, "eval_runtime": 8.6909, "eval_samples_per_second": 539.991, "eval_steps_per_second": 67.542, "step": 365000 }, { "epoch": 83.77, "learning_rate": 4.819813258795278e-05, "loss": 0.9958, "step": 365500 }, { "epoch": 83.77, "eval_loss": 1.558786392211914, "eval_runtime": 8.6943, "eval_samples_per_second": 539.777, "eval_steps_per_second": 67.515, "step": 365500 }, { "epoch": 83.89, "learning_rate": 4.818880114382712e-05, "loss": 0.9953, "step": 366000 }, { "epoch": 83.89, "eval_loss": 1.5496138334274292, "eval_runtime": 8.6889, "eval_samples_per_second": 540.115, "eval_steps_per_second": 67.558, "step": 366000 }, { "epoch": 84.0, "learning_rate": 4.817944650746734e-05, "loss": 0.9916, "step": 366500 }, { "epoch": 84.0, "eval_loss": 1.5494755506515503, "eval_runtime": 8.6881, "eval_samples_per_second": 540.163, "eval_steps_per_second": 67.563, "step": 366500 }, { "epoch": 84.12, "learning_rate": 4.8170068688229456e-05, "loss": 0.9783, "step": 367000 }, { "epoch": 84.12, "eval_loss": 1.555258870124817, "eval_runtime": 8.6887, "eval_samples_per_second": 540.129, "eval_steps_per_second": 67.559, "step": 367000 }, { "epoch": 84.23, "learning_rate": 4.816066769549266e-05, "loss": 0.9749, "step": 367500 }, { "epoch": 84.23, "eval_loss": 1.5668115615844727, "eval_runtime": 8.6874, "eval_samples_per_second": 540.207, "eval_steps_per_second": 67.569, "step": 367500 }, { "epoch": 84.35, "learning_rate": 4.815124353865935e-05, "loss": 0.9812, "step": 368000 }, { "epoch": 84.35, "eval_loss": 1.5503430366516113, "eval_runtime": 8.706, "eval_samples_per_second": 539.055, "eval_steps_per_second": 67.425, "step": 368000 }, { "epoch": 84.46, "learning_rate": 4.8141796227155076e-05, "loss": 0.9859, "step": 368500 }, { "epoch": 84.46, "eval_loss": 1.5724716186523438, "eval_runtime": 8.6957, "eval_samples_per_second": 539.694, "eval_steps_per_second": 67.505, "step": 368500 }, { "epoch": 84.57, "learning_rate": 4.8132325770428556e-05, "loss": 0.9851, "step": 369000 }, { "epoch": 84.57, "eval_loss": 1.5354793071746826, "eval_runtime": 8.6916, "eval_samples_per_second": 539.945, "eval_steps_per_second": 67.536, "step": 369000 }, { "epoch": 84.69, "learning_rate": 4.812283217795164e-05, "loss": 0.9884, "step": 369500 }, { "epoch": 84.69, "eval_loss": 1.5515495538711548, "eval_runtime": 8.6885, "eval_samples_per_second": 540.142, "eval_steps_per_second": 67.561, "step": 369500 }, { "epoch": 84.8, "learning_rate": 4.811331545921932e-05, "loss": 0.9886, "step": 370000 }, { "epoch": 84.8, "eval_loss": 1.5577753782272339, "eval_runtime": 8.7007, "eval_samples_per_second": 539.381, "eval_steps_per_second": 67.466, "step": 370000 }, { "epoch": 84.92, "learning_rate": 4.810377562374973e-05, "loss": 0.9902, "step": 370500 }, { "epoch": 84.92, "eval_loss": 1.550673007965088, "eval_runtime": 8.6913, "eval_samples_per_second": 539.965, "eval_steps_per_second": 67.539, "step": 370500 }, { "epoch": 85.03, "learning_rate": 4.8094212681084126e-05, "loss": 0.9856, "step": 371000 }, { "epoch": 85.03, "eval_loss": 1.5546191930770874, "eval_runtime": 8.6916, "eval_samples_per_second": 539.945, "eval_steps_per_second": 67.536, "step": 371000 }, { "epoch": 85.15, "learning_rate": 4.808462664078686e-05, "loss": 0.9807, "step": 371500 }, { "epoch": 85.15, "eval_loss": 1.5680534839630127, "eval_runtime": 8.686, "eval_samples_per_second": 540.295, "eval_steps_per_second": 67.58, "step": 371500 }, { "epoch": 85.26, "learning_rate": 4.807501751244539e-05, "loss": 0.9835, "step": 372000 }, { "epoch": 85.26, "eval_loss": 1.5502150058746338, "eval_runtime": 8.6955, "eval_samples_per_second": 539.703, "eval_steps_per_second": 67.506, "step": 372000 }, { "epoch": 85.38, "learning_rate": 4.806538530567026e-05, "loss": 0.9828, "step": 372500 }, { "epoch": 85.38, "eval_loss": 1.537994384765625, "eval_runtime": 8.6917, "eval_samples_per_second": 539.938, "eval_steps_per_second": 67.535, "step": 372500 }, { "epoch": 85.49, "learning_rate": 4.805573003009511e-05, "loss": 0.9796, "step": 373000 }, { "epoch": 85.49, "eval_loss": 1.5430189371109009, "eval_runtime": 8.6877, "eval_samples_per_second": 540.192, "eval_steps_per_second": 67.567, "step": 373000 }, { "epoch": 85.61, "learning_rate": 4.804605169537665e-05, "loss": 0.9849, "step": 373500 }, { "epoch": 85.61, "eval_loss": 1.5438148975372314, "eval_runtime": 8.6888, "eval_samples_per_second": 540.12, "eval_steps_per_second": 67.558, "step": 373500 }, { "epoch": 85.72, "learning_rate": 4.8036350311194636e-05, "loss": 0.9856, "step": 374000 }, { "epoch": 85.72, "eval_loss": 1.5512088537216187, "eval_runtime": 8.6883, "eval_samples_per_second": 540.153, "eval_steps_per_second": 67.562, "step": 374000 }, { "epoch": 85.84, "learning_rate": 4.80266258872519e-05, "loss": 0.9914, "step": 374500 }, { "epoch": 85.84, "eval_loss": 1.5661059617996216, "eval_runtime": 8.6884, "eval_samples_per_second": 540.144, "eval_steps_per_second": 67.561, "step": 374500 }, { "epoch": 85.95, "learning_rate": 4.8016878433274304e-05, "loss": 0.9884, "step": 375000 }, { "epoch": 85.95, "eval_loss": 1.5548585653305054, "eval_runtime": 8.6992, "eval_samples_per_second": 539.477, "eval_steps_per_second": 67.478, "step": 375000 }, { "epoch": 86.06, "learning_rate": 4.800710795901073e-05, "loss": 0.9872, "step": 375500 }, { "epoch": 86.06, "eval_loss": 1.5646129846572876, "eval_runtime": 8.6952, "eval_samples_per_second": 539.72, "eval_steps_per_second": 67.508, "step": 375500 }, { "epoch": 86.18, "learning_rate": 4.799731447423312e-05, "loss": 0.9755, "step": 376000 }, { "epoch": 86.18, "eval_loss": 1.5761183500289917, "eval_runtime": 8.6917, "eval_samples_per_second": 539.939, "eval_steps_per_second": 67.535, "step": 376000 }, { "epoch": 86.29, "learning_rate": 4.79874979887364e-05, "loss": 0.9771, "step": 376500 }, { "epoch": 86.29, "eval_loss": 1.5492554903030396, "eval_runtime": 8.6904, "eval_samples_per_second": 540.02, "eval_steps_per_second": 67.546, "step": 376500 }, { "epoch": 86.41, "learning_rate": 4.797765851233851e-05, "loss": 0.978, "step": 377000 }, { "epoch": 86.41, "eval_loss": 1.5470625162124634, "eval_runtime": 8.6906, "eval_samples_per_second": 540.01, "eval_steps_per_second": 67.544, "step": 377000 }, { "epoch": 86.52, "learning_rate": 4.796779605488038e-05, "loss": 0.9764, "step": 377500 }, { "epoch": 86.52, "eval_loss": 1.5422850847244263, "eval_runtime": 8.6911, "eval_samples_per_second": 539.975, "eval_steps_per_second": 67.54, "step": 377500 }, { "epoch": 86.64, "learning_rate": 4.7957910626225916e-05, "loss": 0.9861, "step": 378000 }, { "epoch": 86.64, "eval_loss": 1.533884048461914, "eval_runtime": 8.6908, "eval_samples_per_second": 539.999, "eval_steps_per_second": 67.543, "step": 378000 }, { "epoch": 86.75, "learning_rate": 4.7948002236262034e-05, "loss": 0.9936, "step": 378500 }, { "epoch": 86.75, "eval_loss": 1.5630066394805908, "eval_runtime": 8.6871, "eval_samples_per_second": 540.228, "eval_steps_per_second": 67.572, "step": 378500 }, { "epoch": 86.87, "learning_rate": 4.7938070894898575e-05, "loss": 0.9836, "step": 379000 }, { "epoch": 86.87, "eval_loss": 1.5671770572662354, "eval_runtime": 8.6853, "eval_samples_per_second": 540.336, "eval_steps_per_second": 67.585, "step": 379000 }, { "epoch": 86.98, "learning_rate": 4.7928116612068356e-05, "loss": 0.9922, "step": 379500 }, { "epoch": 86.98, "eval_loss": 1.5519227981567383, "eval_runtime": 8.693, "eval_samples_per_second": 539.859, "eval_steps_per_second": 67.526, "step": 379500 }, { "epoch": 87.1, "learning_rate": 4.791813939772714e-05, "loss": 0.9759, "step": 380000 }, { "epoch": 87.1, "eval_loss": 1.5617653131484985, "eval_runtime": 8.6901, "eval_samples_per_second": 540.042, "eval_steps_per_second": 67.548, "step": 380000 }, { "epoch": 87.21, "learning_rate": 4.79081392618536e-05, "loss": 0.9758, "step": 380500 }, { "epoch": 87.21, "eval_loss": 1.559037446975708, "eval_runtime": 8.708, "eval_samples_per_second": 538.931, "eval_steps_per_second": 67.409, "step": 380500 }, { "epoch": 87.33, "learning_rate": 4.789811621444936e-05, "loss": 0.9776, "step": 381000 }, { "epoch": 87.33, "eval_loss": 1.5723061561584473, "eval_runtime": 8.6948, "eval_samples_per_second": 539.75, "eval_steps_per_second": 67.512, "step": 381000 }, { "epoch": 87.44, "learning_rate": 4.7888070265538965e-05, "loss": 0.9773, "step": 381500 }, { "epoch": 87.44, "eval_loss": 1.5402848720550537, "eval_runtime": 8.6904, "eval_samples_per_second": 540.018, "eval_steps_per_second": 67.545, "step": 381500 }, { "epoch": 87.55, "learning_rate": 4.7878001425169835e-05, "loss": 0.9883, "step": 382000 }, { "epoch": 87.55, "eval_loss": 1.5568128824234009, "eval_runtime": 8.6909, "eval_samples_per_second": 539.988, "eval_steps_per_second": 67.542, "step": 382000 }, { "epoch": 87.67, "learning_rate": 4.7867909703412305e-05, "loss": 0.9895, "step": 382500 }, { "epoch": 87.67, "eval_loss": 1.556656837463379, "eval_runtime": 8.6944, "eval_samples_per_second": 539.773, "eval_steps_per_second": 67.515, "step": 382500 }, { "epoch": 87.78, "learning_rate": 4.7857795110359605e-05, "loss": 0.9884, "step": 383000 }, { "epoch": 87.78, "eval_loss": 1.5599688291549683, "eval_runtime": 8.6915, "eval_samples_per_second": 539.953, "eval_steps_per_second": 67.537, "step": 383000 }, { "epoch": 87.9, "learning_rate": 4.7847657656127796e-05, "loss": 0.9842, "step": 383500 }, { "epoch": 87.9, "eval_loss": 1.5476220846176147, "eval_runtime": 8.6888, "eval_samples_per_second": 540.121, "eval_steps_per_second": 67.558, "step": 383500 }, { "epoch": 88.01, "learning_rate": 4.783749735085586e-05, "loss": 0.9959, "step": 384000 }, { "epoch": 88.01, "eval_loss": 1.5738316774368286, "eval_runtime": 8.6935, "eval_samples_per_second": 539.827, "eval_steps_per_second": 67.522, "step": 384000 }, { "epoch": 88.13, "learning_rate": 4.78273142047056e-05, "loss": 0.9822, "step": 384500 }, { "epoch": 88.13, "eval_loss": 1.595237374305725, "eval_runtime": 8.6973, "eval_samples_per_second": 539.595, "eval_steps_per_second": 67.492, "step": 384500 }, { "epoch": 88.24, "learning_rate": 4.781710822786166e-05, "loss": 0.9773, "step": 385000 }, { "epoch": 88.24, "eval_loss": 1.5697250366210938, "eval_runtime": 8.696, "eval_samples_per_second": 539.673, "eval_steps_per_second": 67.502, "step": 385000 }, { "epoch": 88.36, "learning_rate": 4.780687943053154e-05, "loss": 0.979, "step": 385500 }, { "epoch": 88.36, "eval_loss": 1.577168583869934, "eval_runtime": 8.6917, "eval_samples_per_second": 539.941, "eval_steps_per_second": 67.536, "step": 385500 }, { "epoch": 88.47, "learning_rate": 4.7796627822945536e-05, "loss": 0.9778, "step": 386000 }, { "epoch": 88.47, "eval_loss": 1.5581854581832886, "eval_runtime": 8.6922, "eval_samples_per_second": 539.909, "eval_steps_per_second": 67.532, "step": 386000 }, { "epoch": 88.59, "learning_rate": 4.778635341535679e-05, "loss": 0.9839, "step": 386500 }, { "epoch": 88.59, "eval_loss": 1.5557835102081299, "eval_runtime": 8.6907, "eval_samples_per_second": 540.001, "eval_steps_per_second": 67.543, "step": 386500 }, { "epoch": 88.7, "learning_rate": 4.777605621804122e-05, "loss": 0.984, "step": 387000 }, { "epoch": 88.7, "eval_loss": 1.557741641998291, "eval_runtime": 8.689, "eval_samples_per_second": 540.108, "eval_steps_per_second": 67.557, "step": 387000 }, { "epoch": 88.81, "learning_rate": 4.7765736241297556e-05, "loss": 0.987, "step": 387500 }, { "epoch": 88.81, "eval_loss": 1.5887218713760376, "eval_runtime": 8.69, "eval_samples_per_second": 540.048, "eval_steps_per_second": 67.549, "step": 387500 }, { "epoch": 88.93, "learning_rate": 4.775539349544729e-05, "loss": 0.9827, "step": 388000 }, { "epoch": 88.93, "eval_loss": 1.5731607675552368, "eval_runtime": 8.6889, "eval_samples_per_second": 540.114, "eval_steps_per_second": 67.557, "step": 388000 }, { "epoch": 89.04, "learning_rate": 4.774502799083471e-05, "loss": 0.983, "step": 388500 }, { "epoch": 89.04, "eval_loss": 1.5667802095413208, "eval_runtime": 8.6949, "eval_samples_per_second": 539.74, "eval_steps_per_second": 67.511, "step": 388500 }, { "epoch": 89.16, "learning_rate": 4.773463973782686e-05, "loss": 0.9688, "step": 389000 }, { "epoch": 89.16, "eval_loss": 1.579674482345581, "eval_runtime": 8.6927, "eval_samples_per_second": 539.88, "eval_steps_per_second": 67.528, "step": 389000 }, { "epoch": 89.27, "learning_rate": 4.7724228746813516e-05, "loss": 0.97, "step": 389500 }, { "epoch": 89.27, "eval_loss": 1.561622977256775, "eval_runtime": 8.689, "eval_samples_per_second": 540.111, "eval_steps_per_second": 67.557, "step": 389500 }, { "epoch": 89.39, "learning_rate": 4.771379502820722e-05, "loss": 0.9788, "step": 390000 }, { "epoch": 89.39, "eval_loss": 1.5459749698638916, "eval_runtime": 8.693, "eval_samples_per_second": 539.86, "eval_steps_per_second": 67.526, "step": 390000 }, { "epoch": 89.5, "learning_rate": 4.770333859244323e-05, "loss": 0.9789, "step": 390500 }, { "epoch": 89.5, "eval_loss": 1.5914839506149292, "eval_runtime": 8.689, "eval_samples_per_second": 540.106, "eval_steps_per_second": 67.556, "step": 390500 }, { "epoch": 89.62, "learning_rate": 4.769285944997953e-05, "loss": 0.974, "step": 391000 }, { "epoch": 89.62, "eval_loss": 1.5734363794326782, "eval_runtime": 8.6905, "eval_samples_per_second": 540.013, "eval_steps_per_second": 67.545, "step": 391000 }, { "epoch": 89.73, "learning_rate": 4.7682357611296826e-05, "loss": 0.9826, "step": 391500 }, { "epoch": 89.73, "eval_loss": 1.5744757652282715, "eval_runtime": 8.6916, "eval_samples_per_second": 539.944, "eval_steps_per_second": 67.536, "step": 391500 }, { "epoch": 89.85, "learning_rate": 4.767183308689849e-05, "loss": 0.9945, "step": 392000 }, { "epoch": 89.85, "eval_loss": 1.5632128715515137, "eval_runtime": 8.6907, "eval_samples_per_second": 540.003, "eval_steps_per_second": 67.544, "step": 392000 }, { "epoch": 89.96, "learning_rate": 4.766128588731063e-05, "loss": 0.9847, "step": 392500 }, { "epoch": 89.96, "eval_loss": 1.5777968168258667, "eval_runtime": 8.6922, "eval_samples_per_second": 539.911, "eval_steps_per_second": 67.532, "step": 392500 }, { "epoch": 90.08, "learning_rate": 4.765071602308198e-05, "loss": 0.9813, "step": 393000 }, { "epoch": 90.08, "eval_loss": 1.5809643268585205, "eval_runtime": 8.6879, "eval_samples_per_second": 540.176, "eval_steps_per_second": 67.565, "step": 393000 }, { "epoch": 90.19, "learning_rate": 4.764012350478399e-05, "loss": 0.9719, "step": 393500 }, { "epoch": 90.19, "eval_loss": 1.558480143547058, "eval_runtime": 8.6873, "eval_samples_per_second": 540.215, "eval_steps_per_second": 67.57, "step": 393500 }, { "epoch": 90.3, "learning_rate": 4.762950834301073e-05, "loss": 0.9729, "step": 394000 }, { "epoch": 90.3, "eval_loss": 1.5513899326324463, "eval_runtime": 8.6897, "eval_samples_per_second": 540.065, "eval_steps_per_second": 67.551, "step": 394000 }, { "epoch": 90.42, "learning_rate": 4.7618870548378946e-05, "loss": 0.9715, "step": 394500 }, { "epoch": 90.42, "eval_loss": 1.5705541372299194, "eval_runtime": 8.692, "eval_samples_per_second": 539.92, "eval_steps_per_second": 67.533, "step": 394500 }, { "epoch": 90.53, "learning_rate": 4.760821013152801e-05, "loss": 0.9784, "step": 395000 }, { "epoch": 90.53, "eval_loss": 1.5556312799453735, "eval_runtime": 8.6934, "eval_samples_per_second": 539.832, "eval_steps_per_second": 67.522, "step": 395000 }, { "epoch": 90.65, "learning_rate": 4.7597527103119905e-05, "loss": 0.9751, "step": 395500 }, { "epoch": 90.65, "eval_loss": 1.5540707111358643, "eval_runtime": 8.6866, "eval_samples_per_second": 540.258, "eval_steps_per_second": 67.575, "step": 395500 }, { "epoch": 90.76, "learning_rate": 4.758682147383925e-05, "loss": 0.9826, "step": 396000 }, { "epoch": 90.76, "eval_loss": 1.566818118095398, "eval_runtime": 8.6883, "eval_samples_per_second": 540.152, "eval_steps_per_second": 67.562, "step": 396000 }, { "epoch": 90.88, "learning_rate": 4.757609325439325e-05, "loss": 0.9757, "step": 396500 }, { "epoch": 90.88, "eval_loss": 1.5691992044448853, "eval_runtime": 8.6977, "eval_samples_per_second": 539.568, "eval_steps_per_second": 67.489, "step": 396500 }, { "epoch": 90.99, "learning_rate": 4.756534245551172e-05, "loss": 0.983, "step": 397000 }, { "epoch": 90.99, "eval_loss": 1.5653737783432007, "eval_runtime": 8.6881, "eval_samples_per_second": 540.162, "eval_steps_per_second": 67.563, "step": 397000 }, { "epoch": 91.11, "learning_rate": 4.7554569087947054e-05, "loss": 0.9661, "step": 397500 }, { "epoch": 91.11, "eval_loss": 1.5743303298950195, "eval_runtime": 8.6955, "eval_samples_per_second": 539.703, "eval_steps_per_second": 67.506, "step": 397500 }, { "epoch": 91.22, "learning_rate": 4.7543773162474204e-05, "loss": 0.9683, "step": 398000 }, { "epoch": 91.22, "eval_loss": 1.5484098196029663, "eval_runtime": 8.6874, "eval_samples_per_second": 540.208, "eval_steps_per_second": 67.569, "step": 398000 }, { "epoch": 91.34, "learning_rate": 4.7532954689890705e-05, "loss": 0.9692, "step": 398500 }, { "epoch": 91.34, "eval_loss": 1.550323247909546, "eval_runtime": 8.6899, "eval_samples_per_second": 540.052, "eval_steps_per_second": 67.55, "step": 398500 }, { "epoch": 91.45, "learning_rate": 4.752211368101662e-05, "loss": 0.9701, "step": 399000 }, { "epoch": 91.45, "eval_loss": 1.5726513862609863, "eval_runtime": 8.6924, "eval_samples_per_second": 539.896, "eval_steps_per_second": 67.53, "step": 399000 }, { "epoch": 91.57, "learning_rate": 4.751125014669458e-05, "loss": 0.974, "step": 399500 }, { "epoch": 91.57, "eval_loss": 1.5413353443145752, "eval_runtime": 8.6845, "eval_samples_per_second": 540.386, "eval_steps_per_second": 67.591, "step": 399500 }, { "epoch": 91.68, "learning_rate": 4.7500364097789695e-05, "loss": 0.974, "step": 400000 }, { "epoch": 91.68, "eval_loss": 1.5632089376449585, "eval_runtime": 8.6899, "eval_samples_per_second": 540.052, "eval_steps_per_second": 67.55, "step": 400000 }, { "epoch": 91.79, "learning_rate": 4.748945554518966e-05, "loss": 0.9786, "step": 400500 }, { "epoch": 91.79, "eval_loss": 1.5685193538665771, "eval_runtime": 8.6896, "eval_samples_per_second": 540.073, "eval_steps_per_second": 67.552, "step": 400500 }, { "epoch": 91.91, "learning_rate": 4.7478524499804633e-05, "loss": 0.9735, "step": 401000 }, { "epoch": 91.91, "eval_loss": 1.57188081741333, "eval_runtime": 8.6932, "eval_samples_per_second": 539.847, "eval_steps_per_second": 67.524, "step": 401000 }, { "epoch": 92.02, "learning_rate": 4.7467570972567266e-05, "loss": 0.9724, "step": 401500 }, { "epoch": 92.02, "eval_loss": 1.587400197982788, "eval_runtime": 8.6894, "eval_samples_per_second": 540.084, "eval_steps_per_second": 67.554, "step": 401500 }, { "epoch": 92.14, "learning_rate": 4.745659497443272e-05, "loss": 0.9632, "step": 402000 }, { "epoch": 92.14, "eval_loss": 1.576438069343567, "eval_runtime": 8.6893, "eval_samples_per_second": 540.088, "eval_steps_per_second": 67.554, "step": 402000 }, { "epoch": 92.25, "learning_rate": 4.744559651637861e-05, "loss": 0.961, "step": 402500 }, { "epoch": 92.25, "eval_loss": 1.583878517150879, "eval_runtime": 8.687, "eval_samples_per_second": 540.231, "eval_steps_per_second": 67.572, "step": 402500 }, { "epoch": 92.37, "learning_rate": 4.743457560940502e-05, "loss": 0.9668, "step": 403000 }, { "epoch": 92.37, "eval_loss": 1.543434977531433, "eval_runtime": 8.6846, "eval_samples_per_second": 540.385, "eval_steps_per_second": 67.591, "step": 403000 }, { "epoch": 92.48, "learning_rate": 4.742353226453451e-05, "loss": 0.9671, "step": 403500 }, { "epoch": 92.48, "eval_loss": 1.574384093284607, "eval_runtime": 8.691, "eval_samples_per_second": 539.984, "eval_steps_per_second": 67.541, "step": 403500 }, { "epoch": 92.6, "learning_rate": 4.741246649281203e-05, "loss": 0.9726, "step": 404000 }, { "epoch": 92.6, "eval_loss": 1.5452296733856201, "eval_runtime": 8.6909, "eval_samples_per_second": 539.992, "eval_steps_per_second": 67.542, "step": 404000 }, { "epoch": 92.71, "learning_rate": 4.740137830530501e-05, "loss": 0.9743, "step": 404500 }, { "epoch": 92.71, "eval_loss": 1.5795179605484009, "eval_runtime": 8.7, "eval_samples_per_second": 539.424, "eval_steps_per_second": 67.471, "step": 404500 }, { "epoch": 92.83, "learning_rate": 4.739026771310328e-05, "loss": 0.9786, "step": 405000 }, { "epoch": 92.83, "eval_loss": 1.5691845417022705, "eval_runtime": 8.6853, "eval_samples_per_second": 540.337, "eval_steps_per_second": 67.585, "step": 405000 }, { "epoch": 92.94, "learning_rate": 4.737913472731906e-05, "loss": 0.9761, "step": 405500 }, { "epoch": 92.94, "eval_loss": 1.5573408603668213, "eval_runtime": 8.6873, "eval_samples_per_second": 540.213, "eval_steps_per_second": 67.57, "step": 405500 }, { "epoch": 93.06, "learning_rate": 4.7367979359086994e-05, "loss": 0.9699, "step": 406000 }, { "epoch": 93.06, "eval_loss": 1.5714062452316284, "eval_runtime": 8.687, "eval_samples_per_second": 540.233, "eval_steps_per_second": 67.572, "step": 406000 }, { "epoch": 93.17, "learning_rate": 4.735680161956411e-05, "loss": 0.9547, "step": 406500 }, { "epoch": 93.17, "eval_loss": 1.5804686546325684, "eval_runtime": 8.679, "eval_samples_per_second": 540.731, "eval_steps_per_second": 67.635, "step": 406500 }, { "epoch": 93.28, "learning_rate": 4.734560151992978e-05, "loss": 0.9701, "step": 407000 }, { "epoch": 93.28, "eval_loss": 1.5674899816513062, "eval_runtime": 8.6867, "eval_samples_per_second": 540.254, "eval_steps_per_second": 67.575, "step": 407000 }, { "epoch": 93.4, "learning_rate": 4.733437907138579e-05, "loss": 0.9729, "step": 407500 }, { "epoch": 93.4, "eval_loss": 1.5629485845565796, "eval_runtime": 8.6879, "eval_samples_per_second": 540.179, "eval_steps_per_second": 67.565, "step": 407500 }, { "epoch": 93.51, "learning_rate": 4.732313428515624e-05, "loss": 0.9649, "step": 408000 }, { "epoch": 93.51, "eval_loss": 1.575803279876709, "eval_runtime": 8.684, "eval_samples_per_second": 540.418, "eval_steps_per_second": 67.595, "step": 408000 }, { "epoch": 93.63, "learning_rate": 4.731186717248757e-05, "loss": 0.9658, "step": 408500 }, { "epoch": 93.63, "eval_loss": 1.549422264099121, "eval_runtime": 8.6854, "eval_samples_per_second": 540.332, "eval_steps_per_second": 67.585, "step": 408500 }, { "epoch": 93.74, "learning_rate": 4.730057774464856e-05, "loss": 0.9716, "step": 409000 }, { "epoch": 93.74, "eval_loss": 1.5707573890686035, "eval_runtime": 8.6901, "eval_samples_per_second": 540.039, "eval_steps_per_second": 67.548, "step": 409000 }, { "epoch": 93.86, "learning_rate": 4.7289266012930326e-05, "loss": 0.9781, "step": 409500 }, { "epoch": 93.86, "eval_loss": 1.5600749254226685, "eval_runtime": 8.6907, "eval_samples_per_second": 540.0, "eval_steps_per_second": 67.543, "step": 409500 }, { "epoch": 93.97, "learning_rate": 4.727793198864626e-05, "loss": 0.9762, "step": 410000 }, { "epoch": 93.97, "eval_loss": 1.5529934167861938, "eval_runtime": 8.6864, "eval_samples_per_second": 540.268, "eval_steps_per_second": 67.577, "step": 410000 }, { "epoch": 94.09, "learning_rate": 4.7266575683132076e-05, "loss": 0.967, "step": 410500 }, { "epoch": 94.09, "eval_loss": 1.5735764503479004, "eval_runtime": 8.6863, "eval_samples_per_second": 540.278, "eval_steps_per_second": 67.578, "step": 410500 }, { "epoch": 94.2, "learning_rate": 4.7255197107745755e-05, "loss": 0.9576, "step": 411000 }, { "epoch": 94.2, "eval_loss": 1.5554264783859253, "eval_runtime": 8.6883, "eval_samples_per_second": 540.152, "eval_steps_per_second": 67.562, "step": 411000 }, { "epoch": 94.32, "learning_rate": 4.724379627386754e-05, "loss": 0.9667, "step": 411500 }, { "epoch": 94.32, "eval_loss": 1.5680437088012695, "eval_runtime": 8.6847, "eval_samples_per_second": 540.373, "eval_steps_per_second": 67.59, "step": 411500 }, { "epoch": 94.43, "learning_rate": 4.7232373192899995e-05, "loss": 0.9631, "step": 412000 }, { "epoch": 94.43, "eval_loss": 1.5669670104980469, "eval_runtime": 8.6854, "eval_samples_per_second": 540.335, "eval_steps_per_second": 67.585, "step": 412000 }, { "epoch": 94.54, "learning_rate": 4.722092787626784e-05, "loss": 0.9681, "step": 412500 }, { "epoch": 94.54, "eval_loss": 1.56369948387146, "eval_runtime": 8.6953, "eval_samples_per_second": 539.72, "eval_steps_per_second": 67.508, "step": 412500 }, { "epoch": 94.66, "learning_rate": 4.7209460335418135e-05, "loss": 0.9653, "step": 413000 }, { "epoch": 94.66, "eval_loss": 1.561375379562378, "eval_runtime": 8.6878, "eval_samples_per_second": 540.18, "eval_steps_per_second": 67.566, "step": 413000 }, { "epoch": 94.77, "learning_rate": 4.719797058182008e-05, "loss": 0.9705, "step": 413500 }, { "epoch": 94.77, "eval_loss": 1.559020757675171, "eval_runtime": 8.6863, "eval_samples_per_second": 540.277, "eval_steps_per_second": 67.578, "step": 413500 }, { "epoch": 94.89, "learning_rate": 4.718645862696515e-05, "loss": 0.9722, "step": 414000 }, { "epoch": 94.89, "eval_loss": 1.5785245895385742, "eval_runtime": 8.6921, "eval_samples_per_second": 539.919, "eval_steps_per_second": 67.533, "step": 414000 }, { "epoch": 95.0, "learning_rate": 4.7174924482367e-05, "loss": 0.9709, "step": 414500 }, { "epoch": 95.0, "eval_loss": 1.5653928518295288, "eval_runtime": 8.6885, "eval_samples_per_second": 540.141, "eval_steps_per_second": 67.561, "step": 414500 }, { "epoch": 95.12, "learning_rate": 4.716336815956148e-05, "loss": 0.9613, "step": 415000 }, { "epoch": 95.12, "eval_loss": 1.5745913982391357, "eval_runtime": 8.6874, "eval_samples_per_second": 540.209, "eval_steps_per_second": 67.569, "step": 415000 }, { "epoch": 95.23, "learning_rate": 4.7151789670106624e-05, "loss": 0.9495, "step": 415500 }, { "epoch": 95.23, "eval_loss": 1.5774390697479248, "eval_runtime": 8.6829, "eval_samples_per_second": 540.49, "eval_steps_per_second": 67.604, "step": 415500 }, { "epoch": 95.35, "learning_rate": 4.714018902558265e-05, "loss": 0.9589, "step": 416000 }, { "epoch": 95.35, "eval_loss": 1.561514139175415, "eval_runtime": 8.6874, "eval_samples_per_second": 540.206, "eval_steps_per_second": 67.569, "step": 416000 }, { "epoch": 95.46, "learning_rate": 4.712856623759189e-05, "loss": 0.9592, "step": 416500 }, { "epoch": 95.46, "eval_loss": 1.5718498229980469, "eval_runtime": 8.6893, "eval_samples_per_second": 540.09, "eval_steps_per_second": 67.554, "step": 416500 }, { "epoch": 95.58, "learning_rate": 4.7116921317758874e-05, "loss": 0.9665, "step": 417000 }, { "epoch": 95.58, "eval_loss": 1.5841690301895142, "eval_runtime": 8.6942, "eval_samples_per_second": 539.784, "eval_steps_per_second": 67.516, "step": 417000 }, { "epoch": 95.69, "learning_rate": 4.710525427773025e-05, "loss": 0.9657, "step": 417500 }, { "epoch": 95.69, "eval_loss": 1.5876619815826416, "eval_runtime": 8.686, "eval_samples_per_second": 540.296, "eval_steps_per_second": 67.58, "step": 417500 }, { "epoch": 95.81, "learning_rate": 4.7093565129174764e-05, "loss": 0.9691, "step": 418000 }, { "epoch": 95.81, "eval_loss": 1.570886254310608, "eval_runtime": 8.6881, "eval_samples_per_second": 540.165, "eval_steps_per_second": 67.564, "step": 418000 }, { "epoch": 95.92, "learning_rate": 4.7081853883783314e-05, "loss": 0.9718, "step": 418500 }, { "epoch": 95.92, "eval_loss": 1.5574896335601807, "eval_runtime": 8.6882, "eval_samples_per_second": 540.158, "eval_steps_per_second": 67.563, "step": 418500 }, { "epoch": 96.03, "learning_rate": 4.707012055326887e-05, "loss": 0.9676, "step": 419000 }, { "epoch": 96.03, "eval_loss": 1.5711007118225098, "eval_runtime": 8.6893, "eval_samples_per_second": 540.092, "eval_steps_per_second": 67.555, "step": 419000 }, { "epoch": 96.15, "learning_rate": 4.705836514936651e-05, "loss": 0.9594, "step": 419500 }, { "epoch": 96.15, "eval_loss": 1.5864510536193848, "eval_runtime": 8.6888, "eval_samples_per_second": 540.12, "eval_steps_per_second": 67.558, "step": 419500 }, { "epoch": 96.26, "learning_rate": 4.7046587683833357e-05, "loss": 0.9594, "step": 420000 }, { "epoch": 96.26, "eval_loss": 1.5830575227737427, "eval_runtime": 8.6919, "eval_samples_per_second": 539.927, "eval_steps_per_second": 67.534, "step": 420000 }, { "epoch": 96.38, "learning_rate": 4.703478816844865e-05, "loss": 0.959, "step": 420500 }, { "epoch": 96.38, "eval_loss": 1.5701093673706055, "eval_runtime": 8.6884, "eval_samples_per_second": 540.145, "eval_steps_per_second": 67.561, "step": 420500 }, { "epoch": 96.49, "learning_rate": 4.702296661501362e-05, "loss": 0.9605, "step": 421000 }, { "epoch": 96.49, "eval_loss": 1.5561423301696777, "eval_runtime": 8.6893, "eval_samples_per_second": 540.087, "eval_steps_per_second": 67.554, "step": 421000 }, { "epoch": 96.61, "learning_rate": 4.701112303535159e-05, "loss": 0.9628, "step": 421500 }, { "epoch": 96.61, "eval_loss": 1.5631228685379028, "eval_runtime": 8.6911, "eval_samples_per_second": 539.976, "eval_steps_per_second": 67.54, "step": 421500 }, { "epoch": 96.72, "learning_rate": 4.699925744130789e-05, "loss": 0.9608, "step": 422000 }, { "epoch": 96.72, "eval_loss": 1.5832191705703735, "eval_runtime": 8.6895, "eval_samples_per_second": 540.074, "eval_steps_per_second": 67.552, "step": 422000 }, { "epoch": 96.84, "learning_rate": 4.6987369844749874e-05, "loss": 0.9691, "step": 422500 }, { "epoch": 96.84, "eval_loss": 1.5738263130187988, "eval_runtime": 8.6894, "eval_samples_per_second": 540.083, "eval_steps_per_second": 67.554, "step": 422500 }, { "epoch": 96.95, "learning_rate": 4.697546025756689e-05, "loss": 0.972, "step": 423000 }, { "epoch": 96.95, "eval_loss": 1.5583367347717285, "eval_runtime": 8.6875, "eval_samples_per_second": 540.199, "eval_steps_per_second": 67.568, "step": 423000 }, { "epoch": 97.07, "learning_rate": 4.696352869167029e-05, "loss": 0.9611, "step": 423500 }, { "epoch": 97.07, "eval_loss": 1.5712460279464722, "eval_runtime": 8.6866, "eval_samples_per_second": 540.259, "eval_steps_per_second": 67.576, "step": 423500 }, { "epoch": 97.18, "learning_rate": 4.695157515899341e-05, "loss": 0.9516, "step": 424000 }, { "epoch": 97.18, "eval_loss": 1.5906847715377808, "eval_runtime": 8.6918, "eval_samples_per_second": 539.933, "eval_steps_per_second": 67.535, "step": 424000 }, { "epoch": 97.3, "learning_rate": 4.693959967149155e-05, "loss": 0.9596, "step": 424500 }, { "epoch": 97.3, "eval_loss": 1.5669841766357422, "eval_runtime": 8.6903, "eval_samples_per_second": 540.03, "eval_steps_per_second": 67.547, "step": 424500 }, { "epoch": 97.41, "learning_rate": 4.692760224114197e-05, "loss": 0.9614, "step": 425000 }, { "epoch": 97.41, "eval_loss": 1.5852091312408447, "eval_runtime": 8.7005, "eval_samples_per_second": 539.394, "eval_steps_per_second": 67.467, "step": 425000 }, { "epoch": 97.52, "learning_rate": 4.691558287994388e-05, "loss": 0.9671, "step": 425500 }, { "epoch": 97.52, "eval_loss": 1.5717288255691528, "eval_runtime": 8.6929, "eval_samples_per_second": 539.868, "eval_steps_per_second": 67.527, "step": 425500 }, { "epoch": 97.64, "learning_rate": 4.690354159991842e-05, "loss": 0.9698, "step": 426000 }, { "epoch": 97.64, "eval_loss": 1.5833234786987305, "eval_runtime": 8.6843, "eval_samples_per_second": 540.4, "eval_steps_per_second": 67.593, "step": 426000 }, { "epoch": 97.75, "learning_rate": 4.6891478413108644e-05, "loss": 0.9552, "step": 426500 }, { "epoch": 97.75, "eval_loss": 1.5796605348587036, "eval_runtime": 8.6901, "eval_samples_per_second": 540.041, "eval_steps_per_second": 67.548, "step": 426500 }, { "epoch": 97.87, "learning_rate": 4.6879393331579536e-05, "loss": 0.9592, "step": 427000 }, { "epoch": 97.87, "eval_loss": 1.5699673891067505, "eval_runtime": 8.686, "eval_samples_per_second": 540.292, "eval_steps_per_second": 67.58, "step": 427000 }, { "epoch": 97.98, "learning_rate": 4.686728636741796e-05, "loss": 0.9676, "step": 427500 }, { "epoch": 97.98, "eval_loss": 1.5801987648010254, "eval_runtime": 8.6902, "eval_samples_per_second": 540.036, "eval_steps_per_second": 67.548, "step": 427500 }, { "epoch": 98.1, "learning_rate": 4.6855157532732686e-05, "loss": 0.9495, "step": 428000 }, { "epoch": 98.1, "eval_loss": 1.5904251337051392, "eval_runtime": 8.6929, "eval_samples_per_second": 539.869, "eval_steps_per_second": 67.527, "step": 428000 }, { "epoch": 98.21, "learning_rate": 4.6843006839654316e-05, "loss": 0.9521, "step": 428500 }, { "epoch": 98.21, "eval_loss": 1.5781810283660889, "eval_runtime": 8.686, "eval_samples_per_second": 540.296, "eval_steps_per_second": 67.58, "step": 428500 }, { "epoch": 98.33, "learning_rate": 4.683083430033537e-05, "loss": 0.9584, "step": 429000 }, { "epoch": 98.33, "eval_loss": 1.555160641670227, "eval_runtime": 8.6873, "eval_samples_per_second": 540.212, "eval_steps_per_second": 67.57, "step": 429000 }, { "epoch": 98.44, "learning_rate": 4.681863992695017e-05, "loss": 0.9604, "step": 429500 }, { "epoch": 98.44, "eval_loss": 1.562774658203125, "eval_runtime": 8.6916, "eval_samples_per_second": 539.948, "eval_steps_per_second": 67.537, "step": 429500 }, { "epoch": 98.56, "learning_rate": 4.680642373169491e-05, "loss": 0.9556, "step": 430000 }, { "epoch": 98.56, "eval_loss": 1.5783298015594482, "eval_runtime": 8.6866, "eval_samples_per_second": 540.259, "eval_steps_per_second": 67.576, "step": 430000 }, { "epoch": 98.67, "learning_rate": 4.6794185726787587e-05, "loss": 0.9653, "step": 430500 }, { "epoch": 98.67, "eval_loss": 1.5745137929916382, "eval_runtime": 8.6894, "eval_samples_per_second": 540.08, "eval_steps_per_second": 67.553, "step": 430500 }, { "epoch": 98.79, "learning_rate": 4.678192592446802e-05, "loss": 0.9607, "step": 431000 }, { "epoch": 98.79, "eval_loss": 1.594010353088379, "eval_runtime": 8.683, "eval_samples_per_second": 540.484, "eval_steps_per_second": 67.604, "step": 431000 }, { "epoch": 98.9, "learning_rate": 4.676964433699782e-05, "loss": 0.9618, "step": 431500 }, { "epoch": 98.9, "eval_loss": 1.5614955425262451, "eval_runtime": 8.6886, "eval_samples_per_second": 540.132, "eval_steps_per_second": 67.56, "step": 431500 }, { "epoch": 99.01, "learning_rate": 4.67573409766604e-05, "loss": 0.9574, "step": 432000 }, { "epoch": 99.01, "eval_loss": 1.608638882637024, "eval_runtime": 8.69, "eval_samples_per_second": 540.045, "eval_steps_per_second": 67.549, "step": 432000 }, { "epoch": 99.13, "learning_rate": 4.674501585576093e-05, "loss": 0.9484, "step": 432500 }, { "epoch": 99.13, "eval_loss": 1.5576895475387573, "eval_runtime": 8.6868, "eval_samples_per_second": 540.245, "eval_steps_per_second": 67.574, "step": 432500 }, { "epoch": 99.24, "learning_rate": 4.673266898662637e-05, "loss": 0.9524, "step": 433000 }, { "epoch": 99.24, "eval_loss": 1.5851603746414185, "eval_runtime": 8.7011, "eval_samples_per_second": 539.356, "eval_steps_per_second": 67.463, "step": 433000 }, { "epoch": 99.36, "learning_rate": 4.6720300381605406e-05, "loss": 0.9532, "step": 433500 }, { "epoch": 99.36, "eval_loss": 1.577842354774475, "eval_runtime": 8.6908, "eval_samples_per_second": 539.998, "eval_steps_per_second": 67.543, "step": 433500 }, { "epoch": 99.47, "learning_rate": 4.670791005306848e-05, "loss": 0.9579, "step": 434000 }, { "epoch": 99.47, "eval_loss": 1.5724116563796997, "eval_runtime": 8.6921, "eval_samples_per_second": 539.918, "eval_steps_per_second": 67.533, "step": 434000 }, { "epoch": 99.59, "learning_rate": 4.669549801340776e-05, "loss": 0.9575, "step": 434500 }, { "epoch": 99.59, "eval_loss": 1.5820282697677612, "eval_runtime": 8.6873, "eval_samples_per_second": 540.215, "eval_steps_per_second": 67.57, "step": 434500 }, { "epoch": 99.7, "learning_rate": 4.6683064275037116e-05, "loss": 0.9615, "step": 435000 }, { "epoch": 99.7, "eval_loss": 1.58060884475708, "eval_runtime": 8.6855, "eval_samples_per_second": 540.325, "eval_steps_per_second": 67.584, "step": 435000 }, { "epoch": 99.82, "learning_rate": 4.6670608850392126e-05, "loss": 0.9591, "step": 435500 }, { "epoch": 99.82, "eval_loss": 1.5549876689910889, "eval_runtime": 8.6877, "eval_samples_per_second": 540.192, "eval_steps_per_second": 67.567, "step": 435500 }, { "epoch": 99.93, "learning_rate": 4.6658131751930064e-05, "loss": 0.9622, "step": 436000 }, { "epoch": 99.93, "eval_loss": 1.5800015926361084, "eval_runtime": 8.6863, "eval_samples_per_second": 540.279, "eval_steps_per_second": 67.578, "step": 436000 }, { "epoch": 100.05, "learning_rate": 4.6645632992129874e-05, "loss": 0.9687, "step": 436500 }, { "epoch": 100.05, "eval_loss": 1.5521619319915771, "eval_runtime": 8.6795, "eval_samples_per_second": 540.698, "eval_steps_per_second": 67.63, "step": 436500 }, { "epoch": 100.16, "learning_rate": 4.6633112583492156e-05, "loss": 0.9468, "step": 437000 }, { "epoch": 100.16, "eval_loss": 1.57355535030365, "eval_runtime": 8.6894, "eval_samples_per_second": 540.086, "eval_steps_per_second": 67.554, "step": 437000 }, { "epoch": 100.28, "learning_rate": 4.662057053853919e-05, "loss": 0.9466, "step": 437500 }, { "epoch": 100.28, "eval_loss": 1.5825414657592773, "eval_runtime": 8.6881, "eval_samples_per_second": 540.165, "eval_steps_per_second": 67.564, "step": 437500 }, { "epoch": 100.39, "learning_rate": 4.6608006869814855e-05, "loss": 0.9599, "step": 438000 }, { "epoch": 100.39, "eval_loss": 1.6151872873306274, "eval_runtime": 8.6864, "eval_samples_per_second": 540.268, "eval_steps_per_second": 67.577, "step": 438000 }, { "epoch": 100.5, "learning_rate": 4.659542158988471e-05, "loss": 0.9476, "step": 438500 }, { "epoch": 100.5, "eval_loss": 1.563767910003662, "eval_runtime": 8.6869, "eval_samples_per_second": 540.236, "eval_steps_per_second": 67.573, "step": 438500 }, { "epoch": 100.62, "learning_rate": 4.658281471133587e-05, "loss": 0.951, "step": 439000 }, { "epoch": 100.62, "eval_loss": 1.5975843667984009, "eval_runtime": 8.6885, "eval_samples_per_second": 540.139, "eval_steps_per_second": 67.561, "step": 439000 }, { "epoch": 100.73, "learning_rate": 4.65701862467771e-05, "loss": 0.9548, "step": 439500 }, { "epoch": 100.73, "eval_loss": 1.5861992835998535, "eval_runtime": 8.6888, "eval_samples_per_second": 540.123, "eval_steps_per_second": 67.559, "step": 439500 }, { "epoch": 100.85, "learning_rate": 4.655753620883872e-05, "loss": 0.9589, "step": 440000 }, { "epoch": 100.85, "eval_loss": 1.5851925611495972, "eval_runtime": 8.6795, "eval_samples_per_second": 540.697, "eval_steps_per_second": 67.63, "step": 440000 }, { "epoch": 100.96, "learning_rate": 4.654486461017264e-05, "loss": 0.9639, "step": 440500 }, { "epoch": 100.96, "eval_loss": 1.5885902643203735, "eval_runtime": 8.6901, "eval_samples_per_second": 540.041, "eval_steps_per_second": 67.548, "step": 440500 }, { "epoch": 101.08, "learning_rate": 4.653217146345236e-05, "loss": 0.9522, "step": 441000 }, { "epoch": 101.08, "eval_loss": 1.6002954244613647, "eval_runtime": 8.6984, "eval_samples_per_second": 539.526, "eval_steps_per_second": 67.484, "step": 441000 }, { "epoch": 101.19, "learning_rate": 4.651945678137287e-05, "loss": 0.945, "step": 441500 }, { "epoch": 101.19, "eval_loss": 1.5742580890655518, "eval_runtime": 8.686, "eval_samples_per_second": 540.294, "eval_steps_per_second": 67.58, "step": 441500 }, { "epoch": 101.31, "learning_rate": 4.650672057665076e-05, "loss": 0.9524, "step": 442000 }, { "epoch": 101.31, "eval_loss": 1.5954678058624268, "eval_runtime": 8.6869, "eval_samples_per_second": 540.239, "eval_steps_per_second": 67.573, "step": 442000 }, { "epoch": 101.42, "learning_rate": 4.649396286202411e-05, "loss": 0.9569, "step": 442500 }, { "epoch": 101.42, "eval_loss": 1.5880545377731323, "eval_runtime": 8.6891, "eval_samples_per_second": 540.099, "eval_steps_per_second": 67.556, "step": 442500 }, { "epoch": 101.54, "learning_rate": 4.648118365025252e-05, "loss": 0.9565, "step": 443000 }, { "epoch": 101.54, "eval_loss": 1.576303482055664, "eval_runtime": 8.6908, "eval_samples_per_second": 539.998, "eval_steps_per_second": 67.543, "step": 443000 }, { "epoch": 101.65, "learning_rate": 4.6468382954117103e-05, "loss": 0.9546, "step": 443500 }, { "epoch": 101.65, "eval_loss": 1.5810226202011108, "eval_runtime": 8.688, "eval_samples_per_second": 540.169, "eval_steps_per_second": 67.564, "step": 443500 }, { "epoch": 101.76, "learning_rate": 4.645556078642045e-05, "loss": 0.9559, "step": 444000 }, { "epoch": 101.76, "eval_loss": 1.591800570487976, "eval_runtime": 8.6839, "eval_samples_per_second": 540.424, "eval_steps_per_second": 67.596, "step": 444000 }, { "epoch": 101.88, "learning_rate": 4.6442717159986616e-05, "loss": 0.9547, "step": 444500 }, { "epoch": 101.88, "eval_loss": 1.5807923078536987, "eval_runtime": 8.6908, "eval_samples_per_second": 539.994, "eval_steps_per_second": 67.542, "step": 444500 }, { "epoch": 101.99, "learning_rate": 4.642985208766113e-05, "loss": 0.9559, "step": 445000 }, { "epoch": 101.99, "eval_loss": 1.6016989946365356, "eval_runtime": 8.6875, "eval_samples_per_second": 540.202, "eval_steps_per_second": 67.568, "step": 445000 }, { "epoch": 102.11, "learning_rate": 4.6416965582310984e-05, "loss": 0.9455, "step": 445500 }, { "epoch": 102.11, "eval_loss": 1.559360384941101, "eval_runtime": 8.6913, "eval_samples_per_second": 539.967, "eval_steps_per_second": 67.539, "step": 445500 }, { "epoch": 102.22, "learning_rate": 4.640405765682458e-05, "loss": 0.9454, "step": 446000 }, { "epoch": 102.22, "eval_loss": 1.5626506805419922, "eval_runtime": 8.6895, "eval_samples_per_second": 540.076, "eval_steps_per_second": 67.553, "step": 446000 }, { "epoch": 102.34, "learning_rate": 4.639112832411176e-05, "loss": 0.9546, "step": 446500 }, { "epoch": 102.34, "eval_loss": 1.5822385549545288, "eval_runtime": 8.6845, "eval_samples_per_second": 540.389, "eval_steps_per_second": 67.592, "step": 446500 }, { "epoch": 102.45, "learning_rate": 4.637817759710375e-05, "loss": 0.9529, "step": 447000 }, { "epoch": 102.45, "eval_loss": 1.5959174633026123, "eval_runtime": 8.6912, "eval_samples_per_second": 539.969, "eval_steps_per_second": 67.539, "step": 447000 }, { "epoch": 102.57, "learning_rate": 4.6365205488753225e-05, "loss": 0.9582, "step": 447500 }, { "epoch": 102.57, "eval_loss": 1.5704947710037231, "eval_runtime": 8.6859, "eval_samples_per_second": 540.299, "eval_steps_per_second": 67.581, "step": 447500 }, { "epoch": 102.68, "learning_rate": 4.63522120120342e-05, "loss": 0.9492, "step": 448000 }, { "epoch": 102.68, "eval_loss": 1.5808358192443848, "eval_runtime": 8.6857, "eval_samples_per_second": 540.312, "eval_steps_per_second": 67.582, "step": 448000 }, { "epoch": 102.8, "learning_rate": 4.633919717994207e-05, "loss": 0.949, "step": 448500 }, { "epoch": 102.8, "eval_loss": 1.5768998861312866, "eval_runtime": 8.6875, "eval_samples_per_second": 540.203, "eval_steps_per_second": 67.569, "step": 448500 }, { "epoch": 102.91, "learning_rate": 4.632616100549361e-05, "loss": 0.955, "step": 449000 }, { "epoch": 102.91, "eval_loss": 1.5817002058029175, "eval_runtime": 8.7023, "eval_samples_per_second": 539.28, "eval_steps_per_second": 67.453, "step": 449000 }, { "epoch": 103.03, "learning_rate": 4.6313103501726905e-05, "loss": 0.9581, "step": 449500 }, { "epoch": 103.03, "eval_loss": 1.5687384605407715, "eval_runtime": 8.6866, "eval_samples_per_second": 540.259, "eval_steps_per_second": 67.575, "step": 449500 }, { "epoch": 103.14, "learning_rate": 4.63000246817014e-05, "loss": 0.9446, "step": 450000 }, { "epoch": 103.14, "eval_loss": 1.580004096031189, "eval_runtime": 8.6909, "eval_samples_per_second": 539.991, "eval_steps_per_second": 67.542, "step": 450000 }, { "epoch": 103.25, "learning_rate": 4.628692455849786e-05, "loss": 0.9404, "step": 450500 }, { "epoch": 103.25, "eval_loss": 1.594008445739746, "eval_runtime": 8.6884, "eval_samples_per_second": 540.144, "eval_steps_per_second": 67.561, "step": 450500 }, { "epoch": 103.37, "learning_rate": 4.627380314521832e-05, "loss": 0.9503, "step": 451000 }, { "epoch": 103.37, "eval_loss": 1.5766334533691406, "eval_runtime": 8.6894, "eval_samples_per_second": 540.086, "eval_steps_per_second": 67.554, "step": 451000 }, { "epoch": 103.48, "learning_rate": 4.626066045498617e-05, "loss": 0.9503, "step": 451500 }, { "epoch": 103.48, "eval_loss": 1.569437861442566, "eval_runtime": 8.6862, "eval_samples_per_second": 540.285, "eval_steps_per_second": 67.579, "step": 451500 }, { "epoch": 103.6, "learning_rate": 4.624749650094603e-05, "loss": 0.9514, "step": 452000 }, { "epoch": 103.6, "eval_loss": 1.5873035192489624, "eval_runtime": 8.6924, "eval_samples_per_second": 539.9, "eval_steps_per_second": 67.531, "step": 452000 }, { "epoch": 103.71, "learning_rate": 4.623431129626381e-05, "loss": 0.9524, "step": 452500 }, { "epoch": 103.71, "eval_loss": 1.5841329097747803, "eval_runtime": 8.6922, "eval_samples_per_second": 539.908, "eval_steps_per_second": 67.532, "step": 452500 }, { "epoch": 103.83, "learning_rate": 4.622110485412665e-05, "loss": 0.9553, "step": 453000 }, { "epoch": 103.83, "eval_loss": 1.5758075714111328, "eval_runtime": 8.6906, "eval_samples_per_second": 540.009, "eval_steps_per_second": 67.544, "step": 453000 }, { "epoch": 103.94, "learning_rate": 4.620787718774297e-05, "loss": 0.9537, "step": 453500 }, { "epoch": 103.94, "eval_loss": 1.5764836072921753, "eval_runtime": 8.6982, "eval_samples_per_second": 539.539, "eval_steps_per_second": 67.486, "step": 453500 }, { "epoch": 104.06, "learning_rate": 4.619462831034237e-05, "loss": 0.959, "step": 454000 }, { "epoch": 104.06, "eval_loss": 1.5831761360168457, "eval_runtime": 8.6915, "eval_samples_per_second": 539.95, "eval_steps_per_second": 67.537, "step": 454000 }, { "epoch": 104.17, "learning_rate": 4.6181358235175706e-05, "loss": 0.9422, "step": 454500 }, { "epoch": 104.17, "eval_loss": 1.5758568048477173, "eval_runtime": 8.6896, "eval_samples_per_second": 540.072, "eval_steps_per_second": 67.552, "step": 454500 }, { "epoch": 104.29, "learning_rate": 4.6168066975515014e-05, "loss": 0.9474, "step": 455000 }, { "epoch": 104.29, "eval_loss": 1.5938116312026978, "eval_runtime": 8.6947, "eval_samples_per_second": 539.757, "eval_steps_per_second": 67.513, "step": 455000 }, { "epoch": 104.4, "learning_rate": 4.6154754544653516e-05, "loss": 0.953, "step": 455500 }, { "epoch": 104.4, "eval_loss": 1.5829817056655884, "eval_runtime": 8.6918, "eval_samples_per_second": 539.933, "eval_steps_per_second": 67.535, "step": 455500 }, { "epoch": 104.52, "learning_rate": 4.614142095590562e-05, "loss": 0.9454, "step": 456000 }, { "epoch": 104.52, "eval_loss": 1.6122848987579346, "eval_runtime": 8.6919, "eval_samples_per_second": 539.931, "eval_steps_per_second": 67.534, "step": 456000 }, { "epoch": 104.63, "learning_rate": 4.612806622260689e-05, "loss": 0.9488, "step": 456500 }, { "epoch": 104.63, "eval_loss": 1.6057138442993164, "eval_runtime": 8.6899, "eval_samples_per_second": 540.053, "eval_steps_per_second": 67.55, "step": 456500 }, { "epoch": 104.74, "learning_rate": 4.611469035811403e-05, "loss": 0.9554, "step": 457000 }, { "epoch": 104.74, "eval_loss": 1.5826374292373657, "eval_runtime": 8.6895, "eval_samples_per_second": 540.079, "eval_steps_per_second": 67.553, "step": 457000 }, { "epoch": 104.86, "learning_rate": 4.6101293375804896e-05, "loss": 0.9518, "step": 457500 }, { "epoch": 104.86, "eval_loss": 1.5872045755386353, "eval_runtime": 8.6901, "eval_samples_per_second": 540.043, "eval_steps_per_second": 67.549, "step": 457500 }, { "epoch": 104.97, "learning_rate": 4.608787528907844e-05, "loss": 0.943, "step": 458000 }, { "epoch": 104.97, "eval_loss": 1.5799578428268433, "eval_runtime": 8.6918, "eval_samples_per_second": 539.937, "eval_steps_per_second": 67.535, "step": 458000 }, { "epoch": 105.09, "learning_rate": 4.607443611135474e-05, "loss": 0.9422, "step": 458500 }, { "epoch": 105.09, "eval_loss": 1.6043113470077515, "eval_runtime": 8.689, "eval_samples_per_second": 540.111, "eval_steps_per_second": 67.557, "step": 458500 }, { "epoch": 105.2, "learning_rate": 4.6060975856074966e-05, "loss": 0.9361, "step": 459000 }, { "epoch": 105.2, "eval_loss": 1.5855671167373657, "eval_runtime": 8.691, "eval_samples_per_second": 539.987, "eval_steps_per_second": 67.541, "step": 459000 }, { "epoch": 105.32, "learning_rate": 4.6047494536701366e-05, "loss": 0.9463, "step": 459500 }, { "epoch": 105.32, "eval_loss": 1.6117371320724487, "eval_runtime": 8.685, "eval_samples_per_second": 540.355, "eval_steps_per_second": 67.588, "step": 459500 }, { "epoch": 105.43, "learning_rate": 4.603399216671726e-05, "loss": 0.9388, "step": 460000 }, { "epoch": 105.43, "eval_loss": 1.5906734466552734, "eval_runtime": 8.6923, "eval_samples_per_second": 539.906, "eval_steps_per_second": 67.531, "step": 460000 }, { "epoch": 105.55, "learning_rate": 4.6020468759627e-05, "loss": 0.9428, "step": 460500 }, { "epoch": 105.55, "eval_loss": 1.5808141231536865, "eval_runtime": 8.6877, "eval_samples_per_second": 540.187, "eval_steps_per_second": 67.566, "step": 460500 }, { "epoch": 105.66, "learning_rate": 4.6006924328956e-05, "loss": 0.9489, "step": 461000 }, { "epoch": 105.66, "eval_loss": 1.574753999710083, "eval_runtime": 8.6896, "eval_samples_per_second": 540.073, "eval_steps_per_second": 67.552, "step": 461000 }, { "epoch": 105.78, "learning_rate": 4.599335888825071e-05, "loss": 0.944, "step": 461500 }, { "epoch": 105.78, "eval_loss": 1.5739091634750366, "eval_runtime": 8.6952, "eval_samples_per_second": 539.725, "eval_steps_per_second": 67.509, "step": 461500 }, { "epoch": 105.89, "learning_rate": 4.597977245107857e-05, "loss": 0.9507, "step": 462000 }, { "epoch": 105.89, "eval_loss": 1.5659784078598022, "eval_runtime": 8.6872, "eval_samples_per_second": 540.222, "eval_steps_per_second": 67.571, "step": 462000 }, { "epoch": 106.01, "learning_rate": 4.596616503102803e-05, "loss": 0.9531, "step": 462500 }, { "epoch": 106.01, "eval_loss": 1.5911744832992554, "eval_runtime": 8.6881, "eval_samples_per_second": 540.166, "eval_steps_per_second": 67.564, "step": 462500 }, { "epoch": 106.12, "learning_rate": 4.595253664170852e-05, "loss": 0.9409, "step": 463000 }, { "epoch": 106.12, "eval_loss": 1.5776426792144775, "eval_runtime": 8.6906, "eval_samples_per_second": 540.008, "eval_steps_per_second": 67.544, "step": 463000 }, { "epoch": 106.23, "learning_rate": 4.593888729675046e-05, "loss": 0.9392, "step": 463500 }, { "epoch": 106.23, "eval_loss": 1.5975666046142578, "eval_runtime": 8.6922, "eval_samples_per_second": 539.91, "eval_steps_per_second": 67.532, "step": 463500 }, { "epoch": 106.35, "learning_rate": 4.59252170098052e-05, "loss": 0.9413, "step": 464000 }, { "epoch": 106.35, "eval_loss": 1.5744978189468384, "eval_runtime": 8.6908, "eval_samples_per_second": 539.994, "eval_steps_per_second": 67.542, "step": 464000 }, { "epoch": 106.46, "learning_rate": 4.591152579454505e-05, "loss": 0.9509, "step": 464500 }, { "epoch": 106.46, "eval_loss": 1.5883268117904663, "eval_runtime": 8.688, "eval_samples_per_second": 540.17, "eval_steps_per_second": 67.564, "step": 464500 }, { "epoch": 106.58, "learning_rate": 4.589781366466327e-05, "loss": 0.9434, "step": 465000 }, { "epoch": 106.58, "eval_loss": 1.5891417264938354, "eval_runtime": 8.6852, "eval_samples_per_second": 540.344, "eval_steps_per_second": 67.586, "step": 465000 }, { "epoch": 106.69, "learning_rate": 4.5884080633874005e-05, "loss": 0.945, "step": 465500 }, { "epoch": 106.69, "eval_loss": 1.5641229152679443, "eval_runtime": 8.6924, "eval_samples_per_second": 539.897, "eval_steps_per_second": 67.53, "step": 465500 }, { "epoch": 106.81, "learning_rate": 4.587032671591232e-05, "loss": 0.9486, "step": 466000 }, { "epoch": 106.81, "eval_loss": 1.593518614768982, "eval_runtime": 8.6896, "eval_samples_per_second": 540.071, "eval_steps_per_second": 67.552, "step": 466000 }, { "epoch": 106.92, "learning_rate": 4.5856551924534175e-05, "loss": 0.945, "step": 466500 }, { "epoch": 106.92, "eval_loss": 1.5784742832183838, "eval_runtime": 8.6852, "eval_samples_per_second": 540.344, "eval_steps_per_second": 67.586, "step": 466500 }, { "epoch": 107.04, "learning_rate": 4.58427562735164e-05, "loss": 0.9533, "step": 467000 }, { "epoch": 107.04, "eval_loss": 1.6058757305145264, "eval_runtime": 8.6932, "eval_samples_per_second": 539.846, "eval_steps_per_second": 67.524, "step": 467000 }, { "epoch": 107.15, "learning_rate": 4.582893977665669e-05, "loss": 0.9417, "step": 467500 }, { "epoch": 107.15, "eval_loss": 1.601674199104309, "eval_runtime": 8.695, "eval_samples_per_second": 539.738, "eval_steps_per_second": 67.51, "step": 467500 }, { "epoch": 107.27, "learning_rate": 4.581510244777357e-05, "loss": 0.9355, "step": 468000 }, { "epoch": 107.27, "eval_loss": 1.581628680229187, "eval_runtime": 8.693, "eval_samples_per_second": 539.858, "eval_steps_per_second": 67.525, "step": 468000 }, { "epoch": 107.38, "learning_rate": 4.580124430070644e-05, "loss": 0.9406, "step": 468500 }, { "epoch": 107.38, "eval_loss": 1.5667972564697266, "eval_runtime": 8.6904, "eval_samples_per_second": 540.02, "eval_steps_per_second": 67.546, "step": 468500 }, { "epoch": 107.49, "learning_rate": 4.578736534931549e-05, "loss": 0.9377, "step": 469000 }, { "epoch": 107.49, "eval_loss": 1.5896223783493042, "eval_runtime": 8.6918, "eval_samples_per_second": 539.936, "eval_steps_per_second": 67.535, "step": 469000 }, { "epoch": 107.61, "learning_rate": 4.5773465607481725e-05, "loss": 0.9443, "step": 469500 }, { "epoch": 107.61, "eval_loss": 1.574208378791809, "eval_runtime": 8.6931, "eval_samples_per_second": 539.856, "eval_steps_per_second": 67.525, "step": 469500 }, { "epoch": 107.72, "learning_rate": 4.575954508910694e-05, "loss": 0.9495, "step": 470000 }, { "epoch": 107.72, "eval_loss": 1.5862576961517334, "eval_runtime": 8.6938, "eval_samples_per_second": 539.807, "eval_steps_per_second": 67.519, "step": 470000 }, { "epoch": 107.84, "learning_rate": 4.574560380811372e-05, "loss": 0.9494, "step": 470500 }, { "epoch": 107.84, "eval_loss": 1.5849136114120483, "eval_runtime": 8.6921, "eval_samples_per_second": 539.913, "eval_steps_per_second": 67.532, "step": 470500 }, { "epoch": 107.95, "learning_rate": 4.573164177844542e-05, "loss": 0.9412, "step": 471000 }, { "epoch": 107.95, "eval_loss": 1.5905324220657349, "eval_runtime": 8.6936, "eval_samples_per_second": 539.82, "eval_steps_per_second": 67.521, "step": 471000 }, { "epoch": 108.07, "learning_rate": 4.571765901406612e-05, "loss": 0.9421, "step": 471500 }, { "epoch": 108.07, "eval_loss": 1.5828732252120972, "eval_runtime": 8.6901, "eval_samples_per_second": 540.037, "eval_steps_per_second": 67.548, "step": 471500 }, { "epoch": 108.18, "learning_rate": 4.570365552896066e-05, "loss": 0.9346, "step": 472000 }, { "epoch": 108.18, "eval_loss": 1.5816245079040527, "eval_runtime": 8.6908, "eval_samples_per_second": 539.999, "eval_steps_per_second": 67.543, "step": 472000 }, { "epoch": 108.3, "learning_rate": 4.56896313371346e-05, "loss": 0.9417, "step": 472500 }, { "epoch": 108.3, "eval_loss": 1.5822218656539917, "eval_runtime": 8.6882, "eval_samples_per_second": 540.157, "eval_steps_per_second": 67.563, "step": 472500 }, { "epoch": 108.41, "learning_rate": 4.5675586452614205e-05, "loss": 0.954, "step": 473000 }, { "epoch": 108.41, "eval_loss": 1.5830605030059814, "eval_runtime": 8.6899, "eval_samples_per_second": 540.054, "eval_steps_per_second": 67.55, "step": 473000 }, { "epoch": 108.53, "learning_rate": 4.5661520889446446e-05, "loss": 0.9461, "step": 473500 }, { "epoch": 108.53, "eval_loss": 1.6033912897109985, "eval_runtime": 8.6879, "eval_samples_per_second": 540.18, "eval_steps_per_second": 67.566, "step": 473500 }, { "epoch": 108.64, "learning_rate": 4.564743466169896e-05, "loss": 0.9462, "step": 474000 }, { "epoch": 108.64, "eval_loss": 1.5947974920272827, "eval_runtime": 8.7021, "eval_samples_per_second": 539.297, "eval_steps_per_second": 67.455, "step": 474000 }, { "epoch": 108.76, "learning_rate": 4.5633327783460065e-05, "loss": 0.9438, "step": 474500 }, { "epoch": 108.76, "eval_loss": 1.6093475818634033, "eval_runtime": 8.694, "eval_samples_per_second": 539.8, "eval_steps_per_second": 67.518, "step": 474500 }, { "epoch": 108.87, "learning_rate": 4.561920026883872e-05, "loss": 0.9459, "step": 475000 }, { "epoch": 108.87, "eval_loss": 1.5812232494354248, "eval_runtime": 8.6924, "eval_samples_per_second": 539.895, "eval_steps_per_second": 67.53, "step": 475000 }, { "epoch": 108.98, "learning_rate": 4.5605052131964545e-05, "loss": 0.9513, "step": 475500 }, { "epoch": 108.98, "eval_loss": 1.5897305011749268, "eval_runtime": 8.6934, "eval_samples_per_second": 539.836, "eval_steps_per_second": 67.523, "step": 475500 }, { "epoch": 109.1, "learning_rate": 4.559088338698776e-05, "loss": 0.9382, "step": 476000 }, { "epoch": 109.1, "eval_loss": 1.5933386087417603, "eval_runtime": 8.6915, "eval_samples_per_second": 539.952, "eval_steps_per_second": 67.537, "step": 476000 }, { "epoch": 109.21, "learning_rate": 4.5576694048079213e-05, "loss": 0.9434, "step": 476500 }, { "epoch": 109.21, "eval_loss": 1.578665852546692, "eval_runtime": 8.69, "eval_samples_per_second": 540.044, "eval_steps_per_second": 67.549, "step": 476500 }, { "epoch": 109.33, "learning_rate": 4.556248412943034e-05, "loss": 0.9418, "step": 477000 }, { "epoch": 109.33, "eval_loss": 1.5978331565856934, "eval_runtime": 8.6961, "eval_samples_per_second": 539.669, "eval_steps_per_second": 67.502, "step": 477000 }, { "epoch": 109.44, "learning_rate": 4.5548253645253155e-05, "loss": 0.9472, "step": 477500 }, { "epoch": 109.44, "eval_loss": 1.592658281326294, "eval_runtime": 8.697, "eval_samples_per_second": 539.61, "eval_steps_per_second": 67.494, "step": 477500 }, { "epoch": 109.56, "learning_rate": 4.553400260978027e-05, "loss": 0.9443, "step": 478000 }, { "epoch": 109.56, "eval_loss": 1.6018778085708618, "eval_runtime": 8.6935, "eval_samples_per_second": 539.831, "eval_steps_per_second": 67.522, "step": 478000 }, { "epoch": 109.67, "learning_rate": 4.5519731037264824e-05, "loss": 0.9622, "step": 478500 }, { "epoch": 109.67, "eval_loss": 1.588700294494629, "eval_runtime": 8.6901, "eval_samples_per_second": 540.042, "eval_steps_per_second": 67.548, "step": 478500 }, { "epoch": 109.79, "learning_rate": 4.550543894198049e-05, "loss": 0.9449, "step": 479000 }, { "epoch": 109.79, "eval_loss": 1.605776309967041, "eval_runtime": 8.6917, "eval_samples_per_second": 539.941, "eval_steps_per_second": 67.536, "step": 479000 }, { "epoch": 109.9, "learning_rate": 4.549112633822149e-05, "loss": 0.9514, "step": 479500 }, { "epoch": 109.9, "eval_loss": 1.5965303182601929, "eval_runtime": 8.691, "eval_samples_per_second": 539.985, "eval_steps_per_second": 67.541, "step": 479500 }, { "epoch": 110.02, "learning_rate": 4.547679324030255e-05, "loss": 0.9416, "step": 480000 }, { "epoch": 110.02, "eval_loss": 1.5985066890716553, "eval_runtime": 8.6919, "eval_samples_per_second": 539.925, "eval_steps_per_second": 67.534, "step": 480000 }, { "epoch": 110.13, "learning_rate": 4.5462439662558875e-05, "loss": 0.9368, "step": 480500 }, { "epoch": 110.13, "eval_loss": 1.5850844383239746, "eval_runtime": 8.6953, "eval_samples_per_second": 539.715, "eval_steps_per_second": 67.507, "step": 480500 }, { "epoch": 110.25, "learning_rate": 4.5448065619346175e-05, "loss": 0.9386, "step": 481000 }, { "epoch": 110.25, "eval_loss": 1.617491364479065, "eval_runtime": 8.691, "eval_samples_per_second": 539.981, "eval_steps_per_second": 67.541, "step": 481000 }, { "epoch": 110.36, "learning_rate": 4.543367112504062e-05, "loss": 0.9456, "step": 481500 }, { "epoch": 110.36, "eval_loss": 1.6541961431503296, "eval_runtime": 8.6877, "eval_samples_per_second": 540.19, "eval_steps_per_second": 67.567, "step": 481500 }, { "epoch": 110.47, "learning_rate": 4.541925619403885e-05, "loss": 0.9651, "step": 482000 }, { "epoch": 110.47, "eval_loss": 1.6313079595565796, "eval_runtime": 8.6956, "eval_samples_per_second": 539.7, "eval_steps_per_second": 67.506, "step": 482000 }, { "epoch": 110.59, "learning_rate": 4.5404820840757914e-05, "loss": 0.9954, "step": 482500 }, { "epoch": 110.59, "eval_loss": 1.6509681940078735, "eval_runtime": 8.6906, "eval_samples_per_second": 540.009, "eval_steps_per_second": 67.544, "step": 482500 }, { "epoch": 110.7, "learning_rate": 4.53903650796353e-05, "loss": 1.012, "step": 483000 }, { "epoch": 110.7, "eval_loss": 1.6777703762054443, "eval_runtime": 8.6884, "eval_samples_per_second": 540.145, "eval_steps_per_second": 67.561, "step": 483000 }, { "epoch": 110.82, "learning_rate": 4.537588892512892e-05, "loss": 1.0768, "step": 483500 }, { "epoch": 110.82, "eval_loss": 1.8629058599472046, "eval_runtime": 8.6886, "eval_samples_per_second": 540.134, "eval_steps_per_second": 67.56, "step": 483500 }, { "epoch": 110.93, "learning_rate": 4.536139239171706e-05, "loss": 1.1566, "step": 484000 }, { "epoch": 110.93, "eval_loss": 1.6669704914093018, "eval_runtime": 8.6976, "eval_samples_per_second": 539.576, "eval_steps_per_second": 67.49, "step": 484000 }, { "epoch": 111.05, "learning_rate": 4.534687549389841e-05, "loss": 1.0188, "step": 484500 }, { "epoch": 111.05, "eval_loss": 1.6560417413711548, "eval_runtime": 8.6924, "eval_samples_per_second": 539.896, "eval_steps_per_second": 67.53, "step": 484500 }, { "epoch": 111.16, "learning_rate": 4.5332338246192014e-05, "loss": 1.3486, "step": 485000 }, { "epoch": 111.16, "eval_loss": 1.6922504901885986, "eval_runtime": 8.6907, "eval_samples_per_second": 540.003, "eval_steps_per_second": 67.544, "step": 485000 }, { "epoch": 111.28, "learning_rate": 4.531778066313728e-05, "loss": 1.7851, "step": 485500 }, { "epoch": 111.28, "eval_loss": 2.6232047080993652, "eval_runtime": 8.6862, "eval_samples_per_second": 540.279, "eval_steps_per_second": 67.578, "step": 485500 }, { "epoch": 111.39, "learning_rate": 4.530320275929392e-05, "loss": 2.43, "step": 486000 }, { "epoch": 111.39, "eval_loss": 2.921654462814331, "eval_runtime": 8.6891, "eval_samples_per_second": 540.103, "eval_steps_per_second": 67.556, "step": 486000 }, { "epoch": 111.51, "learning_rate": 4.528860454924203e-05, "loss": 2.3299, "step": 486500 }, { "epoch": 111.51, "eval_loss": 3.3200864791870117, "eval_runtime": 8.6962, "eval_samples_per_second": 539.661, "eval_steps_per_second": 67.501, "step": 486500 }, { "epoch": 111.62, "learning_rate": 4.527398604758195e-05, "loss": 2.4757, "step": 487000 }, { "epoch": 111.62, "eval_loss": 3.4902215003967285, "eval_runtime": 8.6893, "eval_samples_per_second": 540.091, "eval_steps_per_second": 67.554, "step": 487000 }, { "epoch": 111.73, "learning_rate": 4.5259347268934374e-05, "loss": 2.3902, "step": 487500 }, { "epoch": 111.73, "eval_loss": 2.9714717864990234, "eval_runtime": 8.6886, "eval_samples_per_second": 540.134, "eval_steps_per_second": 67.56, "step": 487500 }, { "epoch": 111.85, "learning_rate": 4.524468822794023e-05, "loss": 2.2967, "step": 488000 }, { "epoch": 111.85, "eval_loss": 2.468665599822998, "eval_runtime": 8.7398, "eval_samples_per_second": 536.97, "eval_steps_per_second": 67.164, "step": 488000 }, { "epoch": 111.96, "learning_rate": 4.5230008939260735e-05, "loss": 1.8467, "step": 488500 }, { "epoch": 111.96, "eval_loss": 2.3586478233337402, "eval_runtime": 8.7325, "eval_samples_per_second": 537.419, "eval_steps_per_second": 67.22, "step": 488500 }, { "epoch": 112.08, "learning_rate": 4.521530941757735e-05, "loss": 1.9057, "step": 489000 }, { "epoch": 112.08, "eval_loss": 1.9225101470947266, "eval_runtime": 8.8464, "eval_samples_per_second": 530.499, "eval_steps_per_second": 66.355, "step": 489000 }, { "epoch": 112.19, "learning_rate": 4.5200589677591764e-05, "loss": 1.4912, "step": 489500 }, { "epoch": 112.19, "eval_loss": 1.9203736782073975, "eval_runtime": 8.7456, "eval_samples_per_second": 536.615, "eval_steps_per_second": 67.12, "step": 489500 }, { "epoch": 112.31, "learning_rate": 4.518584973402591e-05, "loss": 1.4332, "step": 490000 }, { "epoch": 112.31, "eval_loss": 2.005058765411377, "eval_runtime": 8.7426, "eval_samples_per_second": 536.799, "eval_steps_per_second": 67.143, "step": 490000 }, { "epoch": 112.42, "learning_rate": 4.517108960162191e-05, "loss": 1.1768, "step": 490500 }, { "epoch": 112.42, "eval_loss": 1.868516206741333, "eval_runtime": 8.7271, "eval_samples_per_second": 537.749, "eval_steps_per_second": 67.262, "step": 490500 }, { "epoch": 112.54, "learning_rate": 4.515630929514208e-05, "loss": 1.3082, "step": 491000 }, { "epoch": 112.54, "eval_loss": 1.8533260822296143, "eval_runtime": 8.7324, "eval_samples_per_second": 537.421, "eval_steps_per_second": 67.221, "step": 491000 }, { "epoch": 112.65, "learning_rate": 4.514150882936892e-05, "loss": 1.1113, "step": 491500 }, { "epoch": 112.65, "eval_loss": 1.6601868867874146, "eval_runtime": 8.7364, "eval_samples_per_second": 537.175, "eval_steps_per_second": 67.19, "step": 491500 }, { "epoch": 112.77, "learning_rate": 4.512668821910507e-05, "loss": 1.1341, "step": 492000 }, { "epoch": 112.77, "eval_loss": 1.6324650049209595, "eval_runtime": 8.7329, "eval_samples_per_second": 537.395, "eval_steps_per_second": 67.217, "step": 492000 }, { "epoch": 112.88, "learning_rate": 4.5111847479173354e-05, "loss": 1.1337, "step": 492500 }, { "epoch": 112.88, "eval_loss": 1.6836940050125122, "eval_runtime": 8.7331, "eval_samples_per_second": 537.383, "eval_steps_per_second": 67.216, "step": 492500 }, { "epoch": 113.0, "learning_rate": 4.50969866244167e-05, "loss": 1.0734, "step": 493000 }, { "epoch": 113.0, "eval_loss": 1.7372052669525146, "eval_runtime": 8.7252, "eval_samples_per_second": 537.866, "eval_steps_per_second": 67.276, "step": 493000 }, { "epoch": 113.11, "learning_rate": 4.5082105669698164e-05, "loss": 1.0593, "step": 493500 }, { "epoch": 113.11, "eval_loss": 1.7368478775024414, "eval_runtime": 8.736, "eval_samples_per_second": 537.202, "eval_steps_per_second": 67.193, "step": 493500 }, { "epoch": 113.22, "learning_rate": 4.50672046299009e-05, "loss": 1.0108, "step": 494000 }, { "epoch": 113.22, "eval_loss": 1.6647762060165405, "eval_runtime": 8.7329, "eval_samples_per_second": 537.393, "eval_steps_per_second": 67.217, "step": 494000 }, { "epoch": 113.34, "learning_rate": 4.505228351992816e-05, "loss": 0.9764, "step": 494500 }, { "epoch": 113.34, "eval_loss": 1.6814993619918823, "eval_runtime": 8.7265, "eval_samples_per_second": 537.79, "eval_steps_per_second": 67.267, "step": 494500 }, { "epoch": 113.45, "learning_rate": 4.503734235470326e-05, "loss": 0.9977, "step": 495000 }, { "epoch": 113.45, "eval_loss": 1.6229385137557983, "eval_runtime": 8.7358, "eval_samples_per_second": 537.217, "eval_steps_per_second": 67.195, "step": 495000 }, { "epoch": 113.57, "learning_rate": 4.5022381149169576e-05, "loss": 0.9949, "step": 495500 }, { "epoch": 113.57, "eval_loss": 1.6421515941619873, "eval_runtime": 8.7337, "eval_samples_per_second": 537.343, "eval_steps_per_second": 67.211, "step": 495500 }, { "epoch": 113.68, "learning_rate": 4.5007399918290525e-05, "loss": 0.9828, "step": 496000 }, { "epoch": 113.68, "eval_loss": 1.6093535423278809, "eval_runtime": 8.7261, "eval_samples_per_second": 537.81, "eval_steps_per_second": 67.269, "step": 496000 }, { "epoch": 113.8, "learning_rate": 4.499239867704958e-05, "loss": 0.9732, "step": 496500 }, { "epoch": 113.8, "eval_loss": 1.614203929901123, "eval_runtime": 8.7976, "eval_samples_per_second": 533.44, "eval_steps_per_second": 66.723, "step": 496500 }, { "epoch": 113.91, "learning_rate": 4.4977377440450164e-05, "loss": 0.9748, "step": 497000 }, { "epoch": 113.91, "eval_loss": 1.9841771125793457, "eval_runtime": 8.6864, "eval_samples_per_second": 540.272, "eval_steps_per_second": 67.577, "step": 497000 }, { "epoch": 114.03, "learning_rate": 4.496233622351576e-05, "loss": 0.9914, "step": 497500 }, { "epoch": 114.03, "eval_loss": 1.6946983337402344, "eval_runtime": 8.69, "eval_samples_per_second": 540.043, "eval_steps_per_second": 67.549, "step": 497500 }, { "epoch": 114.14, "learning_rate": 4.4947275041289814e-05, "loss": 0.9742, "step": 498000 }, { "epoch": 114.14, "eval_loss": 1.6249239444732666, "eval_runtime": 8.6888, "eval_samples_per_second": 540.12, "eval_steps_per_second": 67.558, "step": 498000 }, { "epoch": 114.26, "learning_rate": 4.493219390883573e-05, "loss": 0.9847, "step": 498500 }, { "epoch": 114.26, "eval_loss": 1.6804436445236206, "eval_runtime": 8.6904, "eval_samples_per_second": 540.024, "eval_steps_per_second": 67.546, "step": 498500 }, { "epoch": 114.37, "learning_rate": 4.491709284123688e-05, "loss": 0.9829, "step": 499000 }, { "epoch": 114.37, "eval_loss": 1.6171901226043701, "eval_runtime": 8.6933, "eval_samples_per_second": 539.841, "eval_steps_per_second": 67.523, "step": 499000 }, { "epoch": 114.49, "learning_rate": 4.490197185359656e-05, "loss": 0.9677, "step": 499500 }, { "epoch": 114.49, "eval_loss": 1.6243633031845093, "eval_runtime": 8.692, "eval_samples_per_second": 539.919, "eval_steps_per_second": 67.533, "step": 499500 }, { "epoch": 114.6, "learning_rate": 4.4886830961038e-05, "loss": 0.9698, "step": 500000 }, { "epoch": 114.6, "eval_loss": 1.6156061887741089, "eval_runtime": 8.6918, "eval_samples_per_second": 539.937, "eval_steps_per_second": 67.535, "step": 500000 }, { "epoch": 114.71, "learning_rate": 4.487167017870434e-05, "loss": 0.9755, "step": 500500 }, { "epoch": 114.71, "eval_loss": 1.607391119003296, "eval_runtime": 8.6877, "eval_samples_per_second": 540.192, "eval_steps_per_second": 67.567, "step": 500500 }, { "epoch": 114.83, "learning_rate": 4.485648952175859e-05, "loss": 0.9661, "step": 501000 }, { "epoch": 114.83, "eval_loss": 1.5920171737670898, "eval_runtime": 8.695, "eval_samples_per_second": 539.738, "eval_steps_per_second": 67.51, "step": 501000 }, { "epoch": 114.94, "learning_rate": 4.484128900538367e-05, "loss": 0.9707, "step": 501500 }, { "epoch": 114.94, "eval_loss": 1.6026335954666138, "eval_runtime": 8.6903, "eval_samples_per_second": 540.027, "eval_steps_per_second": 67.547, "step": 501500 }, { "epoch": 115.06, "learning_rate": 4.4826068644782344e-05, "loss": 0.9672, "step": 502000 }, { "epoch": 115.06, "eval_loss": 1.6032124757766724, "eval_runtime": 8.6889, "eval_samples_per_second": 540.115, "eval_steps_per_second": 67.558, "step": 502000 }, { "epoch": 115.17, "learning_rate": 4.481082845517722e-05, "loss": 0.9485, "step": 502500 }, { "epoch": 115.17, "eval_loss": 1.6159048080444336, "eval_runtime": 8.6894, "eval_samples_per_second": 540.082, "eval_steps_per_second": 67.553, "step": 502500 }, { "epoch": 115.29, "learning_rate": 4.479556845181074e-05, "loss": 0.9595, "step": 503000 }, { "epoch": 115.29, "eval_loss": 1.594183087348938, "eval_runtime": 8.6901, "eval_samples_per_second": 540.041, "eval_steps_per_second": 67.548, "step": 503000 }, { "epoch": 115.4, "learning_rate": 4.478028864994519e-05, "loss": 0.9657, "step": 503500 }, { "epoch": 115.4, "eval_loss": 1.617986798286438, "eval_runtime": 8.698, "eval_samples_per_second": 539.548, "eval_steps_per_second": 67.487, "step": 503500 }, { "epoch": 115.52, "learning_rate": 4.4764989064862604e-05, "loss": 0.9758, "step": 504000 }, { "epoch": 115.52, "eval_loss": 1.6271494626998901, "eval_runtime": 8.688, "eval_samples_per_second": 540.173, "eval_steps_per_second": 67.565, "step": 504000 }, { "epoch": 115.63, "learning_rate": 4.474966971186486e-05, "loss": 0.9673, "step": 504500 }, { "epoch": 115.63, "eval_loss": 1.6108884811401367, "eval_runtime": 8.6897, "eval_samples_per_second": 540.063, "eval_steps_per_second": 67.551, "step": 504500 }, { "epoch": 115.75, "learning_rate": 4.4734330606273554e-05, "loss": 0.9629, "step": 505000 }, { "epoch": 115.75, "eval_loss": 1.589713215827942, "eval_runtime": 8.6913, "eval_samples_per_second": 539.968, "eval_steps_per_second": 67.539, "step": 505000 }, { "epoch": 115.86, "learning_rate": 4.4718971763430074e-05, "loss": 0.9539, "step": 505500 }, { "epoch": 115.86, "eval_loss": 1.600268006324768, "eval_runtime": 8.6911, "eval_samples_per_second": 539.979, "eval_steps_per_second": 67.541, "step": 505500 }, { "epoch": 115.98, "learning_rate": 4.4703593198695536e-05, "loss": 0.9623, "step": 506000 }, { "epoch": 115.98, "eval_loss": 1.5850238800048828, "eval_runtime": 8.6913, "eval_samples_per_second": 539.963, "eval_steps_per_second": 67.539, "step": 506000 }, { "epoch": 116.09, "learning_rate": 4.4688194927450784e-05, "loss": 0.9458, "step": 506500 }, { "epoch": 116.09, "eval_loss": 1.60283625125885, "eval_runtime": 8.6855, "eval_samples_per_second": 540.323, "eval_steps_per_second": 67.584, "step": 506500 }, { "epoch": 116.2, "learning_rate": 4.467277696509636e-05, "loss": 0.9529, "step": 507000 }, { "epoch": 116.2, "eval_loss": 1.5970717668533325, "eval_runtime": 8.6865, "eval_samples_per_second": 540.261, "eval_steps_per_second": 67.576, "step": 507000 }, { "epoch": 116.32, "learning_rate": 4.465733932705253e-05, "loss": 0.9519, "step": 507500 }, { "epoch": 116.32, "eval_loss": 1.6814393997192383, "eval_runtime": 8.6922, "eval_samples_per_second": 539.911, "eval_steps_per_second": 67.532, "step": 507500 }, { "epoch": 116.43, "learning_rate": 4.464188202875919e-05, "loss": 0.966, "step": 508000 }, { "epoch": 116.43, "eval_loss": 1.6115036010742188, "eval_runtime": 8.6869, "eval_samples_per_second": 540.236, "eval_steps_per_second": 67.573, "step": 508000 }, { "epoch": 116.55, "learning_rate": 4.4626405085675936e-05, "loss": 0.9515, "step": 508500 }, { "epoch": 116.55, "eval_loss": 1.5917835235595703, "eval_runtime": 8.6876, "eval_samples_per_second": 540.194, "eval_steps_per_second": 67.567, "step": 508500 }, { "epoch": 116.66, "learning_rate": 4.4610908513282015e-05, "loss": 0.9544, "step": 509000 }, { "epoch": 116.66, "eval_loss": 1.6245625019073486, "eval_runtime": 8.6856, "eval_samples_per_second": 540.317, "eval_steps_per_second": 67.583, "step": 509000 }, { "epoch": 116.78, "learning_rate": 4.459539232707628e-05, "loss": 0.9617, "step": 509500 }, { "epoch": 116.78, "eval_loss": 1.6053537130355835, "eval_runtime": 8.6927, "eval_samples_per_second": 539.88, "eval_steps_per_second": 67.528, "step": 509500 }, { "epoch": 116.89, "learning_rate": 4.457985654257722e-05, "loss": 0.9537, "step": 510000 }, { "epoch": 116.89, "eval_loss": 1.6154859066009521, "eval_runtime": 8.6908, "eval_samples_per_second": 539.995, "eval_steps_per_second": 67.542, "step": 510000 }, { "epoch": 117.01, "learning_rate": 4.456430117532292e-05, "loss": 0.9559, "step": 510500 }, { "epoch": 117.01, "eval_loss": 1.5867810249328613, "eval_runtime": 8.6944, "eval_samples_per_second": 539.772, "eval_steps_per_second": 67.515, "step": 510500 }, { "epoch": 117.12, "learning_rate": 4.4548726240871044e-05, "loss": 0.9442, "step": 511000 }, { "epoch": 117.12, "eval_loss": 1.6334598064422607, "eval_runtime": 8.6933, "eval_samples_per_second": 539.839, "eval_steps_per_second": 67.523, "step": 511000 }, { "epoch": 117.24, "learning_rate": 4.453313175479884e-05, "loss": 0.9561, "step": 511500 }, { "epoch": 117.24, "eval_loss": 1.6053472757339478, "eval_runtime": 8.6982, "eval_samples_per_second": 539.535, "eval_steps_per_second": 67.485, "step": 511500 }, { "epoch": 117.35, "learning_rate": 4.45175177327031e-05, "loss": 0.9457, "step": 512000 }, { "epoch": 117.35, "eval_loss": 1.6328548192977905, "eval_runtime": 8.6904, "eval_samples_per_second": 540.021, "eval_steps_per_second": 67.546, "step": 512000 }, { "epoch": 117.46, "learning_rate": 4.450188419020016e-05, "loss": 0.9531, "step": 512500 }, { "epoch": 117.46, "eval_loss": 1.6003743410110474, "eval_runtime": 8.6898, "eval_samples_per_second": 540.06, "eval_steps_per_second": 67.551, "step": 512500 }, { "epoch": 117.58, "learning_rate": 4.4486231142925865e-05, "loss": 0.9506, "step": 513000 }, { "epoch": 117.58, "eval_loss": 1.6203809976577759, "eval_runtime": 8.6869, "eval_samples_per_second": 540.238, "eval_steps_per_second": 67.573, "step": 513000 }, { "epoch": 117.69, "learning_rate": 4.447055860653559e-05, "loss": 0.9505, "step": 513500 }, { "epoch": 117.69, "eval_loss": 1.6067628860473633, "eval_runtime": 8.6846, "eval_samples_per_second": 540.383, "eval_steps_per_second": 67.591, "step": 513500 }, { "epoch": 117.81, "learning_rate": 4.445486659670419e-05, "loss": 0.9551, "step": 514000 }, { "epoch": 117.81, "eval_loss": 1.6080553531646729, "eval_runtime": 8.6907, "eval_samples_per_second": 540.003, "eval_steps_per_second": 67.544, "step": 514000 }, { "epoch": 117.92, "learning_rate": 4.443915512912601e-05, "loss": 0.9739, "step": 514500 }, { "epoch": 117.92, "eval_loss": 1.6187809705734253, "eval_runtime": 8.6902, "eval_samples_per_second": 540.034, "eval_steps_per_second": 67.547, "step": 514500 }, { "epoch": 118.04, "learning_rate": 4.442342421951482e-05, "loss": 0.9481, "step": 515000 }, { "epoch": 118.04, "eval_loss": 1.600054383277893, "eval_runtime": 8.6944, "eval_samples_per_second": 539.773, "eval_steps_per_second": 67.515, "step": 515000 }, { "epoch": 118.15, "learning_rate": 4.440767388360387e-05, "loss": 0.9424, "step": 515500 }, { "epoch": 118.15, "eval_loss": 1.6815961599349976, "eval_runtime": 8.69, "eval_samples_per_second": 540.048, "eval_steps_per_second": 67.549, "step": 515500 }, { "epoch": 118.27, "learning_rate": 4.439190413714584e-05, "loss": 0.9494, "step": 516000 }, { "epoch": 118.27, "eval_loss": 1.624975323677063, "eval_runtime": 8.6984, "eval_samples_per_second": 539.521, "eval_steps_per_second": 67.483, "step": 516000 }, { "epoch": 118.38, "learning_rate": 4.437611499591278e-05, "loss": 0.9503, "step": 516500 }, { "epoch": 118.38, "eval_loss": 1.5989420413970947, "eval_runtime": 8.6901, "eval_samples_per_second": 540.042, "eval_steps_per_second": 67.548, "step": 516500 }, { "epoch": 118.5, "learning_rate": 4.43603064756962e-05, "loss": 0.9439, "step": 517000 }, { "epoch": 118.5, "eval_loss": 1.6178922653198242, "eval_runtime": 8.6871, "eval_samples_per_second": 540.227, "eval_steps_per_second": 67.572, "step": 517000 }, { "epoch": 118.61, "learning_rate": 4.434447859230694e-05, "loss": 0.9462, "step": 517500 }, { "epoch": 118.61, "eval_loss": 1.6109418869018555, "eval_runtime": 8.694, "eval_samples_per_second": 539.795, "eval_steps_per_second": 67.518, "step": 517500 }, { "epoch": 118.73, "learning_rate": 4.4328631361575244e-05, "loss": 0.9405, "step": 518000 }, { "epoch": 118.73, "eval_loss": 1.5971952676773071, "eval_runtime": 8.6902, "eval_samples_per_second": 540.036, "eval_steps_per_second": 67.548, "step": 518000 }, { "epoch": 118.84, "learning_rate": 4.431276479935067e-05, "loss": 0.9453, "step": 518500 }, { "epoch": 118.84, "eval_loss": 1.6249332427978516, "eval_runtime": 8.6893, "eval_samples_per_second": 540.092, "eval_steps_per_second": 67.555, "step": 518500 }, { "epoch": 118.95, "learning_rate": 4.429687892150215e-05, "loss": 0.951, "step": 519000 }, { "epoch": 118.95, "eval_loss": 1.6047487258911133, "eval_runtime": 8.6908, "eval_samples_per_second": 539.999, "eval_steps_per_second": 67.543, "step": 519000 }, { "epoch": 119.07, "learning_rate": 4.42809737439179e-05, "loss": 0.9376, "step": 519500 }, { "epoch": 119.07, "eval_loss": 1.6043548583984375, "eval_runtime": 8.6869, "eval_samples_per_second": 540.237, "eval_steps_per_second": 67.573, "step": 519500 }, { "epoch": 119.18, "learning_rate": 4.4265049282505455e-05, "loss": 0.9308, "step": 520000 }, { "epoch": 119.18, "eval_loss": 1.6041561365127563, "eval_runtime": 8.6978, "eval_samples_per_second": 539.561, "eval_steps_per_second": 67.488, "step": 520000 }, { "epoch": 119.3, "learning_rate": 4.424910555319163e-05, "loss": 0.9284, "step": 520500 }, { "epoch": 119.3, "eval_loss": 1.6094601154327393, "eval_runtime": 8.6846, "eval_samples_per_second": 540.382, "eval_steps_per_second": 67.591, "step": 520500 }, { "epoch": 119.41, "learning_rate": 4.423314257192252e-05, "loss": 0.9405, "step": 521000 }, { "epoch": 119.41, "eval_loss": 1.605827808380127, "eval_runtime": 8.6865, "eval_samples_per_second": 540.261, "eval_steps_per_second": 67.576, "step": 521000 }, { "epoch": 119.53, "learning_rate": 4.421716035466347e-05, "loss": 0.9395, "step": 521500 }, { "epoch": 119.53, "eval_loss": 1.5961343050003052, "eval_runtime": 8.6883, "eval_samples_per_second": 540.153, "eval_steps_per_second": 67.562, "step": 521500 }, { "epoch": 119.64, "learning_rate": 4.4201158917399055e-05, "loss": 0.9615, "step": 522000 }, { "epoch": 119.64, "eval_loss": 1.6078680753707886, "eval_runtime": 8.689, "eval_samples_per_second": 540.107, "eval_steps_per_second": 67.557, "step": 522000 }, { "epoch": 119.76, "learning_rate": 4.4185138276133095e-05, "loss": 0.9479, "step": 522500 }, { "epoch": 119.76, "eval_loss": 1.6051418781280518, "eval_runtime": 8.6909, "eval_samples_per_second": 539.99, "eval_steps_per_second": 67.542, "step": 522500 }, { "epoch": 119.87, "learning_rate": 4.416909844688859e-05, "loss": 0.943, "step": 523000 }, { "epoch": 119.87, "eval_loss": 1.6176496744155884, "eval_runtime": 8.6989, "eval_samples_per_second": 539.495, "eval_steps_per_second": 67.48, "step": 523000 }, { "epoch": 119.99, "learning_rate": 4.4153039445707757e-05, "loss": 0.9393, "step": 523500 }, { "epoch": 119.99, "eval_loss": 1.6083084344863892, "eval_runtime": 8.6991, "eval_samples_per_second": 539.484, "eval_steps_per_second": 67.479, "step": 523500 }, { "epoch": 120.1, "learning_rate": 4.413696128865196e-05, "loss": 0.9433, "step": 524000 }, { "epoch": 120.1, "eval_loss": 1.6595702171325684, "eval_runtime": 8.6964, "eval_samples_per_second": 539.649, "eval_steps_per_second": 67.499, "step": 524000 }, { "epoch": 120.22, "learning_rate": 4.412086399180174e-05, "loss": 0.9423, "step": 524500 }, { "epoch": 120.22, "eval_loss": 1.6165319681167603, "eval_runtime": 8.6942, "eval_samples_per_second": 539.783, "eval_steps_per_second": 67.516, "step": 524500 }, { "epoch": 120.33, "learning_rate": 4.410474757125679e-05, "loss": 0.9456, "step": 525000 }, { "epoch": 120.33, "eval_loss": 1.6133010387420654, "eval_runtime": 8.6898, "eval_samples_per_second": 540.057, "eval_steps_per_second": 67.55, "step": 525000 }, { "epoch": 120.44, "learning_rate": 4.40886120431359e-05, "loss": 0.9515, "step": 525500 }, { "epoch": 120.44, "eval_loss": 1.6099945306777954, "eval_runtime": 8.6925, "eval_samples_per_second": 539.888, "eval_steps_per_second": 67.529, "step": 525500 }, { "epoch": 120.56, "learning_rate": 4.4072457423577004e-05, "loss": 0.9382, "step": 526000 }, { "epoch": 120.56, "eval_loss": 1.618138313293457, "eval_runtime": 8.6944, "eval_samples_per_second": 539.771, "eval_steps_per_second": 67.515, "step": 526000 }, { "epoch": 120.67, "learning_rate": 4.405628372873709e-05, "loss": 0.9549, "step": 526500 }, { "epoch": 120.67, "eval_loss": 1.6497670412063599, "eval_runtime": 8.6892, "eval_samples_per_second": 540.099, "eval_steps_per_second": 67.556, "step": 526500 }, { "epoch": 120.79, "learning_rate": 4.4040090974792266e-05, "loss": 0.9551, "step": 527000 }, { "epoch": 120.79, "eval_loss": 1.6196248531341553, "eval_runtime": 8.6881, "eval_samples_per_second": 540.162, "eval_steps_per_second": 67.563, "step": 527000 }, { "epoch": 120.9, "learning_rate": 4.4023879177937675e-05, "loss": 0.9576, "step": 527500 }, { "epoch": 120.9, "eval_loss": 1.5981040000915527, "eval_runtime": 8.6961, "eval_samples_per_second": 539.665, "eval_steps_per_second": 67.501, "step": 527500 }, { "epoch": 121.02, "learning_rate": 4.400764835438752e-05, "loss": 0.9494, "step": 528000 }, { "epoch": 121.02, "eval_loss": 1.6123318672180176, "eval_runtime": 8.692, "eval_samples_per_second": 539.924, "eval_steps_per_second": 67.534, "step": 528000 }, { "epoch": 121.13, "learning_rate": 4.3991398520375025e-05, "loss": 0.9303, "step": 528500 }, { "epoch": 121.13, "eval_loss": 1.6161092519760132, "eval_runtime": 8.6921, "eval_samples_per_second": 539.913, "eval_steps_per_second": 67.532, "step": 528500 }, { "epoch": 121.25, "learning_rate": 4.397512969215243e-05, "loss": 0.9491, "step": 529000 }, { "epoch": 121.25, "eval_loss": 1.5892908573150635, "eval_runtime": 8.6892, "eval_samples_per_second": 540.096, "eval_steps_per_second": 67.555, "step": 529000 }, { "epoch": 121.36, "learning_rate": 4.395884188599096e-05, "loss": 0.9459, "step": 529500 }, { "epoch": 121.36, "eval_loss": 1.6284010410308838, "eval_runtime": 8.6949, "eval_samples_per_second": 539.742, "eval_steps_per_second": 67.511, "step": 529500 }, { "epoch": 121.48, "learning_rate": 4.394253511818085e-05, "loss": 0.9405, "step": 530000 }, { "epoch": 121.48, "eval_loss": 1.6068472862243652, "eval_runtime": 8.6926, "eval_samples_per_second": 539.885, "eval_steps_per_second": 67.529, "step": 530000 }, { "epoch": 121.59, "learning_rate": 4.392620940503128e-05, "loss": 0.9492, "step": 530500 }, { "epoch": 121.59, "eval_loss": 1.6254081726074219, "eval_runtime": 8.6922, "eval_samples_per_second": 539.907, "eval_steps_per_second": 67.532, "step": 530500 }, { "epoch": 121.71, "learning_rate": 4.390986476287037e-05, "loss": 0.9433, "step": 531000 }, { "epoch": 121.71, "eval_loss": 1.6008356809616089, "eval_runtime": 8.6905, "eval_samples_per_second": 540.016, "eval_steps_per_second": 67.545, "step": 531000 }, { "epoch": 121.82, "learning_rate": 4.389350120804518e-05, "loss": 0.956, "step": 531500 }, { "epoch": 121.82, "eval_loss": 1.612384557723999, "eval_runtime": 8.696, "eval_samples_per_second": 539.676, "eval_steps_per_second": 67.503, "step": 531500 }, { "epoch": 121.93, "learning_rate": 4.3877118756921696e-05, "loss": 0.9459, "step": 532000 }, { "epoch": 121.93, "eval_loss": 1.601844072341919, "eval_runtime": 8.7073, "eval_samples_per_second": 538.975, "eval_steps_per_second": 67.415, "step": 532000 }, { "epoch": 122.05, "learning_rate": 4.38607174258848e-05, "loss": 0.9377, "step": 532500 }, { "epoch": 122.05, "eval_loss": 1.6022794246673584, "eval_runtime": 8.6932, "eval_samples_per_second": 539.846, "eval_steps_per_second": 67.524, "step": 532500 }, { "epoch": 122.16, "learning_rate": 4.384429723133824e-05, "loss": 0.9295, "step": 533000 }, { "epoch": 122.16, "eval_loss": 1.6156625747680664, "eval_runtime": 8.6955, "eval_samples_per_second": 539.703, "eval_steps_per_second": 67.506, "step": 533000 }, { "epoch": 122.28, "learning_rate": 4.382785818970465e-05, "loss": 0.9324, "step": 533500 }, { "epoch": 122.28, "eval_loss": 1.625414252281189, "eval_runtime": 8.7005, "eval_samples_per_second": 539.391, "eval_steps_per_second": 67.467, "step": 533500 }, { "epoch": 122.39, "learning_rate": 4.381140031742549e-05, "loss": 0.943, "step": 534000 }, { "epoch": 122.39, "eval_loss": 1.6159225702285767, "eval_runtime": 8.6962, "eval_samples_per_second": 539.658, "eval_steps_per_second": 67.5, "step": 534000 }, { "epoch": 122.51, "learning_rate": 4.379492363096109e-05, "loss": 0.9326, "step": 534500 }, { "epoch": 122.51, "eval_loss": 1.6080431938171387, "eval_runtime": 8.687, "eval_samples_per_second": 540.235, "eval_steps_per_second": 67.573, "step": 534500 }, { "epoch": 122.62, "learning_rate": 4.377842814679056e-05, "loss": 0.9338, "step": 535000 }, { "epoch": 122.62, "eval_loss": 1.6275850534439087, "eval_runtime": 8.69, "eval_samples_per_second": 540.049, "eval_steps_per_second": 67.549, "step": 535000 }, { "epoch": 122.74, "learning_rate": 4.3761913881411833e-05, "loss": 0.9476, "step": 535500 }, { "epoch": 122.74, "eval_loss": 1.62529456615448, "eval_runtime": 8.6871, "eval_samples_per_second": 540.224, "eval_steps_per_second": 67.571, "step": 535500 }, { "epoch": 122.85, "learning_rate": 4.374538085134161e-05, "loss": 0.9412, "step": 536000 }, { "epoch": 122.85, "eval_loss": 1.6020119190216064, "eval_runtime": 8.6868, "eval_samples_per_second": 540.244, "eval_steps_per_second": 67.574, "step": 536000 }, { "epoch": 122.97, "learning_rate": 4.372882907311538e-05, "loss": 0.9406, "step": 536500 }, { "epoch": 122.97, "eval_loss": 1.6018931865692139, "eval_runtime": 8.7004, "eval_samples_per_second": 539.399, "eval_steps_per_second": 67.468, "step": 536500 }, { "epoch": 123.08, "learning_rate": 4.3712258563287376e-05, "loss": 0.9299, "step": 537000 }, { "epoch": 123.08, "eval_loss": 1.6170152425765991, "eval_runtime": 8.6945, "eval_samples_per_second": 539.765, "eval_steps_per_second": 67.514, "step": 537000 }, { "epoch": 123.2, "learning_rate": 4.369566933843055e-05, "loss": 0.9275, "step": 537500 }, { "epoch": 123.2, "eval_loss": 1.6432937383651733, "eval_runtime": 8.69, "eval_samples_per_second": 540.043, "eval_steps_per_second": 67.549, "step": 537500 }, { "epoch": 123.31, "learning_rate": 4.367906141513658e-05, "loss": 0.9287, "step": 538000 }, { "epoch": 123.31, "eval_loss": 1.6192868947982788, "eval_runtime": 8.6934, "eval_samples_per_second": 539.835, "eval_steps_per_second": 67.523, "step": 538000 }, { "epoch": 123.42, "learning_rate": 4.366243481001586e-05, "loss": 0.9307, "step": 538500 }, { "epoch": 123.42, "eval_loss": 1.625187873840332, "eval_runtime": 8.6909, "eval_samples_per_second": 539.99, "eval_steps_per_second": 67.542, "step": 538500 }, { "epoch": 123.54, "learning_rate": 4.364578953969745e-05, "loss": 0.9323, "step": 539000 }, { "epoch": 123.54, "eval_loss": 1.624022126197815, "eval_runtime": 8.6854, "eval_samples_per_second": 540.332, "eval_steps_per_second": 67.585, "step": 539000 }, { "epoch": 123.65, "learning_rate": 4.362912562082908e-05, "loss": 0.9309, "step": 539500 }, { "epoch": 123.65, "eval_loss": 1.6090184450149536, "eval_runtime": 8.6909, "eval_samples_per_second": 539.989, "eval_steps_per_second": 67.542, "step": 539500 }, { "epoch": 123.77, "learning_rate": 4.3612443070077144e-05, "loss": 0.936, "step": 540000 }, { "epoch": 123.77, "eval_loss": 1.641719937324524, "eval_runtime": 8.6914, "eval_samples_per_second": 539.961, "eval_steps_per_second": 67.538, "step": 540000 }, { "epoch": 123.88, "learning_rate": 4.359574190412665e-05, "loss": 0.9316, "step": 540500 }, { "epoch": 123.88, "eval_loss": 1.6123720407485962, "eval_runtime": 8.6921, "eval_samples_per_second": 539.914, "eval_steps_per_second": 67.532, "step": 540500 }, { "epoch": 124.0, "learning_rate": 4.357902213968126e-05, "loss": 0.9353, "step": 541000 }, { "epoch": 124.0, "eval_loss": 1.6286252737045288, "eval_runtime": 8.6943, "eval_samples_per_second": 539.78, "eval_steps_per_second": 67.516, "step": 541000 }, { "epoch": 124.11, "learning_rate": 4.356228379346319e-05, "loss": 0.9236, "step": 541500 }, { "epoch": 124.11, "eval_loss": 1.6262346506118774, "eval_runtime": 8.6979, "eval_samples_per_second": 539.557, "eval_steps_per_second": 67.488, "step": 541500 }, { "epoch": 124.23, "learning_rate": 4.3545526882213285e-05, "loss": 0.9278, "step": 542000 }, { "epoch": 124.23, "eval_loss": 1.620430827140808, "eval_runtime": 8.6875, "eval_samples_per_second": 540.205, "eval_steps_per_second": 67.569, "step": 542000 }, { "epoch": 124.34, "learning_rate": 4.3528751422690916e-05, "loss": 0.9301, "step": 542500 }, { "epoch": 124.34, "eval_loss": 1.6282331943511963, "eval_runtime": 8.689, "eval_samples_per_second": 540.11, "eval_steps_per_second": 67.557, "step": 542500 }, { "epoch": 124.46, "learning_rate": 4.351195743167403e-05, "loss": 0.9253, "step": 543000 }, { "epoch": 124.46, "eval_loss": 1.631150484085083, "eval_runtime": 8.6928, "eval_samples_per_second": 539.87, "eval_steps_per_second": 67.527, "step": 543000 }, { "epoch": 124.57, "learning_rate": 4.349514492595912e-05, "loss": 0.9331, "step": 543500 }, { "epoch": 124.57, "eval_loss": 1.6239254474639893, "eval_runtime": 8.6898, "eval_samples_per_second": 540.056, "eval_steps_per_second": 67.55, "step": 543500 }, { "epoch": 124.68, "learning_rate": 4.347831392236117e-05, "loss": 0.9296, "step": 544000 }, { "epoch": 124.68, "eval_loss": 1.6128579378128052, "eval_runtime": 8.693, "eval_samples_per_second": 539.858, "eval_steps_per_second": 67.525, "step": 544000 }, { "epoch": 124.8, "learning_rate": 4.346146443771367e-05, "loss": 0.9381, "step": 544500 }, { "epoch": 124.8, "eval_loss": 1.6257288455963135, "eval_runtime": 8.6966, "eval_samples_per_second": 539.634, "eval_steps_per_second": 67.497, "step": 544500 }, { "epoch": 124.91, "learning_rate": 4.344459648886862e-05, "loss": 0.9263, "step": 545000 }, { "epoch": 124.91, "eval_loss": 1.6629536151885986, "eval_runtime": 8.6953, "eval_samples_per_second": 539.715, "eval_steps_per_second": 67.508, "step": 545000 }, { "epoch": 125.03, "learning_rate": 4.342771009269644e-05, "loss": 0.9336, "step": 545500 }, { "epoch": 125.03, "eval_loss": 1.6178438663482666, "eval_runtime": 8.6927, "eval_samples_per_second": 539.876, "eval_steps_per_second": 67.528, "step": 545500 }, { "epoch": 125.14, "learning_rate": 4.3410805266086044e-05, "loss": 0.9199, "step": 546000 }, { "epoch": 125.14, "eval_loss": 1.6330198049545288, "eval_runtime": 8.6889, "eval_samples_per_second": 540.112, "eval_steps_per_second": 67.557, "step": 546000 }, { "epoch": 125.26, "learning_rate": 4.339388202594474e-05, "loss": 0.9256, "step": 546500 }, { "epoch": 125.26, "eval_loss": 1.6242115497589111, "eval_runtime": 8.6848, "eval_samples_per_second": 540.366, "eval_steps_per_second": 67.589, "step": 546500 }, { "epoch": 125.37, "learning_rate": 4.33769403891983e-05, "loss": 0.9233, "step": 547000 }, { "epoch": 125.37, "eval_loss": 1.6064412593841553, "eval_runtime": 8.693, "eval_samples_per_second": 539.862, "eval_steps_per_second": 67.526, "step": 547000 }, { "epoch": 125.49, "learning_rate": 4.335998037279085e-05, "loss": 0.9311, "step": 547500 }, { "epoch": 125.49, "eval_loss": 1.6127852201461792, "eval_runtime": 8.6867, "eval_samples_per_second": 540.249, "eval_steps_per_second": 67.574, "step": 547500 }, { "epoch": 125.6, "learning_rate": 4.334300199368494e-05, "loss": 0.9321, "step": 548000 }, { "epoch": 125.6, "eval_loss": 1.6301032304763794, "eval_runtime": 8.6912, "eval_samples_per_second": 539.974, "eval_steps_per_second": 67.54, "step": 548000 }, { "epoch": 125.72, "learning_rate": 4.3326005268861436e-05, "loss": 0.9341, "step": 548500 }, { "epoch": 125.72, "eval_loss": 1.615949034690857, "eval_runtime": 8.6896, "eval_samples_per_second": 540.074, "eval_steps_per_second": 67.552, "step": 548500 }, { "epoch": 125.83, "learning_rate": 4.330899021531959e-05, "loss": 0.9294, "step": 549000 }, { "epoch": 125.83, "eval_loss": 1.6071819067001343, "eval_runtime": 8.6952, "eval_samples_per_second": 539.723, "eval_steps_per_second": 67.508, "step": 549000 }, { "epoch": 125.95, "learning_rate": 4.329195685007698e-05, "loss": 0.9352, "step": 549500 }, { "epoch": 125.95, "eval_loss": 1.6032053232192993, "eval_runtime": 8.6883, "eval_samples_per_second": 540.154, "eval_steps_per_second": 67.562, "step": 549500 }, { "epoch": 126.06, "learning_rate": 4.3274905190169476e-05, "loss": 0.9278, "step": 550000 }, { "epoch": 126.06, "eval_loss": 1.6284618377685547, "eval_runtime": 8.6938, "eval_samples_per_second": 539.81, "eval_steps_per_second": 67.519, "step": 550000 }, { "epoch": 126.17, "learning_rate": 4.325783525265128e-05, "loss": 0.9127, "step": 550500 }, { "epoch": 126.17, "eval_loss": 1.601733684539795, "eval_runtime": 8.6902, "eval_samples_per_second": 540.035, "eval_steps_per_second": 67.547, "step": 550500 }, { "epoch": 126.29, "learning_rate": 4.3240747054594854e-05, "loss": 0.923, "step": 551000 }, { "epoch": 126.29, "eval_loss": 1.613336205482483, "eval_runtime": 8.6854, "eval_samples_per_second": 540.334, "eval_steps_per_second": 67.585, "step": 551000 }, { "epoch": 126.4, "learning_rate": 4.3223640613090924e-05, "loss": 0.9204, "step": 551500 }, { "epoch": 126.4, "eval_loss": 1.6163504123687744, "eval_runtime": 8.686, "eval_samples_per_second": 540.296, "eval_steps_per_second": 67.58, "step": 551500 }, { "epoch": 126.52, "learning_rate": 4.320651594524846e-05, "loss": 0.9245, "step": 552000 }, { "epoch": 126.52, "eval_loss": 1.6383869647979736, "eval_runtime": 8.6969, "eval_samples_per_second": 539.615, "eval_steps_per_second": 67.495, "step": 552000 }, { "epoch": 126.63, "learning_rate": 4.318937306819466e-05, "loss": 0.9218, "step": 552500 }, { "epoch": 126.63, "eval_loss": 1.6347148418426514, "eval_runtime": 8.6972, "eval_samples_per_second": 539.596, "eval_steps_per_second": 67.493, "step": 552500 }, { "epoch": 126.75, "learning_rate": 4.317221199907496e-05, "loss": 0.9334, "step": 553000 }, { "epoch": 126.75, "eval_loss": 1.6332036256790161, "eval_runtime": 8.6874, "eval_samples_per_second": 540.207, "eval_steps_per_second": 67.569, "step": 553000 }, { "epoch": 126.86, "learning_rate": 4.315503275505295e-05, "loss": 0.9251, "step": 553500 }, { "epoch": 126.86, "eval_loss": 1.6296501159667969, "eval_runtime": 8.6794, "eval_samples_per_second": 540.705, "eval_steps_per_second": 67.631, "step": 553500 }, { "epoch": 126.98, "learning_rate": 4.313783535331042e-05, "loss": 0.9343, "step": 554000 }, { "epoch": 126.98, "eval_loss": 1.6115436553955078, "eval_runtime": 8.6922, "eval_samples_per_second": 539.91, "eval_steps_per_second": 67.532, "step": 554000 }, { "epoch": 127.09, "learning_rate": 4.312061981104732e-05, "loss": 0.9217, "step": 554500 }, { "epoch": 127.09, "eval_loss": 1.6251835823059082, "eval_runtime": 8.694, "eval_samples_per_second": 539.797, "eval_steps_per_second": 67.518, "step": 554500 }, { "epoch": 127.21, "learning_rate": 4.310338614548176e-05, "loss": 0.9139, "step": 555000 }, { "epoch": 127.21, "eval_loss": 1.6016409397125244, "eval_runtime": 8.6893, "eval_samples_per_second": 540.087, "eval_steps_per_second": 67.554, "step": 555000 }, { "epoch": 127.32, "learning_rate": 4.308613437384992e-05, "loss": 0.9116, "step": 555500 }, { "epoch": 127.32, "eval_loss": 1.6283104419708252, "eval_runtime": 8.695, "eval_samples_per_second": 539.734, "eval_steps_per_second": 67.51, "step": 555500 }, { "epoch": 127.44, "learning_rate": 4.306886451340615e-05, "loss": 0.9204, "step": 556000 }, { "epoch": 127.44, "eval_loss": 1.6159018278121948, "eval_runtime": 8.6965, "eval_samples_per_second": 539.641, "eval_steps_per_second": 67.498, "step": 556000 }, { "epoch": 127.55, "learning_rate": 4.3051576581422864e-05, "loss": 0.9243, "step": 556500 }, { "epoch": 127.55, "eval_loss": 1.6535073518753052, "eval_runtime": 8.6885, "eval_samples_per_second": 540.142, "eval_steps_per_second": 67.561, "step": 556500 }, { "epoch": 127.66, "learning_rate": 4.303427059519055e-05, "loss": 0.9273, "step": 557000 }, { "epoch": 127.66, "eval_loss": 1.6220444440841675, "eval_runtime": 8.6983, "eval_samples_per_second": 539.528, "eval_steps_per_second": 67.484, "step": 557000 }, { "epoch": 127.78, "learning_rate": 4.301694657201776e-05, "loss": 0.9291, "step": 557500 }, { "epoch": 127.78, "eval_loss": 1.609269142150879, "eval_runtime": 8.6931, "eval_samples_per_second": 539.852, "eval_steps_per_second": 67.525, "step": 557500 }, { "epoch": 127.89, "learning_rate": 4.2999604529231076e-05, "loss": 0.9276, "step": 558000 }, { "epoch": 127.89, "eval_loss": 1.6148854494094849, "eval_runtime": 8.6861, "eval_samples_per_second": 540.29, "eval_steps_per_second": 67.579, "step": 558000 }, { "epoch": 128.01, "learning_rate": 4.2982244484175115e-05, "loss": 0.9214, "step": 558500 }, { "epoch": 128.01, "eval_loss": 1.645634651184082, "eval_runtime": 8.6951, "eval_samples_per_second": 539.732, "eval_steps_per_second": 67.51, "step": 558500 }, { "epoch": 128.12, "learning_rate": 4.2964866454212486e-05, "loss": 0.9058, "step": 559000 }, { "epoch": 128.12, "eval_loss": 1.6120163202285767, "eval_runtime": 8.6885, "eval_samples_per_second": 540.142, "eval_steps_per_second": 67.561, "step": 559000 }, { "epoch": 128.24, "learning_rate": 4.29474704567238e-05, "loss": 0.9135, "step": 559500 }, { "epoch": 128.24, "eval_loss": 1.5996527671813965, "eval_runtime": 8.6847, "eval_samples_per_second": 540.375, "eval_steps_per_second": 67.59, "step": 559500 }, { "epoch": 128.35, "learning_rate": 4.293005650910762e-05, "loss": 0.9154, "step": 560000 }, { "epoch": 128.35, "eval_loss": 1.6114107370376587, "eval_runtime": 8.6936, "eval_samples_per_second": 539.822, "eval_steps_per_second": 67.521, "step": 560000 }, { "epoch": 128.47, "learning_rate": 4.2912624628780474e-05, "loss": 0.9205, "step": 560500 }, { "epoch": 128.47, "eval_loss": 1.6492161750793457, "eval_runtime": 8.6904, "eval_samples_per_second": 540.021, "eval_steps_per_second": 67.546, "step": 560500 }, { "epoch": 128.58, "learning_rate": 4.2895174833176834e-05, "loss": 0.9224, "step": 561000 }, { "epoch": 128.58, "eval_loss": 1.6072591543197632, "eval_runtime": 8.6853, "eval_samples_per_second": 540.335, "eval_steps_per_second": 67.585, "step": 561000 }, { "epoch": 128.7, "learning_rate": 4.287770713974906e-05, "loss": 0.9166, "step": 561500 }, { "epoch": 128.7, "eval_loss": 1.6149767637252808, "eval_runtime": 8.691, "eval_samples_per_second": 539.985, "eval_steps_per_second": 67.541, "step": 561500 }, { "epoch": 128.81, "learning_rate": 4.286022156596745e-05, "loss": 0.922, "step": 562000 }, { "epoch": 128.81, "eval_loss": 1.6000447273254395, "eval_runtime": 8.6962, "eval_samples_per_second": 539.662, "eval_steps_per_second": 67.501, "step": 562000 }, { "epoch": 128.92, "learning_rate": 4.284271812932015e-05, "loss": 0.9269, "step": 562500 }, { "epoch": 128.92, "eval_loss": 1.6284106969833374, "eval_runtime": 8.6952, "eval_samples_per_second": 539.726, "eval_steps_per_second": 67.509, "step": 562500 }, { "epoch": 129.04, "learning_rate": 4.28251968473132e-05, "loss": 0.9332, "step": 563000 }, { "epoch": 129.04, "eval_loss": 1.6233971118927002, "eval_runtime": 8.6924, "eval_samples_per_second": 539.896, "eval_steps_per_second": 67.53, "step": 563000 }, { "epoch": 129.15, "learning_rate": 4.280765773747047e-05, "loss": 0.9153, "step": 563500 }, { "epoch": 129.15, "eval_loss": 1.6223101615905762, "eval_runtime": 8.6932, "eval_samples_per_second": 539.847, "eval_steps_per_second": 67.524, "step": 563500 }, { "epoch": 129.27, "learning_rate": 4.279010081733368e-05, "loss": 0.9167, "step": 564000 }, { "epoch": 129.27, "eval_loss": 1.6181808710098267, "eval_runtime": 8.6927, "eval_samples_per_second": 539.876, "eval_steps_per_second": 67.528, "step": 564000 }, { "epoch": 129.38, "learning_rate": 4.2772526104462326e-05, "loss": 0.9107, "step": 564500 }, { "epoch": 129.38, "eval_loss": 1.6224877834320068, "eval_runtime": 8.6883, "eval_samples_per_second": 540.15, "eval_steps_per_second": 67.562, "step": 564500 }, { "epoch": 129.5, "learning_rate": 4.275493361643373e-05, "loss": 0.9281, "step": 565000 }, { "epoch": 129.5, "eval_loss": 1.6280591487884521, "eval_runtime": 8.6928, "eval_samples_per_second": 539.872, "eval_steps_per_second": 67.527, "step": 565000 }, { "epoch": 129.61, "learning_rate": 4.2737323370842996e-05, "loss": 0.9224, "step": 565500 }, { "epoch": 129.61, "eval_loss": 1.6310957670211792, "eval_runtime": 8.6882, "eval_samples_per_second": 540.158, "eval_steps_per_second": 67.563, "step": 565500 }, { "epoch": 129.73, "learning_rate": 4.271969538530297e-05, "loss": 0.9133, "step": 566000 }, { "epoch": 129.73, "eval_loss": 1.6084930896759033, "eval_runtime": 8.6908, "eval_samples_per_second": 539.994, "eval_steps_per_second": 67.542, "step": 566000 }, { "epoch": 129.84, "learning_rate": 4.2702049677444245e-05, "loss": 0.9205, "step": 566500 }, { "epoch": 129.84, "eval_loss": 1.6262813806533813, "eval_runtime": 8.6865, "eval_samples_per_second": 540.265, "eval_steps_per_second": 67.576, "step": 566500 }, { "epoch": 129.96, "learning_rate": 4.268438626491514e-05, "loss": 0.9224, "step": 567000 }, { "epoch": 129.96, "eval_loss": 1.6292060613632202, "eval_runtime": 8.6888, "eval_samples_per_second": 540.118, "eval_steps_per_second": 67.558, "step": 567000 }, { "epoch": 130.07, "learning_rate": 4.2666705165381695e-05, "loss": 0.9078, "step": 567500 }, { "epoch": 130.07, "eval_loss": 1.635664463043213, "eval_runtime": 8.6849, "eval_samples_per_second": 540.365, "eval_steps_per_second": 67.589, "step": 567500 }, { "epoch": 130.19, "learning_rate": 4.264900639652761e-05, "loss": 0.9148, "step": 568000 }, { "epoch": 130.19, "eval_loss": 1.6334251165390015, "eval_runtime": 8.6909, "eval_samples_per_second": 539.989, "eval_steps_per_second": 67.542, "step": 568000 }, { "epoch": 130.3, "learning_rate": 4.263128997605429e-05, "loss": 0.914, "step": 568500 }, { "epoch": 130.3, "eval_loss": 1.615344524383545, "eval_runtime": 8.6899, "eval_samples_per_second": 540.054, "eval_steps_per_second": 67.55, "step": 568500 }, { "epoch": 130.41, "learning_rate": 4.261355592168076e-05, "loss": 0.9125, "step": 569000 }, { "epoch": 130.41, "eval_loss": 1.6120339632034302, "eval_runtime": 8.6892, "eval_samples_per_second": 540.096, "eval_steps_per_second": 67.555, "step": 569000 }, { "epoch": 130.53, "learning_rate": 4.259580425114372e-05, "loss": 0.9229, "step": 569500 }, { "epoch": 130.53, "eval_loss": 1.6312016248703003, "eval_runtime": 8.6903, "eval_samples_per_second": 540.026, "eval_steps_per_second": 67.546, "step": 569500 }, { "epoch": 130.64, "learning_rate": 4.257803498219745e-05, "loss": 0.9265, "step": 570000 }, { "epoch": 130.64, "eval_loss": 1.620993971824646, "eval_runtime": 8.69, "eval_samples_per_second": 540.045, "eval_steps_per_second": 67.549, "step": 570000 }, { "epoch": 130.76, "learning_rate": 4.256024813261386e-05, "loss": 0.9159, "step": 570500 }, { "epoch": 130.76, "eval_loss": 1.637595534324646, "eval_runtime": 8.6954, "eval_samples_per_second": 539.709, "eval_steps_per_second": 67.507, "step": 570500 }, { "epoch": 130.87, "learning_rate": 4.254244372018244e-05, "loss": 0.9195, "step": 571000 }, { "epoch": 130.87, "eval_loss": 1.6227585077285767, "eval_runtime": 8.6914, "eval_samples_per_second": 539.957, "eval_steps_per_second": 67.538, "step": 571000 }, { "epoch": 130.99, "learning_rate": 4.2524621762710214e-05, "loss": 0.9136, "step": 571500 }, { "epoch": 130.99, "eval_loss": 1.608605980873108, "eval_runtime": 8.6894, "eval_samples_per_second": 540.083, "eval_steps_per_second": 67.554, "step": 571500 }, { "epoch": 131.1, "learning_rate": 4.2506782278021795e-05, "loss": 0.9148, "step": 572000 }, { "epoch": 131.1, "eval_loss": 1.6268929243087769, "eval_runtime": 8.6851, "eval_samples_per_second": 540.353, "eval_steps_per_second": 67.587, "step": 572000 }, { "epoch": 131.22, "learning_rate": 4.248892528395932e-05, "loss": 0.9045, "step": 572500 }, { "epoch": 131.22, "eval_loss": 1.6439810991287231, "eval_runtime": 8.6971, "eval_samples_per_second": 539.605, "eval_steps_per_second": 67.494, "step": 572500 }, { "epoch": 131.33, "learning_rate": 4.247105079838241e-05, "loss": 0.9142, "step": 573000 }, { "epoch": 131.33, "eval_loss": 1.5870765447616577, "eval_runtime": 8.7017, "eval_samples_per_second": 539.319, "eval_steps_per_second": 67.458, "step": 573000 }, { "epoch": 131.45, "learning_rate": 4.2453158839168186e-05, "loss": 0.9126, "step": 573500 }, { "epoch": 131.45, "eval_loss": 1.6030893325805664, "eval_runtime": 8.6843, "eval_samples_per_second": 540.403, "eval_steps_per_second": 67.594, "step": 573500 }, { "epoch": 131.56, "learning_rate": 4.243524942421128e-05, "loss": 0.9109, "step": 574000 }, { "epoch": 131.56, "eval_loss": 1.6342146396636963, "eval_runtime": 8.6892, "eval_samples_per_second": 540.098, "eval_steps_per_second": 67.555, "step": 574000 }, { "epoch": 131.68, "learning_rate": 4.241732257142376e-05, "loss": 0.9077, "step": 574500 }, { "epoch": 131.68, "eval_loss": 1.6387219429016113, "eval_runtime": 8.6904, "eval_samples_per_second": 540.022, "eval_steps_per_second": 67.546, "step": 574500 }, { "epoch": 131.79, "learning_rate": 4.23993782987351e-05, "loss": 0.9179, "step": 575000 }, { "epoch": 131.79, "eval_loss": 1.6341513395309448, "eval_runtime": 8.6828, "eval_samples_per_second": 540.497, "eval_steps_per_second": 67.605, "step": 575000 }, { "epoch": 131.9, "learning_rate": 4.238141662409226e-05, "loss": 0.9326, "step": 575500 }, { "epoch": 131.9, "eval_loss": 1.6298811435699463, "eval_runtime": 8.6869, "eval_samples_per_second": 540.241, "eval_steps_per_second": 67.573, "step": 575500 }, { "epoch": 132.02, "learning_rate": 4.2363437565459574e-05, "loss": 0.9204, "step": 576000 }, { "epoch": 132.02, "eval_loss": 1.6143007278442383, "eval_runtime": 8.6857, "eval_samples_per_second": 540.312, "eval_steps_per_second": 67.582, "step": 576000 }, { "epoch": 132.13, "learning_rate": 4.234544114081874e-05, "loss": 0.9082, "step": 576500 }, { "epoch": 132.13, "eval_loss": 1.6387896537780762, "eval_runtime": 8.6924, "eval_samples_per_second": 539.899, "eval_steps_per_second": 67.531, "step": 576500 }, { "epoch": 132.25, "learning_rate": 4.232742736816887e-05, "loss": 0.9112, "step": 577000 }, { "epoch": 132.25, "eval_loss": 1.6232578754425049, "eval_runtime": 8.6894, "eval_samples_per_second": 540.082, "eval_steps_per_second": 67.553, "step": 577000 }, { "epoch": 132.36, "learning_rate": 4.2309396265526385e-05, "loss": 0.9096, "step": 577500 }, { "epoch": 132.36, "eval_loss": 1.6346944570541382, "eval_runtime": 8.699, "eval_samples_per_second": 539.489, "eval_steps_per_second": 67.479, "step": 577500 }, { "epoch": 132.48, "learning_rate": 4.2291347850925065e-05, "loss": 0.9218, "step": 578000 }, { "epoch": 132.48, "eval_loss": 1.6578986644744873, "eval_runtime": 8.6837, "eval_samples_per_second": 540.44, "eval_steps_per_second": 67.598, "step": 578000 }, { "epoch": 132.59, "learning_rate": 4.2273282142416005e-05, "loss": 0.9201, "step": 578500 }, { "epoch": 132.59, "eval_loss": 1.6357676982879639, "eval_runtime": 8.6891, "eval_samples_per_second": 540.103, "eval_steps_per_second": 67.556, "step": 578500 }, { "epoch": 132.71, "learning_rate": 4.2255199158067575e-05, "loss": 0.9172, "step": 579000 }, { "epoch": 132.71, "eval_loss": 1.6180429458618164, "eval_runtime": 8.6816, "eval_samples_per_second": 540.569, "eval_steps_per_second": 67.614, "step": 579000 }, { "epoch": 132.82, "learning_rate": 4.223709891596545e-05, "loss": 0.9204, "step": 579500 }, { "epoch": 132.82, "eval_loss": 1.6350277662277222, "eval_runtime": 8.6853, "eval_samples_per_second": 540.339, "eval_steps_per_second": 67.586, "step": 579500 }, { "epoch": 132.94, "learning_rate": 4.2218981434212526e-05, "loss": 0.9122, "step": 580000 }, { "epoch": 132.94, "eval_loss": 1.6323140859603882, "eval_runtime": 8.6831, "eval_samples_per_second": 540.477, "eval_steps_per_second": 67.603, "step": 580000 }, { "epoch": 133.05, "learning_rate": 4.2200846730929e-05, "loss": 0.9156, "step": 580500 }, { "epoch": 133.05, "eval_loss": 1.6335684061050415, "eval_runtime": 8.6853, "eval_samples_per_second": 540.337, "eval_steps_per_second": 67.585, "step": 580500 }, { "epoch": 133.17, "learning_rate": 4.2182694824252235e-05, "loss": 0.9088, "step": 581000 }, { "epoch": 133.17, "eval_loss": 1.6303545236587524, "eval_runtime": 8.6912, "eval_samples_per_second": 539.97, "eval_steps_per_second": 67.539, "step": 581000 }, { "epoch": 133.28, "learning_rate": 4.2164525732336833e-05, "loss": 0.9106, "step": 581500 }, { "epoch": 133.28, "eval_loss": 1.6316580772399902, "eval_runtime": 8.688, "eval_samples_per_second": 540.17, "eval_steps_per_second": 67.564, "step": 581500 }, { "epoch": 133.39, "learning_rate": 4.214633947335458e-05, "loss": 0.9162, "step": 582000 }, { "epoch": 133.39, "eval_loss": 1.6304960250854492, "eval_runtime": 8.6876, "eval_samples_per_second": 540.195, "eval_steps_per_second": 67.568, "step": 582000 }, { "epoch": 133.51, "learning_rate": 4.212813606549443e-05, "loss": 0.9126, "step": 582500 }, { "epoch": 133.51, "eval_loss": 1.6293832063674927, "eval_runtime": 8.6887, "eval_samples_per_second": 540.128, "eval_steps_per_second": 67.559, "step": 582500 }, { "epoch": 133.62, "learning_rate": 4.2109915526962475e-05, "loss": 0.903, "step": 583000 }, { "epoch": 133.62, "eval_loss": 1.607029676437378, "eval_runtime": 8.6883, "eval_samples_per_second": 540.152, "eval_steps_per_second": 67.562, "step": 583000 }, { "epoch": 133.74, "learning_rate": 4.209167787598196e-05, "loss": 0.9117, "step": 583500 }, { "epoch": 133.74, "eval_loss": 1.6445306539535522, "eval_runtime": 8.6861, "eval_samples_per_second": 540.288, "eval_steps_per_second": 67.579, "step": 583500 }, { "epoch": 133.85, "learning_rate": 4.207342313079322e-05, "loss": 0.9107, "step": 584000 }, { "epoch": 133.85, "eval_loss": 1.6344465017318726, "eval_runtime": 8.6892, "eval_samples_per_second": 540.097, "eval_steps_per_second": 67.555, "step": 584000 }, { "epoch": 133.97, "learning_rate": 4.205515130965373e-05, "loss": 0.9129, "step": 584500 }, { "epoch": 133.97, "eval_loss": 1.629918098449707, "eval_runtime": 8.6856, "eval_samples_per_second": 540.319, "eval_steps_per_second": 67.583, "step": 584500 }, { "epoch": 134.08, "learning_rate": 4.203686243083799e-05, "loss": 0.9085, "step": 585000 }, { "epoch": 134.08, "eval_loss": 1.6373454332351685, "eval_runtime": 8.6954, "eval_samples_per_second": 539.71, "eval_steps_per_second": 67.507, "step": 585000 }, { "epoch": 134.2, "learning_rate": 4.2018556512637605e-05, "loss": 0.9, "step": 585500 }, { "epoch": 134.2, "eval_loss": 1.6321882009506226, "eval_runtime": 8.6995, "eval_samples_per_second": 539.456, "eval_steps_per_second": 67.475, "step": 585500 }, { "epoch": 134.31, "learning_rate": 4.200023357336119e-05, "loss": 0.9089, "step": 586000 }, { "epoch": 134.31, "eval_loss": 1.6399563550949097, "eval_runtime": 8.6847, "eval_samples_per_second": 540.378, "eval_steps_per_second": 67.59, "step": 586000 }, { "epoch": 134.43, "learning_rate": 4.19818936313344e-05, "loss": 0.9166, "step": 586500 }, { "epoch": 134.43, "eval_loss": 1.6353459358215332, "eval_runtime": 8.6869, "eval_samples_per_second": 540.241, "eval_steps_per_second": 67.573, "step": 586500 }, { "epoch": 134.54, "learning_rate": 4.196353670489991e-05, "loss": 0.9126, "step": 587000 }, { "epoch": 134.54, "eval_loss": 1.630407691001892, "eval_runtime": 8.6922, "eval_samples_per_second": 539.908, "eval_steps_per_second": 67.532, "step": 587000 }, { "epoch": 134.65, "learning_rate": 4.1945162812417335e-05, "loss": 0.9125, "step": 587500 }, { "epoch": 134.65, "eval_loss": 1.616973638534546, "eval_runtime": 8.6885, "eval_samples_per_second": 540.138, "eval_steps_per_second": 67.56, "step": 587500 }, { "epoch": 134.77, "learning_rate": 4.19267719722633e-05, "loss": 0.9163, "step": 588000 }, { "epoch": 134.77, "eval_loss": 1.6314259767532349, "eval_runtime": 8.6863, "eval_samples_per_second": 540.274, "eval_steps_per_second": 67.577, "step": 588000 }, { "epoch": 134.88, "learning_rate": 4.190836420283137e-05, "loss": 0.9082, "step": 588500 }, { "epoch": 134.88, "eval_loss": 1.64652419090271, "eval_runtime": 8.6834, "eval_samples_per_second": 540.459, "eval_steps_per_second": 67.601, "step": 588500 }, { "epoch": 135.0, "learning_rate": 4.188993952253205e-05, "loss": 0.9156, "step": 589000 }, { "epoch": 135.0, "eval_loss": 1.612499475479126, "eval_runtime": 8.6848, "eval_samples_per_second": 540.369, "eval_steps_per_second": 67.589, "step": 589000 }, { "epoch": 135.11, "learning_rate": 4.187149794979273e-05, "loss": 0.8983, "step": 589500 }, { "epoch": 135.11, "eval_loss": 1.6217238903045654, "eval_runtime": 8.6967, "eval_samples_per_second": 539.632, "eval_steps_per_second": 67.497, "step": 589500 }, { "epoch": 135.23, "learning_rate": 4.185303950305772e-05, "loss": 0.9031, "step": 590000 }, { "epoch": 135.23, "eval_loss": 1.634496808052063, "eval_runtime": 8.685, "eval_samples_per_second": 540.355, "eval_steps_per_second": 67.588, "step": 590000 }, { "epoch": 135.34, "learning_rate": 4.1834564200788194e-05, "loss": 0.9072, "step": 590500 }, { "epoch": 135.34, "eval_loss": 1.618513822555542, "eval_runtime": 8.6847, "eval_samples_per_second": 540.376, "eval_steps_per_second": 67.59, "step": 590500 }, { "epoch": 135.46, "learning_rate": 4.1816072061462206e-05, "loss": 0.9043, "step": 591000 }, { "epoch": 135.46, "eval_loss": 1.6251182556152344, "eval_runtime": 8.686, "eval_samples_per_second": 540.293, "eval_steps_per_second": 67.58, "step": 591000 }, { "epoch": 135.57, "learning_rate": 4.179756310357461e-05, "loss": 0.9164, "step": 591500 }, { "epoch": 135.57, "eval_loss": 1.615295171737671, "eval_runtime": 8.6886, "eval_samples_per_second": 540.131, "eval_steps_per_second": 67.559, "step": 591500 }, { "epoch": 135.69, "learning_rate": 4.1779037345637116e-05, "loss": 0.9161, "step": 592000 }, { "epoch": 135.69, "eval_loss": 1.639054298400879, "eval_runtime": 8.6877, "eval_samples_per_second": 540.188, "eval_steps_per_second": 67.567, "step": 592000 }, { "epoch": 135.8, "learning_rate": 4.176049480617823e-05, "loss": 0.9045, "step": 592500 }, { "epoch": 135.8, "eval_loss": 1.6169302463531494, "eval_runtime": 8.6823, "eval_samples_per_second": 540.525, "eval_steps_per_second": 67.609, "step": 592500 }, { "epoch": 135.92, "learning_rate": 4.1741935503743236e-05, "loss": 0.9176, "step": 593000 }, { "epoch": 135.92, "eval_loss": 1.609183430671692, "eval_runtime": 8.6859, "eval_samples_per_second": 540.303, "eval_steps_per_second": 67.581, "step": 593000 }, { "epoch": 136.03, "learning_rate": 4.172335945689417e-05, "loss": 0.9038, "step": 593500 }, { "epoch": 136.03, "eval_loss": 1.6228059530258179, "eval_runtime": 8.6935, "eval_samples_per_second": 539.831, "eval_steps_per_second": 67.522, "step": 593500 }, { "epoch": 136.14, "learning_rate": 4.170476668420984e-05, "loss": 0.8974, "step": 594000 }, { "epoch": 136.14, "eval_loss": 1.635487675666809, "eval_runtime": 8.6848, "eval_samples_per_second": 540.368, "eval_steps_per_second": 67.589, "step": 594000 }, { "epoch": 136.26, "learning_rate": 4.168615720428577e-05, "loss": 0.8989, "step": 594500 }, { "epoch": 136.26, "eval_loss": 1.62802255153656, "eval_runtime": 8.6863, "eval_samples_per_second": 540.273, "eval_steps_per_second": 67.577, "step": 594500 }, { "epoch": 136.37, "learning_rate": 4.166753103573419e-05, "loss": 0.9096, "step": 595000 }, { "epoch": 136.37, "eval_loss": 1.6259125471115112, "eval_runtime": 8.6895, "eval_samples_per_second": 540.078, "eval_steps_per_second": 67.553, "step": 595000 }, { "epoch": 136.49, "learning_rate": 4.164888819718403e-05, "loss": 0.9069, "step": 595500 }, { "epoch": 136.49, "eval_loss": 1.6604769229888916, "eval_runtime": 8.6857, "eval_samples_per_second": 540.316, "eval_steps_per_second": 67.583, "step": 595500 }, { "epoch": 136.6, "learning_rate": 4.1630228707280896e-05, "loss": 0.9085, "step": 596000 }, { "epoch": 136.6, "eval_loss": 1.6183357238769531, "eval_runtime": 8.6874, "eval_samples_per_second": 540.21, "eval_steps_per_second": 67.569, "step": 596000 }, { "epoch": 136.72, "learning_rate": 4.161155258468703e-05, "loss": 0.905, "step": 596500 }, { "epoch": 136.72, "eval_loss": 1.6558185815811157, "eval_runtime": 8.6854, "eval_samples_per_second": 540.332, "eval_steps_per_second": 67.585, "step": 596500 }, { "epoch": 136.83, "learning_rate": 4.159285984808132e-05, "loss": 0.905, "step": 597000 }, { "epoch": 136.83, "eval_loss": 1.6242684125900269, "eval_runtime": 8.6835, "eval_samples_per_second": 540.448, "eval_steps_per_second": 67.599, "step": 597000 }, { "epoch": 136.95, "learning_rate": 4.1574150516159275e-05, "loss": 0.9058, "step": 597500 }, { "epoch": 136.95, "eval_loss": 1.6403347253799438, "eval_runtime": 8.6857, "eval_samples_per_second": 540.315, "eval_steps_per_second": 67.583, "step": 597500 }, { "epoch": 137.06, "learning_rate": 4.1555424607632994e-05, "loss": 0.9004, "step": 598000 }, { "epoch": 137.06, "eval_loss": 1.654471516609192, "eval_runtime": 8.6903, "eval_samples_per_second": 540.026, "eval_steps_per_second": 67.546, "step": 598000 }, { "epoch": 137.18, "learning_rate": 4.153668214123115e-05, "loss": 0.8936, "step": 598500 }, { "epoch": 137.18, "eval_loss": 1.6222373247146606, "eval_runtime": 8.6925, "eval_samples_per_second": 539.89, "eval_steps_per_second": 67.529, "step": 598500 }, { "epoch": 137.29, "learning_rate": 4.1517923135699e-05, "loss": 0.9012, "step": 599000 }, { "epoch": 137.29, "eval_loss": 1.6459195613861084, "eval_runtime": 8.6871, "eval_samples_per_second": 540.227, "eval_steps_per_second": 67.572, "step": 599000 }, { "epoch": 137.41, "learning_rate": 4.149914760979833e-05, "loss": 0.8976, "step": 599500 }, { "epoch": 137.41, "eval_loss": 1.6431206464767456, "eval_runtime": 8.6812, "eval_samples_per_second": 540.595, "eval_steps_per_second": 67.618, "step": 599500 }, { "epoch": 137.52, "learning_rate": 4.1480355582307416e-05, "loss": 0.9082, "step": 600000 }, { "epoch": 137.52, "eval_loss": 1.6203123331069946, "eval_runtime": 8.6938, "eval_samples_per_second": 539.812, "eval_steps_per_second": 67.52, "step": 600000 }, { "epoch": 137.63, "learning_rate": 4.146154707202109e-05, "loss": 0.9009, "step": 600500 }, { "epoch": 137.63, "eval_loss": 1.6463496685028076, "eval_runtime": 8.693, "eval_samples_per_second": 539.862, "eval_steps_per_second": 67.526, "step": 600500 }, { "epoch": 137.75, "learning_rate": 4.144272209775064e-05, "loss": 0.9109, "step": 601000 }, { "epoch": 137.75, "eval_loss": 1.6385164260864258, "eval_runtime": 8.6909, "eval_samples_per_second": 539.99, "eval_steps_per_second": 67.542, "step": 601000 }, { "epoch": 137.86, "learning_rate": 4.1423880678323846e-05, "loss": 0.9105, "step": 601500 }, { "epoch": 137.86, "eval_loss": 1.608390212059021, "eval_runtime": 8.6951, "eval_samples_per_second": 539.729, "eval_steps_per_second": 67.509, "step": 601500 }, { "epoch": 137.98, "learning_rate": 4.1405022832584884e-05, "loss": 0.9033, "step": 602000 }, { "epoch": 137.98, "eval_loss": 1.6386945247650146, "eval_runtime": 8.6854, "eval_samples_per_second": 540.333, "eval_steps_per_second": 67.585, "step": 602000 }, { "epoch": 138.09, "learning_rate": 4.1386148579394413e-05, "loss": 0.8969, "step": 602500 }, { "epoch": 138.09, "eval_loss": 1.609856128692627, "eval_runtime": 8.682, "eval_samples_per_second": 540.544, "eval_steps_per_second": 67.611, "step": 602500 }, { "epoch": 138.21, "learning_rate": 4.136725793762948e-05, "loss": 0.8949, "step": 603000 }, { "epoch": 138.21, "eval_loss": 1.6195242404937744, "eval_runtime": 8.6876, "eval_samples_per_second": 540.194, "eval_steps_per_second": 67.567, "step": 603000 }, { "epoch": 138.32, "learning_rate": 4.1348350926183516e-05, "loss": 0.9028, "step": 603500 }, { "epoch": 138.32, "eval_loss": 1.6188088655471802, "eval_runtime": 8.6842, "eval_samples_per_second": 540.405, "eval_steps_per_second": 67.594, "step": 603500 }, { "epoch": 138.44, "learning_rate": 4.132942756396633e-05, "loss": 0.9012, "step": 604000 }, { "epoch": 138.44, "eval_loss": 1.634516954421997, "eval_runtime": 8.6916, "eval_samples_per_second": 539.946, "eval_steps_per_second": 67.536, "step": 604000 }, { "epoch": 138.55, "learning_rate": 4.1310487869904105e-05, "loss": 0.9003, "step": 604500 }, { "epoch": 138.55, "eval_loss": 1.6170322895050049, "eval_runtime": 8.6871, "eval_samples_per_second": 540.227, "eval_steps_per_second": 67.572, "step": 604500 }, { "epoch": 138.67, "learning_rate": 4.1291531862939305e-05, "loss": 0.8994, "step": 605000 }, { "epoch": 138.67, "eval_loss": 1.6201380491256714, "eval_runtime": 8.6862, "eval_samples_per_second": 540.282, "eval_steps_per_second": 67.578, "step": 605000 }, { "epoch": 138.78, "learning_rate": 4.127255956203078e-05, "loss": 0.9156, "step": 605500 }, { "epoch": 138.78, "eval_loss": 1.636027216911316, "eval_runtime": 8.6872, "eval_samples_per_second": 540.217, "eval_steps_per_second": 67.57, "step": 605500 }, { "epoch": 138.9, "learning_rate": 4.125357098615361e-05, "loss": 0.9033, "step": 606000 }, { "epoch": 138.9, "eval_loss": 1.656206488609314, "eval_runtime": 8.7, "eval_samples_per_second": 539.425, "eval_steps_per_second": 67.471, "step": 606000 }, { "epoch": 139.01, "learning_rate": 4.1234566154299195e-05, "loss": 0.9065, "step": 606500 }, { "epoch": 139.01, "eval_loss": 1.6388254165649414, "eval_runtime": 8.6895, "eval_samples_per_second": 540.076, "eval_steps_per_second": 67.553, "step": 606500 }, { "epoch": 139.12, "learning_rate": 4.121554508547518e-05, "loss": 0.8991, "step": 607000 }, { "epoch": 139.12, "eval_loss": 1.6297885179519653, "eval_runtime": 8.6861, "eval_samples_per_second": 540.289, "eval_steps_per_second": 67.579, "step": 607000 }, { "epoch": 139.24, "learning_rate": 4.1196507798705466e-05, "loss": 0.8925, "step": 607500 }, { "epoch": 139.24, "eval_loss": 1.6419588327407837, "eval_runtime": 8.692, "eval_samples_per_second": 539.92, "eval_steps_per_second": 67.533, "step": 607500 }, { "epoch": 139.35, "learning_rate": 4.117745431303012e-05, "loss": 0.8946, "step": 608000 }, { "epoch": 139.35, "eval_loss": 1.6211525201797485, "eval_runtime": 8.6896, "eval_samples_per_second": 540.068, "eval_steps_per_second": 67.552, "step": 608000 }, { "epoch": 139.47, "learning_rate": 4.1158384647505485e-05, "loss": 0.9013, "step": 608500 }, { "epoch": 139.47, "eval_loss": 1.6185187101364136, "eval_runtime": 8.6902, "eval_samples_per_second": 540.032, "eval_steps_per_second": 67.547, "step": 608500 }, { "epoch": 139.58, "learning_rate": 4.113929882120402e-05, "loss": 0.8964, "step": 609000 }, { "epoch": 139.58, "eval_loss": 1.630597710609436, "eval_runtime": 8.6875, "eval_samples_per_second": 540.201, "eval_steps_per_second": 67.568, "step": 609000 }, { "epoch": 139.7, "learning_rate": 4.112019685321441e-05, "loss": 0.9001, "step": 609500 }, { "epoch": 139.7, "eval_loss": 1.6340128183364868, "eval_runtime": 8.6868, "eval_samples_per_second": 540.245, "eval_steps_per_second": 67.574, "step": 609500 }, { "epoch": 139.81, "learning_rate": 4.110107876264142e-05, "loss": 0.8968, "step": 610000 }, { "epoch": 139.81, "eval_loss": 1.657415509223938, "eval_runtime": 8.6938, "eval_samples_per_second": 539.812, "eval_steps_per_second": 67.52, "step": 610000 }, { "epoch": 139.93, "learning_rate": 4.1081944568605985e-05, "loss": 0.9011, "step": 610500 }, { "epoch": 139.93, "eval_loss": 1.6394609212875366, "eval_runtime": 8.6894, "eval_samples_per_second": 540.086, "eval_steps_per_second": 67.554, "step": 610500 }, { "epoch": 140.04, "learning_rate": 4.106279429024513e-05, "loss": 0.8953, "step": 611000 }, { "epoch": 140.04, "eval_loss": 1.6338999271392822, "eval_runtime": 8.6868, "eval_samples_per_second": 540.242, "eval_steps_per_second": 67.573, "step": 611000 }, { "epoch": 140.16, "learning_rate": 4.104362794671197e-05, "loss": 0.8992, "step": 611500 }, { "epoch": 140.16, "eval_loss": 1.6105777025222778, "eval_runtime": 8.6917, "eval_samples_per_second": 539.943, "eval_steps_per_second": 67.536, "step": 611500 }, { "epoch": 140.27, "learning_rate": 4.1024445557175694e-05, "loss": 0.8955, "step": 612000 }, { "epoch": 140.27, "eval_loss": 1.625017523765564, "eval_runtime": 8.694, "eval_samples_per_second": 539.799, "eval_steps_per_second": 67.518, "step": 612000 }, { "epoch": 140.39, "learning_rate": 4.100524714082151e-05, "loss": 0.8922, "step": 612500 }, { "epoch": 140.39, "eval_loss": 1.62802255153656, "eval_runtime": 8.6824, "eval_samples_per_second": 540.518, "eval_steps_per_second": 67.608, "step": 612500 }, { "epoch": 140.5, "learning_rate": 4.0986032716850696e-05, "loss": 0.8977, "step": 613000 }, { "epoch": 140.5, "eval_loss": 1.6328892707824707, "eval_runtime": 8.6918, "eval_samples_per_second": 539.933, "eval_steps_per_second": 67.535, "step": 613000 }, { "epoch": 140.61, "learning_rate": 4.096680230448051e-05, "loss": 0.9013, "step": 613500 }, { "epoch": 140.61, "eval_loss": 1.6436128616333008, "eval_runtime": 8.6943, "eval_samples_per_second": 539.78, "eval_steps_per_second": 67.516, "step": 613500 }, { "epoch": 140.73, "learning_rate": 4.0947555922944214e-05, "loss": 0.8963, "step": 614000 }, { "epoch": 140.73, "eval_loss": 1.6370834112167358, "eval_runtime": 8.6893, "eval_samples_per_second": 540.09, "eval_steps_per_second": 67.554, "step": 614000 }, { "epoch": 140.84, "learning_rate": 4.092829359149104e-05, "loss": 0.9015, "step": 614500 }, { "epoch": 140.84, "eval_loss": 1.6159191131591797, "eval_runtime": 8.6903, "eval_samples_per_second": 540.029, "eval_steps_per_second": 67.547, "step": 614500 }, { "epoch": 140.96, "learning_rate": 4.090901532938617e-05, "loss": 0.9026, "step": 615000 }, { "epoch": 140.96, "eval_loss": 1.6262280941009521, "eval_runtime": 8.688, "eval_samples_per_second": 540.171, "eval_steps_per_second": 67.564, "step": 615000 }, { "epoch": 141.07, "learning_rate": 4.088972115591072e-05, "loss": 0.8944, "step": 615500 }, { "epoch": 141.07, "eval_loss": 1.6402074098587036, "eval_runtime": 8.6899, "eval_samples_per_second": 540.052, "eval_steps_per_second": 67.55, "step": 615500 }, { "epoch": 141.19, "learning_rate": 4.087041109036173e-05, "loss": 0.8947, "step": 616000 }, { "epoch": 141.19, "eval_loss": 1.6567100286483765, "eval_runtime": 8.688, "eval_samples_per_second": 540.169, "eval_steps_per_second": 67.564, "step": 616000 }, { "epoch": 141.3, "learning_rate": 4.08510851520521e-05, "loss": 0.8933, "step": 616500 }, { "epoch": 141.3, "eval_loss": 1.622989296913147, "eval_runtime": 8.6919, "eval_samples_per_second": 539.926, "eval_steps_per_second": 67.534, "step": 616500 }, { "epoch": 141.42, "learning_rate": 4.083174336031066e-05, "loss": 0.889, "step": 617000 }, { "epoch": 141.42, "eval_loss": 1.6457456350326538, "eval_runtime": 8.6904, "eval_samples_per_second": 540.021, "eval_steps_per_second": 67.546, "step": 617000 }, { "epoch": 141.53, "learning_rate": 4.0812385734482036e-05, "loss": 0.8936, "step": 617500 }, { "epoch": 141.53, "eval_loss": 1.64780592918396, "eval_runtime": 8.686, "eval_samples_per_second": 540.297, "eval_steps_per_second": 67.58, "step": 617500 }, { "epoch": 141.65, "learning_rate": 4.079301229392675e-05, "loss": 0.8938, "step": 618000 }, { "epoch": 141.65, "eval_loss": 1.6391727924346924, "eval_runtime": 8.6866, "eval_samples_per_second": 540.259, "eval_steps_per_second": 67.575, "step": 618000 }, { "epoch": 141.76, "learning_rate": 4.077362305802108e-05, "loss": 0.9068, "step": 618500 }, { "epoch": 141.76, "eval_loss": 1.6561940908432007, "eval_runtime": 8.6955, "eval_samples_per_second": 539.705, "eval_steps_per_second": 67.506, "step": 618500 }, { "epoch": 141.87, "learning_rate": 4.075421804615714e-05, "loss": 0.9158, "step": 619000 }, { "epoch": 141.87, "eval_loss": 1.6326408386230469, "eval_runtime": 8.6919, "eval_samples_per_second": 539.929, "eval_steps_per_second": 67.534, "step": 619000 }, { "epoch": 141.99, "learning_rate": 4.073479727774284e-05, "loss": 0.9127, "step": 619500 }, { "epoch": 141.99, "eval_loss": 1.601078748703003, "eval_runtime": 8.6849, "eval_samples_per_second": 540.362, "eval_steps_per_second": 67.588, "step": 619500 }, { "epoch": 142.1, "learning_rate": 4.071536077220179e-05, "loss": 0.8918, "step": 620000 }, { "epoch": 142.1, "eval_loss": 1.6368951797485352, "eval_runtime": 8.6905, "eval_samples_per_second": 540.017, "eval_steps_per_second": 67.545, "step": 620000 }, { "epoch": 142.22, "learning_rate": 4.0695908548973396e-05, "loss": 0.893, "step": 620500 }, { "epoch": 142.22, "eval_loss": 1.6475955247879028, "eval_runtime": 8.6838, "eval_samples_per_second": 540.433, "eval_steps_per_second": 67.597, "step": 620500 }, { "epoch": 142.33, "learning_rate": 4.067644062751275e-05, "loss": 0.8922, "step": 621000 }, { "epoch": 142.33, "eval_loss": 1.6383358240127563, "eval_runtime": 8.6865, "eval_samples_per_second": 540.265, "eval_steps_per_second": 67.576, "step": 621000 }, { "epoch": 142.45, "learning_rate": 4.065695702729066e-05, "loss": 0.8926, "step": 621500 }, { "epoch": 142.45, "eval_loss": 1.6334296464920044, "eval_runtime": 8.6978, "eval_samples_per_second": 539.565, "eval_steps_per_second": 67.489, "step": 621500 }, { "epoch": 142.56, "learning_rate": 4.0637457767793605e-05, "loss": 0.8968, "step": 622000 }, { "epoch": 142.56, "eval_loss": 1.6286453008651733, "eval_runtime": 8.6942, "eval_samples_per_second": 539.784, "eval_steps_per_second": 67.516, "step": 622000 }, { "epoch": 142.68, "learning_rate": 4.061794286852374e-05, "loss": 0.9019, "step": 622500 }, { "epoch": 142.68, "eval_loss": 1.6450518369674683, "eval_runtime": 8.8639, "eval_samples_per_second": 529.449, "eval_steps_per_second": 66.223, "step": 622500 }, { "epoch": 142.79, "learning_rate": 4.059841234899885e-05, "loss": 0.9023, "step": 623000 }, { "epoch": 142.79, "eval_loss": 1.6627355813980103, "eval_runtime": 8.6882, "eval_samples_per_second": 540.157, "eval_steps_per_second": 67.563, "step": 623000 }, { "epoch": 142.91, "learning_rate": 4.057886622875234e-05, "loss": 0.9026, "step": 623500 }, { "epoch": 142.91, "eval_loss": 1.64748215675354, "eval_runtime": 8.6942, "eval_samples_per_second": 539.784, "eval_steps_per_second": 67.516, "step": 623500 }, { "epoch": 143.02, "learning_rate": 4.055930452733322e-05, "loss": 0.9041, "step": 624000 }, { "epoch": 143.02, "eval_loss": 1.628924012184143, "eval_runtime": 8.6923, "eval_samples_per_second": 539.905, "eval_steps_per_second": 67.531, "step": 624000 }, { "epoch": 143.14, "learning_rate": 4.0539727264306116e-05, "loss": 0.8928, "step": 624500 }, { "epoch": 143.14, "eval_loss": 1.6313964128494263, "eval_runtime": 8.6909, "eval_samples_per_second": 539.993, "eval_steps_per_second": 67.542, "step": 624500 }, { "epoch": 143.25, "learning_rate": 4.052013445925116e-05, "loss": 0.8941, "step": 625000 }, { "epoch": 143.25, "eval_loss": 1.6330546140670776, "eval_runtime": 8.6959, "eval_samples_per_second": 539.678, "eval_steps_per_second": 67.503, "step": 625000 }, { "epoch": 143.36, "learning_rate": 4.050052613176406e-05, "loss": 0.8886, "step": 625500 }, { "epoch": 143.36, "eval_loss": 1.6379090547561646, "eval_runtime": 8.6917, "eval_samples_per_second": 539.943, "eval_steps_per_second": 67.536, "step": 625500 }, { "epoch": 143.48, "learning_rate": 4.048090230145606e-05, "loss": 0.8919, "step": 626000 }, { "epoch": 143.48, "eval_loss": 1.6273715496063232, "eval_runtime": 8.69, "eval_samples_per_second": 540.045, "eval_steps_per_second": 67.549, "step": 626000 }, { "epoch": 143.59, "learning_rate": 4.046126298795389e-05, "loss": 0.8911, "step": 626500 }, { "epoch": 143.59, "eval_loss": 1.625076413154602, "eval_runtime": 8.6991, "eval_samples_per_second": 539.482, "eval_steps_per_second": 67.478, "step": 626500 }, { "epoch": 143.71, "learning_rate": 4.044160821089976e-05, "loss": 0.9013, "step": 627000 }, { "epoch": 143.71, "eval_loss": 1.6330511569976807, "eval_runtime": 8.6883, "eval_samples_per_second": 540.153, "eval_steps_per_second": 67.562, "step": 627000 }, { "epoch": 143.82, "learning_rate": 4.0421937989951374e-05, "loss": 0.9011, "step": 627500 }, { "epoch": 143.82, "eval_loss": 1.6165556907653809, "eval_runtime": 8.6885, "eval_samples_per_second": 540.138, "eval_steps_per_second": 67.56, "step": 627500 }, { "epoch": 143.94, "learning_rate": 4.0402252344781854e-05, "loss": 0.9064, "step": 628000 }, { "epoch": 143.94, "eval_loss": 1.6491215229034424, "eval_runtime": 8.6932, "eval_samples_per_second": 539.847, "eval_steps_per_second": 67.524, "step": 628000 }, { "epoch": 144.05, "learning_rate": 4.038255129507977e-05, "loss": 0.8928, "step": 628500 }, { "epoch": 144.05, "eval_loss": 1.6312342882156372, "eval_runtime": 8.6921, "eval_samples_per_second": 539.917, "eval_steps_per_second": 67.533, "step": 628500 }, { "epoch": 144.17, "learning_rate": 4.0362834860549085e-05, "loss": 0.8866, "step": 629000 }, { "epoch": 144.17, "eval_loss": 1.6479047536849976, "eval_runtime": 8.6881, "eval_samples_per_second": 540.167, "eval_steps_per_second": 67.564, "step": 629000 }, { "epoch": 144.28, "learning_rate": 4.0343103060909135e-05, "loss": 0.8867, "step": 629500 }, { "epoch": 144.28, "eval_loss": 1.6312938928604126, "eval_runtime": 8.6898, "eval_samples_per_second": 540.058, "eval_steps_per_second": 67.55, "step": 629500 }, { "epoch": 144.4, "learning_rate": 4.0323355915894664e-05, "loss": 0.8906, "step": 630000 }, { "epoch": 144.4, "eval_loss": 1.6307443380355835, "eval_runtime": 8.6869, "eval_samples_per_second": 540.239, "eval_steps_per_second": 67.573, "step": 630000 }, { "epoch": 144.51, "learning_rate": 4.030359344525573e-05, "loss": 0.89, "step": 630500 }, { "epoch": 144.51, "eval_loss": 1.623841404914856, "eval_runtime": 8.6932, "eval_samples_per_second": 539.847, "eval_steps_per_second": 67.524, "step": 630500 }, { "epoch": 144.63, "learning_rate": 4.028381566875773e-05, "loss": 0.8897, "step": 631000 }, { "epoch": 144.63, "eval_loss": 1.6291393041610718, "eval_runtime": 8.6932, "eval_samples_per_second": 539.849, "eval_steps_per_second": 67.524, "step": 631000 }, { "epoch": 144.74, "learning_rate": 4.0264022606181364e-05, "loss": 0.9003, "step": 631500 }, { "epoch": 144.74, "eval_loss": 1.6282932758331299, "eval_runtime": 8.6896, "eval_samples_per_second": 540.073, "eval_steps_per_second": 67.552, "step": 631500 }, { "epoch": 144.85, "learning_rate": 4.0244214277322634e-05, "loss": 0.897, "step": 632000 }, { "epoch": 144.85, "eval_loss": 1.6470948457717896, "eval_runtime": 8.6872, "eval_samples_per_second": 540.218, "eval_steps_per_second": 67.57, "step": 632000 }, { "epoch": 144.97, "learning_rate": 4.0224390701992805e-05, "loss": 0.8982, "step": 632500 }, { "epoch": 144.97, "eval_loss": 1.6174496412277222, "eval_runtime": 8.6877, "eval_samples_per_second": 540.191, "eval_steps_per_second": 67.567, "step": 632500 }, { "epoch": 145.08, "learning_rate": 4.020455190001837e-05, "loss": 0.8908, "step": 633000 }, { "epoch": 145.08, "eval_loss": 1.652155876159668, "eval_runtime": 8.6935, "eval_samples_per_second": 539.826, "eval_steps_per_second": 67.521, "step": 633000 }, { "epoch": 145.2, "learning_rate": 4.018469789124109e-05, "loss": 0.8861, "step": 633500 }, { "epoch": 145.2, "eval_loss": 1.6358752250671387, "eval_runtime": 8.6885, "eval_samples_per_second": 540.141, "eval_steps_per_second": 67.561, "step": 633500 }, { "epoch": 145.31, "learning_rate": 4.016482869551788e-05, "loss": 0.8827, "step": 634000 }, { "epoch": 145.31, "eval_loss": 1.6570569276809692, "eval_runtime": 8.6878, "eval_samples_per_second": 540.185, "eval_steps_per_second": 67.566, "step": 634000 }, { "epoch": 145.43, "learning_rate": 4.0144944332720914e-05, "loss": 0.8988, "step": 634500 }, { "epoch": 145.43, "eval_loss": 1.6583516597747803, "eval_runtime": 8.7044, "eval_samples_per_second": 539.155, "eval_steps_per_second": 67.438, "step": 634500 }, { "epoch": 145.54, "learning_rate": 4.012504482273748e-05, "loss": 0.9035, "step": 635000 }, { "epoch": 145.54, "eval_loss": 1.6285314559936523, "eval_runtime": 8.6925, "eval_samples_per_second": 539.888, "eval_steps_per_second": 67.529, "step": 635000 }, { "epoch": 145.66, "learning_rate": 4.010513018547003e-05, "loss": 0.8924, "step": 635500 }, { "epoch": 145.66, "eval_loss": 1.6524533033370972, "eval_runtime": 8.6816, "eval_samples_per_second": 540.571, "eval_steps_per_second": 67.615, "step": 635500 }, { "epoch": 145.77, "learning_rate": 4.0085200440836156e-05, "loss": 0.8924, "step": 636000 }, { "epoch": 145.77, "eval_loss": 1.6400840282440186, "eval_runtime": 8.6947, "eval_samples_per_second": 539.752, "eval_steps_per_second": 67.512, "step": 636000 }, { "epoch": 145.89, "learning_rate": 4.006525560876856e-05, "loss": 0.9031, "step": 636500 }, { "epoch": 145.89, "eval_loss": 1.6287167072296143, "eval_runtime": 8.6883, "eval_samples_per_second": 540.151, "eval_steps_per_second": 67.562, "step": 636500 }, { "epoch": 146.0, "learning_rate": 4.0045295709215016e-05, "loss": 0.8942, "step": 637000 }, { "epoch": 146.0, "eval_loss": 1.6365638971328735, "eval_runtime": 8.6899, "eval_samples_per_second": 540.054, "eval_steps_per_second": 67.55, "step": 637000 }, { "epoch": 146.12, "learning_rate": 4.0025320762138385e-05, "loss": 0.8807, "step": 637500 }, { "epoch": 146.12, "eval_loss": 1.6324903964996338, "eval_runtime": 8.6919, "eval_samples_per_second": 539.928, "eval_steps_per_second": 67.534, "step": 637500 }, { "epoch": 146.23, "learning_rate": 4.000533078751657e-05, "loss": 0.8838, "step": 638000 }, { "epoch": 146.23, "eval_loss": 1.6048221588134766, "eval_runtime": 8.6916, "eval_samples_per_second": 539.944, "eval_steps_per_second": 67.536, "step": 638000 }, { "epoch": 146.34, "learning_rate": 3.9985325805342494e-05, "loss": 0.8916, "step": 638500 }, { "epoch": 146.34, "eval_loss": 1.6405774354934692, "eval_runtime": 8.6871, "eval_samples_per_second": 540.225, "eval_steps_per_second": 67.571, "step": 638500 }, { "epoch": 146.46, "learning_rate": 3.996530583562412e-05, "loss": 0.8824, "step": 639000 }, { "epoch": 146.46, "eval_loss": 1.651458978652954, "eval_runtime": 8.6832, "eval_samples_per_second": 540.468, "eval_steps_per_second": 67.602, "step": 639000 }, { "epoch": 146.57, "learning_rate": 3.994527089838437e-05, "loss": 0.8936, "step": 639500 }, { "epoch": 146.57, "eval_loss": 1.6389702558517456, "eval_runtime": 8.6872, "eval_samples_per_second": 540.223, "eval_steps_per_second": 67.571, "step": 639500 }, { "epoch": 146.69, "learning_rate": 3.992522101366114e-05, "loss": 0.8854, "step": 640000 }, { "epoch": 146.69, "eval_loss": 1.6575437784194946, "eval_runtime": 8.6906, "eval_samples_per_second": 540.009, "eval_steps_per_second": 67.544, "step": 640000 }, { "epoch": 146.8, "learning_rate": 3.990515620150731e-05, "loss": 0.8936, "step": 640500 }, { "epoch": 146.8, "eval_loss": 1.6375508308410645, "eval_runtime": 8.6913, "eval_samples_per_second": 539.964, "eval_steps_per_second": 67.539, "step": 640500 }, { "epoch": 146.92, "learning_rate": 3.988507648199064e-05, "loss": 0.8919, "step": 641000 }, { "epoch": 146.92, "eval_loss": 1.6463440656661987, "eval_runtime": 8.6828, "eval_samples_per_second": 540.496, "eval_steps_per_second": 67.605, "step": 641000 }, { "epoch": 147.03, "learning_rate": 3.986498187519382e-05, "loss": 0.8915, "step": 641500 }, { "epoch": 147.03, "eval_loss": 1.6318600177764893, "eval_runtime": 8.6807, "eval_samples_per_second": 540.622, "eval_steps_per_second": 67.621, "step": 641500 }, { "epoch": 147.15, "learning_rate": 3.984487240121444e-05, "loss": 0.8829, "step": 642000 }, { "epoch": 147.15, "eval_loss": 1.626175880432129, "eval_runtime": 8.6908, "eval_samples_per_second": 539.998, "eval_steps_per_second": 67.543, "step": 642000 }, { "epoch": 147.26, "learning_rate": 3.9824748080164956e-05, "loss": 0.883, "step": 642500 }, { "epoch": 147.26, "eval_loss": 1.6470015048980713, "eval_runtime": 8.6955, "eval_samples_per_second": 539.702, "eval_steps_per_second": 67.506, "step": 642500 }, { "epoch": 147.38, "learning_rate": 3.980460893217266e-05, "loss": 0.8869, "step": 643000 }, { "epoch": 147.38, "eval_loss": 1.6353437900543213, "eval_runtime": 8.6875, "eval_samples_per_second": 540.2, "eval_steps_per_second": 67.568, "step": 643000 }, { "epoch": 147.49, "learning_rate": 3.978445497737968e-05, "loss": 0.8879, "step": 643500 }, { "epoch": 147.49, "eval_loss": 1.6533515453338623, "eval_runtime": 8.6855, "eval_samples_per_second": 540.328, "eval_steps_per_second": 67.584, "step": 643500 }, { "epoch": 147.6, "learning_rate": 3.976428623594296e-05, "loss": 0.8883, "step": 644000 }, { "epoch": 147.6, "eval_loss": 1.6576550006866455, "eval_runtime": 8.686, "eval_samples_per_second": 540.295, "eval_steps_per_second": 67.58, "step": 644000 }, { "epoch": 147.72, "learning_rate": 3.974410272803423e-05, "loss": 0.8869, "step": 644500 }, { "epoch": 147.72, "eval_loss": 1.6190848350524902, "eval_runtime": 8.6897, "eval_samples_per_second": 540.067, "eval_steps_per_second": 67.552, "step": 644500 }, { "epoch": 147.83, "learning_rate": 3.9723904473839973e-05, "loss": 0.8869, "step": 645000 }, { "epoch": 147.83, "eval_loss": 1.6329985857009888, "eval_runtime": 8.6895, "eval_samples_per_second": 540.078, "eval_steps_per_second": 67.553, "step": 645000 }, { "epoch": 147.95, "learning_rate": 3.970369149356144e-05, "loss": 0.8923, "step": 645500 }, { "epoch": 147.95, "eval_loss": 1.6668699979782104, "eval_runtime": 8.6827, "eval_samples_per_second": 540.5, "eval_steps_per_second": 67.606, "step": 645500 }, { "epoch": 148.06, "learning_rate": 3.968346380741462e-05, "loss": 0.892, "step": 646000 }, { "epoch": 148.06, "eval_loss": 1.6584398746490479, "eval_runtime": 8.6806, "eval_samples_per_second": 540.634, "eval_steps_per_second": 67.622, "step": 646000 }, { "epoch": 148.18, "learning_rate": 3.966322143563017e-05, "loss": 0.9005, "step": 646500 }, { "epoch": 148.18, "eval_loss": 1.643206000328064, "eval_runtime": 8.6816, "eval_samples_per_second": 540.571, "eval_steps_per_second": 67.615, "step": 646500 }, { "epoch": 148.29, "learning_rate": 3.9642964398453475e-05, "loss": 0.8768, "step": 647000 }, { "epoch": 148.29, "eval_loss": 1.6247221231460571, "eval_runtime": 8.6919, "eval_samples_per_second": 539.928, "eval_steps_per_second": 67.534, "step": 647000 }, { "epoch": 148.41, "learning_rate": 3.962269271614456e-05, "loss": 0.8813, "step": 647500 }, { "epoch": 148.41, "eval_loss": 1.6437422037124634, "eval_runtime": 8.6873, "eval_samples_per_second": 540.214, "eval_steps_per_second": 67.57, "step": 647500 }, { "epoch": 148.52, "learning_rate": 3.960240640897812e-05, "loss": 0.8832, "step": 648000 }, { "epoch": 148.52, "eval_loss": 1.6505812406539917, "eval_runtime": 8.6876, "eval_samples_per_second": 540.194, "eval_steps_per_second": 67.567, "step": 648000 }, { "epoch": 148.64, "learning_rate": 3.958210549724345e-05, "loss": 0.8831, "step": 648500 }, { "epoch": 148.64, "eval_loss": 1.6402254104614258, "eval_runtime": 8.6902, "eval_samples_per_second": 540.034, "eval_steps_per_second": 67.547, "step": 648500 }, { "epoch": 148.75, "learning_rate": 3.956179000124448e-05, "loss": 0.897, "step": 649000 }, { "epoch": 148.75, "eval_loss": 1.6350386142730713, "eval_runtime": 8.6866, "eval_samples_per_second": 540.258, "eval_steps_per_second": 67.575, "step": 649000 }, { "epoch": 148.87, "learning_rate": 3.95414599412997e-05, "loss": 0.8832, "step": 649500 }, { "epoch": 148.87, "eval_loss": 1.643047571182251, "eval_runtime": 8.687, "eval_samples_per_second": 540.232, "eval_steps_per_second": 67.572, "step": 649500 }, { "epoch": 148.98, "learning_rate": 3.9521115337742195e-05, "loss": 0.8958, "step": 650000 }, { "epoch": 148.98, "eval_loss": 1.639625072479248, "eval_runtime": 8.6863, "eval_samples_per_second": 540.278, "eval_steps_per_second": 67.578, "step": 650000 }, { "epoch": 149.09, "learning_rate": 3.950075621091957e-05, "loss": 0.8773, "step": 650500 }, { "epoch": 149.09, "eval_loss": 1.6595066785812378, "eval_runtime": 8.6884, "eval_samples_per_second": 540.147, "eval_steps_per_second": 67.562, "step": 650500 }, { "epoch": 149.21, "learning_rate": 3.9480382581193974e-05, "loss": 0.8773, "step": 651000 }, { "epoch": 149.21, "eval_loss": 1.6473450660705566, "eval_runtime": 8.6922, "eval_samples_per_second": 539.91, "eval_steps_per_second": 67.532, "step": 651000 }, { "epoch": 149.32, "learning_rate": 3.945999446894205e-05, "loss": 0.8799, "step": 651500 }, { "epoch": 149.32, "eval_loss": 1.6340577602386475, "eval_runtime": 8.686, "eval_samples_per_second": 540.296, "eval_steps_per_second": 67.58, "step": 651500 }, { "epoch": 149.44, "learning_rate": 3.943959189455493e-05, "loss": 0.884, "step": 652000 }, { "epoch": 149.44, "eval_loss": 1.6386632919311523, "eval_runtime": 8.6842, "eval_samples_per_second": 540.404, "eval_steps_per_second": 67.594, "step": 652000 }, { "epoch": 149.55, "learning_rate": 3.94191748784382e-05, "loss": 0.8908, "step": 652500 }, { "epoch": 149.55, "eval_loss": 1.655232310295105, "eval_runtime": 8.6858, "eval_samples_per_second": 540.308, "eval_steps_per_second": 67.582, "step": 652500 }, { "epoch": 149.67, "learning_rate": 3.93987434410119e-05, "loss": 0.886, "step": 653000 }, { "epoch": 149.67, "eval_loss": 1.6399110555648804, "eval_runtime": 8.6857, "eval_samples_per_second": 540.311, "eval_steps_per_second": 67.582, "step": 653000 }, { "epoch": 149.78, "learning_rate": 3.937829760271051e-05, "loss": 0.883, "step": 653500 }, { "epoch": 149.78, "eval_loss": 1.6382166147232056, "eval_runtime": 8.6879, "eval_samples_per_second": 540.178, "eval_steps_per_second": 67.565, "step": 653500 }, { "epoch": 149.9, "learning_rate": 3.935783738398288e-05, "loss": 0.8812, "step": 654000 }, { "epoch": 149.9, "eval_loss": 1.642242670059204, "eval_runtime": 8.6878, "eval_samples_per_second": 540.183, "eval_steps_per_second": 67.566, "step": 654000 }, { "epoch": 150.01, "learning_rate": 3.933736280529226e-05, "loss": 0.8855, "step": 654500 }, { "epoch": 150.01, "eval_loss": 1.6440383195877075, "eval_runtime": 8.6886, "eval_samples_per_second": 540.132, "eval_steps_per_second": 67.56, "step": 654500 }, { "epoch": 150.13, "learning_rate": 3.9316873887116256e-05, "loss": 0.869, "step": 655000 }, { "epoch": 150.13, "eval_loss": 1.65940260887146, "eval_runtime": 8.6986, "eval_samples_per_second": 539.513, "eval_steps_per_second": 67.482, "step": 655000 }, { "epoch": 150.24, "learning_rate": 3.929637064994683e-05, "loss": 0.8757, "step": 655500 }, { "epoch": 150.24, "eval_loss": 1.656050205230713, "eval_runtime": 8.6871, "eval_samples_per_second": 540.227, "eval_steps_per_second": 67.572, "step": 655500 }, { "epoch": 150.36, "learning_rate": 3.927585311429024e-05, "loss": 0.8793, "step": 656000 }, { "epoch": 150.36, "eval_loss": 1.6515686511993408, "eval_runtime": 8.688, "eval_samples_per_second": 540.168, "eval_steps_per_second": 67.564, "step": 656000 }, { "epoch": 150.47, "learning_rate": 3.925532130066707e-05, "loss": 0.8817, "step": 656500 }, { "epoch": 150.47, "eval_loss": 1.6525505781173706, "eval_runtime": 8.6932, "eval_samples_per_second": 539.846, "eval_steps_per_second": 67.524, "step": 656500 }, { "epoch": 150.58, "learning_rate": 3.923477522961218e-05, "loss": 0.8822, "step": 657000 }, { "epoch": 150.58, "eval_loss": 1.6316841840744019, "eval_runtime": 8.6879, "eval_samples_per_second": 540.177, "eval_steps_per_second": 67.565, "step": 657000 }, { "epoch": 150.7, "learning_rate": 3.921421492167466e-05, "loss": 0.885, "step": 657500 }, { "epoch": 150.7, "eval_loss": 1.6457939147949219, "eval_runtime": 8.6915, "eval_samples_per_second": 539.955, "eval_steps_per_second": 67.538, "step": 657500 }, { "epoch": 150.81, "learning_rate": 3.9193640397417884e-05, "loss": 0.8825, "step": 658000 }, { "epoch": 150.81, "eval_loss": 1.6477224826812744, "eval_runtime": 8.6811, "eval_samples_per_second": 540.598, "eval_steps_per_second": 67.618, "step": 658000 }, { "epoch": 150.93, "learning_rate": 3.917305167741941e-05, "loss": 0.8845, "step": 658500 }, { "epoch": 150.93, "eval_loss": 1.6389938592910767, "eval_runtime": 8.6865, "eval_samples_per_second": 540.262, "eval_steps_per_second": 67.576, "step": 658500 }, { "epoch": 151.04, "learning_rate": 3.915244878227101e-05, "loss": 0.8876, "step": 659000 }, { "epoch": 151.04, "eval_loss": 1.6627730131149292, "eval_runtime": 8.6858, "eval_samples_per_second": 540.31, "eval_steps_per_second": 67.582, "step": 659000 }, { "epoch": 151.16, "learning_rate": 3.9131831732578625e-05, "loss": 0.8769, "step": 659500 }, { "epoch": 151.16, "eval_loss": 1.6637473106384277, "eval_runtime": 8.6886, "eval_samples_per_second": 540.13, "eval_steps_per_second": 67.559, "step": 659500 }, { "epoch": 151.27, "learning_rate": 3.911120054896237e-05, "loss": 0.8682, "step": 660000 }, { "epoch": 151.27, "eval_loss": 1.6467854976654053, "eval_runtime": 8.6945, "eval_samples_per_second": 539.768, "eval_steps_per_second": 67.514, "step": 660000 }, { "epoch": 151.39, "learning_rate": 3.909055525205647e-05, "loss": 0.8715, "step": 660500 }, { "epoch": 151.39, "eval_loss": 1.6549075841903687, "eval_runtime": 8.6889, "eval_samples_per_second": 540.112, "eval_steps_per_second": 67.557, "step": 660500 }, { "epoch": 151.5, "learning_rate": 3.906989586250928e-05, "loss": 0.8786, "step": 661000 }, { "epoch": 151.5, "eval_loss": 1.6242666244506836, "eval_runtime": 8.6826, "eval_samples_per_second": 540.508, "eval_steps_per_second": 67.607, "step": 661000 }, { "epoch": 151.62, "learning_rate": 3.9049222400983255e-05, "loss": 0.8741, "step": 661500 }, { "epoch": 151.62, "eval_loss": 1.6488248109817505, "eval_runtime": 8.6882, "eval_samples_per_second": 540.161, "eval_steps_per_second": 67.563, "step": 661500 }, { "epoch": 151.73, "learning_rate": 3.9028534888154907e-05, "loss": 0.8815, "step": 662000 }, { "epoch": 151.73, "eval_loss": 1.6306791305541992, "eval_runtime": 8.6886, "eval_samples_per_second": 540.13, "eval_steps_per_second": 67.559, "step": 662000 }, { "epoch": 151.84, "learning_rate": 3.9007833344714816e-05, "loss": 0.8803, "step": 662500 }, { "epoch": 151.84, "eval_loss": 1.655139684677124, "eval_runtime": 8.6891, "eval_samples_per_second": 540.104, "eval_steps_per_second": 67.556, "step": 662500 }, { "epoch": 151.96, "learning_rate": 3.8987117791367587e-05, "loss": 0.8766, "step": 663000 }, { "epoch": 151.96, "eval_loss": 1.6316804885864258, "eval_runtime": 8.6932, "eval_samples_per_second": 539.846, "eval_steps_per_second": 67.524, "step": 663000 }, { "epoch": 152.07, "learning_rate": 3.896638824883186e-05, "loss": 0.8763, "step": 663500 }, { "epoch": 152.07, "eval_loss": 1.6226086616516113, "eval_runtime": 8.6869, "eval_samples_per_second": 540.239, "eval_steps_per_second": 67.573, "step": 663500 }, { "epoch": 152.19, "learning_rate": 3.894564473784021e-05, "loss": 0.8699, "step": 664000 }, { "epoch": 152.19, "eval_loss": 1.642508625984192, "eval_runtime": 8.691, "eval_samples_per_second": 539.983, "eval_steps_per_second": 67.541, "step": 664000 }, { "epoch": 152.3, "learning_rate": 3.8924887279139256e-05, "loss": 0.8772, "step": 664500 }, { "epoch": 152.3, "eval_loss": 1.634535312652588, "eval_runtime": 8.6874, "eval_samples_per_second": 540.209, "eval_steps_per_second": 67.569, "step": 664500 }, { "epoch": 152.42, "learning_rate": 3.8904115893489506e-05, "loss": 0.8736, "step": 665000 }, { "epoch": 152.42, "eval_loss": 1.624881625175476, "eval_runtime": 8.6871, "eval_samples_per_second": 540.225, "eval_steps_per_second": 67.571, "step": 665000 }, { "epoch": 152.53, "learning_rate": 3.888333060166543e-05, "loss": 0.8802, "step": 665500 }, { "epoch": 152.53, "eval_loss": 1.658016562461853, "eval_runtime": 8.6854, "eval_samples_per_second": 540.333, "eval_steps_per_second": 67.585, "step": 665500 }, { "epoch": 152.65, "learning_rate": 3.8862531424455394e-05, "loss": 0.8735, "step": 666000 }, { "epoch": 152.65, "eval_loss": 1.6332541704177856, "eval_runtime": 8.6907, "eval_samples_per_second": 540.0, "eval_steps_per_second": 67.543, "step": 666000 }, { "epoch": 152.76, "learning_rate": 3.884171838266166e-05, "loss": 0.8777, "step": 666500 }, { "epoch": 152.76, "eval_loss": 1.6396609544754028, "eval_runtime": 8.6868, "eval_samples_per_second": 540.242, "eval_steps_per_second": 67.573, "step": 666500 }, { "epoch": 152.88, "learning_rate": 3.882089149710034e-05, "loss": 0.883, "step": 667000 }, { "epoch": 152.88, "eval_loss": 1.6492760181427002, "eval_runtime": 8.6866, "eval_samples_per_second": 540.254, "eval_steps_per_second": 67.575, "step": 667000 }, { "epoch": 152.99, "learning_rate": 3.8800050788601414e-05, "loss": 0.8746, "step": 667500 }, { "epoch": 152.99, "eval_loss": 1.6324907541275024, "eval_runtime": 8.6938, "eval_samples_per_second": 539.811, "eval_steps_per_second": 67.519, "step": 667500 }, { "epoch": 153.11, "learning_rate": 3.8779196278008674e-05, "loss": 0.8708, "step": 668000 }, { "epoch": 153.11, "eval_loss": 1.6575522422790527, "eval_runtime": 8.6857, "eval_samples_per_second": 540.315, "eval_steps_per_second": 67.583, "step": 668000 }, { "epoch": 153.22, "learning_rate": 3.875832798617971e-05, "loss": 0.8741, "step": 668500 }, { "epoch": 153.22, "eval_loss": 1.6326961517333984, "eval_runtime": 8.6914, "eval_samples_per_second": 539.959, "eval_steps_per_second": 67.538, "step": 668500 }, { "epoch": 153.33, "learning_rate": 3.87374459339859e-05, "loss": 0.8729, "step": 669000 }, { "epoch": 153.33, "eval_loss": 1.6771869659423828, "eval_runtime": 8.6836, "eval_samples_per_second": 540.445, "eval_steps_per_second": 67.599, "step": 669000 }, { "epoch": 153.45, "learning_rate": 3.8716550142312416e-05, "loss": 0.8723, "step": 669500 }, { "epoch": 153.45, "eval_loss": 1.6529971361160278, "eval_runtime": 8.6874, "eval_samples_per_second": 540.211, "eval_steps_per_second": 67.57, "step": 669500 }, { "epoch": 153.56, "learning_rate": 3.869564063205811e-05, "loss": 0.8757, "step": 670000 }, { "epoch": 153.56, "eval_loss": 1.6325232982635498, "eval_runtime": 8.6917, "eval_samples_per_second": 539.943, "eval_steps_per_second": 67.536, "step": 670000 }, { "epoch": 153.68, "learning_rate": 3.8674717424135605e-05, "loss": 0.8808, "step": 670500 }, { "epoch": 153.68, "eval_loss": 1.6461838483810425, "eval_runtime": 8.6892, "eval_samples_per_second": 540.093, "eval_steps_per_second": 67.555, "step": 670500 }, { "epoch": 153.79, "learning_rate": 3.8653780539471196e-05, "loss": 0.88, "step": 671000 }, { "epoch": 153.79, "eval_loss": 1.6365278959274292, "eval_runtime": 8.6959, "eval_samples_per_second": 539.678, "eval_steps_per_second": 67.503, "step": 671000 }, { "epoch": 153.91, "learning_rate": 3.863282999900488e-05, "loss": 0.8774, "step": 671500 }, { "epoch": 153.91, "eval_loss": 1.6294695138931274, "eval_runtime": 8.6901, "eval_samples_per_second": 540.041, "eval_steps_per_second": 67.548, "step": 671500 }, { "epoch": 154.02, "learning_rate": 3.86118658236903e-05, "loss": 0.8813, "step": 672000 }, { "epoch": 154.02, "eval_loss": 1.6245492696762085, "eval_runtime": 8.686, "eval_samples_per_second": 540.293, "eval_steps_per_second": 67.58, "step": 672000 }, { "epoch": 154.14, "learning_rate": 3.859088803449472e-05, "loss": 0.867, "step": 672500 }, { "epoch": 154.14, "eval_loss": 1.6294951438903809, "eval_runtime": 8.6843, "eval_samples_per_second": 540.401, "eval_steps_per_second": 67.593, "step": 672500 }, { "epoch": 154.25, "learning_rate": 3.856989665239904e-05, "loss": 0.8665, "step": 673000 }, { "epoch": 154.25, "eval_loss": 1.645458459854126, "eval_runtime": 8.689, "eval_samples_per_second": 540.105, "eval_steps_per_second": 67.556, "step": 673000 }, { "epoch": 154.37, "learning_rate": 3.854889169839776e-05, "loss": 0.8708, "step": 673500 }, { "epoch": 154.37, "eval_loss": 1.643025279045105, "eval_runtime": 8.683, "eval_samples_per_second": 540.482, "eval_steps_per_second": 67.603, "step": 673500 }, { "epoch": 154.48, "learning_rate": 3.852787319349893e-05, "loss": 0.8699, "step": 674000 }, { "epoch": 154.48, "eval_loss": 1.6404807567596436, "eval_runtime": 8.6891, "eval_samples_per_second": 540.104, "eval_steps_per_second": 67.556, "step": 674000 }, { "epoch": 154.6, "learning_rate": 3.850684115872418e-05, "loss": 0.8736, "step": 674500 }, { "epoch": 154.6, "eval_loss": 1.6570442914962769, "eval_runtime": 8.6899, "eval_samples_per_second": 540.054, "eval_steps_per_second": 67.55, "step": 674500 }, { "epoch": 154.71, "learning_rate": 3.848579561510864e-05, "loss": 0.8717, "step": 675000 }, { "epoch": 154.71, "eval_loss": 1.6490638256072998, "eval_runtime": 8.6918, "eval_samples_per_second": 539.935, "eval_steps_per_second": 67.535, "step": 675000 }, { "epoch": 154.82, "learning_rate": 3.846473658370099e-05, "loss": 0.8802, "step": 675500 }, { "epoch": 154.82, "eval_loss": 1.6457552909851074, "eval_runtime": 8.7018, "eval_samples_per_second": 539.317, "eval_steps_per_second": 67.458, "step": 675500 }, { "epoch": 154.94, "learning_rate": 3.844366408556337e-05, "loss": 0.8829, "step": 676000 }, { "epoch": 154.94, "eval_loss": 1.6349563598632812, "eval_runtime": 8.6912, "eval_samples_per_second": 539.969, "eval_steps_per_second": 67.539, "step": 676000 }, { "epoch": 155.05, "learning_rate": 3.84225781417714e-05, "loss": 0.878, "step": 676500 }, { "epoch": 155.05, "eval_loss": 1.6507903337478638, "eval_runtime": 8.6904, "eval_samples_per_second": 540.02, "eval_steps_per_second": 67.546, "step": 676500 }, { "epoch": 155.17, "learning_rate": 3.840147877341414e-05, "loss": 0.8673, "step": 677000 }, { "epoch": 155.17, "eval_loss": 1.6390118598937988, "eval_runtime": 8.6916, "eval_samples_per_second": 539.946, "eval_steps_per_second": 67.536, "step": 677000 }, { "epoch": 155.28, "learning_rate": 3.838036600159408e-05, "loss": 0.8721, "step": 677500 }, { "epoch": 155.28, "eval_loss": 1.6378865242004395, "eval_runtime": 8.6993, "eval_samples_per_second": 539.47, "eval_steps_per_second": 67.477, "step": 677500 }, { "epoch": 155.4, "learning_rate": 3.835923984742712e-05, "loss": 0.8673, "step": 678000 }, { "epoch": 155.4, "eval_loss": 1.6378217935562134, "eval_runtime": 8.6845, "eval_samples_per_second": 540.385, "eval_steps_per_second": 67.591, "step": 678000 }, { "epoch": 155.51, "learning_rate": 3.833810033204255e-05, "loss": 0.8656, "step": 678500 }, { "epoch": 155.51, "eval_loss": 1.6483370065689087, "eval_runtime": 8.6939, "eval_samples_per_second": 539.805, "eval_steps_per_second": 67.519, "step": 678500 }, { "epoch": 155.63, "learning_rate": 3.831694747658301e-05, "loss": 0.8752, "step": 679000 }, { "epoch": 155.63, "eval_loss": 1.6279138326644897, "eval_runtime": 8.6895, "eval_samples_per_second": 540.077, "eval_steps_per_second": 67.553, "step": 679000 }, { "epoch": 155.74, "learning_rate": 3.8295781302204484e-05, "loss": 0.8788, "step": 679500 }, { "epoch": 155.74, "eval_loss": 1.6377413272857666, "eval_runtime": 8.6886, "eval_samples_per_second": 540.135, "eval_steps_per_second": 67.56, "step": 679500 }, { "epoch": 155.86, "learning_rate": 3.827460183007629e-05, "loss": 0.8707, "step": 680000 }, { "epoch": 155.86, "eval_loss": 1.6460464000701904, "eval_runtime": 8.6851, "eval_samples_per_second": 540.352, "eval_steps_per_second": 67.587, "step": 680000 }, { "epoch": 155.97, "learning_rate": 3.825340908138102e-05, "loss": 0.8778, "step": 680500 }, { "epoch": 155.97, "eval_loss": 1.6587073802947998, "eval_runtime": 8.6854, "eval_samples_per_second": 540.334, "eval_steps_per_second": 67.585, "step": 680500 }, { "epoch": 156.09, "learning_rate": 3.823220307731456e-05, "loss": 0.8641, "step": 681000 }, { "epoch": 156.09, "eval_loss": 1.632367491722107, "eval_runtime": 8.6961, "eval_samples_per_second": 539.67, "eval_steps_per_second": 67.502, "step": 681000 }, { "epoch": 156.2, "learning_rate": 3.821098383908608e-05, "loss": 0.864, "step": 681500 }, { "epoch": 156.2, "eval_loss": 1.6502468585968018, "eval_runtime": 8.686, "eval_samples_per_second": 540.294, "eval_steps_per_second": 67.58, "step": 681500 }, { "epoch": 156.31, "learning_rate": 3.8189751387917926e-05, "loss": 0.8641, "step": 682000 }, { "epoch": 156.31, "eval_loss": 1.6504474878311157, "eval_runtime": 8.6864, "eval_samples_per_second": 540.271, "eval_steps_per_second": 67.577, "step": 682000 }, { "epoch": 156.43, "learning_rate": 3.816850574504572e-05, "loss": 0.8705, "step": 682500 }, { "epoch": 156.43, "eval_loss": 1.6420010328292847, "eval_runtime": 8.6898, "eval_samples_per_second": 540.06, "eval_steps_per_second": 67.551, "step": 682500 }, { "epoch": 156.54, "learning_rate": 3.814724693171823e-05, "loss": 0.874, "step": 683000 }, { "epoch": 156.54, "eval_loss": 1.6473772525787354, "eval_runtime": 8.6854, "eval_samples_per_second": 540.335, "eval_steps_per_second": 67.585, "step": 683000 }, { "epoch": 156.66, "learning_rate": 3.812597496919743e-05, "loss": 0.8672, "step": 683500 }, { "epoch": 156.66, "eval_loss": 1.6236424446105957, "eval_runtime": 8.6941, "eval_samples_per_second": 539.791, "eval_steps_per_second": 67.517, "step": 683500 }, { "epoch": 156.77, "learning_rate": 3.810468987875842e-05, "loss": 0.875, "step": 684000 }, { "epoch": 156.77, "eval_loss": 1.6359535455703735, "eval_runtime": 8.6897, "eval_samples_per_second": 540.063, "eval_steps_per_second": 67.551, "step": 684000 }, { "epoch": 156.89, "learning_rate": 3.808339168168945e-05, "loss": 0.8771, "step": 684500 }, { "epoch": 156.89, "eval_loss": 1.6455187797546387, "eval_runtime": 8.6835, "eval_samples_per_second": 540.448, "eval_steps_per_second": 67.599, "step": 684500 }, { "epoch": 157.0, "learning_rate": 3.8062080399291875e-05, "loss": 0.8878, "step": 685000 }, { "epoch": 157.0, "eval_loss": 1.6550112962722778, "eval_runtime": 8.6857, "eval_samples_per_second": 540.313, "eval_steps_per_second": 67.582, "step": 685000 }, { "epoch": 157.12, "learning_rate": 3.8040756052880114e-05, "loss": 0.8676, "step": 685500 }, { "epoch": 157.12, "eval_loss": 1.6449847221374512, "eval_runtime": 8.6914, "eval_samples_per_second": 539.957, "eval_steps_per_second": 67.538, "step": 685500 }, { "epoch": 157.23, "learning_rate": 3.801941866378168e-05, "loss": 0.8649, "step": 686000 }, { "epoch": 157.23, "eval_loss": 1.6647926568984985, "eval_runtime": 8.6837, "eval_samples_per_second": 540.438, "eval_steps_per_second": 67.598, "step": 686000 }, { "epoch": 157.35, "learning_rate": 3.7998068253337136e-05, "loss": 0.8741, "step": 686500 }, { "epoch": 157.35, "eval_loss": 1.6588300466537476, "eval_runtime": 8.6992, "eval_samples_per_second": 539.476, "eval_steps_per_second": 67.478, "step": 686500 }, { "epoch": 157.46, "learning_rate": 3.797670484290003e-05, "loss": 0.8689, "step": 687000 }, { "epoch": 157.46, "eval_loss": 1.651749610900879, "eval_runtime": 8.6886, "eval_samples_per_second": 540.136, "eval_steps_per_second": 67.56, "step": 687000 }, { "epoch": 157.57, "learning_rate": 3.795532845383695e-05, "loss": 0.8734, "step": 687500 }, { "epoch": 157.57, "eval_loss": 1.6633410453796387, "eval_runtime": 8.691, "eval_samples_per_second": 539.986, "eval_steps_per_second": 67.541, "step": 687500 }, { "epoch": 157.69, "learning_rate": 3.793393910752744e-05, "loss": 0.8725, "step": 688000 }, { "epoch": 157.69, "eval_loss": 1.661882758140564, "eval_runtime": 8.6882, "eval_samples_per_second": 540.16, "eval_steps_per_second": 67.563, "step": 688000 }, { "epoch": 157.8, "learning_rate": 3.7912536825364034e-05, "loss": 0.8715, "step": 688500 }, { "epoch": 157.8, "eval_loss": 1.643898606300354, "eval_runtime": 8.686, "eval_samples_per_second": 540.296, "eval_steps_per_second": 67.58, "step": 688500 }, { "epoch": 157.92, "learning_rate": 3.789112162875215e-05, "loss": 0.8715, "step": 689000 }, { "epoch": 157.92, "eval_loss": 1.6352702379226685, "eval_runtime": 8.691, "eval_samples_per_second": 539.985, "eval_steps_per_second": 67.541, "step": 689000 }, { "epoch": 158.03, "learning_rate": 3.786969353911017e-05, "loss": 0.8726, "step": 689500 }, { "epoch": 158.03, "eval_loss": 1.636493444442749, "eval_runtime": 8.6902, "eval_samples_per_second": 540.031, "eval_steps_per_second": 67.547, "step": 689500 }, { "epoch": 158.15, "learning_rate": 3.784825257786936e-05, "loss": 0.8535, "step": 690000 }, { "epoch": 158.15, "eval_loss": 1.6396374702453613, "eval_runtime": 8.682, "eval_samples_per_second": 540.542, "eval_steps_per_second": 67.611, "step": 690000 }, { "epoch": 158.26, "learning_rate": 3.7826798766473856e-05, "loss": 0.8664, "step": 690500 }, { "epoch": 158.26, "eval_loss": 1.6687723398208618, "eval_runtime": 8.6881, "eval_samples_per_second": 540.163, "eval_steps_per_second": 67.564, "step": 690500 }, { "epoch": 158.38, "learning_rate": 3.7805332126380646e-05, "loss": 0.8638, "step": 691000 }, { "epoch": 158.38, "eval_loss": 1.6533771753311157, "eval_runtime": 8.6924, "eval_samples_per_second": 539.896, "eval_steps_per_second": 67.53, "step": 691000 }, { "epoch": 158.49, "learning_rate": 3.778385267905954e-05, "loss": 0.8648, "step": 691500 }, { "epoch": 158.49, "eval_loss": 1.6665762662887573, "eval_runtime": 8.6936, "eval_samples_per_second": 539.822, "eval_steps_per_second": 67.521, "step": 691500 }, { "epoch": 158.61, "learning_rate": 3.7762360445993164e-05, "loss": 0.8663, "step": 692000 }, { "epoch": 158.61, "eval_loss": 1.6204906702041626, "eval_runtime": 8.6919, "eval_samples_per_second": 539.927, "eval_steps_per_second": 67.534, "step": 692000 }, { "epoch": 158.72, "learning_rate": 3.774085544867695e-05, "loss": 0.8658, "step": 692500 }, { "epoch": 158.72, "eval_loss": 1.6667275428771973, "eval_runtime": 8.693, "eval_samples_per_second": 539.863, "eval_steps_per_second": 67.526, "step": 692500 }, { "epoch": 158.84, "learning_rate": 3.771933770861906e-05, "loss": 0.8744, "step": 693000 }, { "epoch": 158.84, "eval_loss": 1.660191535949707, "eval_runtime": 8.6921, "eval_samples_per_second": 539.915, "eval_steps_per_second": 67.532, "step": 693000 }, { "epoch": 158.95, "learning_rate": 3.7697807247340433e-05, "loss": 0.8652, "step": 693500 }, { "epoch": 158.95, "eval_loss": 1.6426407098770142, "eval_runtime": 8.6931, "eval_samples_per_second": 539.855, "eval_steps_per_second": 67.525, "step": 693500 }, { "epoch": 159.06, "learning_rate": 3.767626408637471e-05, "loss": 0.8679, "step": 694000 }, { "epoch": 159.06, "eval_loss": 1.6396753787994385, "eval_runtime": 8.6861, "eval_samples_per_second": 540.289, "eval_steps_per_second": 67.579, "step": 694000 }, { "epoch": 159.18, "learning_rate": 3.765470824726824e-05, "loss": 0.8642, "step": 694500 }, { "epoch": 159.18, "eval_loss": 1.6494487524032593, "eval_runtime": 8.6931, "eval_samples_per_second": 539.853, "eval_steps_per_second": 67.525, "step": 694500 }, { "epoch": 159.29, "learning_rate": 3.763313975158006e-05, "loss": 0.8714, "step": 695000 }, { "epoch": 159.29, "eval_loss": 1.6507108211517334, "eval_runtime": 8.6942, "eval_samples_per_second": 539.786, "eval_steps_per_second": 67.516, "step": 695000 }, { "epoch": 159.41, "learning_rate": 3.761155862088186e-05, "loss": 0.8641, "step": 695500 }, { "epoch": 159.41, "eval_loss": 1.6605374813079834, "eval_runtime": 8.6919, "eval_samples_per_second": 539.927, "eval_steps_per_second": 67.534, "step": 695500 }, { "epoch": 159.52, "learning_rate": 3.7589964876757955e-05, "loss": 0.8676, "step": 696000 }, { "epoch": 159.52, "eval_loss": 1.6440552473068237, "eval_runtime": 8.7008, "eval_samples_per_second": 539.373, "eval_steps_per_second": 67.465, "step": 696000 }, { "epoch": 159.64, "learning_rate": 3.756835854080529e-05, "loss": 0.8647, "step": 696500 }, { "epoch": 159.64, "eval_loss": 1.6503032445907593, "eval_runtime": 8.6834, "eval_samples_per_second": 540.46, "eval_steps_per_second": 67.601, "step": 696500 }, { "epoch": 159.75, "learning_rate": 3.754673963463341e-05, "loss": 0.8691, "step": 697000 }, { "epoch": 159.75, "eval_loss": 1.6346650123596191, "eval_runtime": 8.687, "eval_samples_per_second": 540.231, "eval_steps_per_second": 67.572, "step": 697000 }, { "epoch": 159.87, "learning_rate": 3.752510817986441e-05, "loss": 0.869, "step": 697500 }, { "epoch": 159.87, "eval_loss": 1.6387041807174683, "eval_runtime": 8.6902, "eval_samples_per_second": 540.034, "eval_steps_per_second": 67.547, "step": 697500 }, { "epoch": 159.98, "learning_rate": 3.7503464198132945e-05, "loss": 0.8678, "step": 698000 }, { "epoch": 159.98, "eval_loss": 1.6403894424438477, "eval_runtime": 8.6863, "eval_samples_per_second": 540.274, "eval_steps_per_second": 67.577, "step": 698000 }, { "epoch": 160.1, "learning_rate": 3.7481807711086216e-05, "loss": 0.8678, "step": 698500 }, { "epoch": 160.1, "eval_loss": 1.6630040407180786, "eval_runtime": 8.6891, "eval_samples_per_second": 540.103, "eval_steps_per_second": 67.556, "step": 698500 }, { "epoch": 160.21, "learning_rate": 3.7460138740383904e-05, "loss": 0.8575, "step": 699000 }, { "epoch": 160.21, "eval_loss": 1.6367716789245605, "eval_runtime": 8.691, "eval_samples_per_second": 539.982, "eval_steps_per_second": 67.541, "step": 699000 }, { "epoch": 160.33, "learning_rate": 3.7438457307698196e-05, "loss": 0.8636, "step": 699500 }, { "epoch": 160.33, "eval_loss": 1.655773639678955, "eval_runtime": 8.6878, "eval_samples_per_second": 540.181, "eval_steps_per_second": 67.566, "step": 699500 }, { "epoch": 160.44, "learning_rate": 3.741676343471372e-05, "loss": 0.8508, "step": 700000 }, { "epoch": 160.44, "eval_loss": 1.6481581926345825, "eval_runtime": 8.6966, "eval_samples_per_second": 539.635, "eval_steps_per_second": 67.498, "step": 700000 }, { "epoch": 160.55, "learning_rate": 3.739505714312757e-05, "loss": 0.863, "step": 700500 }, { "epoch": 160.55, "eval_loss": 1.6548280715942383, "eval_runtime": 8.6973, "eval_samples_per_second": 539.595, "eval_steps_per_second": 67.492, "step": 700500 }, { "epoch": 160.67, "learning_rate": 3.737333845464925e-05, "loss": 0.8737, "step": 701000 }, { "epoch": 160.67, "eval_loss": 1.6612112522125244, "eval_runtime": 8.6898, "eval_samples_per_second": 540.056, "eval_steps_per_second": 67.55, "step": 701000 }, { "epoch": 160.78, "learning_rate": 3.735160739100068e-05, "loss": 0.8621, "step": 701500 }, { "epoch": 160.78, "eval_loss": 1.6328641176223755, "eval_runtime": 8.6906, "eval_samples_per_second": 540.007, "eval_steps_per_second": 67.544, "step": 701500 }, { "epoch": 160.9, "learning_rate": 3.7329863973916106e-05, "loss": 0.8765, "step": 702000 }, { "epoch": 160.9, "eval_loss": 1.6410528421401978, "eval_runtime": 8.6966, "eval_samples_per_second": 539.634, "eval_steps_per_second": 67.497, "step": 702000 }, { "epoch": 161.01, "learning_rate": 3.730810822514219e-05, "loss": 0.8682, "step": 702500 }, { "epoch": 161.01, "eval_loss": 1.636192798614502, "eval_runtime": 8.6952, "eval_samples_per_second": 539.724, "eval_steps_per_second": 67.509, "step": 702500 }, { "epoch": 161.13, "learning_rate": 3.728634016643791e-05, "loss": 0.8562, "step": 703000 }, { "epoch": 161.13, "eval_loss": 1.634828805923462, "eval_runtime": 8.6963, "eval_samples_per_second": 539.657, "eval_steps_per_second": 67.5, "step": 703000 }, { "epoch": 161.24, "learning_rate": 3.7264559819574514e-05, "loss": 0.8616, "step": 703500 }, { "epoch": 161.24, "eval_loss": 1.6663001775741577, "eval_runtime": 8.6889, "eval_samples_per_second": 540.113, "eval_steps_per_second": 67.557, "step": 703500 }, { "epoch": 161.36, "learning_rate": 3.72427672063356e-05, "loss": 0.857, "step": 704000 }, { "epoch": 161.36, "eval_loss": 1.6747862100601196, "eval_runtime": 8.6938, "eval_samples_per_second": 539.811, "eval_steps_per_second": 67.519, "step": 704000 }, { "epoch": 161.47, "learning_rate": 3.7220962348516996e-05, "loss": 0.8646, "step": 704500 }, { "epoch": 161.47, "eval_loss": 1.670619010925293, "eval_runtime": 8.6946, "eval_samples_per_second": 539.759, "eval_steps_per_second": 67.513, "step": 704500 }, { "epoch": 161.59, "learning_rate": 3.719914526792679e-05, "loss": 0.8719, "step": 705000 }, { "epoch": 161.59, "eval_loss": 1.6693438291549683, "eval_runtime": 8.6878, "eval_samples_per_second": 540.18, "eval_steps_per_second": 67.566, "step": 705000 }, { "epoch": 161.7, "learning_rate": 3.7177315986385305e-05, "loss": 0.8602, "step": 705500 }, { "epoch": 161.7, "eval_loss": 1.6237274408340454, "eval_runtime": 8.6892, "eval_samples_per_second": 540.093, "eval_steps_per_second": 67.555, "step": 705500 }, { "epoch": 161.82, "learning_rate": 3.715547452572503e-05, "loss": 0.8648, "step": 706000 }, { "epoch": 161.82, "eval_loss": 1.6446540355682373, "eval_runtime": 8.6936, "eval_samples_per_second": 539.823, "eval_steps_per_second": 67.521, "step": 706000 }, { "epoch": 161.93, "learning_rate": 3.7133620907790676e-05, "loss": 0.8665, "step": 706500 }, { "epoch": 161.93, "eval_loss": 1.6413966417312622, "eval_runtime": 8.683, "eval_samples_per_second": 540.481, "eval_steps_per_second": 67.603, "step": 706500 }, { "epoch": 162.04, "learning_rate": 3.711175515443909e-05, "loss": 0.8677, "step": 707000 }, { "epoch": 162.04, "eval_loss": 1.6472437381744385, "eval_runtime": 8.6899, "eval_samples_per_second": 540.05, "eval_steps_per_second": 67.549, "step": 707000 }, { "epoch": 162.16, "learning_rate": 3.7089877287539275e-05, "loss": 0.8561, "step": 707500 }, { "epoch": 162.16, "eval_loss": 1.6560864448547363, "eval_runtime": 8.6892, "eval_samples_per_second": 540.096, "eval_steps_per_second": 67.555, "step": 707500 }, { "epoch": 162.27, "learning_rate": 3.706798732897232e-05, "loss": 0.8513, "step": 708000 }, { "epoch": 162.27, "eval_loss": 1.6459615230560303, "eval_runtime": 8.6823, "eval_samples_per_second": 540.526, "eval_steps_per_second": 67.609, "step": 708000 }, { "epoch": 162.39, "learning_rate": 3.704608530063143e-05, "loss": 0.8572, "step": 708500 }, { "epoch": 162.39, "eval_loss": 1.6440556049346924, "eval_runtime": 8.6929, "eval_samples_per_second": 539.865, "eval_steps_per_second": 67.526, "step": 708500 }, { "epoch": 162.5, "learning_rate": 3.7024171224421885e-05, "loss": 0.8622, "step": 709000 }, { "epoch": 162.5, "eval_loss": 1.6477402448654175, "eval_runtime": 8.6862, "eval_samples_per_second": 540.284, "eval_steps_per_second": 67.579, "step": 709000 }, { "epoch": 162.62, "learning_rate": 3.700224512226099e-05, "loss": 0.855, "step": 709500 }, { "epoch": 162.62, "eval_loss": 1.64897620677948, "eval_runtime": 8.6851, "eval_samples_per_second": 540.351, "eval_steps_per_second": 67.587, "step": 709500 }, { "epoch": 162.73, "learning_rate": 3.69803070160781e-05, "loss": 0.8644, "step": 710000 }, { "epoch": 162.73, "eval_loss": 1.6465318202972412, "eval_runtime": 8.6884, "eval_samples_per_second": 540.147, "eval_steps_per_second": 67.561, "step": 710000 }, { "epoch": 162.85, "learning_rate": 3.6958356927814576e-05, "loss": 0.8661, "step": 710500 }, { "epoch": 162.85, "eval_loss": 1.6403148174285889, "eval_runtime": 8.6965, "eval_samples_per_second": 539.645, "eval_steps_per_second": 67.499, "step": 710500 }, { "epoch": 162.96, "learning_rate": 3.693639487942374e-05, "loss": 0.8615, "step": 711000 }, { "epoch": 162.96, "eval_loss": 1.632047414779663, "eval_runtime": 8.6927, "eval_samples_per_second": 539.876, "eval_steps_per_second": 67.528, "step": 711000 }, { "epoch": 163.08, "learning_rate": 3.6914420892870916e-05, "loss": 0.8493, "step": 711500 }, { "epoch": 163.08, "eval_loss": 1.6537387371063232, "eval_runtime": 8.6904, "eval_samples_per_second": 540.024, "eval_steps_per_second": 67.546, "step": 711500 }, { "epoch": 163.19, "learning_rate": 3.6892434990133315e-05, "loss": 0.8542, "step": 712000 }, { "epoch": 163.19, "eval_loss": 1.6449295282363892, "eval_runtime": 8.6908, "eval_samples_per_second": 539.995, "eval_steps_per_second": 67.543, "step": 712000 }, { "epoch": 163.31, "learning_rate": 3.687043719320011e-05, "loss": 0.8525, "step": 712500 }, { "epoch": 163.31, "eval_loss": 1.6291323900222778, "eval_runtime": 8.6927, "eval_samples_per_second": 539.877, "eval_steps_per_second": 67.528, "step": 712500 }, { "epoch": 163.42, "learning_rate": 3.684842752407237e-05, "loss": 0.853, "step": 713000 }, { "epoch": 163.42, "eval_loss": 1.6690075397491455, "eval_runtime": 8.6894, "eval_samples_per_second": 540.083, "eval_steps_per_second": 67.553, "step": 713000 }, { "epoch": 163.53, "learning_rate": 3.682640600476299e-05, "loss": 0.8561, "step": 713500 }, { "epoch": 163.53, "eval_loss": 1.6392165422439575, "eval_runtime": 8.6937, "eval_samples_per_second": 539.813, "eval_steps_per_second": 67.52, "step": 713500 }, { "epoch": 163.65, "learning_rate": 3.6804372657296774e-05, "loss": 0.8624, "step": 714000 }, { "epoch": 163.65, "eval_loss": 1.6606343984603882, "eval_runtime": 8.69, "eval_samples_per_second": 540.043, "eval_steps_per_second": 67.549, "step": 714000 }, { "epoch": 163.76, "learning_rate": 3.678232750371031e-05, "loss": 0.8647, "step": 714500 }, { "epoch": 163.76, "eval_loss": 1.655575156211853, "eval_runtime": 8.6963, "eval_samples_per_second": 539.654, "eval_steps_per_second": 67.5, "step": 714500 }, { "epoch": 163.88, "learning_rate": 3.676027056605203e-05, "loss": 0.8618, "step": 715000 }, { "epoch": 163.88, "eval_loss": 1.6404838562011719, "eval_runtime": 8.6874, "eval_samples_per_second": 540.208, "eval_steps_per_second": 67.569, "step": 715000 }, { "epoch": 163.99, "learning_rate": 3.673820186638215e-05, "loss": 0.8623, "step": 715500 }, { "epoch": 163.99, "eval_loss": 1.6655701398849487, "eval_runtime": 8.6927, "eval_samples_per_second": 539.881, "eval_steps_per_second": 67.528, "step": 715500 }, { "epoch": 164.11, "learning_rate": 3.671612142677261e-05, "loss": 0.8551, "step": 716000 }, { "epoch": 164.11, "eval_loss": 1.6519867181777954, "eval_runtime": 8.6897, "eval_samples_per_second": 540.062, "eval_steps_per_second": 67.551, "step": 716000 }, { "epoch": 164.22, "learning_rate": 3.669402926930713e-05, "loss": 0.8602, "step": 716500 }, { "epoch": 164.22, "eval_loss": 1.6469820737838745, "eval_runtime": 8.6959, "eval_samples_per_second": 539.677, "eval_steps_per_second": 67.503, "step": 716500 }, { "epoch": 164.34, "learning_rate": 3.6671925416081136e-05, "loss": 0.8493, "step": 717000 }, { "epoch": 164.34, "eval_loss": 1.6413202285766602, "eval_runtime": 8.6843, "eval_samples_per_second": 540.399, "eval_steps_per_second": 67.593, "step": 717000 }, { "epoch": 164.45, "learning_rate": 3.6649809889201765e-05, "loss": 0.8673, "step": 717500 }, { "epoch": 164.45, "eval_loss": 1.6583791971206665, "eval_runtime": 8.695, "eval_samples_per_second": 539.738, "eval_steps_per_second": 67.51, "step": 717500 }, { "epoch": 164.57, "learning_rate": 3.6627682710787805e-05, "loss": 0.8592, "step": 718000 }, { "epoch": 164.57, "eval_loss": 1.6535284519195557, "eval_runtime": 8.6934, "eval_samples_per_second": 539.834, "eval_steps_per_second": 67.522, "step": 718000 }, { "epoch": 164.68, "learning_rate": 3.6605543902969714e-05, "loss": 0.861, "step": 718500 }, { "epoch": 164.68, "eval_loss": 1.6805613040924072, "eval_runtime": 8.6912, "eval_samples_per_second": 539.971, "eval_steps_per_second": 67.54, "step": 718500 }, { "epoch": 164.79, "learning_rate": 3.6583393487889575e-05, "loss": 0.8613, "step": 719000 }, { "epoch": 164.79, "eval_loss": 1.6545108556747437, "eval_runtime": 8.6844, "eval_samples_per_second": 540.391, "eval_steps_per_second": 67.592, "step": 719000 }, { "epoch": 164.91, "learning_rate": 3.656123148770109e-05, "loss": 0.858, "step": 719500 }, { "epoch": 164.91, "eval_loss": 1.6601040363311768, "eval_runtime": 8.6908, "eval_samples_per_second": 539.998, "eval_steps_per_second": 67.543, "step": 719500 }, { "epoch": 165.02, "learning_rate": 3.653905792456953e-05, "loss": 0.8598, "step": 720000 }, { "epoch": 165.02, "eval_loss": 1.6365116834640503, "eval_runtime": 8.6907, "eval_samples_per_second": 540.0, "eval_steps_per_second": 67.543, "step": 720000 }, { "epoch": 165.14, "learning_rate": 3.651687282067175e-05, "loss": 0.8467, "step": 720500 }, { "epoch": 165.14, "eval_loss": 1.6534998416900635, "eval_runtime": 8.6882, "eval_samples_per_second": 540.16, "eval_steps_per_second": 67.563, "step": 720500 }, { "epoch": 165.25, "learning_rate": 3.649467619819613e-05, "loss": 0.8466, "step": 721000 }, { "epoch": 165.25, "eval_loss": 1.6717320680618286, "eval_runtime": 8.6916, "eval_samples_per_second": 539.948, "eval_steps_per_second": 67.537, "step": 721000 }, { "epoch": 165.37, "learning_rate": 3.6472468079342585e-05, "loss": 0.848, "step": 721500 }, { "epoch": 165.37, "eval_loss": 1.6382529735565186, "eval_runtime": 8.6916, "eval_samples_per_second": 539.945, "eval_steps_per_second": 67.536, "step": 721500 }, { "epoch": 165.48, "learning_rate": 3.645024848632253e-05, "loss": 0.8505, "step": 722000 }, { "epoch": 165.48, "eval_loss": 1.6543768644332886, "eval_runtime": 8.6898, "eval_samples_per_second": 540.061, "eval_steps_per_second": 67.551, "step": 722000 }, { "epoch": 165.6, "learning_rate": 3.642801744135882e-05, "loss": 0.8551, "step": 722500 }, { "epoch": 165.6, "eval_loss": 1.6680477857589722, "eval_runtime": 8.6887, "eval_samples_per_second": 540.129, "eval_steps_per_second": 67.559, "step": 722500 }, { "epoch": 165.71, "learning_rate": 3.6405774966685816e-05, "loss": 0.8667, "step": 723000 }, { "epoch": 165.71, "eval_loss": 1.673436164855957, "eval_runtime": 8.6882, "eval_samples_per_second": 540.158, "eval_steps_per_second": 67.563, "step": 723000 }, { "epoch": 165.83, "learning_rate": 3.6383521084549276e-05, "loss": 0.8556, "step": 723500 }, { "epoch": 165.83, "eval_loss": 1.6501983404159546, "eval_runtime": 8.6894, "eval_samples_per_second": 540.08, "eval_steps_per_second": 67.553, "step": 723500 }, { "epoch": 165.94, "learning_rate": 3.636125581720638e-05, "loss": 0.8612, "step": 724000 }, { "epoch": 165.94, "eval_loss": 1.6652088165283203, "eval_runtime": 8.6834, "eval_samples_per_second": 540.455, "eval_steps_per_second": 67.6, "step": 724000 }, { "epoch": 166.06, "learning_rate": 3.633897918692569e-05, "loss": 0.8621, "step": 724500 }, { "epoch": 166.06, "eval_loss": 1.6645950078964233, "eval_runtime": 8.698, "eval_samples_per_second": 539.548, "eval_steps_per_second": 67.487, "step": 724500 }, { "epoch": 166.17, "learning_rate": 3.631669121598714e-05, "loss": 0.8511, "step": 725000 }, { "epoch": 166.17, "eval_loss": 1.6413064002990723, "eval_runtime": 8.6933, "eval_samples_per_second": 539.841, "eval_steps_per_second": 67.523, "step": 725000 }, { "epoch": 166.28, "learning_rate": 3.629439192668198e-05, "loss": 0.8532, "step": 725500 }, { "epoch": 166.28, "eval_loss": 1.643091082572937, "eval_runtime": 8.6842, "eval_samples_per_second": 540.409, "eval_steps_per_second": 67.594, "step": 725500 }, { "epoch": 166.4, "learning_rate": 3.627208134131283e-05, "loss": 0.8557, "step": 726000 }, { "epoch": 166.4, "eval_loss": 1.6485600471496582, "eval_runtime": 8.6899, "eval_samples_per_second": 540.051, "eval_steps_per_second": 67.55, "step": 726000 }, { "epoch": 166.51, "learning_rate": 3.624975948219356e-05, "loss": 0.847, "step": 726500 }, { "epoch": 166.51, "eval_loss": 1.6612542867660522, "eval_runtime": 8.6944, "eval_samples_per_second": 539.772, "eval_steps_per_second": 67.515, "step": 726500 }, { "epoch": 166.63, "learning_rate": 3.622742637164933e-05, "loss": 0.8516, "step": 727000 }, { "epoch": 166.63, "eval_loss": 1.6752729415893555, "eval_runtime": 8.6879, "eval_samples_per_second": 540.18, "eval_steps_per_second": 67.566, "step": 727000 }, { "epoch": 166.74, "learning_rate": 3.6205082032016565e-05, "loss": 0.866, "step": 727500 }, { "epoch": 166.74, "eval_loss": 1.6566331386566162, "eval_runtime": 8.692, "eval_samples_per_second": 539.923, "eval_steps_per_second": 67.534, "step": 727500 }, { "epoch": 166.86, "learning_rate": 3.6182726485642925e-05, "loss": 0.8577, "step": 728000 }, { "epoch": 166.86, "eval_loss": 1.6476384401321411, "eval_runtime": 8.6857, "eval_samples_per_second": 540.31, "eval_steps_per_second": 67.582, "step": 728000 }, { "epoch": 166.97, "learning_rate": 3.616035975488724e-05, "loss": 0.8479, "step": 728500 }, { "epoch": 166.97, "eval_loss": 1.6680394411087036, "eval_runtime": 8.6849, "eval_samples_per_second": 540.363, "eval_steps_per_second": 67.588, "step": 728500 }, { "epoch": 167.09, "learning_rate": 3.6137981862119566e-05, "loss": 0.8516, "step": 729000 }, { "epoch": 167.09, "eval_loss": 1.6523115634918213, "eval_runtime": 8.6934, "eval_samples_per_second": 539.836, "eval_steps_per_second": 67.523, "step": 729000 }, { "epoch": 167.2, "learning_rate": 3.6115592829721113e-05, "loss": 0.8532, "step": 729500 }, { "epoch": 167.2, "eval_loss": 1.6700098514556885, "eval_runtime": 8.6914, "eval_samples_per_second": 539.957, "eval_steps_per_second": 67.538, "step": 729500 }, { "epoch": 167.32, "learning_rate": 3.609319268008422e-05, "loss": 0.8518, "step": 730000 }, { "epoch": 167.32, "eval_loss": 1.651278018951416, "eval_runtime": 8.6863, "eval_samples_per_second": 540.276, "eval_steps_per_second": 67.578, "step": 730000 }, { "epoch": 167.43, "learning_rate": 3.6070781435612357e-05, "loss": 0.8552, "step": 730500 }, { "epoch": 167.43, "eval_loss": 1.6463713645935059, "eval_runtime": 8.6944, "eval_samples_per_second": 539.774, "eval_steps_per_second": 67.515, "step": 730500 }, { "epoch": 167.55, "learning_rate": 3.6048359118720083e-05, "loss": 0.8528, "step": 731000 }, { "epoch": 167.55, "eval_loss": 1.6402217149734497, "eval_runtime": 8.6961, "eval_samples_per_second": 539.665, "eval_steps_per_second": 67.501, "step": 731000 }, { "epoch": 167.66, "learning_rate": 3.6025925751833036e-05, "loss": 0.8579, "step": 731500 }, { "epoch": 167.66, "eval_loss": 1.6441363096237183, "eval_runtime": 8.6901, "eval_samples_per_second": 540.037, "eval_steps_per_second": 67.548, "step": 731500 }, { "epoch": 167.77, "learning_rate": 3.60034813573879e-05, "loss": 0.8531, "step": 732000 }, { "epoch": 167.77, "eval_loss": 1.641304612159729, "eval_runtime": 8.6942, "eval_samples_per_second": 539.788, "eval_steps_per_second": 67.517, "step": 732000 }, { "epoch": 167.89, "learning_rate": 3.598102595783241e-05, "loss": 0.8569, "step": 732500 }, { "epoch": 167.89, "eval_loss": 1.6527477502822876, "eval_runtime": 8.7018, "eval_samples_per_second": 539.312, "eval_steps_per_second": 67.457, "step": 732500 }, { "epoch": 168.0, "learning_rate": 3.595855957562527e-05, "loss": 0.852, "step": 733000 }, { "epoch": 168.0, "eval_loss": 1.6751631498336792, "eval_runtime": 8.6882, "eval_samples_per_second": 540.158, "eval_steps_per_second": 67.563, "step": 733000 }, { "epoch": 168.12, "learning_rate": 3.593608223323619e-05, "loss": 0.8437, "step": 733500 }, { "epoch": 168.12, "eval_loss": 1.6405912637710571, "eval_runtime": 8.6928, "eval_samples_per_second": 539.869, "eval_steps_per_second": 67.527, "step": 733500 }, { "epoch": 168.23, "learning_rate": 3.591359395314585e-05, "loss": 0.8417, "step": 734000 }, { "epoch": 168.23, "eval_loss": 1.6418390274047852, "eval_runtime": 8.699, "eval_samples_per_second": 539.486, "eval_steps_per_second": 67.479, "step": 734000 }, { "epoch": 168.35, "learning_rate": 3.5891094757845856e-05, "loss": 0.8512, "step": 734500 }, { "epoch": 168.35, "eval_loss": 1.6776525974273682, "eval_runtime": 8.693, "eval_samples_per_second": 539.862, "eval_steps_per_second": 67.526, "step": 734500 }, { "epoch": 168.46, "learning_rate": 3.5868584669838724e-05, "loss": 0.8398, "step": 735000 }, { "epoch": 168.46, "eval_loss": 1.6698766946792603, "eval_runtime": 8.6925, "eval_samples_per_second": 539.893, "eval_steps_per_second": 67.53, "step": 735000 }, { "epoch": 168.58, "learning_rate": 3.584606371163789e-05, "loss": 0.8572, "step": 735500 }, { "epoch": 168.58, "eval_loss": 1.6503386497497559, "eval_runtime": 8.6877, "eval_samples_per_second": 540.186, "eval_steps_per_second": 67.566, "step": 735500 }, { "epoch": 168.69, "learning_rate": 3.582353190576763e-05, "loss": 0.8569, "step": 736000 }, { "epoch": 168.69, "eval_loss": 1.6577887535095215, "eval_runtime": 8.6811, "eval_samples_per_second": 540.6, "eval_steps_per_second": 67.618, "step": 736000 }, { "epoch": 168.81, "learning_rate": 3.580098927476311e-05, "loss": 0.8548, "step": 736500 }, { "epoch": 168.81, "eval_loss": 1.6630058288574219, "eval_runtime": 8.6848, "eval_samples_per_second": 540.367, "eval_steps_per_second": 67.589, "step": 736500 }, { "epoch": 168.92, "learning_rate": 3.5778435841170265e-05, "loss": 0.86, "step": 737000 }, { "epoch": 168.92, "eval_loss": 1.6798275709152222, "eval_runtime": 8.6914, "eval_samples_per_second": 539.959, "eval_steps_per_second": 67.538, "step": 737000 }, { "epoch": 169.04, "learning_rate": 3.57558716275459e-05, "loss": 0.8516, "step": 737500 }, { "epoch": 169.04, "eval_loss": 1.6724140644073486, "eval_runtime": 8.6908, "eval_samples_per_second": 539.998, "eval_steps_per_second": 67.543, "step": 737500 }, { "epoch": 169.15, "learning_rate": 3.573329665645754e-05, "loss": 0.8399, "step": 738000 }, { "epoch": 169.15, "eval_loss": 1.6647998094558716, "eval_runtime": 8.6889, "eval_samples_per_second": 540.112, "eval_steps_per_second": 67.557, "step": 738000 }, { "epoch": 169.26, "learning_rate": 3.5710710950483525e-05, "loss": 0.8428, "step": 738500 }, { "epoch": 169.26, "eval_loss": 1.6645073890686035, "eval_runtime": 8.6824, "eval_samples_per_second": 540.518, "eval_steps_per_second": 67.608, "step": 738500 }, { "epoch": 169.38, "learning_rate": 3.56881145322129e-05, "loss": 0.8517, "step": 739000 }, { "epoch": 169.38, "eval_loss": 1.668154001235962, "eval_runtime": 8.69, "eval_samples_per_second": 540.044, "eval_steps_per_second": 67.549, "step": 739000 }, { "epoch": 169.49, "learning_rate": 3.56655074242454e-05, "loss": 0.8495, "step": 739500 }, { "epoch": 169.49, "eval_loss": 1.6641733646392822, "eval_runtime": 8.6894, "eval_samples_per_second": 540.086, "eval_steps_per_second": 67.554, "step": 739500 }, { "epoch": 169.61, "learning_rate": 3.5642889649191516e-05, "loss": 0.8482, "step": 740000 }, { "epoch": 169.61, "eval_loss": 1.646368384361267, "eval_runtime": 8.6896, "eval_samples_per_second": 540.073, "eval_steps_per_second": 67.552, "step": 740000 }, { "epoch": 169.72, "learning_rate": 3.5620261229672355e-05, "loss": 0.8574, "step": 740500 }, { "epoch": 169.72, "eval_loss": 1.6725502014160156, "eval_runtime": 8.6882, "eval_samples_per_second": 540.16, "eval_steps_per_second": 67.563, "step": 740500 }, { "epoch": 169.84, "learning_rate": 3.559762218831968e-05, "loss": 0.8461, "step": 741000 }, { "epoch": 169.84, "eval_loss": 1.64681875705719, "eval_runtime": 8.6864, "eval_samples_per_second": 540.269, "eval_steps_per_second": 67.577, "step": 741000 }, { "epoch": 169.95, "learning_rate": 3.55749725477759e-05, "loss": 0.8502, "step": 741500 }, { "epoch": 169.95, "eval_loss": 1.658753514289856, "eval_runtime": 8.6948, "eval_samples_per_second": 539.75, "eval_steps_per_second": 67.512, "step": 741500 }, { "epoch": 170.07, "learning_rate": 3.5552312330694e-05, "loss": 0.8464, "step": 742000 }, { "epoch": 170.07, "eval_loss": 1.6577194929122925, "eval_runtime": 8.6877, "eval_samples_per_second": 540.192, "eval_steps_per_second": 67.567, "step": 742000 }, { "epoch": 170.18, "learning_rate": 3.552964155973755e-05, "loss": 0.8417, "step": 742500 }, { "epoch": 170.18, "eval_loss": 1.6705745458602905, "eval_runtime": 8.69, "eval_samples_per_second": 540.044, "eval_steps_per_second": 67.549, "step": 742500 }, { "epoch": 170.3, "learning_rate": 3.550696025758069e-05, "loss": 0.8428, "step": 743000 }, { "epoch": 170.3, "eval_loss": 1.6483538150787354, "eval_runtime": 8.6877, "eval_samples_per_second": 540.186, "eval_steps_per_second": 67.566, "step": 743000 }, { "epoch": 170.41, "learning_rate": 3.548426844690806e-05, "loss": 0.8429, "step": 743500 }, { "epoch": 170.41, "eval_loss": 1.6502680778503418, "eval_runtime": 8.6901, "eval_samples_per_second": 540.037, "eval_steps_per_second": 67.548, "step": 743500 }, { "epoch": 170.52, "learning_rate": 3.5461566150414837e-05, "loss": 0.8441, "step": 744000 }, { "epoch": 170.52, "eval_loss": 1.6611392498016357, "eval_runtime": 8.6931, "eval_samples_per_second": 539.855, "eval_steps_per_second": 67.525, "step": 744000 }, { "epoch": 170.64, "learning_rate": 3.543885339080668e-05, "loss": 0.8435, "step": 744500 }, { "epoch": 170.64, "eval_loss": 1.671925663948059, "eval_runtime": 8.6924, "eval_samples_per_second": 539.895, "eval_steps_per_second": 67.53, "step": 744500 }, { "epoch": 170.75, "learning_rate": 3.5416130190799725e-05, "loss": 0.8501, "step": 745000 }, { "epoch": 170.75, "eval_loss": 1.6831583976745605, "eval_runtime": 8.6989, "eval_samples_per_second": 539.496, "eval_steps_per_second": 67.48, "step": 745000 }, { "epoch": 170.87, "learning_rate": 3.539339657312051e-05, "loss": 0.852, "step": 745500 }, { "epoch": 170.87, "eval_loss": 1.6627886295318604, "eval_runtime": 8.689, "eval_samples_per_second": 540.106, "eval_steps_per_second": 67.556, "step": 745500 }, { "epoch": 170.98, "learning_rate": 3.537065256050604e-05, "loss": 0.8513, "step": 746000 }, { "epoch": 170.98, "eval_loss": 1.6525306701660156, "eval_runtime": 8.6902, "eval_samples_per_second": 540.034, "eval_steps_per_second": 67.547, "step": 746000 }, { "epoch": 171.1, "learning_rate": 3.534789817570368e-05, "loss": 0.8436, "step": 746500 }, { "epoch": 171.1, "eval_loss": 1.665276288986206, "eval_runtime": 8.6924, "eval_samples_per_second": 539.898, "eval_steps_per_second": 67.53, "step": 746500 }, { "epoch": 171.21, "learning_rate": 3.5325133441471196e-05, "loss": 0.8455, "step": 747000 }, { "epoch": 171.21, "eval_loss": 1.656273365020752, "eval_runtime": 8.6906, "eval_samples_per_second": 540.007, "eval_steps_per_second": 67.544, "step": 747000 }, { "epoch": 171.33, "learning_rate": 3.530235838057668e-05, "loss": 0.8371, "step": 747500 }, { "epoch": 171.33, "eval_loss": 1.6755496263504028, "eval_runtime": 8.6876, "eval_samples_per_second": 540.198, "eval_steps_per_second": 67.568, "step": 747500 }, { "epoch": 171.44, "learning_rate": 3.527957301579859e-05, "loss": 0.8425, "step": 748000 }, { "epoch": 171.44, "eval_loss": 1.6562288999557495, "eval_runtime": 8.6911, "eval_samples_per_second": 539.979, "eval_steps_per_second": 67.54, "step": 748000 }, { "epoch": 171.56, "learning_rate": 3.525677736992562e-05, "loss": 0.8409, "step": 748500 }, { "epoch": 171.56, "eval_loss": 1.652481198310852, "eval_runtime": 8.6942, "eval_samples_per_second": 539.787, "eval_steps_per_second": 67.517, "step": 748500 }, { "epoch": 171.67, "learning_rate": 3.523397146575683e-05, "loss": 0.8456, "step": 749000 }, { "epoch": 171.67, "eval_loss": 1.6956733465194702, "eval_runtime": 8.6898, "eval_samples_per_second": 540.061, "eval_steps_per_second": 67.551, "step": 749000 }, { "epoch": 171.79, "learning_rate": 3.521115532610148e-05, "loss": 0.8475, "step": 749500 }, { "epoch": 171.79, "eval_loss": 1.663335919380188, "eval_runtime": 8.6941, "eval_samples_per_second": 539.793, "eval_steps_per_second": 67.517, "step": 749500 }, { "epoch": 171.9, "learning_rate": 3.5188328973779095e-05, "loss": 0.8535, "step": 750000 }, { "epoch": 171.9, "eval_loss": 1.6808110475540161, "eval_runtime": 8.6923, "eval_samples_per_second": 539.904, "eval_steps_per_second": 67.531, "step": 750000 }, { "epoch": 172.01, "learning_rate": 3.5165492431619406e-05, "loss": 0.8512, "step": 750500 }, { "epoch": 172.01, "eval_loss": 1.651939868927002, "eval_runtime": 8.6882, "eval_samples_per_second": 540.157, "eval_steps_per_second": 67.563, "step": 750500 }, { "epoch": 172.13, "learning_rate": 3.514264572246234e-05, "loss": 0.8423, "step": 751000 }, { "epoch": 172.13, "eval_loss": 1.6601263284683228, "eval_runtime": 8.6983, "eval_samples_per_second": 539.532, "eval_steps_per_second": 67.485, "step": 751000 }, { "epoch": 172.24, "learning_rate": 3.511978886915798e-05, "loss": 0.8388, "step": 751500 }, { "epoch": 172.24, "eval_loss": 1.6647635698318481, "eval_runtime": 8.6891, "eval_samples_per_second": 540.1, "eval_steps_per_second": 67.556, "step": 751500 }, { "epoch": 172.36, "learning_rate": 3.509692189456656e-05, "loss": 0.8373, "step": 752000 }, { "epoch": 172.36, "eval_loss": 1.641958236694336, "eval_runtime": 8.702, "eval_samples_per_second": 539.299, "eval_steps_per_second": 67.456, "step": 752000 }, { "epoch": 172.47, "learning_rate": 3.507404482155846e-05, "loss": 0.8455, "step": 752500 }, { "epoch": 172.47, "eval_loss": 1.6570532321929932, "eval_runtime": 8.6921, "eval_samples_per_second": 539.916, "eval_steps_per_second": 67.533, "step": 752500 }, { "epoch": 172.59, "learning_rate": 3.5051157673014124e-05, "loss": 0.8427, "step": 753000 }, { "epoch": 172.59, "eval_loss": 1.677271842956543, "eval_runtime": 8.6905, "eval_samples_per_second": 540.017, "eval_steps_per_second": 67.545, "step": 753000 }, { "epoch": 172.7, "learning_rate": 3.5028260471824086e-05, "loss": 0.8388, "step": 753500 }, { "epoch": 172.7, "eval_loss": 1.6543389558792114, "eval_runtime": 8.6888, "eval_samples_per_second": 540.122, "eval_steps_per_second": 67.558, "step": 753500 }, { "epoch": 172.82, "learning_rate": 3.500535324088895e-05, "loss": 0.8439, "step": 754000 }, { "epoch": 172.82, "eval_loss": 1.6822351217269897, "eval_runtime": 8.6903, "eval_samples_per_second": 540.028, "eval_steps_per_second": 67.547, "step": 754000 }, { "epoch": 172.93, "learning_rate": 3.4982436003119315e-05, "loss": 0.855, "step": 754500 }, { "epoch": 172.93, "eval_loss": 1.643269419670105, "eval_runtime": 8.6944, "eval_samples_per_second": 539.772, "eval_steps_per_second": 67.515, "step": 754500 }, { "epoch": 173.05, "learning_rate": 3.495950878143585e-05, "loss": 0.8433, "step": 755000 }, { "epoch": 173.05, "eval_loss": 1.6573554277420044, "eval_runtime": 8.6982, "eval_samples_per_second": 539.536, "eval_steps_per_second": 67.485, "step": 755000 }, { "epoch": 173.16, "learning_rate": 3.493657159876913e-05, "loss": 0.8369, "step": 755500 }, { "epoch": 173.16, "eval_loss": 1.6417639255523682, "eval_runtime": 8.6919, "eval_samples_per_second": 539.927, "eval_steps_per_second": 67.534, "step": 755500 }, { "epoch": 173.28, "learning_rate": 3.491362447805976e-05, "loss": 0.8439, "step": 756000 }, { "epoch": 173.28, "eval_loss": 1.6844594478607178, "eval_runtime": 8.6956, "eval_samples_per_second": 539.701, "eval_steps_per_second": 67.506, "step": 756000 }, { "epoch": 173.39, "learning_rate": 3.489066744225825e-05, "loss": 0.8409, "step": 756500 }, { "epoch": 173.39, "eval_loss": 1.647477149963379, "eval_runtime": 8.6931, "eval_samples_per_second": 539.856, "eval_steps_per_second": 67.525, "step": 756500 }, { "epoch": 173.5, "learning_rate": 3.486770051432503e-05, "loss": 0.8468, "step": 757000 }, { "epoch": 173.5, "eval_loss": 1.65805184841156, "eval_runtime": 8.6841, "eval_samples_per_second": 540.414, "eval_steps_per_second": 67.595, "step": 757000 }, { "epoch": 173.62, "learning_rate": 3.484472371723043e-05, "loss": 0.8464, "step": 757500 }, { "epoch": 173.62, "eval_loss": 1.6494855880737305, "eval_runtime": 8.6978, "eval_samples_per_second": 539.562, "eval_steps_per_second": 67.488, "step": 757500 }, { "epoch": 173.73, "learning_rate": 3.482173707395466e-05, "loss": 0.8399, "step": 758000 }, { "epoch": 173.73, "eval_loss": 1.6392954587936401, "eval_runtime": 8.6888, "eval_samples_per_second": 540.123, "eval_steps_per_second": 67.558, "step": 758000 }, { "epoch": 173.85, "learning_rate": 3.4798740607487746e-05, "loss": 0.842, "step": 758500 }, { "epoch": 173.85, "eval_loss": 1.6659653186798096, "eval_runtime": 8.6874, "eval_samples_per_second": 540.211, "eval_steps_per_second": 67.569, "step": 758500 }, { "epoch": 173.96, "learning_rate": 3.477573434082958e-05, "loss": 0.8426, "step": 759000 }, { "epoch": 173.96, "eval_loss": 1.66963791847229, "eval_runtime": 8.6983, "eval_samples_per_second": 539.528, "eval_steps_per_second": 67.484, "step": 759000 }, { "epoch": 174.08, "learning_rate": 3.475271829698982e-05, "loss": 0.8343, "step": 759500 }, { "epoch": 174.08, "eval_loss": 1.6662708520889282, "eval_runtime": 8.695, "eval_samples_per_second": 539.733, "eval_steps_per_second": 67.51, "step": 759500 }, { "epoch": 174.19, "learning_rate": 3.472969249898793e-05, "loss": 0.832, "step": 760000 }, { "epoch": 174.19, "eval_loss": 1.6905179023742676, "eval_runtime": 8.6921, "eval_samples_per_second": 539.916, "eval_steps_per_second": 67.533, "step": 760000 }, { "epoch": 174.31, "learning_rate": 3.4706656969853094e-05, "loss": 0.8389, "step": 760500 }, { "epoch": 174.31, "eval_loss": 1.6551920175552368, "eval_runtime": 8.6863, "eval_samples_per_second": 540.278, "eval_steps_per_second": 67.578, "step": 760500 }, { "epoch": 174.42, "learning_rate": 3.468361173262428e-05, "loss": 0.8504, "step": 761000 }, { "epoch": 174.42, "eval_loss": 1.6652156114578247, "eval_runtime": 8.6903, "eval_samples_per_second": 540.029, "eval_steps_per_second": 67.547, "step": 761000 }, { "epoch": 174.54, "learning_rate": 3.466055681035012e-05, "loss": 0.8435, "step": 761500 }, { "epoch": 174.54, "eval_loss": 1.670395016670227, "eval_runtime": 8.6801, "eval_samples_per_second": 540.661, "eval_steps_per_second": 67.626, "step": 761500 }, { "epoch": 174.65, "learning_rate": 3.463749222608895e-05, "loss": 0.8442, "step": 762000 }, { "epoch": 174.65, "eval_loss": 1.636944055557251, "eval_runtime": 8.6865, "eval_samples_per_second": 540.266, "eval_steps_per_second": 67.576, "step": 762000 }, { "epoch": 174.76, "learning_rate": 3.461441800290877e-05, "loss": 0.8367, "step": 762500 }, { "epoch": 174.76, "eval_loss": 1.6607961654663086, "eval_runtime": 8.6848, "eval_samples_per_second": 540.37, "eval_steps_per_second": 67.589, "step": 762500 }, { "epoch": 174.88, "learning_rate": 3.459133416388722e-05, "loss": 0.8415, "step": 763000 }, { "epoch": 174.88, "eval_loss": 1.6622413396835327, "eval_runtime": 8.6895, "eval_samples_per_second": 540.077, "eval_steps_per_second": 67.553, "step": 763000 }, { "epoch": 174.99, "learning_rate": 3.456824073211157e-05, "loss": 0.8423, "step": 763500 }, { "epoch": 174.99, "eval_loss": 1.6678518056869507, "eval_runtime": 8.6954, "eval_samples_per_second": 539.71, "eval_steps_per_second": 67.507, "step": 763500 }, { "epoch": 175.11, "learning_rate": 3.4545137730678644e-05, "loss": 0.8364, "step": 764000 }, { "epoch": 175.11, "eval_loss": 1.6440266370773315, "eval_runtime": 8.6927, "eval_samples_per_second": 539.876, "eval_steps_per_second": 67.528, "step": 764000 }, { "epoch": 175.22, "learning_rate": 3.4522025182694884e-05, "loss": 0.8372, "step": 764500 }, { "epoch": 175.22, "eval_loss": 1.6550650596618652, "eval_runtime": 8.6874, "eval_samples_per_second": 540.207, "eval_steps_per_second": 67.569, "step": 764500 }, { "epoch": 175.34, "learning_rate": 3.449890311127625e-05, "loss": 0.8365, "step": 765000 }, { "epoch": 175.34, "eval_loss": 1.6573537588119507, "eval_runtime": 8.6881, "eval_samples_per_second": 540.167, "eval_steps_per_second": 67.564, "step": 765000 }, { "epoch": 175.45, "learning_rate": 3.4475771539548244e-05, "loss": 0.836, "step": 765500 }, { "epoch": 175.45, "eval_loss": 1.6475054025650024, "eval_runtime": 8.6936, "eval_samples_per_second": 539.825, "eval_steps_per_second": 67.521, "step": 765500 }, { "epoch": 175.57, "learning_rate": 3.445263049064585e-05, "loss": 0.8443, "step": 766000 }, { "epoch": 175.57, "eval_loss": 1.6414843797683716, "eval_runtime": 8.6936, "eval_samples_per_second": 539.823, "eval_steps_per_second": 67.521, "step": 766000 }, { "epoch": 175.68, "learning_rate": 3.442947998771356e-05, "loss": 0.8372, "step": 766500 }, { "epoch": 175.68, "eval_loss": 1.6529768705368042, "eval_runtime": 8.6891, "eval_samples_per_second": 540.101, "eval_steps_per_second": 67.556, "step": 766500 }, { "epoch": 175.8, "learning_rate": 3.4406320053905296e-05, "loss": 0.8415, "step": 767000 }, { "epoch": 175.8, "eval_loss": 1.6713749170303345, "eval_runtime": 8.6879, "eval_samples_per_second": 540.179, "eval_steps_per_second": 67.565, "step": 767000 }, { "epoch": 175.91, "learning_rate": 3.438315071238442e-05, "loss": 0.8426, "step": 767500 }, { "epoch": 175.91, "eval_loss": 1.6457741260528564, "eval_runtime": 8.6947, "eval_samples_per_second": 539.755, "eval_steps_per_second": 67.512, "step": 767500 }, { "epoch": 176.03, "learning_rate": 3.43599719863237e-05, "loss": 0.8405, "step": 768000 }, { "epoch": 176.03, "eval_loss": 1.6712685823440552, "eval_runtime": 8.6934, "eval_samples_per_second": 539.833, "eval_steps_per_second": 67.522, "step": 768000 }, { "epoch": 176.14, "learning_rate": 3.433678389890529e-05, "loss": 0.8203, "step": 768500 }, { "epoch": 176.14, "eval_loss": 1.6764007806777954, "eval_runtime": 8.6893, "eval_samples_per_second": 540.087, "eval_steps_per_second": 67.554, "step": 768500 }, { "epoch": 176.25, "learning_rate": 3.431358647332072e-05, "loss": 0.8281, "step": 769000 }, { "epoch": 176.25, "eval_loss": 1.691066861152649, "eval_runtime": 8.6872, "eval_samples_per_second": 540.221, "eval_steps_per_second": 67.571, "step": 769000 }, { "epoch": 176.37, "learning_rate": 3.4290379732770854e-05, "loss": 0.8299, "step": 769500 }, { "epoch": 176.37, "eval_loss": 1.6625564098358154, "eval_runtime": 8.6911, "eval_samples_per_second": 539.98, "eval_steps_per_second": 67.541, "step": 769500 }, { "epoch": 176.48, "learning_rate": 3.426716370046586e-05, "loss": 0.8324, "step": 770000 }, { "epoch": 176.48, "eval_loss": 1.664273738861084, "eval_runtime": 8.6953, "eval_samples_per_second": 539.714, "eval_steps_per_second": 67.507, "step": 770000 }, { "epoch": 176.6, "learning_rate": 3.42439383996252e-05, "loss": 0.8425, "step": 770500 }, { "epoch": 176.6, "eval_loss": 1.667003870010376, "eval_runtime": 8.6887, "eval_samples_per_second": 540.129, "eval_steps_per_second": 67.559, "step": 770500 }, { "epoch": 176.71, "learning_rate": 3.4220703853477625e-05, "loss": 0.8373, "step": 771000 }, { "epoch": 176.71, "eval_loss": 1.6399197578430176, "eval_runtime": 8.6913, "eval_samples_per_second": 539.968, "eval_steps_per_second": 67.539, "step": 771000 }, { "epoch": 176.83, "learning_rate": 3.4197460085261127e-05, "loss": 0.8419, "step": 771500 }, { "epoch": 176.83, "eval_loss": 1.675099492073059, "eval_runtime": 8.6852, "eval_samples_per_second": 540.347, "eval_steps_per_second": 67.587, "step": 771500 }, { "epoch": 176.94, "learning_rate": 3.4174207118222904e-05, "loss": 0.8451, "step": 772000 }, { "epoch": 176.94, "eval_loss": 1.638519525527954, "eval_runtime": 8.6918, "eval_samples_per_second": 539.933, "eval_steps_per_second": 67.535, "step": 772000 }, { "epoch": 177.06, "learning_rate": 3.415094497561936e-05, "loss": 0.8408, "step": 772500 }, { "epoch": 177.06, "eval_loss": 1.654270887374878, "eval_runtime": 8.6927, "eval_samples_per_second": 539.877, "eval_steps_per_second": 67.528, "step": 772500 }, { "epoch": 177.17, "learning_rate": 3.4127673680716085e-05, "loss": 0.8275, "step": 773000 }, { "epoch": 177.17, "eval_loss": 1.6652295589447021, "eval_runtime": 8.6899, "eval_samples_per_second": 540.053, "eval_steps_per_second": 67.55, "step": 773000 }, { "epoch": 177.29, "learning_rate": 3.4104393256787836e-05, "loss": 0.8295, "step": 773500 }, { "epoch": 177.29, "eval_loss": 1.6490285396575928, "eval_runtime": 8.7064, "eval_samples_per_second": 539.03, "eval_steps_per_second": 67.422, "step": 773500 }, { "epoch": 177.4, "learning_rate": 3.408110372711847e-05, "loss": 0.8345, "step": 774000 }, { "epoch": 177.4, "eval_loss": 1.6688812971115112, "eval_runtime": 8.6905, "eval_samples_per_second": 540.013, "eval_steps_per_second": 67.545, "step": 774000 }, { "epoch": 177.52, "learning_rate": 3.4057805115000957e-05, "loss": 0.8295, "step": 774500 }, { "epoch": 177.52, "eval_loss": 1.6671730279922485, "eval_runtime": 8.6979, "eval_samples_per_second": 539.554, "eval_steps_per_second": 67.487, "step": 774500 }, { "epoch": 177.63, "learning_rate": 3.4034497443737365e-05, "loss": 0.8289, "step": 775000 }, { "epoch": 177.63, "eval_loss": 1.6699085235595703, "eval_runtime": 8.688, "eval_samples_per_second": 540.171, "eval_steps_per_second": 67.565, "step": 775000 }, { "epoch": 177.74, "learning_rate": 3.401118073663882e-05, "loss": 0.8351, "step": 775500 }, { "epoch": 177.74, "eval_loss": 1.6503345966339111, "eval_runtime": 8.6864, "eval_samples_per_second": 540.267, "eval_steps_per_second": 67.577, "step": 775500 }, { "epoch": 177.86, "learning_rate": 3.398785501702548e-05, "loss": 0.8402, "step": 776000 }, { "epoch": 177.86, "eval_loss": 1.67694091796875, "eval_runtime": 8.6975, "eval_samples_per_second": 539.58, "eval_steps_per_second": 67.491, "step": 776000 }, { "epoch": 177.97, "learning_rate": 3.396452030822652e-05, "loss": 0.8335, "step": 776500 }, { "epoch": 177.97, "eval_loss": 1.657251238822937, "eval_runtime": 8.6888, "eval_samples_per_second": 540.124, "eval_steps_per_second": 67.559, "step": 776500 }, { "epoch": 178.09, "learning_rate": 3.3941176633580096e-05, "loss": 0.826, "step": 777000 }, { "epoch": 178.09, "eval_loss": 1.6626018285751343, "eval_runtime": 8.6838, "eval_samples_per_second": 540.435, "eval_steps_per_second": 67.598, "step": 777000 }, { "epoch": 178.2, "learning_rate": 3.391782401643334e-05, "loss": 0.8227, "step": 777500 }, { "epoch": 178.2, "eval_loss": 1.6756612062454224, "eval_runtime": 8.6903, "eval_samples_per_second": 540.025, "eval_steps_per_second": 67.546, "step": 777500 }, { "epoch": 178.32, "learning_rate": 3.389446248014233e-05, "loss": 0.8286, "step": 778000 }, { "epoch": 178.32, "eval_loss": 1.6686533689498901, "eval_runtime": 8.6971, "eval_samples_per_second": 539.603, "eval_steps_per_second": 67.493, "step": 778000 }, { "epoch": 178.43, "learning_rate": 3.387109204807206e-05, "loss": 0.831, "step": 778500 }, { "epoch": 178.43, "eval_loss": 1.6711094379425049, "eval_runtime": 8.6899, "eval_samples_per_second": 540.053, "eval_steps_per_second": 67.55, "step": 778500 }, { "epoch": 178.55, "learning_rate": 3.384771274359642e-05, "loss": 0.8244, "step": 779000 }, { "epoch": 178.55, "eval_loss": 1.661521553993225, "eval_runtime": 8.6918, "eval_samples_per_second": 539.935, "eval_steps_per_second": 67.535, "step": 779000 }, { "epoch": 178.66, "learning_rate": 3.382432459009818e-05, "loss": 0.8308, "step": 779500 }, { "epoch": 178.66, "eval_loss": 1.6521097421646118, "eval_runtime": 8.6931, "eval_samples_per_second": 539.855, "eval_steps_per_second": 67.525, "step": 779500 }, { "epoch": 178.78, "learning_rate": 3.3800927610968955e-05, "loss": 0.8395, "step": 780000 }, { "epoch": 178.78, "eval_loss": 1.6626859903335571, "eval_runtime": 8.6879, "eval_samples_per_second": 540.176, "eval_steps_per_second": 67.565, "step": 780000 }, { "epoch": 178.89, "learning_rate": 3.3777521829609185e-05, "loss": 0.8332, "step": 780500 }, { "epoch": 178.89, "eval_loss": 1.6493417024612427, "eval_runtime": 8.6921, "eval_samples_per_second": 539.917, "eval_steps_per_second": 67.533, "step": 780500 }, { "epoch": 179.01, "learning_rate": 3.375410726942811e-05, "loss": 0.8418, "step": 781000 }, { "epoch": 179.01, "eval_loss": 1.6525682210922241, "eval_runtime": 8.6947, "eval_samples_per_second": 539.751, "eval_steps_per_second": 67.512, "step": 781000 }, { "epoch": 179.12, "learning_rate": 3.373068395384377e-05, "loss": 0.8276, "step": 781500 }, { "epoch": 179.12, "eval_loss": 1.6718677282333374, "eval_runtime": 8.6892, "eval_samples_per_second": 540.093, "eval_steps_per_second": 67.555, "step": 781500 }, { "epoch": 179.23, "learning_rate": 3.370725190628294e-05, "loss": 0.8359, "step": 782000 }, { "epoch": 179.23, "eval_loss": 1.6742792129516602, "eval_runtime": 8.6938, "eval_samples_per_second": 539.811, "eval_steps_per_second": 67.52, "step": 782000 }, { "epoch": 179.35, "learning_rate": 3.368381115018114e-05, "loss": 0.8423, "step": 782500 }, { "epoch": 179.35, "eval_loss": 1.6501457691192627, "eval_runtime": 8.6984, "eval_samples_per_second": 539.523, "eval_steps_per_second": 67.483, "step": 782500 }, { "epoch": 179.46, "learning_rate": 3.3660361708982596e-05, "loss": 0.8301, "step": 783000 }, { "epoch": 179.46, "eval_loss": 1.6624658107757568, "eval_runtime": 8.6924, "eval_samples_per_second": 539.894, "eval_steps_per_second": 67.53, "step": 783000 }, { "epoch": 179.58, "learning_rate": 3.363690360614022e-05, "loss": 0.8246, "step": 783500 }, { "epoch": 179.58, "eval_loss": 1.6642955541610718, "eval_runtime": 8.693, "eval_samples_per_second": 539.859, "eval_steps_per_second": 67.526, "step": 783500 }, { "epoch": 179.69, "learning_rate": 3.361343686511559e-05, "loss": 0.835, "step": 784000 }, { "epoch": 179.69, "eval_loss": 1.6800591945648193, "eval_runtime": 8.6933, "eval_samples_per_second": 539.839, "eval_steps_per_second": 67.523, "step": 784000 }, { "epoch": 179.81, "learning_rate": 3.358996150937891e-05, "loss": 0.8328, "step": 784500 }, { "epoch": 179.81, "eval_loss": 1.668575406074524, "eval_runtime": 8.7081, "eval_samples_per_second": 538.923, "eval_steps_per_second": 67.408, "step": 784500 }, { "epoch": 179.92, "learning_rate": 3.356647756240903e-05, "loss": 0.8357, "step": 785000 }, { "epoch": 179.92, "eval_loss": 1.655759334564209, "eval_runtime": 8.6962, "eval_samples_per_second": 539.662, "eval_steps_per_second": 67.501, "step": 785000 }, { "epoch": 180.04, "learning_rate": 3.3542985047693355e-05, "loss": 0.8287, "step": 785500 }, { "epoch": 180.04, "eval_loss": 1.6449671983718872, "eval_runtime": 8.6954, "eval_samples_per_second": 539.712, "eval_steps_per_second": 67.507, "step": 785500 }, { "epoch": 180.15, "learning_rate": 3.351948398872789e-05, "loss": 0.8242, "step": 786000 }, { "epoch": 180.15, "eval_loss": 1.6611863374710083, "eval_runtime": 8.6926, "eval_samples_per_second": 539.882, "eval_steps_per_second": 67.528, "step": 786000 }, { "epoch": 180.27, "learning_rate": 3.3495974409017175e-05, "loss": 0.823, "step": 786500 }, { "epoch": 180.27, "eval_loss": 1.6659575700759888, "eval_runtime": 8.6896, "eval_samples_per_second": 540.07, "eval_steps_per_second": 67.552, "step": 786500 }, { "epoch": 180.38, "learning_rate": 3.347245633207426e-05, "loss": 0.8302, "step": 787000 }, { "epoch": 180.38, "eval_loss": 1.656981110572815, "eval_runtime": 8.689, "eval_samples_per_second": 540.108, "eval_steps_per_second": 67.557, "step": 787000 }, { "epoch": 180.5, "learning_rate": 3.344892978142071e-05, "loss": 0.8256, "step": 787500 }, { "epoch": 180.5, "eval_loss": 1.6739863157272339, "eval_runtime": 8.6952, "eval_samples_per_second": 539.722, "eval_steps_per_second": 67.508, "step": 787500 }, { "epoch": 180.61, "learning_rate": 3.342539478058655e-05, "loss": 0.8278, "step": 788000 }, { "epoch": 180.61, "eval_loss": 1.67192542552948, "eval_runtime": 8.6936, "eval_samples_per_second": 539.821, "eval_steps_per_second": 67.521, "step": 788000 }, { "epoch": 180.72, "learning_rate": 3.340185135311028e-05, "loss": 0.8311, "step": 788500 }, { "epoch": 180.72, "eval_loss": 1.6716740131378174, "eval_runtime": 8.6886, "eval_samples_per_second": 540.13, "eval_steps_per_second": 67.559, "step": 788500 }, { "epoch": 180.84, "learning_rate": 3.33782995225388e-05, "loss": 0.8271, "step": 789000 }, { "epoch": 180.84, "eval_loss": 1.6679131984710693, "eval_runtime": 8.6975, "eval_samples_per_second": 539.583, "eval_steps_per_second": 67.491, "step": 789000 }, { "epoch": 180.95, "learning_rate": 3.335473931242743e-05, "loss": 0.8349, "step": 789500 }, { "epoch": 180.95, "eval_loss": 1.6587533950805664, "eval_runtime": 8.6889, "eval_samples_per_second": 540.117, "eval_steps_per_second": 67.558, "step": 789500 }, { "epoch": 181.07, "learning_rate": 3.333117074633988e-05, "loss": 0.8272, "step": 790000 }, { "epoch": 181.07, "eval_loss": 1.6765450239181519, "eval_runtime": 8.6953, "eval_samples_per_second": 539.719, "eval_steps_per_second": 67.508, "step": 790000 }, { "epoch": 181.18, "learning_rate": 3.330759384784819e-05, "loss": 0.8138, "step": 790500 }, { "epoch": 181.18, "eval_loss": 1.6804587841033936, "eval_runtime": 8.6944, "eval_samples_per_second": 539.776, "eval_steps_per_second": 67.515, "step": 790500 }, { "epoch": 181.3, "learning_rate": 3.328400864053276e-05, "loss": 0.8232, "step": 791000 }, { "epoch": 181.3, "eval_loss": 1.679307460784912, "eval_runtime": 8.6926, "eval_samples_per_second": 539.883, "eval_steps_per_second": 67.529, "step": 791000 }, { "epoch": 181.41, "learning_rate": 3.326041514798227e-05, "loss": 0.8277, "step": 791500 }, { "epoch": 181.41, "eval_loss": 1.670904517173767, "eval_runtime": 8.6911, "eval_samples_per_second": 539.98, "eval_steps_per_second": 67.541, "step": 791500 }, { "epoch": 181.53, "learning_rate": 3.323681339379374e-05, "loss": 0.8224, "step": 792000 }, { "epoch": 181.53, "eval_loss": 1.6841404438018799, "eval_runtime": 8.6907, "eval_samples_per_second": 540.003, "eval_steps_per_second": 67.544, "step": 792000 }, { "epoch": 181.64, "learning_rate": 3.321320340157238e-05, "loss": 0.8274, "step": 792500 }, { "epoch": 181.64, "eval_loss": 1.673904299736023, "eval_runtime": 8.6938, "eval_samples_per_second": 539.81, "eval_steps_per_second": 67.519, "step": 792500 }, { "epoch": 181.76, "learning_rate": 3.318958519493171e-05, "loss": 0.8218, "step": 793000 }, { "epoch": 181.76, "eval_loss": 1.650976300239563, "eval_runtime": 8.6953, "eval_samples_per_second": 539.717, "eval_steps_per_second": 67.508, "step": 793000 }, { "epoch": 181.87, "learning_rate": 3.3165958797493404e-05, "loss": 0.8338, "step": 793500 }, { "epoch": 181.87, "eval_loss": 1.6757090091705322, "eval_runtime": 8.7004, "eval_samples_per_second": 539.399, "eval_steps_per_second": 67.468, "step": 793500 }, { "epoch": 181.98, "learning_rate": 3.3142324232887374e-05, "loss": 0.831, "step": 794000 }, { "epoch": 181.98, "eval_loss": 1.6376826763153076, "eval_runtime": 8.6948, "eval_samples_per_second": 539.75, "eval_steps_per_second": 67.512, "step": 794000 }, { "epoch": 182.1, "learning_rate": 3.311868152475169e-05, "loss": 0.8239, "step": 794500 }, { "epoch": 182.1, "eval_loss": 1.653681993484497, "eval_runtime": 8.6978, "eval_samples_per_second": 539.562, "eval_steps_per_second": 67.488, "step": 794500 }, { "epoch": 182.21, "learning_rate": 3.309503069673254e-05, "loss": 0.8241, "step": 795000 }, { "epoch": 182.21, "eval_loss": 1.6491385698318481, "eval_runtime": 8.6942, "eval_samples_per_second": 539.782, "eval_steps_per_second": 67.516, "step": 795000 }, { "epoch": 182.33, "learning_rate": 3.307137177248427e-05, "loss": 0.8231, "step": 795500 }, { "epoch": 182.33, "eval_loss": 1.6692581176757812, "eval_runtime": 8.6867, "eval_samples_per_second": 540.25, "eval_steps_per_second": 67.574, "step": 795500 }, { "epoch": 182.44, "learning_rate": 3.30477047756693e-05, "loss": 0.8245, "step": 796000 }, { "epoch": 182.44, "eval_loss": 1.6736787557601929, "eval_runtime": 8.6849, "eval_samples_per_second": 540.362, "eval_steps_per_second": 67.588, "step": 796000 }, { "epoch": 182.56, "learning_rate": 3.3024029729958134e-05, "loss": 0.8219, "step": 796500 }, { "epoch": 182.56, "eval_loss": 1.6766908168792725, "eval_runtime": 8.688, "eval_samples_per_second": 540.167, "eval_steps_per_second": 67.564, "step": 796500 }, { "epoch": 182.67, "learning_rate": 3.300034665902931e-05, "loss": 0.8259, "step": 797000 }, { "epoch": 182.67, "eval_loss": 1.673221230506897, "eval_runtime": 8.7002, "eval_samples_per_second": 539.413, "eval_steps_per_second": 67.47, "step": 797000 }, { "epoch": 182.79, "learning_rate": 3.2976655586569424e-05, "loss": 0.828, "step": 797500 }, { "epoch": 182.79, "eval_loss": 1.6834449768066406, "eval_runtime": 8.6975, "eval_samples_per_second": 539.581, "eval_steps_per_second": 67.491, "step": 797500 }, { "epoch": 182.9, "learning_rate": 3.295295653627305e-05, "loss": 0.8352, "step": 798000 }, { "epoch": 182.9, "eval_loss": 1.6574147939682007, "eval_runtime": 8.6995, "eval_samples_per_second": 539.457, "eval_steps_per_second": 67.475, "step": 798000 }, { "epoch": 183.02, "learning_rate": 3.292924953184274e-05, "loss": 0.8269, "step": 798500 }, { "epoch": 183.02, "eval_loss": 1.6699882745742798, "eval_runtime": 8.6875, "eval_samples_per_second": 540.202, "eval_steps_per_second": 67.568, "step": 798500 }, { "epoch": 183.13, "learning_rate": 3.290553459698903e-05, "loss": 0.8186, "step": 799000 }, { "epoch": 183.13, "eval_loss": 1.6673717498779297, "eval_runtime": 8.6929, "eval_samples_per_second": 539.866, "eval_steps_per_second": 67.526, "step": 799000 }, { "epoch": 183.25, "learning_rate": 3.288181175543033e-05, "loss": 0.8234, "step": 799500 }, { "epoch": 183.25, "eval_loss": 1.6724622249603271, "eval_runtime": 8.6944, "eval_samples_per_second": 539.775, "eval_steps_per_second": 67.515, "step": 799500 }, { "epoch": 183.36, "learning_rate": 3.285808103089301e-05, "loss": 0.8231, "step": 800000 }, { "epoch": 183.36, "eval_loss": 1.6637797355651855, "eval_runtime": 8.6888, "eval_samples_per_second": 540.121, "eval_steps_per_second": 67.558, "step": 800000 }, { "epoch": 183.47, "learning_rate": 3.283434244711132e-05, "loss": 0.8195, "step": 800500 }, { "epoch": 183.47, "eval_loss": 1.6700588464736938, "eval_runtime": 8.7003, "eval_samples_per_second": 539.407, "eval_steps_per_second": 67.469, "step": 800500 }, { "epoch": 183.59, "learning_rate": 3.2810596027827346e-05, "loss": 0.825, "step": 801000 }, { "epoch": 183.59, "eval_loss": 1.6776975393295288, "eval_runtime": 8.7006, "eval_samples_per_second": 539.387, "eval_steps_per_second": 67.466, "step": 801000 }, { "epoch": 183.7, "learning_rate": 3.278684179679103e-05, "loss": 0.8271, "step": 801500 }, { "epoch": 183.7, "eval_loss": 1.647273063659668, "eval_runtime": 8.7046, "eval_samples_per_second": 539.138, "eval_steps_per_second": 67.435, "step": 801500 }, { "epoch": 183.82, "learning_rate": 3.27630797777601e-05, "loss": 0.8246, "step": 802000 }, { "epoch": 183.82, "eval_loss": 1.6542545557022095, "eval_runtime": 8.6964, "eval_samples_per_second": 539.647, "eval_steps_per_second": 67.499, "step": 802000 }, { "epoch": 183.93, "learning_rate": 3.2739309994500114e-05, "loss": 0.8291, "step": 802500 }, { "epoch": 183.93, "eval_loss": 1.6534409523010254, "eval_runtime": 8.6896, "eval_samples_per_second": 540.073, "eval_steps_per_second": 67.552, "step": 802500 }, { "epoch": 184.05, "learning_rate": 3.271553247078437e-05, "loss": 0.8249, "step": 803000 }, { "epoch": 184.05, "eval_loss": 1.6720616817474365, "eval_runtime": 8.7124, "eval_samples_per_second": 538.66, "eval_steps_per_second": 67.376, "step": 803000 }, { "epoch": 184.16, "learning_rate": 3.269174723039391e-05, "loss": 0.8091, "step": 803500 }, { "epoch": 184.16, "eval_loss": 1.6606301069259644, "eval_runtime": 8.7012, "eval_samples_per_second": 539.352, "eval_steps_per_second": 67.462, "step": 803500 }, { "epoch": 184.28, "learning_rate": 3.2667954297117495e-05, "loss": 0.8174, "step": 804000 }, { "epoch": 184.28, "eval_loss": 1.6665774583816528, "eval_runtime": 8.6924, "eval_samples_per_second": 539.899, "eval_steps_per_second": 67.531, "step": 804000 }, { "epoch": 184.39, "learning_rate": 3.2644153694751575e-05, "loss": 0.8248, "step": 804500 }, { "epoch": 184.39, "eval_loss": 1.670276403427124, "eval_runtime": 8.6973, "eval_samples_per_second": 539.591, "eval_steps_per_second": 67.492, "step": 804500 }, { "epoch": 184.51, "learning_rate": 3.262034544710029e-05, "loss": 0.8229, "step": 805000 }, { "epoch": 184.51, "eval_loss": 1.6539345979690552, "eval_runtime": 8.6997, "eval_samples_per_second": 539.443, "eval_steps_per_second": 67.473, "step": 805000 }, { "epoch": 184.62, "learning_rate": 3.2596529577975396e-05, "loss": 0.8207, "step": 805500 }, { "epoch": 184.62, "eval_loss": 1.6820614337921143, "eval_runtime": 8.6955, "eval_samples_per_second": 539.706, "eval_steps_per_second": 67.506, "step": 805500 }, { "epoch": 184.74, "learning_rate": 3.2572706111196294e-05, "loss": 0.8239, "step": 806000 }, { "epoch": 184.74, "eval_loss": 1.6710187196731567, "eval_runtime": 8.7001, "eval_samples_per_second": 539.417, "eval_steps_per_second": 67.47, "step": 806000 }, { "epoch": 184.85, "learning_rate": 3.2548875070589985e-05, "loss": 0.8249, "step": 806500 }, { "epoch": 184.85, "eval_loss": 1.6577794551849365, "eval_runtime": 8.6943, "eval_samples_per_second": 539.777, "eval_steps_per_second": 67.515, "step": 806500 }, { "epoch": 184.96, "learning_rate": 3.2525036479991036e-05, "loss": 0.8221, "step": 807000 }, { "epoch": 184.96, "eval_loss": 1.683269739151001, "eval_runtime": 8.6934, "eval_samples_per_second": 539.833, "eval_steps_per_second": 67.522, "step": 807000 }, { "epoch": 185.08, "learning_rate": 3.250119036324156e-05, "loss": 0.8191, "step": 807500 }, { "epoch": 185.08, "eval_loss": 1.6964691877365112, "eval_runtime": 8.7073, "eval_samples_per_second": 538.972, "eval_steps_per_second": 67.415, "step": 807500 }, { "epoch": 185.19, "learning_rate": 3.247733674419121e-05, "loss": 0.8252, "step": 808000 }, { "epoch": 185.19, "eval_loss": 1.6716361045837402, "eval_runtime": 8.7018, "eval_samples_per_second": 539.316, "eval_steps_per_second": 67.458, "step": 808000 }, { "epoch": 185.31, "learning_rate": 3.245347564669712e-05, "loss": 0.8183, "step": 808500 }, { "epoch": 185.31, "eval_loss": 1.6529775857925415, "eval_runtime": 8.7007, "eval_samples_per_second": 539.379, "eval_steps_per_second": 67.466, "step": 808500 }, { "epoch": 185.42, "learning_rate": 3.242960709462395e-05, "loss": 0.824, "step": 809000 }, { "epoch": 185.42, "eval_loss": 1.6793829202651978, "eval_runtime": 8.6949, "eval_samples_per_second": 539.74, "eval_steps_per_second": 67.511, "step": 809000 }, { "epoch": 185.54, "learning_rate": 3.240573111184376e-05, "loss": 0.8185, "step": 809500 }, { "epoch": 185.54, "eval_loss": 1.6874668598175049, "eval_runtime": 8.6953, "eval_samples_per_second": 539.716, "eval_steps_per_second": 67.508, "step": 809500 }, { "epoch": 185.65, "learning_rate": 3.2381847722236084e-05, "loss": 0.8146, "step": 810000 }, { "epoch": 185.65, "eval_loss": 1.6652933359146118, "eval_runtime": 8.709, "eval_samples_per_second": 538.866, "eval_steps_per_second": 67.401, "step": 810000 }, { "epoch": 185.77, "learning_rate": 3.2357956949687835e-05, "loss": 0.8253, "step": 810500 }, { "epoch": 185.77, "eval_loss": 1.6663633584976196, "eval_runtime": 8.6919, "eval_samples_per_second": 539.926, "eval_steps_per_second": 67.534, "step": 810500 }, { "epoch": 185.88, "learning_rate": 3.233405881809333e-05, "loss": 0.8209, "step": 811000 }, { "epoch": 185.88, "eval_loss": 1.6608340740203857, "eval_runtime": 8.696, "eval_samples_per_second": 539.673, "eval_steps_per_second": 67.502, "step": 811000 }, { "epoch": 186.0, "learning_rate": 3.231015335135424e-05, "loss": 0.823, "step": 811500 }, { "epoch": 186.0, "eval_loss": 1.6543347835540771, "eval_runtime": 8.6894, "eval_samples_per_second": 540.086, "eval_steps_per_second": 67.554, "step": 811500 }, { "epoch": 186.11, "learning_rate": 3.2286240573379565e-05, "loss": 0.8181, "step": 812000 }, { "epoch": 186.11, "eval_loss": 1.659487009048462, "eval_runtime": 8.694, "eval_samples_per_second": 539.799, "eval_steps_per_second": 67.518, "step": 812000 }, { "epoch": 186.23, "learning_rate": 3.226232050808562e-05, "loss": 0.8172, "step": 812500 }, { "epoch": 186.23, "eval_loss": 1.6513561010360718, "eval_runtime": 8.6961, "eval_samples_per_second": 539.667, "eval_steps_per_second": 67.502, "step": 812500 }, { "epoch": 186.34, "learning_rate": 3.223839317939602e-05, "loss": 0.8178, "step": 813000 }, { "epoch": 186.34, "eval_loss": 1.6929652690887451, "eval_runtime": 8.6915, "eval_samples_per_second": 539.952, "eval_steps_per_second": 67.537, "step": 813000 }, { "epoch": 186.45, "learning_rate": 3.221445861124164e-05, "loss": 0.8253, "step": 813500 }, { "epoch": 186.45, "eval_loss": 1.6688289642333984, "eval_runtime": 8.6947, "eval_samples_per_second": 539.754, "eval_steps_per_second": 67.512, "step": 813500 }, { "epoch": 186.57, "learning_rate": 3.2190516827560575e-05, "loss": 0.8156, "step": 814000 }, { "epoch": 186.57, "eval_loss": 1.667924165725708, "eval_runtime": 8.6958, "eval_samples_per_second": 539.683, "eval_steps_per_second": 67.503, "step": 814000 }, { "epoch": 186.68, "learning_rate": 3.2166567852298166e-05, "loss": 0.8257, "step": 814500 }, { "epoch": 186.68, "eval_loss": 1.6542818546295166, "eval_runtime": 8.7024, "eval_samples_per_second": 539.278, "eval_steps_per_second": 67.453, "step": 814500 }, { "epoch": 186.8, "learning_rate": 3.214261170940693e-05, "loss": 0.8157, "step": 815000 }, { "epoch": 186.8, "eval_loss": 1.6560546159744263, "eval_runtime": 8.6897, "eval_samples_per_second": 540.067, "eval_steps_per_second": 67.552, "step": 815000 }, { "epoch": 186.91, "learning_rate": 3.211864842284656e-05, "loss": 0.8217, "step": 815500 }, { "epoch": 186.91, "eval_loss": 1.6597298383712769, "eval_runtime": 8.6909, "eval_samples_per_second": 539.99, "eval_steps_per_second": 67.542, "step": 815500 }, { "epoch": 187.03, "learning_rate": 3.209467801658388e-05, "loss": 0.8178, "step": 816000 }, { "epoch": 187.03, "eval_loss": 1.668703556060791, "eval_runtime": 8.6936, "eval_samples_per_second": 539.822, "eval_steps_per_second": 67.521, "step": 816000 }, { "epoch": 187.14, "learning_rate": 3.2070700514592856e-05, "loss": 0.8158, "step": 816500 }, { "epoch": 187.14, "eval_loss": 1.6680394411087036, "eval_runtime": 8.6939, "eval_samples_per_second": 539.802, "eval_steps_per_second": 67.518, "step": 816500 }, { "epoch": 187.26, "learning_rate": 3.204671594085453e-05, "loss": 0.8158, "step": 817000 }, { "epoch": 187.26, "eval_loss": 1.671481966972351, "eval_runtime": 8.6944, "eval_samples_per_second": 539.776, "eval_steps_per_second": 67.515, "step": 817000 }, { "epoch": 187.37, "learning_rate": 3.202272431935704e-05, "loss": 0.8173, "step": 817500 }, { "epoch": 187.37, "eval_loss": 1.6933059692382812, "eval_runtime": 8.6963, "eval_samples_per_second": 539.654, "eval_steps_per_second": 67.5, "step": 817500 }, { "epoch": 187.49, "learning_rate": 3.1998725674095556e-05, "loss": 0.819, "step": 818000 }, { "epoch": 187.49, "eval_loss": 1.6961278915405273, "eval_runtime": 8.6909, "eval_samples_per_second": 539.989, "eval_steps_per_second": 67.542, "step": 818000 }, { "epoch": 187.6, "learning_rate": 3.1974720029072257e-05, "loss": 0.8139, "step": 818500 }, { "epoch": 187.6, "eval_loss": 1.6692328453063965, "eval_runtime": 8.6901, "eval_samples_per_second": 540.039, "eval_steps_per_second": 67.548, "step": 818500 }, { "epoch": 187.71, "learning_rate": 3.195070740829637e-05, "loss": 0.8265, "step": 819000 }, { "epoch": 187.71, "eval_loss": 1.679097294807434, "eval_runtime": 8.6915, "eval_samples_per_second": 539.953, "eval_steps_per_second": 67.537, "step": 819000 }, { "epoch": 187.83, "learning_rate": 3.192668783578405e-05, "loss": 0.8221, "step": 819500 }, { "epoch": 187.83, "eval_loss": 1.6887224912643433, "eval_runtime": 8.6939, "eval_samples_per_second": 539.804, "eval_steps_per_second": 67.519, "step": 819500 }, { "epoch": 187.94, "learning_rate": 3.190266133555844e-05, "loss": 0.8204, "step": 820000 }, { "epoch": 187.94, "eval_loss": 1.6592334508895874, "eval_runtime": 8.6932, "eval_samples_per_second": 539.848, "eval_steps_per_second": 67.524, "step": 820000 }, { "epoch": 188.06, "learning_rate": 3.187862793164958e-05, "loss": 0.8176, "step": 820500 }, { "epoch": 188.06, "eval_loss": 1.690359354019165, "eval_runtime": 8.6961, "eval_samples_per_second": 539.669, "eval_steps_per_second": 67.502, "step": 820500 }, { "epoch": 188.17, "learning_rate": 3.185458764809445e-05, "loss": 0.814, "step": 821000 }, { "epoch": 188.17, "eval_loss": 1.6716324090957642, "eval_runtime": 8.6914, "eval_samples_per_second": 539.961, "eval_steps_per_second": 67.538, "step": 821000 }, { "epoch": 188.29, "learning_rate": 3.183054050893688e-05, "loss": 0.8062, "step": 821500 }, { "epoch": 188.29, "eval_loss": 1.6801156997680664, "eval_runtime": 8.6915, "eval_samples_per_second": 539.955, "eval_steps_per_second": 67.538, "step": 821500 }, { "epoch": 188.4, "learning_rate": 3.180648653822758e-05, "loss": 0.8134, "step": 822000 }, { "epoch": 188.4, "eval_loss": 1.6665304899215698, "eval_runtime": 8.6937, "eval_samples_per_second": 539.817, "eval_steps_per_second": 67.52, "step": 822000 }, { "epoch": 188.52, "learning_rate": 3.1782425760024074e-05, "loss": 0.8152, "step": 822500 }, { "epoch": 188.52, "eval_loss": 1.6643030643463135, "eval_runtime": 8.6906, "eval_samples_per_second": 540.011, "eval_steps_per_second": 67.544, "step": 822500 }, { "epoch": 188.63, "learning_rate": 3.17583581983907e-05, "loss": 0.8137, "step": 823000 }, { "epoch": 188.63, "eval_loss": 1.6929978132247925, "eval_runtime": 8.6927, "eval_samples_per_second": 539.879, "eval_steps_per_second": 67.528, "step": 823000 }, { "epoch": 188.75, "learning_rate": 3.173428387739858e-05, "loss": 0.8152, "step": 823500 }, { "epoch": 188.75, "eval_loss": 1.6588995456695557, "eval_runtime": 8.6946, "eval_samples_per_second": 539.757, "eval_steps_per_second": 67.513, "step": 823500 }, { "epoch": 188.86, "learning_rate": 3.1710202821125623e-05, "loss": 0.8218, "step": 824000 }, { "epoch": 188.86, "eval_loss": 1.6523231267929077, "eval_runtime": 8.6923, "eval_samples_per_second": 539.901, "eval_steps_per_second": 67.531, "step": 824000 }, { "epoch": 188.98, "learning_rate": 3.1686115053656416e-05, "loss": 0.8305, "step": 824500 }, { "epoch": 188.98, "eval_loss": 1.6643202304840088, "eval_runtime": 8.6991, "eval_samples_per_second": 539.482, "eval_steps_per_second": 67.478, "step": 824500 }, { "epoch": 189.09, "learning_rate": 3.166202059908232e-05, "loss": 0.8081, "step": 825000 }, { "epoch": 189.09, "eval_loss": 1.6629326343536377, "eval_runtime": 8.6973, "eval_samples_per_second": 539.59, "eval_steps_per_second": 67.492, "step": 825000 }, { "epoch": 189.2, "learning_rate": 3.163791948150134e-05, "loss": 0.8088, "step": 825500 }, { "epoch": 189.2, "eval_loss": 1.6732749938964844, "eval_runtime": 8.697, "eval_samples_per_second": 539.609, "eval_steps_per_second": 67.494, "step": 825500 }, { "epoch": 189.32, "learning_rate": 3.161381172501818e-05, "loss": 0.8062, "step": 826000 }, { "epoch": 189.32, "eval_loss": 1.6552104949951172, "eval_runtime": 8.6937, "eval_samples_per_second": 539.815, "eval_steps_per_second": 67.52, "step": 826000 }, { "epoch": 189.43, "learning_rate": 3.158969735374414e-05, "loss": 0.8132, "step": 826500 }, { "epoch": 189.43, "eval_loss": 1.6730916500091553, "eval_runtime": 8.6994, "eval_samples_per_second": 539.463, "eval_steps_per_second": 67.476, "step": 826500 }, { "epoch": 189.55, "learning_rate": 3.156557639179718e-05, "loss": 0.8111, "step": 827000 }, { "epoch": 189.55, "eval_loss": 1.6512221097946167, "eval_runtime": 8.696, "eval_samples_per_second": 539.674, "eval_steps_per_second": 67.502, "step": 827000 }, { "epoch": 189.66, "learning_rate": 3.154144886330183e-05, "loss": 0.8128, "step": 827500 }, { "epoch": 189.66, "eval_loss": 1.6621840000152588, "eval_runtime": 8.6927, "eval_samples_per_second": 539.876, "eval_steps_per_second": 67.528, "step": 827500 }, { "epoch": 189.78, "learning_rate": 3.151731479238919e-05, "loss": 0.8237, "step": 828000 }, { "epoch": 189.78, "eval_loss": 1.6917556524276733, "eval_runtime": 8.6921, "eval_samples_per_second": 539.914, "eval_steps_per_second": 67.532, "step": 828000 }, { "epoch": 189.89, "learning_rate": 3.149317420319689e-05, "loss": 0.8273, "step": 828500 }, { "epoch": 189.89, "eval_loss": 1.676473617553711, "eval_runtime": 8.6941, "eval_samples_per_second": 539.793, "eval_steps_per_second": 67.517, "step": 828500 }, { "epoch": 190.01, "learning_rate": 3.146902711986911e-05, "loss": 0.8209, "step": 829000 }, { "epoch": 190.01, "eval_loss": 1.6833518743515015, "eval_runtime": 8.691, "eval_samples_per_second": 539.985, "eval_steps_per_second": 67.541, "step": 829000 }, { "epoch": 190.12, "learning_rate": 3.144487356655651e-05, "loss": 0.8054, "step": 829500 }, { "epoch": 190.12, "eval_loss": 1.6572078466415405, "eval_runtime": 8.69, "eval_samples_per_second": 540.047, "eval_steps_per_second": 67.549, "step": 829500 }, { "epoch": 190.24, "learning_rate": 3.142071356741621e-05, "loss": 0.8088, "step": 830000 }, { "epoch": 190.24, "eval_loss": 1.6794331073760986, "eval_runtime": 8.7043, "eval_samples_per_second": 539.157, "eval_steps_per_second": 67.438, "step": 830000 }, { "epoch": 190.35, "learning_rate": 3.139654714661178e-05, "loss": 0.8132, "step": 830500 }, { "epoch": 190.35, "eval_loss": 1.6662617921829224, "eval_runtime": 8.6973, "eval_samples_per_second": 539.593, "eval_steps_per_second": 67.492, "step": 830500 }, { "epoch": 190.47, "learning_rate": 3.137237432831324e-05, "loss": 0.8146, "step": 831000 }, { "epoch": 190.47, "eval_loss": 1.6977335214614868, "eval_runtime": 8.6917, "eval_samples_per_second": 539.942, "eval_steps_per_second": 67.536, "step": 831000 }, { "epoch": 190.58, "learning_rate": 3.134819513669697e-05, "loss": 0.8143, "step": 831500 }, { "epoch": 190.58, "eval_loss": 1.6655166149139404, "eval_runtime": 8.6912, "eval_samples_per_second": 539.971, "eval_steps_per_second": 67.54, "step": 831500 }, { "epoch": 190.69, "learning_rate": 3.132400959594574e-05, "loss": 0.8114, "step": 832000 }, { "epoch": 190.69, "eval_loss": 1.6710119247436523, "eval_runtime": 8.6981, "eval_samples_per_second": 539.542, "eval_steps_per_second": 67.486, "step": 832000 }, { "epoch": 190.81, "learning_rate": 3.1299817730248674e-05, "loss": 0.8198, "step": 832500 }, { "epoch": 190.81, "eval_loss": 1.6513673067092896, "eval_runtime": 8.708, "eval_samples_per_second": 538.927, "eval_steps_per_second": 67.409, "step": 832500 }, { "epoch": 190.92, "learning_rate": 3.127561956380123e-05, "loss": 0.8118, "step": 833000 }, { "epoch": 190.92, "eval_loss": 1.6586047410964966, "eval_runtime": 8.6959, "eval_samples_per_second": 539.677, "eval_steps_per_second": 67.503, "step": 833000 }, { "epoch": 191.04, "learning_rate": 3.125141512080514e-05, "loss": 0.8144, "step": 833500 }, { "epoch": 191.04, "eval_loss": 1.6907293796539307, "eval_runtime": 8.6929, "eval_samples_per_second": 539.864, "eval_steps_per_second": 67.526, "step": 833500 }, { "epoch": 191.15, "learning_rate": 3.122720442546844e-05, "loss": 0.8062, "step": 834000 }, { "epoch": 191.15, "eval_loss": 1.6791356801986694, "eval_runtime": 8.7055, "eval_samples_per_second": 539.086, "eval_steps_per_second": 67.429, "step": 834000 }, { "epoch": 191.27, "learning_rate": 3.120298750200542e-05, "loss": 0.8088, "step": 834500 }, { "epoch": 191.27, "eval_loss": 1.6858444213867188, "eval_runtime": 8.692, "eval_samples_per_second": 539.924, "eval_steps_per_second": 67.534, "step": 834500 }, { "epoch": 191.38, "learning_rate": 3.117876437463656e-05, "loss": 0.8069, "step": 835000 }, { "epoch": 191.38, "eval_loss": 1.6647320985794067, "eval_runtime": 8.6921, "eval_samples_per_second": 539.914, "eval_steps_per_second": 67.532, "step": 835000 }, { "epoch": 191.5, "learning_rate": 3.11545350675886e-05, "loss": 0.8081, "step": 835500 }, { "epoch": 191.5, "eval_loss": 1.6740485429763794, "eval_runtime": 8.6972, "eval_samples_per_second": 539.602, "eval_steps_per_second": 67.493, "step": 835500 }, { "epoch": 191.61, "learning_rate": 3.113029960509442e-05, "loss": 0.8146, "step": 836000 }, { "epoch": 191.61, "eval_loss": 1.6725314855575562, "eval_runtime": 8.6966, "eval_samples_per_second": 539.636, "eval_steps_per_second": 67.498, "step": 836000 }, { "epoch": 191.73, "learning_rate": 3.110605801139308e-05, "loss": 0.8213, "step": 836500 }, { "epoch": 191.73, "eval_loss": 1.6777098178863525, "eval_runtime": 8.6854, "eval_samples_per_second": 540.331, "eval_steps_per_second": 67.585, "step": 836500 }, { "epoch": 191.84, "learning_rate": 3.108181031072975e-05, "loss": 0.8122, "step": 837000 }, { "epoch": 191.84, "eval_loss": 1.6783965826034546, "eval_runtime": 8.6969, "eval_samples_per_second": 539.617, "eval_steps_per_second": 67.495, "step": 837000 }, { "epoch": 191.95, "learning_rate": 3.105755652735573e-05, "loss": 0.8122, "step": 837500 }, { "epoch": 191.95, "eval_loss": 1.6728233098983765, "eval_runtime": 8.6933, "eval_samples_per_second": 539.839, "eval_steps_per_second": 67.523, "step": 837500 }, { "epoch": 192.07, "learning_rate": 3.10332966855284e-05, "loss": 0.8104, "step": 838000 }, { "epoch": 192.07, "eval_loss": 1.6852580308914185, "eval_runtime": 8.7001, "eval_samples_per_second": 539.419, "eval_steps_per_second": 67.47, "step": 838000 }, { "epoch": 192.18, "learning_rate": 3.100903080951117e-05, "loss": 0.8006, "step": 838500 }, { "epoch": 192.18, "eval_loss": 1.6820462942123413, "eval_runtime": 8.6967, "eval_samples_per_second": 539.628, "eval_steps_per_second": 67.497, "step": 838500 }, { "epoch": 192.3, "learning_rate": 3.0984758923573535e-05, "loss": 0.8102, "step": 839000 }, { "epoch": 192.3, "eval_loss": 1.6767605543136597, "eval_runtime": 8.6876, "eval_samples_per_second": 540.193, "eval_steps_per_second": 67.567, "step": 839000 }, { "epoch": 192.41, "learning_rate": 3.096048105199096e-05, "loss": 0.8082, "step": 839500 }, { "epoch": 192.41, "eval_loss": 1.6952568292617798, "eval_runtime": 8.6921, "eval_samples_per_second": 539.917, "eval_steps_per_second": 67.533, "step": 839500 }, { "epoch": 192.53, "learning_rate": 3.093619721904492e-05, "loss": 0.8122, "step": 840000 }, { "epoch": 192.53, "eval_loss": 1.67466402053833, "eval_runtime": 8.6915, "eval_samples_per_second": 539.955, "eval_steps_per_second": 67.537, "step": 840000 }, { "epoch": 192.64, "learning_rate": 3.091190744902283e-05, "loss": 0.8067, "step": 840500 }, { "epoch": 192.64, "eval_loss": 1.6913167238235474, "eval_runtime": 8.6935, "eval_samples_per_second": 539.826, "eval_steps_per_second": 67.521, "step": 840500 }, { "epoch": 192.76, "learning_rate": 3.0887611766218065e-05, "loss": 0.8123, "step": 841000 }, { "epoch": 192.76, "eval_loss": 1.6990282535552979, "eval_runtime": 8.6896, "eval_samples_per_second": 540.073, "eval_steps_per_second": 67.552, "step": 841000 }, { "epoch": 192.87, "learning_rate": 3.086331019492991e-05, "loss": 0.812, "step": 841500 }, { "epoch": 192.87, "eval_loss": 1.6814254522323608, "eval_runtime": 8.6918, "eval_samples_per_second": 539.933, "eval_steps_per_second": 67.535, "step": 841500 }, { "epoch": 192.99, "learning_rate": 3.083900275946351e-05, "loss": 0.8129, "step": 842000 }, { "epoch": 192.99, "eval_loss": 1.6731325387954712, "eval_runtime": 8.6938, "eval_samples_per_second": 539.807, "eval_steps_per_second": 67.519, "step": 842000 }, { "epoch": 193.1, "learning_rate": 3.0814689484129926e-05, "loss": 0.8033, "step": 842500 }, { "epoch": 193.1, "eval_loss": 1.666045069694519, "eval_runtime": 8.6938, "eval_samples_per_second": 539.812, "eval_steps_per_second": 67.52, "step": 842500 }, { "epoch": 193.22, "learning_rate": 3.0790370393246024e-05, "loss": 0.8001, "step": 843000 }, { "epoch": 193.22, "eval_loss": 1.6927074193954468, "eval_runtime": 8.7045, "eval_samples_per_second": 539.147, "eval_steps_per_second": 67.437, "step": 843000 }, { "epoch": 193.33, "learning_rate": 3.076604551113449e-05, "loss": 0.8065, "step": 843500 }, { "epoch": 193.33, "eval_loss": 1.6715892553329468, "eval_runtime": 8.7031, "eval_samples_per_second": 539.233, "eval_steps_per_second": 67.447, "step": 843500 }, { "epoch": 193.44, "learning_rate": 3.0741714862123804e-05, "loss": 0.8086, "step": 844000 }, { "epoch": 193.44, "eval_loss": 1.6679490804672241, "eval_runtime": 8.6993, "eval_samples_per_second": 539.466, "eval_steps_per_second": 67.476, "step": 844000 }, { "epoch": 193.56, "learning_rate": 3.0717378470548234e-05, "loss": 0.804, "step": 844500 }, { "epoch": 193.56, "eval_loss": 1.6712647676467896, "eval_runtime": 8.6966, "eval_samples_per_second": 539.637, "eval_steps_per_second": 67.498, "step": 844500 }, { "epoch": 193.67, "learning_rate": 3.0693036360747765e-05, "loss": 0.807, "step": 845000 }, { "epoch": 193.67, "eval_loss": 1.651455283164978, "eval_runtime": 8.7083, "eval_samples_per_second": 538.911, "eval_steps_per_second": 67.407, "step": 845000 }, { "epoch": 193.79, "learning_rate": 3.06686885570681e-05, "loss": 0.8097, "step": 845500 }, { "epoch": 193.79, "eval_loss": 1.6669743061065674, "eval_runtime": 8.6874, "eval_samples_per_second": 540.21, "eval_steps_per_second": 67.569, "step": 845500 }, { "epoch": 193.9, "learning_rate": 3.0644335083860656e-05, "loss": 0.8022, "step": 846000 }, { "epoch": 193.9, "eval_loss": 1.6706504821777344, "eval_runtime": 8.6925, "eval_samples_per_second": 539.889, "eval_steps_per_second": 67.529, "step": 846000 }, { "epoch": 194.02, "learning_rate": 3.061997596548253e-05, "loss": 0.8143, "step": 846500 }, { "epoch": 194.02, "eval_loss": 1.6737509965896606, "eval_runtime": 8.6938, "eval_samples_per_second": 539.811, "eval_steps_per_second": 67.519, "step": 846500 }, { "epoch": 194.13, "learning_rate": 3.059561122629642e-05, "loss": 0.8052, "step": 847000 }, { "epoch": 194.13, "eval_loss": 1.6813565492630005, "eval_runtime": 8.6957, "eval_samples_per_second": 539.689, "eval_steps_per_second": 67.504, "step": 847000 }, { "epoch": 194.25, "learning_rate": 3.0571240890670674e-05, "loss": 0.8052, "step": 847500 }, { "epoch": 194.25, "eval_loss": 1.703461766242981, "eval_runtime": 8.7099, "eval_samples_per_second": 538.812, "eval_steps_per_second": 67.395, "step": 847500 }, { "epoch": 194.36, "learning_rate": 3.0546864982979254e-05, "loss": 0.804, "step": 848000 }, { "epoch": 194.36, "eval_loss": 1.6596298217773438, "eval_runtime": 8.7051, "eval_samples_per_second": 539.109, "eval_steps_per_second": 67.432, "step": 848000 }, { "epoch": 194.48, "learning_rate": 3.0522483527601656e-05, "loss": 0.8042, "step": 848500 }, { "epoch": 194.48, "eval_loss": 1.6881006956100464, "eval_runtime": 8.6931, "eval_samples_per_second": 539.855, "eval_steps_per_second": 67.525, "step": 848500 }, { "epoch": 194.59, "learning_rate": 3.0498096548922962e-05, "loss": 0.8035, "step": 849000 }, { "epoch": 194.59, "eval_loss": 1.662846326828003, "eval_runtime": 8.7037, "eval_samples_per_second": 539.198, "eval_steps_per_second": 67.443, "step": 849000 }, { "epoch": 194.71, "learning_rate": 3.0473704071333747e-05, "loss": 0.8039, "step": 849500 }, { "epoch": 194.71, "eval_loss": 1.664160132408142, "eval_runtime": 8.7076, "eval_samples_per_second": 538.952, "eval_steps_per_second": 67.412, "step": 849500 }, { "epoch": 194.82, "learning_rate": 3.044930611923011e-05, "loss": 0.8114, "step": 850000 }, { "epoch": 194.82, "eval_loss": 1.661158561706543, "eval_runtime": 8.6994, "eval_samples_per_second": 539.463, "eval_steps_per_second": 67.476, "step": 850000 } ], "max_steps": 872600, "num_train_epochs": 200, "total_flos": 3.5806401315750574e+18, "trial_name": null, "trial_params": null }