|
{ |
|
"best_metric": 2.9959683418273926, |
|
"best_model_checkpoint": "./model_tweets_2020_Q2_75/checkpoint-192000", |
|
"epoch": 20.214101019969846, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 3.1519558429718018, |
|
"eval_runtime": 113.5135, |
|
"eval_samples_per_second": 880.794, |
|
"eval_steps_per_second": 55.051, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.939131159843243e-06, |
|
"loss": 3.3704, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 3.1285579204559326, |
|
"eval_runtime": 113.7953, |
|
"eval_samples_per_second": 878.613, |
|
"eval_steps_per_second": 54.914, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 3.107924699783325, |
|
"eval_runtime": 113.2981, |
|
"eval_samples_per_second": 882.468, |
|
"eval_steps_per_second": 55.155, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.872425581589261e-06, |
|
"loss": 3.2908, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 3.084472179412842, |
|
"eval_runtime": 112.7537, |
|
"eval_samples_per_second": 886.73, |
|
"eval_steps_per_second": 55.422, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 3.0868372917175293, |
|
"eval_runtime": 113.2037, |
|
"eval_samples_per_second": 883.204, |
|
"eval_steps_per_second": 55.201, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.80572000333528e-06, |
|
"loss": 3.2742, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 3.076770067214966, |
|
"eval_runtime": 112.4863, |
|
"eval_samples_per_second": 888.837, |
|
"eval_steps_per_second": 55.553, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 3.0705816745758057, |
|
"eval_runtime": 113.9358, |
|
"eval_samples_per_second": 877.529, |
|
"eval_steps_per_second": 54.847, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.739014425081299e-06, |
|
"loss": 3.2579, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 3.0620689392089844, |
|
"eval_runtime": 112.5706, |
|
"eval_samples_per_second": 888.172, |
|
"eval_steps_per_second": 55.512, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 3.0659217834472656, |
|
"eval_runtime": 112.8069, |
|
"eval_samples_per_second": 886.311, |
|
"eval_steps_per_second": 55.396, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.672308846827316e-06, |
|
"loss": 3.2448, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 3.0456507205963135, |
|
"eval_runtime": 114.4983, |
|
"eval_samples_per_second": 873.218, |
|
"eval_steps_per_second": 54.577, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 3.055419683456421, |
|
"eval_runtime": 113.1402, |
|
"eval_samples_per_second": 883.7, |
|
"eval_steps_per_second": 55.232, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.605603268573334e-06, |
|
"loss": 3.2416, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 3.0335144996643066, |
|
"eval_runtime": 115.0306, |
|
"eval_samples_per_second": 869.178, |
|
"eval_steps_per_second": 54.325, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 3.0320653915405273, |
|
"eval_runtime": 115.4623, |
|
"eval_samples_per_second": 865.928, |
|
"eval_steps_per_second": 54.122, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.538897690319354e-06, |
|
"loss": 3.23, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 3.0136971473693848, |
|
"eval_runtime": 114.4021, |
|
"eval_samples_per_second": 873.952, |
|
"eval_steps_per_second": 54.623, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 3.0060510635375977, |
|
"eval_runtime": 114.4587, |
|
"eval_samples_per_second": 873.52, |
|
"eval_steps_per_second": 54.596, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.472192112065373e-06, |
|
"loss": 3.2084, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 3.025130033493042, |
|
"eval_runtime": 113.0671, |
|
"eval_samples_per_second": 884.272, |
|
"eval_steps_per_second": 55.268, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 3.009242296218872, |
|
"eval_runtime": 112.796, |
|
"eval_samples_per_second": 886.397, |
|
"eval_steps_per_second": 55.401, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.405486533811392e-06, |
|
"loss": 3.2055, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 3.0042872428894043, |
|
"eval_runtime": 112.6772, |
|
"eval_samples_per_second": 887.332, |
|
"eval_steps_per_second": 55.459, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 3.0054819583892822, |
|
"eval_runtime": 112.4817, |
|
"eval_samples_per_second": 888.873, |
|
"eval_steps_per_second": 55.556, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.338780955557409e-06, |
|
"loss": 3.2026, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 3.0065696239471436, |
|
"eval_runtime": 112.6067, |
|
"eval_samples_per_second": 887.887, |
|
"eval_steps_per_second": 55.494, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 3.01246976852417, |
|
"eval_runtime": 112.6363, |
|
"eval_samples_per_second": 887.654, |
|
"eval_steps_per_second": 55.479, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.272075377303427e-06, |
|
"loss": 3.2069, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 3.0032289028167725, |
|
"eval_runtime": 112.0908, |
|
"eval_samples_per_second": 891.974, |
|
"eval_steps_per_second": 55.749, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.995911121368408, |
|
"eval_runtime": 114.3666, |
|
"eval_samples_per_second": 874.224, |
|
"eval_steps_per_second": 54.64, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.205369799049446e-06, |
|
"loss": 3.1904, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.9959683418273926, |
|
"eval_runtime": 113.8999, |
|
"eval_samples_per_second": 877.806, |
|
"eval_steps_per_second": 54.864, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 3.003798246383667, |
|
"eval_runtime": 114.6475, |
|
"eval_samples_per_second": 872.082, |
|
"eval_steps_per_second": 54.506, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.138664220795464e-06, |
|
"loss": 3.1989, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 3.0016160011291504, |
|
"eval_runtime": 113.2837, |
|
"eval_samples_per_second": 882.58, |
|
"eval_steps_per_second": 55.162, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 3.004883050918579, |
|
"eval_runtime": 113.3643, |
|
"eval_samples_per_second": 881.953, |
|
"eval_steps_per_second": 55.123, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.071958642541483e-06, |
|
"loss": 3.2113, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 3.0085606575012207, |
|
"eval_runtime": 114.3159, |
|
"eval_samples_per_second": 874.612, |
|
"eval_steps_per_second": 54.664, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 3.010385274887085, |
|
"eval_runtime": 113.1162, |
|
"eval_samples_per_second": 883.887, |
|
"eval_steps_per_second": 55.244, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.005253064287502e-06, |
|
"loss": 3.217, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 3.0165932178497314, |
|
"eval_runtime": 113.3739, |
|
"eval_samples_per_second": 881.878, |
|
"eval_steps_per_second": 55.118, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 3.0139265060424805, |
|
"eval_runtime": 112.9548, |
|
"eval_samples_per_second": 885.15, |
|
"eval_steps_per_second": 55.323, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.93854748603352e-06, |
|
"loss": 3.2029, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 3.021667242050171, |
|
"eval_runtime": 113.782, |
|
"eval_samples_per_second": 878.716, |
|
"eval_steps_per_second": 54.921, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 3.0237627029418945, |
|
"eval_runtime": 112.8799, |
|
"eval_samples_per_second": 885.738, |
|
"eval_steps_per_second": 55.36, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.871841907779539e-06, |
|
"loss": 3.2226, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 3.0233664512634277, |
|
"eval_runtime": 112.7482, |
|
"eval_samples_per_second": 886.772, |
|
"eval_steps_per_second": 55.424, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 3.0216429233551025, |
|
"eval_runtime": 112.8816, |
|
"eval_samples_per_second": 885.725, |
|
"eval_steps_per_second": 55.359, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 8.805136329525557e-06, |
|
"loss": 3.2199, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 3.0175387859344482, |
|
"eval_runtime": 113.1399, |
|
"eval_samples_per_second": 883.703, |
|
"eval_steps_per_second": 55.233, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 3.036482095718384, |
|
"eval_runtime": 112.7456, |
|
"eval_samples_per_second": 886.793, |
|
"eval_steps_per_second": 55.426, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.738430751271576e-06, |
|
"loss": 3.2254, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 3.028167247772217, |
|
"eval_runtime": 113.0478, |
|
"eval_samples_per_second": 884.422, |
|
"eval_steps_per_second": 55.277, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 3.0228230953216553, |
|
"eval_runtime": 112.6735, |
|
"eval_samples_per_second": 887.36, |
|
"eval_steps_per_second": 55.461, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.671725173017595e-06, |
|
"loss": 3.2349, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 3.0204546451568604, |
|
"eval_runtime": 113.395, |
|
"eval_samples_per_second": 881.714, |
|
"eval_steps_per_second": 55.108, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 3.0406222343444824, |
|
"eval_runtime": 113.6641, |
|
"eval_samples_per_second": 879.627, |
|
"eval_steps_per_second": 54.978, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.605019594763613e-06, |
|
"loss": 3.2424, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 3.0306990146636963, |
|
"eval_runtime": 113.8559, |
|
"eval_samples_per_second": 878.145, |
|
"eval_steps_per_second": 54.885, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 3.041322708129883, |
|
"eval_runtime": 112.6936, |
|
"eval_samples_per_second": 887.202, |
|
"eval_steps_per_second": 55.451, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.538314016509632e-06, |
|
"loss": 3.2347, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 3.0401053428649902, |
|
"eval_runtime": 113.3728, |
|
"eval_samples_per_second": 881.887, |
|
"eval_steps_per_second": 55.119, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 3.051990270614624, |
|
"eval_runtime": 112.6317, |
|
"eval_samples_per_second": 887.69, |
|
"eval_steps_per_second": 55.482, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 8.471608438255649e-06, |
|
"loss": 3.2476, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 3.0489377975463867, |
|
"eval_runtime": 112.9, |
|
"eval_samples_per_second": 885.581, |
|
"eval_steps_per_second": 55.35, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 3.052086353302002, |
|
"eval_runtime": 114.3268, |
|
"eval_samples_per_second": 874.528, |
|
"eval_steps_per_second": 54.659, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 8.404902860001667e-06, |
|
"loss": 3.2506, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 3.068549633026123, |
|
"eval_runtime": 113.2619, |
|
"eval_samples_per_second": 882.751, |
|
"eval_steps_per_second": 55.173, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 3.0546491146087646, |
|
"eval_runtime": 113.505, |
|
"eval_samples_per_second": 880.86, |
|
"eval_steps_per_second": 55.055, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 8.338197281747686e-06, |
|
"loss": 3.2547, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 3.054222822189331, |
|
"eval_runtime": 113.4821, |
|
"eval_samples_per_second": 881.037, |
|
"eval_steps_per_second": 55.066, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 3.0536701679229736, |
|
"eval_runtime": 113.6042, |
|
"eval_samples_per_second": 880.091, |
|
"eval_steps_per_second": 55.007, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 8.271491703493705e-06, |
|
"loss": 3.2519, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 3.0587801933288574, |
|
"eval_runtime": 112.9459, |
|
"eval_samples_per_second": 885.22, |
|
"eval_steps_per_second": 55.327, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 3.0728721618652344, |
|
"eval_runtime": 113.366, |
|
"eval_samples_per_second": 881.94, |
|
"eval_steps_per_second": 55.122, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 8.204786125239725e-06, |
|
"loss": 3.2679, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 3.0841500759124756, |
|
"eval_runtime": 112.9942, |
|
"eval_samples_per_second": 884.842, |
|
"eval_steps_per_second": 55.304, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 3.0685195922851562, |
|
"eval_runtime": 113.8052, |
|
"eval_samples_per_second": 878.537, |
|
"eval_steps_per_second": 54.91, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 8.138080546985743e-06, |
|
"loss": 3.2656, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 3.094174861907959, |
|
"eval_runtime": 113.3221, |
|
"eval_samples_per_second": 882.281, |
|
"eval_steps_per_second": 55.144, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 3.094191074371338, |
|
"eval_runtime": 113.0432, |
|
"eval_samples_per_second": 884.458, |
|
"eval_steps_per_second": 55.28, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.07137496873176e-06, |
|
"loss": 3.2908, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 3.091813087463379, |
|
"eval_runtime": 113.945, |
|
"eval_samples_per_second": 877.459, |
|
"eval_steps_per_second": 54.842, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 3.0922000408172607, |
|
"eval_runtime": 113.1262, |
|
"eval_samples_per_second": 883.81, |
|
"eval_steps_per_second": 55.239, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 8.004669390477779e-06, |
|
"loss": 3.2944, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 3.109328508377075, |
|
"eval_runtime": 112.2933, |
|
"eval_samples_per_second": 890.365, |
|
"eval_steps_per_second": 55.649, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 3.11584734916687, |
|
"eval_runtime": 113.071, |
|
"eval_samples_per_second": 884.241, |
|
"eval_steps_per_second": 55.266, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.937963812223798e-06, |
|
"loss": 3.2917, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 3.0996689796447754, |
|
"eval_runtime": 113.0722, |
|
"eval_samples_per_second": 884.232, |
|
"eval_steps_per_second": 55.266, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 3.111070394515991, |
|
"eval_runtime": 112.4182, |
|
"eval_samples_per_second": 889.376, |
|
"eval_steps_per_second": 55.587, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 7.871258233969816e-06, |
|
"loss": 3.2916, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 3.1132729053497314, |
|
"eval_runtime": 113.7916, |
|
"eval_samples_per_second": 878.641, |
|
"eval_steps_per_second": 54.916, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 3.1128952503204346, |
|
"eval_runtime": 113.6553, |
|
"eval_samples_per_second": 879.695, |
|
"eval_steps_per_second": 54.982, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 7.804552655715835e-06, |
|
"loss": 3.2836, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 3.113443613052368, |
|
"eval_runtime": 113.5086, |
|
"eval_samples_per_second": 880.832, |
|
"eval_steps_per_second": 55.053, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 3.1057605743408203, |
|
"eval_runtime": 113.3609, |
|
"eval_samples_per_second": 881.98, |
|
"eval_steps_per_second": 55.125, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 7.737847077461853e-06, |
|
"loss": 3.3068, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 3.121134042739868, |
|
"eval_runtime": 114.162, |
|
"eval_samples_per_second": 875.79, |
|
"eval_steps_per_second": 54.738, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 3.094620704650879, |
|
"eval_runtime": 113.8018, |
|
"eval_samples_per_second": 878.563, |
|
"eval_steps_per_second": 54.911, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 7.671141499207872e-06, |
|
"loss": 3.3026, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 3.107940912246704, |
|
"eval_runtime": 112.9645, |
|
"eval_samples_per_second": 885.075, |
|
"eval_steps_per_second": 55.318, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 3.120192527770996, |
|
"eval_runtime": 112.8661, |
|
"eval_samples_per_second": 885.846, |
|
"eval_steps_per_second": 55.366, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 7.604435920953891e-06, |
|
"loss": 3.3078, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 3.1155478954315186, |
|
"eval_runtime": 114.0442, |
|
"eval_samples_per_second": 876.695, |
|
"eval_steps_per_second": 54.795, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 3.125351905822754, |
|
"eval_runtime": 112.7895, |
|
"eval_samples_per_second": 886.448, |
|
"eval_steps_per_second": 55.404, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 7.537730342699909e-06, |
|
"loss": 3.3168, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 3.127920627593994, |
|
"eval_runtime": 113.3145, |
|
"eval_samples_per_second": 882.34, |
|
"eval_steps_per_second": 55.147, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 3.1179165840148926, |
|
"eval_runtime": 113.6285, |
|
"eval_samples_per_second": 879.902, |
|
"eval_steps_per_second": 54.995, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 7.471024764445928e-06, |
|
"loss": 3.3113, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 3.1277198791503906, |
|
"eval_runtime": 114.2174, |
|
"eval_samples_per_second": 875.366, |
|
"eval_steps_per_second": 54.711, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 3.133394241333008, |
|
"eval_runtime": 113.6646, |
|
"eval_samples_per_second": 879.623, |
|
"eval_steps_per_second": 54.978, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.4043191861919465e-06, |
|
"loss": 3.3102, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_loss": 3.123286485671997, |
|
"eval_runtime": 113.9343, |
|
"eval_samples_per_second": 877.541, |
|
"eval_steps_per_second": 54.847, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 3.1273744106292725, |
|
"eval_runtime": 113.8778, |
|
"eval_samples_per_second": 877.976, |
|
"eval_steps_per_second": 54.875, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 7.337613607937964e-06, |
|
"loss": 3.3235, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 3.1433892250061035, |
|
"eval_runtime": 113.0915, |
|
"eval_samples_per_second": 884.08, |
|
"eval_steps_per_second": 55.256, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 3.1368374824523926, |
|
"eval_runtime": 113.2499, |
|
"eval_samples_per_second": 882.844, |
|
"eval_steps_per_second": 55.179, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 7.270908029683983e-06, |
|
"loss": 3.331, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_loss": 3.1590983867645264, |
|
"eval_runtime": 114.2316, |
|
"eval_samples_per_second": 875.257, |
|
"eval_steps_per_second": 54.705, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 3.154599189758301, |
|
"eval_runtime": 113.4705, |
|
"eval_samples_per_second": 881.128, |
|
"eval_steps_per_second": 55.072, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 7.2042024514300015e-06, |
|
"loss": 3.3308, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 3.1662509441375732, |
|
"eval_runtime": 113.7727, |
|
"eval_samples_per_second": 878.787, |
|
"eval_steps_per_second": 54.925, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 3.1535351276397705, |
|
"eval_runtime": 113.1426, |
|
"eval_samples_per_second": 883.681, |
|
"eval_steps_per_second": 55.231, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 7.13749687317602e-06, |
|
"loss": 3.3396, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 3.1558427810668945, |
|
"eval_runtime": 113.3017, |
|
"eval_samples_per_second": 882.44, |
|
"eval_steps_per_second": 55.154, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 3.169811725616455, |
|
"eval_runtime": 113.2795, |
|
"eval_samples_per_second": 882.614, |
|
"eval_steps_per_second": 55.164, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 7.070791294922038e-06, |
|
"loss": 3.3558, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 3.165127754211426, |
|
"eval_runtime": 113.7995, |
|
"eval_samples_per_second": 878.581, |
|
"eval_steps_per_second": 54.912, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 3.1705756187438965, |
|
"eval_runtime": 113.6919, |
|
"eval_samples_per_second": 879.412, |
|
"eval_steps_per_second": 54.964, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 7.0040857166680564e-06, |
|
"loss": 3.3474, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 3.194214344024658, |
|
"eval_runtime": 113.2694, |
|
"eval_samples_per_second": 882.692, |
|
"eval_steps_per_second": 55.169, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 3.170464515686035, |
|
"eval_runtime": 113.1666, |
|
"eval_samples_per_second": 883.494, |
|
"eval_steps_per_second": 55.219, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 6.937380138414076e-06, |
|
"loss": 3.3513, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 3.1834402084350586, |
|
"eval_runtime": 113.2814, |
|
"eval_samples_per_second": 882.599, |
|
"eval_steps_per_second": 55.164, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 3.1810226440429688, |
|
"eval_runtime": 113.726, |
|
"eval_samples_per_second": 879.148, |
|
"eval_steps_per_second": 54.948, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 6.8706745601600945e-06, |
|
"loss": 3.362, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 3.172321319580078, |
|
"eval_runtime": 113.4364, |
|
"eval_samples_per_second": 881.392, |
|
"eval_steps_per_second": 55.088, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 3.1826891899108887, |
|
"eval_runtime": 113.3067, |
|
"eval_samples_per_second": 882.401, |
|
"eval_steps_per_second": 55.151, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 6.803968981906113e-06, |
|
"loss": 3.3694, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 3.1937167644500732, |
|
"eval_runtime": 113.4796, |
|
"eval_samples_per_second": 881.057, |
|
"eval_steps_per_second": 55.067, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"eval_loss": 3.2004363536834717, |
|
"eval_runtime": 114.2163, |
|
"eval_samples_per_second": 875.374, |
|
"eval_steps_per_second": 54.712, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 6.737263403652131e-06, |
|
"loss": 3.378, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 3.2023050785064697, |
|
"eval_runtime": 113.4517, |
|
"eval_samples_per_second": 881.273, |
|
"eval_steps_per_second": 55.081, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 3.1935720443725586, |
|
"eval_runtime": 113.2158, |
|
"eval_samples_per_second": 883.11, |
|
"eval_steps_per_second": 55.195, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 6.6705578253981495e-06, |
|
"loss": 3.3703, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 3.19478702545166, |
|
"eval_runtime": 112.9761, |
|
"eval_samples_per_second": 884.984, |
|
"eval_steps_per_second": 55.313, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"eval_loss": 3.2082369327545166, |
|
"eval_runtime": 113.4959, |
|
"eval_samples_per_second": 880.93, |
|
"eval_steps_per_second": 55.059, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 6.603852247144168e-06, |
|
"loss": 3.3838, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 3.1974425315856934, |
|
"eval_runtime": 113.5005, |
|
"eval_samples_per_second": 880.895, |
|
"eval_steps_per_second": 55.057, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 3.2029407024383545, |
|
"eval_runtime": 113.4958, |
|
"eval_samples_per_second": 880.932, |
|
"eval_steps_per_second": 55.059, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 6.537146668890187e-06, |
|
"loss": 3.3871, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_loss": 3.216017007827759, |
|
"eval_runtime": 113.9004, |
|
"eval_samples_per_second": 877.802, |
|
"eval_steps_per_second": 54.864, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 3.21976900100708, |
|
"eval_runtime": 113.1938, |
|
"eval_samples_per_second": 883.281, |
|
"eval_steps_per_second": 55.206, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 6.4704410906362044e-06, |
|
"loss": 3.3839, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 3.219007968902588, |
|
"eval_runtime": 113.9329, |
|
"eval_samples_per_second": 877.552, |
|
"eval_steps_per_second": 54.848, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 3.2204408645629883, |
|
"eval_runtime": 114.3981, |
|
"eval_samples_per_second": 873.983, |
|
"eval_steps_per_second": 54.625, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 6.403735512382223e-06, |
|
"loss": 3.389, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 3.218768835067749, |
|
"eval_runtime": 115.2564, |
|
"eval_samples_per_second": 867.475, |
|
"eval_steps_per_second": 54.218, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 3.2246193885803223, |
|
"eval_runtime": 113.3, |
|
"eval_samples_per_second": 882.454, |
|
"eval_steps_per_second": 55.154, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 6.337029934128242e-06, |
|
"loss": 3.398, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 3.233250617980957, |
|
"eval_runtime": 113.6851, |
|
"eval_samples_per_second": 879.464, |
|
"eval_steps_per_second": 54.968, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 3.216823101043701, |
|
"eval_runtime": 113.0594, |
|
"eval_samples_per_second": 884.332, |
|
"eval_steps_per_second": 55.272, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 6.270324355874261e-06, |
|
"loss": 3.4001, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 3.2311105728149414, |
|
"eval_runtime": 113.3088, |
|
"eval_samples_per_second": 882.385, |
|
"eval_steps_per_second": 55.15, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 3.2389867305755615, |
|
"eval_runtime": 113.4715, |
|
"eval_samples_per_second": 881.12, |
|
"eval_steps_per_second": 55.071, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 6.20361877762028e-06, |
|
"loss": 3.4255, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 3.2446951866149902, |
|
"eval_runtime": 114.3046, |
|
"eval_samples_per_second": 874.698, |
|
"eval_steps_per_second": 54.67, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 3.254612684249878, |
|
"eval_runtime": 113.4286, |
|
"eval_samples_per_second": 881.453, |
|
"eval_steps_per_second": 55.092, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 6.1369131993662975e-06, |
|
"loss": 3.4218, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"eval_loss": 3.250980854034424, |
|
"eval_runtime": 113.6708, |
|
"eval_samples_per_second": 879.575, |
|
"eval_steps_per_second": 54.975, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 3.243265151977539, |
|
"eval_runtime": 114.4709, |
|
"eval_samples_per_second": 873.427, |
|
"eval_steps_per_second": 54.59, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 6.070207621112316e-06, |
|
"loss": 3.4326, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 3.2509450912475586, |
|
"eval_runtime": 113.5188, |
|
"eval_samples_per_second": 880.753, |
|
"eval_steps_per_second": 55.048, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 3.257272481918335, |
|
"eval_runtime": 113.6074, |
|
"eval_samples_per_second": 880.066, |
|
"eval_steps_per_second": 55.005, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 6.003502042858335e-06, |
|
"loss": 3.4268, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"eval_loss": 3.249929428100586, |
|
"eval_runtime": 113.664, |
|
"eval_samples_per_second": 879.628, |
|
"eval_steps_per_second": 54.978, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 3.2704036235809326, |
|
"eval_runtime": 113.9356, |
|
"eval_samples_per_second": 877.53, |
|
"eval_steps_per_second": 54.847, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 5.936796464604353e-06, |
|
"loss": 3.4165, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 3.25793194770813, |
|
"eval_runtime": 114.3815, |
|
"eval_samples_per_second": 874.11, |
|
"eval_steps_per_second": 54.633, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 3.266918420791626, |
|
"eval_runtime": 113.5329, |
|
"eval_samples_per_second": 880.643, |
|
"eval_steps_per_second": 55.041, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 5.870090886350371e-06, |
|
"loss": 3.4425, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 3.272322416305542, |
|
"eval_runtime": 113.6369, |
|
"eval_samples_per_second": 879.838, |
|
"eval_steps_per_second": 54.991, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 3.2718310356140137, |
|
"eval_runtime": 113.6845, |
|
"eval_samples_per_second": 879.469, |
|
"eval_steps_per_second": 54.968, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 5.80338530809639e-06, |
|
"loss": 3.4433, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 3.2655293941497803, |
|
"eval_runtime": 114.0468, |
|
"eval_samples_per_second": 876.675, |
|
"eval_steps_per_second": 54.793, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 3.2794032096862793, |
|
"eval_runtime": 115.3844, |
|
"eval_samples_per_second": 866.512, |
|
"eval_steps_per_second": 54.158, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 5.736679729842408e-06, |
|
"loss": 3.4437, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 3.2807533740997314, |
|
"eval_runtime": 114.2793, |
|
"eval_samples_per_second": 874.892, |
|
"eval_steps_per_second": 54.682, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 3.2731096744537354, |
|
"eval_runtime": 113.8065, |
|
"eval_samples_per_second": 878.527, |
|
"eval_steps_per_second": 54.909, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 5.669974151588427e-06, |
|
"loss": 3.4499, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 3.27854323387146, |
|
"eval_runtime": 113.78, |
|
"eval_samples_per_second": 878.731, |
|
"eval_steps_per_second": 54.922, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"eval_loss": 3.28226900100708, |
|
"eval_runtime": 113.9563, |
|
"eval_samples_per_second": 877.371, |
|
"eval_steps_per_second": 54.837, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.603268573334446e-06, |
|
"loss": 3.4593, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 3.2843921184539795, |
|
"eval_runtime": 114.3227, |
|
"eval_samples_per_second": 874.559, |
|
"eval_steps_per_second": 54.661, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 3.2876641750335693, |
|
"eval_runtime": 114.4486, |
|
"eval_samples_per_second": 873.597, |
|
"eval_steps_per_second": 54.601, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 5.536562995080464e-06, |
|
"loss": 3.4481, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 3.2969281673431396, |
|
"eval_runtime": 113.9166, |
|
"eval_samples_per_second": 877.677, |
|
"eval_steps_per_second": 54.856, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_loss": 3.286954879760742, |
|
"eval_runtime": 114.297, |
|
"eval_samples_per_second": 874.756, |
|
"eval_steps_per_second": 54.673, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 5.469857416826483e-06, |
|
"loss": 3.4542, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 3.294614791870117, |
|
"eval_runtime": 114.0483, |
|
"eval_samples_per_second": 876.663, |
|
"eval_steps_per_second": 54.793, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_loss": 3.2901484966278076, |
|
"eval_runtime": 114.4116, |
|
"eval_samples_per_second": 873.88, |
|
"eval_steps_per_second": 54.619, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.403151838572501e-06, |
|
"loss": 3.4547, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 3.2812633514404297, |
|
"eval_runtime": 114.838, |
|
"eval_samples_per_second": 870.635, |
|
"eval_steps_per_second": 54.416, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"eval_loss": 3.2909789085388184, |
|
"eval_runtime": 114.5859, |
|
"eval_samples_per_second": 872.551, |
|
"eval_steps_per_second": 54.536, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 5.33644626031852e-06, |
|
"loss": 3.4618, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 3.2978355884552, |
|
"eval_runtime": 113.8913, |
|
"eval_samples_per_second": 877.872, |
|
"eval_steps_per_second": 54.868, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 3.3054616451263428, |
|
"eval_runtime": 114.4856, |
|
"eval_samples_per_second": 873.315, |
|
"eval_steps_per_second": 54.583, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 5.269740682064538e-06, |
|
"loss": 3.46, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 3.288513422012329, |
|
"eval_runtime": 114.5126, |
|
"eval_samples_per_second": 873.109, |
|
"eval_steps_per_second": 54.57, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"eval_loss": 3.2871181964874268, |
|
"eval_runtime": 114.4417, |
|
"eval_samples_per_second": 873.65, |
|
"eval_steps_per_second": 54.604, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.203035103810556e-06, |
|
"loss": 3.4572, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 3.2905023097991943, |
|
"eval_runtime": 114.8756, |
|
"eval_samples_per_second": 870.35, |
|
"eval_steps_per_second": 54.398, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 3.300590991973877, |
|
"eval_runtime": 114.1062, |
|
"eval_samples_per_second": 876.219, |
|
"eval_steps_per_second": 54.765, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 5.136329525556575e-06, |
|
"loss": 3.4597, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_loss": 3.3080625534057617, |
|
"eval_runtime": 114.1639, |
|
"eval_samples_per_second": 875.776, |
|
"eval_steps_per_second": 54.737, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 3.3031253814697266, |
|
"eval_runtime": 115.5682, |
|
"eval_samples_per_second": 865.134, |
|
"eval_steps_per_second": 54.072, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 5.0696239473025935e-06, |
|
"loss": 3.4651, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 3.288254737854004, |
|
"eval_runtime": 115.6327, |
|
"eval_samples_per_second": 864.652, |
|
"eval_steps_per_second": 54.042, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 3.31886625289917, |
|
"eval_runtime": 115.302, |
|
"eval_samples_per_second": 867.131, |
|
"eval_steps_per_second": 54.197, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 5.002918369048611e-06, |
|
"loss": 3.4571, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 3.297788619995117, |
|
"eval_runtime": 116.3659, |
|
"eval_samples_per_second": 859.204, |
|
"eval_steps_per_second": 53.701, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"eval_loss": 3.309051990509033, |
|
"eval_runtime": 116.156, |
|
"eval_samples_per_second": 860.756, |
|
"eval_steps_per_second": 53.798, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 4.936212790794631e-06, |
|
"loss": 3.4567, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"eval_loss": 3.275514602661133, |
|
"eval_runtime": 114.7773, |
|
"eval_samples_per_second": 871.096, |
|
"eval_steps_per_second": 54.445, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"eval_loss": 3.296752691268921, |
|
"eval_runtime": 116.1942, |
|
"eval_samples_per_second": 860.473, |
|
"eval_steps_per_second": 53.781, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 4.869507212540649e-06, |
|
"loss": 3.4584, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_loss": 3.29911732673645, |
|
"eval_runtime": 115.3134, |
|
"eval_samples_per_second": 867.046, |
|
"eval_steps_per_second": 54.191, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"eval_loss": 3.2818071842193604, |
|
"eval_runtime": 115.3726, |
|
"eval_samples_per_second": 866.601, |
|
"eval_steps_per_second": 54.164, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 4.802801634286667e-06, |
|
"loss": 3.4459, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"eval_loss": 3.282339334487915, |
|
"eval_runtime": 115.2771, |
|
"eval_samples_per_second": 867.319, |
|
"eval_steps_per_second": 54.209, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"eval_loss": 3.2800145149230957, |
|
"eval_runtime": 114.9649, |
|
"eval_samples_per_second": 869.674, |
|
"eval_steps_per_second": 54.356, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 4.7360960560326865e-06, |
|
"loss": 3.4474, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"eval_loss": 3.285576820373535, |
|
"eval_runtime": 114.3634, |
|
"eval_samples_per_second": 874.248, |
|
"eval_steps_per_second": 54.642, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"eval_loss": 3.2844762802124023, |
|
"eval_runtime": 114.9015, |
|
"eval_samples_per_second": 870.154, |
|
"eval_steps_per_second": 54.386, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 4.669390477778704e-06, |
|
"loss": 3.4383, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"eval_loss": 3.2804107666015625, |
|
"eval_runtime": 115.558, |
|
"eval_samples_per_second": 865.211, |
|
"eval_steps_per_second": 54.077, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_loss": 3.270656108856201, |
|
"eval_runtime": 114.5704, |
|
"eval_samples_per_second": 872.669, |
|
"eval_steps_per_second": 54.543, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 4.602684899524723e-06, |
|
"loss": 3.4496, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"eval_loss": 3.28237247467041, |
|
"eval_runtime": 115.9515, |
|
"eval_samples_per_second": 862.274, |
|
"eval_steps_per_second": 53.893, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_loss": 3.276503562927246, |
|
"eval_runtime": 114.4558, |
|
"eval_samples_per_second": 873.542, |
|
"eval_steps_per_second": 54.597, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 4.5359793212707415e-06, |
|
"loss": 3.4411, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"eval_loss": 3.2838242053985596, |
|
"eval_runtime": 115.3787, |
|
"eval_samples_per_second": 866.555, |
|
"eval_steps_per_second": 54.161, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"eval_loss": 3.2839205265045166, |
|
"eval_runtime": 114.911, |
|
"eval_samples_per_second": 870.082, |
|
"eval_steps_per_second": 54.381, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 4.46927374301676e-06, |
|
"loss": 3.4305, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"eval_loss": 3.274820327758789, |
|
"eval_runtime": 114.8509, |
|
"eval_samples_per_second": 870.538, |
|
"eval_steps_per_second": 54.41, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"eval_loss": 3.28206205368042, |
|
"eval_runtime": 115.1535, |
|
"eval_samples_per_second": 868.25, |
|
"eval_steps_per_second": 54.267, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 4.402568164762779e-06, |
|
"loss": 3.4258, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_loss": 3.274627447128296, |
|
"eval_runtime": 115.2346, |
|
"eval_samples_per_second": 867.639, |
|
"eval_steps_per_second": 54.229, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"eval_loss": 3.2860774993896484, |
|
"eval_runtime": 115.4118, |
|
"eval_samples_per_second": 866.307, |
|
"eval_steps_per_second": 54.145, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 4.335862586508797e-06, |
|
"loss": 3.4227, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"eval_loss": 3.27103853225708, |
|
"eval_runtime": 115.4526, |
|
"eval_samples_per_second": 866.0, |
|
"eval_steps_per_second": 54.126, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_loss": 3.2788245677948, |
|
"eval_runtime": 114.6345, |
|
"eval_samples_per_second": 872.181, |
|
"eval_steps_per_second": 54.512, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 4.269157008254816e-06, |
|
"loss": 3.4319, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"eval_loss": 3.279372215270996, |
|
"eval_runtime": 114.9532, |
|
"eval_samples_per_second": 869.763, |
|
"eval_steps_per_second": 54.361, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"eval_loss": 3.27657413482666, |
|
"eval_runtime": 114.59, |
|
"eval_samples_per_second": 872.519, |
|
"eval_steps_per_second": 54.534, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 4.202451430000834e-06, |
|
"loss": 3.436, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"eval_loss": 3.2923691272735596, |
|
"eval_runtime": 114.6828, |
|
"eval_samples_per_second": 871.813, |
|
"eval_steps_per_second": 54.489, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_loss": 3.281205654144287, |
|
"eval_runtime": 114.5759, |
|
"eval_samples_per_second": 872.626, |
|
"eval_steps_per_second": 54.54, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 4.135745851746852e-06, |
|
"loss": 3.4368, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"eval_loss": 3.2851309776306152, |
|
"eval_runtime": 115.7382, |
|
"eval_samples_per_second": 863.863, |
|
"eval_steps_per_second": 53.993, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"eval_loss": 3.282189130783081, |
|
"eval_runtime": 115.2625, |
|
"eval_samples_per_second": 867.429, |
|
"eval_steps_per_second": 54.215, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 4.069040273492872e-06, |
|
"loss": 3.4346, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_loss": 3.265688419342041, |
|
"eval_runtime": 114.9393, |
|
"eval_samples_per_second": 869.868, |
|
"eval_steps_per_second": 54.368, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"eval_loss": 3.2747557163238525, |
|
"eval_runtime": 115.4082, |
|
"eval_samples_per_second": 866.334, |
|
"eval_steps_per_second": 54.147, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 4.0023346952388895e-06, |
|
"loss": 3.4265, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 3.2684779167175293, |
|
"eval_runtime": 115.418, |
|
"eval_samples_per_second": 866.26, |
|
"eval_steps_per_second": 54.142, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_loss": 3.294729709625244, |
|
"eval_runtime": 116.8415, |
|
"eval_samples_per_second": 855.706, |
|
"eval_steps_per_second": 53.483, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 3.935629116984908e-06, |
|
"loss": 3.4306, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 3.2840843200683594, |
|
"eval_runtime": 114.8072, |
|
"eval_samples_per_second": 870.868, |
|
"eval_steps_per_second": 54.43, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_loss": 3.2748351097106934, |
|
"eval_runtime": 114.5932, |
|
"eval_samples_per_second": 872.495, |
|
"eval_steps_per_second": 54.532, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 3.868923538730927e-06, |
|
"loss": 3.4254, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"eval_loss": 3.2793681621551514, |
|
"eval_runtime": 114.5871, |
|
"eval_samples_per_second": 872.542, |
|
"eval_steps_per_second": 54.535, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"eval_loss": 3.2773730754852295, |
|
"eval_runtime": 114.5544, |
|
"eval_samples_per_second": 872.79, |
|
"eval_steps_per_second": 54.55, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 3.8022179604769453e-06, |
|
"loss": 3.4353, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"eval_loss": 3.272627592086792, |
|
"eval_runtime": 115.5185, |
|
"eval_samples_per_second": 865.506, |
|
"eval_steps_per_second": 54.095, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"eval_loss": 3.27628493309021, |
|
"eval_runtime": 115.6599, |
|
"eval_samples_per_second": 864.448, |
|
"eval_steps_per_second": 54.029, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 3.735512382222964e-06, |
|
"loss": 3.4358, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"eval_loss": 3.265916109085083, |
|
"eval_runtime": 116.0704, |
|
"eval_samples_per_second": 861.391, |
|
"eval_steps_per_second": 53.838, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"eval_loss": 3.270989418029785, |
|
"eval_runtime": 114.6284, |
|
"eval_samples_per_second": 872.227, |
|
"eval_steps_per_second": 54.515, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3.668806803968982e-06, |
|
"loss": 3.4182, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_loss": 3.27774977684021, |
|
"eval_runtime": 114.716, |
|
"eval_samples_per_second": 871.561, |
|
"eval_steps_per_second": 54.474, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"eval_loss": 3.2824196815490723, |
|
"eval_runtime": 114.6356, |
|
"eval_samples_per_second": 872.173, |
|
"eval_steps_per_second": 54.512, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 3.6021012257150007e-06, |
|
"loss": 3.4384, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"eval_loss": 3.2886571884155273, |
|
"eval_runtime": 117.3403, |
|
"eval_samples_per_second": 852.069, |
|
"eval_steps_per_second": 53.255, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 3.2667236328125, |
|
"eval_runtime": 115.4584, |
|
"eval_samples_per_second": 865.957, |
|
"eval_steps_per_second": 54.123, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 3.535395647461019e-06, |
|
"loss": 3.4287, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"eval_loss": 3.271289110183716, |
|
"eval_runtime": 115.0142, |
|
"eval_samples_per_second": 869.302, |
|
"eval_steps_per_second": 54.332, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"eval_loss": 3.2639546394348145, |
|
"eval_runtime": 116.2578, |
|
"eval_samples_per_second": 860.003, |
|
"eval_steps_per_second": 53.751, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 3.468690069207038e-06, |
|
"loss": 3.4181, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 3.260748863220215, |
|
"eval_runtime": 115.3695, |
|
"eval_samples_per_second": 866.624, |
|
"eval_steps_per_second": 54.165, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"eval_loss": 3.264313220977783, |
|
"eval_runtime": 115.2949, |
|
"eval_samples_per_second": 867.185, |
|
"eval_steps_per_second": 54.2, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 3.4019844909530565e-06, |
|
"loss": 3.4173, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"eval_loss": 3.262951612472534, |
|
"eval_runtime": 115.3025, |
|
"eval_samples_per_second": 867.128, |
|
"eval_steps_per_second": 54.197, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"eval_loss": 3.2571523189544678, |
|
"eval_runtime": 115.8833, |
|
"eval_samples_per_second": 862.781, |
|
"eval_steps_per_second": 53.925, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 3.3352789126990747e-06, |
|
"loss": 3.4214, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"eval_loss": 3.2727572917938232, |
|
"eval_runtime": 114.8401, |
|
"eval_samples_per_second": 870.62, |
|
"eval_steps_per_second": 54.415, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"eval_loss": 3.2822391986846924, |
|
"eval_runtime": 115.763, |
|
"eval_samples_per_second": 863.678, |
|
"eval_steps_per_second": 53.981, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 3.2685733344450933e-06, |
|
"loss": 3.4223, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"eval_loss": 3.270418882369995, |
|
"eval_runtime": 114.8559, |
|
"eval_samples_per_second": 870.499, |
|
"eval_steps_per_second": 54.407, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"eval_loss": 3.263397216796875, |
|
"eval_runtime": 115.6612, |
|
"eval_samples_per_second": 864.439, |
|
"eval_steps_per_second": 54.029, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 3.2018677561911115e-06, |
|
"loss": 3.417, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"eval_loss": 3.269148349761963, |
|
"eval_runtime": 116.0444, |
|
"eval_samples_per_second": 861.584, |
|
"eval_steps_per_second": 53.85, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"eval_loss": 3.255032539367676, |
|
"eval_runtime": 116.0315, |
|
"eval_samples_per_second": 861.68, |
|
"eval_steps_per_second": 53.856, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 3.1351621779371306e-06, |
|
"loss": 3.4146, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"eval_loss": 3.2528505325317383, |
|
"eval_runtime": 115.6943, |
|
"eval_samples_per_second": 864.191, |
|
"eval_steps_per_second": 54.013, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"eval_loss": 3.271348237991333, |
|
"eval_runtime": 114.7472, |
|
"eval_samples_per_second": 871.324, |
|
"eval_steps_per_second": 54.459, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 3.0684565996831487e-06, |
|
"loss": 3.4186, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_loss": 3.2671616077423096, |
|
"eval_runtime": 115.2164, |
|
"eval_samples_per_second": 867.776, |
|
"eval_steps_per_second": 54.237, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"eval_loss": 3.2542309761047363, |
|
"eval_runtime": 115.2327, |
|
"eval_samples_per_second": 867.653, |
|
"eval_steps_per_second": 54.229, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 3.0017510214291673e-06, |
|
"loss": 3.4082, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 3.2575573921203613, |
|
"eval_runtime": 115.6638, |
|
"eval_samples_per_second": 864.419, |
|
"eval_steps_per_second": 54.027, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"eval_loss": 3.2680304050445557, |
|
"eval_runtime": 116.6886, |
|
"eval_samples_per_second": 856.827, |
|
"eval_steps_per_second": 53.553, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 2.9350454431751855e-06, |
|
"loss": 3.4186, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"eval_loss": 3.266725778579712, |
|
"eval_runtime": 115.7616, |
|
"eval_samples_per_second": 863.689, |
|
"eval_steps_per_second": 53.982, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 3.269421339035034, |
|
"eval_runtime": 115.2493, |
|
"eval_samples_per_second": 867.528, |
|
"eval_steps_per_second": 54.222, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 2.868339864921204e-06, |
|
"loss": 3.4131, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 3.2606043815612793, |
|
"eval_runtime": 116.1606, |
|
"eval_samples_per_second": 860.722, |
|
"eval_steps_per_second": 53.796, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"eval_loss": 3.2622175216674805, |
|
"eval_runtime": 115.2414, |
|
"eval_samples_per_second": 867.587, |
|
"eval_steps_per_second": 54.225, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 2.801634286667223e-06, |
|
"loss": 3.4239, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_loss": 3.2678098678588867, |
|
"eval_runtime": 116.298, |
|
"eval_samples_per_second": 859.705, |
|
"eval_steps_per_second": 53.733, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"eval_loss": 3.2707767486572266, |
|
"eval_runtime": 115.813, |
|
"eval_samples_per_second": 863.305, |
|
"eval_steps_per_second": 53.958, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 2.7349287084132413e-06, |
|
"loss": 3.4197, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"eval_loss": 3.2622435092926025, |
|
"eval_runtime": 116.7137, |
|
"eval_samples_per_second": 856.643, |
|
"eval_steps_per_second": 53.541, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"eval_loss": 3.260528087615967, |
|
"eval_runtime": 115.8618, |
|
"eval_samples_per_second": 862.942, |
|
"eval_steps_per_second": 53.935, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 2.66822313015926e-06, |
|
"loss": 3.4073, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"eval_loss": 3.2647342681884766, |
|
"eval_runtime": 116.0753, |
|
"eval_samples_per_second": 861.355, |
|
"eval_steps_per_second": 53.836, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"eval_loss": 3.2618629932403564, |
|
"eval_runtime": 115.7243, |
|
"eval_samples_per_second": 863.967, |
|
"eval_steps_per_second": 53.999, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 2.601517551905278e-06, |
|
"loss": 3.4167, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_loss": 3.2816412448883057, |
|
"eval_runtime": 116.0459, |
|
"eval_samples_per_second": 861.573, |
|
"eval_steps_per_second": 53.849, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"eval_loss": 3.260328531265259, |
|
"eval_runtime": 115.7751, |
|
"eval_samples_per_second": 863.588, |
|
"eval_steps_per_second": 53.975, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 2.5348119736512967e-06, |
|
"loss": 3.413, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_loss": 3.2661468982696533, |
|
"eval_runtime": 115.5087, |
|
"eval_samples_per_second": 865.58, |
|
"eval_steps_per_second": 54.1, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"eval_loss": 3.2589173316955566, |
|
"eval_runtime": 115.8968, |
|
"eval_samples_per_second": 862.681, |
|
"eval_steps_per_second": 53.919, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 2.4681063953973154e-06, |
|
"loss": 3.4117, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"eval_loss": 3.2688403129577637, |
|
"eval_runtime": 115.5062, |
|
"eval_samples_per_second": 865.598, |
|
"eval_steps_per_second": 54.101, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"eval_loss": 3.2677767276763916, |
|
"eval_runtime": 115.467, |
|
"eval_samples_per_second": 865.892, |
|
"eval_steps_per_second": 54.119, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 2.4014008171433335e-06, |
|
"loss": 3.4103, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"eval_loss": 3.266075372695923, |
|
"eval_runtime": 114.9844, |
|
"eval_samples_per_second": 869.527, |
|
"eval_steps_per_second": 54.347, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"eval_loss": 3.270479917526245, |
|
"eval_runtime": 116.2693, |
|
"eval_samples_per_second": 859.917, |
|
"eval_steps_per_second": 53.746, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 2.334695238889352e-06, |
|
"loss": 3.4074, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"eval_loss": 3.267005443572998, |
|
"eval_runtime": 115.371, |
|
"eval_samples_per_second": 866.613, |
|
"eval_steps_per_second": 54.164, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"eval_loss": 3.2619106769561768, |
|
"eval_runtime": 115.9983, |
|
"eval_samples_per_second": 861.926, |
|
"eval_steps_per_second": 53.871, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 2.2679896606353707e-06, |
|
"loss": 3.4167, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"eval_loss": 3.262441635131836, |
|
"eval_runtime": 115.7529, |
|
"eval_samples_per_second": 863.754, |
|
"eval_steps_per_second": 53.986, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"eval_loss": 3.255234479904175, |
|
"eval_runtime": 115.5701, |
|
"eval_samples_per_second": 865.12, |
|
"eval_steps_per_second": 54.071, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"learning_rate": 2.2012840823813894e-06, |
|
"loss": 3.4195, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"eval_loss": 3.2503316402435303, |
|
"eval_runtime": 115.6488, |
|
"eval_samples_per_second": 864.531, |
|
"eval_steps_per_second": 54.034, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"eval_loss": 3.2605812549591064, |
|
"eval_runtime": 115.723, |
|
"eval_samples_per_second": 863.977, |
|
"eval_steps_per_second": 54.0, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 2.134578504127408e-06, |
|
"loss": 3.4091, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"eval_loss": 3.2811596393585205, |
|
"eval_runtime": 116.4883, |
|
"eval_samples_per_second": 858.301, |
|
"eval_steps_per_second": 53.645, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"eval_loss": 3.2836642265319824, |
|
"eval_runtime": 117.0403, |
|
"eval_samples_per_second": 854.253, |
|
"eval_steps_per_second": 53.392, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 2.067872925873426e-06, |
|
"loss": 3.4116, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_loss": 3.2657785415649414, |
|
"eval_runtime": 116.2602, |
|
"eval_samples_per_second": 859.985, |
|
"eval_steps_per_second": 53.75, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"eval_loss": 3.267613410949707, |
|
"eval_runtime": 115.5655, |
|
"eval_samples_per_second": 865.155, |
|
"eval_steps_per_second": 54.073, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 2.0011673476194448e-06, |
|
"loss": 3.4183, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"eval_loss": 3.277005910873413, |
|
"eval_runtime": 116.4955, |
|
"eval_samples_per_second": 858.248, |
|
"eval_steps_per_second": 53.642, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"eval_loss": 3.2755773067474365, |
|
"eval_runtime": 116.1016, |
|
"eval_samples_per_second": 861.159, |
|
"eval_steps_per_second": 53.824, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 1.9344617693654634e-06, |
|
"loss": 3.4177, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"eval_loss": 3.28764271736145, |
|
"eval_runtime": 115.8567, |
|
"eval_samples_per_second": 862.98, |
|
"eval_steps_per_second": 53.937, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"eval_loss": 3.261235475540161, |
|
"eval_runtime": 116.3096, |
|
"eval_samples_per_second": 859.619, |
|
"eval_steps_per_second": 53.727, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 1.867756191111482e-06, |
|
"loss": 3.4226, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"eval_loss": 3.274751901626587, |
|
"eval_runtime": 116.724, |
|
"eval_samples_per_second": 856.568, |
|
"eval_steps_per_second": 53.537, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_loss": 3.2679269313812256, |
|
"eval_runtime": 115.9032, |
|
"eval_samples_per_second": 862.634, |
|
"eval_steps_per_second": 53.916, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 1.8010506128575004e-06, |
|
"loss": 3.4154, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"eval_loss": 3.2658944129943848, |
|
"eval_runtime": 116.7603, |
|
"eval_samples_per_second": 856.301, |
|
"eval_steps_per_second": 53.52, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"eval_loss": 3.268889904022217, |
|
"eval_runtime": 116.8782, |
|
"eval_samples_per_second": 855.438, |
|
"eval_steps_per_second": 53.466, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 1.734345034603519e-06, |
|
"loss": 3.4199, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"eval_loss": 3.2701141834259033, |
|
"eval_runtime": 116.3032, |
|
"eval_samples_per_second": 859.667, |
|
"eval_steps_per_second": 53.73, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"eval_loss": 3.256370782852173, |
|
"eval_runtime": 116.1051, |
|
"eval_samples_per_second": 861.133, |
|
"eval_steps_per_second": 53.822, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 1.6676394563495374e-06, |
|
"loss": 3.4166, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"eval_loss": 3.2714390754699707, |
|
"eval_runtime": 117.0225, |
|
"eval_samples_per_second": 854.383, |
|
"eval_steps_per_second": 53.4, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"eval_loss": 3.2737603187561035, |
|
"eval_runtime": 115.9388, |
|
"eval_samples_per_second": 862.369, |
|
"eval_steps_per_second": 53.899, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 1.6009338780955558e-06, |
|
"loss": 3.4054, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_loss": 3.2632555961608887, |
|
"eval_runtime": 116.0352, |
|
"eval_samples_per_second": 861.653, |
|
"eval_steps_per_second": 53.854, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"eval_loss": 3.2573704719543457, |
|
"eval_runtime": 116.0533, |
|
"eval_samples_per_second": 861.518, |
|
"eval_steps_per_second": 53.846, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 1.5342282998415744e-06, |
|
"loss": 3.4022, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"eval_loss": 3.2636642456054688, |
|
"eval_runtime": 116.7961, |
|
"eval_samples_per_second": 856.039, |
|
"eval_steps_per_second": 53.503, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"eval_loss": 3.268772840499878, |
|
"eval_runtime": 116.7746, |
|
"eval_samples_per_second": 856.197, |
|
"eval_steps_per_second": 53.513, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 1.4675227215875928e-06, |
|
"loss": 3.408, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"eval_loss": 3.2667033672332764, |
|
"eval_runtime": 116.5476, |
|
"eval_samples_per_second": 857.864, |
|
"eval_steps_per_second": 53.618, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"eval_loss": 3.2577567100524902, |
|
"eval_runtime": 116.0215, |
|
"eval_samples_per_second": 861.754, |
|
"eval_steps_per_second": 53.861, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 1.4008171433336116e-06, |
|
"loss": 3.4065, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"eval_loss": 3.2604563236236572, |
|
"eval_runtime": 116.1892, |
|
"eval_samples_per_second": 860.51, |
|
"eval_steps_per_second": 53.783, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"eval_loss": 3.2768325805664062, |
|
"eval_runtime": 116.824, |
|
"eval_samples_per_second": 855.834, |
|
"eval_steps_per_second": 53.491, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 1.33411156507963e-06, |
|
"loss": 3.4105, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"eval_loss": 3.256869316101074, |
|
"eval_runtime": 116.5621, |
|
"eval_samples_per_second": 857.758, |
|
"eval_steps_per_second": 53.611, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"eval_loss": 3.2519402503967285, |
|
"eval_runtime": 117.0733, |
|
"eval_samples_per_second": 854.012, |
|
"eval_steps_per_second": 53.377, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 1.2674059868256484e-06, |
|
"loss": 3.4011, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"eval_loss": 3.2555432319641113, |
|
"eval_runtime": 116.8888, |
|
"eval_samples_per_second": 855.36, |
|
"eval_steps_per_second": 53.461, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"eval_loss": 3.248750686645508, |
|
"eval_runtime": 116.2807, |
|
"eval_samples_per_second": 859.833, |
|
"eval_steps_per_second": 53.741, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 1.2007004085716668e-06, |
|
"loss": 3.4078, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"eval_loss": 3.2515714168548584, |
|
"eval_runtime": 115.6718, |
|
"eval_samples_per_second": 864.359, |
|
"eval_steps_per_second": 54.024, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"eval_loss": 3.252725124359131, |
|
"eval_runtime": 116.4663, |
|
"eval_samples_per_second": 858.463, |
|
"eval_steps_per_second": 53.655, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 1.1339948303176854e-06, |
|
"loss": 3.4105, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_loss": 3.256073236465454, |
|
"eval_runtime": 116.7382, |
|
"eval_samples_per_second": 856.464, |
|
"eval_steps_per_second": 53.53, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_loss": 3.2580018043518066, |
|
"eval_runtime": 117.5932, |
|
"eval_samples_per_second": 850.236, |
|
"eval_steps_per_second": 53.141, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 1.067289252063704e-06, |
|
"loss": 3.4054, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"eval_loss": 3.2453107833862305, |
|
"eval_runtime": 117.5514, |
|
"eval_samples_per_second": 850.538, |
|
"eval_steps_per_second": 53.16, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 3.2425551414489746, |
|
"eval_runtime": 116.4473, |
|
"eval_samples_per_second": 858.603, |
|
"eval_steps_per_second": 53.664, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 1.0005836738097224e-06, |
|
"loss": 3.3937, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"eval_loss": 3.251696825027466, |
|
"eval_runtime": 116.4315, |
|
"eval_samples_per_second": 858.719, |
|
"eval_steps_per_second": 53.671, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"eval_loss": 3.2446274757385254, |
|
"eval_runtime": 116.261, |
|
"eval_samples_per_second": 859.979, |
|
"eval_steps_per_second": 53.75, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 9.33878095555741e-07, |
|
"loss": 3.4001, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"eval_loss": 3.2449288368225098, |
|
"eval_runtime": 116.9317, |
|
"eval_samples_per_second": 855.046, |
|
"eval_steps_per_second": 53.441, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 18.39, |
|
"eval_loss": 3.252725601196289, |
|
"eval_runtime": 116.826, |
|
"eval_samples_per_second": 855.82, |
|
"eval_steps_per_second": 53.49, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 8.671725173017595e-07, |
|
"loss": 3.413, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"eval_loss": 3.2557225227355957, |
|
"eval_runtime": 117.66, |
|
"eval_samples_per_second": 849.754, |
|
"eval_steps_per_second": 53.111, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"eval_loss": 3.2483036518096924, |
|
"eval_runtime": 116.1271, |
|
"eval_samples_per_second": 860.97, |
|
"eval_steps_per_second": 53.812, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 8.004669390477779e-07, |
|
"loss": 3.3882, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"eval_loss": 3.25201416015625, |
|
"eval_runtime": 117.5264, |
|
"eval_samples_per_second": 850.72, |
|
"eval_steps_per_second": 53.171, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"eval_loss": 3.2354042530059814, |
|
"eval_runtime": 117.3232, |
|
"eval_samples_per_second": 852.193, |
|
"eval_steps_per_second": 53.263, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 7.337613607937964e-07, |
|
"loss": 3.3974, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"eval_loss": 3.2540123462677, |
|
"eval_runtime": 116.4684, |
|
"eval_samples_per_second": 858.448, |
|
"eval_steps_per_second": 53.654, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"eval_loss": 3.242626190185547, |
|
"eval_runtime": 116.2833, |
|
"eval_samples_per_second": 859.814, |
|
"eval_steps_per_second": 53.739, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 6.67055782539815e-07, |
|
"loss": 3.3864, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"eval_loss": 3.234111785888672, |
|
"eval_runtime": 117.5517, |
|
"eval_samples_per_second": 850.537, |
|
"eval_steps_per_second": 53.16, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"eval_loss": 3.240849018096924, |
|
"eval_runtime": 117.6624, |
|
"eval_samples_per_second": 849.736, |
|
"eval_steps_per_second": 53.11, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 6.003502042858334e-07, |
|
"loss": 3.3896, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 3.234224557876587, |
|
"eval_runtime": 117.5095, |
|
"eval_samples_per_second": 850.842, |
|
"eval_steps_per_second": 53.179, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"eval_loss": 3.2414724826812744, |
|
"eval_runtime": 117.6058, |
|
"eval_samples_per_second": 850.145, |
|
"eval_steps_per_second": 53.135, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 19.14, |
|
"learning_rate": 5.33644626031852e-07, |
|
"loss": 3.3845, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 19.14, |
|
"eval_loss": 3.2445499897003174, |
|
"eval_runtime": 117.4897, |
|
"eval_samples_per_second": 850.985, |
|
"eval_steps_per_second": 53.188, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_loss": 3.2422473430633545, |
|
"eval_runtime": 117.2174, |
|
"eval_samples_per_second": 852.962, |
|
"eval_steps_per_second": 53.311, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 4.669390477778705e-07, |
|
"loss": 3.3916, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"eval_loss": 3.2379391193389893, |
|
"eval_runtime": 116.6572, |
|
"eval_samples_per_second": 857.058, |
|
"eval_steps_per_second": 53.567, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"eval_loss": 3.241091012954712, |
|
"eval_runtime": 117.6824, |
|
"eval_samples_per_second": 849.592, |
|
"eval_steps_per_second": 53.101, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 4.0023346952388894e-07, |
|
"loss": 3.3919, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"eval_loss": 3.24294114112854, |
|
"eval_runtime": 116.557, |
|
"eval_samples_per_second": 857.795, |
|
"eval_steps_per_second": 53.613, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"eval_loss": 3.2372183799743652, |
|
"eval_runtime": 117.1036, |
|
"eval_samples_per_second": 853.791, |
|
"eval_steps_per_second": 53.363, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 3.335278912699075e-07, |
|
"loss": 3.39, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"eval_loss": 3.2379844188690186, |
|
"eval_runtime": 118.6177, |
|
"eval_samples_per_second": 842.892, |
|
"eval_steps_per_second": 52.682, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"eval_loss": 3.2353270053863525, |
|
"eval_runtime": 117.8042, |
|
"eval_samples_per_second": 848.713, |
|
"eval_steps_per_second": 53.046, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 19.68, |
|
"learning_rate": 2.66822313015926e-07, |
|
"loss": 3.3905, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.68, |
|
"eval_loss": 3.2327044010162354, |
|
"eval_runtime": 117.5491, |
|
"eval_samples_per_second": 850.555, |
|
"eval_steps_per_second": 53.161, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.74, |
|
"eval_loss": 3.2494277954101562, |
|
"eval_runtime": 117.2833, |
|
"eval_samples_per_second": 852.483, |
|
"eval_steps_per_second": 53.281, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 2.0011673476194447e-07, |
|
"loss": 3.3826, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 3.2369370460510254, |
|
"eval_runtime": 117.3893, |
|
"eval_samples_per_second": 851.713, |
|
"eval_steps_per_second": 53.233, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"eval_loss": 3.2389721870422363, |
|
"eval_runtime": 117.5802, |
|
"eval_samples_per_second": 850.33, |
|
"eval_steps_per_second": 53.147, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 1.33411156507963e-07, |
|
"loss": 3.3935, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"eval_loss": 3.241490125656128, |
|
"eval_runtime": 116.6908, |
|
"eval_samples_per_second": 856.811, |
|
"eval_steps_per_second": 53.552, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"eval_loss": 3.2485716342926025, |
|
"eval_runtime": 117.193, |
|
"eval_samples_per_second": 853.14, |
|
"eval_steps_per_second": 53.322, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"learning_rate": 6.67055782539815e-08, |
|
"loss": 3.3846, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"eval_loss": 3.2353618144989014, |
|
"eval_runtime": 116.9874, |
|
"eval_samples_per_second": 854.639, |
|
"eval_steps_per_second": 53.416, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 20.15, |
|
"eval_loss": 3.2465925216674805, |
|
"eval_runtime": 117.8085, |
|
"eval_samples_per_second": 848.683, |
|
"eval_steps_per_second": 53.044, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 0.0, |
|
"loss": 3.3875, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"eval_loss": 3.2425458431243896, |
|
"eval_runtime": 117.6341, |
|
"eval_samples_per_second": 849.941, |
|
"eval_steps_per_second": 53.122, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"step": 2400000, |
|
"total_flos": 7.857549112634404e+17, |
|
"train_loss": 3.3721608984375, |
|
"train_runtime": 188638.1929, |
|
"train_samples_per_second": 203.564, |
|
"train_steps_per_second": 12.723 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 21, |
|
"save_steps": 32000, |
|
"total_flos": 7.857549112634404e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|