|
{ |
|
"best_metric": 2.249734878540039, |
|
"best_model_checkpoint": "./model_tweets_2020_Q4_75/checkpoint-1824000", |
|
"epoch": 20.210356123316856, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.578623056411743, |
|
"eval_runtime": 123.0205, |
|
"eval_samples_per_second": 812.873, |
|
"eval_steps_per_second": 50.805, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.8197, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.4787847995758057, |
|
"eval_runtime": 121.9308, |
|
"eval_samples_per_second": 820.137, |
|
"eval_steps_per_second": 51.259, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.434333086013794, |
|
"eval_runtime": 121.4115, |
|
"eval_samples_per_second": 823.645, |
|
"eval_steps_per_second": 51.478, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.5564, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.4142847061157227, |
|
"eval_runtime": 121.0734, |
|
"eval_samples_per_second": 825.945, |
|
"eval_steps_per_second": 51.622, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.3825528621673584, |
|
"eval_runtime": 121.0153, |
|
"eval_samples_per_second": 826.341, |
|
"eval_steps_per_second": 51.646, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.4967, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.3654873371124268, |
|
"eval_runtime": 120.7184, |
|
"eval_samples_per_second": 828.374, |
|
"eval_steps_per_second": 51.773, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.3449532985687256, |
|
"eval_runtime": 121.1097, |
|
"eval_samples_per_second": 825.698, |
|
"eval_steps_per_second": 51.606, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.476, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.3501105308532715, |
|
"eval_runtime": 121.0094, |
|
"eval_samples_per_second": 826.382, |
|
"eval_steps_per_second": 51.649, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.331531524658203, |
|
"eval_runtime": 121.5665, |
|
"eval_samples_per_second": 822.595, |
|
"eval_steps_per_second": 51.412, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.4525, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.3285999298095703, |
|
"eval_runtime": 122.0584, |
|
"eval_samples_per_second": 819.28, |
|
"eval_steps_per_second": 51.205, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.327819585800171, |
|
"eval_runtime": 121.7307, |
|
"eval_samples_per_second": 821.486, |
|
"eval_steps_per_second": 51.343, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.445, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.3187131881713867, |
|
"eval_runtime": 121.9578, |
|
"eval_samples_per_second": 819.956, |
|
"eval_steps_per_second": 51.247, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 2.3113441467285156, |
|
"eval_runtime": 121.6182, |
|
"eval_samples_per_second": 822.245, |
|
"eval_steps_per_second": 51.39, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.438, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.3128702640533447, |
|
"eval_runtime": 120.7683, |
|
"eval_samples_per_second": 828.032, |
|
"eval_steps_per_second": 51.752, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.3160533905029297, |
|
"eval_runtime": 120.7418, |
|
"eval_samples_per_second": 828.214, |
|
"eval_steps_per_second": 51.763, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.4233, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.3008599281311035, |
|
"eval_runtime": 121.6785, |
|
"eval_samples_per_second": 821.838, |
|
"eval_steps_per_second": 51.365, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.3072261810302734, |
|
"eval_runtime": 121.7737, |
|
"eval_samples_per_second": 821.196, |
|
"eval_steps_per_second": 51.325, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.4182, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.3069441318511963, |
|
"eval_runtime": 121.4616, |
|
"eval_samples_per_second": 823.306, |
|
"eval_steps_per_second": 51.457, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.3059937953948975, |
|
"eval_runtime": 121.1919, |
|
"eval_samples_per_second": 825.137, |
|
"eval_steps_per_second": 51.571, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.418, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.296349048614502, |
|
"eval_runtime": 120.8775, |
|
"eval_samples_per_second": 827.284, |
|
"eval_steps_per_second": 51.705, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.3016867637634277, |
|
"eval_runtime": 122.1083, |
|
"eval_samples_per_second": 818.945, |
|
"eval_steps_per_second": 51.184, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.4106, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.2863121032714844, |
|
"eval_runtime": 121.754, |
|
"eval_samples_per_second": 821.328, |
|
"eval_steps_per_second": 51.333, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.2870755195617676, |
|
"eval_runtime": 121.9197, |
|
"eval_samples_per_second": 820.212, |
|
"eval_steps_per_second": 51.263, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.4093, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.2869644165039062, |
|
"eval_runtime": 123.0294, |
|
"eval_samples_per_second": 812.814, |
|
"eval_steps_per_second": 50.801, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.2844796180725098, |
|
"eval_runtime": 122.1632, |
|
"eval_samples_per_second": 818.577, |
|
"eval_steps_per_second": 51.161, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.4124, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.2971222400665283, |
|
"eval_runtime": 122.9029, |
|
"eval_samples_per_second": 813.651, |
|
"eval_steps_per_second": 50.853, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.283277988433838, |
|
"eval_runtime": 122.8004, |
|
"eval_samples_per_second": 814.33, |
|
"eval_steps_per_second": 50.896, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.4031, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.286567449569702, |
|
"eval_runtime": 123.6365, |
|
"eval_samples_per_second": 808.822, |
|
"eval_steps_per_second": 50.551, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.2832698822021484, |
|
"eval_runtime": 124.0411, |
|
"eval_samples_per_second": 806.184, |
|
"eval_steps_per_second": 50.387, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.4056, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.2877089977264404, |
|
"eval_runtime": 123.3004, |
|
"eval_samples_per_second": 811.028, |
|
"eval_steps_per_second": 50.689, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.278871774673462, |
|
"eval_runtime": 121.4394, |
|
"eval_samples_per_second": 823.456, |
|
"eval_steps_per_second": 51.466, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.4035, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.287219762802124, |
|
"eval_runtime": 122.192, |
|
"eval_samples_per_second": 818.384, |
|
"eval_steps_per_second": 51.149, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.277144193649292, |
|
"eval_runtime": 122.8125, |
|
"eval_samples_per_second": 814.25, |
|
"eval_steps_per_second": 50.891, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.4068, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.2823851108551025, |
|
"eval_runtime": 122.2322, |
|
"eval_samples_per_second": 818.115, |
|
"eval_steps_per_second": 51.132, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.268087148666382, |
|
"eval_runtime": 122.1078, |
|
"eval_samples_per_second": 818.949, |
|
"eval_steps_per_second": 51.184, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.4069, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.2866132259368896, |
|
"eval_runtime": 122.7666, |
|
"eval_samples_per_second": 814.554, |
|
"eval_steps_per_second": 50.91, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.2837841510772705, |
|
"eval_runtime": 122.6627, |
|
"eval_samples_per_second": 815.244, |
|
"eval_steps_per_second": 50.953, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.4059, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.280437707901001, |
|
"eval_runtime": 121.9348, |
|
"eval_samples_per_second": 820.111, |
|
"eval_steps_per_second": 51.257, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 2.275745391845703, |
|
"eval_runtime": 121.6125, |
|
"eval_samples_per_second": 822.284, |
|
"eval_steps_per_second": 51.393, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.3997, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.2774524688720703, |
|
"eval_runtime": 121.7427, |
|
"eval_samples_per_second": 821.404, |
|
"eval_steps_per_second": 51.338, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.2692716121673584, |
|
"eval_runtime": 121.9867, |
|
"eval_samples_per_second": 819.762, |
|
"eval_steps_per_second": 51.235, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.4025, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.2751219272613525, |
|
"eval_runtime": 123.4237, |
|
"eval_samples_per_second": 810.217, |
|
"eval_steps_per_second": 50.639, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 2.2686424255371094, |
|
"eval_runtime": 122.8579, |
|
"eval_samples_per_second": 813.948, |
|
"eval_steps_per_second": 50.872, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.399, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 2.2784106731414795, |
|
"eval_runtime": 123.6612, |
|
"eval_samples_per_second": 808.661, |
|
"eval_steps_per_second": 50.541, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.278184175491333, |
|
"eval_runtime": 122.1432, |
|
"eval_samples_per_second": 818.711, |
|
"eval_steps_per_second": 51.169, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.3953, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 2.2693536281585693, |
|
"eval_runtime": 123.798, |
|
"eval_samples_per_second": 807.767, |
|
"eval_steps_per_second": 50.485, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.263842821121216, |
|
"eval_runtime": 123.5987, |
|
"eval_samples_per_second": 809.07, |
|
"eval_steps_per_second": 50.567, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.4002, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.2785000801086426, |
|
"eval_runtime": 123.8322, |
|
"eval_samples_per_second": 807.544, |
|
"eval_steps_per_second": 50.472, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 2.278545618057251, |
|
"eval_runtime": 123.0521, |
|
"eval_samples_per_second": 812.664, |
|
"eval_steps_per_second": 50.791, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.4035, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.277418375015259, |
|
"eval_runtime": 121.6188, |
|
"eval_samples_per_second": 822.241, |
|
"eval_steps_per_second": 51.39, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 2.273623466491699, |
|
"eval_runtime": 123.5249, |
|
"eval_samples_per_second": 809.554, |
|
"eval_steps_per_second": 50.597, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.3985, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.28084397315979, |
|
"eval_runtime": 124.25, |
|
"eval_samples_per_second": 804.829, |
|
"eval_steps_per_second": 50.302, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.2672338485717773, |
|
"eval_runtime": 122.2338, |
|
"eval_samples_per_second": 818.104, |
|
"eval_steps_per_second": 51.132, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.3996, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.2764840126037598, |
|
"eval_runtime": 121.8322, |
|
"eval_samples_per_second": 820.801, |
|
"eval_steps_per_second": 51.3, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.2748093605041504, |
|
"eval_runtime": 123.7721, |
|
"eval_samples_per_second": 807.937, |
|
"eval_steps_per_second": 50.496, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.4052, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 2.2646682262420654, |
|
"eval_runtime": 124.0373, |
|
"eval_samples_per_second": 806.209, |
|
"eval_steps_per_second": 50.388, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.2776055335998535, |
|
"eval_runtime": 122.9793, |
|
"eval_samples_per_second": 813.145, |
|
"eval_steps_per_second": 50.822, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.4025, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.2734146118164062, |
|
"eval_runtime": 122.7865, |
|
"eval_samples_per_second": 814.422, |
|
"eval_steps_per_second": 50.901, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 2.258847713470459, |
|
"eval_runtime": 122.3563, |
|
"eval_samples_per_second": 817.285, |
|
"eval_steps_per_second": 51.08, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.4082, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.272439479827881, |
|
"eval_runtime": 121.6654, |
|
"eval_samples_per_second": 821.927, |
|
"eval_steps_per_second": 51.37, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.2740166187286377, |
|
"eval_runtime": 121.9787, |
|
"eval_samples_per_second": 819.815, |
|
"eval_steps_per_second": 51.238, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.3993, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 2.272566556930542, |
|
"eval_runtime": 122.2763, |
|
"eval_samples_per_second": 817.82, |
|
"eval_steps_per_second": 51.114, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.282744884490967, |
|
"eval_runtime": 122.9895, |
|
"eval_samples_per_second": 813.078, |
|
"eval_steps_per_second": 50.817, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.4029, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.2727839946746826, |
|
"eval_runtime": 123.1921, |
|
"eval_samples_per_second": 811.74, |
|
"eval_steps_per_second": 50.734, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 2.2833046913146973, |
|
"eval_runtime": 122.6812, |
|
"eval_samples_per_second": 815.121, |
|
"eval_steps_per_second": 50.945, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.407, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 2.2636303901672363, |
|
"eval_runtime": 121.8819, |
|
"eval_samples_per_second": 820.467, |
|
"eval_steps_per_second": 51.279, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 2.2689170837402344, |
|
"eval_runtime": 121.6415, |
|
"eval_samples_per_second": 822.088, |
|
"eval_steps_per_second": 51.38, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.4039, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.2741005420684814, |
|
"eval_runtime": 122.571, |
|
"eval_samples_per_second": 815.854, |
|
"eval_steps_per_second": 50.991, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.271497964859009, |
|
"eval_runtime": 121.9112, |
|
"eval_samples_per_second": 820.269, |
|
"eval_steps_per_second": 51.267, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.3983, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.280510187149048, |
|
"eval_runtime": 122.4175, |
|
"eval_samples_per_second": 816.877, |
|
"eval_steps_per_second": 51.055, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 2.274358034133911, |
|
"eval_runtime": 123.0247, |
|
"eval_samples_per_second": 812.845, |
|
"eval_steps_per_second": 50.803, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.3974, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.2677698135375977, |
|
"eval_runtime": 122.4679, |
|
"eval_samples_per_second": 816.54, |
|
"eval_steps_per_second": 51.034, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.27225399017334, |
|
"eval_runtime": 121.7124, |
|
"eval_samples_per_second": 821.609, |
|
"eval_steps_per_second": 51.351, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.388, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 2.2655186653137207, |
|
"eval_runtime": 123.2663, |
|
"eval_samples_per_second": 811.252, |
|
"eval_steps_per_second": 50.703, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.2716100215911865, |
|
"eval_runtime": 122.2663, |
|
"eval_samples_per_second": 817.887, |
|
"eval_steps_per_second": 51.118, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.3921, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 2.27711820602417, |
|
"eval_runtime": 123.6023, |
|
"eval_samples_per_second": 809.046, |
|
"eval_steps_per_second": 50.565, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 2.276036262512207, |
|
"eval_runtime": 122.8165, |
|
"eval_samples_per_second": 814.223, |
|
"eval_steps_per_second": 50.889, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.3963, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 2.2806384563446045, |
|
"eval_runtime": 124.0725, |
|
"eval_samples_per_second": 805.98, |
|
"eval_steps_per_second": 50.374, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 2.2697396278381348, |
|
"eval_runtime": 123.9418, |
|
"eval_samples_per_second": 806.831, |
|
"eval_steps_per_second": 50.427, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.3891, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 2.270462989807129, |
|
"eval_runtime": 123.8524, |
|
"eval_samples_per_second": 807.412, |
|
"eval_steps_per_second": 50.463, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.270759105682373, |
|
"eval_runtime": 125.0675, |
|
"eval_samples_per_second": 799.568, |
|
"eval_steps_per_second": 49.973, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.3968, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 2.2689437866210938, |
|
"eval_runtime": 123.4597, |
|
"eval_samples_per_second": 809.981, |
|
"eval_steps_per_second": 50.624, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 2.265118360519409, |
|
"eval_runtime": 123.1184, |
|
"eval_samples_per_second": 812.226, |
|
"eval_steps_per_second": 50.764, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.3951, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.276594638824463, |
|
"eval_runtime": 122.2204, |
|
"eval_samples_per_second": 818.194, |
|
"eval_steps_per_second": 51.137, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.2717082500457764, |
|
"eval_runtime": 121.454, |
|
"eval_samples_per_second": 823.357, |
|
"eval_steps_per_second": 51.46, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.3986, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 2.262892246246338, |
|
"eval_runtime": 121.1279, |
|
"eval_samples_per_second": 825.574, |
|
"eval_steps_per_second": 51.598, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 2.2623705863952637, |
|
"eval_runtime": 122.6601, |
|
"eval_samples_per_second": 815.261, |
|
"eval_steps_per_second": 50.954, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.3985, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 2.2692642211914062, |
|
"eval_runtime": 122.4517, |
|
"eval_samples_per_second": 816.648, |
|
"eval_steps_per_second": 51.041, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.2631709575653076, |
|
"eval_runtime": 122.6658, |
|
"eval_samples_per_second": 815.223, |
|
"eval_steps_per_second": 50.951, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.4009, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.271480083465576, |
|
"eval_runtime": 123.6589, |
|
"eval_samples_per_second": 808.676, |
|
"eval_steps_per_second": 50.542, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.2654306888580322, |
|
"eval_runtime": 122.4949, |
|
"eval_samples_per_second": 816.36, |
|
"eval_steps_per_second": 51.023, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.4015, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.2700347900390625, |
|
"eval_runtime": 123.7479, |
|
"eval_samples_per_second": 808.095, |
|
"eval_steps_per_second": 50.506, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.2672719955444336, |
|
"eval_runtime": 122.6192, |
|
"eval_samples_per_second": 815.533, |
|
"eval_steps_per_second": 50.971, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.3927, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.2701234817504883, |
|
"eval_runtime": 122.5955, |
|
"eval_samples_per_second": 815.691, |
|
"eval_steps_per_second": 50.981, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.266552448272705, |
|
"eval_runtime": 122.7923, |
|
"eval_samples_per_second": 814.383, |
|
"eval_steps_per_second": 50.899, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.3941, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 2.2584707736968994, |
|
"eval_runtime": 122.6812, |
|
"eval_samples_per_second": 815.121, |
|
"eval_steps_per_second": 50.945, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 2.2679240703582764, |
|
"eval_runtime": 123.1769, |
|
"eval_samples_per_second": 811.84, |
|
"eval_steps_per_second": 50.74, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.393, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 2.2623932361602783, |
|
"eval_runtime": 122.5019, |
|
"eval_samples_per_second": 816.314, |
|
"eval_steps_per_second": 51.02, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 2.2706010341644287, |
|
"eval_runtime": 121.9417, |
|
"eval_samples_per_second": 820.064, |
|
"eval_steps_per_second": 51.254, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.4025, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.278485059738159, |
|
"eval_runtime": 121.1371, |
|
"eval_samples_per_second": 825.511, |
|
"eval_steps_per_second": 51.594, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 2.265829086303711, |
|
"eval_runtime": 122.5144, |
|
"eval_samples_per_second": 816.231, |
|
"eval_steps_per_second": 51.014, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.3992, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 2.2557029724121094, |
|
"eval_runtime": 122.5492, |
|
"eval_samples_per_second": 815.999, |
|
"eval_steps_per_second": 51.0, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 2.2580955028533936, |
|
"eval_runtime": 122.7074, |
|
"eval_samples_per_second": 814.947, |
|
"eval_steps_per_second": 50.934, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.4055, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_loss": 2.2724661827087402, |
|
"eval_runtime": 123.1095, |
|
"eval_samples_per_second": 812.285, |
|
"eval_steps_per_second": 50.768, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 2.2608320713043213, |
|
"eval_runtime": 123.2985, |
|
"eval_samples_per_second": 811.04, |
|
"eval_steps_per_second": 50.69, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.3965, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 2.2716965675354004, |
|
"eval_runtime": 123.0254, |
|
"eval_samples_per_second": 812.84, |
|
"eval_steps_per_second": 50.803, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 2.2643144130706787, |
|
"eval_runtime": 124.1312, |
|
"eval_samples_per_second": 805.599, |
|
"eval_steps_per_second": 50.35, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.4028, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 2.269681930541992, |
|
"eval_runtime": 124.1538, |
|
"eval_samples_per_second": 805.453, |
|
"eval_steps_per_second": 50.341, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 2.269113063812256, |
|
"eval_runtime": 122.6058, |
|
"eval_samples_per_second": 815.622, |
|
"eval_steps_per_second": 50.976, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.3943, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 2.2628066539764404, |
|
"eval_runtime": 123.121, |
|
"eval_samples_per_second": 812.209, |
|
"eval_steps_per_second": 50.763, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 2.262988567352295, |
|
"eval_runtime": 122.3602, |
|
"eval_samples_per_second": 817.259, |
|
"eval_steps_per_second": 51.079, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.3918, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 2.269148349761963, |
|
"eval_runtime": 121.9179, |
|
"eval_samples_per_second": 820.224, |
|
"eval_steps_per_second": 51.264, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 2.2778499126434326, |
|
"eval_runtime": 121.8423, |
|
"eval_samples_per_second": 820.733, |
|
"eval_steps_per_second": 51.296, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.3897, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 2.2576756477355957, |
|
"eval_runtime": 122.7735, |
|
"eval_samples_per_second": 814.508, |
|
"eval_steps_per_second": 50.907, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 2.2689757347106934, |
|
"eval_runtime": 122.5565, |
|
"eval_samples_per_second": 815.95, |
|
"eval_steps_per_second": 50.997, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.3996, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 2.2631115913391113, |
|
"eval_runtime": 122.8254, |
|
"eval_samples_per_second": 814.164, |
|
"eval_steps_per_second": 50.885, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 2.260575294494629, |
|
"eval_runtime": 123.4359, |
|
"eval_samples_per_second": 810.137, |
|
"eval_steps_per_second": 50.634, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.4016, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 2.274198293685913, |
|
"eval_runtime": 122.9226, |
|
"eval_samples_per_second": 813.52, |
|
"eval_steps_per_second": 50.845, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 2.270534038543701, |
|
"eval_runtime": 122.6348, |
|
"eval_samples_per_second": 815.429, |
|
"eval_steps_per_second": 50.964, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.3989, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 2.26939058303833, |
|
"eval_runtime": 122.9893, |
|
"eval_samples_per_second": 813.079, |
|
"eval_steps_per_second": 50.817, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 2.26764178276062, |
|
"eval_runtime": 123.268, |
|
"eval_samples_per_second": 811.241, |
|
"eval_steps_per_second": 50.703, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.3989, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 2.265949010848999, |
|
"eval_runtime": 122.977, |
|
"eval_samples_per_second": 813.16, |
|
"eval_steps_per_second": 50.823, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 2.2675843238830566, |
|
"eval_runtime": 123.7208, |
|
"eval_samples_per_second": 808.272, |
|
"eval_steps_per_second": 50.517, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.3995, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 2.2751786708831787, |
|
"eval_runtime": 124.1169, |
|
"eval_samples_per_second": 805.692, |
|
"eval_steps_per_second": 50.356, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 2.276003360748291, |
|
"eval_runtime": 122.615, |
|
"eval_samples_per_second": 815.561, |
|
"eval_steps_per_second": 50.973, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.3958, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 2.2779273986816406, |
|
"eval_runtime": 124.3332, |
|
"eval_samples_per_second": 804.29, |
|
"eval_steps_per_second": 50.268, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 2.2626090049743652, |
|
"eval_runtime": 124.2615, |
|
"eval_samples_per_second": 804.754, |
|
"eval_steps_per_second": 50.297, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.3962, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 2.264645576477051, |
|
"eval_runtime": 125.4442, |
|
"eval_samples_per_second": 797.167, |
|
"eval_steps_per_second": 49.823, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 2.264505386352539, |
|
"eval_runtime": 123.3616, |
|
"eval_samples_per_second": 810.625, |
|
"eval_steps_per_second": 50.664, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.3966, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 2.2602715492248535, |
|
"eval_runtime": 125.0544, |
|
"eval_samples_per_second": 799.652, |
|
"eval_steps_per_second": 49.978, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"eval_loss": 2.254918098449707, |
|
"eval_runtime": 125.3507, |
|
"eval_samples_per_second": 797.762, |
|
"eval_steps_per_second": 49.86, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.3934, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 2.2668979167938232, |
|
"eval_runtime": 124.2577, |
|
"eval_samples_per_second": 804.779, |
|
"eval_steps_per_second": 50.299, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 2.2576217651367188, |
|
"eval_runtime": 124.3543, |
|
"eval_samples_per_second": 804.154, |
|
"eval_steps_per_second": 50.26, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.3918, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 2.270716905593872, |
|
"eval_runtime": 124.4458, |
|
"eval_samples_per_second": 803.563, |
|
"eval_steps_per_second": 50.223, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 2.261819362640381, |
|
"eval_runtime": 123.2038, |
|
"eval_samples_per_second": 811.663, |
|
"eval_steps_per_second": 50.729, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.401, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 2.2680015563964844, |
|
"eval_runtime": 122.7384, |
|
"eval_samples_per_second": 814.741, |
|
"eval_steps_per_second": 50.921, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_loss": 2.2721402645111084, |
|
"eval_runtime": 122.6217, |
|
"eval_samples_per_second": 815.516, |
|
"eval_steps_per_second": 50.97, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.3938, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 2.2636525630950928, |
|
"eval_runtime": 123.3896, |
|
"eval_samples_per_second": 810.441, |
|
"eval_steps_per_second": 50.653, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"eval_loss": 2.265684127807617, |
|
"eval_runtime": 124.4365, |
|
"eval_samples_per_second": 803.622, |
|
"eval_steps_per_second": 50.226, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.3982, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 2.2576382160186768, |
|
"eval_runtime": 123.4041, |
|
"eval_samples_per_second": 810.346, |
|
"eval_steps_per_second": 50.647, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 2.2633111476898193, |
|
"eval_runtime": 124.0094, |
|
"eval_samples_per_second": 806.39, |
|
"eval_steps_per_second": 50.399, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.4006, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 2.2667734622955322, |
|
"eval_runtime": 124.2747, |
|
"eval_samples_per_second": 804.669, |
|
"eval_steps_per_second": 50.292, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 2.265963077545166, |
|
"eval_runtime": 123.6085, |
|
"eval_samples_per_second": 809.006, |
|
"eval_steps_per_second": 50.563, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.3971, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 2.265885829925537, |
|
"eval_runtime": 122.706, |
|
"eval_samples_per_second": 814.956, |
|
"eval_steps_per_second": 50.935, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 2.272273063659668, |
|
"eval_runtime": 122.3266, |
|
"eval_samples_per_second": 817.483, |
|
"eval_steps_per_second": 51.093, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.4004, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_loss": 2.2626895904541016, |
|
"eval_runtime": 122.5255, |
|
"eval_samples_per_second": 816.157, |
|
"eval_steps_per_second": 51.01, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 2.2708263397216797, |
|
"eval_runtime": 123.4094, |
|
"eval_samples_per_second": 810.311, |
|
"eval_steps_per_second": 50.644, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.3903, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 2.257643222808838, |
|
"eval_runtime": 124.6842, |
|
"eval_samples_per_second": 802.026, |
|
"eval_steps_per_second": 50.127, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 2.2625293731689453, |
|
"eval_runtime": 123.3124, |
|
"eval_samples_per_second": 810.949, |
|
"eval_steps_per_second": 50.684, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.3909, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 2.254274606704712, |
|
"eval_runtime": 123.3737, |
|
"eval_samples_per_second": 810.545, |
|
"eval_steps_per_second": 50.659, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"eval_loss": 2.259488582611084, |
|
"eval_runtime": 123.697, |
|
"eval_samples_per_second": 808.427, |
|
"eval_steps_per_second": 50.527, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.4004, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"eval_loss": 2.2560548782348633, |
|
"eval_runtime": 124.7416, |
|
"eval_samples_per_second": 801.657, |
|
"eval_steps_per_second": 50.104, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"eval_loss": 2.26065993309021, |
|
"eval_runtime": 124.3446, |
|
"eval_samples_per_second": 804.217, |
|
"eval_steps_per_second": 50.264, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.3964, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_loss": 2.2605860233306885, |
|
"eval_runtime": 124.6826, |
|
"eval_samples_per_second": 802.037, |
|
"eval_steps_per_second": 50.127, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"eval_loss": 2.2635273933410645, |
|
"eval_runtime": 124.8015, |
|
"eval_samples_per_second": 801.272, |
|
"eval_steps_per_second": 50.08, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.4007, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"eval_loss": 2.262328863143921, |
|
"eval_runtime": 124.2584, |
|
"eval_samples_per_second": 804.775, |
|
"eval_steps_per_second": 50.298, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"eval_loss": 2.2696375846862793, |
|
"eval_runtime": 124.2659, |
|
"eval_samples_per_second": 804.726, |
|
"eval_steps_per_second": 50.295, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.3993, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"eval_loss": 2.2700283527374268, |
|
"eval_runtime": 125.1405, |
|
"eval_samples_per_second": 799.102, |
|
"eval_steps_per_second": 49.944, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"eval_loss": 2.2731199264526367, |
|
"eval_runtime": 124.158, |
|
"eval_samples_per_second": 805.425, |
|
"eval_steps_per_second": 50.339, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.4048, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"eval_loss": 2.2701127529144287, |
|
"eval_runtime": 123.8055, |
|
"eval_samples_per_second": 807.719, |
|
"eval_steps_per_second": 50.482, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_loss": 2.270094871520996, |
|
"eval_runtime": 124.7299, |
|
"eval_samples_per_second": 801.732, |
|
"eval_steps_per_second": 50.108, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.3936, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"eval_loss": 2.2705767154693604, |
|
"eval_runtime": 124.6548, |
|
"eval_samples_per_second": 802.216, |
|
"eval_steps_per_second": 50.138, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_loss": 2.259617567062378, |
|
"eval_runtime": 124.0017, |
|
"eval_samples_per_second": 806.441, |
|
"eval_steps_per_second": 50.403, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.3951, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"eval_loss": 2.2812488079071045, |
|
"eval_runtime": 123.9447, |
|
"eval_samples_per_second": 806.811, |
|
"eval_steps_per_second": 50.426, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"eval_loss": 2.25225830078125, |
|
"eval_runtime": 122.9659, |
|
"eval_samples_per_second": 813.234, |
|
"eval_steps_per_second": 50.827, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.39, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"eval_loss": 2.259617805480957, |
|
"eval_runtime": 122.4932, |
|
"eval_samples_per_second": 816.372, |
|
"eval_steps_per_second": 51.023, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"eval_loss": 2.272284507751465, |
|
"eval_runtime": 123.4502, |
|
"eval_samples_per_second": 810.043, |
|
"eval_steps_per_second": 50.628, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.393, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_loss": 2.2695858478546143, |
|
"eval_runtime": 124.1104, |
|
"eval_samples_per_second": 805.734, |
|
"eval_steps_per_second": 50.358, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"eval_loss": 2.261369466781616, |
|
"eval_runtime": 123.2613, |
|
"eval_samples_per_second": 811.285, |
|
"eval_steps_per_second": 50.705, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.3915, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"eval_loss": 2.268724203109741, |
|
"eval_runtime": 122.9565, |
|
"eval_samples_per_second": 813.296, |
|
"eval_steps_per_second": 50.831, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_loss": 2.2566559314727783, |
|
"eval_runtime": 123.2157, |
|
"eval_samples_per_second": 811.585, |
|
"eval_steps_per_second": 50.724, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.405, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"eval_loss": 2.2717325687408447, |
|
"eval_runtime": 122.7605, |
|
"eval_samples_per_second": 814.594, |
|
"eval_steps_per_second": 50.912, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"eval_loss": 2.2732982635498047, |
|
"eval_runtime": 123.3107, |
|
"eval_samples_per_second": 810.959, |
|
"eval_steps_per_second": 50.685, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.3898, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"eval_loss": 2.2679965496063232, |
|
"eval_runtime": 122.9066, |
|
"eval_samples_per_second": 813.626, |
|
"eval_steps_per_second": 50.852, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_loss": 2.2626819610595703, |
|
"eval_runtime": 123.3147, |
|
"eval_samples_per_second": 810.933, |
|
"eval_steps_per_second": 50.683, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.3956, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"eval_loss": 2.268873929977417, |
|
"eval_runtime": 125.7341, |
|
"eval_samples_per_second": 795.329, |
|
"eval_steps_per_second": 49.708, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"eval_loss": 2.2668938636779785, |
|
"eval_runtime": 125.1617, |
|
"eval_samples_per_second": 798.966, |
|
"eval_steps_per_second": 49.935, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.4041, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_loss": 2.2610137462615967, |
|
"eval_runtime": 125.5606, |
|
"eval_samples_per_second": 796.428, |
|
"eval_steps_per_second": 49.777, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"eval_loss": 2.2688722610473633, |
|
"eval_runtime": 126.602, |
|
"eval_samples_per_second": 789.877, |
|
"eval_steps_per_second": 49.367, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.3968, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 2.274923086166382, |
|
"eval_runtime": 126.5506, |
|
"eval_samples_per_second": 790.198, |
|
"eval_steps_per_second": 49.387, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"eval_loss": 2.264037609100342, |
|
"eval_runtime": 125.1647, |
|
"eval_samples_per_second": 798.948, |
|
"eval_steps_per_second": 49.934, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.4048, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 2.260209798812866, |
|
"eval_runtime": 124.3269, |
|
"eval_samples_per_second": 804.331, |
|
"eval_steps_per_second": 50.271, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_loss": 2.269806146621704, |
|
"eval_runtime": 123.3478, |
|
"eval_samples_per_second": 810.715, |
|
"eval_steps_per_second": 50.67, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.4025, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"eval_loss": 2.2544891834259033, |
|
"eval_runtime": 125.1006, |
|
"eval_samples_per_second": 799.357, |
|
"eval_steps_per_second": 49.96, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 2.2685253620147705, |
|
"eval_runtime": 123.5222, |
|
"eval_samples_per_second": 809.571, |
|
"eval_steps_per_second": 50.598, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.3977, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"eval_loss": 2.262343406677246, |
|
"eval_runtime": 123.0303, |
|
"eval_samples_per_second": 812.808, |
|
"eval_steps_per_second": 50.8, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"eval_loss": 2.2679247856140137, |
|
"eval_runtime": 123.7046, |
|
"eval_samples_per_second": 808.378, |
|
"eval_steps_per_second": 50.524, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.3965, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"eval_loss": 2.250502109527588, |
|
"eval_runtime": 124.441, |
|
"eval_samples_per_second": 803.594, |
|
"eval_steps_per_second": 50.225, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"eval_loss": 2.270815134048462, |
|
"eval_runtime": 124.209, |
|
"eval_samples_per_second": 805.095, |
|
"eval_steps_per_second": 50.318, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.3945, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_loss": 2.2654542922973633, |
|
"eval_runtime": 124.6421, |
|
"eval_samples_per_second": 802.297, |
|
"eval_steps_per_second": 50.144, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"eval_loss": 2.267200231552124, |
|
"eval_runtime": 125.0498, |
|
"eval_samples_per_second": 799.681, |
|
"eval_steps_per_second": 49.98, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.3957, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"eval_loss": 2.269829273223877, |
|
"eval_runtime": 124.6935, |
|
"eval_samples_per_second": 801.966, |
|
"eval_steps_per_second": 50.123, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.2660622596740723, |
|
"eval_runtime": 125.1539, |
|
"eval_samples_per_second": 799.016, |
|
"eval_steps_per_second": 49.939, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.3951, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"eval_loss": 2.263535737991333, |
|
"eval_runtime": 124.319, |
|
"eval_samples_per_second": 804.382, |
|
"eval_steps_per_second": 50.274, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"eval_loss": 2.2597036361694336, |
|
"eval_runtime": 124.2784, |
|
"eval_samples_per_second": 804.645, |
|
"eval_steps_per_second": 50.29, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.4005, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"eval_loss": 2.2575347423553467, |
|
"eval_runtime": 123.3519, |
|
"eval_samples_per_second": 810.689, |
|
"eval_steps_per_second": 50.668, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"eval_loss": 2.2647805213928223, |
|
"eval_runtime": 124.82, |
|
"eval_samples_per_second": 801.154, |
|
"eval_steps_per_second": 50.072, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.394, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"eval_loss": 2.2745957374572754, |
|
"eval_runtime": 124.719, |
|
"eval_samples_per_second": 801.803, |
|
"eval_steps_per_second": 50.113, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"eval_loss": 2.272183656692505, |
|
"eval_runtime": 125.5079, |
|
"eval_samples_per_second": 796.762, |
|
"eval_steps_per_second": 49.798, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.4016, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"eval_loss": 2.2566604614257812, |
|
"eval_runtime": 123.9875, |
|
"eval_samples_per_second": 806.533, |
|
"eval_steps_per_second": 50.408, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"eval_loss": 2.259911298751831, |
|
"eval_runtime": 126.1542, |
|
"eval_samples_per_second": 792.681, |
|
"eval_steps_per_second": 49.543, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.392, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"eval_loss": 2.258845090866089, |
|
"eval_runtime": 126.2191, |
|
"eval_samples_per_second": 792.273, |
|
"eval_steps_per_second": 49.517, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"eval_loss": 2.2644309997558594, |
|
"eval_runtime": 125.6333, |
|
"eval_samples_per_second": 795.968, |
|
"eval_steps_per_second": 49.748, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.3936, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"eval_loss": 2.266770839691162, |
|
"eval_runtime": 125.3086, |
|
"eval_samples_per_second": 798.03, |
|
"eval_steps_per_second": 49.877, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"eval_loss": 2.2446775436401367, |
|
"eval_runtime": 124.755, |
|
"eval_samples_per_second": 801.571, |
|
"eval_steps_per_second": 50.098, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.3954, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"eval_loss": 2.25016713142395, |
|
"eval_runtime": 125.12, |
|
"eval_samples_per_second": 799.233, |
|
"eval_steps_per_second": 49.952, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"eval_loss": 2.2736761569976807, |
|
"eval_runtime": 124.2206, |
|
"eval_samples_per_second": 805.019, |
|
"eval_steps_per_second": 50.314, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.3901, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"eval_loss": 2.2700750827789307, |
|
"eval_runtime": 124.082, |
|
"eval_samples_per_second": 805.919, |
|
"eval_steps_per_second": 50.37, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"eval_loss": 2.263190746307373, |
|
"eval_runtime": 123.8135, |
|
"eval_samples_per_second": 807.667, |
|
"eval_steps_per_second": 50.479, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.3963, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 2.2660787105560303, |
|
"eval_runtime": 123.6011, |
|
"eval_samples_per_second": 809.054, |
|
"eval_steps_per_second": 50.566, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"eval_loss": 2.2628071308135986, |
|
"eval_runtime": 124.3028, |
|
"eval_samples_per_second": 804.487, |
|
"eval_steps_per_second": 50.28, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.4005, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"eval_loss": 2.2605719566345215, |
|
"eval_runtime": 124.679, |
|
"eval_samples_per_second": 802.06, |
|
"eval_steps_per_second": 50.129, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 2.257826328277588, |
|
"eval_runtime": 124.1165, |
|
"eval_samples_per_second": 805.695, |
|
"eval_steps_per_second": 50.356, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.3877, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 2.267404079437256, |
|
"eval_runtime": 123.854, |
|
"eval_samples_per_second": 807.402, |
|
"eval_steps_per_second": 50.463, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_loss": 2.263066053390503, |
|
"eval_runtime": 124.1271, |
|
"eval_samples_per_second": 805.626, |
|
"eval_steps_per_second": 50.352, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.3958, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_loss": 2.267526149749756, |
|
"eval_runtime": 124.1257, |
|
"eval_samples_per_second": 805.635, |
|
"eval_steps_per_second": 50.352, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"eval_loss": 2.2751998901367188, |
|
"eval_runtime": 123.9013, |
|
"eval_samples_per_second": 807.094, |
|
"eval_steps_per_second": 50.443, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.3858, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"eval_loss": 2.2622976303100586, |
|
"eval_runtime": 124.0119, |
|
"eval_samples_per_second": 806.374, |
|
"eval_steps_per_second": 50.398, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"eval_loss": 2.2577433586120605, |
|
"eval_runtime": 124.3224, |
|
"eval_samples_per_second": 804.36, |
|
"eval_steps_per_second": 50.273, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.403, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"eval_loss": 2.251173496246338, |
|
"eval_runtime": 124.4564, |
|
"eval_samples_per_second": 803.494, |
|
"eval_steps_per_second": 50.218, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"eval_loss": 2.2610392570495605, |
|
"eval_runtime": 124.6434, |
|
"eval_samples_per_second": 802.289, |
|
"eval_steps_per_second": 50.143, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.3969, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_loss": 2.259674310684204, |
|
"eval_runtime": 125.2932, |
|
"eval_samples_per_second": 798.128, |
|
"eval_steps_per_second": 49.883, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"eval_loss": 2.274836778640747, |
|
"eval_runtime": 125.8602, |
|
"eval_samples_per_second": 794.532, |
|
"eval_steps_per_second": 49.658, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.4016, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_loss": 2.2631607055664062, |
|
"eval_runtime": 126.3601, |
|
"eval_samples_per_second": 791.389, |
|
"eval_steps_per_second": 49.462, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"eval_loss": 2.2650434970855713, |
|
"eval_runtime": 125.0464, |
|
"eval_samples_per_second": 799.703, |
|
"eval_steps_per_second": 49.981, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.4018, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"eval_loss": 2.266899824142456, |
|
"eval_runtime": 125.1187, |
|
"eval_samples_per_second": 799.241, |
|
"eval_steps_per_second": 49.953, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"eval_loss": 2.252521276473999, |
|
"eval_runtime": 124.7901, |
|
"eval_samples_per_second": 801.346, |
|
"eval_steps_per_second": 50.084, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.3954, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"eval_loss": 2.249734878540039, |
|
"eval_runtime": 125.3184, |
|
"eval_samples_per_second": 797.967, |
|
"eval_steps_per_second": 49.873, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"eval_loss": 2.274369716644287, |
|
"eval_runtime": 125.8354, |
|
"eval_samples_per_second": 794.689, |
|
"eval_steps_per_second": 49.668, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.396, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"eval_loss": 2.267287492752075, |
|
"eval_runtime": 124.6245, |
|
"eval_samples_per_second": 802.41, |
|
"eval_steps_per_second": 50.151, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"eval_loss": 2.263681173324585, |
|
"eval_runtime": 124.8384, |
|
"eval_samples_per_second": 801.035, |
|
"eval_steps_per_second": 50.065, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.3951, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"eval_loss": 2.2615184783935547, |
|
"eval_runtime": 125.4015, |
|
"eval_samples_per_second": 797.439, |
|
"eval_steps_per_second": 49.84, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"eval_loss": 2.2643656730651855, |
|
"eval_runtime": 124.6138, |
|
"eval_samples_per_second": 802.479, |
|
"eval_steps_per_second": 50.155, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.4017, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"eval_loss": 2.265629768371582, |
|
"eval_runtime": 124.2729, |
|
"eval_samples_per_second": 804.681, |
|
"eval_steps_per_second": 50.293, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"eval_loss": 2.268179178237915, |
|
"eval_runtime": 124.3886, |
|
"eval_samples_per_second": 803.932, |
|
"eval_steps_per_second": 50.246, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.3962, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"eval_loss": 2.2591919898986816, |
|
"eval_runtime": 124.0671, |
|
"eval_samples_per_second": 806.015, |
|
"eval_steps_per_second": 50.376, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"eval_loss": 2.264333724975586, |
|
"eval_runtime": 123.9572, |
|
"eval_samples_per_second": 806.73, |
|
"eval_steps_per_second": 50.421, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.3996, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"eval_loss": 2.264843225479126, |
|
"eval_runtime": 124.1244, |
|
"eval_samples_per_second": 805.644, |
|
"eval_steps_per_second": 50.353, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"eval_loss": 2.2705702781677246, |
|
"eval_runtime": 124.9853, |
|
"eval_samples_per_second": 800.094, |
|
"eval_steps_per_second": 50.006, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.3994, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"eval_loss": 2.270008087158203, |
|
"eval_runtime": 124.4796, |
|
"eval_samples_per_second": 803.345, |
|
"eval_steps_per_second": 50.209, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"eval_loss": 2.2626538276672363, |
|
"eval_runtime": 124.7698, |
|
"eval_samples_per_second": 801.476, |
|
"eval_steps_per_second": 50.092, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.3976, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"eval_loss": 2.259154796600342, |
|
"eval_runtime": 125.5881, |
|
"eval_samples_per_second": 796.254, |
|
"eval_steps_per_second": 49.766, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"eval_loss": 2.2606401443481445, |
|
"eval_runtime": 124.5151, |
|
"eval_samples_per_second": 803.116, |
|
"eval_steps_per_second": 50.195, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.3971, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"eval_loss": 2.2588131427764893, |
|
"eval_runtime": 125.6386, |
|
"eval_samples_per_second": 795.934, |
|
"eval_steps_per_second": 49.746, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_loss": 2.2607157230377197, |
|
"eval_runtime": 125.7855, |
|
"eval_samples_per_second": 795.004, |
|
"eval_steps_per_second": 49.688, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.3991, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"eval_loss": 2.2692136764526367, |
|
"eval_runtime": 124.174, |
|
"eval_samples_per_second": 805.322, |
|
"eval_steps_per_second": 50.333, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"eval_loss": 2.2548389434814453, |
|
"eval_runtime": 126.1957, |
|
"eval_samples_per_second": 792.42, |
|
"eval_steps_per_second": 49.526, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.3952, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"eval_loss": 2.2572038173675537, |
|
"eval_runtime": 126.1234, |
|
"eval_samples_per_second": 792.874, |
|
"eval_steps_per_second": 49.555, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"eval_loss": 2.262612819671631, |
|
"eval_runtime": 126.0135, |
|
"eval_samples_per_second": 793.566, |
|
"eval_steps_per_second": 49.598, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.4002, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"eval_loss": 2.268040657043457, |
|
"eval_runtime": 126.4057, |
|
"eval_samples_per_second": 791.104, |
|
"eval_steps_per_second": 49.444, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"eval_loss": 2.268988847732544, |
|
"eval_runtime": 126.2757, |
|
"eval_samples_per_second": 791.918, |
|
"eval_steps_per_second": 49.495, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.3937, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_loss": 2.2523088455200195, |
|
"eval_runtime": 125.8129, |
|
"eval_samples_per_second": 794.831, |
|
"eval_steps_per_second": 49.677, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"eval_loss": 2.2699527740478516, |
|
"eval_runtime": 126.3749, |
|
"eval_samples_per_second": 791.296, |
|
"eval_steps_per_second": 49.456, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.3999, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"eval_loss": 2.265235662460327, |
|
"eval_runtime": 125.4726, |
|
"eval_samples_per_second": 796.987, |
|
"eval_steps_per_second": 49.812, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"eval_loss": 2.2670557498931885, |
|
"eval_runtime": 125.952, |
|
"eval_samples_per_second": 793.953, |
|
"eval_steps_per_second": 49.622, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.3891, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"eval_loss": 2.2700319290161133, |
|
"eval_runtime": 126.128, |
|
"eval_samples_per_second": 792.845, |
|
"eval_steps_per_second": 49.553, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"eval_loss": 2.258948802947998, |
|
"eval_runtime": 126.6616, |
|
"eval_samples_per_second": 789.505, |
|
"eval_steps_per_second": 49.344, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.397, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"eval_loss": 2.2626419067382812, |
|
"eval_runtime": 125.2648, |
|
"eval_samples_per_second": 798.309, |
|
"eval_steps_per_second": 49.894, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"eval_loss": 2.260664463043213, |
|
"eval_runtime": 124.8328, |
|
"eval_samples_per_second": 801.071, |
|
"eval_steps_per_second": 50.067, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.3968, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"eval_loss": 2.2663474082946777, |
|
"eval_runtime": 126.0403, |
|
"eval_samples_per_second": 793.397, |
|
"eval_steps_per_second": 49.587, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"eval_loss": 2.263674020767212, |
|
"eval_runtime": 125.5699, |
|
"eval_samples_per_second": 796.369, |
|
"eval_steps_per_second": 49.773, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.3932, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"eval_loss": 2.2622973918914795, |
|
"eval_runtime": 125.0302, |
|
"eval_samples_per_second": 799.807, |
|
"eval_steps_per_second": 49.988, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"eval_loss": 2.267320394515991, |
|
"eval_runtime": 126.5313, |
|
"eval_samples_per_second": 790.318, |
|
"eval_steps_per_second": 49.395, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.3981, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"eval_loss": 2.2546768188476562, |
|
"eval_runtime": 126.1555, |
|
"eval_samples_per_second": 792.673, |
|
"eval_steps_per_second": 49.542, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"eval_loss": 2.2597532272338867, |
|
"eval_runtime": 126.0713, |
|
"eval_samples_per_second": 793.202, |
|
"eval_steps_per_second": 49.575, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.3964, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_loss": 2.2690351009368896, |
|
"eval_runtime": 127.1255, |
|
"eval_samples_per_second": 786.624, |
|
"eval_steps_per_second": 49.164, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_loss": 2.2618870735168457, |
|
"eval_runtime": 126.4894, |
|
"eval_samples_per_second": 790.58, |
|
"eval_steps_per_second": 49.411, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 18.05, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.3941, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 18.05, |
|
"eval_loss": 2.255786657333374, |
|
"eval_runtime": 125.2961, |
|
"eval_samples_per_second": 798.11, |
|
"eval_steps_per_second": 49.882, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"eval_loss": 2.2658987045288086, |
|
"eval_runtime": 125.33, |
|
"eval_samples_per_second": 797.893, |
|
"eval_steps_per_second": 49.868, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.3926, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"eval_loss": 2.255154848098755, |
|
"eval_runtime": 125.9241, |
|
"eval_samples_per_second": 794.129, |
|
"eval_steps_per_second": 49.633, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"eval_loss": 2.267132043838501, |
|
"eval_runtime": 126.3428, |
|
"eval_samples_per_second": 791.498, |
|
"eval_steps_per_second": 49.469, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.399, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"eval_loss": 2.2660810947418213, |
|
"eval_runtime": 126.922, |
|
"eval_samples_per_second": 787.886, |
|
"eval_steps_per_second": 49.243, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 18.39, |
|
"eval_loss": 2.259093999862671, |
|
"eval_runtime": 126.8243, |
|
"eval_samples_per_second": 788.493, |
|
"eval_steps_per_second": 49.281, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.3941, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"eval_loss": 2.2567954063415527, |
|
"eval_runtime": 126.0771, |
|
"eval_samples_per_second": 793.165, |
|
"eval_steps_per_second": 49.573, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"eval_loss": 2.258824348449707, |
|
"eval_runtime": 126.912, |
|
"eval_samples_per_second": 787.947, |
|
"eval_steps_per_second": 49.247, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.3975, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"eval_loss": 2.2631142139434814, |
|
"eval_runtime": 126.1148, |
|
"eval_samples_per_second": 792.928, |
|
"eval_steps_per_second": 49.558, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"eval_loss": 2.265528917312622, |
|
"eval_runtime": 125.083, |
|
"eval_samples_per_second": 799.469, |
|
"eval_steps_per_second": 49.967, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.3884, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"eval_loss": 2.262817859649658, |
|
"eval_runtime": 126.358, |
|
"eval_samples_per_second": 791.402, |
|
"eval_steps_per_second": 49.463, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"eval_loss": 2.265577554702759, |
|
"eval_runtime": 125.5796, |
|
"eval_samples_per_second": 796.308, |
|
"eval_steps_per_second": 49.769, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.399, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"eval_loss": 2.264435052871704, |
|
"eval_runtime": 125.6584, |
|
"eval_samples_per_second": 795.808, |
|
"eval_steps_per_second": 49.738, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"eval_loss": 2.2607643604278564, |
|
"eval_runtime": 124.8913, |
|
"eval_samples_per_second": 800.697, |
|
"eval_steps_per_second": 50.044, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.4064, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 2.256071090698242, |
|
"eval_runtime": 124.5565, |
|
"eval_samples_per_second": 802.849, |
|
"eval_steps_per_second": 50.178, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"eval_loss": 2.2680041790008545, |
|
"eval_runtime": 125.2469, |
|
"eval_samples_per_second": 798.423, |
|
"eval_steps_per_second": 49.901, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.3999, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_loss": 2.27026104927063, |
|
"eval_runtime": 124.766, |
|
"eval_samples_per_second": 801.501, |
|
"eval_steps_per_second": 50.094, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_loss": 2.2624101638793945, |
|
"eval_runtime": 124.8172, |
|
"eval_samples_per_second": 801.172, |
|
"eval_steps_per_second": 50.073, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.398, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"eval_loss": 2.270665407180786, |
|
"eval_runtime": 124.8089, |
|
"eval_samples_per_second": 801.225, |
|
"eval_steps_per_second": 50.077, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"eval_loss": 2.264582872390747, |
|
"eval_runtime": 124.6344, |
|
"eval_samples_per_second": 802.347, |
|
"eval_steps_per_second": 50.147, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.4007, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"eval_loss": 2.2658579349517822, |
|
"eval_runtime": 125.9424, |
|
"eval_samples_per_second": 794.014, |
|
"eval_steps_per_second": 49.626, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"eval_loss": 2.2709732055664062, |
|
"eval_runtime": 125.5781, |
|
"eval_samples_per_second": 796.317, |
|
"eval_steps_per_second": 49.77, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.3955, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"eval_loss": 2.2720258235931396, |
|
"eval_runtime": 125.3571, |
|
"eval_samples_per_second": 797.721, |
|
"eval_steps_per_second": 49.858, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"eval_loss": 2.2569046020507812, |
|
"eval_runtime": 126.2089, |
|
"eval_samples_per_second": 792.337, |
|
"eval_steps_per_second": 49.521, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.3973, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"eval_loss": 2.264122486114502, |
|
"eval_runtime": 124.8579, |
|
"eval_samples_per_second": 800.91, |
|
"eval_steps_per_second": 50.057, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.74, |
|
"eval_loss": 2.263296127319336, |
|
"eval_runtime": 126.0432, |
|
"eval_samples_per_second": 793.379, |
|
"eval_steps_per_second": 49.586, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.4059, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 2.262235164642334, |
|
"eval_runtime": 126.1477, |
|
"eval_samples_per_second": 792.722, |
|
"eval_steps_per_second": 49.545, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"eval_loss": 2.253866672515869, |
|
"eval_runtime": 126.3644, |
|
"eval_samples_per_second": 791.362, |
|
"eval_steps_per_second": 49.46, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.3899, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"eval_loss": 2.266547441482544, |
|
"eval_runtime": 128.0069, |
|
"eval_samples_per_second": 781.208, |
|
"eval_steps_per_second": 48.825, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"eval_loss": 2.262890577316284, |
|
"eval_runtime": 128.4011, |
|
"eval_samples_per_second": 778.809, |
|
"eval_steps_per_second": 48.676, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.4025, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"eval_loss": 2.2551090717315674, |
|
"eval_runtime": 127.3686, |
|
"eval_samples_per_second": 785.123, |
|
"eval_steps_per_second": 49.07, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 20.14, |
|
"eval_loss": 2.2545723915100098, |
|
"eval_runtime": 127.74, |
|
"eval_samples_per_second": 782.84, |
|
"eval_steps_per_second": 48.928, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 0.0, |
|
"loss": 2.3956, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"eval_loss": 2.2619526386260986, |
|
"eval_runtime": 127.428, |
|
"eval_samples_per_second": 784.757, |
|
"eval_steps_per_second": 49.047, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"step": 2400000, |
|
"total_flos": 8.365104666768984e+17, |
|
"train_loss": 2.404444656575521, |
|
"train_runtime": 193218.8586, |
|
"train_samples_per_second": 198.738, |
|
"train_steps_per_second": 12.421 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 21, |
|
"save_steps": 32000, |
|
"total_flos": 8.365104666768984e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|