{ "best_metric": 2.9959683418273926, "best_model_checkpoint": "./model_tweets_2020_Q2_75/checkpoint-192000", "epoch": 20.214101019969846, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "eval_loss": 3.1519558429718018, "eval_runtime": 113.5135, "eval_samples_per_second": 880.794, "eval_steps_per_second": 55.051, "step": 8000 }, { "epoch": 0.13, "learning_rate": 9.939131159843243e-06, "loss": 3.3704, "step": 16000 }, { "epoch": 0.13, "eval_loss": 3.1285579204559326, "eval_runtime": 113.7953, "eval_samples_per_second": 878.613, "eval_steps_per_second": 54.914, "step": 16000 }, { "epoch": 0.2, "eval_loss": 3.107924699783325, "eval_runtime": 113.2981, "eval_samples_per_second": 882.468, "eval_steps_per_second": 55.155, "step": 24000 }, { "epoch": 0.27, "learning_rate": 9.872425581589261e-06, "loss": 3.2908, "step": 32000 }, { "epoch": 0.27, "eval_loss": 3.084472179412842, "eval_runtime": 112.7537, "eval_samples_per_second": 886.73, "eval_steps_per_second": 55.422, "step": 32000 }, { "epoch": 0.34, "eval_loss": 3.0868372917175293, "eval_runtime": 113.2037, "eval_samples_per_second": 883.204, "eval_steps_per_second": 55.201, "step": 40000 }, { "epoch": 0.4, "learning_rate": 9.80572000333528e-06, "loss": 3.2742, "step": 48000 }, { "epoch": 0.4, "eval_loss": 3.076770067214966, "eval_runtime": 112.4863, "eval_samples_per_second": 888.837, "eval_steps_per_second": 55.553, "step": 48000 }, { "epoch": 0.47, "eval_loss": 3.0705816745758057, "eval_runtime": 113.9358, "eval_samples_per_second": 877.529, "eval_steps_per_second": 54.847, "step": 56000 }, { "epoch": 0.54, "learning_rate": 9.739014425081299e-06, "loss": 3.2579, "step": 64000 }, { "epoch": 0.54, "eval_loss": 3.0620689392089844, "eval_runtime": 112.5706, "eval_samples_per_second": 888.172, "eval_steps_per_second": 55.512, "step": 64000 }, { "epoch": 0.61, "eval_loss": 3.0659217834472656, "eval_runtime": 112.8069, "eval_samples_per_second": 886.311, "eval_steps_per_second": 55.396, "step": 72000 }, { "epoch": 0.67, "learning_rate": 9.672308846827316e-06, "loss": 3.2448, "step": 80000 }, { "epoch": 0.67, "eval_loss": 3.0456507205963135, "eval_runtime": 114.4983, "eval_samples_per_second": 873.218, "eval_steps_per_second": 54.577, "step": 80000 }, { "epoch": 0.74, "eval_loss": 3.055419683456421, "eval_runtime": 113.1402, "eval_samples_per_second": 883.7, "eval_steps_per_second": 55.232, "step": 88000 }, { "epoch": 0.81, "learning_rate": 9.605603268573334e-06, "loss": 3.2416, "step": 96000 }, { "epoch": 0.81, "eval_loss": 3.0335144996643066, "eval_runtime": 115.0306, "eval_samples_per_second": 869.178, "eval_steps_per_second": 54.325, "step": 96000 }, { "epoch": 0.88, "eval_loss": 3.0320653915405273, "eval_runtime": 115.4623, "eval_samples_per_second": 865.928, "eval_steps_per_second": 54.122, "step": 104000 }, { "epoch": 0.94, "learning_rate": 9.538897690319354e-06, "loss": 3.23, "step": 112000 }, { "epoch": 0.94, "eval_loss": 3.0136971473693848, "eval_runtime": 114.4021, "eval_samples_per_second": 873.952, "eval_steps_per_second": 54.623, "step": 112000 }, { "epoch": 1.01, "eval_loss": 3.0060510635375977, "eval_runtime": 114.4587, "eval_samples_per_second": 873.52, "eval_steps_per_second": 54.596, "step": 120000 }, { "epoch": 1.08, "learning_rate": 9.472192112065373e-06, "loss": 3.2084, "step": 128000 }, { "epoch": 1.08, "eval_loss": 3.025130033493042, "eval_runtime": 113.0671, "eval_samples_per_second": 884.272, "eval_steps_per_second": 55.268, "step": 128000 }, { "epoch": 1.15, "eval_loss": 3.009242296218872, "eval_runtime": 112.796, "eval_samples_per_second": 886.397, "eval_steps_per_second": 55.401, "step": 136000 }, { "epoch": 1.21, "learning_rate": 9.405486533811392e-06, "loss": 3.2055, "step": 144000 }, { "epoch": 1.21, "eval_loss": 3.0042872428894043, "eval_runtime": 112.6772, "eval_samples_per_second": 887.332, "eval_steps_per_second": 55.459, "step": 144000 }, { "epoch": 1.28, "eval_loss": 3.0054819583892822, "eval_runtime": 112.4817, "eval_samples_per_second": 888.873, "eval_steps_per_second": 55.556, "step": 152000 }, { "epoch": 1.35, "learning_rate": 9.338780955557409e-06, "loss": 3.2026, "step": 160000 }, { "epoch": 1.35, "eval_loss": 3.0065696239471436, "eval_runtime": 112.6067, "eval_samples_per_second": 887.887, "eval_steps_per_second": 55.494, "step": 160000 }, { "epoch": 1.41, "eval_loss": 3.01246976852417, "eval_runtime": 112.6363, "eval_samples_per_second": 887.654, "eval_steps_per_second": 55.479, "step": 168000 }, { "epoch": 1.48, "learning_rate": 9.272075377303427e-06, "loss": 3.2069, "step": 176000 }, { "epoch": 1.48, "eval_loss": 3.0032289028167725, "eval_runtime": 112.0908, "eval_samples_per_second": 891.974, "eval_steps_per_second": 55.749, "step": 176000 }, { "epoch": 1.55, "eval_loss": 2.995911121368408, "eval_runtime": 114.3666, "eval_samples_per_second": 874.224, "eval_steps_per_second": 54.64, "step": 184000 }, { "epoch": 1.62, "learning_rate": 9.205369799049446e-06, "loss": 3.1904, "step": 192000 }, { "epoch": 1.62, "eval_loss": 2.9959683418273926, "eval_runtime": 113.8999, "eval_samples_per_second": 877.806, "eval_steps_per_second": 54.864, "step": 192000 }, { "epoch": 1.68, "eval_loss": 3.003798246383667, "eval_runtime": 114.6475, "eval_samples_per_second": 872.082, "eval_steps_per_second": 54.506, "step": 200000 }, { "epoch": 1.75, "learning_rate": 9.138664220795464e-06, "loss": 3.1989, "step": 208000 }, { "epoch": 1.75, "eval_loss": 3.0016160011291504, "eval_runtime": 113.2837, "eval_samples_per_second": 882.58, "eval_steps_per_second": 55.162, "step": 208000 }, { "epoch": 1.82, "eval_loss": 3.004883050918579, "eval_runtime": 113.3643, "eval_samples_per_second": 881.953, "eval_steps_per_second": 55.123, "step": 216000 }, { "epoch": 1.89, "learning_rate": 9.071958642541483e-06, "loss": 3.2113, "step": 224000 }, { "epoch": 1.89, "eval_loss": 3.0085606575012207, "eval_runtime": 114.3159, "eval_samples_per_second": 874.612, "eval_steps_per_second": 54.664, "step": 224000 }, { "epoch": 1.95, "eval_loss": 3.010385274887085, "eval_runtime": 113.1162, "eval_samples_per_second": 883.887, "eval_steps_per_second": 55.244, "step": 232000 }, { "epoch": 2.02, "learning_rate": 9.005253064287502e-06, "loss": 3.217, "step": 240000 }, { "epoch": 2.02, "eval_loss": 3.0165932178497314, "eval_runtime": 113.3739, "eval_samples_per_second": 881.878, "eval_steps_per_second": 55.118, "step": 240000 }, { "epoch": 2.09, "eval_loss": 3.0139265060424805, "eval_runtime": 112.9548, "eval_samples_per_second": 885.15, "eval_steps_per_second": 55.323, "step": 248000 }, { "epoch": 2.16, "learning_rate": 8.93854748603352e-06, "loss": 3.2029, "step": 256000 }, { "epoch": 2.16, "eval_loss": 3.021667242050171, "eval_runtime": 113.782, "eval_samples_per_second": 878.716, "eval_steps_per_second": 54.921, "step": 256000 }, { "epoch": 2.22, "eval_loss": 3.0237627029418945, "eval_runtime": 112.8799, "eval_samples_per_second": 885.738, "eval_steps_per_second": 55.36, "step": 264000 }, { "epoch": 2.29, "learning_rate": 8.871841907779539e-06, "loss": 3.2226, "step": 272000 }, { "epoch": 2.29, "eval_loss": 3.0233664512634277, "eval_runtime": 112.7482, "eval_samples_per_second": 886.772, "eval_steps_per_second": 55.424, "step": 272000 }, { "epoch": 2.36, "eval_loss": 3.0216429233551025, "eval_runtime": 112.8816, "eval_samples_per_second": 885.725, "eval_steps_per_second": 55.359, "step": 280000 }, { "epoch": 2.43, "learning_rate": 8.805136329525557e-06, "loss": 3.2199, "step": 288000 }, { "epoch": 2.43, "eval_loss": 3.0175387859344482, "eval_runtime": 113.1399, "eval_samples_per_second": 883.703, "eval_steps_per_second": 55.233, "step": 288000 }, { "epoch": 2.49, "eval_loss": 3.036482095718384, "eval_runtime": 112.7456, "eval_samples_per_second": 886.793, "eval_steps_per_second": 55.426, "step": 296000 }, { "epoch": 2.56, "learning_rate": 8.738430751271576e-06, "loss": 3.2254, "step": 304000 }, { "epoch": 2.56, "eval_loss": 3.028167247772217, "eval_runtime": 113.0478, "eval_samples_per_second": 884.422, "eval_steps_per_second": 55.277, "step": 304000 }, { "epoch": 2.63, "eval_loss": 3.0228230953216553, "eval_runtime": 112.6735, "eval_samples_per_second": 887.36, "eval_steps_per_second": 55.461, "step": 312000 }, { "epoch": 2.7, "learning_rate": 8.671725173017595e-06, "loss": 3.2349, "step": 320000 }, { "epoch": 2.7, "eval_loss": 3.0204546451568604, "eval_runtime": 113.395, "eval_samples_per_second": 881.714, "eval_steps_per_second": 55.108, "step": 320000 }, { "epoch": 2.76, "eval_loss": 3.0406222343444824, "eval_runtime": 113.6641, "eval_samples_per_second": 879.627, "eval_steps_per_second": 54.978, "step": 328000 }, { "epoch": 2.83, "learning_rate": 8.605019594763613e-06, "loss": 3.2424, "step": 336000 }, { "epoch": 2.83, "eval_loss": 3.0306990146636963, "eval_runtime": 113.8559, "eval_samples_per_second": 878.145, "eval_steps_per_second": 54.885, "step": 336000 }, { "epoch": 2.9, "eval_loss": 3.041322708129883, "eval_runtime": 112.6936, "eval_samples_per_second": 887.202, "eval_steps_per_second": 55.451, "step": 344000 }, { "epoch": 2.96, "learning_rate": 8.538314016509632e-06, "loss": 3.2347, "step": 352000 }, { "epoch": 2.96, "eval_loss": 3.0401053428649902, "eval_runtime": 113.3728, "eval_samples_per_second": 881.887, "eval_steps_per_second": 55.119, "step": 352000 }, { "epoch": 3.03, "eval_loss": 3.051990270614624, "eval_runtime": 112.6317, "eval_samples_per_second": 887.69, "eval_steps_per_second": 55.482, "step": 360000 }, { "epoch": 3.1, "learning_rate": 8.471608438255649e-06, "loss": 3.2476, "step": 368000 }, { "epoch": 3.1, "eval_loss": 3.0489377975463867, "eval_runtime": 112.9, "eval_samples_per_second": 885.581, "eval_steps_per_second": 55.35, "step": 368000 }, { "epoch": 3.17, "eval_loss": 3.052086353302002, "eval_runtime": 114.3268, "eval_samples_per_second": 874.528, "eval_steps_per_second": 54.659, "step": 376000 }, { "epoch": 3.23, "learning_rate": 8.404902860001667e-06, "loss": 3.2506, "step": 384000 }, { "epoch": 3.23, "eval_loss": 3.068549633026123, "eval_runtime": 113.2619, "eval_samples_per_second": 882.751, "eval_steps_per_second": 55.173, "step": 384000 }, { "epoch": 3.3, "eval_loss": 3.0546491146087646, "eval_runtime": 113.505, "eval_samples_per_second": 880.86, "eval_steps_per_second": 55.055, "step": 392000 }, { "epoch": 3.37, "learning_rate": 8.338197281747686e-06, "loss": 3.2547, "step": 400000 }, { "epoch": 3.37, "eval_loss": 3.054222822189331, "eval_runtime": 113.4821, "eval_samples_per_second": 881.037, "eval_steps_per_second": 55.066, "step": 400000 }, { "epoch": 3.44, "eval_loss": 3.0536701679229736, "eval_runtime": 113.6042, "eval_samples_per_second": 880.091, "eval_steps_per_second": 55.007, "step": 408000 }, { "epoch": 3.5, "learning_rate": 8.271491703493705e-06, "loss": 3.2519, "step": 416000 }, { "epoch": 3.5, "eval_loss": 3.0587801933288574, "eval_runtime": 112.9459, "eval_samples_per_second": 885.22, "eval_steps_per_second": 55.327, "step": 416000 }, { "epoch": 3.57, "eval_loss": 3.0728721618652344, "eval_runtime": 113.366, "eval_samples_per_second": 881.94, "eval_steps_per_second": 55.122, "step": 424000 }, { "epoch": 3.64, "learning_rate": 8.204786125239725e-06, "loss": 3.2679, "step": 432000 }, { "epoch": 3.64, "eval_loss": 3.0841500759124756, "eval_runtime": 112.9942, "eval_samples_per_second": 884.842, "eval_steps_per_second": 55.304, "step": 432000 }, { "epoch": 3.71, "eval_loss": 3.0685195922851562, "eval_runtime": 113.8052, "eval_samples_per_second": 878.537, "eval_steps_per_second": 54.91, "step": 440000 }, { "epoch": 3.77, "learning_rate": 8.138080546985743e-06, "loss": 3.2656, "step": 448000 }, { "epoch": 3.77, "eval_loss": 3.094174861907959, "eval_runtime": 113.3221, "eval_samples_per_second": 882.281, "eval_steps_per_second": 55.144, "step": 448000 }, { "epoch": 3.84, "eval_loss": 3.094191074371338, "eval_runtime": 113.0432, "eval_samples_per_second": 884.458, "eval_steps_per_second": 55.28, "step": 456000 }, { "epoch": 3.91, "learning_rate": 8.07137496873176e-06, "loss": 3.2908, "step": 464000 }, { "epoch": 3.91, "eval_loss": 3.091813087463379, "eval_runtime": 113.945, "eval_samples_per_second": 877.459, "eval_steps_per_second": 54.842, "step": 464000 }, { "epoch": 3.98, "eval_loss": 3.0922000408172607, "eval_runtime": 113.1262, "eval_samples_per_second": 883.81, "eval_steps_per_second": 55.239, "step": 472000 }, { "epoch": 4.04, "learning_rate": 8.004669390477779e-06, "loss": 3.2944, "step": 480000 }, { "epoch": 4.04, "eval_loss": 3.109328508377075, "eval_runtime": 112.2933, "eval_samples_per_second": 890.365, "eval_steps_per_second": 55.649, "step": 480000 }, { "epoch": 4.11, "eval_loss": 3.11584734916687, "eval_runtime": 113.071, "eval_samples_per_second": 884.241, "eval_steps_per_second": 55.266, "step": 488000 }, { "epoch": 4.18, "learning_rate": 7.937963812223798e-06, "loss": 3.2917, "step": 496000 }, { "epoch": 4.18, "eval_loss": 3.0996689796447754, "eval_runtime": 113.0722, "eval_samples_per_second": 884.232, "eval_steps_per_second": 55.266, "step": 496000 }, { "epoch": 4.24, "eval_loss": 3.111070394515991, "eval_runtime": 112.4182, "eval_samples_per_second": 889.376, "eval_steps_per_second": 55.587, "step": 504000 }, { "epoch": 4.31, "learning_rate": 7.871258233969816e-06, "loss": 3.2916, "step": 512000 }, { "epoch": 4.31, "eval_loss": 3.1132729053497314, "eval_runtime": 113.7916, "eval_samples_per_second": 878.641, "eval_steps_per_second": 54.916, "step": 512000 }, { "epoch": 4.38, "eval_loss": 3.1128952503204346, "eval_runtime": 113.6553, "eval_samples_per_second": 879.695, "eval_steps_per_second": 54.982, "step": 520000 }, { "epoch": 4.45, "learning_rate": 7.804552655715835e-06, "loss": 3.2836, "step": 528000 }, { "epoch": 4.45, "eval_loss": 3.113443613052368, "eval_runtime": 113.5086, "eval_samples_per_second": 880.832, "eval_steps_per_second": 55.053, "step": 528000 }, { "epoch": 4.51, "eval_loss": 3.1057605743408203, "eval_runtime": 113.3609, "eval_samples_per_second": 881.98, "eval_steps_per_second": 55.125, "step": 536000 }, { "epoch": 4.58, "learning_rate": 7.737847077461853e-06, "loss": 3.3068, "step": 544000 }, { "epoch": 4.58, "eval_loss": 3.121134042739868, "eval_runtime": 114.162, "eval_samples_per_second": 875.79, "eval_steps_per_second": 54.738, "step": 544000 }, { "epoch": 4.65, "eval_loss": 3.094620704650879, "eval_runtime": 113.8018, "eval_samples_per_second": 878.563, "eval_steps_per_second": 54.911, "step": 552000 }, { "epoch": 4.72, "learning_rate": 7.671141499207872e-06, "loss": 3.3026, "step": 560000 }, { "epoch": 4.72, "eval_loss": 3.107940912246704, "eval_runtime": 112.9645, "eval_samples_per_second": 885.075, "eval_steps_per_second": 55.318, "step": 560000 }, { "epoch": 4.78, "eval_loss": 3.120192527770996, "eval_runtime": 112.8661, "eval_samples_per_second": 885.846, "eval_steps_per_second": 55.366, "step": 568000 }, { "epoch": 4.85, "learning_rate": 7.604435920953891e-06, "loss": 3.3078, "step": 576000 }, { "epoch": 4.85, "eval_loss": 3.1155478954315186, "eval_runtime": 114.0442, "eval_samples_per_second": 876.695, "eval_steps_per_second": 54.795, "step": 576000 }, { "epoch": 4.92, "eval_loss": 3.125351905822754, "eval_runtime": 112.7895, "eval_samples_per_second": 886.448, "eval_steps_per_second": 55.404, "step": 584000 }, { "epoch": 4.99, "learning_rate": 7.537730342699909e-06, "loss": 3.3168, "step": 592000 }, { "epoch": 4.99, "eval_loss": 3.127920627593994, "eval_runtime": 113.3145, "eval_samples_per_second": 882.34, "eval_steps_per_second": 55.147, "step": 592000 }, { "epoch": 5.05, "eval_loss": 3.1179165840148926, "eval_runtime": 113.6285, "eval_samples_per_second": 879.902, "eval_steps_per_second": 54.995, "step": 600000 }, { "epoch": 5.12, "learning_rate": 7.471024764445928e-06, "loss": 3.3113, "step": 608000 }, { "epoch": 5.12, "eval_loss": 3.1277198791503906, "eval_runtime": 114.2174, "eval_samples_per_second": 875.366, "eval_steps_per_second": 54.711, "step": 608000 }, { "epoch": 5.19, "eval_loss": 3.133394241333008, "eval_runtime": 113.6646, "eval_samples_per_second": 879.623, "eval_steps_per_second": 54.978, "step": 616000 }, { "epoch": 5.26, "learning_rate": 7.4043191861919465e-06, "loss": 3.3102, "step": 624000 }, { "epoch": 5.26, "eval_loss": 3.123286485671997, "eval_runtime": 113.9343, "eval_samples_per_second": 877.541, "eval_steps_per_second": 54.847, "step": 624000 }, { "epoch": 5.32, "eval_loss": 3.1273744106292725, "eval_runtime": 113.8778, "eval_samples_per_second": 877.976, "eval_steps_per_second": 54.875, "step": 632000 }, { "epoch": 5.39, "learning_rate": 7.337613607937964e-06, "loss": 3.3235, "step": 640000 }, { "epoch": 5.39, "eval_loss": 3.1433892250061035, "eval_runtime": 113.0915, "eval_samples_per_second": 884.08, "eval_steps_per_second": 55.256, "step": 640000 }, { "epoch": 5.46, "eval_loss": 3.1368374824523926, "eval_runtime": 113.2499, "eval_samples_per_second": 882.844, "eval_steps_per_second": 55.179, "step": 648000 }, { "epoch": 5.53, "learning_rate": 7.270908029683983e-06, "loss": 3.331, "step": 656000 }, { "epoch": 5.53, "eval_loss": 3.1590983867645264, "eval_runtime": 114.2316, "eval_samples_per_second": 875.257, "eval_steps_per_second": 54.705, "step": 656000 }, { "epoch": 5.59, "eval_loss": 3.154599189758301, "eval_runtime": 113.4705, "eval_samples_per_second": 881.128, "eval_steps_per_second": 55.072, "step": 664000 }, { "epoch": 5.66, "learning_rate": 7.2042024514300015e-06, "loss": 3.3308, "step": 672000 }, { "epoch": 5.66, "eval_loss": 3.1662509441375732, "eval_runtime": 113.7727, "eval_samples_per_second": 878.787, "eval_steps_per_second": 54.925, "step": 672000 }, { "epoch": 5.73, "eval_loss": 3.1535351276397705, "eval_runtime": 113.1426, "eval_samples_per_second": 883.681, "eval_steps_per_second": 55.231, "step": 680000 }, { "epoch": 5.79, "learning_rate": 7.13749687317602e-06, "loss": 3.3396, "step": 688000 }, { "epoch": 5.79, "eval_loss": 3.1558427810668945, "eval_runtime": 113.3017, "eval_samples_per_second": 882.44, "eval_steps_per_second": 55.154, "step": 688000 }, { "epoch": 5.86, "eval_loss": 3.169811725616455, "eval_runtime": 113.2795, "eval_samples_per_second": 882.614, "eval_steps_per_second": 55.164, "step": 696000 }, { "epoch": 5.93, "learning_rate": 7.070791294922038e-06, "loss": 3.3558, "step": 704000 }, { "epoch": 5.93, "eval_loss": 3.165127754211426, "eval_runtime": 113.7995, "eval_samples_per_second": 878.581, "eval_steps_per_second": 54.912, "step": 704000 }, { "epoch": 6.0, "eval_loss": 3.1705756187438965, "eval_runtime": 113.6919, "eval_samples_per_second": 879.412, "eval_steps_per_second": 54.964, "step": 712000 }, { "epoch": 6.06, "learning_rate": 7.0040857166680564e-06, "loss": 3.3474, "step": 720000 }, { "epoch": 6.06, "eval_loss": 3.194214344024658, "eval_runtime": 113.2694, "eval_samples_per_second": 882.692, "eval_steps_per_second": 55.169, "step": 720000 }, { "epoch": 6.13, "eval_loss": 3.170464515686035, "eval_runtime": 113.1666, "eval_samples_per_second": 883.494, "eval_steps_per_second": 55.219, "step": 728000 }, { "epoch": 6.2, "learning_rate": 6.937380138414076e-06, "loss": 3.3513, "step": 736000 }, { "epoch": 6.2, "eval_loss": 3.1834402084350586, "eval_runtime": 113.2814, "eval_samples_per_second": 882.599, "eval_steps_per_second": 55.164, "step": 736000 }, { "epoch": 6.27, "eval_loss": 3.1810226440429688, "eval_runtime": 113.726, "eval_samples_per_second": 879.148, "eval_steps_per_second": 54.948, "step": 744000 }, { "epoch": 6.33, "learning_rate": 6.8706745601600945e-06, "loss": 3.362, "step": 752000 }, { "epoch": 6.33, "eval_loss": 3.172321319580078, "eval_runtime": 113.4364, "eval_samples_per_second": 881.392, "eval_steps_per_second": 55.088, "step": 752000 }, { "epoch": 6.4, "eval_loss": 3.1826891899108887, "eval_runtime": 113.3067, "eval_samples_per_second": 882.401, "eval_steps_per_second": 55.151, "step": 760000 }, { "epoch": 6.47, "learning_rate": 6.803968981906113e-06, "loss": 3.3694, "step": 768000 }, { "epoch": 6.47, "eval_loss": 3.1937167644500732, "eval_runtime": 113.4796, "eval_samples_per_second": 881.057, "eval_steps_per_second": 55.067, "step": 768000 }, { "epoch": 6.54, "eval_loss": 3.2004363536834717, "eval_runtime": 114.2163, "eval_samples_per_second": 875.374, "eval_steps_per_second": 54.712, "step": 776000 }, { "epoch": 6.6, "learning_rate": 6.737263403652131e-06, "loss": 3.378, "step": 784000 }, { "epoch": 6.6, "eval_loss": 3.2023050785064697, "eval_runtime": 113.4517, "eval_samples_per_second": 881.273, "eval_steps_per_second": 55.081, "step": 784000 }, { "epoch": 6.67, "eval_loss": 3.1935720443725586, "eval_runtime": 113.2158, "eval_samples_per_second": 883.11, "eval_steps_per_second": 55.195, "step": 792000 }, { "epoch": 6.74, "learning_rate": 6.6705578253981495e-06, "loss": 3.3703, "step": 800000 }, { "epoch": 6.74, "eval_loss": 3.19478702545166, "eval_runtime": 112.9761, "eval_samples_per_second": 884.984, "eval_steps_per_second": 55.313, "step": 800000 }, { "epoch": 6.81, "eval_loss": 3.2082369327545166, "eval_runtime": 113.4959, "eval_samples_per_second": 880.93, "eval_steps_per_second": 55.059, "step": 808000 }, { "epoch": 6.87, "learning_rate": 6.603852247144168e-06, "loss": 3.3838, "step": 816000 }, { "epoch": 6.87, "eval_loss": 3.1974425315856934, "eval_runtime": 113.5005, "eval_samples_per_second": 880.895, "eval_steps_per_second": 55.057, "step": 816000 }, { "epoch": 6.94, "eval_loss": 3.2029407024383545, "eval_runtime": 113.4958, "eval_samples_per_second": 880.932, "eval_steps_per_second": 55.059, "step": 824000 }, { "epoch": 7.01, "learning_rate": 6.537146668890187e-06, "loss": 3.3871, "step": 832000 }, { "epoch": 7.01, "eval_loss": 3.216017007827759, "eval_runtime": 113.9004, "eval_samples_per_second": 877.802, "eval_steps_per_second": 54.864, "step": 832000 }, { "epoch": 7.07, "eval_loss": 3.21976900100708, "eval_runtime": 113.1938, "eval_samples_per_second": 883.281, "eval_steps_per_second": 55.206, "step": 840000 }, { "epoch": 7.14, "learning_rate": 6.4704410906362044e-06, "loss": 3.3839, "step": 848000 }, { "epoch": 7.14, "eval_loss": 3.219007968902588, "eval_runtime": 113.9329, "eval_samples_per_second": 877.552, "eval_steps_per_second": 54.848, "step": 848000 }, { "epoch": 7.21, "eval_loss": 3.2204408645629883, "eval_runtime": 114.3981, "eval_samples_per_second": 873.983, "eval_steps_per_second": 54.625, "step": 856000 }, { "epoch": 7.28, "learning_rate": 6.403735512382223e-06, "loss": 3.389, "step": 864000 }, { "epoch": 7.28, "eval_loss": 3.218768835067749, "eval_runtime": 115.2564, "eval_samples_per_second": 867.475, "eval_steps_per_second": 54.218, "step": 864000 }, { "epoch": 7.34, "eval_loss": 3.2246193885803223, "eval_runtime": 113.3, "eval_samples_per_second": 882.454, "eval_steps_per_second": 55.154, "step": 872000 }, { "epoch": 7.41, "learning_rate": 6.337029934128242e-06, "loss": 3.398, "step": 880000 }, { "epoch": 7.41, "eval_loss": 3.233250617980957, "eval_runtime": 113.6851, "eval_samples_per_second": 879.464, "eval_steps_per_second": 54.968, "step": 880000 }, { "epoch": 7.48, "eval_loss": 3.216823101043701, "eval_runtime": 113.0594, "eval_samples_per_second": 884.332, "eval_steps_per_second": 55.272, "step": 888000 }, { "epoch": 7.55, "learning_rate": 6.270324355874261e-06, "loss": 3.4001, "step": 896000 }, { "epoch": 7.55, "eval_loss": 3.2311105728149414, "eval_runtime": 113.3088, "eval_samples_per_second": 882.385, "eval_steps_per_second": 55.15, "step": 896000 }, { "epoch": 7.61, "eval_loss": 3.2389867305755615, "eval_runtime": 113.4715, "eval_samples_per_second": 881.12, "eval_steps_per_second": 55.071, "step": 904000 }, { "epoch": 7.68, "learning_rate": 6.20361877762028e-06, "loss": 3.4255, "step": 912000 }, { "epoch": 7.68, "eval_loss": 3.2446951866149902, "eval_runtime": 114.3046, "eval_samples_per_second": 874.698, "eval_steps_per_second": 54.67, "step": 912000 }, { "epoch": 7.75, "eval_loss": 3.254612684249878, "eval_runtime": 113.4286, "eval_samples_per_second": 881.453, "eval_steps_per_second": 55.092, "step": 920000 }, { "epoch": 7.82, "learning_rate": 6.1369131993662975e-06, "loss": 3.4218, "step": 928000 }, { "epoch": 7.82, "eval_loss": 3.250980854034424, "eval_runtime": 113.6708, "eval_samples_per_second": 879.575, "eval_steps_per_second": 54.975, "step": 928000 }, { "epoch": 7.88, "eval_loss": 3.243265151977539, "eval_runtime": 114.4709, "eval_samples_per_second": 873.427, "eval_steps_per_second": 54.59, "step": 936000 }, { "epoch": 7.95, "learning_rate": 6.070207621112316e-06, "loss": 3.4326, "step": 944000 }, { "epoch": 7.95, "eval_loss": 3.2509450912475586, "eval_runtime": 113.5188, "eval_samples_per_second": 880.753, "eval_steps_per_second": 55.048, "step": 944000 }, { "epoch": 8.02, "eval_loss": 3.257272481918335, "eval_runtime": 113.6074, "eval_samples_per_second": 880.066, "eval_steps_per_second": 55.005, "step": 952000 }, { "epoch": 8.09, "learning_rate": 6.003502042858335e-06, "loss": 3.4268, "step": 960000 }, { "epoch": 8.09, "eval_loss": 3.249929428100586, "eval_runtime": 113.664, "eval_samples_per_second": 879.628, "eval_steps_per_second": 54.978, "step": 960000 }, { "epoch": 8.15, "eval_loss": 3.2704036235809326, "eval_runtime": 113.9356, "eval_samples_per_second": 877.53, "eval_steps_per_second": 54.847, "step": 968000 }, { "epoch": 8.22, "learning_rate": 5.936796464604353e-06, "loss": 3.4165, "step": 976000 }, { "epoch": 8.22, "eval_loss": 3.25793194770813, "eval_runtime": 114.3815, "eval_samples_per_second": 874.11, "eval_steps_per_second": 54.633, "step": 976000 }, { "epoch": 8.29, "eval_loss": 3.266918420791626, "eval_runtime": 113.5329, "eval_samples_per_second": 880.643, "eval_steps_per_second": 55.041, "step": 984000 }, { "epoch": 8.36, "learning_rate": 5.870090886350371e-06, "loss": 3.4425, "step": 992000 }, { "epoch": 8.36, "eval_loss": 3.272322416305542, "eval_runtime": 113.6369, "eval_samples_per_second": 879.838, "eval_steps_per_second": 54.991, "step": 992000 }, { "epoch": 8.42, "eval_loss": 3.2718310356140137, "eval_runtime": 113.6845, "eval_samples_per_second": 879.469, "eval_steps_per_second": 54.968, "step": 1000000 }, { "epoch": 8.49, "learning_rate": 5.80338530809639e-06, "loss": 3.4433, "step": 1008000 }, { "epoch": 8.49, "eval_loss": 3.2655293941497803, "eval_runtime": 114.0468, "eval_samples_per_second": 876.675, "eval_steps_per_second": 54.793, "step": 1008000 }, { "epoch": 8.56, "eval_loss": 3.2794032096862793, "eval_runtime": 115.3844, "eval_samples_per_second": 866.512, "eval_steps_per_second": 54.158, "step": 1016000 }, { "epoch": 8.62, "learning_rate": 5.736679729842408e-06, "loss": 3.4437, "step": 1024000 }, { "epoch": 8.62, "eval_loss": 3.2807533740997314, "eval_runtime": 114.2793, "eval_samples_per_second": 874.892, "eval_steps_per_second": 54.682, "step": 1024000 }, { "epoch": 8.69, "eval_loss": 3.2731096744537354, "eval_runtime": 113.8065, "eval_samples_per_second": 878.527, "eval_steps_per_second": 54.909, "step": 1032000 }, { "epoch": 8.76, "learning_rate": 5.669974151588427e-06, "loss": 3.4499, "step": 1040000 }, { "epoch": 8.76, "eval_loss": 3.27854323387146, "eval_runtime": 113.78, "eval_samples_per_second": 878.731, "eval_steps_per_second": 54.922, "step": 1040000 }, { "epoch": 8.83, "eval_loss": 3.28226900100708, "eval_runtime": 113.9563, "eval_samples_per_second": 877.371, "eval_steps_per_second": 54.837, "step": 1048000 }, { "epoch": 8.89, "learning_rate": 5.603268573334446e-06, "loss": 3.4593, "step": 1056000 }, { "epoch": 8.89, "eval_loss": 3.2843921184539795, "eval_runtime": 114.3227, "eval_samples_per_second": 874.559, "eval_steps_per_second": 54.661, "step": 1056000 }, { "epoch": 8.96, "eval_loss": 3.2876641750335693, "eval_runtime": 114.4486, "eval_samples_per_second": 873.597, "eval_steps_per_second": 54.601, "step": 1064000 }, { "epoch": 9.03, "learning_rate": 5.536562995080464e-06, "loss": 3.4481, "step": 1072000 }, { "epoch": 9.03, "eval_loss": 3.2969281673431396, "eval_runtime": 113.9166, "eval_samples_per_second": 877.677, "eval_steps_per_second": 54.856, "step": 1072000 }, { "epoch": 9.1, "eval_loss": 3.286954879760742, "eval_runtime": 114.297, "eval_samples_per_second": 874.756, "eval_steps_per_second": 54.673, "step": 1080000 }, { "epoch": 9.16, "learning_rate": 5.469857416826483e-06, "loss": 3.4542, "step": 1088000 }, { "epoch": 9.16, "eval_loss": 3.294614791870117, "eval_runtime": 114.0483, "eval_samples_per_second": 876.663, "eval_steps_per_second": 54.793, "step": 1088000 }, { "epoch": 9.23, "eval_loss": 3.2901484966278076, "eval_runtime": 114.4116, "eval_samples_per_second": 873.88, "eval_steps_per_second": 54.619, "step": 1096000 }, { "epoch": 9.3, "learning_rate": 5.403151838572501e-06, "loss": 3.4547, "step": 1104000 }, { "epoch": 9.3, "eval_loss": 3.2812633514404297, "eval_runtime": 114.838, "eval_samples_per_second": 870.635, "eval_steps_per_second": 54.416, "step": 1104000 }, { "epoch": 9.37, "eval_loss": 3.2909789085388184, "eval_runtime": 114.5859, "eval_samples_per_second": 872.551, "eval_steps_per_second": 54.536, "step": 1112000 }, { "epoch": 9.43, "learning_rate": 5.33644626031852e-06, "loss": 3.4618, "step": 1120000 }, { "epoch": 9.43, "eval_loss": 3.2978355884552, "eval_runtime": 113.8913, "eval_samples_per_second": 877.872, "eval_steps_per_second": 54.868, "step": 1120000 }, { "epoch": 9.5, "eval_loss": 3.3054616451263428, "eval_runtime": 114.4856, "eval_samples_per_second": 873.315, "eval_steps_per_second": 54.583, "step": 1128000 }, { "epoch": 9.57, "learning_rate": 5.269740682064538e-06, "loss": 3.46, "step": 1136000 }, { "epoch": 9.57, "eval_loss": 3.288513422012329, "eval_runtime": 114.5126, "eval_samples_per_second": 873.109, "eval_steps_per_second": 54.57, "step": 1136000 }, { "epoch": 9.64, "eval_loss": 3.2871181964874268, "eval_runtime": 114.4417, "eval_samples_per_second": 873.65, "eval_steps_per_second": 54.604, "step": 1144000 }, { "epoch": 9.7, "learning_rate": 5.203035103810556e-06, "loss": 3.4572, "step": 1152000 }, { "epoch": 9.7, "eval_loss": 3.2905023097991943, "eval_runtime": 114.8756, "eval_samples_per_second": 870.35, "eval_steps_per_second": 54.398, "step": 1152000 }, { "epoch": 9.77, "eval_loss": 3.300590991973877, "eval_runtime": 114.1062, "eval_samples_per_second": 876.219, "eval_steps_per_second": 54.765, "step": 1160000 }, { "epoch": 9.84, "learning_rate": 5.136329525556575e-06, "loss": 3.4597, "step": 1168000 }, { "epoch": 9.84, "eval_loss": 3.3080625534057617, "eval_runtime": 114.1639, "eval_samples_per_second": 875.776, "eval_steps_per_second": 54.737, "step": 1168000 }, { "epoch": 9.9, "eval_loss": 3.3031253814697266, "eval_runtime": 115.5682, "eval_samples_per_second": 865.134, "eval_steps_per_second": 54.072, "step": 1176000 }, { "epoch": 9.97, "learning_rate": 5.0696239473025935e-06, "loss": 3.4651, "step": 1184000 }, { "epoch": 9.97, "eval_loss": 3.288254737854004, "eval_runtime": 115.6327, "eval_samples_per_second": 864.652, "eval_steps_per_second": 54.042, "step": 1184000 }, { "epoch": 10.04, "eval_loss": 3.31886625289917, "eval_runtime": 115.302, "eval_samples_per_second": 867.131, "eval_steps_per_second": 54.197, "step": 1192000 }, { "epoch": 10.11, "learning_rate": 5.002918369048611e-06, "loss": 3.4571, "step": 1200000 }, { "epoch": 10.11, "eval_loss": 3.297788619995117, "eval_runtime": 116.3659, "eval_samples_per_second": 859.204, "eval_steps_per_second": 53.701, "step": 1200000 }, { "epoch": 10.17, "eval_loss": 3.309051990509033, "eval_runtime": 116.156, "eval_samples_per_second": 860.756, "eval_steps_per_second": 53.798, "step": 1208000 }, { "epoch": 10.24, "learning_rate": 4.936212790794631e-06, "loss": 3.4567, "step": 1216000 }, { "epoch": 10.24, "eval_loss": 3.275514602661133, "eval_runtime": 114.7773, "eval_samples_per_second": 871.096, "eval_steps_per_second": 54.445, "step": 1216000 }, { "epoch": 10.31, "eval_loss": 3.296752691268921, "eval_runtime": 116.1942, "eval_samples_per_second": 860.473, "eval_steps_per_second": 53.781, "step": 1224000 }, { "epoch": 10.38, "learning_rate": 4.869507212540649e-06, "loss": 3.4584, "step": 1232000 }, { "epoch": 10.38, "eval_loss": 3.29911732673645, "eval_runtime": 115.3134, "eval_samples_per_second": 867.046, "eval_steps_per_second": 54.191, "step": 1232000 }, { "epoch": 10.44, "eval_loss": 3.2818071842193604, "eval_runtime": 115.3726, "eval_samples_per_second": 866.601, "eval_steps_per_second": 54.164, "step": 1240000 }, { "epoch": 10.51, "learning_rate": 4.802801634286667e-06, "loss": 3.4459, "step": 1248000 }, { "epoch": 10.51, "eval_loss": 3.282339334487915, "eval_runtime": 115.2771, "eval_samples_per_second": 867.319, "eval_steps_per_second": 54.209, "step": 1248000 }, { "epoch": 10.58, "eval_loss": 3.2800145149230957, "eval_runtime": 114.9649, "eval_samples_per_second": 869.674, "eval_steps_per_second": 54.356, "step": 1256000 }, { "epoch": 10.65, "learning_rate": 4.7360960560326865e-06, "loss": 3.4474, "step": 1264000 }, { "epoch": 10.65, "eval_loss": 3.285576820373535, "eval_runtime": 114.3634, "eval_samples_per_second": 874.248, "eval_steps_per_second": 54.642, "step": 1264000 }, { "epoch": 10.71, "eval_loss": 3.2844762802124023, "eval_runtime": 114.9015, "eval_samples_per_second": 870.154, "eval_steps_per_second": 54.386, "step": 1272000 }, { "epoch": 10.78, "learning_rate": 4.669390477778704e-06, "loss": 3.4383, "step": 1280000 }, { "epoch": 10.78, "eval_loss": 3.2804107666015625, "eval_runtime": 115.558, "eval_samples_per_second": 865.211, "eval_steps_per_second": 54.077, "step": 1280000 }, { "epoch": 10.85, "eval_loss": 3.270656108856201, "eval_runtime": 114.5704, "eval_samples_per_second": 872.669, "eval_steps_per_second": 54.543, "step": 1288000 }, { "epoch": 10.92, "learning_rate": 4.602684899524723e-06, "loss": 3.4496, "step": 1296000 }, { "epoch": 10.92, "eval_loss": 3.28237247467041, "eval_runtime": 115.9515, "eval_samples_per_second": 862.274, "eval_steps_per_second": 53.893, "step": 1296000 }, { "epoch": 10.98, "eval_loss": 3.276503562927246, "eval_runtime": 114.4558, "eval_samples_per_second": 873.542, "eval_steps_per_second": 54.597, "step": 1304000 }, { "epoch": 11.05, "learning_rate": 4.5359793212707415e-06, "loss": 3.4411, "step": 1312000 }, { "epoch": 11.05, "eval_loss": 3.2838242053985596, "eval_runtime": 115.3787, "eval_samples_per_second": 866.555, "eval_steps_per_second": 54.161, "step": 1312000 }, { "epoch": 11.12, "eval_loss": 3.2839205265045166, "eval_runtime": 114.911, "eval_samples_per_second": 870.082, "eval_steps_per_second": 54.381, "step": 1320000 }, { "epoch": 11.19, "learning_rate": 4.46927374301676e-06, "loss": 3.4305, "step": 1328000 }, { "epoch": 11.19, "eval_loss": 3.274820327758789, "eval_runtime": 114.8509, "eval_samples_per_second": 870.538, "eval_steps_per_second": 54.41, "step": 1328000 }, { "epoch": 11.25, "eval_loss": 3.28206205368042, "eval_runtime": 115.1535, "eval_samples_per_second": 868.25, "eval_steps_per_second": 54.267, "step": 1336000 }, { "epoch": 11.32, "learning_rate": 4.402568164762779e-06, "loss": 3.4258, "step": 1344000 }, { "epoch": 11.32, "eval_loss": 3.274627447128296, "eval_runtime": 115.2346, "eval_samples_per_second": 867.639, "eval_steps_per_second": 54.229, "step": 1344000 }, { "epoch": 11.39, "eval_loss": 3.2860774993896484, "eval_runtime": 115.4118, "eval_samples_per_second": 866.307, "eval_steps_per_second": 54.145, "step": 1352000 }, { "epoch": 11.45, "learning_rate": 4.335862586508797e-06, "loss": 3.4227, "step": 1360000 }, { "epoch": 11.45, "eval_loss": 3.27103853225708, "eval_runtime": 115.4526, "eval_samples_per_second": 866.0, "eval_steps_per_second": 54.126, "step": 1360000 }, { "epoch": 11.52, "eval_loss": 3.2788245677948, "eval_runtime": 114.6345, "eval_samples_per_second": 872.181, "eval_steps_per_second": 54.512, "step": 1368000 }, { "epoch": 11.59, "learning_rate": 4.269157008254816e-06, "loss": 3.4319, "step": 1376000 }, { "epoch": 11.59, "eval_loss": 3.279372215270996, "eval_runtime": 114.9532, "eval_samples_per_second": 869.763, "eval_steps_per_second": 54.361, "step": 1376000 }, { "epoch": 11.66, "eval_loss": 3.27657413482666, "eval_runtime": 114.59, "eval_samples_per_second": 872.519, "eval_steps_per_second": 54.534, "step": 1384000 }, { "epoch": 11.72, "learning_rate": 4.202451430000834e-06, "loss": 3.436, "step": 1392000 }, { "epoch": 11.72, "eval_loss": 3.2923691272735596, "eval_runtime": 114.6828, "eval_samples_per_second": 871.813, "eval_steps_per_second": 54.489, "step": 1392000 }, { "epoch": 11.79, "eval_loss": 3.281205654144287, "eval_runtime": 114.5759, "eval_samples_per_second": 872.626, "eval_steps_per_second": 54.54, "step": 1400000 }, { "epoch": 11.86, "learning_rate": 4.135745851746852e-06, "loss": 3.4368, "step": 1408000 }, { "epoch": 11.86, "eval_loss": 3.2851309776306152, "eval_runtime": 115.7382, "eval_samples_per_second": 863.863, "eval_steps_per_second": 53.993, "step": 1408000 }, { "epoch": 11.93, "eval_loss": 3.282189130783081, "eval_runtime": 115.2625, "eval_samples_per_second": 867.429, "eval_steps_per_second": 54.215, "step": 1416000 }, { "epoch": 11.99, "learning_rate": 4.069040273492872e-06, "loss": 3.4346, "step": 1424000 }, { "epoch": 11.99, "eval_loss": 3.265688419342041, "eval_runtime": 114.9393, "eval_samples_per_second": 869.868, "eval_steps_per_second": 54.368, "step": 1424000 }, { "epoch": 12.06, "eval_loss": 3.2747557163238525, "eval_runtime": 115.4082, "eval_samples_per_second": 866.334, "eval_steps_per_second": 54.147, "step": 1432000 }, { "epoch": 12.13, "learning_rate": 4.0023346952388895e-06, "loss": 3.4265, "step": 1440000 }, { "epoch": 12.13, "eval_loss": 3.2684779167175293, "eval_runtime": 115.418, "eval_samples_per_second": 866.26, "eval_steps_per_second": 54.142, "step": 1440000 }, { "epoch": 12.2, "eval_loss": 3.294729709625244, "eval_runtime": 116.8415, "eval_samples_per_second": 855.706, "eval_steps_per_second": 53.483, "step": 1448000 }, { "epoch": 12.26, "learning_rate": 3.935629116984908e-06, "loss": 3.4306, "step": 1456000 }, { "epoch": 12.26, "eval_loss": 3.2840843200683594, "eval_runtime": 114.8072, "eval_samples_per_second": 870.868, "eval_steps_per_second": 54.43, "step": 1456000 }, { "epoch": 12.33, "eval_loss": 3.2748351097106934, "eval_runtime": 114.5932, "eval_samples_per_second": 872.495, "eval_steps_per_second": 54.532, "step": 1464000 }, { "epoch": 12.4, "learning_rate": 3.868923538730927e-06, "loss": 3.4254, "step": 1472000 }, { "epoch": 12.4, "eval_loss": 3.2793681621551514, "eval_runtime": 114.5871, "eval_samples_per_second": 872.542, "eval_steps_per_second": 54.535, "step": 1472000 }, { "epoch": 12.47, "eval_loss": 3.2773730754852295, "eval_runtime": 114.5544, "eval_samples_per_second": 872.79, "eval_steps_per_second": 54.55, "step": 1480000 }, { "epoch": 12.53, "learning_rate": 3.8022179604769453e-06, "loss": 3.4353, "step": 1488000 }, { "epoch": 12.53, "eval_loss": 3.272627592086792, "eval_runtime": 115.5185, "eval_samples_per_second": 865.506, "eval_steps_per_second": 54.095, "step": 1488000 }, { "epoch": 12.6, "eval_loss": 3.27628493309021, "eval_runtime": 115.6599, "eval_samples_per_second": 864.448, "eval_steps_per_second": 54.029, "step": 1496000 }, { "epoch": 12.67, "learning_rate": 3.735512382222964e-06, "loss": 3.4358, "step": 1504000 }, { "epoch": 12.67, "eval_loss": 3.265916109085083, "eval_runtime": 116.0704, "eval_samples_per_second": 861.391, "eval_steps_per_second": 53.838, "step": 1504000 }, { "epoch": 12.73, "eval_loss": 3.270989418029785, "eval_runtime": 114.6284, "eval_samples_per_second": 872.227, "eval_steps_per_second": 54.515, "step": 1512000 }, { "epoch": 12.8, "learning_rate": 3.668806803968982e-06, "loss": 3.4182, "step": 1520000 }, { "epoch": 12.8, "eval_loss": 3.27774977684021, "eval_runtime": 114.716, "eval_samples_per_second": 871.561, "eval_steps_per_second": 54.474, "step": 1520000 }, { "epoch": 12.87, "eval_loss": 3.2824196815490723, "eval_runtime": 114.6356, "eval_samples_per_second": 872.173, "eval_steps_per_second": 54.512, "step": 1528000 }, { "epoch": 12.94, "learning_rate": 3.6021012257150007e-06, "loss": 3.4384, "step": 1536000 }, { "epoch": 12.94, "eval_loss": 3.2886571884155273, "eval_runtime": 117.3403, "eval_samples_per_second": 852.069, "eval_steps_per_second": 53.255, "step": 1536000 }, { "epoch": 13.0, "eval_loss": 3.2667236328125, "eval_runtime": 115.4584, "eval_samples_per_second": 865.957, "eval_steps_per_second": 54.123, "step": 1544000 }, { "epoch": 13.07, "learning_rate": 3.535395647461019e-06, "loss": 3.4287, "step": 1552000 }, { "epoch": 13.07, "eval_loss": 3.271289110183716, "eval_runtime": 115.0142, "eval_samples_per_second": 869.302, "eval_steps_per_second": 54.332, "step": 1552000 }, { "epoch": 13.14, "eval_loss": 3.2639546394348145, "eval_runtime": 116.2578, "eval_samples_per_second": 860.003, "eval_steps_per_second": 53.751, "step": 1560000 }, { "epoch": 13.21, "learning_rate": 3.468690069207038e-06, "loss": 3.4181, "step": 1568000 }, { "epoch": 13.21, "eval_loss": 3.260748863220215, "eval_runtime": 115.3695, "eval_samples_per_second": 866.624, "eval_steps_per_second": 54.165, "step": 1568000 }, { "epoch": 13.27, "eval_loss": 3.264313220977783, "eval_runtime": 115.2949, "eval_samples_per_second": 867.185, "eval_steps_per_second": 54.2, "step": 1576000 }, { "epoch": 13.34, "learning_rate": 3.4019844909530565e-06, "loss": 3.4173, "step": 1584000 }, { "epoch": 13.34, "eval_loss": 3.262951612472534, "eval_runtime": 115.3025, "eval_samples_per_second": 867.128, "eval_steps_per_second": 54.197, "step": 1584000 }, { "epoch": 13.41, "eval_loss": 3.2571523189544678, "eval_runtime": 115.8833, "eval_samples_per_second": 862.781, "eval_steps_per_second": 53.925, "step": 1592000 }, { "epoch": 13.48, "learning_rate": 3.3352789126990747e-06, "loss": 3.4214, "step": 1600000 }, { "epoch": 13.48, "eval_loss": 3.2727572917938232, "eval_runtime": 114.8401, "eval_samples_per_second": 870.62, "eval_steps_per_second": 54.415, "step": 1600000 }, { "epoch": 13.54, "eval_loss": 3.2822391986846924, "eval_runtime": 115.763, "eval_samples_per_second": 863.678, "eval_steps_per_second": 53.981, "step": 1608000 }, { "epoch": 13.61, "learning_rate": 3.2685733344450933e-06, "loss": 3.4223, "step": 1616000 }, { "epoch": 13.61, "eval_loss": 3.270418882369995, "eval_runtime": 114.8559, "eval_samples_per_second": 870.499, "eval_steps_per_second": 54.407, "step": 1616000 }, { "epoch": 13.68, "eval_loss": 3.263397216796875, "eval_runtime": 115.6612, "eval_samples_per_second": 864.439, "eval_steps_per_second": 54.029, "step": 1624000 }, { "epoch": 13.75, "learning_rate": 3.2018677561911115e-06, "loss": 3.417, "step": 1632000 }, { "epoch": 13.75, "eval_loss": 3.269148349761963, "eval_runtime": 116.0444, "eval_samples_per_second": 861.584, "eval_steps_per_second": 53.85, "step": 1632000 }, { "epoch": 13.81, "eval_loss": 3.255032539367676, "eval_runtime": 116.0315, "eval_samples_per_second": 861.68, "eval_steps_per_second": 53.856, "step": 1640000 }, { "epoch": 13.88, "learning_rate": 3.1351621779371306e-06, "loss": 3.4146, "step": 1648000 }, { "epoch": 13.88, "eval_loss": 3.2528505325317383, "eval_runtime": 115.6943, "eval_samples_per_second": 864.191, "eval_steps_per_second": 54.013, "step": 1648000 }, { "epoch": 13.95, "eval_loss": 3.271348237991333, "eval_runtime": 114.7472, "eval_samples_per_second": 871.324, "eval_steps_per_second": 54.459, "step": 1656000 }, { "epoch": 14.02, "learning_rate": 3.0684565996831487e-06, "loss": 3.4186, "step": 1664000 }, { "epoch": 14.02, "eval_loss": 3.2671616077423096, "eval_runtime": 115.2164, "eval_samples_per_second": 867.776, "eval_steps_per_second": 54.237, "step": 1664000 }, { "epoch": 14.08, "eval_loss": 3.2542309761047363, "eval_runtime": 115.2327, "eval_samples_per_second": 867.653, "eval_steps_per_second": 54.229, "step": 1672000 }, { "epoch": 14.15, "learning_rate": 3.0017510214291673e-06, "loss": 3.4082, "step": 1680000 }, { "epoch": 14.15, "eval_loss": 3.2575573921203613, "eval_runtime": 115.6638, "eval_samples_per_second": 864.419, "eval_steps_per_second": 54.027, "step": 1680000 }, { "epoch": 14.22, "eval_loss": 3.2680304050445557, "eval_runtime": 116.6886, "eval_samples_per_second": 856.827, "eval_steps_per_second": 53.553, "step": 1688000 }, { "epoch": 14.28, "learning_rate": 2.9350454431751855e-06, "loss": 3.4186, "step": 1696000 }, { "epoch": 14.28, "eval_loss": 3.266725778579712, "eval_runtime": 115.7616, "eval_samples_per_second": 863.689, "eval_steps_per_second": 53.982, "step": 1696000 }, { "epoch": 14.35, "eval_loss": 3.269421339035034, "eval_runtime": 115.2493, "eval_samples_per_second": 867.528, "eval_steps_per_second": 54.222, "step": 1704000 }, { "epoch": 14.42, "learning_rate": 2.868339864921204e-06, "loss": 3.4131, "step": 1712000 }, { "epoch": 14.42, "eval_loss": 3.2606043815612793, "eval_runtime": 116.1606, "eval_samples_per_second": 860.722, "eval_steps_per_second": 53.796, "step": 1712000 }, { "epoch": 14.49, "eval_loss": 3.2622175216674805, "eval_runtime": 115.2414, "eval_samples_per_second": 867.587, "eval_steps_per_second": 54.225, "step": 1720000 }, { "epoch": 14.55, "learning_rate": 2.801634286667223e-06, "loss": 3.4239, "step": 1728000 }, { "epoch": 14.55, "eval_loss": 3.2678098678588867, "eval_runtime": 116.298, "eval_samples_per_second": 859.705, "eval_steps_per_second": 53.733, "step": 1728000 }, { "epoch": 14.62, "eval_loss": 3.2707767486572266, "eval_runtime": 115.813, "eval_samples_per_second": 863.305, "eval_steps_per_second": 53.958, "step": 1736000 }, { "epoch": 14.69, "learning_rate": 2.7349287084132413e-06, "loss": 3.4197, "step": 1744000 }, { "epoch": 14.69, "eval_loss": 3.2622435092926025, "eval_runtime": 116.7137, "eval_samples_per_second": 856.643, "eval_steps_per_second": 53.541, "step": 1744000 }, { "epoch": 14.76, "eval_loss": 3.260528087615967, "eval_runtime": 115.8618, "eval_samples_per_second": 862.942, "eval_steps_per_second": 53.935, "step": 1752000 }, { "epoch": 14.82, "learning_rate": 2.66822313015926e-06, "loss": 3.4073, "step": 1760000 }, { "epoch": 14.82, "eval_loss": 3.2647342681884766, "eval_runtime": 116.0753, "eval_samples_per_second": 861.355, "eval_steps_per_second": 53.836, "step": 1760000 }, { "epoch": 14.89, "eval_loss": 3.2618629932403564, "eval_runtime": 115.7243, "eval_samples_per_second": 863.967, "eval_steps_per_second": 53.999, "step": 1768000 }, { "epoch": 14.96, "learning_rate": 2.601517551905278e-06, "loss": 3.4167, "step": 1776000 }, { "epoch": 14.96, "eval_loss": 3.2816412448883057, "eval_runtime": 116.0459, "eval_samples_per_second": 861.573, "eval_steps_per_second": 53.849, "step": 1776000 }, { "epoch": 15.03, "eval_loss": 3.260328531265259, "eval_runtime": 115.7751, "eval_samples_per_second": 863.588, "eval_steps_per_second": 53.975, "step": 1784000 }, { "epoch": 15.09, "learning_rate": 2.5348119736512967e-06, "loss": 3.413, "step": 1792000 }, { "epoch": 15.09, "eval_loss": 3.2661468982696533, "eval_runtime": 115.5087, "eval_samples_per_second": 865.58, "eval_steps_per_second": 54.1, "step": 1792000 }, { "epoch": 15.16, "eval_loss": 3.2589173316955566, "eval_runtime": 115.8968, "eval_samples_per_second": 862.681, "eval_steps_per_second": 53.919, "step": 1800000 }, { "epoch": 15.23, "learning_rate": 2.4681063953973154e-06, "loss": 3.4117, "step": 1808000 }, { "epoch": 15.23, "eval_loss": 3.2688403129577637, "eval_runtime": 115.5062, "eval_samples_per_second": 865.598, "eval_steps_per_second": 54.101, "step": 1808000 }, { "epoch": 15.3, "eval_loss": 3.2677767276763916, "eval_runtime": 115.467, "eval_samples_per_second": 865.892, "eval_steps_per_second": 54.119, "step": 1816000 }, { "epoch": 15.36, "learning_rate": 2.4014008171433335e-06, "loss": 3.4103, "step": 1824000 }, { "epoch": 15.36, "eval_loss": 3.266075372695923, "eval_runtime": 114.9844, "eval_samples_per_second": 869.527, "eval_steps_per_second": 54.347, "step": 1824000 }, { "epoch": 15.43, "eval_loss": 3.270479917526245, "eval_runtime": 116.2693, "eval_samples_per_second": 859.917, "eval_steps_per_second": 53.746, "step": 1832000 }, { "epoch": 15.5, "learning_rate": 2.334695238889352e-06, "loss": 3.4074, "step": 1840000 }, { "epoch": 15.5, "eval_loss": 3.267005443572998, "eval_runtime": 115.371, "eval_samples_per_second": 866.613, "eval_steps_per_second": 54.164, "step": 1840000 }, { "epoch": 15.56, "eval_loss": 3.2619106769561768, "eval_runtime": 115.9983, "eval_samples_per_second": 861.926, "eval_steps_per_second": 53.871, "step": 1848000 }, { "epoch": 15.63, "learning_rate": 2.2679896606353707e-06, "loss": 3.4167, "step": 1856000 }, { "epoch": 15.63, "eval_loss": 3.262441635131836, "eval_runtime": 115.7529, "eval_samples_per_second": 863.754, "eval_steps_per_second": 53.986, "step": 1856000 }, { "epoch": 15.7, "eval_loss": 3.255234479904175, "eval_runtime": 115.5701, "eval_samples_per_second": 865.12, "eval_steps_per_second": 54.071, "step": 1864000 }, { "epoch": 15.77, "learning_rate": 2.2012840823813894e-06, "loss": 3.4195, "step": 1872000 }, { "epoch": 15.77, "eval_loss": 3.2503316402435303, "eval_runtime": 115.6488, "eval_samples_per_second": 864.531, "eval_steps_per_second": 54.034, "step": 1872000 }, { "epoch": 15.83, "eval_loss": 3.2605812549591064, "eval_runtime": 115.723, "eval_samples_per_second": 863.977, "eval_steps_per_second": 54.0, "step": 1880000 }, { "epoch": 15.9, "learning_rate": 2.134578504127408e-06, "loss": 3.4091, "step": 1888000 }, { "epoch": 15.9, "eval_loss": 3.2811596393585205, "eval_runtime": 116.4883, "eval_samples_per_second": 858.301, "eval_steps_per_second": 53.645, "step": 1888000 }, { "epoch": 15.97, "eval_loss": 3.2836642265319824, "eval_runtime": 117.0403, "eval_samples_per_second": 854.253, "eval_steps_per_second": 53.392, "step": 1896000 }, { "epoch": 16.04, "learning_rate": 2.067872925873426e-06, "loss": 3.4116, "step": 1904000 }, { "epoch": 16.04, "eval_loss": 3.2657785415649414, "eval_runtime": 116.2602, "eval_samples_per_second": 859.985, "eval_steps_per_second": 53.75, "step": 1904000 }, { "epoch": 16.1, "eval_loss": 3.267613410949707, "eval_runtime": 115.5655, "eval_samples_per_second": 865.155, "eval_steps_per_second": 54.073, "step": 1912000 }, { "epoch": 16.17, "learning_rate": 2.0011673476194448e-06, "loss": 3.4183, "step": 1920000 }, { "epoch": 16.17, "eval_loss": 3.277005910873413, "eval_runtime": 116.4955, "eval_samples_per_second": 858.248, "eval_steps_per_second": 53.642, "step": 1920000 }, { "epoch": 16.24, "eval_loss": 3.2755773067474365, "eval_runtime": 116.1016, "eval_samples_per_second": 861.159, "eval_steps_per_second": 53.824, "step": 1928000 }, { "epoch": 16.31, "learning_rate": 1.9344617693654634e-06, "loss": 3.4177, "step": 1936000 }, { "epoch": 16.31, "eval_loss": 3.28764271736145, "eval_runtime": 115.8567, "eval_samples_per_second": 862.98, "eval_steps_per_second": 53.937, "step": 1936000 }, { "epoch": 16.37, "eval_loss": 3.261235475540161, "eval_runtime": 116.3096, "eval_samples_per_second": 859.619, "eval_steps_per_second": 53.727, "step": 1944000 }, { "epoch": 16.44, "learning_rate": 1.867756191111482e-06, "loss": 3.4226, "step": 1952000 }, { "epoch": 16.44, "eval_loss": 3.274751901626587, "eval_runtime": 116.724, "eval_samples_per_second": 856.568, "eval_steps_per_second": 53.537, "step": 1952000 }, { "epoch": 16.51, "eval_loss": 3.2679269313812256, "eval_runtime": 115.9032, "eval_samples_per_second": 862.634, "eval_steps_per_second": 53.916, "step": 1960000 }, { "epoch": 16.58, "learning_rate": 1.8010506128575004e-06, "loss": 3.4154, "step": 1968000 }, { "epoch": 16.58, "eval_loss": 3.2658944129943848, "eval_runtime": 116.7603, "eval_samples_per_second": 856.301, "eval_steps_per_second": 53.52, "step": 1968000 }, { "epoch": 16.64, "eval_loss": 3.268889904022217, "eval_runtime": 116.8782, "eval_samples_per_second": 855.438, "eval_steps_per_second": 53.466, "step": 1976000 }, { "epoch": 16.71, "learning_rate": 1.734345034603519e-06, "loss": 3.4199, "step": 1984000 }, { "epoch": 16.71, "eval_loss": 3.2701141834259033, "eval_runtime": 116.3032, "eval_samples_per_second": 859.667, "eval_steps_per_second": 53.73, "step": 1984000 }, { "epoch": 16.78, "eval_loss": 3.256370782852173, "eval_runtime": 116.1051, "eval_samples_per_second": 861.133, "eval_steps_per_second": 53.822, "step": 1992000 }, { "epoch": 16.85, "learning_rate": 1.6676394563495374e-06, "loss": 3.4166, "step": 2000000 }, { "epoch": 16.85, "eval_loss": 3.2714390754699707, "eval_runtime": 117.0225, "eval_samples_per_second": 854.383, "eval_steps_per_second": 53.4, "step": 2000000 }, { "epoch": 16.91, "eval_loss": 3.2737603187561035, "eval_runtime": 115.9388, "eval_samples_per_second": 862.369, "eval_steps_per_second": 53.899, "step": 2008000 }, { "epoch": 16.98, "learning_rate": 1.6009338780955558e-06, "loss": 3.4054, "step": 2016000 }, { "epoch": 16.98, "eval_loss": 3.2632555961608887, "eval_runtime": 116.0352, "eval_samples_per_second": 861.653, "eval_steps_per_second": 53.854, "step": 2016000 }, { "epoch": 17.05, "eval_loss": 3.2573704719543457, "eval_runtime": 116.0533, "eval_samples_per_second": 861.518, "eval_steps_per_second": 53.846, "step": 2024000 }, { "epoch": 17.11, "learning_rate": 1.5342282998415744e-06, "loss": 3.4022, "step": 2032000 }, { "epoch": 17.11, "eval_loss": 3.2636642456054688, "eval_runtime": 116.7961, "eval_samples_per_second": 856.039, "eval_steps_per_second": 53.503, "step": 2032000 }, { "epoch": 17.18, "eval_loss": 3.268772840499878, "eval_runtime": 116.7746, "eval_samples_per_second": 856.197, "eval_steps_per_second": 53.513, "step": 2040000 }, { "epoch": 17.25, "learning_rate": 1.4675227215875928e-06, "loss": 3.408, "step": 2048000 }, { "epoch": 17.25, "eval_loss": 3.2667033672332764, "eval_runtime": 116.5476, "eval_samples_per_second": 857.864, "eval_steps_per_second": 53.618, "step": 2048000 }, { "epoch": 17.32, "eval_loss": 3.2577567100524902, "eval_runtime": 116.0215, "eval_samples_per_second": 861.754, "eval_steps_per_second": 53.861, "step": 2056000 }, { "epoch": 17.38, "learning_rate": 1.4008171433336116e-06, "loss": 3.4065, "step": 2064000 }, { "epoch": 17.38, "eval_loss": 3.2604563236236572, "eval_runtime": 116.1892, "eval_samples_per_second": 860.51, "eval_steps_per_second": 53.783, "step": 2064000 }, { "epoch": 17.45, "eval_loss": 3.2768325805664062, "eval_runtime": 116.824, "eval_samples_per_second": 855.834, "eval_steps_per_second": 53.491, "step": 2072000 }, { "epoch": 17.52, "learning_rate": 1.33411156507963e-06, "loss": 3.4105, "step": 2080000 }, { "epoch": 17.52, "eval_loss": 3.256869316101074, "eval_runtime": 116.5621, "eval_samples_per_second": 857.758, "eval_steps_per_second": 53.611, "step": 2080000 }, { "epoch": 17.59, "eval_loss": 3.2519402503967285, "eval_runtime": 117.0733, "eval_samples_per_second": 854.012, "eval_steps_per_second": 53.377, "step": 2088000 }, { "epoch": 17.65, "learning_rate": 1.2674059868256484e-06, "loss": 3.4011, "step": 2096000 }, { "epoch": 17.65, "eval_loss": 3.2555432319641113, "eval_runtime": 116.8888, "eval_samples_per_second": 855.36, "eval_steps_per_second": 53.461, "step": 2096000 }, { "epoch": 17.72, "eval_loss": 3.248750686645508, "eval_runtime": 116.2807, "eval_samples_per_second": 859.833, "eval_steps_per_second": 53.741, "step": 2104000 }, { "epoch": 17.79, "learning_rate": 1.2007004085716668e-06, "loss": 3.4078, "step": 2112000 }, { "epoch": 17.79, "eval_loss": 3.2515714168548584, "eval_runtime": 115.6718, "eval_samples_per_second": 864.359, "eval_steps_per_second": 54.024, "step": 2112000 }, { "epoch": 17.86, "eval_loss": 3.252725124359131, "eval_runtime": 116.4663, "eval_samples_per_second": 858.463, "eval_steps_per_second": 53.655, "step": 2120000 }, { "epoch": 17.92, "learning_rate": 1.1339948303176854e-06, "loss": 3.4105, "step": 2128000 }, { "epoch": 17.92, "eval_loss": 3.256073236465454, "eval_runtime": 116.7382, "eval_samples_per_second": 856.464, "eval_steps_per_second": 53.53, "step": 2128000 }, { "epoch": 17.99, "eval_loss": 3.2580018043518066, "eval_runtime": 117.5932, "eval_samples_per_second": 850.236, "eval_steps_per_second": 53.141, "step": 2136000 }, { "epoch": 18.06, "learning_rate": 1.067289252063704e-06, "loss": 3.4054, "step": 2144000 }, { "epoch": 18.06, "eval_loss": 3.2453107833862305, "eval_runtime": 117.5514, "eval_samples_per_second": 850.538, "eval_steps_per_second": 53.16, "step": 2144000 }, { "epoch": 18.13, "eval_loss": 3.2425551414489746, "eval_runtime": 116.4473, "eval_samples_per_second": 858.603, "eval_steps_per_second": 53.664, "step": 2152000 }, { "epoch": 18.19, "learning_rate": 1.0005836738097224e-06, "loss": 3.3937, "step": 2160000 }, { "epoch": 18.19, "eval_loss": 3.251696825027466, "eval_runtime": 116.4315, "eval_samples_per_second": 858.719, "eval_steps_per_second": 53.671, "step": 2160000 }, { "epoch": 18.26, "eval_loss": 3.2446274757385254, "eval_runtime": 116.261, "eval_samples_per_second": 859.979, "eval_steps_per_second": 53.75, "step": 2168000 }, { "epoch": 18.33, "learning_rate": 9.33878095555741e-07, "loss": 3.4001, "step": 2176000 }, { "epoch": 18.33, "eval_loss": 3.2449288368225098, "eval_runtime": 116.9317, "eval_samples_per_second": 855.046, "eval_steps_per_second": 53.441, "step": 2176000 }, { "epoch": 18.39, "eval_loss": 3.252725601196289, "eval_runtime": 116.826, "eval_samples_per_second": 855.82, "eval_steps_per_second": 53.49, "step": 2184000 }, { "epoch": 18.46, "learning_rate": 8.671725173017595e-07, "loss": 3.413, "step": 2192000 }, { "epoch": 18.46, "eval_loss": 3.2557225227355957, "eval_runtime": 117.66, "eval_samples_per_second": 849.754, "eval_steps_per_second": 53.111, "step": 2192000 }, { "epoch": 18.53, "eval_loss": 3.2483036518096924, "eval_runtime": 116.1271, "eval_samples_per_second": 860.97, "eval_steps_per_second": 53.812, "step": 2200000 }, { "epoch": 18.6, "learning_rate": 8.004669390477779e-07, "loss": 3.3882, "step": 2208000 }, { "epoch": 18.6, "eval_loss": 3.25201416015625, "eval_runtime": 117.5264, "eval_samples_per_second": 850.72, "eval_steps_per_second": 53.171, "step": 2208000 }, { "epoch": 18.66, "eval_loss": 3.2354042530059814, "eval_runtime": 117.3232, "eval_samples_per_second": 852.193, "eval_steps_per_second": 53.263, "step": 2216000 }, { "epoch": 18.73, "learning_rate": 7.337613607937964e-07, "loss": 3.3974, "step": 2224000 }, { "epoch": 18.73, "eval_loss": 3.2540123462677, "eval_runtime": 116.4684, "eval_samples_per_second": 858.448, "eval_steps_per_second": 53.654, "step": 2224000 }, { "epoch": 18.8, "eval_loss": 3.242626190185547, "eval_runtime": 116.2833, "eval_samples_per_second": 859.814, "eval_steps_per_second": 53.739, "step": 2232000 }, { "epoch": 18.87, "learning_rate": 6.67055782539815e-07, "loss": 3.3864, "step": 2240000 }, { "epoch": 18.87, "eval_loss": 3.234111785888672, "eval_runtime": 117.5517, "eval_samples_per_second": 850.537, "eval_steps_per_second": 53.16, "step": 2240000 }, { "epoch": 18.93, "eval_loss": 3.240849018096924, "eval_runtime": 117.6624, "eval_samples_per_second": 849.736, "eval_steps_per_second": 53.11, "step": 2248000 }, { "epoch": 19.0, "learning_rate": 6.003502042858334e-07, "loss": 3.3896, "step": 2256000 }, { "epoch": 19.0, "eval_loss": 3.234224557876587, "eval_runtime": 117.5095, "eval_samples_per_second": 850.842, "eval_steps_per_second": 53.179, "step": 2256000 }, { "epoch": 19.07, "eval_loss": 3.2414724826812744, "eval_runtime": 117.6058, "eval_samples_per_second": 850.145, "eval_steps_per_second": 53.135, "step": 2264000 }, { "epoch": 19.14, "learning_rate": 5.33644626031852e-07, "loss": 3.3845, "step": 2272000 }, { "epoch": 19.14, "eval_loss": 3.2445499897003174, "eval_runtime": 117.4897, "eval_samples_per_second": 850.985, "eval_steps_per_second": 53.188, "step": 2272000 }, { "epoch": 19.2, "eval_loss": 3.2422473430633545, "eval_runtime": 117.2174, "eval_samples_per_second": 852.962, "eval_steps_per_second": 53.311, "step": 2280000 }, { "epoch": 19.27, "learning_rate": 4.669390477778705e-07, "loss": 3.3916, "step": 2288000 }, { "epoch": 19.27, "eval_loss": 3.2379391193389893, "eval_runtime": 116.6572, "eval_samples_per_second": 857.058, "eval_steps_per_second": 53.567, "step": 2288000 }, { "epoch": 19.34, "eval_loss": 3.241091012954712, "eval_runtime": 117.6824, "eval_samples_per_second": 849.592, "eval_steps_per_second": 53.101, "step": 2296000 }, { "epoch": 19.41, "learning_rate": 4.0023346952388894e-07, "loss": 3.3919, "step": 2304000 }, { "epoch": 19.41, "eval_loss": 3.24294114112854, "eval_runtime": 116.557, "eval_samples_per_second": 857.795, "eval_steps_per_second": 53.613, "step": 2304000 }, { "epoch": 19.47, "eval_loss": 3.2372183799743652, "eval_runtime": 117.1036, "eval_samples_per_second": 853.791, "eval_steps_per_second": 53.363, "step": 2312000 }, { "epoch": 19.54, "learning_rate": 3.335278912699075e-07, "loss": 3.39, "step": 2320000 }, { "epoch": 19.54, "eval_loss": 3.2379844188690186, "eval_runtime": 118.6177, "eval_samples_per_second": 842.892, "eval_steps_per_second": 52.682, "step": 2320000 }, { "epoch": 19.61, "eval_loss": 3.2353270053863525, "eval_runtime": 117.8042, "eval_samples_per_second": 848.713, "eval_steps_per_second": 53.046, "step": 2328000 }, { "epoch": 19.68, "learning_rate": 2.66822313015926e-07, "loss": 3.3905, "step": 2336000 }, { "epoch": 19.68, "eval_loss": 3.2327044010162354, "eval_runtime": 117.5491, "eval_samples_per_second": 850.555, "eval_steps_per_second": 53.161, "step": 2336000 }, { "epoch": 19.74, "eval_loss": 3.2494277954101562, "eval_runtime": 117.2833, "eval_samples_per_second": 852.483, "eval_steps_per_second": 53.281, "step": 2344000 }, { "epoch": 19.81, "learning_rate": 2.0011673476194447e-07, "loss": 3.3826, "step": 2352000 }, { "epoch": 19.81, "eval_loss": 3.2369370460510254, "eval_runtime": 117.3893, "eval_samples_per_second": 851.713, "eval_steps_per_second": 53.233, "step": 2352000 }, { "epoch": 19.88, "eval_loss": 3.2389721870422363, "eval_runtime": 117.5802, "eval_samples_per_second": 850.33, "eval_steps_per_second": 53.147, "step": 2360000 }, { "epoch": 19.94, "learning_rate": 1.33411156507963e-07, "loss": 3.3935, "step": 2368000 }, { "epoch": 19.94, "eval_loss": 3.241490125656128, "eval_runtime": 116.6908, "eval_samples_per_second": 856.811, "eval_steps_per_second": 53.552, "step": 2368000 }, { "epoch": 20.01, "eval_loss": 3.2485716342926025, "eval_runtime": 117.193, "eval_samples_per_second": 853.14, "eval_steps_per_second": 53.322, "step": 2376000 }, { "epoch": 20.08, "learning_rate": 6.67055782539815e-08, "loss": 3.3846, "step": 2384000 }, { "epoch": 20.08, "eval_loss": 3.2353618144989014, "eval_runtime": 116.9874, "eval_samples_per_second": 854.639, "eval_steps_per_second": 53.416, "step": 2384000 }, { "epoch": 20.15, "eval_loss": 3.2465925216674805, "eval_runtime": 117.8085, "eval_samples_per_second": 848.683, "eval_steps_per_second": 53.044, "step": 2392000 }, { "epoch": 20.21, "learning_rate": 0.0, "loss": 3.3875, "step": 2400000 }, { "epoch": 20.21, "eval_loss": 3.2425458431243896, "eval_runtime": 117.6341, "eval_samples_per_second": 849.941, "eval_steps_per_second": 53.122, "step": 2400000 }, { "epoch": 20.21, "step": 2400000, "total_flos": 7.857549112634404e+17, "train_loss": 3.3721608984375, "train_runtime": 188638.1929, "train_samples_per_second": 203.564, "train_steps_per_second": 12.723 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 21, "save_steps": 32000, "total_flos": 7.857549112634404e+17, "trial_name": null, "trial_params": null }