{ "best_metric": null, "best_model_checkpoint": null, "epoch": 500.0, "global_step": 31000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 39.53297424316406, "eval_runtime": 1.5249, "eval_samples_per_second": 79.35, "eval_steps_per_second": 10.493, "step": 62 }, { "epoch": 2.0, "eval_loss": 39.283233642578125, "eval_runtime": 1.5209, "eval_samples_per_second": 79.56, "eval_steps_per_second": 10.52, "step": 124 }, { "epoch": 3.0, "eval_loss": 39.727474212646484, "eval_runtime": 1.5199, "eval_samples_per_second": 79.609, "eval_steps_per_second": 10.527, "step": 186 }, { "epoch": 4.0, "eval_loss": 38.73893356323242, "eval_runtime": 1.5313, "eval_samples_per_second": 79.02, "eval_steps_per_second": 10.449, "step": 248 }, { "epoch": 5.0, "eval_loss": 38.108787536621094, "eval_runtime": 1.5538, "eval_samples_per_second": 77.875, "eval_steps_per_second": 10.298, "step": 310 }, { "epoch": 6.0, "eval_loss": 37.65414047241211, "eval_runtime": 1.5218, "eval_samples_per_second": 79.511, "eval_steps_per_second": 10.514, "step": 372 }, { "epoch": 7.0, "eval_loss": 36.28599548339844, "eval_runtime": 1.5198, "eval_samples_per_second": 79.613, "eval_steps_per_second": 10.527, "step": 434 }, { "epoch": 8.0, "eval_loss": 36.02519607543945, "eval_runtime": 1.5195, "eval_samples_per_second": 79.632, "eval_steps_per_second": 10.53, "step": 496 }, { "epoch": 8.06, "learning_rate": 4.9193548387096775e-08, "loss": 40.88, "step": 500 }, { "epoch": 9.0, "eval_loss": 35.2044563293457, "eval_runtime": 1.5286, "eval_samples_per_second": 79.159, "eval_steps_per_second": 10.467, "step": 558 }, { "epoch": 10.0, "eval_loss": 34.13139724731445, "eval_runtime": 1.5277, "eval_samples_per_second": 79.203, "eval_steps_per_second": 10.473, "step": 620 }, { "epoch": 11.0, "eval_loss": 32.90681838989258, "eval_runtime": 1.5353, "eval_samples_per_second": 78.812, "eval_steps_per_second": 10.421, "step": 682 }, { "epoch": 12.0, "eval_loss": 32.091732025146484, "eval_runtime": 1.5361, "eval_samples_per_second": 78.769, "eval_steps_per_second": 10.416, "step": 744 }, { "epoch": 13.0, "eval_loss": 31.341379165649414, "eval_runtime": 1.5306, "eval_samples_per_second": 79.053, "eval_steps_per_second": 10.453, "step": 806 }, { "epoch": 14.0, "eval_loss": 30.152177810668945, "eval_runtime": 1.5194, "eval_samples_per_second": 79.635, "eval_steps_per_second": 10.53, "step": 868 }, { "epoch": 15.0, "eval_loss": 29.086437225341797, "eval_runtime": 1.522, "eval_samples_per_second": 79.502, "eval_steps_per_second": 10.513, "step": 930 }, { "epoch": 16.0, "eval_loss": 27.76565933227539, "eval_runtime": 1.5196, "eval_samples_per_second": 79.627, "eval_steps_per_second": 10.529, "step": 992 }, { "epoch": 16.13, "learning_rate": 4.8387096774193546e-08, "loss": 32.3449, "step": 1000 }, { "epoch": 17.0, "eval_loss": 26.679487228393555, "eval_runtime": 1.5328, "eval_samples_per_second": 78.942, "eval_steps_per_second": 10.439, "step": 1054 }, { "epoch": 18.0, "eval_loss": 25.430702209472656, "eval_runtime": 1.5318, "eval_samples_per_second": 78.994, "eval_steps_per_second": 10.445, "step": 1116 }, { "epoch": 19.0, "eval_loss": 24.457077026367188, "eval_runtime": 1.5339, "eval_samples_per_second": 78.882, "eval_steps_per_second": 10.431, "step": 1178 }, { "epoch": 20.0, "eval_loss": 24.03400993347168, "eval_runtime": 1.5361, "eval_samples_per_second": 78.771, "eval_steps_per_second": 10.416, "step": 1240 }, { "epoch": 21.0, "eval_loss": 22.688884735107422, "eval_runtime": 1.5349, "eval_samples_per_second": 78.834, "eval_steps_per_second": 10.424, "step": 1302 }, { "epoch": 22.0, "eval_loss": 21.914871215820312, "eval_runtime": 1.5268, "eval_samples_per_second": 79.251, "eval_steps_per_second": 10.48, "step": 1364 }, { "epoch": 23.0, "eval_loss": 20.947105407714844, "eval_runtime": 1.52, "eval_samples_per_second": 79.607, "eval_steps_per_second": 10.527, "step": 1426 }, { "epoch": 24.0, "eval_loss": 20.71515464782715, "eval_runtime": 1.5285, "eval_samples_per_second": 79.164, "eval_steps_per_second": 10.468, "step": 1488 }, { "epoch": 24.19, "learning_rate": 4.7580645161290323e-08, "loss": 24.114, "step": 1500 }, { "epoch": 25.0, "eval_loss": 19.957895278930664, "eval_runtime": 1.5457, "eval_samples_per_second": 78.279, "eval_steps_per_second": 10.351, "step": 1550 }, { "epoch": 26.0, "eval_loss": 19.421499252319336, "eval_runtime": 1.5734, "eval_samples_per_second": 76.905, "eval_steps_per_second": 10.169, "step": 1612 }, { "epoch": 27.0, "eval_loss": 18.702451705932617, "eval_runtime": 1.5338, "eval_samples_per_second": 78.89, "eval_steps_per_second": 10.432, "step": 1674 }, { "epoch": 28.0, "eval_loss": 18.053003311157227, "eval_runtime": 1.5299, "eval_samples_per_second": 79.092, "eval_steps_per_second": 10.458, "step": 1736 }, { "epoch": 29.0, "eval_loss": 17.729013442993164, "eval_runtime": 1.5369, "eval_samples_per_second": 78.732, "eval_steps_per_second": 10.411, "step": 1798 }, { "epoch": 30.0, "eval_loss": 17.282075881958008, "eval_runtime": 1.5244, "eval_samples_per_second": 79.377, "eval_steps_per_second": 10.496, "step": 1860 }, { "epoch": 31.0, "eval_loss": 16.866586685180664, "eval_runtime": 1.5191, "eval_samples_per_second": 79.654, "eval_steps_per_second": 10.533, "step": 1922 }, { "epoch": 32.0, "eval_loss": 16.420963287353516, "eval_runtime": 1.5281, "eval_samples_per_second": 79.185, "eval_steps_per_second": 10.471, "step": 1984 }, { "epoch": 32.26, "learning_rate": 4.6774193548387094e-08, "loss": 18.4015, "step": 2000 }, { "epoch": 33.0, "eval_loss": 16.233917236328125, "eval_runtime": 1.5685, "eval_samples_per_second": 77.146, "eval_steps_per_second": 10.201, "step": 2046 }, { "epoch": 34.0, "eval_loss": 15.798954010009766, "eval_runtime": 1.5347, "eval_samples_per_second": 78.844, "eval_steps_per_second": 10.426, "step": 2108 }, { "epoch": 35.0, "eval_loss": 15.77268123626709, "eval_runtime": 1.5269, "eval_samples_per_second": 79.247, "eval_steps_per_second": 10.479, "step": 2170 }, { "epoch": 36.0, "eval_loss": 15.234251976013184, "eval_runtime": 1.5303, "eval_samples_per_second": 79.07, "eval_steps_per_second": 10.456, "step": 2232 }, { "epoch": 37.0, "eval_loss": 15.142735481262207, "eval_runtime": 1.5404, "eval_samples_per_second": 78.549, "eval_steps_per_second": 10.387, "step": 2294 }, { "epoch": 38.0, "eval_loss": 14.873641014099121, "eval_runtime": 1.5199, "eval_samples_per_second": 79.611, "eval_steps_per_second": 10.527, "step": 2356 }, { "epoch": 39.0, "eval_loss": 14.704235076904297, "eval_runtime": 1.5212, "eval_samples_per_second": 79.542, "eval_steps_per_second": 10.518, "step": 2418 }, { "epoch": 40.0, "eval_loss": 14.539189338684082, "eval_runtime": 1.5248, "eval_samples_per_second": 79.353, "eval_steps_per_second": 10.493, "step": 2480 }, { "epoch": 40.32, "learning_rate": 4.5967741935483865e-08, "loss": 14.946, "step": 2500 }, { "epoch": 41.0, "eval_loss": 14.535553932189941, "eval_runtime": 1.5276, "eval_samples_per_second": 79.21, "eval_steps_per_second": 10.474, "step": 2542 }, { "epoch": 42.0, "eval_loss": 14.1603364944458, "eval_runtime": 1.5303, "eval_samples_per_second": 79.072, "eval_steps_per_second": 10.456, "step": 2604 }, { "epoch": 43.0, "eval_loss": 14.145159721374512, "eval_runtime": 1.5312, "eval_samples_per_second": 79.024, "eval_steps_per_second": 10.449, "step": 2666 }, { "epoch": 44.0, "eval_loss": 14.198166847229004, "eval_runtime": 1.5287, "eval_samples_per_second": 79.152, "eval_steps_per_second": 10.466, "step": 2728 }, { "epoch": 45.0, "eval_loss": 13.976308822631836, "eval_runtime": 1.5411, "eval_samples_per_second": 78.517, "eval_steps_per_second": 10.382, "step": 2790 }, { "epoch": 46.0, "eval_loss": 13.921603202819824, "eval_runtime": 1.5243, "eval_samples_per_second": 79.382, "eval_steps_per_second": 10.497, "step": 2852 }, { "epoch": 47.0, "eval_loss": 13.767107963562012, "eval_runtime": 1.5235, "eval_samples_per_second": 79.42, "eval_steps_per_second": 10.502, "step": 2914 }, { "epoch": 48.0, "eval_loss": 13.634342193603516, "eval_runtime": 1.5202, "eval_samples_per_second": 79.597, "eval_steps_per_second": 10.525, "step": 2976 }, { "epoch": 48.39, "learning_rate": 4.516129032258064e-08, "loss": 13.1518, "step": 3000 }, { "epoch": 49.0, "eval_loss": 13.62414264678955, "eval_runtime": 1.5563, "eval_samples_per_second": 77.747, "eval_steps_per_second": 10.281, "step": 3038 }, { "epoch": 50.0, "eval_loss": 13.416315078735352, "eval_runtime": 1.5393, "eval_samples_per_second": 78.607, "eval_steps_per_second": 10.394, "step": 3100 }, { "epoch": 51.0, "eval_loss": 13.440138816833496, "eval_runtime": 1.5302, "eval_samples_per_second": 79.075, "eval_steps_per_second": 10.456, "step": 3162 }, { "epoch": 52.0, "eval_loss": 13.560946464538574, "eval_runtime": 1.5457, "eval_samples_per_second": 78.282, "eval_steps_per_second": 10.351, "step": 3224 }, { "epoch": 53.0, "eval_loss": 13.285831451416016, "eval_runtime": 1.5455, "eval_samples_per_second": 78.294, "eval_steps_per_second": 10.353, "step": 3286 }, { "epoch": 54.0, "eval_loss": 13.15819263458252, "eval_runtime": 1.5204, "eval_samples_per_second": 79.586, "eval_steps_per_second": 10.524, "step": 3348 }, { "epoch": 55.0, "eval_loss": 13.245644569396973, "eval_runtime": 1.5209, "eval_samples_per_second": 79.557, "eval_steps_per_second": 10.52, "step": 3410 }, { "epoch": 56.0, "eval_loss": 13.234107971191406, "eval_runtime": 1.5187, "eval_samples_per_second": 79.673, "eval_steps_per_second": 10.535, "step": 3472 }, { "epoch": 56.45, "learning_rate": 4.435483870967742e-08, "loss": 12.1359, "step": 3500 }, { "epoch": 57.0, "eval_loss": 13.015525817871094, "eval_runtime": 1.5313, "eval_samples_per_second": 79.02, "eval_steps_per_second": 10.449, "step": 3534 }, { "epoch": 58.0, "eval_loss": 13.029335021972656, "eval_runtime": 1.5381, "eval_samples_per_second": 78.667, "eval_steps_per_second": 10.402, "step": 3596 }, { "epoch": 59.0, "eval_loss": 12.971243858337402, "eval_runtime": 1.5525, "eval_samples_per_second": 77.94, "eval_steps_per_second": 10.306, "step": 3658 }, { "epoch": 60.0, "eval_loss": 12.94456672668457, "eval_runtime": 1.5335, "eval_samples_per_second": 78.903, "eval_steps_per_second": 10.433, "step": 3720 }, { "epoch": 61.0, "eval_loss": 12.830053329467773, "eval_runtime": 1.5349, "eval_samples_per_second": 78.831, "eval_steps_per_second": 10.424, "step": 3782 }, { "epoch": 62.0, "eval_loss": 12.75737190246582, "eval_runtime": 1.5321, "eval_samples_per_second": 78.978, "eval_steps_per_second": 10.443, "step": 3844 }, { "epoch": 63.0, "eval_loss": 12.696720123291016, "eval_runtime": 1.5195, "eval_samples_per_second": 79.632, "eval_steps_per_second": 10.53, "step": 3906 }, { "epoch": 64.0, "eval_loss": 12.653301239013672, "eval_runtime": 1.5219, "eval_samples_per_second": 79.506, "eval_steps_per_second": 10.513, "step": 3968 }, { "epoch": 64.52, "learning_rate": 4.354838709677419e-08, "loss": 11.5584, "step": 4000 }, { "epoch": 65.0, "eval_loss": 12.554749488830566, "eval_runtime": 1.5433, "eval_samples_per_second": 78.403, "eval_steps_per_second": 10.367, "step": 4030 }, { "epoch": 66.0, "eval_loss": 12.586564064025879, "eval_runtime": 1.5317, "eval_samples_per_second": 78.997, "eval_steps_per_second": 10.446, "step": 4092 }, { "epoch": 67.0, "eval_loss": 12.756257057189941, "eval_runtime": 1.5267, "eval_samples_per_second": 79.256, "eval_steps_per_second": 10.48, "step": 4154 }, { "epoch": 68.0, "eval_loss": 12.648726463317871, "eval_runtime": 1.5304, "eval_samples_per_second": 79.064, "eval_steps_per_second": 10.455, "step": 4216 }, { "epoch": 69.0, "eval_loss": 12.601462364196777, "eval_runtime": 1.5297, "eval_samples_per_second": 79.102, "eval_steps_per_second": 10.46, "step": 4278 }, { "epoch": 70.0, "eval_loss": 12.509653091430664, "eval_runtime": 1.5291, "eval_samples_per_second": 79.132, "eval_steps_per_second": 10.464, "step": 4340 }, { "epoch": 71.0, "eval_loss": 12.53116226196289, "eval_runtime": 1.5214, "eval_samples_per_second": 79.53, "eval_steps_per_second": 10.516, "step": 4402 }, { "epoch": 72.0, "eval_loss": 12.467875480651855, "eval_runtime": 1.5274, "eval_samples_per_second": 79.221, "eval_steps_per_second": 10.475, "step": 4464 }, { "epoch": 72.58, "learning_rate": 4.274193548387097e-08, "loss": 11.224, "step": 4500 }, { "epoch": 73.0, "eval_loss": 12.480475425720215, "eval_runtime": 1.5409, "eval_samples_per_second": 78.527, "eval_steps_per_second": 10.384, "step": 4526 }, { "epoch": 74.0, "eval_loss": 12.44310474395752, "eval_runtime": 1.5365, "eval_samples_per_second": 78.752, "eval_steps_per_second": 10.413, "step": 4588 }, { "epoch": 75.0, "eval_loss": 12.302526473999023, "eval_runtime": 1.5302, "eval_samples_per_second": 79.074, "eval_steps_per_second": 10.456, "step": 4650 }, { "epoch": 76.0, "eval_loss": 12.154280662536621, "eval_runtime": 1.528, "eval_samples_per_second": 79.19, "eval_steps_per_second": 10.471, "step": 4712 }, { "epoch": 77.0, "eval_loss": 12.35551929473877, "eval_runtime": 1.5273, "eval_samples_per_second": 79.225, "eval_steps_per_second": 10.476, "step": 4774 }, { "epoch": 78.0, "eval_loss": 12.264195442199707, "eval_runtime": 1.5333, "eval_samples_per_second": 78.913, "eval_steps_per_second": 10.435, "step": 4836 }, { "epoch": 79.0, "eval_loss": 12.355469703674316, "eval_runtime": 1.5317, "eval_samples_per_second": 78.997, "eval_steps_per_second": 10.446, "step": 4898 }, { "epoch": 80.0, "eval_loss": 12.291287422180176, "eval_runtime": 1.5256, "eval_samples_per_second": 79.315, "eval_steps_per_second": 10.488, "step": 4960 }, { "epoch": 80.65, "learning_rate": 4.193548387096774e-08, "loss": 10.9897, "step": 5000 }, { "epoch": 81.0, "eval_loss": 12.248346328735352, "eval_runtime": 1.5278, "eval_samples_per_second": 79.197, "eval_steps_per_second": 10.472, "step": 5022 }, { "epoch": 82.0, "eval_loss": 12.181654930114746, "eval_runtime": 1.5283, "eval_samples_per_second": 79.175, "eval_steps_per_second": 10.469, "step": 5084 }, { "epoch": 83.0, "eval_loss": 12.359329223632812, "eval_runtime": 1.5268, "eval_samples_per_second": 79.251, "eval_steps_per_second": 10.479, "step": 5146 }, { "epoch": 84.0, "eval_loss": 12.071714401245117, "eval_runtime": 1.5407, "eval_samples_per_second": 78.535, "eval_steps_per_second": 10.385, "step": 5208 }, { "epoch": 85.0, "eval_loss": 12.247845649719238, "eval_runtime": 1.5293, "eval_samples_per_second": 79.122, "eval_steps_per_second": 10.462, "step": 5270 }, { "epoch": 86.0, "eval_loss": 12.065207481384277, "eval_runtime": 1.5401, "eval_samples_per_second": 78.568, "eval_steps_per_second": 10.389, "step": 5332 }, { "epoch": 87.0, "eval_loss": 12.124922752380371, "eval_runtime": 1.5205, "eval_samples_per_second": 79.578, "eval_steps_per_second": 10.523, "step": 5394 }, { "epoch": 88.0, "eval_loss": 12.090045928955078, "eval_runtime": 1.5308, "eval_samples_per_second": 79.043, "eval_steps_per_second": 10.452, "step": 5456 }, { "epoch": 88.71, "learning_rate": 4.1129032258064516e-08, "loss": 10.8156, "step": 5500 }, { "epoch": 89.0, "eval_loss": 11.984047889709473, "eval_runtime": 1.5305, "eval_samples_per_second": 79.061, "eval_steps_per_second": 10.454, "step": 5518 }, { "epoch": 90.0, "eval_loss": 12.120992660522461, "eval_runtime": 1.53, "eval_samples_per_second": 79.086, "eval_steps_per_second": 10.458, "step": 5580 }, { "epoch": 91.0, "eval_loss": 12.18094253540039, "eval_runtime": 1.5426, "eval_samples_per_second": 78.437, "eval_steps_per_second": 10.372, "step": 5642 }, { "epoch": 92.0, "eval_loss": 12.021756172180176, "eval_runtime": 1.5405, "eval_samples_per_second": 78.545, "eval_steps_per_second": 10.386, "step": 5704 }, { "epoch": 93.0, "eval_loss": 12.143865585327148, "eval_runtime": 1.5315, "eval_samples_per_second": 79.009, "eval_steps_per_second": 10.447, "step": 5766 }, { "epoch": 94.0, "eval_loss": 11.918268203735352, "eval_runtime": 1.5285, "eval_samples_per_second": 79.163, "eval_steps_per_second": 10.468, "step": 5828 }, { "epoch": 95.0, "eval_loss": 12.067418098449707, "eval_runtime": 1.5204, "eval_samples_per_second": 79.584, "eval_steps_per_second": 10.524, "step": 5890 }, { "epoch": 96.0, "eval_loss": 12.009296417236328, "eval_runtime": 1.52, "eval_samples_per_second": 79.604, "eval_steps_per_second": 10.526, "step": 5952 }, { "epoch": 96.77, "learning_rate": 4.032258064516129e-08, "loss": 10.6906, "step": 6000 }, { "epoch": 97.0, "eval_loss": 12.008870124816895, "eval_runtime": 1.5307, "eval_samples_per_second": 79.05, "eval_steps_per_second": 10.453, "step": 6014 }, { "epoch": 98.0, "eval_loss": 11.956018447875977, "eval_runtime": 1.5309, "eval_samples_per_second": 79.041, "eval_steps_per_second": 10.452, "step": 6076 }, { "epoch": 99.0, "eval_loss": 11.767322540283203, "eval_runtime": 1.5351, "eval_samples_per_second": 78.825, "eval_steps_per_second": 10.423, "step": 6138 }, { "epoch": 100.0, "eval_loss": 12.034104347229004, "eval_runtime": 1.5321, "eval_samples_per_second": 78.979, "eval_steps_per_second": 10.443, "step": 6200 }, { "epoch": 101.0, "eval_loss": 12.03824234008789, "eval_runtime": 1.5289, "eval_samples_per_second": 79.14, "eval_steps_per_second": 10.465, "step": 6262 }, { "epoch": 102.0, "eval_loss": 11.87580680847168, "eval_runtime": 1.5338, "eval_samples_per_second": 78.891, "eval_steps_per_second": 10.432, "step": 6324 }, { "epoch": 103.0, "eval_loss": 11.93928337097168, "eval_runtime": 1.5258, "eval_samples_per_second": 79.301, "eval_steps_per_second": 10.486, "step": 6386 }, { "epoch": 104.0, "eval_loss": 11.878522872924805, "eval_runtime": 1.5273, "eval_samples_per_second": 79.226, "eval_steps_per_second": 10.476, "step": 6448 }, { "epoch": 104.84, "learning_rate": 3.951612903225806e-08, "loss": 10.5767, "step": 6500 }, { "epoch": 105.0, "eval_loss": 11.81560230255127, "eval_runtime": 1.5317, "eval_samples_per_second": 78.998, "eval_steps_per_second": 10.446, "step": 6510 }, { "epoch": 106.0, "eval_loss": 11.750551223754883, "eval_runtime": 1.5404, "eval_samples_per_second": 78.549, "eval_steps_per_second": 10.387, "step": 6572 }, { "epoch": 107.0, "eval_loss": 11.90053653717041, "eval_runtime": 1.5286, "eval_samples_per_second": 79.156, "eval_steps_per_second": 10.467, "step": 6634 }, { "epoch": 108.0, "eval_loss": 11.880046844482422, "eval_runtime": 1.5341, "eval_samples_per_second": 78.874, "eval_steps_per_second": 10.43, "step": 6696 }, { "epoch": 109.0, "eval_loss": 11.9345703125, "eval_runtime": 1.5463, "eval_samples_per_second": 78.251, "eval_steps_per_second": 10.347, "step": 6758 }, { "epoch": 110.0, "eval_loss": 11.833414077758789, "eval_runtime": 1.5284, "eval_samples_per_second": 79.166, "eval_steps_per_second": 10.468, "step": 6820 }, { "epoch": 111.0, "eval_loss": 11.616347312927246, "eval_runtime": 1.5189, "eval_samples_per_second": 79.663, "eval_steps_per_second": 10.534, "step": 6882 }, { "epoch": 112.0, "eval_loss": 11.829395294189453, "eval_runtime": 1.5288, "eval_samples_per_second": 79.144, "eval_steps_per_second": 10.465, "step": 6944 }, { "epoch": 112.9, "learning_rate": 3.8709677419354835e-08, "loss": 10.5013, "step": 7000 }, { "epoch": 113.0, "eval_loss": 11.786639213562012, "eval_runtime": 1.5596, "eval_samples_per_second": 77.582, "eval_steps_per_second": 10.259, "step": 7006 }, { "epoch": 114.0, "eval_loss": 11.724261283874512, "eval_runtime": 1.5228, "eval_samples_per_second": 79.46, "eval_steps_per_second": 10.507, "step": 7068 }, { "epoch": 115.0, "eval_loss": 11.56452751159668, "eval_runtime": 1.5342, "eval_samples_per_second": 78.868, "eval_steps_per_second": 10.429, "step": 7130 }, { "epoch": 116.0, "eval_loss": 11.7550687789917, "eval_runtime": 1.5293, "eval_samples_per_second": 79.122, "eval_steps_per_second": 10.462, "step": 7192 }, { "epoch": 117.0, "eval_loss": 11.625850677490234, "eval_runtime": 1.5487, "eval_samples_per_second": 78.129, "eval_steps_per_second": 10.331, "step": 7254 }, { "epoch": 118.0, "eval_loss": 11.723539352416992, "eval_runtime": 1.5327, "eval_samples_per_second": 78.946, "eval_steps_per_second": 10.439, "step": 7316 }, { "epoch": 119.0, "eval_loss": 11.699857711791992, "eval_runtime": 1.5315, "eval_samples_per_second": 79.006, "eval_steps_per_second": 10.447, "step": 7378 }, { "epoch": 120.0, "eval_loss": 11.485078811645508, "eval_runtime": 1.5203, "eval_samples_per_second": 79.591, "eval_steps_per_second": 10.524, "step": 7440 }, { "epoch": 120.97, "learning_rate": 3.790322580645161e-08, "loss": 10.4493, "step": 7500 }, { "epoch": 121.0, "eval_loss": 11.495078086853027, "eval_runtime": 1.5271, "eval_samples_per_second": 79.234, "eval_steps_per_second": 10.477, "step": 7502 }, { "epoch": 122.0, "eval_loss": 11.521384239196777, "eval_runtime": 1.536, "eval_samples_per_second": 78.776, "eval_steps_per_second": 10.417, "step": 7564 }, { "epoch": 123.0, "eval_loss": 11.739558219909668, "eval_runtime": 1.528, "eval_samples_per_second": 79.186, "eval_steps_per_second": 10.471, "step": 7626 }, { "epoch": 124.0, "eval_loss": 11.696319580078125, "eval_runtime": 1.5494, "eval_samples_per_second": 78.094, "eval_steps_per_second": 10.327, "step": 7688 }, { "epoch": 125.0, "eval_loss": 11.530503273010254, "eval_runtime": 1.5529, "eval_samples_per_second": 77.921, "eval_steps_per_second": 10.304, "step": 7750 }, { "epoch": 126.0, "eval_loss": 11.771147727966309, "eval_runtime": 1.5301, "eval_samples_per_second": 79.081, "eval_steps_per_second": 10.457, "step": 7812 }, { "epoch": 127.0, "eval_loss": 11.80306339263916, "eval_runtime": 1.5204, "eval_samples_per_second": 79.583, "eval_steps_per_second": 10.523, "step": 7874 }, { "epoch": 128.0, "eval_loss": 11.595075607299805, "eval_runtime": 1.5262, "eval_samples_per_second": 79.282, "eval_steps_per_second": 10.484, "step": 7936 }, { "epoch": 129.0, "eval_loss": 11.47359848022461, "eval_runtime": 1.5189, "eval_samples_per_second": 79.663, "eval_steps_per_second": 10.534, "step": 7998 }, { "epoch": 129.03, "learning_rate": 3.7096774193548384e-08, "loss": 10.3973, "step": 8000 }, { "epoch": 130.0, "eval_loss": 11.615568161010742, "eval_runtime": 1.5324, "eval_samples_per_second": 78.963, "eval_steps_per_second": 10.441, "step": 8060 }, { "epoch": 131.0, "eval_loss": 11.544720649719238, "eval_runtime": 1.5295, "eval_samples_per_second": 79.11, "eval_steps_per_second": 10.461, "step": 8122 }, { "epoch": 132.0, "eval_loss": 11.356687545776367, "eval_runtime": 1.5299, "eval_samples_per_second": 79.09, "eval_steps_per_second": 10.458, "step": 8184 }, { "epoch": 133.0, "eval_loss": 11.483879089355469, "eval_runtime": 1.5293, "eval_samples_per_second": 79.121, "eval_steps_per_second": 10.462, "step": 8246 }, { "epoch": 134.0, "eval_loss": 11.441631317138672, "eval_runtime": 1.5309, "eval_samples_per_second": 79.039, "eval_steps_per_second": 10.451, "step": 8308 }, { "epoch": 135.0, "eval_loss": 11.539510726928711, "eval_runtime": 1.528, "eval_samples_per_second": 79.191, "eval_steps_per_second": 10.471, "step": 8370 }, { "epoch": 136.0, "eval_loss": 11.437644004821777, "eval_runtime": 1.5258, "eval_samples_per_second": 79.301, "eval_steps_per_second": 10.486, "step": 8432 }, { "epoch": 137.0, "eval_loss": 11.378108978271484, "eval_runtime": 1.5197, "eval_samples_per_second": 79.619, "eval_steps_per_second": 10.528, "step": 8494 }, { "epoch": 137.1, "learning_rate": 3.629032258064516e-08, "loss": 10.3569, "step": 8500 }, { "epoch": 138.0, "eval_loss": 11.417948722839355, "eval_runtime": 1.5364, "eval_samples_per_second": 78.758, "eval_steps_per_second": 10.414, "step": 8556 }, { "epoch": 139.0, "eval_loss": 11.674735069274902, "eval_runtime": 1.5314, "eval_samples_per_second": 79.012, "eval_steps_per_second": 10.448, "step": 8618 }, { "epoch": 140.0, "eval_loss": 11.611095428466797, "eval_runtime": 1.5272, "eval_samples_per_second": 79.231, "eval_steps_per_second": 10.477, "step": 8680 }, { "epoch": 141.0, "eval_loss": 11.50928020477295, "eval_runtime": 1.5304, "eval_samples_per_second": 79.062, "eval_steps_per_second": 10.455, "step": 8742 }, { "epoch": 142.0, "eval_loss": 11.475595474243164, "eval_runtime": 1.5278, "eval_samples_per_second": 79.201, "eval_steps_per_second": 10.473, "step": 8804 }, { "epoch": 143.0, "eval_loss": 11.34980583190918, "eval_runtime": 1.5199, "eval_samples_per_second": 79.612, "eval_steps_per_second": 10.527, "step": 8866 }, { "epoch": 144.0, "eval_loss": 11.562585830688477, "eval_runtime": 1.5213, "eval_samples_per_second": 79.535, "eval_steps_per_second": 10.517, "step": 8928 }, { "epoch": 145.0, "eval_loss": 11.347404479980469, "eval_runtime": 1.5204, "eval_samples_per_second": 79.584, "eval_steps_per_second": 10.524, "step": 8990 }, { "epoch": 145.16, "learning_rate": 3.548387096774194e-08, "loss": 10.3565, "step": 9000 }, { "epoch": 146.0, "eval_loss": 11.377586364746094, "eval_runtime": 1.5334, "eval_samples_per_second": 78.91, "eval_steps_per_second": 10.434, "step": 9052 }, { "epoch": 147.0, "eval_loss": 11.362828254699707, "eval_runtime": 1.5275, "eval_samples_per_second": 79.217, "eval_steps_per_second": 10.475, "step": 9114 }, { "epoch": 148.0, "eval_loss": 11.485925674438477, "eval_runtime": 1.5452, "eval_samples_per_second": 78.308, "eval_steps_per_second": 10.355, "step": 9176 }, { "epoch": 149.0, "eval_loss": 11.422063827514648, "eval_runtime": 1.5281, "eval_samples_per_second": 79.184, "eval_steps_per_second": 10.471, "step": 9238 }, { "epoch": 150.0, "eval_loss": 11.419939041137695, "eval_runtime": 1.54, "eval_samples_per_second": 78.57, "eval_steps_per_second": 10.389, "step": 9300 }, { "epoch": 151.0, "eval_loss": 11.570083618164062, "eval_runtime": 1.5233, "eval_samples_per_second": 79.432, "eval_steps_per_second": 10.503, "step": 9362 }, { "epoch": 152.0, "eval_loss": 11.463680267333984, "eval_runtime": 1.5197, "eval_samples_per_second": 79.621, "eval_steps_per_second": 10.528, "step": 9424 }, { "epoch": 153.0, "eval_loss": 11.65864086151123, "eval_runtime": 1.5194, "eval_samples_per_second": 79.634, "eval_steps_per_second": 10.53, "step": 9486 }, { "epoch": 153.23, "learning_rate": 3.467741935483871e-08, "loss": 10.3122, "step": 9500 }, { "epoch": 154.0, "eval_loss": 11.487783432006836, "eval_runtime": 1.5302, "eval_samples_per_second": 79.074, "eval_steps_per_second": 10.456, "step": 9548 }, { "epoch": 155.0, "eval_loss": 11.590331077575684, "eval_runtime": 1.5284, "eval_samples_per_second": 79.169, "eval_steps_per_second": 10.469, "step": 9610 }, { "epoch": 156.0, "eval_loss": 11.381085395812988, "eval_runtime": 1.5362, "eval_samples_per_second": 78.767, "eval_steps_per_second": 10.415, "step": 9672 }, { "epoch": 157.0, "eval_loss": 11.454978942871094, "eval_runtime": 1.5486, "eval_samples_per_second": 78.136, "eval_steps_per_second": 10.332, "step": 9734 }, { "epoch": 158.0, "eval_loss": 11.569897651672363, "eval_runtime": 1.5314, "eval_samples_per_second": 79.011, "eval_steps_per_second": 10.448, "step": 9796 }, { "epoch": 159.0, "eval_loss": 11.515423774719238, "eval_runtime": 1.52, "eval_samples_per_second": 79.603, "eval_steps_per_second": 10.526, "step": 9858 }, { "epoch": 160.0, "eval_loss": 11.501609802246094, "eval_runtime": 1.5198, "eval_samples_per_second": 79.617, "eval_steps_per_second": 10.528, "step": 9920 }, { "epoch": 161.0, "eval_loss": 11.571191787719727, "eval_runtime": 1.5313, "eval_samples_per_second": 79.018, "eval_steps_per_second": 10.449, "step": 9982 }, { "epoch": 161.29, "learning_rate": 3.387096774193548e-08, "loss": 10.2888, "step": 10000 }, { "epoch": 162.0, "eval_loss": 11.289880752563477, "eval_runtime": 1.5323, "eval_samples_per_second": 78.967, "eval_steps_per_second": 10.442, "step": 10044 }, { "epoch": 163.0, "eval_loss": 11.250905990600586, "eval_runtime": 1.5304, "eval_samples_per_second": 79.065, "eval_steps_per_second": 10.455, "step": 10106 }, { "epoch": 164.0, "eval_loss": 11.452507972717285, "eval_runtime": 1.5273, "eval_samples_per_second": 79.227, "eval_steps_per_second": 10.476, "step": 10168 }, { "epoch": 165.0, "eval_loss": 11.363972663879395, "eval_runtime": 1.5279, "eval_samples_per_second": 79.192, "eval_steps_per_second": 10.472, "step": 10230 }, { "epoch": 166.0, "eval_loss": 11.337541580200195, "eval_runtime": 1.5301, "eval_samples_per_second": 79.077, "eval_steps_per_second": 10.457, "step": 10292 }, { "epoch": 167.0, "eval_loss": 11.396878242492676, "eval_runtime": 1.5279, "eval_samples_per_second": 79.193, "eval_steps_per_second": 10.472, "step": 10354 }, { "epoch": 168.0, "eval_loss": 11.478124618530273, "eval_runtime": 1.5196, "eval_samples_per_second": 79.628, "eval_steps_per_second": 10.529, "step": 10416 }, { "epoch": 169.0, "eval_loss": 11.397954940795898, "eval_runtime": 1.5231, "eval_samples_per_second": 79.443, "eval_steps_per_second": 10.505, "step": 10478 }, { "epoch": 169.35, "learning_rate": 3.306451612903226e-08, "loss": 10.2642, "step": 10500 }, { "epoch": 170.0, "eval_loss": 11.28774356842041, "eval_runtime": 1.5277, "eval_samples_per_second": 79.204, "eval_steps_per_second": 10.473, "step": 10540 }, { "epoch": 171.0, "eval_loss": 11.336891174316406, "eval_runtime": 1.5318, "eval_samples_per_second": 78.993, "eval_steps_per_second": 10.445, "step": 10602 }, { "epoch": 172.0, "eval_loss": 11.385379791259766, "eval_runtime": 1.5315, "eval_samples_per_second": 79.006, "eval_steps_per_second": 10.447, "step": 10664 }, { "epoch": 173.0, "eval_loss": 11.50495433807373, "eval_runtime": 1.5331, "eval_samples_per_second": 78.924, "eval_steps_per_second": 10.436, "step": 10726 }, { "epoch": 174.0, "eval_loss": 11.482619285583496, "eval_runtime": 1.5436, "eval_samples_per_second": 78.386, "eval_steps_per_second": 10.365, "step": 10788 }, { "epoch": 175.0, "eval_loss": 11.342474937438965, "eval_runtime": 1.5316, "eval_samples_per_second": 79.002, "eval_steps_per_second": 10.447, "step": 10850 }, { "epoch": 176.0, "eval_loss": 11.241662979125977, "eval_runtime": 1.5196, "eval_samples_per_second": 79.629, "eval_steps_per_second": 10.529, "step": 10912 }, { "epoch": 177.0, "eval_loss": 11.126252174377441, "eval_runtime": 1.5201, "eval_samples_per_second": 79.603, "eval_steps_per_second": 10.526, "step": 10974 }, { "epoch": 177.42, "learning_rate": 3.225806451612903e-08, "loss": 10.2586, "step": 11000 }, { "epoch": 178.0, "eval_loss": 11.408693313598633, "eval_runtime": 1.5266, "eval_samples_per_second": 79.261, "eval_steps_per_second": 10.481, "step": 11036 }, { "epoch": 179.0, "eval_loss": 11.391864776611328, "eval_runtime": 1.552, "eval_samples_per_second": 77.965, "eval_steps_per_second": 10.309, "step": 11098 }, { "epoch": 180.0, "eval_loss": 11.251392364501953, "eval_runtime": 1.5284, "eval_samples_per_second": 79.165, "eval_steps_per_second": 10.468, "step": 11160 }, { "epoch": 181.0, "eval_loss": 11.256957054138184, "eval_runtime": 1.5287, "eval_samples_per_second": 79.153, "eval_steps_per_second": 10.467, "step": 11222 }, { "epoch": 182.0, "eval_loss": 11.44185733795166, "eval_runtime": 1.5278, "eval_samples_per_second": 79.197, "eval_steps_per_second": 10.472, "step": 11284 }, { "epoch": 183.0, "eval_loss": 11.41098690032959, "eval_runtime": 1.5598, "eval_samples_per_second": 77.572, "eval_steps_per_second": 10.257, "step": 11346 }, { "epoch": 184.0, "eval_loss": 11.391864776611328, "eval_runtime": 1.5217, "eval_samples_per_second": 79.518, "eval_steps_per_second": 10.515, "step": 11408 }, { "epoch": 185.0, "eval_loss": 11.242551803588867, "eval_runtime": 1.5268, "eval_samples_per_second": 79.25, "eval_steps_per_second": 10.479, "step": 11470 }, { "epoch": 185.48, "learning_rate": 3.1451612903225806e-08, "loss": 10.2479, "step": 11500 }, { "epoch": 186.0, "eval_loss": 11.386938095092773, "eval_runtime": 1.5272, "eval_samples_per_second": 79.228, "eval_steps_per_second": 10.476, "step": 11532 }, { "epoch": 187.0, "eval_loss": 11.295321464538574, "eval_runtime": 1.5298, "eval_samples_per_second": 79.094, "eval_steps_per_second": 10.459, "step": 11594 }, { "epoch": 188.0, "eval_loss": 11.33161449432373, "eval_runtime": 1.5284, "eval_samples_per_second": 79.166, "eval_steps_per_second": 10.468, "step": 11656 }, { "epoch": 189.0, "eval_loss": 11.137282371520996, "eval_runtime": 1.5487, "eval_samples_per_second": 78.13, "eval_steps_per_second": 10.331, "step": 11718 }, { "epoch": 190.0, "eval_loss": 11.165376663208008, "eval_runtime": 1.5348, "eval_samples_per_second": 78.838, "eval_steps_per_second": 10.425, "step": 11780 }, { "epoch": 191.0, "eval_loss": 11.320448875427246, "eval_runtime": 1.5398, "eval_samples_per_second": 78.582, "eval_steps_per_second": 10.391, "step": 11842 }, { "epoch": 192.0, "eval_loss": 11.312908172607422, "eval_runtime": 1.5205, "eval_samples_per_second": 79.582, "eval_steps_per_second": 10.523, "step": 11904 }, { "epoch": 193.0, "eval_loss": 11.271549224853516, "eval_runtime": 1.5262, "eval_samples_per_second": 79.283, "eval_steps_per_second": 10.484, "step": 11966 }, { "epoch": 193.55, "learning_rate": 3.064516129032258e-08, "loss": 10.2172, "step": 12000 }, { "epoch": 194.0, "eval_loss": 11.2645845413208, "eval_runtime": 1.5507, "eval_samples_per_second": 78.027, "eval_steps_per_second": 10.318, "step": 12028 }, { "epoch": 195.0, "eval_loss": 11.335646629333496, "eval_runtime": 1.5277, "eval_samples_per_second": 79.206, "eval_steps_per_second": 10.474, "step": 12090 }, { "epoch": 196.0, "eval_loss": 11.301328659057617, "eval_runtime": 1.5287, "eval_samples_per_second": 79.153, "eval_steps_per_second": 10.466, "step": 12152 }, { "epoch": 197.0, "eval_loss": 11.537822723388672, "eval_runtime": 1.5263, "eval_samples_per_second": 79.277, "eval_steps_per_second": 10.483, "step": 12214 }, { "epoch": 198.0, "eval_loss": 11.364118576049805, "eval_runtime": 1.529, "eval_samples_per_second": 79.138, "eval_steps_per_second": 10.465, "step": 12276 }, { "epoch": 199.0, "eval_loss": 11.369778633117676, "eval_runtime": 1.5279, "eval_samples_per_second": 79.195, "eval_steps_per_second": 10.472, "step": 12338 }, { "epoch": 200.0, "eval_loss": 11.267717361450195, "eval_runtime": 1.5199, "eval_samples_per_second": 79.612, "eval_steps_per_second": 10.527, "step": 12400 }, { "epoch": 201.0, "eval_loss": 11.297280311584473, "eval_runtime": 1.521, "eval_samples_per_second": 79.552, "eval_steps_per_second": 10.519, "step": 12462 }, { "epoch": 201.61, "learning_rate": 2.9838709677419354e-08, "loss": 10.2023, "step": 12500 }, { "epoch": 202.0, "eval_loss": 11.140520095825195, "eval_runtime": 1.5299, "eval_samples_per_second": 79.092, "eval_steps_per_second": 10.458, "step": 12524 }, { "epoch": 203.0, "eval_loss": 11.32856559753418, "eval_runtime": 1.5298, "eval_samples_per_second": 79.093, "eval_steps_per_second": 10.459, "step": 12586 }, { "epoch": 204.0, "eval_loss": 11.212227821350098, "eval_runtime": 1.528, "eval_samples_per_second": 79.19, "eval_steps_per_second": 10.471, "step": 12648 }, { "epoch": 205.0, "eval_loss": 11.232370376586914, "eval_runtime": 1.5285, "eval_samples_per_second": 79.16, "eval_steps_per_second": 10.467, "step": 12710 }, { "epoch": 206.0, "eval_loss": 11.35122299194336, "eval_runtime": 1.5675, "eval_samples_per_second": 77.193, "eval_steps_per_second": 10.207, "step": 12772 }, { "epoch": 207.0, "eval_loss": 11.088189125061035, "eval_runtime": 1.5298, "eval_samples_per_second": 79.096, "eval_steps_per_second": 10.459, "step": 12834 }, { "epoch": 208.0, "eval_loss": 11.35745620727539, "eval_runtime": 1.5205, "eval_samples_per_second": 79.581, "eval_steps_per_second": 10.523, "step": 12896 }, { "epoch": 209.0, "eval_loss": 11.250239372253418, "eval_runtime": 1.5204, "eval_samples_per_second": 79.584, "eval_steps_per_second": 10.523, "step": 12958 }, { "epoch": 209.68, "learning_rate": 2.9032258064516128e-08, "loss": 10.1926, "step": 13000 }, { "epoch": 210.0, "eval_loss": 11.327762603759766, "eval_runtime": 1.5376, "eval_samples_per_second": 78.692, "eval_steps_per_second": 10.406, "step": 13020 }, { "epoch": 211.0, "eval_loss": 11.21988582611084, "eval_runtime": 1.5336, "eval_samples_per_second": 78.901, "eval_steps_per_second": 10.433, "step": 13082 }, { "epoch": 212.0, "eval_loss": 11.256662368774414, "eval_runtime": 1.5406, "eval_samples_per_second": 78.541, "eval_steps_per_second": 10.386, "step": 13144 }, { "epoch": 213.0, "eval_loss": 11.28549861907959, "eval_runtime": 1.5261, "eval_samples_per_second": 79.288, "eval_steps_per_second": 10.484, "step": 13206 }, { "epoch": 214.0, "eval_loss": 11.189271926879883, "eval_runtime": 1.5291, "eval_samples_per_second": 79.132, "eval_steps_per_second": 10.464, "step": 13268 }, { "epoch": 215.0, "eval_loss": 11.260034561157227, "eval_runtime": 1.5268, "eval_samples_per_second": 79.249, "eval_steps_per_second": 10.479, "step": 13330 }, { "epoch": 216.0, "eval_loss": 11.284974098205566, "eval_runtime": 1.5308, "eval_samples_per_second": 79.043, "eval_steps_per_second": 10.452, "step": 13392 }, { "epoch": 217.0, "eval_loss": 11.227448463439941, "eval_runtime": 1.5233, "eval_samples_per_second": 79.431, "eval_steps_per_second": 10.503, "step": 13454 }, { "epoch": 217.74, "learning_rate": 2.8225806451612906e-08, "loss": 10.2075, "step": 13500 }, { "epoch": 218.0, "eval_loss": 11.110801696777344, "eval_runtime": 1.5287, "eval_samples_per_second": 79.151, "eval_steps_per_second": 10.466, "step": 13516 }, { "epoch": 219.0, "eval_loss": 11.275555610656738, "eval_runtime": 1.5279, "eval_samples_per_second": 79.192, "eval_steps_per_second": 10.472, "step": 13578 }, { "epoch": 220.0, "eval_loss": 11.283916473388672, "eval_runtime": 1.5257, "eval_samples_per_second": 79.308, "eval_steps_per_second": 10.487, "step": 13640 }, { "epoch": 221.0, "eval_loss": 11.216109275817871, "eval_runtime": 1.528, "eval_samples_per_second": 79.188, "eval_steps_per_second": 10.471, "step": 13702 }, { "epoch": 222.0, "eval_loss": 11.186559677124023, "eval_runtime": 1.5313, "eval_samples_per_second": 79.017, "eval_steps_per_second": 10.448, "step": 13764 }, { "epoch": 223.0, "eval_loss": 11.12224006652832, "eval_runtime": 1.5319, "eval_samples_per_second": 78.989, "eval_steps_per_second": 10.445, "step": 13826 }, { "epoch": 224.0, "eval_loss": 11.264845848083496, "eval_runtime": 1.5275, "eval_samples_per_second": 79.216, "eval_steps_per_second": 10.475, "step": 13888 }, { "epoch": 225.0, "eval_loss": 11.293610572814941, "eval_runtime": 1.5184, "eval_samples_per_second": 79.688, "eval_steps_per_second": 10.537, "step": 13950 }, { "epoch": 225.81, "learning_rate": 2.7419354838709673e-08, "loss": 10.1822, "step": 14000 }, { "epoch": 226.0, "eval_loss": 11.136199951171875, "eval_runtime": 1.5304, "eval_samples_per_second": 79.063, "eval_steps_per_second": 10.455, "step": 14012 }, { "epoch": 227.0, "eval_loss": 11.259844779968262, "eval_runtime": 1.5312, "eval_samples_per_second": 79.022, "eval_steps_per_second": 10.449, "step": 14074 }, { "epoch": 228.0, "eval_loss": 11.335241317749023, "eval_runtime": 1.5268, "eval_samples_per_second": 79.253, "eval_steps_per_second": 10.48, "step": 14136 }, { "epoch": 229.0, "eval_loss": 11.118280410766602, "eval_runtime": 1.529, "eval_samples_per_second": 79.138, "eval_steps_per_second": 10.465, "step": 14198 }, { "epoch": 230.0, "eval_loss": 11.207478523254395, "eval_runtime": 1.5282, "eval_samples_per_second": 79.178, "eval_steps_per_second": 10.47, "step": 14260 }, { "epoch": 231.0, "eval_loss": 11.190764427185059, "eval_runtime": 1.531, "eval_samples_per_second": 79.032, "eval_steps_per_second": 10.451, "step": 14322 }, { "epoch": 232.0, "eval_loss": 11.133115768432617, "eval_runtime": 1.5224, "eval_samples_per_second": 79.482, "eval_steps_per_second": 10.51, "step": 14384 }, { "epoch": 233.0, "eval_loss": 11.092888832092285, "eval_runtime": 1.521, "eval_samples_per_second": 79.551, "eval_steps_per_second": 10.519, "step": 14446 }, { "epoch": 233.87, "learning_rate": 2.661290322580645e-08, "loss": 10.1863, "step": 14500 }, { "epoch": 234.0, "eval_loss": 11.106066703796387, "eval_runtime": 1.5271, "eval_samples_per_second": 79.233, "eval_steps_per_second": 10.477, "step": 14508 }, { "epoch": 235.0, "eval_loss": 11.054638862609863, "eval_runtime": 1.5301, "eval_samples_per_second": 79.078, "eval_steps_per_second": 10.457, "step": 14570 }, { "epoch": 236.0, "eval_loss": 11.176545143127441, "eval_runtime": 1.528, "eval_samples_per_second": 79.189, "eval_steps_per_second": 10.471, "step": 14632 }, { "epoch": 237.0, "eval_loss": 11.25126838684082, "eval_runtime": 1.5393, "eval_samples_per_second": 78.608, "eval_steps_per_second": 10.394, "step": 14694 }, { "epoch": 238.0, "eval_loss": 11.091902732849121, "eval_runtime": 1.5287, "eval_samples_per_second": 79.155, "eval_steps_per_second": 10.467, "step": 14756 }, { "epoch": 239.0, "eval_loss": 11.2738676071167, "eval_runtime": 1.5269, "eval_samples_per_second": 79.244, "eval_steps_per_second": 10.479, "step": 14818 }, { "epoch": 240.0, "eval_loss": 11.034156799316406, "eval_runtime": 1.5197, "eval_samples_per_second": 79.619, "eval_steps_per_second": 10.528, "step": 14880 }, { "epoch": 241.0, "eval_loss": 11.236822128295898, "eval_runtime": 1.525, "eval_samples_per_second": 79.342, "eval_steps_per_second": 10.492, "step": 14942 }, { "epoch": 241.94, "learning_rate": 2.5806451612903225e-08, "loss": 10.1724, "step": 15000 }, { "epoch": 242.0, "eval_loss": 11.25521469116211, "eval_runtime": 1.5321, "eval_samples_per_second": 78.976, "eval_steps_per_second": 10.443, "step": 15004 }, { "epoch": 243.0, "eval_loss": 11.151459693908691, "eval_runtime": 1.5265, "eval_samples_per_second": 79.267, "eval_steps_per_second": 10.482, "step": 15066 }, { "epoch": 244.0, "eval_loss": 11.049065589904785, "eval_runtime": 1.525, "eval_samples_per_second": 79.342, "eval_steps_per_second": 10.492, "step": 15128 }, { "epoch": 245.0, "eval_loss": 11.018394470214844, "eval_runtime": 1.5424, "eval_samples_per_second": 78.451, "eval_steps_per_second": 10.374, "step": 15190 }, { "epoch": 246.0, "eval_loss": 11.378674507141113, "eval_runtime": 1.5254, "eval_samples_per_second": 79.325, "eval_steps_per_second": 10.489, "step": 15252 }, { "epoch": 247.0, "eval_loss": 11.232932090759277, "eval_runtime": 1.5294, "eval_samples_per_second": 79.117, "eval_steps_per_second": 10.462, "step": 15314 }, { "epoch": 248.0, "eval_loss": 11.072883605957031, "eval_runtime": 1.5274, "eval_samples_per_second": 79.221, "eval_steps_per_second": 10.476, "step": 15376 }, { "epoch": 249.0, "eval_loss": 11.147173881530762, "eval_runtime": 1.5246, "eval_samples_per_second": 79.366, "eval_steps_per_second": 10.495, "step": 15438 }, { "epoch": 250.0, "learning_rate": 2.5e-08, "loss": 10.1671, "step": 15500 }, { "epoch": 250.0, "eval_loss": 11.06186294555664, "eval_runtime": 1.5501, "eval_samples_per_second": 78.061, "eval_steps_per_second": 10.322, "step": 15500 }, { "epoch": 251.0, "eval_loss": 11.219667434692383, "eval_runtime": 1.5267, "eval_samples_per_second": 79.257, "eval_steps_per_second": 10.48, "step": 15562 }, { "epoch": 252.0, "eval_loss": 11.224080085754395, "eval_runtime": 1.5293, "eval_samples_per_second": 79.121, "eval_steps_per_second": 10.462, "step": 15624 }, { "epoch": 253.0, "eval_loss": 11.310192108154297, "eval_runtime": 1.528, "eval_samples_per_second": 79.187, "eval_steps_per_second": 10.471, "step": 15686 }, { "epoch": 254.0, "eval_loss": 11.289962768554688, "eval_runtime": 1.5252, "eval_samples_per_second": 79.335, "eval_steps_per_second": 10.491, "step": 15748 }, { "epoch": 255.0, "eval_loss": 11.300124168395996, "eval_runtime": 1.5278, "eval_samples_per_second": 79.2, "eval_steps_per_second": 10.473, "step": 15810 }, { "epoch": 256.0, "eval_loss": 11.220646858215332, "eval_runtime": 1.5215, "eval_samples_per_second": 79.528, "eval_steps_per_second": 10.516, "step": 15872 }, { "epoch": 257.0, "eval_loss": 11.102697372436523, "eval_runtime": 1.5207, "eval_samples_per_second": 79.571, "eval_steps_per_second": 10.522, "step": 15934 }, { "epoch": 258.0, "eval_loss": 11.306656837463379, "eval_runtime": 1.5184, "eval_samples_per_second": 79.692, "eval_steps_per_second": 10.538, "step": 15996 }, { "epoch": 258.06, "learning_rate": 2.4193548387096773e-08, "loss": 10.1579, "step": 16000 }, { "epoch": 259.0, "eval_loss": 11.30165958404541, "eval_runtime": 1.5278, "eval_samples_per_second": 79.201, "eval_steps_per_second": 10.473, "step": 16058 }, { "epoch": 260.0, "eval_loss": 11.098042488098145, "eval_runtime": 1.5428, "eval_samples_per_second": 78.43, "eval_steps_per_second": 10.371, "step": 16120 }, { "epoch": 261.0, "eval_loss": 11.215791702270508, "eval_runtime": 1.5285, "eval_samples_per_second": 79.163, "eval_steps_per_second": 10.468, "step": 16182 }, { "epoch": 262.0, "eval_loss": 10.940754890441895, "eval_runtime": 1.5416, "eval_samples_per_second": 78.488, "eval_steps_per_second": 10.379, "step": 16244 }, { "epoch": 263.0, "eval_loss": 11.108644485473633, "eval_runtime": 1.5277, "eval_samples_per_second": 79.205, "eval_steps_per_second": 10.473, "step": 16306 }, { "epoch": 264.0, "eval_loss": 11.28579330444336, "eval_runtime": 1.5188, "eval_samples_per_second": 79.67, "eval_steps_per_second": 10.535, "step": 16368 }, { "epoch": 265.0, "eval_loss": 11.280501365661621, "eval_runtime": 1.518, "eval_samples_per_second": 79.708, "eval_steps_per_second": 10.54, "step": 16430 }, { "epoch": 266.0, "eval_loss": 11.08869457244873, "eval_runtime": 1.5206, "eval_samples_per_second": 79.573, "eval_steps_per_second": 10.522, "step": 16492 }, { "epoch": 266.13, "learning_rate": 2.3387096774193547e-08, "loss": 10.1552, "step": 16500 }, { "epoch": 267.0, "eval_loss": 11.171466827392578, "eval_runtime": 1.5404, "eval_samples_per_second": 78.554, "eval_steps_per_second": 10.387, "step": 16554 }, { "epoch": 268.0, "eval_loss": 11.1470365524292, "eval_runtime": 1.5289, "eval_samples_per_second": 79.141, "eval_steps_per_second": 10.465, "step": 16616 }, { "epoch": 269.0, "eval_loss": 11.106681823730469, "eval_runtime": 1.5263, "eval_samples_per_second": 79.276, "eval_steps_per_second": 10.483, "step": 16678 }, { "epoch": 270.0, "eval_loss": 11.163908958435059, "eval_runtime": 1.5252, "eval_samples_per_second": 79.336, "eval_steps_per_second": 10.491, "step": 16740 }, { "epoch": 271.0, "eval_loss": 11.146965980529785, "eval_runtime": 1.5255, "eval_samples_per_second": 79.316, "eval_steps_per_second": 10.488, "step": 16802 }, { "epoch": 272.0, "eval_loss": 11.206341743469238, "eval_runtime": 1.5228, "eval_samples_per_second": 79.461, "eval_steps_per_second": 10.507, "step": 16864 }, { "epoch": 273.0, "eval_loss": 11.16470718383789, "eval_runtime": 1.5197, "eval_samples_per_second": 79.62, "eval_steps_per_second": 10.528, "step": 16926 }, { "epoch": 274.0, "eval_loss": 11.006061553955078, "eval_runtime": 1.5209, "eval_samples_per_second": 79.559, "eval_steps_per_second": 10.52, "step": 16988 }, { "epoch": 274.19, "learning_rate": 2.258064516129032e-08, "loss": 10.1468, "step": 17000 }, { "epoch": 275.0, "eval_loss": 11.382197380065918, "eval_runtime": 1.5267, "eval_samples_per_second": 79.254, "eval_steps_per_second": 10.48, "step": 17050 }, { "epoch": 276.0, "eval_loss": 10.946673393249512, "eval_runtime": 1.5278, "eval_samples_per_second": 79.2, "eval_steps_per_second": 10.473, "step": 17112 }, { "epoch": 277.0, "eval_loss": 11.2734956741333, "eval_runtime": 1.5371, "eval_samples_per_second": 78.717, "eval_steps_per_second": 10.409, "step": 17174 }, { "epoch": 278.0, "eval_loss": 11.013189315795898, "eval_runtime": 1.5409, "eval_samples_per_second": 78.528, "eval_steps_per_second": 10.384, "step": 17236 }, { "epoch": 279.0, "eval_loss": 11.160365104675293, "eval_runtime": 1.5281, "eval_samples_per_second": 79.181, "eval_steps_per_second": 10.47, "step": 17298 }, { "epoch": 280.0, "eval_loss": 11.16196060180664, "eval_runtime": 1.5194, "eval_samples_per_second": 79.636, "eval_steps_per_second": 10.53, "step": 17360 }, { "epoch": 281.0, "eval_loss": 11.064959526062012, "eval_runtime": 1.5184, "eval_samples_per_second": 79.689, "eval_steps_per_second": 10.537, "step": 17422 }, { "epoch": 282.0, "eval_loss": 10.991950035095215, "eval_runtime": 1.5262, "eval_samples_per_second": 79.284, "eval_steps_per_second": 10.484, "step": 17484 }, { "epoch": 282.26, "learning_rate": 2.1774193548387095e-08, "loss": 10.1402, "step": 17500 }, { "epoch": 283.0, "eval_loss": 11.044986724853516, "eval_runtime": 1.5294, "eval_samples_per_second": 79.118, "eval_steps_per_second": 10.462, "step": 17546 }, { "epoch": 284.0, "eval_loss": 10.982977867126465, "eval_runtime": 1.5353, "eval_samples_per_second": 78.811, "eval_steps_per_second": 10.421, "step": 17608 }, { "epoch": 285.0, "eval_loss": 11.013529777526855, "eval_runtime": 1.5385, "eval_samples_per_second": 78.649, "eval_steps_per_second": 10.4, "step": 17670 }, { "epoch": 286.0, "eval_loss": 11.264179229736328, "eval_runtime": 1.5281, "eval_samples_per_second": 79.183, "eval_steps_per_second": 10.47, "step": 17732 }, { "epoch": 287.0, "eval_loss": 11.11242389678955, "eval_runtime": 1.5289, "eval_samples_per_second": 79.142, "eval_steps_per_second": 10.465, "step": 17794 }, { "epoch": 288.0, "eval_loss": 11.192839622497559, "eval_runtime": 1.5192, "eval_samples_per_second": 79.646, "eval_steps_per_second": 10.532, "step": 17856 }, { "epoch": 289.0, "eval_loss": 11.128388404846191, "eval_runtime": 1.5284, "eval_samples_per_second": 79.167, "eval_steps_per_second": 10.468, "step": 17918 }, { "epoch": 290.0, "eval_loss": 11.208813667297363, "eval_runtime": 1.5227, "eval_samples_per_second": 79.463, "eval_steps_per_second": 10.507, "step": 17980 }, { "epoch": 290.32, "learning_rate": 2.096774193548387e-08, "loss": 10.147, "step": 18000 }, { "epoch": 291.0, "eval_loss": 11.075602531433105, "eval_runtime": 1.5304, "eval_samples_per_second": 79.062, "eval_steps_per_second": 10.454, "step": 18042 }, { "epoch": 292.0, "eval_loss": 11.051429748535156, "eval_runtime": 1.5297, "eval_samples_per_second": 79.103, "eval_steps_per_second": 10.46, "step": 18104 }, { "epoch": 293.0, "eval_loss": 11.14745044708252, "eval_runtime": 1.528, "eval_samples_per_second": 79.191, "eval_steps_per_second": 10.471, "step": 18166 }, { "epoch": 294.0, "eval_loss": 11.044754981994629, "eval_runtime": 1.5371, "eval_samples_per_second": 78.718, "eval_steps_per_second": 10.409, "step": 18228 }, { "epoch": 295.0, "eval_loss": 11.241650581359863, "eval_runtime": 1.5413, "eval_samples_per_second": 78.505, "eval_steps_per_second": 10.381, "step": 18290 }, { "epoch": 296.0, "eval_loss": 11.107006072998047, "eval_runtime": 1.5336, "eval_samples_per_second": 78.901, "eval_steps_per_second": 10.433, "step": 18352 }, { "epoch": 297.0, "eval_loss": 11.014945030212402, "eval_runtime": 1.5335, "eval_samples_per_second": 78.906, "eval_steps_per_second": 10.434, "step": 18414 }, { "epoch": 298.0, "eval_loss": 11.168006896972656, "eval_runtime": 1.5218, "eval_samples_per_second": 79.51, "eval_steps_per_second": 10.514, "step": 18476 }, { "epoch": 298.39, "learning_rate": 2.0161290322580644e-08, "loss": 10.1331, "step": 18500 }, { "epoch": 299.0, "eval_loss": 11.100564956665039, "eval_runtime": 1.5329, "eval_samples_per_second": 78.935, "eval_steps_per_second": 10.438, "step": 18538 }, { "epoch": 300.0, "eval_loss": 10.944679260253906, "eval_runtime": 1.5398, "eval_samples_per_second": 78.581, "eval_steps_per_second": 10.391, "step": 18600 }, { "epoch": 301.0, "eval_loss": 10.988174438476562, "eval_runtime": 1.54, "eval_samples_per_second": 78.569, "eval_steps_per_second": 10.389, "step": 18662 }, { "epoch": 302.0, "eval_loss": 11.02690315246582, "eval_runtime": 1.5425, "eval_samples_per_second": 78.446, "eval_steps_per_second": 10.373, "step": 18724 }, { "epoch": 303.0, "eval_loss": 10.976366996765137, "eval_runtime": 1.5362, "eval_samples_per_second": 78.765, "eval_steps_per_second": 10.415, "step": 18786 }, { "epoch": 304.0, "eval_loss": 11.088923454284668, "eval_runtime": 1.5246, "eval_samples_per_second": 79.365, "eval_steps_per_second": 10.495, "step": 18848 }, { "epoch": 305.0, "eval_loss": 11.024333953857422, "eval_runtime": 1.5195, "eval_samples_per_second": 79.629, "eval_steps_per_second": 10.529, "step": 18910 }, { "epoch": 306.0, "eval_loss": 11.025029182434082, "eval_runtime": 1.524, "eval_samples_per_second": 79.394, "eval_steps_per_second": 10.498, "step": 18972 }, { "epoch": 306.45, "learning_rate": 1.9354838709677418e-08, "loss": 10.1244, "step": 19000 }, { "epoch": 307.0, "eval_loss": 11.113330841064453, "eval_runtime": 1.54, "eval_samples_per_second": 78.569, "eval_steps_per_second": 10.389, "step": 19034 }, { "epoch": 308.0, "eval_loss": 11.013413429260254, "eval_runtime": 1.5273, "eval_samples_per_second": 79.226, "eval_steps_per_second": 10.476, "step": 19096 }, { "epoch": 309.0, "eval_loss": 10.995118141174316, "eval_runtime": 1.5332, "eval_samples_per_second": 78.921, "eval_steps_per_second": 10.436, "step": 19158 }, { "epoch": 310.0, "eval_loss": 11.261984825134277, "eval_runtime": 1.5238, "eval_samples_per_second": 79.407, "eval_steps_per_second": 10.5, "step": 19220 }, { "epoch": 311.0, "eval_loss": 11.110681533813477, "eval_runtime": 1.5588, "eval_samples_per_second": 77.623, "eval_steps_per_second": 10.264, "step": 19282 }, { "epoch": 312.0, "eval_loss": 11.05971622467041, "eval_runtime": 1.5228, "eval_samples_per_second": 79.459, "eval_steps_per_second": 10.507, "step": 19344 }, { "epoch": 313.0, "eval_loss": 10.960829734802246, "eval_runtime": 1.5312, "eval_samples_per_second": 79.024, "eval_steps_per_second": 10.449, "step": 19406 }, { "epoch": 314.0, "eval_loss": 10.997540473937988, "eval_runtime": 1.5275, "eval_samples_per_second": 79.216, "eval_steps_per_second": 10.475, "step": 19468 }, { "epoch": 314.52, "learning_rate": 1.8548387096774192e-08, "loss": 10.1251, "step": 19500 }, { "epoch": 315.0, "eval_loss": 11.017022132873535, "eval_runtime": 1.5516, "eval_samples_per_second": 77.982, "eval_steps_per_second": 10.312, "step": 19530 }, { "epoch": 316.0, "eval_loss": 11.152543067932129, "eval_runtime": 1.5321, "eval_samples_per_second": 78.977, "eval_steps_per_second": 10.443, "step": 19592 }, { "epoch": 317.0, "eval_loss": 11.0108003616333, "eval_runtime": 1.521, "eval_samples_per_second": 79.554, "eval_steps_per_second": 10.52, "step": 19654 }, { "epoch": 318.0, "eval_loss": 11.024676322937012, "eval_runtime": 1.5196, "eval_samples_per_second": 79.626, "eval_steps_per_second": 10.529, "step": 19716 }, { "epoch": 319.0, "eval_loss": 11.113091468811035, "eval_runtime": 1.5205, "eval_samples_per_second": 79.581, "eval_steps_per_second": 10.523, "step": 19778 }, { "epoch": 320.0, "eval_loss": 11.04623794555664, "eval_runtime": 1.5226, "eval_samples_per_second": 79.468, "eval_steps_per_second": 10.508, "step": 19840 }, { "epoch": 321.0, "eval_loss": 11.06059741973877, "eval_runtime": 1.5198, "eval_samples_per_second": 79.613, "eval_steps_per_second": 10.527, "step": 19902 }, { "epoch": 322.0, "eval_loss": 11.004892349243164, "eval_runtime": 1.5295, "eval_samples_per_second": 79.109, "eval_steps_per_second": 10.461, "step": 19964 }, { "epoch": 322.58, "learning_rate": 1.774193548387097e-08, "loss": 10.1178, "step": 20000 }, { "epoch": 323.0, "eval_loss": 11.110086441040039, "eval_runtime": 1.5366, "eval_samples_per_second": 78.746, "eval_steps_per_second": 10.413, "step": 20026 }, { "epoch": 324.0, "eval_loss": 11.034819602966309, "eval_runtime": 1.5318, "eval_samples_per_second": 78.992, "eval_steps_per_second": 10.445, "step": 20088 }, { "epoch": 325.0, "eval_loss": 11.286378860473633, "eval_runtime": 1.5319, "eval_samples_per_second": 78.988, "eval_steps_per_second": 10.445, "step": 20150 }, { "epoch": 326.0, "eval_loss": 10.950843811035156, "eval_runtime": 1.5282, "eval_samples_per_second": 79.177, "eval_steps_per_second": 10.47, "step": 20212 }, { "epoch": 327.0, "eval_loss": 10.995577812194824, "eval_runtime": 1.5301, "eval_samples_per_second": 79.078, "eval_steps_per_second": 10.457, "step": 20274 }, { "epoch": 328.0, "eval_loss": 10.956055641174316, "eval_runtime": 1.5197, "eval_samples_per_second": 79.62, "eval_steps_per_second": 10.528, "step": 20336 }, { "epoch": 329.0, "eval_loss": 10.953376770019531, "eval_runtime": 1.522, "eval_samples_per_second": 79.498, "eval_steps_per_second": 10.512, "step": 20398 }, { "epoch": 330.0, "eval_loss": 11.365254402160645, "eval_runtime": 1.5263, "eval_samples_per_second": 79.275, "eval_steps_per_second": 10.483, "step": 20460 }, { "epoch": 330.65, "learning_rate": 1.693548387096774e-08, "loss": 10.1215, "step": 20500 }, { "epoch": 331.0, "eval_loss": 11.118794441223145, "eval_runtime": 1.5308, "eval_samples_per_second": 79.044, "eval_steps_per_second": 10.452, "step": 20522 }, { "epoch": 332.0, "eval_loss": 11.090803146362305, "eval_runtime": 1.5271, "eval_samples_per_second": 79.237, "eval_steps_per_second": 10.478, "step": 20584 }, { "epoch": 333.0, "eval_loss": 10.98558235168457, "eval_runtime": 1.5289, "eval_samples_per_second": 79.144, "eval_steps_per_second": 10.465, "step": 20646 }, { "epoch": 334.0, "eval_loss": 10.987250328063965, "eval_runtime": 1.5588, "eval_samples_per_second": 77.622, "eval_steps_per_second": 10.264, "step": 20708 }, { "epoch": 335.0, "eval_loss": 11.048944473266602, "eval_runtime": 1.5348, "eval_samples_per_second": 78.84, "eval_steps_per_second": 10.425, "step": 20770 }, { "epoch": 336.0, "eval_loss": 11.112990379333496, "eval_runtime": 1.5248, "eval_samples_per_second": 79.355, "eval_steps_per_second": 10.493, "step": 20832 }, { "epoch": 337.0, "eval_loss": 10.877412796020508, "eval_runtime": 1.5203, "eval_samples_per_second": 79.589, "eval_steps_per_second": 10.524, "step": 20894 }, { "epoch": 338.0, "eval_loss": 10.787759780883789, "eval_runtime": 1.5247, "eval_samples_per_second": 79.361, "eval_steps_per_second": 10.494, "step": 20956 }, { "epoch": 338.71, "learning_rate": 1.6129032258064514e-08, "loss": 10.1267, "step": 21000 }, { "epoch": 339.0, "eval_loss": 11.173955917358398, "eval_runtime": 1.535, "eval_samples_per_second": 78.827, "eval_steps_per_second": 10.423, "step": 21018 }, { "epoch": 340.0, "eval_loss": 11.00390911102295, "eval_runtime": 1.5311, "eval_samples_per_second": 79.03, "eval_steps_per_second": 10.45, "step": 21080 }, { "epoch": 341.0, "eval_loss": 11.032459259033203, "eval_runtime": 1.5355, "eval_samples_per_second": 78.804, "eval_steps_per_second": 10.42, "step": 21142 }, { "epoch": 342.0, "eval_loss": 11.04984188079834, "eval_runtime": 1.5396, "eval_samples_per_second": 78.591, "eval_steps_per_second": 10.392, "step": 21204 }, { "epoch": 343.0, "eval_loss": 11.023720741271973, "eval_runtime": 1.5305, "eval_samples_per_second": 79.057, "eval_steps_per_second": 10.454, "step": 21266 }, { "epoch": 344.0, "eval_loss": 11.093502044677734, "eval_runtime": 1.5353, "eval_samples_per_second": 78.813, "eval_steps_per_second": 10.422, "step": 21328 }, { "epoch": 345.0, "eval_loss": 10.816137313842773, "eval_runtime": 1.5197, "eval_samples_per_second": 79.62, "eval_steps_per_second": 10.528, "step": 21390 }, { "epoch": 346.0, "eval_loss": 11.070144653320312, "eval_runtime": 1.5197, "eval_samples_per_second": 79.623, "eval_steps_per_second": 10.529, "step": 21452 }, { "epoch": 346.77, "learning_rate": 1.532258064516129e-08, "loss": 10.1105, "step": 21500 }, { "epoch": 347.0, "eval_loss": 10.87633991241455, "eval_runtime": 1.5347, "eval_samples_per_second": 78.841, "eval_steps_per_second": 10.425, "step": 21514 }, { "epoch": 348.0, "eval_loss": 11.06225299835205, "eval_runtime": 1.5319, "eval_samples_per_second": 78.984, "eval_steps_per_second": 10.444, "step": 21576 }, { "epoch": 349.0, "eval_loss": 10.950231552124023, "eval_runtime": 1.5483, "eval_samples_per_second": 78.15, "eval_steps_per_second": 10.334, "step": 21638 }, { "epoch": 350.0, "eval_loss": 11.076042175292969, "eval_runtime": 1.5401, "eval_samples_per_second": 78.567, "eval_steps_per_second": 10.389, "step": 21700 }, { "epoch": 351.0, "eval_loss": 11.147188186645508, "eval_runtime": 1.5351, "eval_samples_per_second": 78.823, "eval_steps_per_second": 10.423, "step": 21762 }, { "epoch": 352.0, "eval_loss": 11.15673828125, "eval_runtime": 1.5292, "eval_samples_per_second": 79.126, "eval_steps_per_second": 10.463, "step": 21824 }, { "epoch": 353.0, "eval_loss": 11.01186466217041, "eval_runtime": 1.5267, "eval_samples_per_second": 79.255, "eval_steps_per_second": 10.48, "step": 21886 }, { "epoch": 354.0, "eval_loss": 11.07396125793457, "eval_runtime": 1.5223, "eval_samples_per_second": 79.486, "eval_steps_per_second": 10.511, "step": 21948 }, { "epoch": 354.84, "learning_rate": 1.4516129032258064e-08, "loss": 10.1124, "step": 22000 }, { "epoch": 355.0, "eval_loss": 11.035076141357422, "eval_runtime": 1.5453, "eval_samples_per_second": 78.302, "eval_steps_per_second": 10.354, "step": 22010 }, { "epoch": 356.0, "eval_loss": 10.995853424072266, "eval_runtime": 1.5492, "eval_samples_per_second": 78.107, "eval_steps_per_second": 10.328, "step": 22072 }, { "epoch": 357.0, "eval_loss": 10.982865333557129, "eval_runtime": 1.5378, "eval_samples_per_second": 78.686, "eval_steps_per_second": 10.405, "step": 22134 }, { "epoch": 358.0, "eval_loss": 11.162384986877441, "eval_runtime": 1.5367, "eval_samples_per_second": 78.739, "eval_steps_per_second": 10.412, "step": 22196 }, { "epoch": 359.0, "eval_loss": 11.10819149017334, "eval_runtime": 1.5353, "eval_samples_per_second": 78.812, "eval_steps_per_second": 10.421, "step": 22258 }, { "epoch": 360.0, "eval_loss": 11.049622535705566, "eval_runtime": 1.5377, "eval_samples_per_second": 78.689, "eval_steps_per_second": 10.405, "step": 22320 }, { "epoch": 361.0, "eval_loss": 11.111467361450195, "eval_runtime": 1.52, "eval_samples_per_second": 79.604, "eval_steps_per_second": 10.526, "step": 22382 }, { "epoch": 362.0, "eval_loss": 10.887182235717773, "eval_runtime": 1.5238, "eval_samples_per_second": 79.408, "eval_steps_per_second": 10.5, "step": 22444 }, { "epoch": 362.9, "learning_rate": 1.3709677419354837e-08, "loss": 10.1057, "step": 22500 }, { "epoch": 363.0, "eval_loss": 10.899175643920898, "eval_runtime": 1.5381, "eval_samples_per_second": 78.671, "eval_steps_per_second": 10.403, "step": 22506 }, { "epoch": 364.0, "eval_loss": 11.026792526245117, "eval_runtime": 1.5471, "eval_samples_per_second": 78.21, "eval_steps_per_second": 10.342, "step": 22568 }, { "epoch": 365.0, "eval_loss": 10.96151065826416, "eval_runtime": 1.5277, "eval_samples_per_second": 79.204, "eval_steps_per_second": 10.473, "step": 22630 }, { "epoch": 366.0, "eval_loss": 10.967521667480469, "eval_runtime": 1.5284, "eval_samples_per_second": 79.165, "eval_steps_per_second": 10.468, "step": 22692 }, { "epoch": 367.0, "eval_loss": 11.17754077911377, "eval_runtime": 1.5345, "eval_samples_per_second": 78.853, "eval_steps_per_second": 10.427, "step": 22754 }, { "epoch": 368.0, "eval_loss": 10.994891166687012, "eval_runtime": 1.5285, "eval_samples_per_second": 79.161, "eval_steps_per_second": 10.468, "step": 22816 }, { "epoch": 369.0, "eval_loss": 11.081649780273438, "eval_runtime": 1.5214, "eval_samples_per_second": 79.533, "eval_steps_per_second": 10.517, "step": 22878 }, { "epoch": 370.0, "eval_loss": 11.051599502563477, "eval_runtime": 1.5213, "eval_samples_per_second": 79.538, "eval_steps_per_second": 10.517, "step": 22940 }, { "epoch": 370.97, "learning_rate": 1.2903225806451612e-08, "loss": 10.1217, "step": 23000 }, { "epoch": 371.0, "eval_loss": 11.067154884338379, "eval_runtime": 1.5291, "eval_samples_per_second": 79.131, "eval_steps_per_second": 10.464, "step": 23002 }, { "epoch": 372.0, "eval_loss": 10.966989517211914, "eval_runtime": 1.5272, "eval_samples_per_second": 79.228, "eval_steps_per_second": 10.476, "step": 23064 }, { "epoch": 373.0, "eval_loss": 11.200106620788574, "eval_runtime": 1.5333, "eval_samples_per_second": 78.916, "eval_steps_per_second": 10.435, "step": 23126 }, { "epoch": 374.0, "eval_loss": 11.017847061157227, "eval_runtime": 1.534, "eval_samples_per_second": 78.88, "eval_steps_per_second": 10.43, "step": 23188 }, { "epoch": 375.0, "eval_loss": 10.895892143249512, "eval_runtime": 1.5401, "eval_samples_per_second": 78.565, "eval_steps_per_second": 10.389, "step": 23250 }, { "epoch": 376.0, "eval_loss": 11.167061805725098, "eval_runtime": 1.528, "eval_samples_per_second": 79.188, "eval_steps_per_second": 10.471, "step": 23312 }, { "epoch": 377.0, "eval_loss": 11.18134593963623, "eval_runtime": 1.5229, "eval_samples_per_second": 79.454, "eval_steps_per_second": 10.506, "step": 23374 }, { "epoch": 378.0, "eval_loss": 11.156011581420898, "eval_runtime": 1.5299, "eval_samples_per_second": 79.09, "eval_steps_per_second": 10.458, "step": 23436 }, { "epoch": 379.0, "eval_loss": 11.084783554077148, "eval_runtime": 1.5211, "eval_samples_per_second": 79.545, "eval_steps_per_second": 10.518, "step": 23498 }, { "epoch": 379.03, "learning_rate": 1.2096774193548386e-08, "loss": 10.1098, "step": 23500 }, { "epoch": 380.0, "eval_loss": 11.04666805267334, "eval_runtime": 1.5221, "eval_samples_per_second": 79.493, "eval_steps_per_second": 10.512, "step": 23560 }, { "epoch": 381.0, "eval_loss": 11.148889541625977, "eval_runtime": 1.548, "eval_samples_per_second": 78.166, "eval_steps_per_second": 10.336, "step": 23622 }, { "epoch": 382.0, "eval_loss": 11.006622314453125, "eval_runtime": 1.5259, "eval_samples_per_second": 79.298, "eval_steps_per_second": 10.486, "step": 23684 }, { "epoch": 383.0, "eval_loss": 10.977095603942871, "eval_runtime": 1.5282, "eval_samples_per_second": 79.178, "eval_steps_per_second": 10.47, "step": 23746 }, { "epoch": 384.0, "eval_loss": 10.895329475402832, "eval_runtime": 1.5304, "eval_samples_per_second": 79.066, "eval_steps_per_second": 10.455, "step": 23808 }, { "epoch": 385.0, "eval_loss": 10.903176307678223, "eval_runtime": 1.5397, "eval_samples_per_second": 78.586, "eval_steps_per_second": 10.392, "step": 23870 }, { "epoch": 386.0, "eval_loss": 10.892215728759766, "eval_runtime": 1.5258, "eval_samples_per_second": 79.302, "eval_steps_per_second": 10.486, "step": 23932 }, { "epoch": 387.0, "eval_loss": 10.993136405944824, "eval_runtime": 1.5252, "eval_samples_per_second": 79.332, "eval_steps_per_second": 10.49, "step": 23994 }, { "epoch": 387.1, "learning_rate": 1.129032258064516e-08, "loss": 10.1262, "step": 24000 }, { "epoch": 388.0, "eval_loss": 11.127774238586426, "eval_runtime": 1.561, "eval_samples_per_second": 77.516, "eval_steps_per_second": 10.25, "step": 24056 }, { "epoch": 389.0, "eval_loss": 11.101140022277832, "eval_runtime": 1.534, "eval_samples_per_second": 78.877, "eval_steps_per_second": 10.43, "step": 24118 }, { "epoch": 390.0, "eval_loss": 11.042156219482422, "eval_runtime": 1.5371, "eval_samples_per_second": 78.721, "eval_steps_per_second": 10.409, "step": 24180 }, { "epoch": 391.0, "eval_loss": 11.063082695007324, "eval_runtime": 1.5258, "eval_samples_per_second": 79.304, "eval_steps_per_second": 10.487, "step": 24242 }, { "epoch": 392.0, "eval_loss": 10.970271110534668, "eval_runtime": 1.5279, "eval_samples_per_second": 79.192, "eval_steps_per_second": 10.472, "step": 24304 }, { "epoch": 393.0, "eval_loss": 10.953568458557129, "eval_runtime": 1.5296, "eval_samples_per_second": 79.105, "eval_steps_per_second": 10.46, "step": 24366 }, { "epoch": 394.0, "eval_loss": 11.076667785644531, "eval_runtime": 1.5221, "eval_samples_per_second": 79.496, "eval_steps_per_second": 10.512, "step": 24428 }, { "epoch": 395.0, "eval_loss": 10.922811508178711, "eval_runtime": 1.5248, "eval_samples_per_second": 79.353, "eval_steps_per_second": 10.493, "step": 24490 }, { "epoch": 395.16, "learning_rate": 1.0483870967741935e-08, "loss": 10.1038, "step": 24500 }, { "epoch": 396.0, "eval_loss": 11.142001152038574, "eval_runtime": 1.5327, "eval_samples_per_second": 78.944, "eval_steps_per_second": 10.439, "step": 24552 }, { "epoch": 397.0, "eval_loss": 11.145098686218262, "eval_runtime": 1.5305, "eval_samples_per_second": 79.06, "eval_steps_per_second": 10.454, "step": 24614 }, { "epoch": 398.0, "eval_loss": 11.166299819946289, "eval_runtime": 1.5557, "eval_samples_per_second": 77.777, "eval_steps_per_second": 10.285, "step": 24676 }, { "epoch": 399.0, "eval_loss": 11.075632095336914, "eval_runtime": 1.5363, "eval_samples_per_second": 78.761, "eval_steps_per_second": 10.415, "step": 24738 }, { "epoch": 400.0, "eval_loss": 11.114908218383789, "eval_runtime": 1.5291, "eval_samples_per_second": 79.134, "eval_steps_per_second": 10.464, "step": 24800 }, { "epoch": 401.0, "eval_loss": 11.065619468688965, "eval_runtime": 1.5197, "eval_samples_per_second": 79.621, "eval_steps_per_second": 10.528, "step": 24862 }, { "epoch": 402.0, "eval_loss": 11.075223922729492, "eval_runtime": 1.5218, "eval_samples_per_second": 79.51, "eval_steps_per_second": 10.514, "step": 24924 }, { "epoch": 403.0, "eval_loss": 11.141654968261719, "eval_runtime": 1.5194, "eval_samples_per_second": 79.637, "eval_steps_per_second": 10.53, "step": 24986 }, { "epoch": 403.23, "learning_rate": 9.677419354838709e-09, "loss": 10.1159, "step": 25000 }, { "epoch": 404.0, "eval_loss": 10.96292495727539, "eval_runtime": 1.5308, "eval_samples_per_second": 79.043, "eval_steps_per_second": 10.452, "step": 25048 }, { "epoch": 405.0, "eval_loss": 10.93803596496582, "eval_runtime": 1.541, "eval_samples_per_second": 78.519, "eval_steps_per_second": 10.383, "step": 25110 }, { "epoch": 406.0, "eval_loss": 11.00986385345459, "eval_runtime": 1.5347, "eval_samples_per_second": 78.841, "eval_steps_per_second": 10.425, "step": 25172 }, { "epoch": 407.0, "eval_loss": 11.137813568115234, "eval_runtime": 1.5284, "eval_samples_per_second": 79.17, "eval_steps_per_second": 10.469, "step": 25234 }, { "epoch": 408.0, "eval_loss": 10.851067543029785, "eval_runtime": 1.5273, "eval_samples_per_second": 79.223, "eval_steps_per_second": 10.476, "step": 25296 }, { "epoch": 409.0, "eval_loss": 11.01217269897461, "eval_runtime": 1.5232, "eval_samples_per_second": 79.439, "eval_steps_per_second": 10.504, "step": 25358 }, { "epoch": 410.0, "eval_loss": 11.01009750366211, "eval_runtime": 1.5196, "eval_samples_per_second": 79.627, "eval_steps_per_second": 10.529, "step": 25420 }, { "epoch": 411.0, "eval_loss": 10.873472213745117, "eval_runtime": 1.5201, "eval_samples_per_second": 79.6, "eval_steps_per_second": 10.526, "step": 25482 }, { "epoch": 411.29, "learning_rate": 8.870967741935485e-09, "loss": 10.0848, "step": 25500 }, { "epoch": 412.0, "eval_loss": 11.008174896240234, "eval_runtime": 1.5272, "eval_samples_per_second": 79.232, "eval_steps_per_second": 10.477, "step": 25544 }, { "epoch": 413.0, "eval_loss": 11.012767791748047, "eval_runtime": 1.5353, "eval_samples_per_second": 78.81, "eval_steps_per_second": 10.421, "step": 25606 }, { "epoch": 414.0, "eval_loss": 11.137340545654297, "eval_runtime": 1.5293, "eval_samples_per_second": 79.119, "eval_steps_per_second": 10.462, "step": 25668 }, { "epoch": 415.0, "eval_loss": 11.137984275817871, "eval_runtime": 1.5285, "eval_samples_per_second": 79.163, "eval_steps_per_second": 10.468, "step": 25730 }, { "epoch": 416.0, "eval_loss": 10.93875503540039, "eval_runtime": 1.5284, "eval_samples_per_second": 79.167, "eval_steps_per_second": 10.468, "step": 25792 }, { "epoch": 417.0, "eval_loss": 10.93083381652832, "eval_runtime": 1.524, "eval_samples_per_second": 79.396, "eval_steps_per_second": 10.499, "step": 25854 }, { "epoch": 418.0, "eval_loss": 11.082795143127441, "eval_runtime": 1.5216, "eval_samples_per_second": 79.523, "eval_steps_per_second": 10.515, "step": 25916 }, { "epoch": 419.0, "eval_loss": 11.256653785705566, "eval_runtime": 1.5222, "eval_samples_per_second": 79.488, "eval_steps_per_second": 10.511, "step": 25978 }, { "epoch": 419.35, "learning_rate": 8.064516129032257e-09, "loss": 10.0923, "step": 26000 }, { "epoch": 420.0, "eval_loss": 11.040205955505371, "eval_runtime": 1.5329, "eval_samples_per_second": 78.935, "eval_steps_per_second": 10.438, "step": 26040 }, { "epoch": 421.0, "eval_loss": 11.028487205505371, "eval_runtime": 1.541, "eval_samples_per_second": 78.518, "eval_steps_per_second": 10.383, "step": 26102 }, { "epoch": 422.0, "eval_loss": 11.002765655517578, "eval_runtime": 1.5314, "eval_samples_per_second": 79.012, "eval_steps_per_second": 10.448, "step": 26164 }, { "epoch": 423.0, "eval_loss": 10.978453636169434, "eval_runtime": 1.5289, "eval_samples_per_second": 79.144, "eval_steps_per_second": 10.465, "step": 26226 }, { "epoch": 424.0, "eval_loss": 11.046844482421875, "eval_runtime": 1.5378, "eval_samples_per_second": 78.685, "eval_steps_per_second": 10.405, "step": 26288 }, { "epoch": 425.0, "eval_loss": 11.059452056884766, "eval_runtime": 1.523, "eval_samples_per_second": 79.446, "eval_steps_per_second": 10.505, "step": 26350 }, { "epoch": 426.0, "eval_loss": 11.098678588867188, "eval_runtime": 1.521, "eval_samples_per_second": 79.551, "eval_steps_per_second": 10.519, "step": 26412 }, { "epoch": 427.0, "eval_loss": 10.876445770263672, "eval_runtime": 1.5209, "eval_samples_per_second": 79.556, "eval_steps_per_second": 10.52, "step": 26474 }, { "epoch": 427.42, "learning_rate": 7.258064516129032e-09, "loss": 10.1, "step": 26500 }, { "epoch": 428.0, "eval_loss": 10.964273452758789, "eval_runtime": 1.5385, "eval_samples_per_second": 78.65, "eval_steps_per_second": 10.4, "step": 26536 }, { "epoch": 429.0, "eval_loss": 10.986409187316895, "eval_runtime": 1.5468, "eval_samples_per_second": 78.225, "eval_steps_per_second": 10.344, "step": 26598 }, { "epoch": 430.0, "eval_loss": 11.001900672912598, "eval_runtime": 1.529, "eval_samples_per_second": 79.135, "eval_steps_per_second": 10.464, "step": 26660 }, { "epoch": 431.0, "eval_loss": 11.103708267211914, "eval_runtime": 1.5277, "eval_samples_per_second": 79.205, "eval_steps_per_second": 10.473, "step": 26722 }, { "epoch": 432.0, "eval_loss": 10.956040382385254, "eval_runtime": 1.5283, "eval_samples_per_second": 79.174, "eval_steps_per_second": 10.469, "step": 26784 }, { "epoch": 433.0, "eval_loss": 10.98078727722168, "eval_runtime": 1.545, "eval_samples_per_second": 78.316, "eval_steps_per_second": 10.356, "step": 26846 }, { "epoch": 434.0, "eval_loss": 11.044650077819824, "eval_runtime": 1.5194, "eval_samples_per_second": 79.639, "eval_steps_per_second": 10.531, "step": 26908 }, { "epoch": 435.0, "eval_loss": 10.943286895751953, "eval_runtime": 1.5208, "eval_samples_per_second": 79.563, "eval_steps_per_second": 10.521, "step": 26970 }, { "epoch": 435.48, "learning_rate": 6.451612903225806e-09, "loss": 10.0944, "step": 27000 }, { "epoch": 436.0, "eval_loss": 11.040830612182617, "eval_runtime": 1.5264, "eval_samples_per_second": 79.272, "eval_steps_per_second": 10.482, "step": 27032 }, { "epoch": 437.0, "eval_loss": 10.829809188842773, "eval_runtime": 1.531, "eval_samples_per_second": 79.032, "eval_steps_per_second": 10.45, "step": 27094 }, { "epoch": 438.0, "eval_loss": 11.169089317321777, "eval_runtime": 1.5289, "eval_samples_per_second": 79.142, "eval_steps_per_second": 10.465, "step": 27156 }, { "epoch": 439.0, "eval_loss": 11.029060363769531, "eval_runtime": 1.5418, "eval_samples_per_second": 78.481, "eval_steps_per_second": 10.378, "step": 27218 }, { "epoch": 440.0, "eval_loss": 10.99075984954834, "eval_runtime": 1.5372, "eval_samples_per_second": 78.712, "eval_steps_per_second": 10.408, "step": 27280 }, { "epoch": 441.0, "eval_loss": 10.889039039611816, "eval_runtime": 1.5276, "eval_samples_per_second": 79.211, "eval_steps_per_second": 10.474, "step": 27342 }, { "epoch": 442.0, "eval_loss": 11.024713516235352, "eval_runtime": 1.5184, "eval_samples_per_second": 79.688, "eval_steps_per_second": 10.537, "step": 27404 }, { "epoch": 443.0, "eval_loss": 11.10232925415039, "eval_runtime": 1.5225, "eval_samples_per_second": 79.473, "eval_steps_per_second": 10.509, "step": 27466 }, { "epoch": 443.55, "learning_rate": 5.64516129032258e-09, "loss": 10.0965, "step": 27500 }, { "epoch": 444.0, "eval_loss": 11.125219345092773, "eval_runtime": 1.5507, "eval_samples_per_second": 78.027, "eval_steps_per_second": 10.318, "step": 27528 }, { "epoch": 445.0, "eval_loss": 11.037354469299316, "eval_runtime": 1.5307, "eval_samples_per_second": 79.048, "eval_steps_per_second": 10.453, "step": 27590 }, { "epoch": 446.0, "eval_loss": 11.032657623291016, "eval_runtime": 1.5272, "eval_samples_per_second": 79.231, "eval_steps_per_second": 10.477, "step": 27652 }, { "epoch": 447.0, "eval_loss": 10.83928394317627, "eval_runtime": 1.5299, "eval_samples_per_second": 79.09, "eval_steps_per_second": 10.458, "step": 27714 }, { "epoch": 448.0, "eval_loss": 10.952762603759766, "eval_runtime": 1.5436, "eval_samples_per_second": 78.39, "eval_steps_per_second": 10.366, "step": 27776 }, { "epoch": 449.0, "eval_loss": 11.012396812438965, "eval_runtime": 1.5272, "eval_samples_per_second": 79.231, "eval_steps_per_second": 10.477, "step": 27838 }, { "epoch": 450.0, "eval_loss": 10.84825611114502, "eval_runtime": 1.5273, "eval_samples_per_second": 79.226, "eval_steps_per_second": 10.476, "step": 27900 }, { "epoch": 451.0, "eval_loss": 10.897711753845215, "eval_runtime": 1.527, "eval_samples_per_second": 79.241, "eval_steps_per_second": 10.478, "step": 27962 }, { "epoch": 451.61, "learning_rate": 4.8387096774193544e-09, "loss": 10.1023, "step": 28000 }, { "epoch": 452.0, "eval_loss": 11.033035278320312, "eval_runtime": 1.5393, "eval_samples_per_second": 78.607, "eval_steps_per_second": 10.394, "step": 28024 }, { "epoch": 453.0, "eval_loss": 10.92820930480957, "eval_runtime": 1.5378, "eval_samples_per_second": 78.682, "eval_steps_per_second": 10.404, "step": 28086 }, { "epoch": 454.0, "eval_loss": 11.028227806091309, "eval_runtime": 1.5335, "eval_samples_per_second": 78.905, "eval_steps_per_second": 10.434, "step": 28148 }, { "epoch": 455.0, "eval_loss": 10.94653034210205, "eval_runtime": 1.5323, "eval_samples_per_second": 78.968, "eval_steps_per_second": 10.442, "step": 28210 }, { "epoch": 456.0, "eval_loss": 10.933752059936523, "eval_runtime": 1.542, "eval_samples_per_second": 78.468, "eval_steps_per_second": 10.376, "step": 28272 }, { "epoch": 457.0, "eval_loss": 10.998141288757324, "eval_runtime": 1.5243, "eval_samples_per_second": 79.38, "eval_steps_per_second": 10.496, "step": 28334 }, { "epoch": 458.0, "eval_loss": 11.004565238952637, "eval_runtime": 1.5184, "eval_samples_per_second": 79.69, "eval_steps_per_second": 10.537, "step": 28396 }, { "epoch": 459.0, "eval_loss": 11.058143615722656, "eval_runtime": 1.5198, "eval_samples_per_second": 79.618, "eval_steps_per_second": 10.528, "step": 28458 }, { "epoch": 459.68, "learning_rate": 4.0322580645161286e-09, "loss": 10.0928, "step": 28500 }, { "epoch": 460.0, "eval_loss": 11.069917678833008, "eval_runtime": 1.5309, "eval_samples_per_second": 79.038, "eval_steps_per_second": 10.451, "step": 28520 }, { "epoch": 461.0, "eval_loss": 11.008392333984375, "eval_runtime": 1.5319, "eval_samples_per_second": 78.986, "eval_steps_per_second": 10.444, "step": 28582 }, { "epoch": 462.0, "eval_loss": 10.968953132629395, "eval_runtime": 1.5279, "eval_samples_per_second": 79.192, "eval_steps_per_second": 10.472, "step": 28644 }, { "epoch": 463.0, "eval_loss": 11.05852222442627, "eval_runtime": 1.5388, "eval_samples_per_second": 78.635, "eval_steps_per_second": 10.398, "step": 28706 }, { "epoch": 464.0, "eval_loss": 11.158599853515625, "eval_runtime": 1.5307, "eval_samples_per_second": 79.048, "eval_steps_per_second": 10.453, "step": 28768 }, { "epoch": 465.0, "eval_loss": 11.008247375488281, "eval_runtime": 1.5274, "eval_samples_per_second": 79.218, "eval_steps_per_second": 10.475, "step": 28830 }, { "epoch": 466.0, "eval_loss": 11.049484252929688, "eval_runtime": 1.5195, "eval_samples_per_second": 79.634, "eval_steps_per_second": 10.53, "step": 28892 }, { "epoch": 467.0, "eval_loss": 10.853099822998047, "eval_runtime": 1.5203, "eval_samples_per_second": 79.589, "eval_steps_per_second": 10.524, "step": 28954 }, { "epoch": 467.74, "learning_rate": 3.225806451612903e-09, "loss": 10.0925, "step": 29000 }, { "epoch": 468.0, "eval_loss": 10.858702659606934, "eval_runtime": 1.5294, "eval_samples_per_second": 79.118, "eval_steps_per_second": 10.462, "step": 29016 }, { "epoch": 469.0, "eval_loss": 11.159988403320312, "eval_runtime": 1.5283, "eval_samples_per_second": 79.173, "eval_steps_per_second": 10.469, "step": 29078 }, { "epoch": 470.0, "eval_loss": 10.992026329040527, "eval_runtime": 1.5307, "eval_samples_per_second": 79.05, "eval_steps_per_second": 10.453, "step": 29140 }, { "epoch": 471.0, "eval_loss": 11.020038604736328, "eval_runtime": 1.5472, "eval_samples_per_second": 78.208, "eval_steps_per_second": 10.342, "step": 29202 }, { "epoch": 472.0, "eval_loss": 11.056954383850098, "eval_runtime": 1.5255, "eval_samples_per_second": 79.316, "eval_steps_per_second": 10.488, "step": 29264 }, { "epoch": 473.0, "eval_loss": 10.931620597839355, "eval_runtime": 1.5261, "eval_samples_per_second": 79.287, "eval_steps_per_second": 10.484, "step": 29326 }, { "epoch": 474.0, "eval_loss": 11.059667587280273, "eval_runtime": 1.5202, "eval_samples_per_second": 79.597, "eval_steps_per_second": 10.525, "step": 29388 }, { "epoch": 475.0, "eval_loss": 10.994057655334473, "eval_runtime": 1.5275, "eval_samples_per_second": 79.216, "eval_steps_per_second": 10.475, "step": 29450 }, { "epoch": 475.81, "learning_rate": 2.4193548387096772e-09, "loss": 10.0956, "step": 29500 }, { "epoch": 476.0, "eval_loss": 11.07702350616455, "eval_runtime": 1.5276, "eval_samples_per_second": 79.21, "eval_steps_per_second": 10.474, "step": 29512 }, { "epoch": 477.0, "eval_loss": 11.006840705871582, "eval_runtime": 1.5298, "eval_samples_per_second": 79.098, "eval_steps_per_second": 10.459, "step": 29574 }, { "epoch": 478.0, "eval_loss": 10.907955169677734, "eval_runtime": 1.5329, "eval_samples_per_second": 78.936, "eval_steps_per_second": 10.438, "step": 29636 }, { "epoch": 479.0, "eval_loss": 10.865551948547363, "eval_runtime": 1.5272, "eval_samples_per_second": 79.23, "eval_steps_per_second": 10.477, "step": 29698 }, { "epoch": 480.0, "eval_loss": 11.035009384155273, "eval_runtime": 1.5396, "eval_samples_per_second": 78.594, "eval_steps_per_second": 10.393, "step": 29760 }, { "epoch": 481.0, "eval_loss": 11.06725788116455, "eval_runtime": 1.5306, "eval_samples_per_second": 79.056, "eval_steps_per_second": 10.454, "step": 29822 }, { "epoch": 482.0, "eval_loss": 11.034406661987305, "eval_runtime": 1.5223, "eval_samples_per_second": 79.487, "eval_steps_per_second": 10.511, "step": 29884 }, { "epoch": 483.0, "eval_loss": 10.981928825378418, "eval_runtime": 1.5189, "eval_samples_per_second": 79.661, "eval_steps_per_second": 10.534, "step": 29946 }, { "epoch": 483.87, "learning_rate": 1.6129032258064515e-09, "loss": 10.112, "step": 30000 }, { "epoch": 484.0, "eval_loss": 11.066652297973633, "eval_runtime": 1.5282, "eval_samples_per_second": 79.179, "eval_steps_per_second": 10.47, "step": 30008 }, { "epoch": 485.0, "eval_loss": 10.937297821044922, "eval_runtime": 1.5264, "eval_samples_per_second": 79.27, "eval_steps_per_second": 10.482, "step": 30070 }, { "epoch": 486.0, "eval_loss": 10.977715492248535, "eval_runtime": 1.537, "eval_samples_per_second": 78.724, "eval_steps_per_second": 10.41, "step": 30132 }, { "epoch": 487.0, "eval_loss": 10.983244895935059, "eval_runtime": 1.5322, "eval_samples_per_second": 78.969, "eval_steps_per_second": 10.442, "step": 30194 }, { "epoch": 488.0, "eval_loss": 11.142578125, "eval_runtime": 1.532, "eval_samples_per_second": 78.984, "eval_steps_per_second": 10.444, "step": 30256 }, { "epoch": 489.0, "eval_loss": 11.0770845413208, "eval_runtime": 1.527, "eval_samples_per_second": 79.241, "eval_steps_per_second": 10.478, "step": 30318 }, { "epoch": 490.0, "eval_loss": 10.951501846313477, "eval_runtime": 1.5214, "eval_samples_per_second": 79.534, "eval_steps_per_second": 10.517, "step": 30380 }, { "epoch": 491.0, "eval_loss": 10.961722373962402, "eval_runtime": 1.5203, "eval_samples_per_second": 79.59, "eval_steps_per_second": 10.524, "step": 30442 }, { "epoch": 491.94, "learning_rate": 8.064516129032258e-10, "loss": 10.1007, "step": 30500 }, { "epoch": 492.0, "eval_loss": 10.952271461486816, "eval_runtime": 1.528, "eval_samples_per_second": 79.187, "eval_steps_per_second": 10.471, "step": 30504 }, { "epoch": 493.0, "eval_loss": 10.87966537475586, "eval_runtime": 1.5563, "eval_samples_per_second": 77.748, "eval_steps_per_second": 10.281, "step": 30566 }, { "epoch": 494.0, "eval_loss": 10.981633186340332, "eval_runtime": 1.533, "eval_samples_per_second": 78.933, "eval_steps_per_second": 10.437, "step": 30628 }, { "epoch": 495.0, "eval_loss": 10.952598571777344, "eval_runtime": 1.5283, "eval_samples_per_second": 79.171, "eval_steps_per_second": 10.469, "step": 30690 }, { "epoch": 496.0, "eval_loss": 11.00783920288086, "eval_runtime": 1.5275, "eval_samples_per_second": 79.212, "eval_steps_per_second": 10.474, "step": 30752 }, { "epoch": 497.0, "eval_loss": 11.156755447387695, "eval_runtime": 1.536, "eval_samples_per_second": 78.778, "eval_steps_per_second": 10.417, "step": 30814 }, { "epoch": 498.0, "eval_loss": 10.986204147338867, "eval_runtime": 1.521, "eval_samples_per_second": 79.554, "eval_steps_per_second": 10.519, "step": 30876 }, { "epoch": 499.0, "eval_loss": 11.053728103637695, "eval_runtime": 1.5217, "eval_samples_per_second": 79.518, "eval_steps_per_second": 10.515, "step": 30938 }, { "epoch": 500.0, "learning_rate": 0.0, "loss": 10.0953, "step": 31000 }, { "epoch": 500.0, "eval_loss": 11.041763305664062, "eval_runtime": 1.5553, "eval_samples_per_second": 77.797, "eval_steps_per_second": 10.287, "step": 31000 }, { "epoch": 500.0, "step": 31000, "total_flos": 1.6202616878592e+16, "train_loss": 11.618560483870969, "train_runtime": 17098.8381, "train_samples_per_second": 14.358, "train_steps_per_second": 1.813 } ], "max_steps": 31000, "num_train_epochs": 500, "total_flos": 1.6202616878592e+16, "trial_name": null, "trial_params": null }