{ "best_metric": null, "best_model_checkpoint": null, "epoch": 499.9979550102249, "global_step": 30500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 39.79280090332031, "eval_runtime": 1.532, "eval_samples_per_second": 79.634, "eval_steps_per_second": 10.444, "step": 61 }, { "epoch": 2.0, "eval_loss": 39.81950378417969, "eval_runtime": 1.5354, "eval_samples_per_second": 79.461, "eval_steps_per_second": 10.421, "step": 122 }, { "epoch": 3.0, "eval_loss": 39.82277297973633, "eval_runtime": 1.532, "eval_samples_per_second": 79.635, "eval_steps_per_second": 10.444, "step": 183 }, { "epoch": 4.0, "eval_loss": 39.07925796508789, "eval_runtime": 1.5323, "eval_samples_per_second": 79.619, "eval_steps_per_second": 10.442, "step": 244 }, { "epoch": 5.0, "eval_loss": 38.66284942626953, "eval_runtime": 1.5313, "eval_samples_per_second": 79.671, "eval_steps_per_second": 10.449, "step": 305 }, { "epoch": 6.0, "eval_loss": 37.40141296386719, "eval_runtime": 1.5324, "eval_samples_per_second": 79.615, "eval_steps_per_second": 10.441, "step": 366 }, { "epoch": 7.0, "eval_loss": 36.820281982421875, "eval_runtime": 1.5357, "eval_samples_per_second": 79.443, "eval_steps_per_second": 10.419, "step": 427 }, { "epoch": 8.0, "eval_loss": 36.260650634765625, "eval_runtime": 1.5321, "eval_samples_per_second": 79.631, "eval_steps_per_second": 10.443, "step": 488 }, { "epoch": 8.2, "learning_rate": 4.918032786885246e-08, "loss": 41.0436, "step": 500 }, { "epoch": 9.0, "eval_loss": 35.931610107421875, "eval_runtime": 1.5478, "eval_samples_per_second": 78.82, "eval_steps_per_second": 10.337, "step": 549 }, { "epoch": 10.0, "eval_loss": 34.32823181152344, "eval_runtime": 1.5396, "eval_samples_per_second": 79.242, "eval_steps_per_second": 10.392, "step": 610 }, { "epoch": 11.0, "eval_loss": 33.36042404174805, "eval_runtime": 1.5352, "eval_samples_per_second": 79.469, "eval_steps_per_second": 10.422, "step": 671 }, { "epoch": 12.0, "eval_loss": 32.44695281982422, "eval_runtime": 1.5322, "eval_samples_per_second": 79.623, "eval_steps_per_second": 10.442, "step": 732 }, { "epoch": 13.0, "eval_loss": 31.54026985168457, "eval_runtime": 1.5308, "eval_samples_per_second": 79.698, "eval_steps_per_second": 10.452, "step": 793 }, { "epoch": 14.0, "eval_loss": 30.966489791870117, "eval_runtime": 1.531, "eval_samples_per_second": 79.689, "eval_steps_per_second": 10.451, "step": 854 }, { "epoch": 15.0, "eval_loss": 29.417085647583008, "eval_runtime": 1.5409, "eval_samples_per_second": 79.175, "eval_steps_per_second": 10.384, "step": 915 }, { "epoch": 16.0, "eval_loss": 28.458908081054688, "eval_runtime": 1.5307, "eval_samples_per_second": 79.7, "eval_steps_per_second": 10.452, "step": 976 }, { "epoch": 16.39, "learning_rate": 4.8360655737704914e-08, "loss": 32.5506, "step": 1000 }, { "epoch": 17.0, "eval_loss": 27.26991081237793, "eval_runtime": 1.5386, "eval_samples_per_second": 79.293, "eval_steps_per_second": 10.399, "step": 1037 }, { "epoch": 18.0, "eval_loss": 26.061372756958008, "eval_runtime": 1.5472, "eval_samples_per_second": 78.852, "eval_steps_per_second": 10.341, "step": 1098 }, { "epoch": 19.0, "eval_loss": 25.152942657470703, "eval_runtime": 1.5408, "eval_samples_per_second": 79.178, "eval_steps_per_second": 10.384, "step": 1159 }, { "epoch": 20.0, "eval_loss": 24.308048248291016, "eval_runtime": 1.5324, "eval_samples_per_second": 79.616, "eval_steps_per_second": 10.441, "step": 1220 }, { "epoch": 21.0, "eval_loss": 23.15108871459961, "eval_runtime": 1.5344, "eval_samples_per_second": 79.509, "eval_steps_per_second": 10.427, "step": 1281 }, { "epoch": 22.0, "eval_loss": 22.333181381225586, "eval_runtime": 1.5309, "eval_samples_per_second": 79.691, "eval_steps_per_second": 10.451, "step": 1342 }, { "epoch": 23.0, "eval_loss": 21.71536636352539, "eval_runtime": 1.5304, "eval_samples_per_second": 79.718, "eval_steps_per_second": 10.455, "step": 1403 }, { "epoch": 24.0, "eval_loss": 20.736703872680664, "eval_runtime": 1.5313, "eval_samples_per_second": 79.67, "eval_steps_per_second": 10.448, "step": 1464 }, { "epoch": 24.59, "learning_rate": 4.7540983606557375e-08, "loss": 24.3212, "step": 1500 }, { "epoch": 25.0, "eval_loss": 20.180402755737305, "eval_runtime": 1.5364, "eval_samples_per_second": 79.404, "eval_steps_per_second": 10.414, "step": 1525 }, { "epoch": 26.0, "eval_loss": 19.559112548828125, "eval_runtime": 1.5443, "eval_samples_per_second": 79.002, "eval_steps_per_second": 10.361, "step": 1586 }, { "epoch": 27.0, "eval_loss": 18.87604522705078, "eval_runtime": 1.5495, "eval_samples_per_second": 78.733, "eval_steps_per_second": 10.326, "step": 1647 }, { "epoch": 28.0, "eval_loss": 18.329124450683594, "eval_runtime": 1.5379, "eval_samples_per_second": 79.329, "eval_steps_per_second": 10.404, "step": 1708 }, { "epoch": 29.0, "eval_loss": 18.029600143432617, "eval_runtime": 1.5333, "eval_samples_per_second": 79.569, "eval_steps_per_second": 10.435, "step": 1769 }, { "epoch": 30.0, "eval_loss": 17.40913200378418, "eval_runtime": 1.5337, "eval_samples_per_second": 79.546, "eval_steps_per_second": 10.432, "step": 1830 }, { "epoch": 31.0, "eval_loss": 17.215015411376953, "eval_runtime": 1.5336, "eval_samples_per_second": 79.552, "eval_steps_per_second": 10.433, "step": 1891 }, { "epoch": 32.0, "eval_loss": 16.648334503173828, "eval_runtime": 1.5309, "eval_samples_per_second": 79.692, "eval_steps_per_second": 10.451, "step": 1952 }, { "epoch": 32.79, "learning_rate": 4.6721311475409836e-08, "loss": 18.5662, "step": 2000 }, { "epoch": 33.0, "eval_loss": 16.269676208496094, "eval_runtime": 1.5361, "eval_samples_per_second": 79.421, "eval_steps_per_second": 10.416, "step": 2013 }, { "epoch": 34.0, "eval_loss": 16.17144775390625, "eval_runtime": 1.5342, "eval_samples_per_second": 79.519, "eval_steps_per_second": 10.429, "step": 2074 }, { "epoch": 35.0, "eval_loss": 15.8008394241333, "eval_runtime": 1.538, "eval_samples_per_second": 79.323, "eval_steps_per_second": 10.403, "step": 2135 }, { "epoch": 36.0, "eval_loss": 15.264719009399414, "eval_runtime": 1.5357, "eval_samples_per_second": 79.444, "eval_steps_per_second": 10.419, "step": 2196 }, { "epoch": 37.0, "eval_loss": 15.41949462890625, "eval_runtime": 1.5368, "eval_samples_per_second": 79.387, "eval_steps_per_second": 10.411, "step": 2257 }, { "epoch": 38.0, "eval_loss": 14.989760398864746, "eval_runtime": 1.5351, "eval_samples_per_second": 79.476, "eval_steps_per_second": 10.423, "step": 2318 }, { "epoch": 39.0, "eval_loss": 14.859911918640137, "eval_runtime": 1.5312, "eval_samples_per_second": 79.676, "eval_steps_per_second": 10.449, "step": 2379 }, { "epoch": 40.0, "eval_loss": 14.611943244934082, "eval_runtime": 1.5312, "eval_samples_per_second": 79.677, "eval_steps_per_second": 10.449, "step": 2440 }, { "epoch": 40.98, "learning_rate": 4.590163934426229e-08, "loss": 15.1141, "step": 2500 }, { "epoch": 41.0, "eval_loss": 14.464351654052734, "eval_runtime": 1.5311, "eval_samples_per_second": 79.682, "eval_steps_per_second": 10.45, "step": 2501 }, { "epoch": 42.0, "eval_loss": 14.339770317077637, "eval_runtime": 1.5431, "eval_samples_per_second": 79.064, "eval_steps_per_second": 10.369, "step": 2562 }, { "epoch": 43.0, "eval_loss": 14.410659790039062, "eval_runtime": 1.5584, "eval_samples_per_second": 78.285, "eval_steps_per_second": 10.267, "step": 2623 }, { "epoch": 44.0, "eval_loss": 13.903946876525879, "eval_runtime": 1.5462, "eval_samples_per_second": 78.902, "eval_steps_per_second": 10.348, "step": 2684 }, { "epoch": 45.0, "eval_loss": 14.004327774047852, "eval_runtime": 1.5337, "eval_samples_per_second": 79.545, "eval_steps_per_second": 10.432, "step": 2745 }, { "epoch": 46.0, "eval_loss": 13.953380584716797, "eval_runtime": 1.5354, "eval_samples_per_second": 79.459, "eval_steps_per_second": 10.421, "step": 2806 }, { "epoch": 47.0, "eval_loss": 13.926652908325195, "eval_runtime": 1.5336, "eval_samples_per_second": 79.552, "eval_steps_per_second": 10.433, "step": 2867 }, { "epoch": 48.0, "eval_loss": 13.588834762573242, "eval_runtime": 1.563, "eval_samples_per_second": 78.054, "eval_steps_per_second": 10.237, "step": 2928 }, { "epoch": 49.0, "eval_loss": 13.63134479522705, "eval_runtime": 1.5341, "eval_samples_per_second": 79.524, "eval_steps_per_second": 10.429, "step": 2989 }, { "epoch": 49.18, "learning_rate": 4.508196721311475e-08, "loss": 13.182, "step": 3000 }, { "epoch": 50.0, "eval_loss": 13.540535926818848, "eval_runtime": 1.5436, "eval_samples_per_second": 79.038, "eval_steps_per_second": 10.366, "step": 3050 }, { "epoch": 51.0, "eval_loss": 13.337821960449219, "eval_runtime": 1.5382, "eval_samples_per_second": 79.316, "eval_steps_per_second": 10.402, "step": 3111 }, { "epoch": 52.0, "eval_loss": 13.316015243530273, "eval_runtime": 1.5353, "eval_samples_per_second": 79.465, "eval_steps_per_second": 10.422, "step": 3172 }, { "epoch": 53.0, "eval_loss": 13.135573387145996, "eval_runtime": 1.5309, "eval_samples_per_second": 79.692, "eval_steps_per_second": 10.451, "step": 3233 }, { "epoch": 54.0, "eval_loss": 13.248279571533203, "eval_runtime": 1.531, "eval_samples_per_second": 79.688, "eval_steps_per_second": 10.451, "step": 3294 }, { "epoch": 55.0, "eval_loss": 13.125914573669434, "eval_runtime": 1.5309, "eval_samples_per_second": 79.693, "eval_steps_per_second": 10.452, "step": 3355 }, { "epoch": 56.0, "eval_loss": 13.177481651306152, "eval_runtime": 1.5387, "eval_samples_per_second": 79.286, "eval_steps_per_second": 10.398, "step": 3416 }, { "epoch": 57.0, "eval_loss": 13.111815452575684, "eval_runtime": 1.5355, "eval_samples_per_second": 79.455, "eval_steps_per_second": 10.42, "step": 3477 }, { "epoch": 57.38, "learning_rate": 4.426229508196721e-08, "loss": 12.1712, "step": 3500 }, { "epoch": 58.0, "eval_loss": 12.936331748962402, "eval_runtime": 1.5524, "eval_samples_per_second": 78.59, "eval_steps_per_second": 10.307, "step": 3538 }, { "epoch": 59.0, "eval_loss": 12.876540184020996, "eval_runtime": 1.5376, "eval_samples_per_second": 79.345, "eval_steps_per_second": 10.406, "step": 3599 }, { "epoch": 60.0, "eval_loss": 12.79231071472168, "eval_runtime": 1.537, "eval_samples_per_second": 79.375, "eval_steps_per_second": 10.41, "step": 3660 }, { "epoch": 61.0, "eval_loss": 12.973188400268555, "eval_runtime": 1.534, "eval_samples_per_second": 79.532, "eval_steps_per_second": 10.43, "step": 3721 }, { "epoch": 62.0, "eval_loss": 12.860616683959961, "eval_runtime": 1.5352, "eval_samples_per_second": 79.468, "eval_steps_per_second": 10.422, "step": 3782 }, { "epoch": 63.0, "eval_loss": 12.789679527282715, "eval_runtime": 1.5317, "eval_samples_per_second": 79.652, "eval_steps_per_second": 10.446, "step": 3843 }, { "epoch": 64.0, "eval_loss": 12.651591300964355, "eval_runtime": 1.5315, "eval_samples_per_second": 79.662, "eval_steps_per_second": 10.447, "step": 3904 }, { "epoch": 65.0, "eval_loss": 12.624197959899902, "eval_runtime": 1.5373, "eval_samples_per_second": 79.36, "eval_steps_per_second": 10.408, "step": 3965 }, { "epoch": 65.57, "learning_rate": 4.344262295081967e-08, "loss": 11.5853, "step": 4000 }, { "epoch": 66.0, "eval_loss": 12.59536361694336, "eval_runtime": 1.537, "eval_samples_per_second": 79.375, "eval_steps_per_second": 10.41, "step": 4026 }, { "epoch": 67.0, "eval_loss": 12.450535774230957, "eval_runtime": 1.5373, "eval_samples_per_second": 79.357, "eval_steps_per_second": 10.408, "step": 4087 }, { "epoch": 68.0, "eval_loss": 12.559524536132812, "eval_runtime": 1.5374, "eval_samples_per_second": 79.356, "eval_steps_per_second": 10.407, "step": 4148 }, { "epoch": 69.0, "eval_loss": 12.428722381591797, "eval_runtime": 1.5314, "eval_samples_per_second": 79.665, "eval_steps_per_second": 10.448, "step": 4209 }, { "epoch": 70.0, "eval_loss": 12.476819038391113, "eval_runtime": 1.5329, "eval_samples_per_second": 79.59, "eval_steps_per_second": 10.438, "step": 4270 }, { "epoch": 71.0, "eval_loss": 12.400250434875488, "eval_runtime": 1.5417, "eval_samples_per_second": 79.132, "eval_steps_per_second": 10.378, "step": 4331 }, { "epoch": 72.0, "eval_loss": 12.432337760925293, "eval_runtime": 1.5368, "eval_samples_per_second": 79.387, "eval_steps_per_second": 10.411, "step": 4392 }, { "epoch": 73.0, "eval_loss": 12.41788101196289, "eval_runtime": 1.534, "eval_samples_per_second": 79.531, "eval_steps_per_second": 10.43, "step": 4453 }, { "epoch": 73.77, "learning_rate": 4.262295081967213e-08, "loss": 11.25, "step": 4500 }, { "epoch": 74.0, "eval_loss": 12.376703262329102, "eval_runtime": 1.5362, "eval_samples_per_second": 79.418, "eval_steps_per_second": 10.415, "step": 4514 }, { "epoch": 75.0, "eval_loss": 12.434186935424805, "eval_runtime": 1.543, "eval_samples_per_second": 79.065, "eval_steps_per_second": 10.369, "step": 4575 }, { "epoch": 76.0, "eval_loss": 12.256501197814941, "eval_runtime": 1.5414, "eval_samples_per_second": 79.148, "eval_steps_per_second": 10.38, "step": 4636 }, { "epoch": 77.0, "eval_loss": 12.312841415405273, "eval_runtime": 1.5542, "eval_samples_per_second": 78.498, "eval_steps_per_second": 10.295, "step": 4697 }, { "epoch": 78.0, "eval_loss": 12.374871253967285, "eval_runtime": 1.5323, "eval_samples_per_second": 79.62, "eval_steps_per_second": 10.442, "step": 4758 }, { "epoch": 79.0, "eval_loss": 12.356457710266113, "eval_runtime": 1.5352, "eval_samples_per_second": 79.468, "eval_steps_per_second": 10.422, "step": 4819 }, { "epoch": 80.0, "eval_loss": 12.328751564025879, "eval_runtime": 1.5365, "eval_samples_per_second": 79.4, "eval_steps_per_second": 10.413, "step": 4880 }, { "epoch": 81.0, "eval_loss": 12.201430320739746, "eval_runtime": 1.5343, "eval_samples_per_second": 79.517, "eval_steps_per_second": 10.428, "step": 4941 }, { "epoch": 81.97, "learning_rate": 4.180327868852459e-08, "loss": 11.0038, "step": 5000 }, { "epoch": 82.0, "eval_loss": 12.177206039428711, "eval_runtime": 1.5376, "eval_samples_per_second": 79.345, "eval_steps_per_second": 10.406, "step": 5002 }, { "epoch": 83.0, "eval_loss": 12.2689790725708, "eval_runtime": 1.5376, "eval_samples_per_second": 79.346, "eval_steps_per_second": 10.406, "step": 5063 }, { "epoch": 84.0, "eval_loss": 12.198723793029785, "eval_runtime": 1.5387, "eval_samples_per_second": 79.286, "eval_steps_per_second": 10.398, "step": 5124 }, { "epoch": 85.0, "eval_loss": 12.0653715133667, "eval_runtime": 1.5397, "eval_samples_per_second": 79.234, "eval_steps_per_second": 10.391, "step": 5185 }, { "epoch": 86.0, "eval_loss": 12.12946891784668, "eval_runtime": 1.5325, "eval_samples_per_second": 79.61, "eval_steps_per_second": 10.441, "step": 5246 }, { "epoch": 87.0, "eval_loss": 12.156049728393555, "eval_runtime": 1.5372, "eval_samples_per_second": 79.367, "eval_steps_per_second": 10.409, "step": 5307 }, { "epoch": 88.0, "eval_loss": 11.96617317199707, "eval_runtime": 1.5352, "eval_samples_per_second": 79.471, "eval_steps_per_second": 10.422, "step": 5368 }, { "epoch": 89.0, "eval_loss": 12.113394737243652, "eval_runtime": 1.5309, "eval_samples_per_second": 79.693, "eval_steps_per_second": 10.451, "step": 5429 }, { "epoch": 90.0, "eval_loss": 12.029434204101562, "eval_runtime": 1.5322, "eval_samples_per_second": 79.626, "eval_steps_per_second": 10.443, "step": 5490 }, { "epoch": 90.16, "learning_rate": 4.0983606557377046e-08, "loss": 10.8283, "step": 5500 }, { "epoch": 91.0, "eval_loss": 12.111231803894043, "eval_runtime": 1.5402, "eval_samples_per_second": 79.212, "eval_steps_per_second": 10.388, "step": 5551 }, { "epoch": 92.0, "eval_loss": 12.091044425964355, "eval_runtime": 1.5374, "eval_samples_per_second": 79.357, "eval_steps_per_second": 10.407, "step": 5612 }, { "epoch": 93.0, "eval_loss": 12.008685111999512, "eval_runtime": 1.5385, "eval_samples_per_second": 79.298, "eval_steps_per_second": 10.4, "step": 5673 }, { "epoch": 94.0, "eval_loss": 11.980273246765137, "eval_runtime": 1.531, "eval_samples_per_second": 79.688, "eval_steps_per_second": 10.451, "step": 5734 }, { "epoch": 95.0, "eval_loss": 11.909398078918457, "eval_runtime": 1.531, "eval_samples_per_second": 79.689, "eval_steps_per_second": 10.451, "step": 5795 }, { "epoch": 96.0, "eval_loss": 12.159173965454102, "eval_runtime": 1.5365, "eval_samples_per_second": 79.401, "eval_steps_per_second": 10.413, "step": 5856 }, { "epoch": 97.0, "eval_loss": 11.936081886291504, "eval_runtime": 1.5323, "eval_samples_per_second": 79.617, "eval_steps_per_second": 10.442, "step": 5917 }, { "epoch": 98.0, "eval_loss": 11.959556579589844, "eval_runtime": 1.5396, "eval_samples_per_second": 79.243, "eval_steps_per_second": 10.393, "step": 5978 }, { "epoch": 98.36, "learning_rate": 4.016393442622951e-08, "loss": 10.693, "step": 6000 }, { "epoch": 99.0, "eval_loss": 11.902626037597656, "eval_runtime": 1.5386, "eval_samples_per_second": 79.293, "eval_steps_per_second": 10.399, "step": 6039 }, { "epoch": 100.0, "eval_loss": 12.003995895385742, "eval_runtime": 1.54, "eval_samples_per_second": 79.222, "eval_steps_per_second": 10.39, "step": 6100 }, { "epoch": 101.0, "eval_loss": 11.886563301086426, "eval_runtime": 1.5404, "eval_samples_per_second": 79.201, "eval_steps_per_second": 10.387, "step": 6161 }, { "epoch": 102.0, "eval_loss": 11.953593254089355, "eval_runtime": 1.5318, "eval_samples_per_second": 79.643, "eval_steps_per_second": 10.445, "step": 6222 }, { "epoch": 103.0, "eval_loss": 11.803376197814941, "eval_runtime": 1.5312, "eval_samples_per_second": 79.678, "eval_steps_per_second": 10.45, "step": 6283 }, { "epoch": 104.0, "eval_loss": 11.688481330871582, "eval_runtime": 1.5386, "eval_samples_per_second": 79.291, "eval_steps_per_second": 10.399, "step": 6344 }, { "epoch": 105.0, "eval_loss": 11.850458145141602, "eval_runtime": 1.5318, "eval_samples_per_second": 79.647, "eval_steps_per_second": 10.446, "step": 6405 }, { "epoch": 106.0, "eval_loss": 11.828042984008789, "eval_runtime": 1.5312, "eval_samples_per_second": 79.677, "eval_steps_per_second": 10.449, "step": 6466 }, { "epoch": 106.56, "learning_rate": 3.934426229508196e-08, "loss": 10.5875, "step": 6500 }, { "epoch": 107.0, "eval_loss": 11.787361145019531, "eval_runtime": 1.5414, "eval_samples_per_second": 79.149, "eval_steps_per_second": 10.38, "step": 6527 }, { "epoch": 108.0, "eval_loss": 11.734838485717773, "eval_runtime": 1.5374, "eval_samples_per_second": 79.353, "eval_steps_per_second": 10.407, "step": 6588 }, { "epoch": 109.0, "eval_loss": 11.776476860046387, "eval_runtime": 1.5397, "eval_samples_per_second": 79.238, "eval_steps_per_second": 10.392, "step": 6649 }, { "epoch": 110.0, "eval_loss": 11.752671241760254, "eval_runtime": 1.538, "eval_samples_per_second": 79.323, "eval_steps_per_second": 10.403, "step": 6710 }, { "epoch": 111.0, "eval_loss": 11.681644439697266, "eval_runtime": 1.5307, "eval_samples_per_second": 79.7, "eval_steps_per_second": 10.452, "step": 6771 }, { "epoch": 112.0, "eval_loss": 11.739622116088867, "eval_runtime": 1.5312, "eval_samples_per_second": 79.674, "eval_steps_per_second": 10.449, "step": 6832 }, { "epoch": 113.0, "eval_loss": 11.647455215454102, "eval_runtime": 1.5322, "eval_samples_per_second": 79.625, "eval_steps_per_second": 10.443, "step": 6893 }, { "epoch": 114.0, "eval_loss": 11.70103645324707, "eval_runtime": 1.5307, "eval_samples_per_second": 79.702, "eval_steps_per_second": 10.453, "step": 6954 }, { "epoch": 114.75, "learning_rate": 3.852459016393442e-08, "loss": 10.5114, "step": 7000 }, { "epoch": 115.0, "eval_loss": 11.704916000366211, "eval_runtime": 1.543, "eval_samples_per_second": 79.064, "eval_steps_per_second": 10.369, "step": 7015 }, { "epoch": 116.0, "eval_loss": 11.796719551086426, "eval_runtime": 1.5451, "eval_samples_per_second": 78.957, "eval_steps_per_second": 10.355, "step": 7076 }, { "epoch": 117.0, "eval_loss": 11.724781036376953, "eval_runtime": 1.5406, "eval_samples_per_second": 79.189, "eval_steps_per_second": 10.385, "step": 7137 }, { "epoch": 118.0, "eval_loss": 11.654914855957031, "eval_runtime": 1.5376, "eval_samples_per_second": 79.345, "eval_steps_per_second": 10.406, "step": 7198 }, { "epoch": 119.0, "eval_loss": 11.519423484802246, "eval_runtime": 1.5319, "eval_samples_per_second": 79.641, "eval_steps_per_second": 10.445, "step": 7259 }, { "epoch": 120.0, "eval_loss": 11.692423820495605, "eval_runtime": 1.5309, "eval_samples_per_second": 79.691, "eval_steps_per_second": 10.451, "step": 7320 }, { "epoch": 121.0, "eval_loss": 11.519417762756348, "eval_runtime": 1.5318, "eval_samples_per_second": 79.643, "eval_steps_per_second": 10.445, "step": 7381 }, { "epoch": 122.0, "eval_loss": 11.660749435424805, "eval_runtime": 1.5564, "eval_samples_per_second": 78.387, "eval_steps_per_second": 10.28, "step": 7442 }, { "epoch": 122.95, "learning_rate": 3.770491803278688e-08, "loss": 10.4791, "step": 7500 }, { "epoch": 123.0, "eval_loss": 11.518941879272461, "eval_runtime": 1.5358, "eval_samples_per_second": 79.437, "eval_steps_per_second": 10.418, "step": 7503 }, { "epoch": 124.0, "eval_loss": 11.552470207214355, "eval_runtime": 1.5427, "eval_samples_per_second": 79.083, "eval_steps_per_second": 10.372, "step": 7564 }, { "epoch": 125.0, "eval_loss": 11.522570610046387, "eval_runtime": 1.5376, "eval_samples_per_second": 79.342, "eval_steps_per_second": 10.406, "step": 7625 }, { "epoch": 126.0, "eval_loss": 11.495443344116211, "eval_runtime": 1.5392, "eval_samples_per_second": 79.264, "eval_steps_per_second": 10.395, "step": 7686 }, { "epoch": 127.0, "eval_loss": 11.59436321258545, "eval_runtime": 1.5335, "eval_samples_per_second": 79.555, "eval_steps_per_second": 10.433, "step": 7747 }, { "epoch": 128.0, "eval_loss": 11.705589294433594, "eval_runtime": 1.5364, "eval_samples_per_second": 79.408, "eval_steps_per_second": 10.414, "step": 7808 }, { "epoch": 129.0, "eval_loss": 11.746439933776855, "eval_runtime": 1.5314, "eval_samples_per_second": 79.665, "eval_steps_per_second": 10.448, "step": 7869 }, { "epoch": 130.0, "eval_loss": 11.48509407043457, "eval_runtime": 1.5344, "eval_samples_per_second": 79.51, "eval_steps_per_second": 10.428, "step": 7930 }, { "epoch": 131.0, "eval_loss": 11.310612678527832, "eval_runtime": 1.531, "eval_samples_per_second": 79.688, "eval_steps_per_second": 10.451, "step": 7991 }, { "epoch": 131.15, "learning_rate": 3.6885245901639346e-08, "loss": 10.4223, "step": 8000 }, { "epoch": 132.0, "eval_loss": 11.661527633666992, "eval_runtime": 1.543, "eval_samples_per_second": 79.067, "eval_steps_per_second": 10.369, "step": 8052 }, { "epoch": 133.0, "eval_loss": 11.512996673583984, "eval_runtime": 1.5421, "eval_samples_per_second": 79.112, "eval_steps_per_second": 10.375, "step": 8113 }, { "epoch": 134.0, "eval_loss": 11.586601257324219, "eval_runtime": 1.5394, "eval_samples_per_second": 79.249, "eval_steps_per_second": 10.393, "step": 8174 }, { "epoch": 135.0, "eval_loss": 11.499874114990234, "eval_runtime": 1.5311, "eval_samples_per_second": 79.679, "eval_steps_per_second": 10.45, "step": 8235 }, { "epoch": 136.0, "eval_loss": 11.573627471923828, "eval_runtime": 1.5328, "eval_samples_per_second": 79.594, "eval_steps_per_second": 10.439, "step": 8296 }, { "epoch": 137.0, "eval_loss": 11.610065460205078, "eval_runtime": 1.5324, "eval_samples_per_second": 79.611, "eval_steps_per_second": 10.441, "step": 8357 }, { "epoch": 138.0, "eval_loss": 11.601760864257812, "eval_runtime": 1.5321, "eval_samples_per_second": 79.629, "eval_steps_per_second": 10.443, "step": 8418 }, { "epoch": 139.0, "eval_loss": 11.506176948547363, "eval_runtime": 1.5313, "eval_samples_per_second": 79.672, "eval_steps_per_second": 10.449, "step": 8479 }, { "epoch": 139.34, "learning_rate": 3.60655737704918e-08, "loss": 10.3725, "step": 8500 }, { "epoch": 140.0, "eval_loss": 11.469504356384277, "eval_runtime": 1.5361, "eval_samples_per_second": 79.423, "eval_steps_per_second": 10.416, "step": 8540 }, { "epoch": 141.0, "eval_loss": 11.574456214904785, "eval_runtime": 1.5371, "eval_samples_per_second": 79.368, "eval_steps_per_second": 10.409, "step": 8601 }, { "epoch": 142.0, "eval_loss": 11.213080406188965, "eval_runtime": 1.5468, "eval_samples_per_second": 78.874, "eval_steps_per_second": 10.344, "step": 8662 }, { "epoch": 143.0, "eval_loss": 11.486066818237305, "eval_runtime": 1.5321, "eval_samples_per_second": 79.628, "eval_steps_per_second": 10.443, "step": 8723 }, { "epoch": 144.0, "eval_loss": 11.352083206176758, "eval_runtime": 1.5374, "eval_samples_per_second": 79.354, "eval_steps_per_second": 10.407, "step": 8784 }, { "epoch": 145.0, "eval_loss": 11.430201530456543, "eval_runtime": 1.5309, "eval_samples_per_second": 79.692, "eval_steps_per_second": 10.451, "step": 8845 }, { "epoch": 146.0, "eval_loss": 11.389131546020508, "eval_runtime": 1.532, "eval_samples_per_second": 79.634, "eval_steps_per_second": 10.444, "step": 8906 }, { "epoch": 147.0, "eval_loss": 11.494805335998535, "eval_runtime": 1.5431, "eval_samples_per_second": 79.064, "eval_steps_per_second": 10.369, "step": 8967 }, { "epoch": 147.54, "learning_rate": 3.524590163934426e-08, "loss": 10.3579, "step": 9000 }, { "epoch": 148.0, "eval_loss": 11.413972854614258, "eval_runtime": 1.5681, "eval_samples_per_second": 77.803, "eval_steps_per_second": 10.204, "step": 9028 }, { "epoch": 149.0, "eval_loss": 11.28091812133789, "eval_runtime": 1.5378, "eval_samples_per_second": 79.334, "eval_steps_per_second": 10.404, "step": 9089 }, { "epoch": 150.0, "eval_loss": 11.508408546447754, "eval_runtime": 1.5385, "eval_samples_per_second": 79.299, "eval_steps_per_second": 10.4, "step": 9150 }, { "epoch": 151.0, "eval_loss": 11.494176864624023, "eval_runtime": 1.5357, "eval_samples_per_second": 79.441, "eval_steps_per_second": 10.418, "step": 9211 }, { "epoch": 152.0, "eval_loss": 11.431586265563965, "eval_runtime": 1.535, "eval_samples_per_second": 79.477, "eval_steps_per_second": 10.423, "step": 9272 }, { "epoch": 153.0, "eval_loss": 11.505435943603516, "eval_runtime": 1.5329, "eval_samples_per_second": 79.587, "eval_steps_per_second": 10.438, "step": 9333 }, { "epoch": 154.0, "eval_loss": 11.435380935668945, "eval_runtime": 1.5313, "eval_samples_per_second": 79.671, "eval_steps_per_second": 10.449, "step": 9394 }, { "epoch": 155.0, "eval_loss": 11.370787620544434, "eval_runtime": 1.5312, "eval_samples_per_second": 79.677, "eval_steps_per_second": 10.449, "step": 9455 }, { "epoch": 155.74, "learning_rate": 3.442622950819672e-08, "loss": 10.3411, "step": 9500 }, { "epoch": 156.0, "eval_loss": 11.60168170928955, "eval_runtime": 1.541, "eval_samples_per_second": 79.168, "eval_steps_per_second": 10.383, "step": 9516 }, { "epoch": 157.0, "eval_loss": 11.441522598266602, "eval_runtime": 1.5413, "eval_samples_per_second": 79.155, "eval_steps_per_second": 10.381, "step": 9577 }, { "epoch": 158.0, "eval_loss": 11.614983558654785, "eval_runtime": 1.5394, "eval_samples_per_second": 79.253, "eval_steps_per_second": 10.394, "step": 9638 }, { "epoch": 159.0, "eval_loss": 11.37887191772461, "eval_runtime": 1.536, "eval_samples_per_second": 79.425, "eval_steps_per_second": 10.416, "step": 9699 }, { "epoch": 160.0, "eval_loss": 11.334174156188965, "eval_runtime": 1.532, "eval_samples_per_second": 79.637, "eval_steps_per_second": 10.444, "step": 9760 }, { "epoch": 161.0, "eval_loss": 11.476101875305176, "eval_runtime": 1.5311, "eval_samples_per_second": 79.684, "eval_steps_per_second": 10.45, "step": 9821 }, { "epoch": 162.0, "eval_loss": 11.300040245056152, "eval_runtime": 1.5341, "eval_samples_per_second": 79.525, "eval_steps_per_second": 10.43, "step": 9882 }, { "epoch": 163.0, "eval_loss": 11.410900115966797, "eval_runtime": 1.534, "eval_samples_per_second": 79.53, "eval_steps_per_second": 10.43, "step": 9943 }, { "epoch": 163.93, "learning_rate": 3.360655737704918e-08, "loss": 10.3236, "step": 10000 }, { "epoch": 164.0, "eval_loss": 11.425009727478027, "eval_runtime": 1.5324, "eval_samples_per_second": 79.612, "eval_steps_per_second": 10.441, "step": 10004 }, { "epoch": 165.0, "eval_loss": 11.32499885559082, "eval_runtime": 1.5438, "eval_samples_per_second": 79.024, "eval_steps_per_second": 10.364, "step": 10065 }, { "epoch": 166.0, "eval_loss": 11.42471981048584, "eval_runtime": 1.5488, "eval_samples_per_second": 78.773, "eval_steps_per_second": 10.331, "step": 10126 }, { "epoch": 167.0, "eval_loss": 11.16617488861084, "eval_runtime": 1.5363, "eval_samples_per_second": 79.414, "eval_steps_per_second": 10.415, "step": 10187 }, { "epoch": 168.0, "eval_loss": 11.44943904876709, "eval_runtime": 1.5344, "eval_samples_per_second": 79.507, "eval_steps_per_second": 10.427, "step": 10248 }, { "epoch": 169.0, "eval_loss": 11.387123107910156, "eval_runtime": 1.5336, "eval_samples_per_second": 79.551, "eval_steps_per_second": 10.433, "step": 10309 }, { "epoch": 170.0, "eval_loss": 11.296111106872559, "eval_runtime": 1.5331, "eval_samples_per_second": 79.578, "eval_steps_per_second": 10.436, "step": 10370 }, { "epoch": 171.0, "eval_loss": 11.357579231262207, "eval_runtime": 1.5325, "eval_samples_per_second": 79.607, "eval_steps_per_second": 10.44, "step": 10431 }, { "epoch": 172.0, "eval_loss": 11.435007095336914, "eval_runtime": 1.5331, "eval_samples_per_second": 79.577, "eval_steps_per_second": 10.436, "step": 10492 }, { "epoch": 172.13, "learning_rate": 3.278688524590163e-08, "loss": 10.3059, "step": 10500 }, { "epoch": 173.0, "eval_loss": 11.361226081848145, "eval_runtime": 1.5571, "eval_samples_per_second": 78.352, "eval_steps_per_second": 10.276, "step": 10553 }, { "epoch": 174.0, "eval_loss": 11.39792251586914, "eval_runtime": 1.541, "eval_samples_per_second": 79.167, "eval_steps_per_second": 10.383, "step": 10614 }, { "epoch": 175.0, "eval_loss": 11.371596336364746, "eval_runtime": 1.5383, "eval_samples_per_second": 79.306, "eval_steps_per_second": 10.401, "step": 10675 }, { "epoch": 176.0, "eval_loss": 11.40168571472168, "eval_runtime": 1.5313, "eval_samples_per_second": 79.669, "eval_steps_per_second": 10.448, "step": 10736 }, { "epoch": 177.0, "eval_loss": 11.534231185913086, "eval_runtime": 1.533, "eval_samples_per_second": 79.584, "eval_steps_per_second": 10.437, "step": 10797 }, { "epoch": 178.0, "eval_loss": 11.227432250976562, "eval_runtime": 1.532, "eval_samples_per_second": 79.634, "eval_steps_per_second": 10.444, "step": 10858 }, { "epoch": 179.0, "eval_loss": 11.432555198669434, "eval_runtime": 1.5369, "eval_samples_per_second": 79.379, "eval_steps_per_second": 10.41, "step": 10919 }, { "epoch": 180.0, "eval_loss": 11.477941513061523, "eval_runtime": 1.5313, "eval_samples_per_second": 79.671, "eval_steps_per_second": 10.449, "step": 10980 }, { "epoch": 180.33, "learning_rate": 3.19672131147541e-08, "loss": 10.2637, "step": 11000 }, { "epoch": 181.0, "eval_loss": 11.34237003326416, "eval_runtime": 1.5403, "eval_samples_per_second": 79.205, "eval_steps_per_second": 10.388, "step": 11041 }, { "epoch": 182.0, "eval_loss": 11.245933532714844, "eval_runtime": 1.5372, "eval_samples_per_second": 79.363, "eval_steps_per_second": 10.408, "step": 11102 }, { "epoch": 183.0, "eval_loss": 11.317768096923828, "eval_runtime": 1.5396, "eval_samples_per_second": 79.241, "eval_steps_per_second": 10.392, "step": 11163 }, { "epoch": 184.0, "eval_loss": 11.325362205505371, "eval_runtime": 1.5313, "eval_samples_per_second": 79.67, "eval_steps_per_second": 10.449, "step": 11224 }, { "epoch": 185.0, "eval_loss": 11.263531684875488, "eval_runtime": 1.5324, "eval_samples_per_second": 79.611, "eval_steps_per_second": 10.441, "step": 11285 }, { "epoch": 186.0, "eval_loss": 11.21447467803955, "eval_runtime": 1.5319, "eval_samples_per_second": 79.641, "eval_steps_per_second": 10.445, "step": 11346 }, { "epoch": 187.0, "eval_loss": 11.32798957824707, "eval_runtime": 1.5313, "eval_samples_per_second": 79.67, "eval_steps_per_second": 10.449, "step": 11407 }, { "epoch": 188.0, "eval_loss": 11.337279319763184, "eval_runtime": 1.5332, "eval_samples_per_second": 79.571, "eval_steps_per_second": 10.436, "step": 11468 }, { "epoch": 188.52, "learning_rate": 3.1147540983606555e-08, "loss": 10.2837, "step": 11500 }, { "epoch": 189.0, "eval_loss": 11.180814743041992, "eval_runtime": 1.5382, "eval_samples_per_second": 79.316, "eval_steps_per_second": 10.402, "step": 11529 }, { "epoch": 190.0, "eval_loss": 11.221953392028809, "eval_runtime": 1.5474, "eval_samples_per_second": 78.844, "eval_steps_per_second": 10.34, "step": 11590 }, { "epoch": 191.0, "eval_loss": 11.125133514404297, "eval_runtime": 1.5562, "eval_samples_per_second": 78.399, "eval_steps_per_second": 10.282, "step": 11651 }, { "epoch": 192.0, "eval_loss": 11.356901168823242, "eval_runtime": 1.5319, "eval_samples_per_second": 79.64, "eval_steps_per_second": 10.445, "step": 11712 }, { "epoch": 193.0, "eval_loss": 11.188824653625488, "eval_runtime": 1.531, "eval_samples_per_second": 79.686, "eval_steps_per_second": 10.451, "step": 11773 }, { "epoch": 194.0, "eval_loss": 11.257243156433105, "eval_runtime": 1.5371, "eval_samples_per_second": 79.368, "eval_steps_per_second": 10.409, "step": 11834 }, { "epoch": 195.0, "eval_loss": 11.315752029418945, "eval_runtime": 1.5316, "eval_samples_per_second": 79.655, "eval_steps_per_second": 10.447, "step": 11895 }, { "epoch": 196.0, "eval_loss": 11.294753074645996, "eval_runtime": 1.5332, "eval_samples_per_second": 79.571, "eval_steps_per_second": 10.436, "step": 11956 }, { "epoch": 196.72, "learning_rate": 3.032786885245902e-08, "loss": 10.2404, "step": 12000 }, { "epoch": 197.0, "eval_loss": 11.21860408782959, "eval_runtime": 1.5446, "eval_samples_per_second": 78.984, "eval_steps_per_second": 10.359, "step": 12017 }, { "epoch": 198.0, "eval_loss": 11.28231143951416, "eval_runtime": 1.5623, "eval_samples_per_second": 78.089, "eval_steps_per_second": 10.241, "step": 12078 }, { "epoch": 199.0, "eval_loss": 11.250580787658691, "eval_runtime": 1.5444, "eval_samples_per_second": 78.996, "eval_steps_per_second": 10.36, "step": 12139 }, { "epoch": 200.0, "eval_loss": 11.55415153503418, "eval_runtime": 1.5349, "eval_samples_per_second": 79.484, "eval_steps_per_second": 10.424, "step": 12200 }, { "epoch": 201.0, "eval_loss": 11.313996315002441, "eval_runtime": 1.5323, "eval_samples_per_second": 79.619, "eval_steps_per_second": 10.442, "step": 12261 }, { "epoch": 202.0, "eval_loss": 11.200800895690918, "eval_runtime": 1.5314, "eval_samples_per_second": 79.664, "eval_steps_per_second": 10.448, "step": 12322 }, { "epoch": 203.0, "eval_loss": 11.157076835632324, "eval_runtime": 1.5375, "eval_samples_per_second": 79.352, "eval_steps_per_second": 10.407, "step": 12383 }, { "epoch": 204.0, "eval_loss": 11.233716011047363, "eval_runtime": 1.5313, "eval_samples_per_second": 79.671, "eval_steps_per_second": 10.449, "step": 12444 }, { "epoch": 204.92, "learning_rate": 2.9508196721311475e-08, "loss": 10.2304, "step": 12500 }, { "epoch": 205.0, "eval_loss": 11.270787239074707, "eval_runtime": 1.5324, "eval_samples_per_second": 79.615, "eval_steps_per_second": 10.441, "step": 12505 }, { "epoch": 206.0, "eval_loss": 11.30806827545166, "eval_runtime": 1.5392, "eval_samples_per_second": 79.261, "eval_steps_per_second": 10.395, "step": 12566 }, { "epoch": 207.0, "eval_loss": 11.11026382446289, "eval_runtime": 1.5416, "eval_samples_per_second": 79.137, "eval_steps_per_second": 10.379, "step": 12627 }, { "epoch": 208.0, "eval_loss": 11.123950958251953, "eval_runtime": 1.538, "eval_samples_per_second": 79.323, "eval_steps_per_second": 10.403, "step": 12688 }, { "epoch": 209.0, "eval_loss": 11.3450288772583, "eval_runtime": 1.5336, "eval_samples_per_second": 79.55, "eval_steps_per_second": 10.433, "step": 12749 }, { "epoch": 210.0, "eval_loss": 11.059000015258789, "eval_runtime": 1.5311, "eval_samples_per_second": 79.683, "eval_steps_per_second": 10.45, "step": 12810 }, { "epoch": 211.0, "eval_loss": 11.257308006286621, "eval_runtime": 1.5316, "eval_samples_per_second": 79.654, "eval_steps_per_second": 10.446, "step": 12871 }, { "epoch": 212.0, "eval_loss": 11.207562446594238, "eval_runtime": 1.5416, "eval_samples_per_second": 79.141, "eval_steps_per_second": 10.379, "step": 12932 }, { "epoch": 213.0, "eval_loss": 11.18131160736084, "eval_runtime": 1.5315, "eval_samples_per_second": 79.659, "eval_steps_per_second": 10.447, "step": 12993 }, { "epoch": 213.11, "learning_rate": 2.8688524590163933e-08, "loss": 10.2405, "step": 13000 }, { "epoch": 214.0, "eval_loss": 11.338172912597656, "eval_runtime": 1.5378, "eval_samples_per_second": 79.334, "eval_steps_per_second": 10.404, "step": 13054 }, { "epoch": 215.0, "eval_loss": 11.409613609313965, "eval_runtime": 1.5389, "eval_samples_per_second": 79.277, "eval_steps_per_second": 10.397, "step": 13115 }, { "epoch": 216.0, "eval_loss": 11.152140617370605, "eval_runtime": 1.5382, "eval_samples_per_second": 79.313, "eval_steps_per_second": 10.402, "step": 13176 }, { "epoch": 217.0, "eval_loss": 11.312527656555176, "eval_runtime": 1.5316, "eval_samples_per_second": 79.653, "eval_steps_per_second": 10.446, "step": 13237 }, { "epoch": 218.0, "eval_loss": 11.191665649414062, "eval_runtime": 1.5311, "eval_samples_per_second": 79.682, "eval_steps_per_second": 10.45, "step": 13298 }, { "epoch": 219.0, "eval_loss": 11.279202461242676, "eval_runtime": 1.5384, "eval_samples_per_second": 79.305, "eval_steps_per_second": 10.401, "step": 13359 }, { "epoch": 220.0, "eval_loss": 11.123604774475098, "eval_runtime": 1.5361, "eval_samples_per_second": 79.422, "eval_steps_per_second": 10.416, "step": 13420 }, { "epoch": 221.0, "eval_loss": 11.239697456359863, "eval_runtime": 1.5358, "eval_samples_per_second": 79.438, "eval_steps_per_second": 10.418, "step": 13481 }, { "epoch": 221.31, "learning_rate": 2.786885245901639e-08, "loss": 10.2096, "step": 13500 }, { "epoch": 222.0, "eval_loss": 11.187539100646973, "eval_runtime": 1.5423, "eval_samples_per_second": 79.104, "eval_steps_per_second": 10.374, "step": 13542 }, { "epoch": 223.0, "eval_loss": 11.311732292175293, "eval_runtime": 1.5401, "eval_samples_per_second": 79.213, "eval_steps_per_second": 10.389, "step": 13603 }, { "epoch": 224.0, "eval_loss": 11.156542778015137, "eval_runtime": 1.5374, "eval_samples_per_second": 79.353, "eval_steps_per_second": 10.407, "step": 13664 }, { "epoch": 225.0, "eval_loss": 11.416545867919922, "eval_runtime": 1.533, "eval_samples_per_second": 79.581, "eval_steps_per_second": 10.437, "step": 13725 }, { "epoch": 226.0, "eval_loss": 11.147370338439941, "eval_runtime": 1.5328, "eval_samples_per_second": 79.591, "eval_steps_per_second": 10.438, "step": 13786 }, { "epoch": 227.0, "eval_loss": 10.985366821289062, "eval_runtime": 1.5331, "eval_samples_per_second": 79.579, "eval_steps_per_second": 10.437, "step": 13847 }, { "epoch": 228.0, "eval_loss": 11.134580612182617, "eval_runtime": 1.5339, "eval_samples_per_second": 79.538, "eval_steps_per_second": 10.431, "step": 13908 }, { "epoch": 229.0, "eval_loss": 11.212328910827637, "eval_runtime": 1.5322, "eval_samples_per_second": 79.625, "eval_steps_per_second": 10.443, "step": 13969 }, { "epoch": 229.51, "learning_rate": 2.7049180327868852e-08, "loss": 10.1998, "step": 14000 }, { "epoch": 230.0, "eval_loss": 11.24535846710205, "eval_runtime": 1.5423, "eval_samples_per_second": 79.101, "eval_steps_per_second": 10.374, "step": 14030 }, { "epoch": 231.0, "eval_loss": 11.335315704345703, "eval_runtime": 1.5438, "eval_samples_per_second": 79.027, "eval_steps_per_second": 10.364, "step": 14091 }, { "epoch": 232.0, "eval_loss": 11.305168151855469, "eval_runtime": 1.5455, "eval_samples_per_second": 78.937, "eval_steps_per_second": 10.352, "step": 14152 }, { "epoch": 233.0, "eval_loss": 11.177278518676758, "eval_runtime": 1.5321, "eval_samples_per_second": 79.631, "eval_steps_per_second": 10.443, "step": 14213 }, { "epoch": 234.0, "eval_loss": 11.132712364196777, "eval_runtime": 1.5332, "eval_samples_per_second": 79.57, "eval_steps_per_second": 10.435, "step": 14274 }, { "epoch": 235.0, "eval_loss": 11.310881614685059, "eval_runtime": 1.5363, "eval_samples_per_second": 79.411, "eval_steps_per_second": 10.415, "step": 14335 }, { "epoch": 236.0, "eval_loss": 11.178828239440918, "eval_runtime": 1.5317, "eval_samples_per_second": 79.652, "eval_steps_per_second": 10.446, "step": 14396 }, { "epoch": 237.0, "eval_loss": 11.376667022705078, "eval_runtime": 1.542, "eval_samples_per_second": 79.118, "eval_steps_per_second": 10.376, "step": 14457 }, { "epoch": 237.7, "learning_rate": 2.622950819672131e-08, "loss": 10.1947, "step": 14500 }, { "epoch": 238.0, "eval_loss": 11.21568489074707, "eval_runtime": 1.5435, "eval_samples_per_second": 79.039, "eval_steps_per_second": 10.366, "step": 14518 }, { "epoch": 239.0, "eval_loss": 11.210213661193848, "eval_runtime": 1.555, "eval_samples_per_second": 78.455, "eval_steps_per_second": 10.289, "step": 14579 }, { "epoch": 240.0, "eval_loss": 11.184165954589844, "eval_runtime": 1.5406, "eval_samples_per_second": 79.191, "eval_steps_per_second": 10.386, "step": 14640 }, { "epoch": 241.0, "eval_loss": 11.139164924621582, "eval_runtime": 1.5333, "eval_samples_per_second": 79.568, "eval_steps_per_second": 10.435, "step": 14701 }, { "epoch": 242.0, "eval_loss": 11.139853477478027, "eval_runtime": 1.5354, "eval_samples_per_second": 79.456, "eval_steps_per_second": 10.42, "step": 14762 }, { "epoch": 243.0, "eval_loss": 11.163023948669434, "eval_runtime": 1.5336, "eval_samples_per_second": 79.55, "eval_steps_per_second": 10.433, "step": 14823 }, { "epoch": 244.0, "eval_loss": 11.19721794128418, "eval_runtime": 1.5334, "eval_samples_per_second": 79.56, "eval_steps_per_second": 10.434, "step": 14884 }, { "epoch": 245.0, "eval_loss": 11.054798126220703, "eval_runtime": 1.5315, "eval_samples_per_second": 79.661, "eval_steps_per_second": 10.447, "step": 14945 }, { "epoch": 245.9, "learning_rate": 2.5409836065573768e-08, "loss": 10.1922, "step": 15000 }, { "epoch": 246.0, "eval_loss": 11.127946853637695, "eval_runtime": 1.5532, "eval_samples_per_second": 78.545, "eval_steps_per_second": 10.301, "step": 15006 }, { "epoch": 247.0, "eval_loss": 11.096878051757812, "eval_runtime": 1.5376, "eval_samples_per_second": 79.343, "eval_steps_per_second": 10.406, "step": 15067 }, { "epoch": 248.0, "eval_loss": 11.234783172607422, "eval_runtime": 1.5405, "eval_samples_per_second": 79.193, "eval_steps_per_second": 10.386, "step": 15128 }, { "epoch": 249.0, "eval_loss": 11.115097045898438, "eval_runtime": 1.5469, "eval_samples_per_second": 78.865, "eval_steps_per_second": 10.343, "step": 15189 }, { "epoch": 250.0, "eval_loss": 11.53684139251709, "eval_runtime": 1.5325, "eval_samples_per_second": 79.611, "eval_steps_per_second": 10.441, "step": 15250 }, { "epoch": 251.0, "eval_loss": 11.224416732788086, "eval_runtime": 1.5321, "eval_samples_per_second": 79.627, "eval_steps_per_second": 10.443, "step": 15311 }, { "epoch": 252.0, "eval_loss": 11.210186958312988, "eval_runtime": 1.5368, "eval_samples_per_second": 79.387, "eval_steps_per_second": 10.411, "step": 15372 }, { "epoch": 253.0, "eval_loss": 11.273506164550781, "eval_runtime": 1.5322, "eval_samples_per_second": 79.622, "eval_steps_per_second": 10.442, "step": 15433 }, { "epoch": 254.0, "eval_loss": 11.3226900100708, "eval_runtime": 1.5371, "eval_samples_per_second": 79.371, "eval_steps_per_second": 10.409, "step": 15494 }, { "epoch": 254.1, "learning_rate": 2.459016393442623e-08, "loss": 10.1994, "step": 15500 }, { "epoch": 255.0, "eval_loss": 11.237663269042969, "eval_runtime": 1.5421, "eval_samples_per_second": 79.114, "eval_steps_per_second": 10.376, "step": 15555 }, { "epoch": 256.0, "eval_loss": 11.283459663391113, "eval_runtime": 1.539, "eval_samples_per_second": 79.272, "eval_steps_per_second": 10.396, "step": 15616 }, { "epoch": 257.0, "eval_loss": 11.347498893737793, "eval_runtime": 1.5437, "eval_samples_per_second": 79.032, "eval_steps_per_second": 10.365, "step": 15677 }, { "epoch": 258.0, "eval_loss": 11.209207534790039, "eval_runtime": 1.5326, "eval_samples_per_second": 79.605, "eval_steps_per_second": 10.44, "step": 15738 }, { "epoch": 259.0, "eval_loss": 11.19356918334961, "eval_runtime": 1.5356, "eval_samples_per_second": 79.448, "eval_steps_per_second": 10.419, "step": 15799 }, { "epoch": 260.0, "eval_loss": 11.0318603515625, "eval_runtime": 1.5384, "eval_samples_per_second": 79.303, "eval_steps_per_second": 10.4, "step": 15860 }, { "epoch": 261.0, "eval_loss": 11.191557884216309, "eval_runtime": 1.5356, "eval_samples_per_second": 79.447, "eval_steps_per_second": 10.419, "step": 15921 }, { "epoch": 262.0, "eval_loss": 11.135727882385254, "eval_runtime": 1.5318, "eval_samples_per_second": 79.643, "eval_steps_per_second": 10.445, "step": 15982 }, { "epoch": 262.29, "learning_rate": 2.3770491803278688e-08, "loss": 10.1883, "step": 16000 }, { "epoch": 263.0, "eval_loss": 10.97315788269043, "eval_runtime": 1.5381, "eval_samples_per_second": 79.317, "eval_steps_per_second": 10.402, "step": 16043 }, { "epoch": 264.0, "eval_loss": 11.183859825134277, "eval_runtime": 1.5418, "eval_samples_per_second": 79.131, "eval_steps_per_second": 10.378, "step": 16104 }, { "epoch": 265.0, "eval_loss": 11.070130348205566, "eval_runtime": 1.5422, "eval_samples_per_second": 79.109, "eval_steps_per_second": 10.375, "step": 16165 }, { "epoch": 266.0, "eval_loss": 11.161293029785156, "eval_runtime": 1.5365, "eval_samples_per_second": 79.403, "eval_steps_per_second": 10.414, "step": 16226 }, { "epoch": 267.0, "eval_loss": 11.130182266235352, "eval_runtime": 1.5332, "eval_samples_per_second": 79.572, "eval_steps_per_second": 10.436, "step": 16287 }, { "epoch": 268.0, "eval_loss": 11.095097541809082, "eval_runtime": 1.5319, "eval_samples_per_second": 79.642, "eval_steps_per_second": 10.445, "step": 16348 }, { "epoch": 269.0, "eval_loss": 11.05788803100586, "eval_runtime": 1.5336, "eval_samples_per_second": 79.552, "eval_steps_per_second": 10.433, "step": 16409 }, { "epoch": 270.0, "eval_loss": 11.145880699157715, "eval_runtime": 1.5341, "eval_samples_per_second": 79.523, "eval_steps_per_second": 10.429, "step": 16470 }, { "epoch": 270.49, "learning_rate": 2.2950819672131146e-08, "loss": 10.1863, "step": 16500 }, { "epoch": 271.0, "eval_loss": 11.196855545043945, "eval_runtime": 1.5401, "eval_samples_per_second": 79.218, "eval_steps_per_second": 10.389, "step": 16531 }, { "epoch": 272.0, "eval_loss": 11.127517700195312, "eval_runtime": 1.5433, "eval_samples_per_second": 79.049, "eval_steps_per_second": 10.367, "step": 16592 }, { "epoch": 273.0, "eval_loss": 11.111509323120117, "eval_runtime": 1.5367, "eval_samples_per_second": 79.393, "eval_steps_per_second": 10.412, "step": 16653 }, { "epoch": 274.0, "eval_loss": 11.128540992736816, "eval_runtime": 1.5324, "eval_samples_per_second": 79.616, "eval_steps_per_second": 10.441, "step": 16714 }, { "epoch": 275.0, "eval_loss": 11.105259895324707, "eval_runtime": 1.5326, "eval_samples_per_second": 79.601, "eval_steps_per_second": 10.439, "step": 16775 }, { "epoch": 276.0, "eval_loss": 11.010540008544922, "eval_runtime": 1.5357, "eval_samples_per_second": 79.442, "eval_steps_per_second": 10.419, "step": 16836 }, { "epoch": 277.0, "eval_loss": 11.137755393981934, "eval_runtime": 1.534, "eval_samples_per_second": 79.53, "eval_steps_per_second": 10.43, "step": 16897 }, { "epoch": 278.0, "eval_loss": 11.077096939086914, "eval_runtime": 1.533, "eval_samples_per_second": 79.582, "eval_steps_per_second": 10.437, "step": 16958 }, { "epoch": 278.69, "learning_rate": 2.2131147540983604e-08, "loss": 10.1614, "step": 17000 }, { "epoch": 279.0, "eval_loss": 11.061995506286621, "eval_runtime": 1.5452, "eval_samples_per_second": 78.952, "eval_steps_per_second": 10.354, "step": 17019 }, { "epoch": 280.0, "eval_loss": 10.990643501281738, "eval_runtime": 1.5368, "eval_samples_per_second": 79.385, "eval_steps_per_second": 10.411, "step": 17080 }, { "epoch": 281.0, "eval_loss": 11.077146530151367, "eval_runtime": 1.539, "eval_samples_per_second": 79.273, "eval_steps_per_second": 10.396, "step": 17141 }, { "epoch": 282.0, "eval_loss": 11.035726547241211, "eval_runtime": 1.5344, "eval_samples_per_second": 79.51, "eval_steps_per_second": 10.428, "step": 17202 }, { "epoch": 283.0, "eval_loss": 11.0416841506958, "eval_runtime": 1.5338, "eval_samples_per_second": 79.542, "eval_steps_per_second": 10.432, "step": 17263 }, { "epoch": 284.0, "eval_loss": 11.028667449951172, "eval_runtime": 1.5324, "eval_samples_per_second": 79.612, "eval_steps_per_second": 10.441, "step": 17324 }, { "epoch": 285.0, "eval_loss": 11.117180824279785, "eval_runtime": 1.5342, "eval_samples_per_second": 79.518, "eval_steps_per_second": 10.429, "step": 17385 }, { "epoch": 286.0, "eval_loss": 10.925678253173828, "eval_runtime": 1.5356, "eval_samples_per_second": 79.449, "eval_steps_per_second": 10.42, "step": 17446 }, { "epoch": 286.88, "learning_rate": 2.1311475409836065e-08, "loss": 10.1717, "step": 17500 }, { "epoch": 287.0, "eval_loss": 11.231226921081543, "eval_runtime": 1.542, "eval_samples_per_second": 79.12, "eval_steps_per_second": 10.376, "step": 17507 }, { "epoch": 288.0, "eval_loss": 11.380009651184082, "eval_runtime": 1.5602, "eval_samples_per_second": 78.195, "eval_steps_per_second": 10.255, "step": 17568 }, { "epoch": 289.0, "eval_loss": 11.13857364654541, "eval_runtime": 1.5491, "eval_samples_per_second": 78.754, "eval_steps_per_second": 10.328, "step": 17629 }, { "epoch": 290.0, "eval_loss": 11.172418594360352, "eval_runtime": 1.5472, "eval_samples_per_second": 78.854, "eval_steps_per_second": 10.341, "step": 17690 }, { "epoch": 291.0, "eval_loss": 11.162772178649902, "eval_runtime": 1.534, "eval_samples_per_second": 79.529, "eval_steps_per_second": 10.43, "step": 17751 }, { "epoch": 292.0, "eval_loss": 11.122556686401367, "eval_runtime": 1.5424, "eval_samples_per_second": 79.097, "eval_steps_per_second": 10.373, "step": 17812 }, { "epoch": 293.0, "eval_loss": 11.19551944732666, "eval_runtime": 1.5328, "eval_samples_per_second": 79.592, "eval_steps_per_second": 10.438, "step": 17873 }, { "epoch": 294.0, "eval_loss": 11.138795852661133, "eval_runtime": 1.537, "eval_samples_per_second": 79.376, "eval_steps_per_second": 10.41, "step": 17934 }, { "epoch": 295.0, "eval_loss": 11.087359428405762, "eval_runtime": 1.5338, "eval_samples_per_second": 79.543, "eval_steps_per_second": 10.432, "step": 17995 }, { "epoch": 295.08, "learning_rate": 2.0491803278688523e-08, "loss": 10.1806, "step": 18000 }, { "epoch": 296.0, "eval_loss": 11.081265449523926, "eval_runtime": 1.5381, "eval_samples_per_second": 79.319, "eval_steps_per_second": 10.403, "step": 18056 }, { "epoch": 297.0, "eval_loss": 11.147530555725098, "eval_runtime": 1.5414, "eval_samples_per_second": 79.151, "eval_steps_per_second": 10.381, "step": 18117 }, { "epoch": 298.0, "eval_loss": 11.167841911315918, "eval_runtime": 1.5414, "eval_samples_per_second": 79.146, "eval_steps_per_second": 10.38, "step": 18178 }, { "epoch": 299.0, "eval_loss": 11.244978904724121, "eval_runtime": 1.5322, "eval_samples_per_second": 79.623, "eval_steps_per_second": 10.442, "step": 18239 }, { "epoch": 300.0, "eval_loss": 11.193556785583496, "eval_runtime": 1.5334, "eval_samples_per_second": 79.563, "eval_steps_per_second": 10.434, "step": 18300 }, { "epoch": 301.0, "eval_loss": 11.102090835571289, "eval_runtime": 1.5319, "eval_samples_per_second": 79.642, "eval_steps_per_second": 10.445, "step": 18361 }, { "epoch": 302.0, "eval_loss": 11.187400817871094, "eval_runtime": 1.5378, "eval_samples_per_second": 79.332, "eval_steps_per_second": 10.404, "step": 18422 }, { "epoch": 303.0, "eval_loss": 11.171931266784668, "eval_runtime": 1.5351, "eval_samples_per_second": 79.473, "eval_steps_per_second": 10.423, "step": 18483 }, { "epoch": 303.28, "learning_rate": 1.967213114754098e-08, "loss": 10.1683, "step": 18500 }, { "epoch": 304.0, "eval_loss": 11.155352592468262, "eval_runtime": 1.5403, "eval_samples_per_second": 79.204, "eval_steps_per_second": 10.387, "step": 18544 }, { "epoch": 305.0, "eval_loss": 11.0771484375, "eval_runtime": 1.5429, "eval_samples_per_second": 79.074, "eval_steps_per_second": 10.37, "step": 18605 }, { "epoch": 306.0, "eval_loss": 11.067580223083496, "eval_runtime": 1.5407, "eval_samples_per_second": 79.183, "eval_steps_per_second": 10.385, "step": 18666 }, { "epoch": 307.0, "eval_loss": 11.128029823303223, "eval_runtime": 1.5321, "eval_samples_per_second": 79.63, "eval_steps_per_second": 10.443, "step": 18727 }, { "epoch": 308.0, "eval_loss": 11.023638725280762, "eval_runtime": 1.535, "eval_samples_per_second": 79.477, "eval_steps_per_second": 10.423, "step": 18788 }, { "epoch": 309.0, "eval_loss": 11.14404582977295, "eval_runtime": 1.5352, "eval_samples_per_second": 79.47, "eval_steps_per_second": 10.422, "step": 18849 }, { "epoch": 310.0, "eval_loss": 11.184294700622559, "eval_runtime": 1.5344, "eval_samples_per_second": 79.508, "eval_steps_per_second": 10.427, "step": 18910 }, { "epoch": 311.0, "eval_loss": 11.04742431640625, "eval_runtime": 1.5383, "eval_samples_per_second": 79.309, "eval_steps_per_second": 10.401, "step": 18971 }, { "epoch": 311.47, "learning_rate": 1.885245901639344e-08, "loss": 10.1437, "step": 19000 }, { "epoch": 312.0, "eval_loss": 11.039079666137695, "eval_runtime": 1.5399, "eval_samples_per_second": 79.224, "eval_steps_per_second": 10.39, "step": 19032 }, { "epoch": 313.0, "eval_loss": 10.914813995361328, "eval_runtime": 1.5428, "eval_samples_per_second": 79.078, "eval_steps_per_second": 10.371, "step": 19093 }, { "epoch": 314.0, "eval_loss": 11.057476997375488, "eval_runtime": 1.5481, "eval_samples_per_second": 78.806, "eval_steps_per_second": 10.335, "step": 19154 }, { "epoch": 315.0, "eval_loss": 11.195489883422852, "eval_runtime": 1.536, "eval_samples_per_second": 79.425, "eval_steps_per_second": 10.416, "step": 19215 }, { "epoch": 316.0, "eval_loss": 11.005314826965332, "eval_runtime": 1.5327, "eval_samples_per_second": 79.597, "eval_steps_per_second": 10.439, "step": 19276 }, { "epoch": 317.0, "eval_loss": 11.080973625183105, "eval_runtime": 1.5413, "eval_samples_per_second": 79.154, "eval_steps_per_second": 10.381, "step": 19337 }, { "epoch": 318.0, "eval_loss": 11.13598346710205, "eval_runtime": 1.5363, "eval_samples_per_second": 79.411, "eval_steps_per_second": 10.415, "step": 19398 }, { "epoch": 319.0, "eval_loss": 11.229124069213867, "eval_runtime": 1.5344, "eval_samples_per_second": 79.512, "eval_steps_per_second": 10.428, "step": 19459 }, { "epoch": 319.67, "learning_rate": 1.80327868852459e-08, "loss": 10.1539, "step": 19500 }, { "epoch": 320.0, "eval_loss": 11.023887634277344, "eval_runtime": 1.5371, "eval_samples_per_second": 79.369, "eval_steps_per_second": 10.409, "step": 19520 }, { "epoch": 321.0, "eval_loss": 11.121563911437988, "eval_runtime": 1.5411, "eval_samples_per_second": 79.162, "eval_steps_per_second": 10.382, "step": 19581 }, { "epoch": 322.0, "eval_loss": 11.251620292663574, "eval_runtime": 1.545, "eval_samples_per_second": 78.965, "eval_steps_per_second": 10.356, "step": 19642 }, { "epoch": 323.0, "eval_loss": 10.975918769836426, "eval_runtime": 1.5345, "eval_samples_per_second": 79.505, "eval_steps_per_second": 10.427, "step": 19703 }, { "epoch": 324.0, "eval_loss": 11.039804458618164, "eval_runtime": 1.5356, "eval_samples_per_second": 79.446, "eval_steps_per_second": 10.419, "step": 19764 }, { "epoch": 325.0, "eval_loss": 11.043091773986816, "eval_runtime": 1.5325, "eval_samples_per_second": 79.607, "eval_steps_per_second": 10.44, "step": 19825 }, { "epoch": 326.0, "eval_loss": 10.915124893188477, "eval_runtime": 1.5324, "eval_samples_per_second": 79.612, "eval_steps_per_second": 10.441, "step": 19886 }, { "epoch": 327.0, "eval_loss": 11.090548515319824, "eval_runtime": 1.5322, "eval_samples_per_second": 79.626, "eval_steps_per_second": 10.443, "step": 19947 }, { "epoch": 327.87, "learning_rate": 1.721311475409836e-08, "loss": 10.1432, "step": 20000 }, { "epoch": 328.0, "eval_loss": 11.009906768798828, "eval_runtime": 1.539, "eval_samples_per_second": 79.274, "eval_steps_per_second": 10.397, "step": 20008 }, { "epoch": 329.0, "eval_loss": 11.089301109313965, "eval_runtime": 1.5522, "eval_samples_per_second": 78.6, "eval_steps_per_second": 10.308, "step": 20069 }, { "epoch": 330.0, "eval_loss": 11.134428024291992, "eval_runtime": 1.5358, "eval_samples_per_second": 79.437, "eval_steps_per_second": 10.418, "step": 20130 }, { "epoch": 331.0, "eval_loss": 11.068151473999023, "eval_runtime": 1.5446, "eval_samples_per_second": 78.987, "eval_steps_per_second": 10.359, "step": 20191 }, { "epoch": 332.0, "eval_loss": 10.955802917480469, "eval_runtime": 1.5328, "eval_samples_per_second": 79.593, "eval_steps_per_second": 10.438, "step": 20252 }, { "epoch": 333.0, "eval_loss": 11.066947937011719, "eval_runtime": 1.5339, "eval_samples_per_second": 79.537, "eval_steps_per_second": 10.431, "step": 20313 }, { "epoch": 334.0, "eval_loss": 11.055612564086914, "eval_runtime": 1.5381, "eval_samples_per_second": 79.321, "eval_steps_per_second": 10.403, "step": 20374 }, { "epoch": 335.0, "eval_loss": 11.20965576171875, "eval_runtime": 1.5314, "eval_samples_per_second": 79.666, "eval_steps_per_second": 10.448, "step": 20435 }, { "epoch": 336.0, "eval_loss": 11.019977569580078, "eval_runtime": 1.5335, "eval_samples_per_second": 79.554, "eval_steps_per_second": 10.433, "step": 20496 }, { "epoch": 336.07, "learning_rate": 1.6393442622950816e-08, "loss": 10.1343, "step": 20500 }, { "epoch": 337.0, "eval_loss": 10.968345642089844, "eval_runtime": 1.5381, "eval_samples_per_second": 79.32, "eval_steps_per_second": 10.403, "step": 20557 }, { "epoch": 338.0, "eval_loss": 10.98238754272461, "eval_runtime": 1.5536, "eval_samples_per_second": 78.526, "eval_steps_per_second": 10.299, "step": 20618 }, { "epoch": 339.0, "eval_loss": 11.156255722045898, "eval_runtime": 1.536, "eval_samples_per_second": 79.425, "eval_steps_per_second": 10.416, "step": 20679 }, { "epoch": 340.0, "eval_loss": 11.148921012878418, "eval_runtime": 1.5358, "eval_samples_per_second": 79.438, "eval_steps_per_second": 10.418, "step": 20740 }, { "epoch": 341.0, "eval_loss": 11.138899803161621, "eval_runtime": 1.531, "eval_samples_per_second": 79.686, "eval_steps_per_second": 10.451, "step": 20801 }, { "epoch": 342.0, "eval_loss": 11.112798690795898, "eval_runtime": 1.5319, "eval_samples_per_second": 79.638, "eval_steps_per_second": 10.444, "step": 20862 }, { "epoch": 343.0, "eval_loss": 11.043689727783203, "eval_runtime": 1.5362, "eval_samples_per_second": 79.418, "eval_steps_per_second": 10.416, "step": 20923 }, { "epoch": 344.0, "eval_loss": 11.100526809692383, "eval_runtime": 1.5335, "eval_samples_per_second": 79.556, "eval_steps_per_second": 10.434, "step": 20984 }, { "epoch": 344.26, "learning_rate": 1.5573770491803278e-08, "loss": 10.143, "step": 21000 }, { "epoch": 345.0, "eval_loss": 11.16964054107666, "eval_runtime": 1.5442, "eval_samples_per_second": 79.003, "eval_steps_per_second": 10.361, "step": 21045 }, { "epoch": 346.0, "eval_loss": 11.135641098022461, "eval_runtime": 1.5566, "eval_samples_per_second": 78.375, "eval_steps_per_second": 10.279, "step": 21106 }, { "epoch": 347.0, "eval_loss": 11.079752922058105, "eval_runtime": 1.5382, "eval_samples_per_second": 79.311, "eval_steps_per_second": 10.401, "step": 21167 }, { "epoch": 348.0, "eval_loss": 10.918296813964844, "eval_runtime": 1.5332, "eval_samples_per_second": 79.574, "eval_steps_per_second": 10.436, "step": 21228 }, { "epoch": 349.0, "eval_loss": 11.08788776397705, "eval_runtime": 1.5381, "eval_samples_per_second": 79.317, "eval_steps_per_second": 10.402, "step": 21289 }, { "epoch": 350.0, "eval_loss": 10.965096473693848, "eval_runtime": 1.5313, "eval_samples_per_second": 79.67, "eval_steps_per_second": 10.449, "step": 21350 }, { "epoch": 351.0, "eval_loss": 11.072400093078613, "eval_runtime": 1.5343, "eval_samples_per_second": 79.515, "eval_steps_per_second": 10.428, "step": 21411 }, { "epoch": 352.0, "eval_loss": 11.0264253616333, "eval_runtime": 1.5325, "eval_samples_per_second": 79.608, "eval_steps_per_second": 10.44, "step": 21472 }, { "epoch": 352.46, "learning_rate": 1.4754098360655737e-08, "loss": 10.1456, "step": 21500 }, { "epoch": 353.0, "eval_loss": 11.1398344039917, "eval_runtime": 1.5366, "eval_samples_per_second": 79.396, "eval_steps_per_second": 10.413, "step": 21533 }, { "epoch": 354.0, "eval_loss": 11.249741554260254, "eval_runtime": 1.5389, "eval_samples_per_second": 79.278, "eval_steps_per_second": 10.397, "step": 21594 }, { "epoch": 355.0, "eval_loss": 10.889848709106445, "eval_runtime": 1.5359, "eval_samples_per_second": 79.432, "eval_steps_per_second": 10.417, "step": 21655 }, { "epoch": 356.0, "eval_loss": 10.963099479675293, "eval_runtime": 1.5354, "eval_samples_per_second": 79.457, "eval_steps_per_second": 10.421, "step": 21716 }, { "epoch": 357.0, "eval_loss": 11.073355674743652, "eval_runtime": 1.5372, "eval_samples_per_second": 79.365, "eval_steps_per_second": 10.408, "step": 21777 }, { "epoch": 358.0, "eval_loss": 11.122593879699707, "eval_runtime": 1.5324, "eval_samples_per_second": 79.616, "eval_steps_per_second": 10.441, "step": 21838 }, { "epoch": 359.0, "eval_loss": 11.168622016906738, "eval_runtime": 1.5326, "eval_samples_per_second": 79.602, "eval_steps_per_second": 10.44, "step": 21899 }, { "epoch": 360.0, "eval_loss": 11.031410217285156, "eval_runtime": 1.5309, "eval_samples_per_second": 79.694, "eval_steps_per_second": 10.452, "step": 21960 }, { "epoch": 360.65, "learning_rate": 1.3934426229508195e-08, "loss": 10.1345, "step": 22000 }, { "epoch": 361.0, "eval_loss": 11.09403133392334, "eval_runtime": 1.5394, "eval_samples_per_second": 79.251, "eval_steps_per_second": 10.394, "step": 22021 }, { "epoch": 362.0, "eval_loss": 10.922209739685059, "eval_runtime": 1.5403, "eval_samples_per_second": 79.205, "eval_steps_per_second": 10.388, "step": 22082 }, { "epoch": 363.0, "eval_loss": 11.103551864624023, "eval_runtime": 1.5393, "eval_samples_per_second": 79.254, "eval_steps_per_second": 10.394, "step": 22143 }, { "epoch": 364.0, "eval_loss": 11.19053840637207, "eval_runtime": 1.5388, "eval_samples_per_second": 79.281, "eval_steps_per_second": 10.398, "step": 22204 }, { "epoch": 365.0, "eval_loss": 10.974089622497559, "eval_runtime": 1.531, "eval_samples_per_second": 79.689, "eval_steps_per_second": 10.451, "step": 22265 }, { "epoch": 366.0, "eval_loss": 10.909160614013672, "eval_runtime": 1.5312, "eval_samples_per_second": 79.674, "eval_steps_per_second": 10.449, "step": 22326 }, { "epoch": 367.0, "eval_loss": 11.05643367767334, "eval_runtime": 1.5337, "eval_samples_per_second": 79.544, "eval_steps_per_second": 10.432, "step": 22387 }, { "epoch": 368.0, "eval_loss": 11.053413391113281, "eval_runtime": 1.5382, "eval_samples_per_second": 79.313, "eval_steps_per_second": 10.402, "step": 22448 }, { "epoch": 368.85, "learning_rate": 1.3114754098360655e-08, "loss": 10.1354, "step": 22500 }, { "epoch": 369.0, "eval_loss": 11.030744552612305, "eval_runtime": 1.551, "eval_samples_per_second": 78.66, "eval_steps_per_second": 10.316, "step": 22509 }, { "epoch": 370.0, "eval_loss": 11.14685344696045, "eval_runtime": 1.5382, "eval_samples_per_second": 79.313, "eval_steps_per_second": 10.402, "step": 22570 }, { "epoch": 371.0, "eval_loss": 11.056023597717285, "eval_runtime": 1.5383, "eval_samples_per_second": 79.307, "eval_steps_per_second": 10.401, "step": 22631 }, { "epoch": 372.0, "eval_loss": 11.023992538452148, "eval_runtime": 1.5355, "eval_samples_per_second": 79.454, "eval_steps_per_second": 10.42, "step": 22692 }, { "epoch": 373.0, "eval_loss": 10.986889839172363, "eval_runtime": 1.5328, "eval_samples_per_second": 79.593, "eval_steps_per_second": 10.438, "step": 22753 }, { "epoch": 374.0, "eval_loss": 11.000397682189941, "eval_runtime": 1.5337, "eval_samples_per_second": 79.547, "eval_steps_per_second": 10.432, "step": 22814 }, { "epoch": 375.0, "eval_loss": 11.137325286865234, "eval_runtime": 1.5314, "eval_samples_per_second": 79.665, "eval_steps_per_second": 10.448, "step": 22875 }, { "epoch": 376.0, "eval_loss": 11.095502853393555, "eval_runtime": 1.5326, "eval_samples_per_second": 79.601, "eval_steps_per_second": 10.439, "step": 22936 }, { "epoch": 377.0, "eval_loss": 11.05420207977295, "eval_runtime": 1.5314, "eval_samples_per_second": 79.667, "eval_steps_per_second": 10.448, "step": 22997 }, { "epoch": 377.05, "learning_rate": 1.2295081967213115e-08, "loss": 10.1382, "step": 23000 }, { "epoch": 378.0, "eval_loss": 10.981268882751465, "eval_runtime": 1.542, "eval_samples_per_second": 79.12, "eval_steps_per_second": 10.376, "step": 23058 }, { "epoch": 379.0, "eval_loss": 10.987357139587402, "eval_runtime": 1.547, "eval_samples_per_second": 78.861, "eval_steps_per_second": 10.342, "step": 23119 }, { "epoch": 380.0, "eval_loss": 10.973642349243164, "eval_runtime": 1.5368, "eval_samples_per_second": 79.386, "eval_steps_per_second": 10.411, "step": 23180 }, { "epoch": 381.0, "eval_loss": 11.129460334777832, "eval_runtime": 1.5333, "eval_samples_per_second": 79.565, "eval_steps_per_second": 10.435, "step": 23241 }, { "epoch": 382.0, "eval_loss": 10.87239933013916, "eval_runtime": 1.5313, "eval_samples_per_second": 79.672, "eval_steps_per_second": 10.449, "step": 23302 }, { "epoch": 383.0, "eval_loss": 10.936705589294434, "eval_runtime": 1.5332, "eval_samples_per_second": 79.573, "eval_steps_per_second": 10.436, "step": 23363 }, { "epoch": 384.0, "eval_loss": 11.051558494567871, "eval_runtime": 1.5374, "eval_samples_per_second": 79.353, "eval_steps_per_second": 10.407, "step": 23424 }, { "epoch": 385.0, "eval_loss": 11.027456283569336, "eval_runtime": 1.5332, "eval_samples_per_second": 79.575, "eval_steps_per_second": 10.436, "step": 23485 }, { "epoch": 385.25, "learning_rate": 1.1475409836065573e-08, "loss": 10.1246, "step": 23500 }, { "epoch": 386.0, "eval_loss": 11.018380165100098, "eval_runtime": 1.5356, "eval_samples_per_second": 79.45, "eval_steps_per_second": 10.42, "step": 23546 }, { "epoch": 387.0, "eval_loss": 11.057013511657715, "eval_runtime": 1.5451, "eval_samples_per_second": 78.957, "eval_steps_per_second": 10.355, "step": 23607 }, { "epoch": 388.0, "eval_loss": 11.024608612060547, "eval_runtime": 1.5372, "eval_samples_per_second": 79.364, "eval_steps_per_second": 10.408, "step": 23668 }, { "epoch": 389.0, "eval_loss": 11.013128280639648, "eval_runtime": 1.5353, "eval_samples_per_second": 79.466, "eval_steps_per_second": 10.422, "step": 23729 }, { "epoch": 390.0, "eval_loss": 11.016827583312988, "eval_runtime": 1.534, "eval_samples_per_second": 79.53, "eval_steps_per_second": 10.43, "step": 23790 }, { "epoch": 391.0, "eval_loss": 11.08166217803955, "eval_runtime": 1.5336, "eval_samples_per_second": 79.551, "eval_steps_per_second": 10.433, "step": 23851 }, { "epoch": 392.0, "eval_loss": 10.894896507263184, "eval_runtime": 1.5311, "eval_samples_per_second": 79.681, "eval_steps_per_second": 10.45, "step": 23912 }, { "epoch": 393.0, "eval_loss": 10.769810676574707, "eval_runtime": 1.5322, "eval_samples_per_second": 79.625, "eval_steps_per_second": 10.443, "step": 23973 }, { "epoch": 393.44, "learning_rate": 1.0655737704918032e-08, "loss": 10.1173, "step": 24000 }, { "epoch": 394.0, "eval_loss": 11.004133224487305, "eval_runtime": 1.545, "eval_samples_per_second": 78.965, "eval_steps_per_second": 10.356, "step": 24034 }, { "epoch": 395.0, "eval_loss": 10.925704956054688, "eval_runtime": 1.542, "eval_samples_per_second": 79.12, "eval_steps_per_second": 10.376, "step": 24095 }, { "epoch": 396.0, "eval_loss": 10.9295015335083, "eval_runtime": 1.5407, "eval_samples_per_second": 79.187, "eval_steps_per_second": 10.385, "step": 24156 }, { "epoch": 397.0, "eval_loss": 10.947636604309082, "eval_runtime": 1.5336, "eval_samples_per_second": 79.552, "eval_steps_per_second": 10.433, "step": 24217 }, { "epoch": 398.0, "eval_loss": 11.058280944824219, "eval_runtime": 1.5311, "eval_samples_per_second": 79.682, "eval_steps_per_second": 10.45, "step": 24278 }, { "epoch": 399.0, "eval_loss": 11.00063419342041, "eval_runtime": 1.5321, "eval_samples_per_second": 79.627, "eval_steps_per_second": 10.443, "step": 24339 }, { "epoch": 400.0, "eval_loss": 10.971447944641113, "eval_runtime": 1.5316, "eval_samples_per_second": 79.656, "eval_steps_per_second": 10.447, "step": 24400 }, { "epoch": 401.0, "eval_loss": 11.0480318069458, "eval_runtime": 1.5351, "eval_samples_per_second": 79.474, "eval_steps_per_second": 10.423, "step": 24461 }, { "epoch": 401.64, "learning_rate": 9.83606557377049e-09, "loss": 10.1253, "step": 24500 }, { "epoch": 402.0, "eval_loss": 11.021344184875488, "eval_runtime": 1.5405, "eval_samples_per_second": 79.197, "eval_steps_per_second": 10.386, "step": 24522 }, { "epoch": 403.0, "eval_loss": 10.96359920501709, "eval_runtime": 1.5391, "eval_samples_per_second": 79.267, "eval_steps_per_second": 10.396, "step": 24583 }, { "epoch": 404.0, "eval_loss": 10.988608360290527, "eval_runtime": 1.5469, "eval_samples_per_second": 78.87, "eval_steps_per_second": 10.344, "step": 24644 }, { "epoch": 405.0, "eval_loss": 11.066388130187988, "eval_runtime": 1.5313, "eval_samples_per_second": 79.672, "eval_steps_per_second": 10.449, "step": 24705 }, { "epoch": 406.0, "eval_loss": 11.046152114868164, "eval_runtime": 1.5351, "eval_samples_per_second": 79.475, "eval_steps_per_second": 10.423, "step": 24766 }, { "epoch": 407.0, "eval_loss": 11.012222290039062, "eval_runtime": 1.5324, "eval_samples_per_second": 79.611, "eval_steps_per_second": 10.441, "step": 24827 }, { "epoch": 408.0, "eval_loss": 10.857176780700684, "eval_runtime": 1.5313, "eval_samples_per_second": 79.673, "eval_steps_per_second": 10.449, "step": 24888 }, { "epoch": 409.0, "eval_loss": 11.138158798217773, "eval_runtime": 1.5365, "eval_samples_per_second": 79.399, "eval_steps_per_second": 10.413, "step": 24949 }, { "epoch": 409.83, "learning_rate": 9.01639344262295e-09, "loss": 10.1386, "step": 25000 }, { "epoch": 410.0, "eval_loss": 11.070024490356445, "eval_runtime": 1.5371, "eval_samples_per_second": 79.369, "eval_steps_per_second": 10.409, "step": 25010 }, { "epoch": 411.0, "eval_loss": 10.96763801574707, "eval_runtime": 1.5368, "eval_samples_per_second": 79.384, "eval_steps_per_second": 10.411, "step": 25071 }, { "epoch": 412.0, "eval_loss": 11.1865234375, "eval_runtime": 1.5408, "eval_samples_per_second": 79.181, "eval_steps_per_second": 10.384, "step": 25132 }, { "epoch": 413.0, "eval_loss": 11.078502655029297, "eval_runtime": 1.5486, "eval_samples_per_second": 78.78, "eval_steps_per_second": 10.332, "step": 25193 }, { "epoch": 414.0, "eval_loss": 11.028984069824219, "eval_runtime": 1.5323, "eval_samples_per_second": 79.619, "eval_steps_per_second": 10.442, "step": 25254 }, { "epoch": 415.0, "eval_loss": 11.138273239135742, "eval_runtime": 1.5322, "eval_samples_per_second": 79.622, "eval_steps_per_second": 10.442, "step": 25315 }, { "epoch": 416.0, "eval_loss": 11.113880157470703, "eval_runtime": 1.5312, "eval_samples_per_second": 79.678, "eval_steps_per_second": 10.45, "step": 25376 }, { "epoch": 417.0, "eval_loss": 11.018509864807129, "eval_runtime": 1.5361, "eval_samples_per_second": 79.42, "eval_steps_per_second": 10.416, "step": 25437 }, { "epoch": 418.0, "eval_loss": 11.018741607666016, "eval_runtime": 1.5314, "eval_samples_per_second": 79.664, "eval_steps_per_second": 10.448, "step": 25498 }, { "epoch": 418.03, "learning_rate": 8.196721311475408e-09, "loss": 10.1491, "step": 25500 }, { "epoch": 419.0, "eval_loss": 11.089320182800293, "eval_runtime": 1.5384, "eval_samples_per_second": 79.303, "eval_steps_per_second": 10.4, "step": 25559 }, { "epoch": 420.0, "eval_loss": 11.034805297851562, "eval_runtime": 1.5427, "eval_samples_per_second": 79.083, "eval_steps_per_second": 10.372, "step": 25620 }, { "epoch": 421.0, "eval_loss": 10.993184089660645, "eval_runtime": 1.538, "eval_samples_per_second": 79.323, "eval_steps_per_second": 10.403, "step": 25681 }, { "epoch": 422.0, "eval_loss": 11.076547622680664, "eval_runtime": 1.5334, "eval_samples_per_second": 79.562, "eval_steps_per_second": 10.434, "step": 25742 }, { "epoch": 423.0, "eval_loss": 11.048792839050293, "eval_runtime": 1.5319, "eval_samples_per_second": 79.638, "eval_steps_per_second": 10.444, "step": 25803 }, { "epoch": 424.0, "eval_loss": 11.024137496948242, "eval_runtime": 1.5337, "eval_samples_per_second": 79.548, "eval_steps_per_second": 10.433, "step": 25864 }, { "epoch": 425.0, "eval_loss": 11.069534301757812, "eval_runtime": 1.532, "eval_samples_per_second": 79.632, "eval_steps_per_second": 10.444, "step": 25925 }, { "epoch": 426.0, "eval_loss": 10.898648262023926, "eval_runtime": 1.5315, "eval_samples_per_second": 79.662, "eval_steps_per_second": 10.448, "step": 25986 }, { "epoch": 426.23, "learning_rate": 7.377049180327869e-09, "loss": 10.1184, "step": 26000 }, { "epoch": 427.0, "eval_loss": 10.843330383300781, "eval_runtime": 1.5388, "eval_samples_per_second": 79.284, "eval_steps_per_second": 10.398, "step": 26047 }, { "epoch": 428.0, "eval_loss": 10.847617149353027, "eval_runtime": 1.5395, "eval_samples_per_second": 79.244, "eval_steps_per_second": 10.393, "step": 26108 }, { "epoch": 429.0, "eval_loss": 10.974745750427246, "eval_runtime": 1.5392, "eval_samples_per_second": 79.259, "eval_steps_per_second": 10.395, "step": 26169 }, { "epoch": 430.0, "eval_loss": 10.925885200500488, "eval_runtime": 1.5313, "eval_samples_per_second": 79.672, "eval_steps_per_second": 10.449, "step": 26230 }, { "epoch": 431.0, "eval_loss": 10.864691734313965, "eval_runtime": 1.5326, "eval_samples_per_second": 79.602, "eval_steps_per_second": 10.44, "step": 26291 }, { "epoch": 432.0, "eval_loss": 11.028008460998535, "eval_runtime": 1.5318, "eval_samples_per_second": 79.645, "eval_steps_per_second": 10.445, "step": 26352 }, { "epoch": 433.0, "eval_loss": 10.958173751831055, "eval_runtime": 1.5349, "eval_samples_per_second": 79.482, "eval_steps_per_second": 10.424, "step": 26413 }, { "epoch": 434.0, "eval_loss": 10.98095989227295, "eval_runtime": 1.5339, "eval_samples_per_second": 79.536, "eval_steps_per_second": 10.431, "step": 26474 }, { "epoch": 434.43, "learning_rate": 6.5573770491803275e-09, "loss": 10.1396, "step": 26500 }, { "epoch": 435.0, "eval_loss": 11.049097061157227, "eval_runtime": 1.5478, "eval_samples_per_second": 78.819, "eval_steps_per_second": 10.337, "step": 26535 }, { "epoch": 436.0, "eval_loss": 11.069966316223145, "eval_runtime": 1.5391, "eval_samples_per_second": 79.265, "eval_steps_per_second": 10.395, "step": 26596 }, { "epoch": 437.0, "eval_loss": 10.987845420837402, "eval_runtime": 1.5481, "eval_samples_per_second": 78.804, "eval_steps_per_second": 10.335, "step": 26657 }, { "epoch": 438.0, "eval_loss": 10.939976692199707, "eval_runtime": 1.5375, "eval_samples_per_second": 79.351, "eval_steps_per_second": 10.407, "step": 26718 }, { "epoch": 439.0, "eval_loss": 10.868154525756836, "eval_runtime": 1.5314, "eval_samples_per_second": 79.664, "eval_steps_per_second": 10.448, "step": 26779 }, { "epoch": 440.0, "eval_loss": 10.966716766357422, "eval_runtime": 1.539, "eval_samples_per_second": 79.27, "eval_steps_per_second": 10.396, "step": 26840 }, { "epoch": 441.0, "eval_loss": 11.011680603027344, "eval_runtime": 1.5312, "eval_samples_per_second": 79.674, "eval_steps_per_second": 10.449, "step": 26901 }, { "epoch": 442.0, "eval_loss": 11.037373542785645, "eval_runtime": 1.5311, "eval_samples_per_second": 79.68, "eval_steps_per_second": 10.45, "step": 26962 }, { "epoch": 442.62, "learning_rate": 5.737704918032786e-09, "loss": 10.1337, "step": 27000 }, { "epoch": 443.0, "eval_loss": 11.133712768554688, "eval_runtime": 1.5458, "eval_samples_per_second": 78.922, "eval_steps_per_second": 10.35, "step": 27023 }, { "epoch": 444.0, "eval_loss": 10.941494941711426, "eval_runtime": 1.5386, "eval_samples_per_second": 79.294, "eval_steps_per_second": 10.399, "step": 27084 }, { "epoch": 445.0, "eval_loss": 11.017427444458008, "eval_runtime": 1.5391, "eval_samples_per_second": 79.266, "eval_steps_per_second": 10.396, "step": 27145 }, { "epoch": 446.0, "eval_loss": 11.02389144897461, "eval_runtime": 1.5319, "eval_samples_per_second": 79.641, "eval_steps_per_second": 10.445, "step": 27206 }, { "epoch": 447.0, "eval_loss": 10.897884368896484, "eval_runtime": 1.534, "eval_samples_per_second": 79.53, "eval_steps_per_second": 10.43, "step": 27267 }, { "epoch": 448.0, "eval_loss": 10.921698570251465, "eval_runtime": 1.5334, "eval_samples_per_second": 79.564, "eval_steps_per_second": 10.435, "step": 27328 }, { "epoch": 449.0, "eval_loss": 10.892621994018555, "eval_runtime": 1.5335, "eval_samples_per_second": 79.557, "eval_steps_per_second": 10.434, "step": 27389 }, { "epoch": 450.0, "eval_loss": 11.121941566467285, "eval_runtime": 1.5314, "eval_samples_per_second": 79.668, "eval_steps_per_second": 10.448, "step": 27450 }, { "epoch": 450.82, "learning_rate": 4.918032786885245e-09, "loss": 10.1168, "step": 27500 }, { "epoch": 451.0, "eval_loss": 10.893115043640137, "eval_runtime": 1.5424, "eval_samples_per_second": 79.097, "eval_steps_per_second": 10.373, "step": 27511 }, { "epoch": 452.0, "eval_loss": 11.011245727539062, "eval_runtime": 1.5529, "eval_samples_per_second": 78.564, "eval_steps_per_second": 10.303, "step": 27572 }, { "epoch": 453.0, "eval_loss": 10.982295036315918, "eval_runtime": 1.5411, "eval_samples_per_second": 79.163, "eval_steps_per_second": 10.382, "step": 27633 }, { "epoch": 454.0, "eval_loss": 11.109077453613281, "eval_runtime": 1.5438, "eval_samples_per_second": 79.028, "eval_steps_per_second": 10.364, "step": 27694 }, { "epoch": 455.0, "eval_loss": 10.869407653808594, "eval_runtime": 1.5332, "eval_samples_per_second": 79.575, "eval_steps_per_second": 10.436, "step": 27755 }, { "epoch": 456.0, "eval_loss": 10.962514877319336, "eval_runtime": 1.5316, "eval_samples_per_second": 79.656, "eval_steps_per_second": 10.447, "step": 27816 }, { "epoch": 457.0, "eval_loss": 10.855345726013184, "eval_runtime": 1.5368, "eval_samples_per_second": 79.386, "eval_steps_per_second": 10.411, "step": 27877 }, { "epoch": 458.0, "eval_loss": 10.98884391784668, "eval_runtime": 1.533, "eval_samples_per_second": 79.582, "eval_steps_per_second": 10.437, "step": 27938 }, { "epoch": 459.0, "eval_loss": 10.929617881774902, "eval_runtime": 1.5316, "eval_samples_per_second": 79.658, "eval_steps_per_second": 10.447, "step": 27999 }, { "epoch": 459.02, "learning_rate": 4.098360655737704e-09, "loss": 10.1229, "step": 28000 }, { "epoch": 460.0, "eval_loss": 10.88948917388916, "eval_runtime": 1.5368, "eval_samples_per_second": 79.387, "eval_steps_per_second": 10.411, "step": 28060 }, { "epoch": 461.0, "eval_loss": 10.980300903320312, "eval_runtime": 1.5401, "eval_samples_per_second": 79.214, "eval_steps_per_second": 10.389, "step": 28121 }, { "epoch": 462.0, "eval_loss": 11.014444351196289, "eval_runtime": 1.5678, "eval_samples_per_second": 77.815, "eval_steps_per_second": 10.205, "step": 28182 }, { "epoch": 463.0, "eval_loss": 11.025659561157227, "eval_runtime": 1.5329, "eval_samples_per_second": 79.586, "eval_steps_per_second": 10.438, "step": 28243 }, { "epoch": 464.0, "eval_loss": 10.914053916931152, "eval_runtime": 1.5336, "eval_samples_per_second": 79.55, "eval_steps_per_second": 10.433, "step": 28304 }, { "epoch": 465.0, "eval_loss": 11.134551048278809, "eval_runtime": 1.5313, "eval_samples_per_second": 79.669, "eval_steps_per_second": 10.448, "step": 28365 }, { "epoch": 466.0, "eval_loss": 11.025362968444824, "eval_runtime": 1.533, "eval_samples_per_second": 79.58, "eval_steps_per_second": 10.437, "step": 28426 }, { "epoch": 467.0, "eval_loss": 11.038411140441895, "eval_runtime": 1.5314, "eval_samples_per_second": 79.664, "eval_steps_per_second": 10.448, "step": 28487 }, { "epoch": 467.21, "learning_rate": 3.2786885245901638e-09, "loss": 10.1179, "step": 28500 }, { "epoch": 468.0, "eval_loss": 10.849050521850586, "eval_runtime": 1.5609, "eval_samples_per_second": 78.159, "eval_steps_per_second": 10.25, "step": 28548 }, { "epoch": 469.0, "eval_loss": 11.046910285949707, "eval_runtime": 1.5417, "eval_samples_per_second": 79.132, "eval_steps_per_second": 10.378, "step": 28609 }, { "epoch": 470.0, "eval_loss": 10.967819213867188, "eval_runtime": 1.539, "eval_samples_per_second": 79.271, "eval_steps_per_second": 10.396, "step": 28670 }, { "epoch": 471.0, "eval_loss": 10.890240669250488, "eval_runtime": 1.531, "eval_samples_per_second": 79.687, "eval_steps_per_second": 10.451, "step": 28731 }, { "epoch": 472.0, "eval_loss": 10.964876174926758, "eval_runtime": 1.5349, "eval_samples_per_second": 79.483, "eval_steps_per_second": 10.424, "step": 28792 }, { "epoch": 473.0, "eval_loss": 10.925169944763184, "eval_runtime": 1.5315, "eval_samples_per_second": 79.659, "eval_steps_per_second": 10.447, "step": 28853 }, { "epoch": 474.0, "eval_loss": 11.113059997558594, "eval_runtime": 1.5314, "eval_samples_per_second": 79.666, "eval_steps_per_second": 10.448, "step": 28914 }, { "epoch": 475.0, "eval_loss": 11.026731491088867, "eval_runtime": 1.5312, "eval_samples_per_second": 79.677, "eval_steps_per_second": 10.449, "step": 28975 }, { "epoch": 475.41, "learning_rate": 2.4590163934426226e-09, "loss": 10.1189, "step": 29000 }, { "epoch": 476.0, "eval_loss": 10.842777252197266, "eval_runtime": 1.5457, "eval_samples_per_second": 78.928, "eval_steps_per_second": 10.351, "step": 29036 }, { "epoch": 477.0, "eval_loss": 11.031394958496094, "eval_runtime": 1.5433, "eval_samples_per_second": 79.053, "eval_steps_per_second": 10.368, "step": 29097 }, { "epoch": 478.0, "eval_loss": 11.093620300292969, "eval_runtime": 1.5431, "eval_samples_per_second": 79.061, "eval_steps_per_second": 10.369, "step": 29158 }, { "epoch": 479.0, "eval_loss": 10.99679946899414, "eval_runtime": 1.5405, "eval_samples_per_second": 79.196, "eval_steps_per_second": 10.386, "step": 29219 }, { "epoch": 480.0, "eval_loss": 10.872136116027832, "eval_runtime": 1.5317, "eval_samples_per_second": 79.648, "eval_steps_per_second": 10.446, "step": 29280 }, { "epoch": 481.0, "eval_loss": 11.01526165008545, "eval_runtime": 1.5338, "eval_samples_per_second": 79.539, "eval_steps_per_second": 10.431, "step": 29341 }, { "epoch": 482.0, "eval_loss": 11.176105499267578, "eval_runtime": 1.531, "eval_samples_per_second": 79.688, "eval_steps_per_second": 10.451, "step": 29402 }, { "epoch": 483.0, "eval_loss": 10.983979225158691, "eval_runtime": 1.5364, "eval_samples_per_second": 79.408, "eval_steps_per_second": 10.414, "step": 29463 }, { "epoch": 483.61, "learning_rate": 1.6393442622950819e-09, "loss": 10.1153, "step": 29500 }, { "epoch": 484.0, "eval_loss": 10.964775085449219, "eval_runtime": 1.5399, "eval_samples_per_second": 79.225, "eval_steps_per_second": 10.39, "step": 29524 }, { "epoch": 485.0, "eval_loss": 11.114033699035645, "eval_runtime": 1.5364, "eval_samples_per_second": 79.409, "eval_steps_per_second": 10.414, "step": 29585 }, { "epoch": 486.0, "eval_loss": 11.021162986755371, "eval_runtime": 1.5483, "eval_samples_per_second": 78.798, "eval_steps_per_second": 10.334, "step": 29646 }, { "epoch": 487.0, "eval_loss": 10.919731140136719, "eval_runtime": 1.5334, "eval_samples_per_second": 79.563, "eval_steps_per_second": 10.434, "step": 29707 }, { "epoch": 488.0, "eval_loss": 10.979778289794922, "eval_runtime": 1.5341, "eval_samples_per_second": 79.523, "eval_steps_per_second": 10.429, "step": 29768 }, { "epoch": 489.0, "eval_loss": 10.904738426208496, "eval_runtime": 1.5327, "eval_samples_per_second": 79.596, "eval_steps_per_second": 10.439, "step": 29829 }, { "epoch": 490.0, "eval_loss": 11.014572143554688, "eval_runtime": 1.5384, "eval_samples_per_second": 79.303, "eval_steps_per_second": 10.4, "step": 29890 }, { "epoch": 491.0, "eval_loss": 11.057549476623535, "eval_runtime": 1.5352, "eval_samples_per_second": 79.469, "eval_steps_per_second": 10.422, "step": 29951 }, { "epoch": 491.8, "learning_rate": 8.196721311475409e-10, "loss": 10.1141, "step": 30000 }, { "epoch": 492.0, "eval_loss": 11.040027618408203, "eval_runtime": 1.5409, "eval_samples_per_second": 79.175, "eval_steps_per_second": 10.384, "step": 30012 }, { "epoch": 493.0, "eval_loss": 11.089771270751953, "eval_runtime": 1.5412, "eval_samples_per_second": 79.159, "eval_steps_per_second": 10.382, "step": 30073 }, { "epoch": 494.0, "eval_loss": 10.99104118347168, "eval_runtime": 1.5377, "eval_samples_per_second": 79.339, "eval_steps_per_second": 10.405, "step": 30134 }, { "epoch": 495.0, "eval_loss": 11.057866096496582, "eval_runtime": 1.5447, "eval_samples_per_second": 78.98, "eval_steps_per_second": 10.358, "step": 30195 }, { "epoch": 496.0, "eval_loss": 10.857988357543945, "eval_runtime": 1.533, "eval_samples_per_second": 79.581, "eval_steps_per_second": 10.437, "step": 30256 }, { "epoch": 497.0, "eval_loss": 10.944987297058105, "eval_runtime": 1.5401, "eval_samples_per_second": 79.216, "eval_steps_per_second": 10.389, "step": 30317 }, { "epoch": 498.0, "eval_loss": 11.052291870117188, "eval_runtime": 1.5333, "eval_samples_per_second": 79.568, "eval_steps_per_second": 10.435, "step": 30378 }, { "epoch": 499.0, "eval_loss": 11.122845649719238, "eval_runtime": 1.5327, "eval_samples_per_second": 79.598, "eval_steps_per_second": 10.439, "step": 30439 }, { "epoch": 500.0, "learning_rate": 0.0, "loss": 10.1176, "step": 30500 }, { "epoch": 500.0, "eval_loss": 11.049210548400879, "eval_runtime": 1.5625, "eval_samples_per_second": 78.08, "eval_steps_per_second": 10.24, "step": 30500 }, { "epoch": 500.0, "step": 30500, "total_flos": 1.6136552440728576e+16, "train_loss": 11.67972986039959, "train_runtime": 24510.5451, "train_samples_per_second": 9.975, "train_steps_per_second": 1.244 } ], "max_steps": 30500, "num_train_epochs": 500, "total_flos": 1.6136552440728576e+16, "trial_name": null, "trial_params": null }