|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 500.0, |
|
"global_step": 31000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 39.53297424316406, |
|
"eval_runtime": 1.5249, |
|
"eval_samples_per_second": 79.35, |
|
"eval_steps_per_second": 10.493, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 39.283233642578125, |
|
"eval_runtime": 1.5209, |
|
"eval_samples_per_second": 79.56, |
|
"eval_steps_per_second": 10.52, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 39.727474212646484, |
|
"eval_runtime": 1.5199, |
|
"eval_samples_per_second": 79.609, |
|
"eval_steps_per_second": 10.527, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 38.73893356323242, |
|
"eval_runtime": 1.5313, |
|
"eval_samples_per_second": 79.02, |
|
"eval_steps_per_second": 10.449, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 38.108787536621094, |
|
"eval_runtime": 1.5538, |
|
"eval_samples_per_second": 77.875, |
|
"eval_steps_per_second": 10.298, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 37.65414047241211, |
|
"eval_runtime": 1.5218, |
|
"eval_samples_per_second": 79.511, |
|
"eval_steps_per_second": 10.514, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 36.28599548339844, |
|
"eval_runtime": 1.5198, |
|
"eval_samples_per_second": 79.613, |
|
"eval_steps_per_second": 10.527, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 36.02519607543945, |
|
"eval_runtime": 1.5195, |
|
"eval_samples_per_second": 79.632, |
|
"eval_steps_per_second": 10.53, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 4.9193548387096775e-08, |
|
"loss": 40.88, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 35.2044563293457, |
|
"eval_runtime": 1.5286, |
|
"eval_samples_per_second": 79.159, |
|
"eval_steps_per_second": 10.467, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 34.13139724731445, |
|
"eval_runtime": 1.5277, |
|
"eval_samples_per_second": 79.203, |
|
"eval_steps_per_second": 10.473, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 32.90681838989258, |
|
"eval_runtime": 1.5353, |
|
"eval_samples_per_second": 78.812, |
|
"eval_steps_per_second": 10.421, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 32.091732025146484, |
|
"eval_runtime": 1.5361, |
|
"eval_samples_per_second": 78.769, |
|
"eval_steps_per_second": 10.416, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 31.341379165649414, |
|
"eval_runtime": 1.5306, |
|
"eval_samples_per_second": 79.053, |
|
"eval_steps_per_second": 10.453, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 30.152177810668945, |
|
"eval_runtime": 1.5194, |
|
"eval_samples_per_second": 79.635, |
|
"eval_steps_per_second": 10.53, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 29.086437225341797, |
|
"eval_runtime": 1.522, |
|
"eval_samples_per_second": 79.502, |
|
"eval_steps_per_second": 10.513, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 27.76565933227539, |
|
"eval_runtime": 1.5196, |
|
"eval_samples_per_second": 79.627, |
|
"eval_steps_per_second": 10.529, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 4.8387096774193546e-08, |
|
"loss": 32.3449, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 26.679487228393555, |
|
"eval_runtime": 1.5328, |
|
"eval_samples_per_second": 78.942, |
|
"eval_steps_per_second": 10.439, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 25.430702209472656, |
|
"eval_runtime": 1.5318, |
|
"eval_samples_per_second": 78.994, |
|
"eval_steps_per_second": 10.445, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 24.457077026367188, |
|
"eval_runtime": 1.5339, |
|
"eval_samples_per_second": 78.882, |
|
"eval_steps_per_second": 10.431, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 24.03400993347168, |
|
"eval_runtime": 1.5361, |
|
"eval_samples_per_second": 78.771, |
|
"eval_steps_per_second": 10.416, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 22.688884735107422, |
|
"eval_runtime": 1.5349, |
|
"eval_samples_per_second": 78.834, |
|
"eval_steps_per_second": 10.424, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 21.914871215820312, |
|
"eval_runtime": 1.5268, |
|
"eval_samples_per_second": 79.251, |
|
"eval_steps_per_second": 10.48, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 20.947105407714844, |
|
"eval_runtime": 1.52, |
|
"eval_samples_per_second": 79.607, |
|
"eval_steps_per_second": 10.527, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 20.71515464782715, |
|
"eval_runtime": 1.5285, |
|
"eval_samples_per_second": 79.164, |
|
"eval_steps_per_second": 10.468, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"learning_rate": 4.7580645161290323e-08, |
|
"loss": 24.114, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 19.957895278930664, |
|
"eval_runtime": 1.5457, |
|
"eval_samples_per_second": 78.279, |
|
"eval_steps_per_second": 10.351, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 19.421499252319336, |
|
"eval_runtime": 1.5734, |
|
"eval_samples_per_second": 76.905, |
|
"eval_steps_per_second": 10.169, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 18.702451705932617, |
|
"eval_runtime": 1.5338, |
|
"eval_samples_per_second": 78.89, |
|
"eval_steps_per_second": 10.432, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 18.053003311157227, |
|
"eval_runtime": 1.5299, |
|
"eval_samples_per_second": 79.092, |
|
"eval_steps_per_second": 10.458, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 17.729013442993164, |
|
"eval_runtime": 1.5369, |
|
"eval_samples_per_second": 78.732, |
|
"eval_steps_per_second": 10.411, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 17.282075881958008, |
|
"eval_runtime": 1.5244, |
|
"eval_samples_per_second": 79.377, |
|
"eval_steps_per_second": 10.496, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 16.866586685180664, |
|
"eval_runtime": 1.5191, |
|
"eval_samples_per_second": 79.654, |
|
"eval_steps_per_second": 10.533, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 16.420963287353516, |
|
"eval_runtime": 1.5281, |
|
"eval_samples_per_second": 79.185, |
|
"eval_steps_per_second": 10.471, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 4.6774193548387094e-08, |
|
"loss": 18.4015, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 16.233917236328125, |
|
"eval_runtime": 1.5685, |
|
"eval_samples_per_second": 77.146, |
|
"eval_steps_per_second": 10.201, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 15.798954010009766, |
|
"eval_runtime": 1.5347, |
|
"eval_samples_per_second": 78.844, |
|
"eval_steps_per_second": 10.426, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 15.77268123626709, |
|
"eval_runtime": 1.5269, |
|
"eval_samples_per_second": 79.247, |
|
"eval_steps_per_second": 10.479, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 15.234251976013184, |
|
"eval_runtime": 1.5303, |
|
"eval_samples_per_second": 79.07, |
|
"eval_steps_per_second": 10.456, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 15.142735481262207, |
|
"eval_runtime": 1.5404, |
|
"eval_samples_per_second": 78.549, |
|
"eval_steps_per_second": 10.387, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 14.873641014099121, |
|
"eval_runtime": 1.5199, |
|
"eval_samples_per_second": 79.611, |
|
"eval_steps_per_second": 10.527, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 14.704235076904297, |
|
"eval_runtime": 1.5212, |
|
"eval_samples_per_second": 79.542, |
|
"eval_steps_per_second": 10.518, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 14.539189338684082, |
|
"eval_runtime": 1.5248, |
|
"eval_samples_per_second": 79.353, |
|
"eval_steps_per_second": 10.493, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 4.5967741935483865e-08, |
|
"loss": 14.946, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 14.535553932189941, |
|
"eval_runtime": 1.5276, |
|
"eval_samples_per_second": 79.21, |
|
"eval_steps_per_second": 10.474, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 14.1603364944458, |
|
"eval_runtime": 1.5303, |
|
"eval_samples_per_second": 79.072, |
|
"eval_steps_per_second": 10.456, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 14.145159721374512, |
|
"eval_runtime": 1.5312, |
|
"eval_samples_per_second": 79.024, |
|
"eval_steps_per_second": 10.449, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 14.198166847229004, |
|
"eval_runtime": 1.5287, |
|
"eval_samples_per_second": 79.152, |
|
"eval_steps_per_second": 10.466, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 13.976308822631836, |
|
"eval_runtime": 1.5411, |
|
"eval_samples_per_second": 78.517, |
|
"eval_steps_per_second": 10.382, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 13.921603202819824, |
|
"eval_runtime": 1.5243, |
|
"eval_samples_per_second": 79.382, |
|
"eval_steps_per_second": 10.497, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 13.767107963562012, |
|
"eval_runtime": 1.5235, |
|
"eval_samples_per_second": 79.42, |
|
"eval_steps_per_second": 10.502, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 13.634342193603516, |
|
"eval_runtime": 1.5202, |
|
"eval_samples_per_second": 79.597, |
|
"eval_steps_per_second": 10.525, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 48.39, |
|
"learning_rate": 4.516129032258064e-08, |
|
"loss": 13.1518, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 13.62414264678955, |
|
"eval_runtime": 1.5563, |
|
"eval_samples_per_second": 77.747, |
|
"eval_steps_per_second": 10.281, |
|
"step": 3038 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 13.416315078735352, |
|
"eval_runtime": 1.5393, |
|
"eval_samples_per_second": 78.607, |
|
"eval_steps_per_second": 10.394, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 13.440138816833496, |
|
"eval_runtime": 1.5302, |
|
"eval_samples_per_second": 79.075, |
|
"eval_steps_per_second": 10.456, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 13.560946464538574, |
|
"eval_runtime": 1.5457, |
|
"eval_samples_per_second": 78.282, |
|
"eval_steps_per_second": 10.351, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 13.285831451416016, |
|
"eval_runtime": 1.5455, |
|
"eval_samples_per_second": 78.294, |
|
"eval_steps_per_second": 10.353, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 13.15819263458252, |
|
"eval_runtime": 1.5204, |
|
"eval_samples_per_second": 79.586, |
|
"eval_steps_per_second": 10.524, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 13.245644569396973, |
|
"eval_runtime": 1.5209, |
|
"eval_samples_per_second": 79.557, |
|
"eval_steps_per_second": 10.52, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 13.234107971191406, |
|
"eval_runtime": 1.5187, |
|
"eval_samples_per_second": 79.673, |
|
"eval_steps_per_second": 10.535, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 56.45, |
|
"learning_rate": 4.435483870967742e-08, |
|
"loss": 12.1359, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 13.015525817871094, |
|
"eval_runtime": 1.5313, |
|
"eval_samples_per_second": 79.02, |
|
"eval_steps_per_second": 10.449, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 13.029335021972656, |
|
"eval_runtime": 1.5381, |
|
"eval_samples_per_second": 78.667, |
|
"eval_steps_per_second": 10.402, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 12.971243858337402, |
|
"eval_runtime": 1.5525, |
|
"eval_samples_per_second": 77.94, |
|
"eval_steps_per_second": 10.306, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 12.94456672668457, |
|
"eval_runtime": 1.5335, |
|
"eval_samples_per_second": 78.903, |
|
"eval_steps_per_second": 10.433, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_loss": 12.830053329467773, |
|
"eval_runtime": 1.5349, |
|
"eval_samples_per_second": 78.831, |
|
"eval_steps_per_second": 10.424, |
|
"step": 3782 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 12.75737190246582, |
|
"eval_runtime": 1.5321, |
|
"eval_samples_per_second": 78.978, |
|
"eval_steps_per_second": 10.443, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_loss": 12.696720123291016, |
|
"eval_runtime": 1.5195, |
|
"eval_samples_per_second": 79.632, |
|
"eval_steps_per_second": 10.53, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 12.653301239013672, |
|
"eval_runtime": 1.5219, |
|
"eval_samples_per_second": 79.506, |
|
"eval_steps_per_second": 10.513, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 64.52, |
|
"learning_rate": 4.354838709677419e-08, |
|
"loss": 11.5584, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 12.554749488830566, |
|
"eval_runtime": 1.5433, |
|
"eval_samples_per_second": 78.403, |
|
"eval_steps_per_second": 10.367, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 12.586564064025879, |
|
"eval_runtime": 1.5317, |
|
"eval_samples_per_second": 78.997, |
|
"eval_steps_per_second": 10.446, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_loss": 12.756257057189941, |
|
"eval_runtime": 1.5267, |
|
"eval_samples_per_second": 79.256, |
|
"eval_steps_per_second": 10.48, |
|
"step": 4154 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 12.648726463317871, |
|
"eval_runtime": 1.5304, |
|
"eval_samples_per_second": 79.064, |
|
"eval_steps_per_second": 10.455, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_loss": 12.601462364196777, |
|
"eval_runtime": 1.5297, |
|
"eval_samples_per_second": 79.102, |
|
"eval_steps_per_second": 10.46, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 12.509653091430664, |
|
"eval_runtime": 1.5291, |
|
"eval_samples_per_second": 79.132, |
|
"eval_steps_per_second": 10.464, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_loss": 12.53116226196289, |
|
"eval_runtime": 1.5214, |
|
"eval_samples_per_second": 79.53, |
|
"eval_steps_per_second": 10.516, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 12.467875480651855, |
|
"eval_runtime": 1.5274, |
|
"eval_samples_per_second": 79.221, |
|
"eval_steps_per_second": 10.475, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 72.58, |
|
"learning_rate": 4.274193548387097e-08, |
|
"loss": 11.224, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_loss": 12.480475425720215, |
|
"eval_runtime": 1.5409, |
|
"eval_samples_per_second": 78.527, |
|
"eval_steps_per_second": 10.384, |
|
"step": 4526 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 12.44310474395752, |
|
"eval_runtime": 1.5365, |
|
"eval_samples_per_second": 78.752, |
|
"eval_steps_per_second": 10.413, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 12.302526473999023, |
|
"eval_runtime": 1.5302, |
|
"eval_samples_per_second": 79.074, |
|
"eval_steps_per_second": 10.456, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 12.154280662536621, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.19, |
|
"eval_steps_per_second": 10.471, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_loss": 12.35551929473877, |
|
"eval_runtime": 1.5273, |
|
"eval_samples_per_second": 79.225, |
|
"eval_steps_per_second": 10.476, |
|
"step": 4774 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 12.264195442199707, |
|
"eval_runtime": 1.5333, |
|
"eval_samples_per_second": 78.913, |
|
"eval_steps_per_second": 10.435, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_loss": 12.355469703674316, |
|
"eval_runtime": 1.5317, |
|
"eval_samples_per_second": 78.997, |
|
"eval_steps_per_second": 10.446, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 12.291287422180176, |
|
"eval_runtime": 1.5256, |
|
"eval_samples_per_second": 79.315, |
|
"eval_steps_per_second": 10.488, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 80.65, |
|
"learning_rate": 4.193548387096774e-08, |
|
"loss": 10.9897, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_loss": 12.248346328735352, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 79.197, |
|
"eval_steps_per_second": 10.472, |
|
"step": 5022 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 12.181654930114746, |
|
"eval_runtime": 1.5283, |
|
"eval_samples_per_second": 79.175, |
|
"eval_steps_per_second": 10.469, |
|
"step": 5084 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_loss": 12.359329223632812, |
|
"eval_runtime": 1.5268, |
|
"eval_samples_per_second": 79.251, |
|
"eval_steps_per_second": 10.479, |
|
"step": 5146 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 12.071714401245117, |
|
"eval_runtime": 1.5407, |
|
"eval_samples_per_second": 78.535, |
|
"eval_steps_per_second": 10.385, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 12.247845649719238, |
|
"eval_runtime": 1.5293, |
|
"eval_samples_per_second": 79.122, |
|
"eval_steps_per_second": 10.462, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 12.065207481384277, |
|
"eval_runtime": 1.5401, |
|
"eval_samples_per_second": 78.568, |
|
"eval_steps_per_second": 10.389, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_loss": 12.124922752380371, |
|
"eval_runtime": 1.5205, |
|
"eval_samples_per_second": 79.578, |
|
"eval_steps_per_second": 10.523, |
|
"step": 5394 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 12.090045928955078, |
|
"eval_runtime": 1.5308, |
|
"eval_samples_per_second": 79.043, |
|
"eval_steps_per_second": 10.452, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 88.71, |
|
"learning_rate": 4.1129032258064516e-08, |
|
"loss": 10.8156, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_loss": 11.984047889709473, |
|
"eval_runtime": 1.5305, |
|
"eval_samples_per_second": 79.061, |
|
"eval_steps_per_second": 10.454, |
|
"step": 5518 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 12.120992660522461, |
|
"eval_runtime": 1.53, |
|
"eval_samples_per_second": 79.086, |
|
"eval_steps_per_second": 10.458, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_loss": 12.18094253540039, |
|
"eval_runtime": 1.5426, |
|
"eval_samples_per_second": 78.437, |
|
"eval_steps_per_second": 10.372, |
|
"step": 5642 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_loss": 12.021756172180176, |
|
"eval_runtime": 1.5405, |
|
"eval_samples_per_second": 78.545, |
|
"eval_steps_per_second": 10.386, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_loss": 12.143865585327148, |
|
"eval_runtime": 1.5315, |
|
"eval_samples_per_second": 79.009, |
|
"eval_steps_per_second": 10.447, |
|
"step": 5766 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_loss": 11.918268203735352, |
|
"eval_runtime": 1.5285, |
|
"eval_samples_per_second": 79.163, |
|
"eval_steps_per_second": 10.468, |
|
"step": 5828 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 12.067418098449707, |
|
"eval_runtime": 1.5204, |
|
"eval_samples_per_second": 79.584, |
|
"eval_steps_per_second": 10.524, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 12.009296417236328, |
|
"eval_runtime": 1.52, |
|
"eval_samples_per_second": 79.604, |
|
"eval_steps_per_second": 10.526, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 96.77, |
|
"learning_rate": 4.032258064516129e-08, |
|
"loss": 10.6906, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_loss": 12.008870124816895, |
|
"eval_runtime": 1.5307, |
|
"eval_samples_per_second": 79.05, |
|
"eval_steps_per_second": 10.453, |
|
"step": 6014 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_loss": 11.956018447875977, |
|
"eval_runtime": 1.5309, |
|
"eval_samples_per_second": 79.041, |
|
"eval_steps_per_second": 10.452, |
|
"step": 6076 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_loss": 11.767322540283203, |
|
"eval_runtime": 1.5351, |
|
"eval_samples_per_second": 78.825, |
|
"eval_steps_per_second": 10.423, |
|
"step": 6138 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 12.034104347229004, |
|
"eval_runtime": 1.5321, |
|
"eval_samples_per_second": 78.979, |
|
"eval_steps_per_second": 10.443, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_loss": 12.03824234008789, |
|
"eval_runtime": 1.5289, |
|
"eval_samples_per_second": 79.14, |
|
"eval_steps_per_second": 10.465, |
|
"step": 6262 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_loss": 11.87580680847168, |
|
"eval_runtime": 1.5338, |
|
"eval_samples_per_second": 78.891, |
|
"eval_steps_per_second": 10.432, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_loss": 11.93928337097168, |
|
"eval_runtime": 1.5258, |
|
"eval_samples_per_second": 79.301, |
|
"eval_steps_per_second": 10.486, |
|
"step": 6386 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_loss": 11.878522872924805, |
|
"eval_runtime": 1.5273, |
|
"eval_samples_per_second": 79.226, |
|
"eval_steps_per_second": 10.476, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 104.84, |
|
"learning_rate": 3.951612903225806e-08, |
|
"loss": 10.5767, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_loss": 11.81560230255127, |
|
"eval_runtime": 1.5317, |
|
"eval_samples_per_second": 78.998, |
|
"eval_steps_per_second": 10.446, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_loss": 11.750551223754883, |
|
"eval_runtime": 1.5404, |
|
"eval_samples_per_second": 78.549, |
|
"eval_steps_per_second": 10.387, |
|
"step": 6572 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_loss": 11.90053653717041, |
|
"eval_runtime": 1.5286, |
|
"eval_samples_per_second": 79.156, |
|
"eval_steps_per_second": 10.467, |
|
"step": 6634 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_loss": 11.880046844482422, |
|
"eval_runtime": 1.5341, |
|
"eval_samples_per_second": 78.874, |
|
"eval_steps_per_second": 10.43, |
|
"step": 6696 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_loss": 11.9345703125, |
|
"eval_runtime": 1.5463, |
|
"eval_samples_per_second": 78.251, |
|
"eval_steps_per_second": 10.347, |
|
"step": 6758 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_loss": 11.833414077758789, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.166, |
|
"eval_steps_per_second": 10.468, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_loss": 11.616347312927246, |
|
"eval_runtime": 1.5189, |
|
"eval_samples_per_second": 79.663, |
|
"eval_steps_per_second": 10.534, |
|
"step": 6882 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_loss": 11.829395294189453, |
|
"eval_runtime": 1.5288, |
|
"eval_samples_per_second": 79.144, |
|
"eval_steps_per_second": 10.465, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 112.9, |
|
"learning_rate": 3.8709677419354835e-08, |
|
"loss": 10.5013, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_loss": 11.786639213562012, |
|
"eval_runtime": 1.5596, |
|
"eval_samples_per_second": 77.582, |
|
"eval_steps_per_second": 10.259, |
|
"step": 7006 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_loss": 11.724261283874512, |
|
"eval_runtime": 1.5228, |
|
"eval_samples_per_second": 79.46, |
|
"eval_steps_per_second": 10.507, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_loss": 11.56452751159668, |
|
"eval_runtime": 1.5342, |
|
"eval_samples_per_second": 78.868, |
|
"eval_steps_per_second": 10.429, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_loss": 11.7550687789917, |
|
"eval_runtime": 1.5293, |
|
"eval_samples_per_second": 79.122, |
|
"eval_steps_per_second": 10.462, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_loss": 11.625850677490234, |
|
"eval_runtime": 1.5487, |
|
"eval_samples_per_second": 78.129, |
|
"eval_steps_per_second": 10.331, |
|
"step": 7254 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_loss": 11.723539352416992, |
|
"eval_runtime": 1.5327, |
|
"eval_samples_per_second": 78.946, |
|
"eval_steps_per_second": 10.439, |
|
"step": 7316 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_loss": 11.699857711791992, |
|
"eval_runtime": 1.5315, |
|
"eval_samples_per_second": 79.006, |
|
"eval_steps_per_second": 10.447, |
|
"step": 7378 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 11.485078811645508, |
|
"eval_runtime": 1.5203, |
|
"eval_samples_per_second": 79.591, |
|
"eval_steps_per_second": 10.524, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 120.97, |
|
"learning_rate": 3.790322580645161e-08, |
|
"loss": 10.4493, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_loss": 11.495078086853027, |
|
"eval_runtime": 1.5271, |
|
"eval_samples_per_second": 79.234, |
|
"eval_steps_per_second": 10.477, |
|
"step": 7502 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_loss": 11.521384239196777, |
|
"eval_runtime": 1.536, |
|
"eval_samples_per_second": 78.776, |
|
"eval_steps_per_second": 10.417, |
|
"step": 7564 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_loss": 11.739558219909668, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.186, |
|
"eval_steps_per_second": 10.471, |
|
"step": 7626 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_loss": 11.696319580078125, |
|
"eval_runtime": 1.5494, |
|
"eval_samples_per_second": 78.094, |
|
"eval_steps_per_second": 10.327, |
|
"step": 7688 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_loss": 11.530503273010254, |
|
"eval_runtime": 1.5529, |
|
"eval_samples_per_second": 77.921, |
|
"eval_steps_per_second": 10.304, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_loss": 11.771147727966309, |
|
"eval_runtime": 1.5301, |
|
"eval_samples_per_second": 79.081, |
|
"eval_steps_per_second": 10.457, |
|
"step": 7812 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_loss": 11.80306339263916, |
|
"eval_runtime": 1.5204, |
|
"eval_samples_per_second": 79.583, |
|
"eval_steps_per_second": 10.523, |
|
"step": 7874 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_loss": 11.595075607299805, |
|
"eval_runtime": 1.5262, |
|
"eval_samples_per_second": 79.282, |
|
"eval_steps_per_second": 10.484, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_loss": 11.47359848022461, |
|
"eval_runtime": 1.5189, |
|
"eval_samples_per_second": 79.663, |
|
"eval_steps_per_second": 10.534, |
|
"step": 7998 |
|
}, |
|
{ |
|
"epoch": 129.03, |
|
"learning_rate": 3.7096774193548384e-08, |
|
"loss": 10.3973, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_loss": 11.615568161010742, |
|
"eval_runtime": 1.5324, |
|
"eval_samples_per_second": 78.963, |
|
"eval_steps_per_second": 10.441, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_loss": 11.544720649719238, |
|
"eval_runtime": 1.5295, |
|
"eval_samples_per_second": 79.11, |
|
"eval_steps_per_second": 10.461, |
|
"step": 8122 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_loss": 11.356687545776367, |
|
"eval_runtime": 1.5299, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 10.458, |
|
"step": 8184 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_loss": 11.483879089355469, |
|
"eval_runtime": 1.5293, |
|
"eval_samples_per_second": 79.121, |
|
"eval_steps_per_second": 10.462, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_loss": 11.441631317138672, |
|
"eval_runtime": 1.5309, |
|
"eval_samples_per_second": 79.039, |
|
"eval_steps_per_second": 10.451, |
|
"step": 8308 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_loss": 11.539510726928711, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.191, |
|
"eval_steps_per_second": 10.471, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_loss": 11.437644004821777, |
|
"eval_runtime": 1.5258, |
|
"eval_samples_per_second": 79.301, |
|
"eval_steps_per_second": 10.486, |
|
"step": 8432 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_loss": 11.378108978271484, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.619, |
|
"eval_steps_per_second": 10.528, |
|
"step": 8494 |
|
}, |
|
{ |
|
"epoch": 137.1, |
|
"learning_rate": 3.629032258064516e-08, |
|
"loss": 10.3569, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_loss": 11.417948722839355, |
|
"eval_runtime": 1.5364, |
|
"eval_samples_per_second": 78.758, |
|
"eval_steps_per_second": 10.414, |
|
"step": 8556 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_loss": 11.674735069274902, |
|
"eval_runtime": 1.5314, |
|
"eval_samples_per_second": 79.012, |
|
"eval_steps_per_second": 10.448, |
|
"step": 8618 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_loss": 11.611095428466797, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 79.231, |
|
"eval_steps_per_second": 10.477, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_loss": 11.50928020477295, |
|
"eval_runtime": 1.5304, |
|
"eval_samples_per_second": 79.062, |
|
"eval_steps_per_second": 10.455, |
|
"step": 8742 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_loss": 11.475595474243164, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 79.201, |
|
"eval_steps_per_second": 10.473, |
|
"step": 8804 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_loss": 11.34980583190918, |
|
"eval_runtime": 1.5199, |
|
"eval_samples_per_second": 79.612, |
|
"eval_steps_per_second": 10.527, |
|
"step": 8866 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_loss": 11.562585830688477, |
|
"eval_runtime": 1.5213, |
|
"eval_samples_per_second": 79.535, |
|
"eval_steps_per_second": 10.517, |
|
"step": 8928 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_loss": 11.347404479980469, |
|
"eval_runtime": 1.5204, |
|
"eval_samples_per_second": 79.584, |
|
"eval_steps_per_second": 10.524, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 145.16, |
|
"learning_rate": 3.548387096774194e-08, |
|
"loss": 10.3565, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_loss": 11.377586364746094, |
|
"eval_runtime": 1.5334, |
|
"eval_samples_per_second": 78.91, |
|
"eval_steps_per_second": 10.434, |
|
"step": 9052 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_loss": 11.362828254699707, |
|
"eval_runtime": 1.5275, |
|
"eval_samples_per_second": 79.217, |
|
"eval_steps_per_second": 10.475, |
|
"step": 9114 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_loss": 11.485925674438477, |
|
"eval_runtime": 1.5452, |
|
"eval_samples_per_second": 78.308, |
|
"eval_steps_per_second": 10.355, |
|
"step": 9176 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_loss": 11.422063827514648, |
|
"eval_runtime": 1.5281, |
|
"eval_samples_per_second": 79.184, |
|
"eval_steps_per_second": 10.471, |
|
"step": 9238 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_loss": 11.419939041137695, |
|
"eval_runtime": 1.54, |
|
"eval_samples_per_second": 78.57, |
|
"eval_steps_per_second": 10.389, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_loss": 11.570083618164062, |
|
"eval_runtime": 1.5233, |
|
"eval_samples_per_second": 79.432, |
|
"eval_steps_per_second": 10.503, |
|
"step": 9362 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_loss": 11.463680267333984, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.621, |
|
"eval_steps_per_second": 10.528, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_loss": 11.65864086151123, |
|
"eval_runtime": 1.5194, |
|
"eval_samples_per_second": 79.634, |
|
"eval_steps_per_second": 10.53, |
|
"step": 9486 |
|
}, |
|
{ |
|
"epoch": 153.23, |
|
"learning_rate": 3.467741935483871e-08, |
|
"loss": 10.3122, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_loss": 11.487783432006836, |
|
"eval_runtime": 1.5302, |
|
"eval_samples_per_second": 79.074, |
|
"eval_steps_per_second": 10.456, |
|
"step": 9548 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_loss": 11.590331077575684, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.169, |
|
"eval_steps_per_second": 10.469, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_loss": 11.381085395812988, |
|
"eval_runtime": 1.5362, |
|
"eval_samples_per_second": 78.767, |
|
"eval_steps_per_second": 10.415, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_loss": 11.454978942871094, |
|
"eval_runtime": 1.5486, |
|
"eval_samples_per_second": 78.136, |
|
"eval_steps_per_second": 10.332, |
|
"step": 9734 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_loss": 11.569897651672363, |
|
"eval_runtime": 1.5314, |
|
"eval_samples_per_second": 79.011, |
|
"eval_steps_per_second": 10.448, |
|
"step": 9796 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_loss": 11.515423774719238, |
|
"eval_runtime": 1.52, |
|
"eval_samples_per_second": 79.603, |
|
"eval_steps_per_second": 10.526, |
|
"step": 9858 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_loss": 11.501609802246094, |
|
"eval_runtime": 1.5198, |
|
"eval_samples_per_second": 79.617, |
|
"eval_steps_per_second": 10.528, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_loss": 11.571191787719727, |
|
"eval_runtime": 1.5313, |
|
"eval_samples_per_second": 79.018, |
|
"eval_steps_per_second": 10.449, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 161.29, |
|
"learning_rate": 3.387096774193548e-08, |
|
"loss": 10.2888, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_loss": 11.289880752563477, |
|
"eval_runtime": 1.5323, |
|
"eval_samples_per_second": 78.967, |
|
"eval_steps_per_second": 10.442, |
|
"step": 10044 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_loss": 11.250905990600586, |
|
"eval_runtime": 1.5304, |
|
"eval_samples_per_second": 79.065, |
|
"eval_steps_per_second": 10.455, |
|
"step": 10106 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_loss": 11.452507972717285, |
|
"eval_runtime": 1.5273, |
|
"eval_samples_per_second": 79.227, |
|
"eval_steps_per_second": 10.476, |
|
"step": 10168 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_loss": 11.363972663879395, |
|
"eval_runtime": 1.5279, |
|
"eval_samples_per_second": 79.192, |
|
"eval_steps_per_second": 10.472, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_loss": 11.337541580200195, |
|
"eval_runtime": 1.5301, |
|
"eval_samples_per_second": 79.077, |
|
"eval_steps_per_second": 10.457, |
|
"step": 10292 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_loss": 11.396878242492676, |
|
"eval_runtime": 1.5279, |
|
"eval_samples_per_second": 79.193, |
|
"eval_steps_per_second": 10.472, |
|
"step": 10354 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_loss": 11.478124618530273, |
|
"eval_runtime": 1.5196, |
|
"eval_samples_per_second": 79.628, |
|
"eval_steps_per_second": 10.529, |
|
"step": 10416 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_loss": 11.397954940795898, |
|
"eval_runtime": 1.5231, |
|
"eval_samples_per_second": 79.443, |
|
"eval_steps_per_second": 10.505, |
|
"step": 10478 |
|
}, |
|
{ |
|
"epoch": 169.35, |
|
"learning_rate": 3.306451612903226e-08, |
|
"loss": 10.2642, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_loss": 11.28774356842041, |
|
"eval_runtime": 1.5277, |
|
"eval_samples_per_second": 79.204, |
|
"eval_steps_per_second": 10.473, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_loss": 11.336891174316406, |
|
"eval_runtime": 1.5318, |
|
"eval_samples_per_second": 78.993, |
|
"eval_steps_per_second": 10.445, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_loss": 11.385379791259766, |
|
"eval_runtime": 1.5315, |
|
"eval_samples_per_second": 79.006, |
|
"eval_steps_per_second": 10.447, |
|
"step": 10664 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_loss": 11.50495433807373, |
|
"eval_runtime": 1.5331, |
|
"eval_samples_per_second": 78.924, |
|
"eval_steps_per_second": 10.436, |
|
"step": 10726 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_loss": 11.482619285583496, |
|
"eval_runtime": 1.5436, |
|
"eval_samples_per_second": 78.386, |
|
"eval_steps_per_second": 10.365, |
|
"step": 10788 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_loss": 11.342474937438965, |
|
"eval_runtime": 1.5316, |
|
"eval_samples_per_second": 79.002, |
|
"eval_steps_per_second": 10.447, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_loss": 11.241662979125977, |
|
"eval_runtime": 1.5196, |
|
"eval_samples_per_second": 79.629, |
|
"eval_steps_per_second": 10.529, |
|
"step": 10912 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_loss": 11.126252174377441, |
|
"eval_runtime": 1.5201, |
|
"eval_samples_per_second": 79.603, |
|
"eval_steps_per_second": 10.526, |
|
"step": 10974 |
|
}, |
|
{ |
|
"epoch": 177.42, |
|
"learning_rate": 3.225806451612903e-08, |
|
"loss": 10.2586, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_loss": 11.408693313598633, |
|
"eval_runtime": 1.5266, |
|
"eval_samples_per_second": 79.261, |
|
"eval_steps_per_second": 10.481, |
|
"step": 11036 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_loss": 11.391864776611328, |
|
"eval_runtime": 1.552, |
|
"eval_samples_per_second": 77.965, |
|
"eval_steps_per_second": 10.309, |
|
"step": 11098 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_loss": 11.251392364501953, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.165, |
|
"eval_steps_per_second": 10.468, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_loss": 11.256957054138184, |
|
"eval_runtime": 1.5287, |
|
"eval_samples_per_second": 79.153, |
|
"eval_steps_per_second": 10.467, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_loss": 11.44185733795166, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 79.197, |
|
"eval_steps_per_second": 10.472, |
|
"step": 11284 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_loss": 11.41098690032959, |
|
"eval_runtime": 1.5598, |
|
"eval_samples_per_second": 77.572, |
|
"eval_steps_per_second": 10.257, |
|
"step": 11346 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_loss": 11.391864776611328, |
|
"eval_runtime": 1.5217, |
|
"eval_samples_per_second": 79.518, |
|
"eval_steps_per_second": 10.515, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_loss": 11.242551803588867, |
|
"eval_runtime": 1.5268, |
|
"eval_samples_per_second": 79.25, |
|
"eval_steps_per_second": 10.479, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 185.48, |
|
"learning_rate": 3.1451612903225806e-08, |
|
"loss": 10.2479, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_loss": 11.386938095092773, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 79.228, |
|
"eval_steps_per_second": 10.476, |
|
"step": 11532 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_loss": 11.295321464538574, |
|
"eval_runtime": 1.5298, |
|
"eval_samples_per_second": 79.094, |
|
"eval_steps_per_second": 10.459, |
|
"step": 11594 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_loss": 11.33161449432373, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.166, |
|
"eval_steps_per_second": 10.468, |
|
"step": 11656 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_loss": 11.137282371520996, |
|
"eval_runtime": 1.5487, |
|
"eval_samples_per_second": 78.13, |
|
"eval_steps_per_second": 10.331, |
|
"step": 11718 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_loss": 11.165376663208008, |
|
"eval_runtime": 1.5348, |
|
"eval_samples_per_second": 78.838, |
|
"eval_steps_per_second": 10.425, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_loss": 11.320448875427246, |
|
"eval_runtime": 1.5398, |
|
"eval_samples_per_second": 78.582, |
|
"eval_steps_per_second": 10.391, |
|
"step": 11842 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_loss": 11.312908172607422, |
|
"eval_runtime": 1.5205, |
|
"eval_samples_per_second": 79.582, |
|
"eval_steps_per_second": 10.523, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_loss": 11.271549224853516, |
|
"eval_runtime": 1.5262, |
|
"eval_samples_per_second": 79.283, |
|
"eval_steps_per_second": 10.484, |
|
"step": 11966 |
|
}, |
|
{ |
|
"epoch": 193.55, |
|
"learning_rate": 3.064516129032258e-08, |
|
"loss": 10.2172, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_loss": 11.2645845413208, |
|
"eval_runtime": 1.5507, |
|
"eval_samples_per_second": 78.027, |
|
"eval_steps_per_second": 10.318, |
|
"step": 12028 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"eval_loss": 11.335646629333496, |
|
"eval_runtime": 1.5277, |
|
"eval_samples_per_second": 79.206, |
|
"eval_steps_per_second": 10.474, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_loss": 11.301328659057617, |
|
"eval_runtime": 1.5287, |
|
"eval_samples_per_second": 79.153, |
|
"eval_steps_per_second": 10.466, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"eval_loss": 11.537822723388672, |
|
"eval_runtime": 1.5263, |
|
"eval_samples_per_second": 79.277, |
|
"eval_steps_per_second": 10.483, |
|
"step": 12214 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_loss": 11.364118576049805, |
|
"eval_runtime": 1.529, |
|
"eval_samples_per_second": 79.138, |
|
"eval_steps_per_second": 10.465, |
|
"step": 12276 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"eval_loss": 11.369778633117676, |
|
"eval_runtime": 1.5279, |
|
"eval_samples_per_second": 79.195, |
|
"eval_steps_per_second": 10.472, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 11.267717361450195, |
|
"eval_runtime": 1.5199, |
|
"eval_samples_per_second": 79.612, |
|
"eval_steps_per_second": 10.527, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"eval_loss": 11.297280311584473, |
|
"eval_runtime": 1.521, |
|
"eval_samples_per_second": 79.552, |
|
"eval_steps_per_second": 10.519, |
|
"step": 12462 |
|
}, |
|
{ |
|
"epoch": 201.61, |
|
"learning_rate": 2.9838709677419354e-08, |
|
"loss": 10.2023, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"eval_loss": 11.140520095825195, |
|
"eval_runtime": 1.5299, |
|
"eval_samples_per_second": 79.092, |
|
"eval_steps_per_second": 10.458, |
|
"step": 12524 |
|
}, |
|
{ |
|
"epoch": 203.0, |
|
"eval_loss": 11.32856559753418, |
|
"eval_runtime": 1.5298, |
|
"eval_samples_per_second": 79.093, |
|
"eval_steps_per_second": 10.459, |
|
"step": 12586 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_loss": 11.212227821350098, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.19, |
|
"eval_steps_per_second": 10.471, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 205.0, |
|
"eval_loss": 11.232370376586914, |
|
"eval_runtime": 1.5285, |
|
"eval_samples_per_second": 79.16, |
|
"eval_steps_per_second": 10.467, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"eval_loss": 11.35122299194336, |
|
"eval_runtime": 1.5675, |
|
"eval_samples_per_second": 77.193, |
|
"eval_steps_per_second": 10.207, |
|
"step": 12772 |
|
}, |
|
{ |
|
"epoch": 207.0, |
|
"eval_loss": 11.088189125061035, |
|
"eval_runtime": 1.5298, |
|
"eval_samples_per_second": 79.096, |
|
"eval_steps_per_second": 10.459, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_loss": 11.35745620727539, |
|
"eval_runtime": 1.5205, |
|
"eval_samples_per_second": 79.581, |
|
"eval_steps_per_second": 10.523, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 209.0, |
|
"eval_loss": 11.250239372253418, |
|
"eval_runtime": 1.5204, |
|
"eval_samples_per_second": 79.584, |
|
"eval_steps_per_second": 10.523, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 209.68, |
|
"learning_rate": 2.9032258064516128e-08, |
|
"loss": 10.1926, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"eval_loss": 11.327762603759766, |
|
"eval_runtime": 1.5376, |
|
"eval_samples_per_second": 78.692, |
|
"eval_steps_per_second": 10.406, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 211.0, |
|
"eval_loss": 11.21988582611084, |
|
"eval_runtime": 1.5336, |
|
"eval_samples_per_second": 78.901, |
|
"eval_steps_per_second": 10.433, |
|
"step": 13082 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_loss": 11.256662368774414, |
|
"eval_runtime": 1.5406, |
|
"eval_samples_per_second": 78.541, |
|
"eval_steps_per_second": 10.386, |
|
"step": 13144 |
|
}, |
|
{ |
|
"epoch": 213.0, |
|
"eval_loss": 11.28549861907959, |
|
"eval_runtime": 1.5261, |
|
"eval_samples_per_second": 79.288, |
|
"eval_steps_per_second": 10.484, |
|
"step": 13206 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"eval_loss": 11.189271926879883, |
|
"eval_runtime": 1.5291, |
|
"eval_samples_per_second": 79.132, |
|
"eval_steps_per_second": 10.464, |
|
"step": 13268 |
|
}, |
|
{ |
|
"epoch": 215.0, |
|
"eval_loss": 11.260034561157227, |
|
"eval_runtime": 1.5268, |
|
"eval_samples_per_second": 79.249, |
|
"eval_steps_per_second": 10.479, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_loss": 11.284974098205566, |
|
"eval_runtime": 1.5308, |
|
"eval_samples_per_second": 79.043, |
|
"eval_steps_per_second": 10.452, |
|
"step": 13392 |
|
}, |
|
{ |
|
"epoch": 217.0, |
|
"eval_loss": 11.227448463439941, |
|
"eval_runtime": 1.5233, |
|
"eval_samples_per_second": 79.431, |
|
"eval_steps_per_second": 10.503, |
|
"step": 13454 |
|
}, |
|
{ |
|
"epoch": 217.74, |
|
"learning_rate": 2.8225806451612906e-08, |
|
"loss": 10.2075, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"eval_loss": 11.110801696777344, |
|
"eval_runtime": 1.5287, |
|
"eval_samples_per_second": 79.151, |
|
"eval_steps_per_second": 10.466, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 219.0, |
|
"eval_loss": 11.275555610656738, |
|
"eval_runtime": 1.5279, |
|
"eval_samples_per_second": 79.192, |
|
"eval_steps_per_second": 10.472, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_loss": 11.283916473388672, |
|
"eval_runtime": 1.5257, |
|
"eval_samples_per_second": 79.308, |
|
"eval_steps_per_second": 10.487, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 221.0, |
|
"eval_loss": 11.216109275817871, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.188, |
|
"eval_steps_per_second": 10.471, |
|
"step": 13702 |
|
}, |
|
{ |
|
"epoch": 222.0, |
|
"eval_loss": 11.186559677124023, |
|
"eval_runtime": 1.5313, |
|
"eval_samples_per_second": 79.017, |
|
"eval_steps_per_second": 10.448, |
|
"step": 13764 |
|
}, |
|
{ |
|
"epoch": 223.0, |
|
"eval_loss": 11.12224006652832, |
|
"eval_runtime": 1.5319, |
|
"eval_samples_per_second": 78.989, |
|
"eval_steps_per_second": 10.445, |
|
"step": 13826 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_loss": 11.264845848083496, |
|
"eval_runtime": 1.5275, |
|
"eval_samples_per_second": 79.216, |
|
"eval_steps_per_second": 10.475, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"eval_loss": 11.293610572814941, |
|
"eval_runtime": 1.5184, |
|
"eval_samples_per_second": 79.688, |
|
"eval_steps_per_second": 10.537, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 225.81, |
|
"learning_rate": 2.7419354838709673e-08, |
|
"loss": 10.1822, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"eval_loss": 11.136199951171875, |
|
"eval_runtime": 1.5304, |
|
"eval_samples_per_second": 79.063, |
|
"eval_steps_per_second": 10.455, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 227.0, |
|
"eval_loss": 11.259844779968262, |
|
"eval_runtime": 1.5312, |
|
"eval_samples_per_second": 79.022, |
|
"eval_steps_per_second": 10.449, |
|
"step": 14074 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_loss": 11.335241317749023, |
|
"eval_runtime": 1.5268, |
|
"eval_samples_per_second": 79.253, |
|
"eval_steps_per_second": 10.48, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 229.0, |
|
"eval_loss": 11.118280410766602, |
|
"eval_runtime": 1.529, |
|
"eval_samples_per_second": 79.138, |
|
"eval_steps_per_second": 10.465, |
|
"step": 14198 |
|
}, |
|
{ |
|
"epoch": 230.0, |
|
"eval_loss": 11.207478523254395, |
|
"eval_runtime": 1.5282, |
|
"eval_samples_per_second": 79.178, |
|
"eval_steps_per_second": 10.47, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 231.0, |
|
"eval_loss": 11.190764427185059, |
|
"eval_runtime": 1.531, |
|
"eval_samples_per_second": 79.032, |
|
"eval_steps_per_second": 10.451, |
|
"step": 14322 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_loss": 11.133115768432617, |
|
"eval_runtime": 1.5224, |
|
"eval_samples_per_second": 79.482, |
|
"eval_steps_per_second": 10.51, |
|
"step": 14384 |
|
}, |
|
{ |
|
"epoch": 233.0, |
|
"eval_loss": 11.092888832092285, |
|
"eval_runtime": 1.521, |
|
"eval_samples_per_second": 79.551, |
|
"eval_steps_per_second": 10.519, |
|
"step": 14446 |
|
}, |
|
{ |
|
"epoch": 233.87, |
|
"learning_rate": 2.661290322580645e-08, |
|
"loss": 10.1863, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 234.0, |
|
"eval_loss": 11.106066703796387, |
|
"eval_runtime": 1.5271, |
|
"eval_samples_per_second": 79.233, |
|
"eval_steps_per_second": 10.477, |
|
"step": 14508 |
|
}, |
|
{ |
|
"epoch": 235.0, |
|
"eval_loss": 11.054638862609863, |
|
"eval_runtime": 1.5301, |
|
"eval_samples_per_second": 79.078, |
|
"eval_steps_per_second": 10.457, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_loss": 11.176545143127441, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.189, |
|
"eval_steps_per_second": 10.471, |
|
"step": 14632 |
|
}, |
|
{ |
|
"epoch": 237.0, |
|
"eval_loss": 11.25126838684082, |
|
"eval_runtime": 1.5393, |
|
"eval_samples_per_second": 78.608, |
|
"eval_steps_per_second": 10.394, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 238.0, |
|
"eval_loss": 11.091902732849121, |
|
"eval_runtime": 1.5287, |
|
"eval_samples_per_second": 79.155, |
|
"eval_steps_per_second": 10.467, |
|
"step": 14756 |
|
}, |
|
{ |
|
"epoch": 239.0, |
|
"eval_loss": 11.2738676071167, |
|
"eval_runtime": 1.5269, |
|
"eval_samples_per_second": 79.244, |
|
"eval_steps_per_second": 10.479, |
|
"step": 14818 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_loss": 11.034156799316406, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.619, |
|
"eval_steps_per_second": 10.528, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"eval_loss": 11.236822128295898, |
|
"eval_runtime": 1.525, |
|
"eval_samples_per_second": 79.342, |
|
"eval_steps_per_second": 10.492, |
|
"step": 14942 |
|
}, |
|
{ |
|
"epoch": 241.94, |
|
"learning_rate": 2.5806451612903225e-08, |
|
"loss": 10.1724, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 242.0, |
|
"eval_loss": 11.25521469116211, |
|
"eval_runtime": 1.5321, |
|
"eval_samples_per_second": 78.976, |
|
"eval_steps_per_second": 10.443, |
|
"step": 15004 |
|
}, |
|
{ |
|
"epoch": 243.0, |
|
"eval_loss": 11.151459693908691, |
|
"eval_runtime": 1.5265, |
|
"eval_samples_per_second": 79.267, |
|
"eval_steps_per_second": 10.482, |
|
"step": 15066 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_loss": 11.049065589904785, |
|
"eval_runtime": 1.525, |
|
"eval_samples_per_second": 79.342, |
|
"eval_steps_per_second": 10.492, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 245.0, |
|
"eval_loss": 11.018394470214844, |
|
"eval_runtime": 1.5424, |
|
"eval_samples_per_second": 78.451, |
|
"eval_steps_per_second": 10.374, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 246.0, |
|
"eval_loss": 11.378674507141113, |
|
"eval_runtime": 1.5254, |
|
"eval_samples_per_second": 79.325, |
|
"eval_steps_per_second": 10.489, |
|
"step": 15252 |
|
}, |
|
{ |
|
"epoch": 247.0, |
|
"eval_loss": 11.232932090759277, |
|
"eval_runtime": 1.5294, |
|
"eval_samples_per_second": 79.117, |
|
"eval_steps_per_second": 10.462, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_loss": 11.072883605957031, |
|
"eval_runtime": 1.5274, |
|
"eval_samples_per_second": 79.221, |
|
"eval_steps_per_second": 10.476, |
|
"step": 15376 |
|
}, |
|
{ |
|
"epoch": 249.0, |
|
"eval_loss": 11.147173881530762, |
|
"eval_runtime": 1.5246, |
|
"eval_samples_per_second": 79.366, |
|
"eval_steps_per_second": 10.495, |
|
"step": 15438 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 2.5e-08, |
|
"loss": 10.1671, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_loss": 11.06186294555664, |
|
"eval_runtime": 1.5501, |
|
"eval_samples_per_second": 78.061, |
|
"eval_steps_per_second": 10.322, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"eval_loss": 11.219667434692383, |
|
"eval_runtime": 1.5267, |
|
"eval_samples_per_second": 79.257, |
|
"eval_steps_per_second": 10.48, |
|
"step": 15562 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_loss": 11.224080085754395, |
|
"eval_runtime": 1.5293, |
|
"eval_samples_per_second": 79.121, |
|
"eval_steps_per_second": 10.462, |
|
"step": 15624 |
|
}, |
|
{ |
|
"epoch": 253.0, |
|
"eval_loss": 11.310192108154297, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.187, |
|
"eval_steps_per_second": 10.471, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 254.0, |
|
"eval_loss": 11.289962768554688, |
|
"eval_runtime": 1.5252, |
|
"eval_samples_per_second": 79.335, |
|
"eval_steps_per_second": 10.491, |
|
"step": 15748 |
|
}, |
|
{ |
|
"epoch": 255.0, |
|
"eval_loss": 11.300124168395996, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 79.2, |
|
"eval_steps_per_second": 10.473, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_loss": 11.220646858215332, |
|
"eval_runtime": 1.5215, |
|
"eval_samples_per_second": 79.528, |
|
"eval_steps_per_second": 10.516, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 257.0, |
|
"eval_loss": 11.102697372436523, |
|
"eval_runtime": 1.5207, |
|
"eval_samples_per_second": 79.571, |
|
"eval_steps_per_second": 10.522, |
|
"step": 15934 |
|
}, |
|
{ |
|
"epoch": 258.0, |
|
"eval_loss": 11.306656837463379, |
|
"eval_runtime": 1.5184, |
|
"eval_samples_per_second": 79.692, |
|
"eval_steps_per_second": 10.538, |
|
"step": 15996 |
|
}, |
|
{ |
|
"epoch": 258.06, |
|
"learning_rate": 2.4193548387096773e-08, |
|
"loss": 10.1579, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"eval_loss": 11.30165958404541, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 79.201, |
|
"eval_steps_per_second": 10.473, |
|
"step": 16058 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_loss": 11.098042488098145, |
|
"eval_runtime": 1.5428, |
|
"eval_samples_per_second": 78.43, |
|
"eval_steps_per_second": 10.371, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 261.0, |
|
"eval_loss": 11.215791702270508, |
|
"eval_runtime": 1.5285, |
|
"eval_samples_per_second": 79.163, |
|
"eval_steps_per_second": 10.468, |
|
"step": 16182 |
|
}, |
|
{ |
|
"epoch": 262.0, |
|
"eval_loss": 10.940754890441895, |
|
"eval_runtime": 1.5416, |
|
"eval_samples_per_second": 78.488, |
|
"eval_steps_per_second": 10.379, |
|
"step": 16244 |
|
}, |
|
{ |
|
"epoch": 263.0, |
|
"eval_loss": 11.108644485473633, |
|
"eval_runtime": 1.5277, |
|
"eval_samples_per_second": 79.205, |
|
"eval_steps_per_second": 10.473, |
|
"step": 16306 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_loss": 11.28579330444336, |
|
"eval_runtime": 1.5188, |
|
"eval_samples_per_second": 79.67, |
|
"eval_steps_per_second": 10.535, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 265.0, |
|
"eval_loss": 11.280501365661621, |
|
"eval_runtime": 1.518, |
|
"eval_samples_per_second": 79.708, |
|
"eval_steps_per_second": 10.54, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 266.0, |
|
"eval_loss": 11.08869457244873, |
|
"eval_runtime": 1.5206, |
|
"eval_samples_per_second": 79.573, |
|
"eval_steps_per_second": 10.522, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 266.13, |
|
"learning_rate": 2.3387096774193547e-08, |
|
"loss": 10.1552, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 267.0, |
|
"eval_loss": 11.171466827392578, |
|
"eval_runtime": 1.5404, |
|
"eval_samples_per_second": 78.554, |
|
"eval_steps_per_second": 10.387, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_loss": 11.1470365524292, |
|
"eval_runtime": 1.5289, |
|
"eval_samples_per_second": 79.141, |
|
"eval_steps_per_second": 10.465, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 269.0, |
|
"eval_loss": 11.106681823730469, |
|
"eval_runtime": 1.5263, |
|
"eval_samples_per_second": 79.276, |
|
"eval_steps_per_second": 10.483, |
|
"step": 16678 |
|
}, |
|
{ |
|
"epoch": 270.0, |
|
"eval_loss": 11.163908958435059, |
|
"eval_runtime": 1.5252, |
|
"eval_samples_per_second": 79.336, |
|
"eval_steps_per_second": 10.491, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 271.0, |
|
"eval_loss": 11.146965980529785, |
|
"eval_runtime": 1.5255, |
|
"eval_samples_per_second": 79.316, |
|
"eval_steps_per_second": 10.488, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_loss": 11.206341743469238, |
|
"eval_runtime": 1.5228, |
|
"eval_samples_per_second": 79.461, |
|
"eval_steps_per_second": 10.507, |
|
"step": 16864 |
|
}, |
|
{ |
|
"epoch": 273.0, |
|
"eval_loss": 11.16470718383789, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.62, |
|
"eval_steps_per_second": 10.528, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 274.0, |
|
"eval_loss": 11.006061553955078, |
|
"eval_runtime": 1.5209, |
|
"eval_samples_per_second": 79.559, |
|
"eval_steps_per_second": 10.52, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 274.19, |
|
"learning_rate": 2.258064516129032e-08, |
|
"loss": 10.1468, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"eval_loss": 11.382197380065918, |
|
"eval_runtime": 1.5267, |
|
"eval_samples_per_second": 79.254, |
|
"eval_steps_per_second": 10.48, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_loss": 10.946673393249512, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 79.2, |
|
"eval_steps_per_second": 10.473, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 277.0, |
|
"eval_loss": 11.2734956741333, |
|
"eval_runtime": 1.5371, |
|
"eval_samples_per_second": 78.717, |
|
"eval_steps_per_second": 10.409, |
|
"step": 17174 |
|
}, |
|
{ |
|
"epoch": 278.0, |
|
"eval_loss": 11.013189315795898, |
|
"eval_runtime": 1.5409, |
|
"eval_samples_per_second": 78.528, |
|
"eval_steps_per_second": 10.384, |
|
"step": 17236 |
|
}, |
|
{ |
|
"epoch": 279.0, |
|
"eval_loss": 11.160365104675293, |
|
"eval_runtime": 1.5281, |
|
"eval_samples_per_second": 79.181, |
|
"eval_steps_per_second": 10.47, |
|
"step": 17298 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_loss": 11.16196060180664, |
|
"eval_runtime": 1.5194, |
|
"eval_samples_per_second": 79.636, |
|
"eval_steps_per_second": 10.53, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 281.0, |
|
"eval_loss": 11.064959526062012, |
|
"eval_runtime": 1.5184, |
|
"eval_samples_per_second": 79.689, |
|
"eval_steps_per_second": 10.537, |
|
"step": 17422 |
|
}, |
|
{ |
|
"epoch": 282.0, |
|
"eval_loss": 10.991950035095215, |
|
"eval_runtime": 1.5262, |
|
"eval_samples_per_second": 79.284, |
|
"eval_steps_per_second": 10.484, |
|
"step": 17484 |
|
}, |
|
{ |
|
"epoch": 282.26, |
|
"learning_rate": 2.1774193548387095e-08, |
|
"loss": 10.1402, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 283.0, |
|
"eval_loss": 11.044986724853516, |
|
"eval_runtime": 1.5294, |
|
"eval_samples_per_second": 79.118, |
|
"eval_steps_per_second": 10.462, |
|
"step": 17546 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_loss": 10.982977867126465, |
|
"eval_runtime": 1.5353, |
|
"eval_samples_per_second": 78.811, |
|
"eval_steps_per_second": 10.421, |
|
"step": 17608 |
|
}, |
|
{ |
|
"epoch": 285.0, |
|
"eval_loss": 11.013529777526855, |
|
"eval_runtime": 1.5385, |
|
"eval_samples_per_second": 78.649, |
|
"eval_steps_per_second": 10.4, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 286.0, |
|
"eval_loss": 11.264179229736328, |
|
"eval_runtime": 1.5281, |
|
"eval_samples_per_second": 79.183, |
|
"eval_steps_per_second": 10.47, |
|
"step": 17732 |
|
}, |
|
{ |
|
"epoch": 287.0, |
|
"eval_loss": 11.11242389678955, |
|
"eval_runtime": 1.5289, |
|
"eval_samples_per_second": 79.142, |
|
"eval_steps_per_second": 10.465, |
|
"step": 17794 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_loss": 11.192839622497559, |
|
"eval_runtime": 1.5192, |
|
"eval_samples_per_second": 79.646, |
|
"eval_steps_per_second": 10.532, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 289.0, |
|
"eval_loss": 11.128388404846191, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.167, |
|
"eval_steps_per_second": 10.468, |
|
"step": 17918 |
|
}, |
|
{ |
|
"epoch": 290.0, |
|
"eval_loss": 11.208813667297363, |
|
"eval_runtime": 1.5227, |
|
"eval_samples_per_second": 79.463, |
|
"eval_steps_per_second": 10.507, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 290.32, |
|
"learning_rate": 2.096774193548387e-08, |
|
"loss": 10.147, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 291.0, |
|
"eval_loss": 11.075602531433105, |
|
"eval_runtime": 1.5304, |
|
"eval_samples_per_second": 79.062, |
|
"eval_steps_per_second": 10.454, |
|
"step": 18042 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_loss": 11.051429748535156, |
|
"eval_runtime": 1.5297, |
|
"eval_samples_per_second": 79.103, |
|
"eval_steps_per_second": 10.46, |
|
"step": 18104 |
|
}, |
|
{ |
|
"epoch": 293.0, |
|
"eval_loss": 11.14745044708252, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.191, |
|
"eval_steps_per_second": 10.471, |
|
"step": 18166 |
|
}, |
|
{ |
|
"epoch": 294.0, |
|
"eval_loss": 11.044754981994629, |
|
"eval_runtime": 1.5371, |
|
"eval_samples_per_second": 78.718, |
|
"eval_steps_per_second": 10.409, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 295.0, |
|
"eval_loss": 11.241650581359863, |
|
"eval_runtime": 1.5413, |
|
"eval_samples_per_second": 78.505, |
|
"eval_steps_per_second": 10.381, |
|
"step": 18290 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_loss": 11.107006072998047, |
|
"eval_runtime": 1.5336, |
|
"eval_samples_per_second": 78.901, |
|
"eval_steps_per_second": 10.433, |
|
"step": 18352 |
|
}, |
|
{ |
|
"epoch": 297.0, |
|
"eval_loss": 11.014945030212402, |
|
"eval_runtime": 1.5335, |
|
"eval_samples_per_second": 78.906, |
|
"eval_steps_per_second": 10.434, |
|
"step": 18414 |
|
}, |
|
{ |
|
"epoch": 298.0, |
|
"eval_loss": 11.168006896972656, |
|
"eval_runtime": 1.5218, |
|
"eval_samples_per_second": 79.51, |
|
"eval_steps_per_second": 10.514, |
|
"step": 18476 |
|
}, |
|
{ |
|
"epoch": 298.39, |
|
"learning_rate": 2.0161290322580644e-08, |
|
"loss": 10.1331, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 299.0, |
|
"eval_loss": 11.100564956665039, |
|
"eval_runtime": 1.5329, |
|
"eval_samples_per_second": 78.935, |
|
"eval_steps_per_second": 10.438, |
|
"step": 18538 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_loss": 10.944679260253906, |
|
"eval_runtime": 1.5398, |
|
"eval_samples_per_second": 78.581, |
|
"eval_steps_per_second": 10.391, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 301.0, |
|
"eval_loss": 10.988174438476562, |
|
"eval_runtime": 1.54, |
|
"eval_samples_per_second": 78.569, |
|
"eval_steps_per_second": 10.389, |
|
"step": 18662 |
|
}, |
|
{ |
|
"epoch": 302.0, |
|
"eval_loss": 11.02690315246582, |
|
"eval_runtime": 1.5425, |
|
"eval_samples_per_second": 78.446, |
|
"eval_steps_per_second": 10.373, |
|
"step": 18724 |
|
}, |
|
{ |
|
"epoch": 303.0, |
|
"eval_loss": 10.976366996765137, |
|
"eval_runtime": 1.5362, |
|
"eval_samples_per_second": 78.765, |
|
"eval_steps_per_second": 10.415, |
|
"step": 18786 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_loss": 11.088923454284668, |
|
"eval_runtime": 1.5246, |
|
"eval_samples_per_second": 79.365, |
|
"eval_steps_per_second": 10.495, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 305.0, |
|
"eval_loss": 11.024333953857422, |
|
"eval_runtime": 1.5195, |
|
"eval_samples_per_second": 79.629, |
|
"eval_steps_per_second": 10.529, |
|
"step": 18910 |
|
}, |
|
{ |
|
"epoch": 306.0, |
|
"eval_loss": 11.025029182434082, |
|
"eval_runtime": 1.524, |
|
"eval_samples_per_second": 79.394, |
|
"eval_steps_per_second": 10.498, |
|
"step": 18972 |
|
}, |
|
{ |
|
"epoch": 306.45, |
|
"learning_rate": 1.9354838709677418e-08, |
|
"loss": 10.1244, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 307.0, |
|
"eval_loss": 11.113330841064453, |
|
"eval_runtime": 1.54, |
|
"eval_samples_per_second": 78.569, |
|
"eval_steps_per_second": 10.389, |
|
"step": 19034 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_loss": 11.013413429260254, |
|
"eval_runtime": 1.5273, |
|
"eval_samples_per_second": 79.226, |
|
"eval_steps_per_second": 10.476, |
|
"step": 19096 |
|
}, |
|
{ |
|
"epoch": 309.0, |
|
"eval_loss": 10.995118141174316, |
|
"eval_runtime": 1.5332, |
|
"eval_samples_per_second": 78.921, |
|
"eval_steps_per_second": 10.436, |
|
"step": 19158 |
|
}, |
|
{ |
|
"epoch": 310.0, |
|
"eval_loss": 11.261984825134277, |
|
"eval_runtime": 1.5238, |
|
"eval_samples_per_second": 79.407, |
|
"eval_steps_per_second": 10.5, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 311.0, |
|
"eval_loss": 11.110681533813477, |
|
"eval_runtime": 1.5588, |
|
"eval_samples_per_second": 77.623, |
|
"eval_steps_per_second": 10.264, |
|
"step": 19282 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_loss": 11.05971622467041, |
|
"eval_runtime": 1.5228, |
|
"eval_samples_per_second": 79.459, |
|
"eval_steps_per_second": 10.507, |
|
"step": 19344 |
|
}, |
|
{ |
|
"epoch": 313.0, |
|
"eval_loss": 10.960829734802246, |
|
"eval_runtime": 1.5312, |
|
"eval_samples_per_second": 79.024, |
|
"eval_steps_per_second": 10.449, |
|
"step": 19406 |
|
}, |
|
{ |
|
"epoch": 314.0, |
|
"eval_loss": 10.997540473937988, |
|
"eval_runtime": 1.5275, |
|
"eval_samples_per_second": 79.216, |
|
"eval_steps_per_second": 10.475, |
|
"step": 19468 |
|
}, |
|
{ |
|
"epoch": 314.52, |
|
"learning_rate": 1.8548387096774192e-08, |
|
"loss": 10.1251, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 315.0, |
|
"eval_loss": 11.017022132873535, |
|
"eval_runtime": 1.5516, |
|
"eval_samples_per_second": 77.982, |
|
"eval_steps_per_second": 10.312, |
|
"step": 19530 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_loss": 11.152543067932129, |
|
"eval_runtime": 1.5321, |
|
"eval_samples_per_second": 78.977, |
|
"eval_steps_per_second": 10.443, |
|
"step": 19592 |
|
}, |
|
{ |
|
"epoch": 317.0, |
|
"eval_loss": 11.0108003616333, |
|
"eval_runtime": 1.521, |
|
"eval_samples_per_second": 79.554, |
|
"eval_steps_per_second": 10.52, |
|
"step": 19654 |
|
}, |
|
{ |
|
"epoch": 318.0, |
|
"eval_loss": 11.024676322937012, |
|
"eval_runtime": 1.5196, |
|
"eval_samples_per_second": 79.626, |
|
"eval_steps_per_second": 10.529, |
|
"step": 19716 |
|
}, |
|
{ |
|
"epoch": 319.0, |
|
"eval_loss": 11.113091468811035, |
|
"eval_runtime": 1.5205, |
|
"eval_samples_per_second": 79.581, |
|
"eval_steps_per_second": 10.523, |
|
"step": 19778 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_loss": 11.04623794555664, |
|
"eval_runtime": 1.5226, |
|
"eval_samples_per_second": 79.468, |
|
"eval_steps_per_second": 10.508, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 321.0, |
|
"eval_loss": 11.06059741973877, |
|
"eval_runtime": 1.5198, |
|
"eval_samples_per_second": 79.613, |
|
"eval_steps_per_second": 10.527, |
|
"step": 19902 |
|
}, |
|
{ |
|
"epoch": 322.0, |
|
"eval_loss": 11.004892349243164, |
|
"eval_runtime": 1.5295, |
|
"eval_samples_per_second": 79.109, |
|
"eval_steps_per_second": 10.461, |
|
"step": 19964 |
|
}, |
|
{ |
|
"epoch": 322.58, |
|
"learning_rate": 1.774193548387097e-08, |
|
"loss": 10.1178, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 323.0, |
|
"eval_loss": 11.110086441040039, |
|
"eval_runtime": 1.5366, |
|
"eval_samples_per_second": 78.746, |
|
"eval_steps_per_second": 10.413, |
|
"step": 20026 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_loss": 11.034819602966309, |
|
"eval_runtime": 1.5318, |
|
"eval_samples_per_second": 78.992, |
|
"eval_steps_per_second": 10.445, |
|
"step": 20088 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"eval_loss": 11.286378860473633, |
|
"eval_runtime": 1.5319, |
|
"eval_samples_per_second": 78.988, |
|
"eval_steps_per_second": 10.445, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 326.0, |
|
"eval_loss": 10.950843811035156, |
|
"eval_runtime": 1.5282, |
|
"eval_samples_per_second": 79.177, |
|
"eval_steps_per_second": 10.47, |
|
"step": 20212 |
|
}, |
|
{ |
|
"epoch": 327.0, |
|
"eval_loss": 10.995577812194824, |
|
"eval_runtime": 1.5301, |
|
"eval_samples_per_second": 79.078, |
|
"eval_steps_per_second": 10.457, |
|
"step": 20274 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_loss": 10.956055641174316, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.62, |
|
"eval_steps_per_second": 10.528, |
|
"step": 20336 |
|
}, |
|
{ |
|
"epoch": 329.0, |
|
"eval_loss": 10.953376770019531, |
|
"eval_runtime": 1.522, |
|
"eval_samples_per_second": 79.498, |
|
"eval_steps_per_second": 10.512, |
|
"step": 20398 |
|
}, |
|
{ |
|
"epoch": 330.0, |
|
"eval_loss": 11.365254402160645, |
|
"eval_runtime": 1.5263, |
|
"eval_samples_per_second": 79.275, |
|
"eval_steps_per_second": 10.483, |
|
"step": 20460 |
|
}, |
|
{ |
|
"epoch": 330.65, |
|
"learning_rate": 1.693548387096774e-08, |
|
"loss": 10.1215, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 331.0, |
|
"eval_loss": 11.118794441223145, |
|
"eval_runtime": 1.5308, |
|
"eval_samples_per_second": 79.044, |
|
"eval_steps_per_second": 10.452, |
|
"step": 20522 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_loss": 11.090803146362305, |
|
"eval_runtime": 1.5271, |
|
"eval_samples_per_second": 79.237, |
|
"eval_steps_per_second": 10.478, |
|
"step": 20584 |
|
}, |
|
{ |
|
"epoch": 333.0, |
|
"eval_loss": 10.98558235168457, |
|
"eval_runtime": 1.5289, |
|
"eval_samples_per_second": 79.144, |
|
"eval_steps_per_second": 10.465, |
|
"step": 20646 |
|
}, |
|
{ |
|
"epoch": 334.0, |
|
"eval_loss": 10.987250328063965, |
|
"eval_runtime": 1.5588, |
|
"eval_samples_per_second": 77.622, |
|
"eval_steps_per_second": 10.264, |
|
"step": 20708 |
|
}, |
|
{ |
|
"epoch": 335.0, |
|
"eval_loss": 11.048944473266602, |
|
"eval_runtime": 1.5348, |
|
"eval_samples_per_second": 78.84, |
|
"eval_steps_per_second": 10.425, |
|
"step": 20770 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_loss": 11.112990379333496, |
|
"eval_runtime": 1.5248, |
|
"eval_samples_per_second": 79.355, |
|
"eval_steps_per_second": 10.493, |
|
"step": 20832 |
|
}, |
|
{ |
|
"epoch": 337.0, |
|
"eval_loss": 10.877412796020508, |
|
"eval_runtime": 1.5203, |
|
"eval_samples_per_second": 79.589, |
|
"eval_steps_per_second": 10.524, |
|
"step": 20894 |
|
}, |
|
{ |
|
"epoch": 338.0, |
|
"eval_loss": 10.787759780883789, |
|
"eval_runtime": 1.5247, |
|
"eval_samples_per_second": 79.361, |
|
"eval_steps_per_second": 10.494, |
|
"step": 20956 |
|
}, |
|
{ |
|
"epoch": 338.71, |
|
"learning_rate": 1.6129032258064514e-08, |
|
"loss": 10.1267, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 339.0, |
|
"eval_loss": 11.173955917358398, |
|
"eval_runtime": 1.535, |
|
"eval_samples_per_second": 78.827, |
|
"eval_steps_per_second": 10.423, |
|
"step": 21018 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_loss": 11.00390911102295, |
|
"eval_runtime": 1.5311, |
|
"eval_samples_per_second": 79.03, |
|
"eval_steps_per_second": 10.45, |
|
"step": 21080 |
|
}, |
|
{ |
|
"epoch": 341.0, |
|
"eval_loss": 11.032459259033203, |
|
"eval_runtime": 1.5355, |
|
"eval_samples_per_second": 78.804, |
|
"eval_steps_per_second": 10.42, |
|
"step": 21142 |
|
}, |
|
{ |
|
"epoch": 342.0, |
|
"eval_loss": 11.04984188079834, |
|
"eval_runtime": 1.5396, |
|
"eval_samples_per_second": 78.591, |
|
"eval_steps_per_second": 10.392, |
|
"step": 21204 |
|
}, |
|
{ |
|
"epoch": 343.0, |
|
"eval_loss": 11.023720741271973, |
|
"eval_runtime": 1.5305, |
|
"eval_samples_per_second": 79.057, |
|
"eval_steps_per_second": 10.454, |
|
"step": 21266 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_loss": 11.093502044677734, |
|
"eval_runtime": 1.5353, |
|
"eval_samples_per_second": 78.813, |
|
"eval_steps_per_second": 10.422, |
|
"step": 21328 |
|
}, |
|
{ |
|
"epoch": 345.0, |
|
"eval_loss": 10.816137313842773, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.62, |
|
"eval_steps_per_second": 10.528, |
|
"step": 21390 |
|
}, |
|
{ |
|
"epoch": 346.0, |
|
"eval_loss": 11.070144653320312, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.623, |
|
"eval_steps_per_second": 10.529, |
|
"step": 21452 |
|
}, |
|
{ |
|
"epoch": 346.77, |
|
"learning_rate": 1.532258064516129e-08, |
|
"loss": 10.1105, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 347.0, |
|
"eval_loss": 10.87633991241455, |
|
"eval_runtime": 1.5347, |
|
"eval_samples_per_second": 78.841, |
|
"eval_steps_per_second": 10.425, |
|
"step": 21514 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_loss": 11.06225299835205, |
|
"eval_runtime": 1.5319, |
|
"eval_samples_per_second": 78.984, |
|
"eval_steps_per_second": 10.444, |
|
"step": 21576 |
|
}, |
|
{ |
|
"epoch": 349.0, |
|
"eval_loss": 10.950231552124023, |
|
"eval_runtime": 1.5483, |
|
"eval_samples_per_second": 78.15, |
|
"eval_steps_per_second": 10.334, |
|
"step": 21638 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"eval_loss": 11.076042175292969, |
|
"eval_runtime": 1.5401, |
|
"eval_samples_per_second": 78.567, |
|
"eval_steps_per_second": 10.389, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 351.0, |
|
"eval_loss": 11.147188186645508, |
|
"eval_runtime": 1.5351, |
|
"eval_samples_per_second": 78.823, |
|
"eval_steps_per_second": 10.423, |
|
"step": 21762 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_loss": 11.15673828125, |
|
"eval_runtime": 1.5292, |
|
"eval_samples_per_second": 79.126, |
|
"eval_steps_per_second": 10.463, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 353.0, |
|
"eval_loss": 11.01186466217041, |
|
"eval_runtime": 1.5267, |
|
"eval_samples_per_second": 79.255, |
|
"eval_steps_per_second": 10.48, |
|
"step": 21886 |
|
}, |
|
{ |
|
"epoch": 354.0, |
|
"eval_loss": 11.07396125793457, |
|
"eval_runtime": 1.5223, |
|
"eval_samples_per_second": 79.486, |
|
"eval_steps_per_second": 10.511, |
|
"step": 21948 |
|
}, |
|
{ |
|
"epoch": 354.84, |
|
"learning_rate": 1.4516129032258064e-08, |
|
"loss": 10.1124, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 355.0, |
|
"eval_loss": 11.035076141357422, |
|
"eval_runtime": 1.5453, |
|
"eval_samples_per_second": 78.302, |
|
"eval_steps_per_second": 10.354, |
|
"step": 22010 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_loss": 10.995853424072266, |
|
"eval_runtime": 1.5492, |
|
"eval_samples_per_second": 78.107, |
|
"eval_steps_per_second": 10.328, |
|
"step": 22072 |
|
}, |
|
{ |
|
"epoch": 357.0, |
|
"eval_loss": 10.982865333557129, |
|
"eval_runtime": 1.5378, |
|
"eval_samples_per_second": 78.686, |
|
"eval_steps_per_second": 10.405, |
|
"step": 22134 |
|
}, |
|
{ |
|
"epoch": 358.0, |
|
"eval_loss": 11.162384986877441, |
|
"eval_runtime": 1.5367, |
|
"eval_samples_per_second": 78.739, |
|
"eval_steps_per_second": 10.412, |
|
"step": 22196 |
|
}, |
|
{ |
|
"epoch": 359.0, |
|
"eval_loss": 11.10819149017334, |
|
"eval_runtime": 1.5353, |
|
"eval_samples_per_second": 78.812, |
|
"eval_steps_per_second": 10.421, |
|
"step": 22258 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_loss": 11.049622535705566, |
|
"eval_runtime": 1.5377, |
|
"eval_samples_per_second": 78.689, |
|
"eval_steps_per_second": 10.405, |
|
"step": 22320 |
|
}, |
|
{ |
|
"epoch": 361.0, |
|
"eval_loss": 11.111467361450195, |
|
"eval_runtime": 1.52, |
|
"eval_samples_per_second": 79.604, |
|
"eval_steps_per_second": 10.526, |
|
"step": 22382 |
|
}, |
|
{ |
|
"epoch": 362.0, |
|
"eval_loss": 10.887182235717773, |
|
"eval_runtime": 1.5238, |
|
"eval_samples_per_second": 79.408, |
|
"eval_steps_per_second": 10.5, |
|
"step": 22444 |
|
}, |
|
{ |
|
"epoch": 362.9, |
|
"learning_rate": 1.3709677419354837e-08, |
|
"loss": 10.1057, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 363.0, |
|
"eval_loss": 10.899175643920898, |
|
"eval_runtime": 1.5381, |
|
"eval_samples_per_second": 78.671, |
|
"eval_steps_per_second": 10.403, |
|
"step": 22506 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_loss": 11.026792526245117, |
|
"eval_runtime": 1.5471, |
|
"eval_samples_per_second": 78.21, |
|
"eval_steps_per_second": 10.342, |
|
"step": 22568 |
|
}, |
|
{ |
|
"epoch": 365.0, |
|
"eval_loss": 10.96151065826416, |
|
"eval_runtime": 1.5277, |
|
"eval_samples_per_second": 79.204, |
|
"eval_steps_per_second": 10.473, |
|
"step": 22630 |
|
}, |
|
{ |
|
"epoch": 366.0, |
|
"eval_loss": 10.967521667480469, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.165, |
|
"eval_steps_per_second": 10.468, |
|
"step": 22692 |
|
}, |
|
{ |
|
"epoch": 367.0, |
|
"eval_loss": 11.17754077911377, |
|
"eval_runtime": 1.5345, |
|
"eval_samples_per_second": 78.853, |
|
"eval_steps_per_second": 10.427, |
|
"step": 22754 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_loss": 10.994891166687012, |
|
"eval_runtime": 1.5285, |
|
"eval_samples_per_second": 79.161, |
|
"eval_steps_per_second": 10.468, |
|
"step": 22816 |
|
}, |
|
{ |
|
"epoch": 369.0, |
|
"eval_loss": 11.081649780273438, |
|
"eval_runtime": 1.5214, |
|
"eval_samples_per_second": 79.533, |
|
"eval_steps_per_second": 10.517, |
|
"step": 22878 |
|
}, |
|
{ |
|
"epoch": 370.0, |
|
"eval_loss": 11.051599502563477, |
|
"eval_runtime": 1.5213, |
|
"eval_samples_per_second": 79.538, |
|
"eval_steps_per_second": 10.517, |
|
"step": 22940 |
|
}, |
|
{ |
|
"epoch": 370.97, |
|
"learning_rate": 1.2903225806451612e-08, |
|
"loss": 10.1217, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 371.0, |
|
"eval_loss": 11.067154884338379, |
|
"eval_runtime": 1.5291, |
|
"eval_samples_per_second": 79.131, |
|
"eval_steps_per_second": 10.464, |
|
"step": 23002 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_loss": 10.966989517211914, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 79.228, |
|
"eval_steps_per_second": 10.476, |
|
"step": 23064 |
|
}, |
|
{ |
|
"epoch": 373.0, |
|
"eval_loss": 11.200106620788574, |
|
"eval_runtime": 1.5333, |
|
"eval_samples_per_second": 78.916, |
|
"eval_steps_per_second": 10.435, |
|
"step": 23126 |
|
}, |
|
{ |
|
"epoch": 374.0, |
|
"eval_loss": 11.017847061157227, |
|
"eval_runtime": 1.534, |
|
"eval_samples_per_second": 78.88, |
|
"eval_steps_per_second": 10.43, |
|
"step": 23188 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"eval_loss": 10.895892143249512, |
|
"eval_runtime": 1.5401, |
|
"eval_samples_per_second": 78.565, |
|
"eval_steps_per_second": 10.389, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_loss": 11.167061805725098, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.188, |
|
"eval_steps_per_second": 10.471, |
|
"step": 23312 |
|
}, |
|
{ |
|
"epoch": 377.0, |
|
"eval_loss": 11.18134593963623, |
|
"eval_runtime": 1.5229, |
|
"eval_samples_per_second": 79.454, |
|
"eval_steps_per_second": 10.506, |
|
"step": 23374 |
|
}, |
|
{ |
|
"epoch": 378.0, |
|
"eval_loss": 11.156011581420898, |
|
"eval_runtime": 1.5299, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 10.458, |
|
"step": 23436 |
|
}, |
|
{ |
|
"epoch": 379.0, |
|
"eval_loss": 11.084783554077148, |
|
"eval_runtime": 1.5211, |
|
"eval_samples_per_second": 79.545, |
|
"eval_steps_per_second": 10.518, |
|
"step": 23498 |
|
}, |
|
{ |
|
"epoch": 379.03, |
|
"learning_rate": 1.2096774193548386e-08, |
|
"loss": 10.1098, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_loss": 11.04666805267334, |
|
"eval_runtime": 1.5221, |
|
"eval_samples_per_second": 79.493, |
|
"eval_steps_per_second": 10.512, |
|
"step": 23560 |
|
}, |
|
{ |
|
"epoch": 381.0, |
|
"eval_loss": 11.148889541625977, |
|
"eval_runtime": 1.548, |
|
"eval_samples_per_second": 78.166, |
|
"eval_steps_per_second": 10.336, |
|
"step": 23622 |
|
}, |
|
{ |
|
"epoch": 382.0, |
|
"eval_loss": 11.006622314453125, |
|
"eval_runtime": 1.5259, |
|
"eval_samples_per_second": 79.298, |
|
"eval_steps_per_second": 10.486, |
|
"step": 23684 |
|
}, |
|
{ |
|
"epoch": 383.0, |
|
"eval_loss": 10.977095603942871, |
|
"eval_runtime": 1.5282, |
|
"eval_samples_per_second": 79.178, |
|
"eval_steps_per_second": 10.47, |
|
"step": 23746 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_loss": 10.895329475402832, |
|
"eval_runtime": 1.5304, |
|
"eval_samples_per_second": 79.066, |
|
"eval_steps_per_second": 10.455, |
|
"step": 23808 |
|
}, |
|
{ |
|
"epoch": 385.0, |
|
"eval_loss": 10.903176307678223, |
|
"eval_runtime": 1.5397, |
|
"eval_samples_per_second": 78.586, |
|
"eval_steps_per_second": 10.392, |
|
"step": 23870 |
|
}, |
|
{ |
|
"epoch": 386.0, |
|
"eval_loss": 10.892215728759766, |
|
"eval_runtime": 1.5258, |
|
"eval_samples_per_second": 79.302, |
|
"eval_steps_per_second": 10.486, |
|
"step": 23932 |
|
}, |
|
{ |
|
"epoch": 387.0, |
|
"eval_loss": 10.993136405944824, |
|
"eval_runtime": 1.5252, |
|
"eval_samples_per_second": 79.332, |
|
"eval_steps_per_second": 10.49, |
|
"step": 23994 |
|
}, |
|
{ |
|
"epoch": 387.1, |
|
"learning_rate": 1.129032258064516e-08, |
|
"loss": 10.1262, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_loss": 11.127774238586426, |
|
"eval_runtime": 1.561, |
|
"eval_samples_per_second": 77.516, |
|
"eval_steps_per_second": 10.25, |
|
"step": 24056 |
|
}, |
|
{ |
|
"epoch": 389.0, |
|
"eval_loss": 11.101140022277832, |
|
"eval_runtime": 1.534, |
|
"eval_samples_per_second": 78.877, |
|
"eval_steps_per_second": 10.43, |
|
"step": 24118 |
|
}, |
|
{ |
|
"epoch": 390.0, |
|
"eval_loss": 11.042156219482422, |
|
"eval_runtime": 1.5371, |
|
"eval_samples_per_second": 78.721, |
|
"eval_steps_per_second": 10.409, |
|
"step": 24180 |
|
}, |
|
{ |
|
"epoch": 391.0, |
|
"eval_loss": 11.063082695007324, |
|
"eval_runtime": 1.5258, |
|
"eval_samples_per_second": 79.304, |
|
"eval_steps_per_second": 10.487, |
|
"step": 24242 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_loss": 10.970271110534668, |
|
"eval_runtime": 1.5279, |
|
"eval_samples_per_second": 79.192, |
|
"eval_steps_per_second": 10.472, |
|
"step": 24304 |
|
}, |
|
{ |
|
"epoch": 393.0, |
|
"eval_loss": 10.953568458557129, |
|
"eval_runtime": 1.5296, |
|
"eval_samples_per_second": 79.105, |
|
"eval_steps_per_second": 10.46, |
|
"step": 24366 |
|
}, |
|
{ |
|
"epoch": 394.0, |
|
"eval_loss": 11.076667785644531, |
|
"eval_runtime": 1.5221, |
|
"eval_samples_per_second": 79.496, |
|
"eval_steps_per_second": 10.512, |
|
"step": 24428 |
|
}, |
|
{ |
|
"epoch": 395.0, |
|
"eval_loss": 10.922811508178711, |
|
"eval_runtime": 1.5248, |
|
"eval_samples_per_second": 79.353, |
|
"eval_steps_per_second": 10.493, |
|
"step": 24490 |
|
}, |
|
{ |
|
"epoch": 395.16, |
|
"learning_rate": 1.0483870967741935e-08, |
|
"loss": 10.1038, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_loss": 11.142001152038574, |
|
"eval_runtime": 1.5327, |
|
"eval_samples_per_second": 78.944, |
|
"eval_steps_per_second": 10.439, |
|
"step": 24552 |
|
}, |
|
{ |
|
"epoch": 397.0, |
|
"eval_loss": 11.145098686218262, |
|
"eval_runtime": 1.5305, |
|
"eval_samples_per_second": 79.06, |
|
"eval_steps_per_second": 10.454, |
|
"step": 24614 |
|
}, |
|
{ |
|
"epoch": 398.0, |
|
"eval_loss": 11.166299819946289, |
|
"eval_runtime": 1.5557, |
|
"eval_samples_per_second": 77.777, |
|
"eval_steps_per_second": 10.285, |
|
"step": 24676 |
|
}, |
|
{ |
|
"epoch": 399.0, |
|
"eval_loss": 11.075632095336914, |
|
"eval_runtime": 1.5363, |
|
"eval_samples_per_second": 78.761, |
|
"eval_steps_per_second": 10.415, |
|
"step": 24738 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_loss": 11.114908218383789, |
|
"eval_runtime": 1.5291, |
|
"eval_samples_per_second": 79.134, |
|
"eval_steps_per_second": 10.464, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 401.0, |
|
"eval_loss": 11.065619468688965, |
|
"eval_runtime": 1.5197, |
|
"eval_samples_per_second": 79.621, |
|
"eval_steps_per_second": 10.528, |
|
"step": 24862 |
|
}, |
|
{ |
|
"epoch": 402.0, |
|
"eval_loss": 11.075223922729492, |
|
"eval_runtime": 1.5218, |
|
"eval_samples_per_second": 79.51, |
|
"eval_steps_per_second": 10.514, |
|
"step": 24924 |
|
}, |
|
{ |
|
"epoch": 403.0, |
|
"eval_loss": 11.141654968261719, |
|
"eval_runtime": 1.5194, |
|
"eval_samples_per_second": 79.637, |
|
"eval_steps_per_second": 10.53, |
|
"step": 24986 |
|
}, |
|
{ |
|
"epoch": 403.23, |
|
"learning_rate": 9.677419354838709e-09, |
|
"loss": 10.1159, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_loss": 10.96292495727539, |
|
"eval_runtime": 1.5308, |
|
"eval_samples_per_second": 79.043, |
|
"eval_steps_per_second": 10.452, |
|
"step": 25048 |
|
}, |
|
{ |
|
"epoch": 405.0, |
|
"eval_loss": 10.93803596496582, |
|
"eval_runtime": 1.541, |
|
"eval_samples_per_second": 78.519, |
|
"eval_steps_per_second": 10.383, |
|
"step": 25110 |
|
}, |
|
{ |
|
"epoch": 406.0, |
|
"eval_loss": 11.00986385345459, |
|
"eval_runtime": 1.5347, |
|
"eval_samples_per_second": 78.841, |
|
"eval_steps_per_second": 10.425, |
|
"step": 25172 |
|
}, |
|
{ |
|
"epoch": 407.0, |
|
"eval_loss": 11.137813568115234, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.17, |
|
"eval_steps_per_second": 10.469, |
|
"step": 25234 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_loss": 10.851067543029785, |
|
"eval_runtime": 1.5273, |
|
"eval_samples_per_second": 79.223, |
|
"eval_steps_per_second": 10.476, |
|
"step": 25296 |
|
}, |
|
{ |
|
"epoch": 409.0, |
|
"eval_loss": 11.01217269897461, |
|
"eval_runtime": 1.5232, |
|
"eval_samples_per_second": 79.439, |
|
"eval_steps_per_second": 10.504, |
|
"step": 25358 |
|
}, |
|
{ |
|
"epoch": 410.0, |
|
"eval_loss": 11.01009750366211, |
|
"eval_runtime": 1.5196, |
|
"eval_samples_per_second": 79.627, |
|
"eval_steps_per_second": 10.529, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 411.0, |
|
"eval_loss": 10.873472213745117, |
|
"eval_runtime": 1.5201, |
|
"eval_samples_per_second": 79.6, |
|
"eval_steps_per_second": 10.526, |
|
"step": 25482 |
|
}, |
|
{ |
|
"epoch": 411.29, |
|
"learning_rate": 8.870967741935485e-09, |
|
"loss": 10.0848, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_loss": 11.008174896240234, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 79.232, |
|
"eval_steps_per_second": 10.477, |
|
"step": 25544 |
|
}, |
|
{ |
|
"epoch": 413.0, |
|
"eval_loss": 11.012767791748047, |
|
"eval_runtime": 1.5353, |
|
"eval_samples_per_second": 78.81, |
|
"eval_steps_per_second": 10.421, |
|
"step": 25606 |
|
}, |
|
{ |
|
"epoch": 414.0, |
|
"eval_loss": 11.137340545654297, |
|
"eval_runtime": 1.5293, |
|
"eval_samples_per_second": 79.119, |
|
"eval_steps_per_second": 10.462, |
|
"step": 25668 |
|
}, |
|
{ |
|
"epoch": 415.0, |
|
"eval_loss": 11.137984275817871, |
|
"eval_runtime": 1.5285, |
|
"eval_samples_per_second": 79.163, |
|
"eval_steps_per_second": 10.468, |
|
"step": 25730 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_loss": 10.93875503540039, |
|
"eval_runtime": 1.5284, |
|
"eval_samples_per_second": 79.167, |
|
"eval_steps_per_second": 10.468, |
|
"step": 25792 |
|
}, |
|
{ |
|
"epoch": 417.0, |
|
"eval_loss": 10.93083381652832, |
|
"eval_runtime": 1.524, |
|
"eval_samples_per_second": 79.396, |
|
"eval_steps_per_second": 10.499, |
|
"step": 25854 |
|
}, |
|
{ |
|
"epoch": 418.0, |
|
"eval_loss": 11.082795143127441, |
|
"eval_runtime": 1.5216, |
|
"eval_samples_per_second": 79.523, |
|
"eval_steps_per_second": 10.515, |
|
"step": 25916 |
|
}, |
|
{ |
|
"epoch": 419.0, |
|
"eval_loss": 11.256653785705566, |
|
"eval_runtime": 1.5222, |
|
"eval_samples_per_second": 79.488, |
|
"eval_steps_per_second": 10.511, |
|
"step": 25978 |
|
}, |
|
{ |
|
"epoch": 419.35, |
|
"learning_rate": 8.064516129032257e-09, |
|
"loss": 10.0923, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_loss": 11.040205955505371, |
|
"eval_runtime": 1.5329, |
|
"eval_samples_per_second": 78.935, |
|
"eval_steps_per_second": 10.438, |
|
"step": 26040 |
|
}, |
|
{ |
|
"epoch": 421.0, |
|
"eval_loss": 11.028487205505371, |
|
"eval_runtime": 1.541, |
|
"eval_samples_per_second": 78.518, |
|
"eval_steps_per_second": 10.383, |
|
"step": 26102 |
|
}, |
|
{ |
|
"epoch": 422.0, |
|
"eval_loss": 11.002765655517578, |
|
"eval_runtime": 1.5314, |
|
"eval_samples_per_second": 79.012, |
|
"eval_steps_per_second": 10.448, |
|
"step": 26164 |
|
}, |
|
{ |
|
"epoch": 423.0, |
|
"eval_loss": 10.978453636169434, |
|
"eval_runtime": 1.5289, |
|
"eval_samples_per_second": 79.144, |
|
"eval_steps_per_second": 10.465, |
|
"step": 26226 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_loss": 11.046844482421875, |
|
"eval_runtime": 1.5378, |
|
"eval_samples_per_second": 78.685, |
|
"eval_steps_per_second": 10.405, |
|
"step": 26288 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"eval_loss": 11.059452056884766, |
|
"eval_runtime": 1.523, |
|
"eval_samples_per_second": 79.446, |
|
"eval_steps_per_second": 10.505, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 426.0, |
|
"eval_loss": 11.098678588867188, |
|
"eval_runtime": 1.521, |
|
"eval_samples_per_second": 79.551, |
|
"eval_steps_per_second": 10.519, |
|
"step": 26412 |
|
}, |
|
{ |
|
"epoch": 427.0, |
|
"eval_loss": 10.876445770263672, |
|
"eval_runtime": 1.5209, |
|
"eval_samples_per_second": 79.556, |
|
"eval_steps_per_second": 10.52, |
|
"step": 26474 |
|
}, |
|
{ |
|
"epoch": 427.42, |
|
"learning_rate": 7.258064516129032e-09, |
|
"loss": 10.1, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_loss": 10.964273452758789, |
|
"eval_runtime": 1.5385, |
|
"eval_samples_per_second": 78.65, |
|
"eval_steps_per_second": 10.4, |
|
"step": 26536 |
|
}, |
|
{ |
|
"epoch": 429.0, |
|
"eval_loss": 10.986409187316895, |
|
"eval_runtime": 1.5468, |
|
"eval_samples_per_second": 78.225, |
|
"eval_steps_per_second": 10.344, |
|
"step": 26598 |
|
}, |
|
{ |
|
"epoch": 430.0, |
|
"eval_loss": 11.001900672912598, |
|
"eval_runtime": 1.529, |
|
"eval_samples_per_second": 79.135, |
|
"eval_steps_per_second": 10.464, |
|
"step": 26660 |
|
}, |
|
{ |
|
"epoch": 431.0, |
|
"eval_loss": 11.103708267211914, |
|
"eval_runtime": 1.5277, |
|
"eval_samples_per_second": 79.205, |
|
"eval_steps_per_second": 10.473, |
|
"step": 26722 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_loss": 10.956040382385254, |
|
"eval_runtime": 1.5283, |
|
"eval_samples_per_second": 79.174, |
|
"eval_steps_per_second": 10.469, |
|
"step": 26784 |
|
}, |
|
{ |
|
"epoch": 433.0, |
|
"eval_loss": 10.98078727722168, |
|
"eval_runtime": 1.545, |
|
"eval_samples_per_second": 78.316, |
|
"eval_steps_per_second": 10.356, |
|
"step": 26846 |
|
}, |
|
{ |
|
"epoch": 434.0, |
|
"eval_loss": 11.044650077819824, |
|
"eval_runtime": 1.5194, |
|
"eval_samples_per_second": 79.639, |
|
"eval_steps_per_second": 10.531, |
|
"step": 26908 |
|
}, |
|
{ |
|
"epoch": 435.0, |
|
"eval_loss": 10.943286895751953, |
|
"eval_runtime": 1.5208, |
|
"eval_samples_per_second": 79.563, |
|
"eval_steps_per_second": 10.521, |
|
"step": 26970 |
|
}, |
|
{ |
|
"epoch": 435.48, |
|
"learning_rate": 6.451612903225806e-09, |
|
"loss": 10.0944, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_loss": 11.040830612182617, |
|
"eval_runtime": 1.5264, |
|
"eval_samples_per_second": 79.272, |
|
"eval_steps_per_second": 10.482, |
|
"step": 27032 |
|
}, |
|
{ |
|
"epoch": 437.0, |
|
"eval_loss": 10.829809188842773, |
|
"eval_runtime": 1.531, |
|
"eval_samples_per_second": 79.032, |
|
"eval_steps_per_second": 10.45, |
|
"step": 27094 |
|
}, |
|
{ |
|
"epoch": 438.0, |
|
"eval_loss": 11.169089317321777, |
|
"eval_runtime": 1.5289, |
|
"eval_samples_per_second": 79.142, |
|
"eval_steps_per_second": 10.465, |
|
"step": 27156 |
|
}, |
|
{ |
|
"epoch": 439.0, |
|
"eval_loss": 11.029060363769531, |
|
"eval_runtime": 1.5418, |
|
"eval_samples_per_second": 78.481, |
|
"eval_steps_per_second": 10.378, |
|
"step": 27218 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"eval_loss": 10.99075984954834, |
|
"eval_runtime": 1.5372, |
|
"eval_samples_per_second": 78.712, |
|
"eval_steps_per_second": 10.408, |
|
"step": 27280 |
|
}, |
|
{ |
|
"epoch": 441.0, |
|
"eval_loss": 10.889039039611816, |
|
"eval_runtime": 1.5276, |
|
"eval_samples_per_second": 79.211, |
|
"eval_steps_per_second": 10.474, |
|
"step": 27342 |
|
}, |
|
{ |
|
"epoch": 442.0, |
|
"eval_loss": 11.024713516235352, |
|
"eval_runtime": 1.5184, |
|
"eval_samples_per_second": 79.688, |
|
"eval_steps_per_second": 10.537, |
|
"step": 27404 |
|
}, |
|
{ |
|
"epoch": 443.0, |
|
"eval_loss": 11.10232925415039, |
|
"eval_runtime": 1.5225, |
|
"eval_samples_per_second": 79.473, |
|
"eval_steps_per_second": 10.509, |
|
"step": 27466 |
|
}, |
|
{ |
|
"epoch": 443.55, |
|
"learning_rate": 5.64516129032258e-09, |
|
"loss": 10.0965, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"eval_loss": 11.125219345092773, |
|
"eval_runtime": 1.5507, |
|
"eval_samples_per_second": 78.027, |
|
"eval_steps_per_second": 10.318, |
|
"step": 27528 |
|
}, |
|
{ |
|
"epoch": 445.0, |
|
"eval_loss": 11.037354469299316, |
|
"eval_runtime": 1.5307, |
|
"eval_samples_per_second": 79.048, |
|
"eval_steps_per_second": 10.453, |
|
"step": 27590 |
|
}, |
|
{ |
|
"epoch": 446.0, |
|
"eval_loss": 11.032657623291016, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 79.231, |
|
"eval_steps_per_second": 10.477, |
|
"step": 27652 |
|
}, |
|
{ |
|
"epoch": 447.0, |
|
"eval_loss": 10.83928394317627, |
|
"eval_runtime": 1.5299, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 10.458, |
|
"step": 27714 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"eval_loss": 10.952762603759766, |
|
"eval_runtime": 1.5436, |
|
"eval_samples_per_second": 78.39, |
|
"eval_steps_per_second": 10.366, |
|
"step": 27776 |
|
}, |
|
{ |
|
"epoch": 449.0, |
|
"eval_loss": 11.012396812438965, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 79.231, |
|
"eval_steps_per_second": 10.477, |
|
"step": 27838 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"eval_loss": 10.84825611114502, |
|
"eval_runtime": 1.5273, |
|
"eval_samples_per_second": 79.226, |
|
"eval_steps_per_second": 10.476, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 451.0, |
|
"eval_loss": 10.897711753845215, |
|
"eval_runtime": 1.527, |
|
"eval_samples_per_second": 79.241, |
|
"eval_steps_per_second": 10.478, |
|
"step": 27962 |
|
}, |
|
{ |
|
"epoch": 451.61, |
|
"learning_rate": 4.8387096774193544e-09, |
|
"loss": 10.1023, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"eval_loss": 11.033035278320312, |
|
"eval_runtime": 1.5393, |
|
"eval_samples_per_second": 78.607, |
|
"eval_steps_per_second": 10.394, |
|
"step": 28024 |
|
}, |
|
{ |
|
"epoch": 453.0, |
|
"eval_loss": 10.92820930480957, |
|
"eval_runtime": 1.5378, |
|
"eval_samples_per_second": 78.682, |
|
"eval_steps_per_second": 10.404, |
|
"step": 28086 |
|
}, |
|
{ |
|
"epoch": 454.0, |
|
"eval_loss": 11.028227806091309, |
|
"eval_runtime": 1.5335, |
|
"eval_samples_per_second": 78.905, |
|
"eval_steps_per_second": 10.434, |
|
"step": 28148 |
|
}, |
|
{ |
|
"epoch": 455.0, |
|
"eval_loss": 10.94653034210205, |
|
"eval_runtime": 1.5323, |
|
"eval_samples_per_second": 78.968, |
|
"eval_steps_per_second": 10.442, |
|
"step": 28210 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"eval_loss": 10.933752059936523, |
|
"eval_runtime": 1.542, |
|
"eval_samples_per_second": 78.468, |
|
"eval_steps_per_second": 10.376, |
|
"step": 28272 |
|
}, |
|
{ |
|
"epoch": 457.0, |
|
"eval_loss": 10.998141288757324, |
|
"eval_runtime": 1.5243, |
|
"eval_samples_per_second": 79.38, |
|
"eval_steps_per_second": 10.496, |
|
"step": 28334 |
|
}, |
|
{ |
|
"epoch": 458.0, |
|
"eval_loss": 11.004565238952637, |
|
"eval_runtime": 1.5184, |
|
"eval_samples_per_second": 79.69, |
|
"eval_steps_per_second": 10.537, |
|
"step": 28396 |
|
}, |
|
{ |
|
"epoch": 459.0, |
|
"eval_loss": 11.058143615722656, |
|
"eval_runtime": 1.5198, |
|
"eval_samples_per_second": 79.618, |
|
"eval_steps_per_second": 10.528, |
|
"step": 28458 |
|
}, |
|
{ |
|
"epoch": 459.68, |
|
"learning_rate": 4.0322580645161286e-09, |
|
"loss": 10.0928, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"eval_loss": 11.069917678833008, |
|
"eval_runtime": 1.5309, |
|
"eval_samples_per_second": 79.038, |
|
"eval_steps_per_second": 10.451, |
|
"step": 28520 |
|
}, |
|
{ |
|
"epoch": 461.0, |
|
"eval_loss": 11.008392333984375, |
|
"eval_runtime": 1.5319, |
|
"eval_samples_per_second": 78.986, |
|
"eval_steps_per_second": 10.444, |
|
"step": 28582 |
|
}, |
|
{ |
|
"epoch": 462.0, |
|
"eval_loss": 10.968953132629395, |
|
"eval_runtime": 1.5279, |
|
"eval_samples_per_second": 79.192, |
|
"eval_steps_per_second": 10.472, |
|
"step": 28644 |
|
}, |
|
{ |
|
"epoch": 463.0, |
|
"eval_loss": 11.05852222442627, |
|
"eval_runtime": 1.5388, |
|
"eval_samples_per_second": 78.635, |
|
"eval_steps_per_second": 10.398, |
|
"step": 28706 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"eval_loss": 11.158599853515625, |
|
"eval_runtime": 1.5307, |
|
"eval_samples_per_second": 79.048, |
|
"eval_steps_per_second": 10.453, |
|
"step": 28768 |
|
}, |
|
{ |
|
"epoch": 465.0, |
|
"eval_loss": 11.008247375488281, |
|
"eval_runtime": 1.5274, |
|
"eval_samples_per_second": 79.218, |
|
"eval_steps_per_second": 10.475, |
|
"step": 28830 |
|
}, |
|
{ |
|
"epoch": 466.0, |
|
"eval_loss": 11.049484252929688, |
|
"eval_runtime": 1.5195, |
|
"eval_samples_per_second": 79.634, |
|
"eval_steps_per_second": 10.53, |
|
"step": 28892 |
|
}, |
|
{ |
|
"epoch": 467.0, |
|
"eval_loss": 10.853099822998047, |
|
"eval_runtime": 1.5203, |
|
"eval_samples_per_second": 79.589, |
|
"eval_steps_per_second": 10.524, |
|
"step": 28954 |
|
}, |
|
{ |
|
"epoch": 467.74, |
|
"learning_rate": 3.225806451612903e-09, |
|
"loss": 10.0925, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 468.0, |
|
"eval_loss": 10.858702659606934, |
|
"eval_runtime": 1.5294, |
|
"eval_samples_per_second": 79.118, |
|
"eval_steps_per_second": 10.462, |
|
"step": 29016 |
|
}, |
|
{ |
|
"epoch": 469.0, |
|
"eval_loss": 11.159988403320312, |
|
"eval_runtime": 1.5283, |
|
"eval_samples_per_second": 79.173, |
|
"eval_steps_per_second": 10.469, |
|
"step": 29078 |
|
}, |
|
{ |
|
"epoch": 470.0, |
|
"eval_loss": 10.992026329040527, |
|
"eval_runtime": 1.5307, |
|
"eval_samples_per_second": 79.05, |
|
"eval_steps_per_second": 10.453, |
|
"step": 29140 |
|
}, |
|
{ |
|
"epoch": 471.0, |
|
"eval_loss": 11.020038604736328, |
|
"eval_runtime": 1.5472, |
|
"eval_samples_per_second": 78.208, |
|
"eval_steps_per_second": 10.342, |
|
"step": 29202 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"eval_loss": 11.056954383850098, |
|
"eval_runtime": 1.5255, |
|
"eval_samples_per_second": 79.316, |
|
"eval_steps_per_second": 10.488, |
|
"step": 29264 |
|
}, |
|
{ |
|
"epoch": 473.0, |
|
"eval_loss": 10.931620597839355, |
|
"eval_runtime": 1.5261, |
|
"eval_samples_per_second": 79.287, |
|
"eval_steps_per_second": 10.484, |
|
"step": 29326 |
|
}, |
|
{ |
|
"epoch": 474.0, |
|
"eval_loss": 11.059667587280273, |
|
"eval_runtime": 1.5202, |
|
"eval_samples_per_second": 79.597, |
|
"eval_steps_per_second": 10.525, |
|
"step": 29388 |
|
}, |
|
{ |
|
"epoch": 475.0, |
|
"eval_loss": 10.994057655334473, |
|
"eval_runtime": 1.5275, |
|
"eval_samples_per_second": 79.216, |
|
"eval_steps_per_second": 10.475, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 475.81, |
|
"learning_rate": 2.4193548387096772e-09, |
|
"loss": 10.0956, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 476.0, |
|
"eval_loss": 11.07702350616455, |
|
"eval_runtime": 1.5276, |
|
"eval_samples_per_second": 79.21, |
|
"eval_steps_per_second": 10.474, |
|
"step": 29512 |
|
}, |
|
{ |
|
"epoch": 477.0, |
|
"eval_loss": 11.006840705871582, |
|
"eval_runtime": 1.5298, |
|
"eval_samples_per_second": 79.098, |
|
"eval_steps_per_second": 10.459, |
|
"step": 29574 |
|
}, |
|
{ |
|
"epoch": 478.0, |
|
"eval_loss": 10.907955169677734, |
|
"eval_runtime": 1.5329, |
|
"eval_samples_per_second": 78.936, |
|
"eval_steps_per_second": 10.438, |
|
"step": 29636 |
|
}, |
|
{ |
|
"epoch": 479.0, |
|
"eval_loss": 10.865551948547363, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 79.23, |
|
"eval_steps_per_second": 10.477, |
|
"step": 29698 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"eval_loss": 11.035009384155273, |
|
"eval_runtime": 1.5396, |
|
"eval_samples_per_second": 78.594, |
|
"eval_steps_per_second": 10.393, |
|
"step": 29760 |
|
}, |
|
{ |
|
"epoch": 481.0, |
|
"eval_loss": 11.06725788116455, |
|
"eval_runtime": 1.5306, |
|
"eval_samples_per_second": 79.056, |
|
"eval_steps_per_second": 10.454, |
|
"step": 29822 |
|
}, |
|
{ |
|
"epoch": 482.0, |
|
"eval_loss": 11.034406661987305, |
|
"eval_runtime": 1.5223, |
|
"eval_samples_per_second": 79.487, |
|
"eval_steps_per_second": 10.511, |
|
"step": 29884 |
|
}, |
|
{ |
|
"epoch": 483.0, |
|
"eval_loss": 10.981928825378418, |
|
"eval_runtime": 1.5189, |
|
"eval_samples_per_second": 79.661, |
|
"eval_steps_per_second": 10.534, |
|
"step": 29946 |
|
}, |
|
{ |
|
"epoch": 483.87, |
|
"learning_rate": 1.6129032258064515e-09, |
|
"loss": 10.112, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 484.0, |
|
"eval_loss": 11.066652297973633, |
|
"eval_runtime": 1.5282, |
|
"eval_samples_per_second": 79.179, |
|
"eval_steps_per_second": 10.47, |
|
"step": 30008 |
|
}, |
|
{ |
|
"epoch": 485.0, |
|
"eval_loss": 10.937297821044922, |
|
"eval_runtime": 1.5264, |
|
"eval_samples_per_second": 79.27, |
|
"eval_steps_per_second": 10.482, |
|
"step": 30070 |
|
}, |
|
{ |
|
"epoch": 486.0, |
|
"eval_loss": 10.977715492248535, |
|
"eval_runtime": 1.537, |
|
"eval_samples_per_second": 78.724, |
|
"eval_steps_per_second": 10.41, |
|
"step": 30132 |
|
}, |
|
{ |
|
"epoch": 487.0, |
|
"eval_loss": 10.983244895935059, |
|
"eval_runtime": 1.5322, |
|
"eval_samples_per_second": 78.969, |
|
"eval_steps_per_second": 10.442, |
|
"step": 30194 |
|
}, |
|
{ |
|
"epoch": 488.0, |
|
"eval_loss": 11.142578125, |
|
"eval_runtime": 1.532, |
|
"eval_samples_per_second": 78.984, |
|
"eval_steps_per_second": 10.444, |
|
"step": 30256 |
|
}, |
|
{ |
|
"epoch": 489.0, |
|
"eval_loss": 11.0770845413208, |
|
"eval_runtime": 1.527, |
|
"eval_samples_per_second": 79.241, |
|
"eval_steps_per_second": 10.478, |
|
"step": 30318 |
|
}, |
|
{ |
|
"epoch": 490.0, |
|
"eval_loss": 10.951501846313477, |
|
"eval_runtime": 1.5214, |
|
"eval_samples_per_second": 79.534, |
|
"eval_steps_per_second": 10.517, |
|
"step": 30380 |
|
}, |
|
{ |
|
"epoch": 491.0, |
|
"eval_loss": 10.961722373962402, |
|
"eval_runtime": 1.5203, |
|
"eval_samples_per_second": 79.59, |
|
"eval_steps_per_second": 10.524, |
|
"step": 30442 |
|
}, |
|
{ |
|
"epoch": 491.94, |
|
"learning_rate": 8.064516129032258e-10, |
|
"loss": 10.1007, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 492.0, |
|
"eval_loss": 10.952271461486816, |
|
"eval_runtime": 1.528, |
|
"eval_samples_per_second": 79.187, |
|
"eval_steps_per_second": 10.471, |
|
"step": 30504 |
|
}, |
|
{ |
|
"epoch": 493.0, |
|
"eval_loss": 10.87966537475586, |
|
"eval_runtime": 1.5563, |
|
"eval_samples_per_second": 77.748, |
|
"eval_steps_per_second": 10.281, |
|
"step": 30566 |
|
}, |
|
{ |
|
"epoch": 494.0, |
|
"eval_loss": 10.981633186340332, |
|
"eval_runtime": 1.533, |
|
"eval_samples_per_second": 78.933, |
|
"eval_steps_per_second": 10.437, |
|
"step": 30628 |
|
}, |
|
{ |
|
"epoch": 495.0, |
|
"eval_loss": 10.952598571777344, |
|
"eval_runtime": 1.5283, |
|
"eval_samples_per_second": 79.171, |
|
"eval_steps_per_second": 10.469, |
|
"step": 30690 |
|
}, |
|
{ |
|
"epoch": 496.0, |
|
"eval_loss": 11.00783920288086, |
|
"eval_runtime": 1.5275, |
|
"eval_samples_per_second": 79.212, |
|
"eval_steps_per_second": 10.474, |
|
"step": 30752 |
|
}, |
|
{ |
|
"epoch": 497.0, |
|
"eval_loss": 11.156755447387695, |
|
"eval_runtime": 1.536, |
|
"eval_samples_per_second": 78.778, |
|
"eval_steps_per_second": 10.417, |
|
"step": 30814 |
|
}, |
|
{ |
|
"epoch": 498.0, |
|
"eval_loss": 10.986204147338867, |
|
"eval_runtime": 1.521, |
|
"eval_samples_per_second": 79.554, |
|
"eval_steps_per_second": 10.519, |
|
"step": 30876 |
|
}, |
|
{ |
|
"epoch": 499.0, |
|
"eval_loss": 11.053728103637695, |
|
"eval_runtime": 1.5217, |
|
"eval_samples_per_second": 79.518, |
|
"eval_steps_per_second": 10.515, |
|
"step": 30938 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"learning_rate": 0.0, |
|
"loss": 10.0953, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"eval_loss": 11.041763305664062, |
|
"eval_runtime": 1.5553, |
|
"eval_samples_per_second": 77.797, |
|
"eval_steps_per_second": 10.287, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"step": 31000, |
|
"total_flos": 1.6202616878592e+16, |
|
"train_loss": 11.618560483870969, |
|
"train_runtime": 17098.8381, |
|
"train_samples_per_second": 14.358, |
|
"train_steps_per_second": 1.813 |
|
} |
|
], |
|
"max_steps": 31000, |
|
"num_train_epochs": 500, |
|
"total_flos": 1.6202616878592e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|