|
{ |
|
"best_metric": 0.5172018348623854, |
|
"best_model_checkpoint": "outputs/soft_prompt/deberta-v2-xlarge/sst2/checkpoint-3400", |
|
"epoch": 3.0, |
|
"global_step": 25257, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.831559658050537, |
|
"eval_runtime": 12.4014, |
|
"eval_samples_per_second": 70.315, |
|
"eval_steps_per_second": 8.789, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7252020835876465, |
|
"eval_runtime": 12.5411, |
|
"eval_samples_per_second": 69.532, |
|
"eval_steps_per_second": 8.691, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0294061052381518, |
|
"loss": 2.7746, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 6.318162441253662, |
|
"eval_runtime": 12.2524, |
|
"eval_samples_per_second": 71.17, |
|
"eval_steps_per_second": 8.896, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.9581830501556396, |
|
"eval_runtime": 13.2254, |
|
"eval_samples_per_second": 65.934, |
|
"eval_steps_per_second": 8.242, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0288122104763036, |
|
"loss": 2.8668, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.97942715883255, |
|
"eval_runtime": 12.2204, |
|
"eval_samples_per_second": 71.356, |
|
"eval_steps_per_second": 8.92, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 9.121758460998535, |
|
"eval_runtime": 12.6821, |
|
"eval_samples_per_second": 68.758, |
|
"eval_steps_per_second": 8.595, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.1010525226593018, |
|
"eval_runtime": 13.3142, |
|
"eval_samples_per_second": 65.494, |
|
"eval_steps_per_second": 8.187, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0282183157144554, |
|
"loss": 3.0595, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.240478754043579, |
|
"eval_runtime": 11.7539, |
|
"eval_samples_per_second": 74.188, |
|
"eval_steps_per_second": 9.274, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.822434663772583, |
|
"eval_runtime": 11.937, |
|
"eval_samples_per_second": 73.05, |
|
"eval_steps_per_second": 9.131, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.027624420952607195, |
|
"loss": 2.7406, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.2581450939178467, |
|
"eval_runtime": 12.944, |
|
"eval_samples_per_second": 67.367, |
|
"eval_steps_per_second": 8.421, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 5.0679168701171875, |
|
"eval_runtime": 12.8993, |
|
"eval_samples_per_second": 67.6, |
|
"eval_steps_per_second": 8.45, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 4.079117774963379, |
|
"eval_runtime": 12.6438, |
|
"eval_samples_per_second": 68.966, |
|
"eval_steps_per_second": 8.621, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.027030526190758998, |
|
"loss": 2.341, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 6.498974800109863, |
|
"eval_runtime": 11.7875, |
|
"eval_samples_per_second": 73.977, |
|
"eval_steps_per_second": 9.247, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.2673330307006836, |
|
"eval_runtime": 12.4045, |
|
"eval_samples_per_second": 70.297, |
|
"eval_steps_per_second": 8.787, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.026436631428910798, |
|
"loss": 2.5017, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.775005340576172, |
|
"eval_runtime": 12.8935, |
|
"eval_samples_per_second": 67.631, |
|
"eval_steps_per_second": 8.454, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5126146788990825, |
|
"eval_loss": 0.8905919790267944, |
|
"eval_runtime": 11.7168, |
|
"eval_samples_per_second": 74.423, |
|
"eval_steps_per_second": 9.303, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5172018348623854, |
|
"eval_loss": 3.1630539894104004, |
|
"eval_runtime": 12.3677, |
|
"eval_samples_per_second": 70.506, |
|
"eval_steps_per_second": 8.813, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.025842736667062594, |
|
"loss": 2.587, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.8831442594528198, |
|
"eval_runtime": 12.7491, |
|
"eval_samples_per_second": 68.397, |
|
"eval_steps_per_second": 8.55, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.40650475025177, |
|
"eval_runtime": 11.8781, |
|
"eval_samples_per_second": 73.412, |
|
"eval_steps_per_second": 9.177, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.025248841905214398, |
|
"loss": 2.2272, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 5.7484235763549805, |
|
"eval_runtime": 12.2084, |
|
"eval_samples_per_second": 71.426, |
|
"eval_steps_per_second": 8.928, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 3.051417589187622, |
|
"eval_runtime": 13.0485, |
|
"eval_samples_per_second": 66.828, |
|
"eval_steps_per_second": 8.353, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1735517978668213, |
|
"eval_runtime": 12.52, |
|
"eval_samples_per_second": 69.648, |
|
"eval_steps_per_second": 8.706, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.024654947143366194, |
|
"loss": 2.474, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 3.5813305377960205, |
|
"eval_runtime": 12.4532, |
|
"eval_samples_per_second": 70.022, |
|
"eval_steps_per_second": 8.753, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 3.0905096530914307, |
|
"eval_runtime": 12.1764, |
|
"eval_samples_per_second": 71.614, |
|
"eval_steps_per_second": 8.952, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.024061052381517994, |
|
"loss": 2.6958, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.6613879203796387, |
|
"eval_runtime": 13.0709, |
|
"eval_samples_per_second": 66.713, |
|
"eval_steps_per_second": 8.339, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.9912046194076538, |
|
"eval_runtime": 11.0684, |
|
"eval_samples_per_second": 78.783, |
|
"eval_steps_per_second": 9.848, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.708740711212158, |
|
"eval_runtime": 12.3098, |
|
"eval_samples_per_second": 70.838, |
|
"eval_steps_per_second": 8.855, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.023467157619669794, |
|
"loss": 2.4571, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.9453651309013367, |
|
"eval_runtime": 12.5807, |
|
"eval_samples_per_second": 69.313, |
|
"eval_steps_per_second": 8.664, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.9460629820823669, |
|
"eval_runtime": 12.3033, |
|
"eval_samples_per_second": 70.875, |
|
"eval_steps_per_second": 8.859, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.022873262857821593, |
|
"loss": 2.4046, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5034403669724771, |
|
"eval_loss": 0.7613060474395752, |
|
"eval_runtime": 12.922, |
|
"eval_samples_per_second": 67.482, |
|
"eval_steps_per_second": 8.435, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 3.473540782928467, |
|
"eval_runtime": 12.5608, |
|
"eval_samples_per_second": 69.423, |
|
"eval_steps_per_second": 8.678, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8309389352798462, |
|
"eval_runtime": 12.3028, |
|
"eval_samples_per_second": 70.878, |
|
"eval_steps_per_second": 8.86, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.022279368095973393, |
|
"loss": 1.9778, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.345905303955078, |
|
"eval_runtime": 12.6993, |
|
"eval_samples_per_second": 68.665, |
|
"eval_steps_per_second": 8.583, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.992404818534851, |
|
"eval_runtime": 11.8953, |
|
"eval_samples_per_second": 73.306, |
|
"eval_steps_per_second": 9.163, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.02168547333412519, |
|
"loss": 1.9132, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.48394495412844035, |
|
"eval_loss": 0.7653124928474426, |
|
"eval_runtime": 12.5926, |
|
"eval_samples_per_second": 69.247, |
|
"eval_steps_per_second": 8.656, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.6025058031082153, |
|
"eval_runtime": 12.8694, |
|
"eval_samples_per_second": 67.757, |
|
"eval_steps_per_second": 8.47, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.3198583126068115, |
|
"eval_runtime": 12.2526, |
|
"eval_samples_per_second": 71.168, |
|
"eval_steps_per_second": 8.896, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.021091578572276993, |
|
"loss": 2.1041, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.9014425277709961, |
|
"eval_runtime": 12.1923, |
|
"eval_samples_per_second": 71.52, |
|
"eval_steps_per_second": 8.94, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.1414848566055298, |
|
"eval_runtime": 12.2534, |
|
"eval_samples_per_second": 71.164, |
|
"eval_steps_per_second": 8.896, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.020497683810428793, |
|
"loss": 2.2236, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.9049626588821411, |
|
"eval_runtime": 12.5976, |
|
"eval_samples_per_second": 69.219, |
|
"eval_steps_per_second": 8.652, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.53812575340271, |
|
"eval_runtime": 12.192, |
|
"eval_samples_per_second": 71.522, |
|
"eval_steps_per_second": 8.94, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 3.9090523719787598, |
|
"eval_runtime": 12.1436, |
|
"eval_samples_per_second": 71.807, |
|
"eval_steps_per_second": 8.976, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.01990378904858059, |
|
"loss": 1.9257, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.44380733944954126, |
|
"eval_loss": 1.3826260566711426, |
|
"eval_runtime": 12.394, |
|
"eval_samples_per_second": 70.357, |
|
"eval_steps_per_second": 8.795, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7106401920318604, |
|
"eval_runtime": 12.254, |
|
"eval_samples_per_second": 71.161, |
|
"eval_steps_per_second": 8.895, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.019309894286732392, |
|
"loss": 1.9533, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.6487476825714111, |
|
"eval_runtime": 13.5392, |
|
"eval_samples_per_second": 64.406, |
|
"eval_steps_per_second": 8.051, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.4527719020843506, |
|
"eval_runtime": 12.2271, |
|
"eval_samples_per_second": 71.317, |
|
"eval_steps_per_second": 8.915, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.1699163913726807, |
|
"eval_runtime": 12.1388, |
|
"eval_samples_per_second": 71.836, |
|
"eval_steps_per_second": 8.979, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.01871599952488419, |
|
"loss": 1.7969, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.155882716178894, |
|
"eval_runtime": 12.1707, |
|
"eval_samples_per_second": 71.648, |
|
"eval_steps_per_second": 8.956, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.0576764345169067, |
|
"eval_runtime": 12.3024, |
|
"eval_samples_per_second": 70.88, |
|
"eval_steps_per_second": 8.86, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.01812210476303599, |
|
"loss": 1.8048, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.1585994958877563, |
|
"eval_runtime": 12.8412, |
|
"eval_samples_per_second": 67.906, |
|
"eval_steps_per_second": 8.488, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7068227529525757, |
|
"eval_runtime": 12.4262, |
|
"eval_samples_per_second": 70.174, |
|
"eval_steps_per_second": 8.772, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7638933658599854, |
|
"eval_runtime": 12.0016, |
|
"eval_samples_per_second": 72.657, |
|
"eval_steps_per_second": 9.082, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.01752821000118779, |
|
"loss": 2.0729, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.188310146331787, |
|
"eval_runtime": 13.0443, |
|
"eval_samples_per_second": 66.849, |
|
"eval_steps_per_second": 8.356, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.6973594427108765, |
|
"eval_runtime": 11.5707, |
|
"eval_samples_per_second": 75.363, |
|
"eval_steps_per_second": 9.42, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.016934315239339588, |
|
"loss": 1.7558, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.8548436164855957, |
|
"eval_runtime": 13.0206, |
|
"eval_samples_per_second": 66.971, |
|
"eval_steps_per_second": 8.371, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7331790924072266, |
|
"eval_runtime": 12.107, |
|
"eval_samples_per_second": 72.024, |
|
"eval_steps_per_second": 9.003, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.7065454721450806, |
|
"eval_runtime": 12.0677, |
|
"eval_samples_per_second": 72.259, |
|
"eval_steps_per_second": 9.032, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.016340420477491388, |
|
"loss": 1.931, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 3.5782742500305176, |
|
"eval_runtime": 12.3183, |
|
"eval_samples_per_second": 70.789, |
|
"eval_steps_per_second": 8.849, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.2677643299102783, |
|
"eval_runtime": 11.6938, |
|
"eval_samples_per_second": 74.569, |
|
"eval_steps_per_second": 9.321, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.015746525715643188, |
|
"loss": 1.739, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.367167353630066, |
|
"eval_runtime": 12.6396, |
|
"eval_samples_per_second": 68.99, |
|
"eval_steps_per_second": 8.624, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.4737409353256226, |
|
"eval_runtime": 12.3657, |
|
"eval_samples_per_second": 70.517, |
|
"eval_steps_per_second": 8.815, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8329254984855652, |
|
"eval_runtime": 12.1625, |
|
"eval_samples_per_second": 71.696, |
|
"eval_steps_per_second": 8.962, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.015152630953794988, |
|
"loss": 1.655, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.237131118774414, |
|
"eval_runtime": 12.2197, |
|
"eval_samples_per_second": 71.36, |
|
"eval_steps_per_second": 8.92, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 3.846944570541382, |
|
"eval_runtime": 12.0155, |
|
"eval_samples_per_second": 72.573, |
|
"eval_steps_per_second": 9.072, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.014558736191946788, |
|
"loss": 1.7284, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.403191328048706, |
|
"eval_runtime": 12.6645, |
|
"eval_samples_per_second": 68.854, |
|
"eval_steps_per_second": 8.607, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1265949010849, |
|
"eval_runtime": 12.3046, |
|
"eval_samples_per_second": 70.868, |
|
"eval_steps_per_second": 8.859, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.9130467176437378, |
|
"eval_runtime": 12.0511, |
|
"eval_samples_per_second": 72.359, |
|
"eval_steps_per_second": 9.045, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.013964841430098586, |
|
"loss": 1.5742, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.832761824131012, |
|
"eval_runtime": 12.2374, |
|
"eval_samples_per_second": 71.257, |
|
"eval_steps_per_second": 8.907, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 3.8501062393188477, |
|
"eval_runtime": 12.573, |
|
"eval_samples_per_second": 69.355, |
|
"eval_steps_per_second": 8.669, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.013370946668250385, |
|
"loss": 1.7039, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.2914493083953857, |
|
"eval_runtime": 12.0326, |
|
"eval_samples_per_second": 72.47, |
|
"eval_steps_per_second": 9.059, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.136923909187317, |
|
"eval_runtime": 11.7256, |
|
"eval_samples_per_second": 74.367, |
|
"eval_steps_per_second": 9.296, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7574475407600403, |
|
"eval_runtime": 12.2584, |
|
"eval_samples_per_second": 71.135, |
|
"eval_steps_per_second": 8.892, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.012777051906402184, |
|
"loss": 1.4352, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7623356580734253, |
|
"eval_runtime": 12.706, |
|
"eval_samples_per_second": 68.629, |
|
"eval_steps_per_second": 8.579, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.6579828262329102, |
|
"eval_runtime": 12.16, |
|
"eval_samples_per_second": 71.71, |
|
"eval_steps_per_second": 8.964, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.012183157144553985, |
|
"loss": 1.6328, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_accuracy": 0.481651376146789, |
|
"eval_loss": 0.693511426448822, |
|
"eval_runtime": 12.7779, |
|
"eval_samples_per_second": 68.243, |
|
"eval_steps_per_second": 8.53, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.6989684700965881, |
|
"eval_runtime": 12.0393, |
|
"eval_samples_per_second": 72.43, |
|
"eval_steps_per_second": 9.054, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7303033471107483, |
|
"eval_runtime": 12.3696, |
|
"eval_samples_per_second": 70.496, |
|
"eval_steps_per_second": 8.812, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.011589262382705785, |
|
"loss": 1.4498, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.675624132156372, |
|
"eval_runtime": 12.6317, |
|
"eval_samples_per_second": 69.032, |
|
"eval_steps_per_second": 8.629, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.0083491802215576, |
|
"eval_runtime": 12.349, |
|
"eval_samples_per_second": 70.613, |
|
"eval_steps_per_second": 8.827, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.010995367620857583, |
|
"loss": 1.4022, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7790195345878601, |
|
"eval_runtime": 11.9659, |
|
"eval_samples_per_second": 72.874, |
|
"eval_steps_per_second": 9.109, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.296112060546875, |
|
"eval_runtime": 13.0796, |
|
"eval_samples_per_second": 66.669, |
|
"eval_steps_per_second": 8.334, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7208316326141357, |
|
"eval_runtime": 12.4652, |
|
"eval_samples_per_second": 69.955, |
|
"eval_steps_per_second": 8.744, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.010401472859009383, |
|
"loss": 1.4503, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8011333346366882, |
|
"eval_runtime": 12.77, |
|
"eval_samples_per_second": 68.285, |
|
"eval_steps_per_second": 8.536, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8194194436073303, |
|
"eval_runtime": 12.2506, |
|
"eval_samples_per_second": 71.18, |
|
"eval_steps_per_second": 8.898, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.009807578097161183, |
|
"loss": 1.3401, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.9209619760513306, |
|
"eval_runtime": 12.7245, |
|
"eval_samples_per_second": 68.529, |
|
"eval_steps_per_second": 8.566, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.4954128440366973, |
|
"eval_loss": 2.172947645187378, |
|
"eval_runtime": 13.3907, |
|
"eval_samples_per_second": 65.12, |
|
"eval_steps_per_second": 8.14, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.8217873573303223, |
|
"eval_runtime": 12.0414, |
|
"eval_samples_per_second": 72.417, |
|
"eval_steps_per_second": 9.052, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.009213683335312983, |
|
"loss": 1.515, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.8880350589752197, |
|
"eval_runtime": 12.7892, |
|
"eval_samples_per_second": 68.183, |
|
"eval_steps_per_second": 8.523, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8059159517288208, |
|
"eval_runtime": 12.8563, |
|
"eval_samples_per_second": 67.827, |
|
"eval_steps_per_second": 8.478, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.008619788573464782, |
|
"loss": 1.2519, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1668144464492798, |
|
"eval_runtime": 12.0845, |
|
"eval_samples_per_second": 72.158, |
|
"eval_steps_per_second": 9.02, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.8440486192703247, |
|
"eval_runtime": 11.5453, |
|
"eval_samples_per_second": 75.529, |
|
"eval_steps_per_second": 9.441, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.482177972793579, |
|
"eval_runtime": 11.5533, |
|
"eval_samples_per_second": 75.476, |
|
"eval_steps_per_second": 9.435, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.008025893811616582, |
|
"loss": 1.2221, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.6977333426475525, |
|
"eval_runtime": 12.3902, |
|
"eval_samples_per_second": 70.378, |
|
"eval_steps_per_second": 8.797, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.3418121337890625, |
|
"eval_runtime": 12.4195, |
|
"eval_samples_per_second": 70.212, |
|
"eval_steps_per_second": 8.776, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00743199904976838, |
|
"loss": 1.1201, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7915144562721252, |
|
"eval_runtime": 12.717, |
|
"eval_samples_per_second": 68.57, |
|
"eval_steps_per_second": 8.571, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.961918830871582, |
|
"eval_runtime": 13.173, |
|
"eval_samples_per_second": 66.196, |
|
"eval_steps_per_second": 8.275, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.220428228378296, |
|
"eval_runtime": 12.2958, |
|
"eval_samples_per_second": 70.918, |
|
"eval_steps_per_second": 8.865, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.006838104287920181, |
|
"loss": 1.0869, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.6541168689727783, |
|
"eval_runtime": 12.5954, |
|
"eval_samples_per_second": 69.232, |
|
"eval_steps_per_second": 8.654, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.7402684688568115, |
|
"eval_runtime": 12.7516, |
|
"eval_samples_per_second": 68.383, |
|
"eval_steps_per_second": 8.548, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00624420952607198, |
|
"loss": 1.0804, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.2037415504455566, |
|
"eval_runtime": 13.138, |
|
"eval_samples_per_second": 66.372, |
|
"eval_steps_per_second": 8.297, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7337152361869812, |
|
"eval_runtime": 11.6201, |
|
"eval_samples_per_second": 75.043, |
|
"eval_steps_per_second": 9.38, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.8853695392608643, |
|
"eval_runtime": 13.0127, |
|
"eval_samples_per_second": 67.012, |
|
"eval_steps_per_second": 8.376, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.005650314764223779, |
|
"loss": 1.0025, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7113233804702759, |
|
"eval_runtime": 12.2168, |
|
"eval_samples_per_second": 71.377, |
|
"eval_steps_per_second": 8.922, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.0583016872406006, |
|
"eval_runtime": 12.0017, |
|
"eval_samples_per_second": 72.657, |
|
"eval_steps_per_second": 9.082, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.005056420002375579, |
|
"loss": 0.9856, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7113476395606995, |
|
"eval_runtime": 12.9697, |
|
"eval_samples_per_second": 67.234, |
|
"eval_steps_per_second": 8.404, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7482680678367615, |
|
"eval_runtime": 12.7958, |
|
"eval_samples_per_second": 68.147, |
|
"eval_steps_per_second": 8.518, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.6966097950935364, |
|
"eval_runtime": 12.2964, |
|
"eval_samples_per_second": 70.915, |
|
"eval_steps_per_second": 8.864, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.004462525240527379, |
|
"loss": 1.0364, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.0607110261917114, |
|
"eval_runtime": 12.067, |
|
"eval_samples_per_second": 72.263, |
|
"eval_steps_per_second": 9.033, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1381345987319946, |
|
"eval_runtime": 12.2151, |
|
"eval_samples_per_second": 71.387, |
|
"eval_steps_per_second": 8.923, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.003868630478679178, |
|
"loss": 0.9683, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7138826847076416, |
|
"eval_runtime": 12.7162, |
|
"eval_samples_per_second": 68.574, |
|
"eval_steps_per_second": 8.572, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.6361145973205566, |
|
"eval_runtime": 11.8698, |
|
"eval_samples_per_second": 73.464, |
|
"eval_steps_per_second": 9.183, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1421782970428467, |
|
"eval_runtime": 11.321, |
|
"eval_samples_per_second": 77.025, |
|
"eval_steps_per_second": 9.628, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0032747357168309774, |
|
"loss": 0.908, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7317955493927002, |
|
"eval_runtime": 12.2644, |
|
"eval_samples_per_second": 71.1, |
|
"eval_steps_per_second": 8.888, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.6962340474128723, |
|
"eval_runtime": 11.822, |
|
"eval_samples_per_second": 73.761, |
|
"eval_steps_per_second": 9.22, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.0026808409549827768, |
|
"loss": 0.8761, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.9568504691123962, |
|
"eval_runtime": 12.3037, |
|
"eval_samples_per_second": 70.873, |
|
"eval_steps_per_second": 8.859, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.9744265675544739, |
|
"eval_runtime": 11.9452, |
|
"eval_samples_per_second": 73.0, |
|
"eval_steps_per_second": 9.125, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.091610312461853, |
|
"eval_runtime": 12.8562, |
|
"eval_samples_per_second": 67.827, |
|
"eval_steps_per_second": 8.478, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0020869461931345766, |
|
"loss": 0.8209, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8106526136398315, |
|
"eval_runtime": 11.7827, |
|
"eval_samples_per_second": 74.007, |
|
"eval_steps_per_second": 9.251, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7210954427719116, |
|
"eval_runtime": 11.3303, |
|
"eval_samples_per_second": 76.962, |
|
"eval_steps_per_second": 9.62, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.001493051431286376, |
|
"loss": 0.8008, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.49770642201834864, |
|
"eval_loss": 0.6930689811706543, |
|
"eval_runtime": 12.4553, |
|
"eval_samples_per_second": 70.01, |
|
"eval_steps_per_second": 8.751, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7184925675392151, |
|
"eval_runtime": 12.3113, |
|
"eval_samples_per_second": 70.829, |
|
"eval_steps_per_second": 8.854, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.729166567325592, |
|
"eval_runtime": 11.2952, |
|
"eval_samples_per_second": 77.201, |
|
"eval_steps_per_second": 9.65, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0008991566694381756, |
|
"loss": 0.7738, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7178325057029724, |
|
"eval_runtime": 12.4374, |
|
"eval_samples_per_second": 70.111, |
|
"eval_steps_per_second": 8.764, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.6961101293563843, |
|
"eval_runtime": 12.5193, |
|
"eval_samples_per_second": 69.652, |
|
"eval_steps_per_second": 8.707, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.00030526190758997505, |
|
"loss": 0.755, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7101095914840698, |
|
"eval_runtime": 12.5702, |
|
"eval_samples_per_second": 69.371, |
|
"eval_steps_per_second": 8.671, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7049440145492554, |
|
"eval_runtime": 11.2857, |
|
"eval_samples_per_second": 77.266, |
|
"eval_steps_per_second": 9.658, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 25257, |
|
"total_flos": 1.070008779996841e+17, |
|
"train_loss": 1.708119561667774, |
|
"train_runtime": 7791.0638, |
|
"train_samples_per_second": 25.933, |
|
"train_steps_per_second": 3.242 |
|
} |
|
], |
|
"max_steps": 25257, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.070008779996841e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|