{
  "best_metric": 1.0,
  "best_model_checkpoint": "deit-base-distilled-patch16-224-hasta-75-fold1/checkpoint-17",
  "epoch": 100.0,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.4166666666666667,
      "eval_loss": 1.0838725566864014,
      "eval_runtime": 0.1691,
      "eval_samples_per_second": 70.976,
      "eval_steps_per_second": 5.915,
      "step": 1
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6666666666666666,
      "eval_loss": 0.8911231160163879,
      "eval_runtime": 0.1696,
      "eval_samples_per_second": 70.757,
      "eval_steps_per_second": 5.896,
      "step": 2
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.75,
      "eval_loss": 0.5836748480796814,
      "eval_runtime": 0.1676,
      "eval_samples_per_second": 71.579,
      "eval_steps_per_second": 5.965,
      "step": 3
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.34806403517723083,
      "eval_runtime": 0.1645,
      "eval_samples_per_second": 72.934,
      "eval_steps_per_second": 6.078,
      "step": 4
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.2815200686454773,
      "eval_runtime": 0.164,
      "eval_samples_per_second": 73.162,
      "eval_steps_per_second": 6.097,
      "step": 5
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.28391170501708984,
      "eval_runtime": 0.1402,
      "eval_samples_per_second": 85.569,
      "eval_steps_per_second": 7.131,
      "step": 6
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.2837619185447693,
      "eval_runtime": 0.1695,
      "eval_samples_per_second": 70.809,
      "eval_steps_per_second": 5.901,
      "step": 7
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.26923567056655884,
      "eval_runtime": 0.1682,
      "eval_samples_per_second": 71.361,
      "eval_steps_per_second": 5.947,
      "step": 8
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.27014538645744324,
      "eval_runtime": 0.159,
      "eval_samples_per_second": 75.493,
      "eval_steps_per_second": 6.291,
      "step": 9
    },
    {
      "epoch": 10.0,
      "grad_norm": 1.2129414081573486,
      "learning_rate": 5e-05,
      "loss": 0.3107,
      "step": 10
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.33633777499198914,
      "eval_runtime": 0.1692,
      "eval_samples_per_second": 70.904,
      "eval_steps_per_second": 5.909,
      "step": 10
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.38160809874534607,
      "eval_runtime": 0.1738,
      "eval_samples_per_second": 69.048,
      "eval_steps_per_second": 5.754,
      "step": 11
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.3427475392818451,
      "eval_runtime": 0.1732,
      "eval_samples_per_second": 69.302,
      "eval_steps_per_second": 5.775,
      "step": 12
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.27277496457099915,
      "eval_runtime": 0.1761,
      "eval_samples_per_second": 68.13,
      "eval_steps_per_second": 5.677,
      "step": 13
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.22727306187152863,
      "eval_runtime": 0.1733,
      "eval_samples_per_second": 69.232,
      "eval_steps_per_second": 5.769,
      "step": 14
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.20517520606517792,
      "eval_runtime": 0.1738,
      "eval_samples_per_second": 69.061,
      "eval_steps_per_second": 5.755,
      "step": 15
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.18395300209522247,
      "eval_runtime": 0.1698,
      "eval_samples_per_second": 70.673,
      "eval_steps_per_second": 5.889,
      "step": 16
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.19073201715946198,
      "eval_runtime": 0.172,
      "eval_samples_per_second": 69.748,
      "eval_steps_per_second": 5.812,
      "step": 17
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.17612700164318085,
      "eval_runtime": 0.1704,
      "eval_samples_per_second": 70.428,
      "eval_steps_per_second": 5.869,
      "step": 18
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.13020874559879303,
      "eval_runtime": 0.1765,
      "eval_samples_per_second": 67.981,
      "eval_steps_per_second": 5.665,
      "step": 19
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.7908862829208374,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.1503,
      "step": 20
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.09369927644729614,
      "eval_runtime": 0.1701,
      "eval_samples_per_second": 70.561,
      "eval_steps_per_second": 5.88,
      "step": 20
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.07672286778688431,
      "eval_runtime": 0.1649,
      "eval_samples_per_second": 72.764,
      "eval_steps_per_second": 6.064,
      "step": 21
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0641578882932663,
      "eval_runtime": 0.1749,
      "eval_samples_per_second": 68.596,
      "eval_steps_per_second": 5.716,
      "step": 22
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0730399563908577,
      "eval_runtime": 0.1666,
      "eval_samples_per_second": 72.022,
      "eval_steps_per_second": 6.002,
      "step": 23
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0937485471367836,
      "eval_runtime": 0.1714,
      "eval_samples_per_second": 70.03,
      "eval_steps_per_second": 5.836,
      "step": 24
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.07131379097700119,
      "eval_runtime": 0.1686,
      "eval_samples_per_second": 71.16,
      "eval_steps_per_second": 5.93,
      "step": 25
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0475582480430603,
      "eval_runtime": 0.1744,
      "eval_samples_per_second": 68.813,
      "eval_steps_per_second": 5.734,
      "step": 26
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.04617106541991234,
      "eval_runtime": 0.1731,
      "eval_samples_per_second": 69.315,
      "eval_steps_per_second": 5.776,
      "step": 27
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.05526432394981384,
      "eval_runtime": 0.1758,
      "eval_samples_per_second": 68.273,
      "eval_steps_per_second": 5.689,
      "step": 28
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.06893330812454224,
      "eval_runtime": 0.1778,
      "eval_samples_per_second": 67.474,
      "eval_steps_per_second": 5.623,
      "step": 29
    },
    {
      "epoch": 30.0,
      "grad_norm": 1.3284376859664917,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.068,
      "step": 30
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.06763923913240433,
      "eval_runtime": 0.1692,
      "eval_samples_per_second": 70.913,
      "eval_steps_per_second": 5.909,
      "step": 30
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.05344026908278465,
      "eval_runtime": 0.1748,
      "eval_samples_per_second": 68.65,
      "eval_steps_per_second": 5.721,
      "step": 31
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.04723837599158287,
      "eval_runtime": 0.1773,
      "eval_samples_per_second": 67.677,
      "eval_steps_per_second": 5.64,
      "step": 32
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.057503703981637955,
      "eval_runtime": 0.171,
      "eval_samples_per_second": 70.184,
      "eval_steps_per_second": 5.849,
      "step": 33
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.061442967504262924,
      "eval_runtime": 0.1735,
      "eval_samples_per_second": 69.145,
      "eval_steps_per_second": 5.762,
      "step": 34
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.05957895517349243,
      "eval_runtime": 0.1743,
      "eval_samples_per_second": 68.863,
      "eval_steps_per_second": 5.739,
      "step": 35
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0503091923892498,
      "eval_runtime": 0.1743,
      "eval_samples_per_second": 68.828,
      "eval_steps_per_second": 5.736,
      "step": 36
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.05713028088212013,
      "eval_runtime": 0.1741,
      "eval_samples_per_second": 68.934,
      "eval_steps_per_second": 5.745,
      "step": 37
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.06938361376523972,
      "eval_runtime": 0.1755,
      "eval_samples_per_second": 68.376,
      "eval_steps_per_second": 5.698,
      "step": 38
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.0869433656334877,
      "eval_runtime": 0.1709,
      "eval_samples_per_second": 70.212,
      "eval_steps_per_second": 5.851,
      "step": 39
    },
    {
      "epoch": 40.0,
      "grad_norm": 1.0563879013061523,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.0416,
      "step": 40
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.10385680943727493,
      "eval_runtime": 0.1741,
      "eval_samples_per_second": 68.935,
      "eval_steps_per_second": 5.745,
      "step": 40
    },
    {
      "epoch": 41.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.11296293884515762,
      "eval_runtime": 0.1682,
      "eval_samples_per_second": 71.335,
      "eval_steps_per_second": 5.945,
      "step": 41
    },
    {
      "epoch": 42.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1127309575676918,
      "eval_runtime": 0.1673,
      "eval_samples_per_second": 71.714,
      "eval_steps_per_second": 5.976,
      "step": 42
    },
    {
      "epoch": 43.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1028260588645935,
      "eval_runtime": 0.1773,
      "eval_samples_per_second": 67.664,
      "eval_steps_per_second": 5.639,
      "step": 43
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.08404495567083359,
      "eval_runtime": 0.1803,
      "eval_samples_per_second": 66.548,
      "eval_steps_per_second": 5.546,
      "step": 44
    },
    {
      "epoch": 45.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.07033081352710724,
      "eval_runtime": 0.1683,
      "eval_samples_per_second": 71.306,
      "eval_steps_per_second": 5.942,
      "step": 45
    },
    {
      "epoch": 46.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.056403230875730515,
      "eval_runtime": 0.1695,
      "eval_samples_per_second": 70.785,
      "eval_steps_per_second": 5.899,
      "step": 46
    },
    {
      "epoch": 47.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.05716688930988312,
      "eval_runtime": 0.1751,
      "eval_samples_per_second": 68.533,
      "eval_steps_per_second": 5.711,
      "step": 47
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.06894658505916595,
      "eval_runtime": 0.1733,
      "eval_samples_per_second": 69.246,
      "eval_steps_per_second": 5.77,
      "step": 48
    },
    {
      "epoch": 49.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.09668171405792236,
      "eval_runtime": 0.1834,
      "eval_samples_per_second": 65.426,
      "eval_steps_per_second": 5.452,
      "step": 49
    },
    {
      "epoch": 50.0,
      "grad_norm": 1.5291131734848022,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.048,
      "step": 50
    },
    {
      "epoch": 50.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.14398609101772308,
      "eval_runtime": 0.163,
      "eval_samples_per_second": 73.617,
      "eval_steps_per_second": 6.135,
      "step": 50
    },
    {
      "epoch": 51.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.16672177612781525,
      "eval_runtime": 0.1711,
      "eval_samples_per_second": 70.115,
      "eval_steps_per_second": 5.843,
      "step": 51
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1823185831308365,
      "eval_runtime": 0.1741,
      "eval_samples_per_second": 68.928,
      "eval_steps_per_second": 5.744,
      "step": 52
    },
    {
      "epoch": 53.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.17343102395534515,
      "eval_runtime": 0.1673,
      "eval_samples_per_second": 71.742,
      "eval_steps_per_second": 5.979,
      "step": 53
    },
    {
      "epoch": 54.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.154280886054039,
      "eval_runtime": 0.1801,
      "eval_samples_per_second": 66.619,
      "eval_steps_per_second": 5.552,
      "step": 54
    },
    {
      "epoch": 55.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.13834944367408752,
      "eval_runtime": 0.1765,
      "eval_samples_per_second": 68.007,
      "eval_steps_per_second": 5.667,
      "step": 55
    },
    {
      "epoch": 56.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.12664343416690826,
      "eval_runtime": 0.1668,
      "eval_samples_per_second": 71.951,
      "eval_steps_per_second": 5.996,
      "step": 56
    },
    {
      "epoch": 57.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.10125861316919327,
      "eval_runtime": 0.1697,
      "eval_samples_per_second": 70.734,
      "eval_steps_per_second": 5.894,
      "step": 57
    },
    {
      "epoch": 58.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.08296892791986465,
      "eval_runtime": 0.1741,
      "eval_samples_per_second": 68.927,
      "eval_steps_per_second": 5.744,
      "step": 58
    },
    {
      "epoch": 59.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.07803661376237869,
      "eval_runtime": 0.1724,
      "eval_samples_per_second": 69.607,
      "eval_steps_per_second": 5.801,
      "step": 59
    },
    {
      "epoch": 60.0,
      "grad_norm": 0.15691009163856506,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.0118,
      "step": 60
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.07563833892345428,
      "eval_runtime": 0.1682,
      "eval_samples_per_second": 71.331,
      "eval_steps_per_second": 5.944,
      "step": 60
    },
    {
      "epoch": 61.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.07225405424833298,
      "eval_runtime": 0.1725,
      "eval_samples_per_second": 69.554,
      "eval_steps_per_second": 5.796,
      "step": 61
    },
    {
      "epoch": 62.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.056341033428907394,
      "eval_runtime": 0.1665,
      "eval_samples_per_second": 72.093,
      "eval_steps_per_second": 6.008,
      "step": 62
    },
    {
      "epoch": 63.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.04702509567141533,
      "eval_runtime": 0.1841,
      "eval_samples_per_second": 65.192,
      "eval_steps_per_second": 5.433,
      "step": 63
    },
    {
      "epoch": 64.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.046886246651411057,
      "eval_runtime": 0.1732,
      "eval_samples_per_second": 69.274,
      "eval_steps_per_second": 5.773,
      "step": 64
    },
    {
      "epoch": 65.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.052169691771268845,
      "eval_runtime": 0.1736,
      "eval_samples_per_second": 69.127,
      "eval_steps_per_second": 5.761,
      "step": 65
    },
    {
      "epoch": 66.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.05759477615356445,
      "eval_runtime": 0.1746,
      "eval_samples_per_second": 68.735,
      "eval_steps_per_second": 5.728,
      "step": 66
    },
    {
      "epoch": 67.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.06394201517105103,
      "eval_runtime": 0.1687,
      "eval_samples_per_second": 71.123,
      "eval_steps_per_second": 5.927,
      "step": 67
    },
    {
      "epoch": 68.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.08266932517290115,
      "eval_runtime": 0.1723,
      "eval_samples_per_second": 69.652,
      "eval_steps_per_second": 5.804,
      "step": 68
    },
    {
      "epoch": 69.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.10889662057161331,
      "eval_runtime": 0.1757,
      "eval_samples_per_second": 68.286,
      "eval_steps_per_second": 5.691,
      "step": 69
    },
    {
      "epoch": 70.0,
      "grad_norm": 0.38948971033096313,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0271,
      "step": 70
    },
    {
      "epoch": 70.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.13427671790122986,
      "eval_runtime": 0.171,
      "eval_samples_per_second": 70.179,
      "eval_steps_per_second": 5.848,
      "step": 70
    },
    {
      "epoch": 71.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.15138830244541168,
      "eval_runtime": 0.1681,
      "eval_samples_per_second": 71.38,
      "eval_steps_per_second": 5.948,
      "step": 71
    },
    {
      "epoch": 72.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.15523149073123932,
      "eval_runtime": 0.1734,
      "eval_samples_per_second": 69.188,
      "eval_steps_per_second": 5.766,
      "step": 72
    },
    {
      "epoch": 73.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1500328928232193,
      "eval_runtime": 0.1728,
      "eval_samples_per_second": 69.463,
      "eval_steps_per_second": 5.789,
      "step": 73
    },
    {
      "epoch": 74.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.13924731314182281,
      "eval_runtime": 0.1749,
      "eval_samples_per_second": 68.628,
      "eval_steps_per_second": 5.719,
      "step": 74
    },
    {
      "epoch": 75.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.12288343161344528,
      "eval_runtime": 0.1757,
      "eval_samples_per_second": 68.307,
      "eval_steps_per_second": 5.692,
      "step": 75
    },
    {
      "epoch": 76.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.10091481357812881,
      "eval_runtime": 0.1739,
      "eval_samples_per_second": 69.008,
      "eval_steps_per_second": 5.751,
      "step": 76
    },
    {
      "epoch": 77.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.08581729978322983,
      "eval_runtime": 0.168,
      "eval_samples_per_second": 71.423,
      "eval_steps_per_second": 5.952,
      "step": 77
    },
    {
      "epoch": 78.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.08439955860376358,
      "eval_runtime": 0.1701,
      "eval_samples_per_second": 70.534,
      "eval_steps_per_second": 5.878,
      "step": 78
    },
    {
      "epoch": 79.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.08550930768251419,
      "eval_runtime": 0.1809,
      "eval_samples_per_second": 66.325,
      "eval_steps_per_second": 5.527,
      "step": 79
    },
    {
      "epoch": 80.0,
      "grad_norm": 1.0177083015441895,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.0462,
      "step": 80
    },
    {
      "epoch": 80.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.0972227081656456,
      "eval_runtime": 0.1692,
      "eval_samples_per_second": 70.917,
      "eval_steps_per_second": 5.91,
      "step": 80
    },
    {
      "epoch": 81.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.11395915597677231,
      "eval_runtime": 0.1682,
      "eval_samples_per_second": 71.336,
      "eval_steps_per_second": 5.945,
      "step": 81
    },
    {
      "epoch": 82.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.13984452188014984,
      "eval_runtime": 0.17,
      "eval_samples_per_second": 70.587,
      "eval_steps_per_second": 5.882,
      "step": 82
    },
    {
      "epoch": 83.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.16390566527843475,
      "eval_runtime": 0.1702,
      "eval_samples_per_second": 70.502,
      "eval_steps_per_second": 5.875,
      "step": 83
    },
    {
      "epoch": 84.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.18424014747142792,
      "eval_runtime": 0.1721,
      "eval_samples_per_second": 69.737,
      "eval_steps_per_second": 5.811,
      "step": 84
    },
    {
      "epoch": 85.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1938496083021164,
      "eval_runtime": 0.1864,
      "eval_samples_per_second": 64.387,
      "eval_steps_per_second": 5.366,
      "step": 85
    },
    {
      "epoch": 86.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.20000122487545013,
      "eval_runtime": 0.1802,
      "eval_samples_per_second": 66.596,
      "eval_steps_per_second": 5.55,
      "step": 86
    },
    {
      "epoch": 87.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.20081685483455658,
      "eval_runtime": 0.1707,
      "eval_samples_per_second": 70.294,
      "eval_steps_per_second": 5.858,
      "step": 87
    },
    {
      "epoch": 88.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.19491897523403168,
      "eval_runtime": 0.1725,
      "eval_samples_per_second": 69.551,
      "eval_steps_per_second": 5.796,
      "step": 88
    },
    {
      "epoch": 89.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.18955135345458984,
      "eval_runtime": 0.1729,
      "eval_samples_per_second": 69.415,
      "eval_steps_per_second": 5.785,
      "step": 89
    },
    {
      "epoch": 90.0,
      "grad_norm": 1.028542399406433,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.022,
      "step": 90
    },
    {
      "epoch": 90.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.17976105213165283,
      "eval_runtime": 0.1649,
      "eval_samples_per_second": 72.769,
      "eval_steps_per_second": 6.064,
      "step": 90
    },
    {
      "epoch": 91.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.16998781263828278,
      "eval_runtime": 0.1686,
      "eval_samples_per_second": 71.156,
      "eval_steps_per_second": 5.93,
      "step": 91
    },
    {
      "epoch": 92.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.161741703748703,
      "eval_runtime": 0.1757,
      "eval_samples_per_second": 68.313,
      "eval_steps_per_second": 5.693,
      "step": 92
    },
    {
      "epoch": 93.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1549077183008194,
      "eval_runtime": 0.1813,
      "eval_samples_per_second": 66.195,
      "eval_steps_per_second": 5.516,
      "step": 93
    },
    {
      "epoch": 94.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.14918744564056396,
      "eval_runtime": 0.1697,
      "eval_samples_per_second": 70.715,
      "eval_steps_per_second": 5.893,
      "step": 94
    },
    {
      "epoch": 95.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.14473901689052582,
      "eval_runtime": 0.1757,
      "eval_samples_per_second": 68.292,
      "eval_steps_per_second": 5.691,
      "step": 95
    },
    {
      "epoch": 96.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.14346013963222504,
      "eval_runtime": 0.1696,
      "eval_samples_per_second": 70.75,
      "eval_steps_per_second": 5.896,
      "step": 96
    },
    {
      "epoch": 97.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.14305876195430756,
      "eval_runtime": 0.168,
      "eval_samples_per_second": 71.43,
      "eval_steps_per_second": 5.952,
      "step": 97
    },
    {
      "epoch": 98.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1418064534664154,
      "eval_runtime": 0.1807,
      "eval_samples_per_second": 66.421,
      "eval_steps_per_second": 5.535,
      "step": 98
    },
    {
      "epoch": 99.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.14111365377902985,
      "eval_runtime": 0.1677,
      "eval_samples_per_second": 71.558,
      "eval_steps_per_second": 5.963,
      "step": 99
    },
    {
      "epoch": 100.0,
      "grad_norm": 1.2945746183395386,
      "learning_rate": 0.0,
      "loss": 0.0236,
      "step": 100
    },
    {
      "epoch": 100.0,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.1407879739999771,
      "eval_runtime": 0.1684,
      "eval_samples_per_second": 71.262,
      "eval_steps_per_second": 5.938,
      "step": 100
    },
    {
      "epoch": 100.0,
      "step": 100,
      "total_flos": 4.804632427843584e+17,
      "train_loss": 0.07493457712233066,
      "train_runtime": 422.9864,
      "train_samples_per_second": 14.658,
      "train_steps_per_second": 0.236
    },
    {
      "epoch": 100.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.19073201715946198,
      "eval_runtime": 0.2201,
      "eval_samples_per_second": 54.512,
      "eval_steps_per_second": 4.543,
      "step": 100
    }
  ],
  "logging_steps": 10,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.804632427843584e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
|