{
  "best_metric": 0.0358840748667717,
  "best_model_checkpoint": "man_woman_face_image_detection/checkpoint-9945",
  "epoch": 15.0,
  "eval_steps": 500,
  "global_step": 9945,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7541478129713424,
      "grad_norm": 0.12809374928474426,
      "learning_rate": 9.54522486104093e-07,
      "loss": 0.0535,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9918687690023333,
      "eval_loss": 0.03707238286733627,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 139.741,
      "eval_samples_per_second": 101.209,
      "eval_steps_per_second": 12.652,
      "step": 663
    },
    {
      "epoch": 1.5082956259426847,
      "grad_norm": 0.07325731962919235,
      "learning_rate": 9.039919151086407e-07,
      "loss": 0.0514,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9917273562893304,
      "eval_loss": 0.037097617983818054,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 145.1641,
      "eval_samples_per_second": 97.428,
      "eval_steps_per_second": 12.179,
      "step": 1326
    },
    {
      "epoch": 2.262443438914027,
      "grad_norm": 0.12023092061281204,
      "learning_rate": 8.534613441131885e-07,
      "loss": 0.0536,
      "step": 1500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9915859435763276,
      "eval_loss": 0.03732353821396828,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 142.5995,
      "eval_samples_per_second": 99.18,
      "eval_steps_per_second": 12.398,
      "step": 1989
    },
    {
      "epoch": 3.0165912518853695,
      "grad_norm": 0.08819910883903503,
      "learning_rate": 8.029307731177362e-07,
      "loss": 0.0458,
      "step": 2000
    },
    {
      "epoch": 3.770739064856712,
      "grad_norm": 1.7117729187011719,
      "learning_rate": 7.524002021222839e-07,
      "loss": 0.0434,
      "step": 2500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9917980626458318,
      "eval_loss": 0.03692413866519928,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 137.5145,
      "eval_samples_per_second": 102.847,
      "eval_steps_per_second": 12.857,
      "step": 2652
    },
    {
      "epoch": 4.524886877828054,
      "grad_norm": 0.07585655897855759,
      "learning_rate": 7.018696311268317e-07,
      "loss": 0.0441,
      "step": 3000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9917980626458318,
      "eval_loss": 0.03679042309522629,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 136.8062,
      "eval_samples_per_second": 103.38,
      "eval_steps_per_second": 12.923,
      "step": 3315
    },
    {
      "epoch": 5.279034690799397,
      "grad_norm": 0.27443283796310425,
      "learning_rate": 6.513390601313794e-07,
      "loss": 0.0438,
      "step": 3500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9915859435763276,
      "eval_loss": 0.03690684959292412,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 136.7631,
      "eval_samples_per_second": 103.412,
      "eval_steps_per_second": 12.927,
      "step": 3978
    },
    {
      "epoch": 6.033182503770739,
      "grad_norm": 0.3576681315898895,
      "learning_rate": 6.008084891359272e-07,
      "loss": 0.0451,
      "step": 4000
    },
    {
      "epoch": 6.787330316742081,
      "grad_norm": 0.7324354648590088,
      "learning_rate": 5.50277918140475e-07,
      "loss": 0.0428,
      "step": 4500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9915859435763276,
      "eval_loss": 0.036498043686151505,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 136.3477,
      "eval_samples_per_second": 103.727,
      "eval_steps_per_second": 12.967,
      "step": 4641
    },
    {
      "epoch": 7.541478129713424,
      "grad_norm": 10.407703399658203,
      "learning_rate": 4.997473471450227e-07,
      "loss": 0.0377,
      "step": 5000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9915859435763276,
      "eval_loss": 0.03636253997683525,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 136.8932,
      "eval_samples_per_second": 103.314,
      "eval_steps_per_second": 12.915,
      "step": 5304
    },
    {
      "epoch": 8.295625942684767,
      "grad_norm": 0.776454508304596,
      "learning_rate": 4.492167761495705e-07,
      "loss": 0.0415,
      "step": 5500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9917273562893304,
      "eval_loss": 0.036238111555576324,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 140.2201,
      "eval_samples_per_second": 100.863,
      "eval_steps_per_second": 12.609,
      "step": 5967
    },
    {
      "epoch": 9.049773755656108,
      "grad_norm": 0.08934183418750763,
      "learning_rate": 3.986862051541182e-07,
      "loss": 0.0377,
      "step": 6000
    },
    {
      "epoch": 9.803921568627452,
      "grad_norm": 0.10604743659496307,
      "learning_rate": 3.48155634158666e-07,
      "loss": 0.0397,
      "step": 6500
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9920101817153362,
      "eval_loss": 0.036092888563871384,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 138.0603,
      "eval_samples_per_second": 102.441,
      "eval_steps_per_second": 12.806,
      "step": 6630
    },
    {
      "epoch": 10.558069381598793,
      "grad_norm": 0.11123999208211899,
      "learning_rate": 2.976250631632137e-07,
      "loss": 0.0389,
      "step": 7000
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9918687690023333,
      "eval_loss": 0.035989925265312195,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 137.8038,
      "eval_samples_per_second": 102.631,
      "eval_steps_per_second": 12.83,
      "step": 7293
    },
    {
      "epoch": 11.312217194570136,
      "grad_norm": 2.75396990776062,
      "learning_rate": 2.470944921677615e-07,
      "loss": 0.0397,
      "step": 7500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9919394753588348,
      "eval_loss": 0.03610728308558464,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 135.6518,
      "eval_samples_per_second": 104.26,
      "eval_steps_per_second": 13.033,
      "step": 7956
    },
    {
      "epoch": 12.066365007541478,
      "grad_norm": 0.13482791185379028,
      "learning_rate": 1.9656392117230926e-07,
      "loss": 0.0338,
      "step": 8000
    },
    {
      "epoch": 12.820512820512821,
      "grad_norm": 0.09956225007772446,
      "learning_rate": 1.46033350176857e-07,
      "loss": 0.0387,
      "step": 8500
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9917980626458318,
      "eval_loss": 0.036081310361623764,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 135.9098,
      "eval_samples_per_second": 104.062,
      "eval_steps_per_second": 13.009,
      "step": 8619
    },
    {
      "epoch": 13.574660633484163,
      "grad_norm": 2.5569167137145996,
      "learning_rate": 9.550277918140474e-08,
      "loss": 0.0308,
      "step": 9000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.9920101817153362,
      "eval_loss": 0.03591620549559593,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 138.98,
      "eval_samples_per_second": 101.763,
      "eval_steps_per_second": 12.721,
      "step": 9282
    },
    {
      "epoch": 14.328808446455506,
      "grad_norm": 0.17602261900901794,
      "learning_rate": 4.49722081859525e-08,
      "loss": 0.0355,
      "step": 9500
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9920101817153362,
      "eval_loss": 0.0358840748667717,
      "eval_model_preparation_time": 0.0055,
      "eval_runtime": 140.3453,
      "eval_samples_per_second": 100.773,
      "eval_steps_per_second": 12.597,
      "step": 9945
    }
  ],
  "logging_steps": 500,
  "max_steps": 9945,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.46575636354151e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}