|
{ |
|
"best_metric": 0.2892984216768543, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Coptic-Scriptorium/checkpoint-500", |
|
"epoch": 76.92307692307692, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1362, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 4.966442953020135e-05, |
|
"loss": 1.9133, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 4.932885906040269e-05, |
|
"loss": 1.8901, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 4.8993288590604034e-05, |
|
"loss": 1.8743, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 1.8572, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"eval_accuracy": 0.2892984216768543, |
|
"eval_loss": 1.9470784664154053, |
|
"eval_runtime": 1.9235, |
|
"eval_samples_per_second": 198.078, |
|
"eval_steps_per_second": 24.955, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 4.832214765100672e-05, |
|
"loss": 1.8448, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 4.798657718120805e-05, |
|
"loss": 1.8263, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 20.51, |
|
"learning_rate": 4.76510067114094e-05, |
|
"loss": 1.7988, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 4.731543624161074e-05, |
|
"loss": 1.7733, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 1.7321, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"eval_accuracy": 0.27597846911778123, |
|
"eval_loss": 2.090031147003174, |
|
"eval_runtime": 1.9389, |
|
"eval_samples_per_second": 196.498, |
|
"eval_steps_per_second": 24.756, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"learning_rate": 4.664429530201342e-05, |
|
"loss": 1.6957, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 4.630872483221477e-05, |
|
"loss": 1.6365, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 4.597315436241611e-05, |
|
"loss": 1.5917, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 35.9, |
|
"learning_rate": 4.5637583892617453e-05, |
|
"loss": 1.5378, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 1.4759, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_accuracy": 0.2506158197244777, |
|
"eval_loss": 2.431821584701538, |
|
"eval_runtime": 1.933, |
|
"eval_samples_per_second": 197.107, |
|
"eval_steps_per_second": 24.832, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 41.03, |
|
"learning_rate": 4.496644295302014e-05, |
|
"loss": 1.4153, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 43.59, |
|
"learning_rate": 4.463087248322148e-05, |
|
"loss": 1.354, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 4.4295302013422824e-05, |
|
"loss": 1.2879, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 48.72, |
|
"learning_rate": 4.395973154362416e-05, |
|
"loss": 1.2279, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 1.1962, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"eval_accuracy": 0.2390292856491196, |
|
"eval_loss": 2.9406466484069824, |
|
"eval_runtime": 1.9342, |
|
"eval_samples_per_second": 196.977, |
|
"eval_steps_per_second": 24.816, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 53.85, |
|
"learning_rate": 4.328859060402685e-05, |
|
"loss": 1.1217, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 56.41, |
|
"learning_rate": 4.295302013422819e-05, |
|
"loss": 1.0833, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"learning_rate": 4.2617449664429534e-05, |
|
"loss": 1.0293, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"learning_rate": 4.228187919463087e-05, |
|
"loss": 0.9821, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.9384, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"eval_accuracy": 0.22935863516102545, |
|
"eval_loss": 3.6179699897766113, |
|
"eval_runtime": 1.942, |
|
"eval_samples_per_second": 196.187, |
|
"eval_steps_per_second": 24.716, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 4.161073825503356e-05, |
|
"loss": 0.8994, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 69.23, |
|
"learning_rate": 4.1275167785234905e-05, |
|
"loss": 0.8775, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 71.79, |
|
"learning_rate": 4.0942953020134226e-05, |
|
"loss": 0.8304, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 74.36, |
|
"learning_rate": 4.060738255033557e-05, |
|
"loss": 0.8287, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"learning_rate": 4.027181208053691e-05, |
|
"loss": 0.7909, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_accuracy": 0.23273423957668096, |
|
"eval_loss": 4.074990749359131, |
|
"eval_runtime": 1.9336, |
|
"eval_samples_per_second": 197.047, |
|
"eval_steps_per_second": 24.825, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"step": 3000, |
|
"total_flos": 1.2335402138867712e+16, |
|
"train_loss": 1.4148986282348632, |
|
"train_runtime": 758.2535, |
|
"train_samples_per_second": 633.034, |
|
"train_steps_per_second": 19.782 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 385, |
|
"total_flos": 1.2335402138867712e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|