{ "best_metric": 0.2892984216768543, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Coptic-Scriptorium/checkpoint-500", "epoch": 76.92307692307692, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.56, "learning_rate": 5e-05, "loss": 2.1362, "step": 100 }, { "epoch": 5.13, "learning_rate": 4.966442953020135e-05, "loss": 1.9133, "step": 200 }, { "epoch": 7.69, "learning_rate": 4.932885906040269e-05, "loss": 1.8901, "step": 300 }, { "epoch": 10.26, "learning_rate": 4.8993288590604034e-05, "loss": 1.8743, "step": 400 }, { "epoch": 12.82, "learning_rate": 4.865771812080537e-05, "loss": 1.8572, "step": 500 }, { "epoch": 12.82, "eval_accuracy": 0.2892984216768543, "eval_loss": 1.9470784664154053, "eval_runtime": 1.9235, "eval_samples_per_second": 198.078, "eval_steps_per_second": 24.955, "step": 500 }, { "epoch": 15.38, "learning_rate": 4.832214765100672e-05, "loss": 1.8448, "step": 600 }, { "epoch": 17.95, "learning_rate": 4.798657718120805e-05, "loss": 1.8263, "step": 700 }, { "epoch": 20.51, "learning_rate": 4.76510067114094e-05, "loss": 1.7988, "step": 800 }, { "epoch": 23.08, "learning_rate": 4.731543624161074e-05, "loss": 1.7733, "step": 900 }, { "epoch": 25.64, "learning_rate": 4.697986577181208e-05, "loss": 1.7321, "step": 1000 }, { "epoch": 25.64, "eval_accuracy": 0.27597846911778123, "eval_loss": 2.090031147003174, "eval_runtime": 1.9389, "eval_samples_per_second": 196.498, "eval_steps_per_second": 24.756, "step": 1000 }, { "epoch": 28.21, "learning_rate": 4.664429530201342e-05, "loss": 1.6957, "step": 1100 }, { "epoch": 30.77, "learning_rate": 4.630872483221477e-05, "loss": 1.6365, "step": 1200 }, { "epoch": 33.33, "learning_rate": 4.597315436241611e-05, "loss": 1.5917, "step": 1300 }, { "epoch": 35.9, "learning_rate": 4.5637583892617453e-05, "loss": 1.5378, "step": 1400 }, { "epoch": 38.46, "learning_rate": 4.530201342281879e-05, "loss": 1.4759, "step": 1500 }, { "epoch": 38.46, "eval_accuracy": 0.2506158197244777, "eval_loss": 2.431821584701538, "eval_runtime": 1.933, "eval_samples_per_second": 197.107, "eval_steps_per_second": 24.832, "step": 1500 }, { "epoch": 41.03, "learning_rate": 4.496644295302014e-05, "loss": 1.4153, "step": 1600 }, { "epoch": 43.59, "learning_rate": 4.463087248322148e-05, "loss": 1.354, "step": 1700 }, { "epoch": 46.15, "learning_rate": 4.4295302013422824e-05, "loss": 1.2879, "step": 1800 }, { "epoch": 48.72, "learning_rate": 4.395973154362416e-05, "loss": 1.2279, "step": 1900 }, { "epoch": 51.28, "learning_rate": 4.36241610738255e-05, "loss": 1.1962, "step": 2000 }, { "epoch": 51.28, "eval_accuracy": 0.2390292856491196, "eval_loss": 2.9406466484069824, "eval_runtime": 1.9342, "eval_samples_per_second": 196.977, "eval_steps_per_second": 24.816, "step": 2000 }, { "epoch": 53.85, "learning_rate": 4.328859060402685e-05, "loss": 1.1217, "step": 2100 }, { "epoch": 56.41, "learning_rate": 4.295302013422819e-05, "loss": 1.0833, "step": 2200 }, { "epoch": 58.97, "learning_rate": 4.2617449664429534e-05, "loss": 1.0293, "step": 2300 }, { "epoch": 61.54, "learning_rate": 4.228187919463087e-05, "loss": 0.9821, "step": 2400 }, { "epoch": 64.1, "learning_rate": 4.194630872483222e-05, "loss": 0.9384, "step": 2500 }, { "epoch": 64.1, "eval_accuracy": 0.22935863516102545, "eval_loss": 3.6179699897766113, "eval_runtime": 1.942, "eval_samples_per_second": 196.187, "eval_steps_per_second": 24.716, "step": 2500 }, { "epoch": 66.67, "learning_rate": 4.161073825503356e-05, "loss": 0.8994, "step": 2600 }, { "epoch": 69.23, "learning_rate": 4.1275167785234905e-05, "loss": 0.8775, "step": 2700 }, { "epoch": 71.79, "learning_rate": 4.0942953020134226e-05, "loss": 0.8304, "step": 2800 }, { "epoch": 74.36, "learning_rate": 4.060738255033557e-05, "loss": 0.8287, "step": 2900 }, { "epoch": 76.92, "learning_rate": 4.027181208053691e-05, "loss": 0.7909, "step": 3000 }, { "epoch": 76.92, "eval_accuracy": 0.23273423957668096, "eval_loss": 4.074990749359131, "eval_runtime": 1.9336, "eval_samples_per_second": 197.047, "eval_steps_per_second": 24.825, "step": 3000 }, { "epoch": 76.92, "step": 3000, "total_flos": 1.2335402138867712e+16, "train_loss": 1.4148986282348632, "train_runtime": 758.2535, "train_samples_per_second": 633.034, "train_steps_per_second": 19.782 } ], "max_steps": 15000, "num_train_epochs": 385, "total_flos": 1.2335402138867712e+16, "trial_name": null, "trial_params": null }