{
  "best_metric": 0.9091821374811841,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-pos-ud-Korean-GSD/checkpoint-5000",
  "epoch": 54.34782608695652,
  "global_step": 7500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.72,
      "learning_rate": 7.840000000000001e-05,
      "loss": 1.4677,
      "step": 100
    },
    {
      "epoch": 1.45,
      "learning_rate": 7.947382550335571e-05,
      "loss": 0.5809,
      "step": 200
    },
    {
      "epoch": 2.17,
      "learning_rate": 7.893691275167786e-05,
      "loss": 0.4184,
      "step": 300
    },
    {
      "epoch": 2.9,
      "learning_rate": 7.840000000000001e-05,
      "loss": 0.339,
      "step": 400
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.786308724832216e-05,
      "loss": 0.2624,
      "step": 500
    },
    {
      "epoch": 3.62,
      "eval_accuracy": 0.8917042983776551,
      "eval_loss": 0.3615211546421051,
      "eval_runtime": 3.8662,
      "eval_samples_per_second": 245.722,
      "eval_steps_per_second": 30.78,
      "step": 500
    },
    {
      "epoch": 4.35,
      "learning_rate": 7.73261744966443e-05,
      "loss": 0.2237,
      "step": 600
    },
    {
      "epoch": 5.07,
      "learning_rate": 7.678926174496645e-05,
      "loss": 0.1908,
      "step": 700
    },
    {
      "epoch": 5.8,
      "learning_rate": 7.62523489932886e-05,
      "loss": 0.1433,
      "step": 800
    },
    {
      "epoch": 6.52,
      "learning_rate": 7.571543624161075e-05,
      "loss": 0.1169,
      "step": 900
    },
    {
      "epoch": 7.25,
      "learning_rate": 7.51785234899329e-05,
      "loss": 0.1012,
      "step": 1000
    },
    {
      "epoch": 7.25,
      "eval_accuracy": 0.8995651446730223,
      "eval_loss": 0.4672793745994568,
      "eval_runtime": 3.904,
      "eval_samples_per_second": 243.342,
      "eval_steps_per_second": 30.482,
      "step": 1000
    },
    {
      "epoch": 7.97,
      "learning_rate": 7.464161073825505e-05,
      "loss": 0.0865,
      "step": 1100
    },
    {
      "epoch": 8.7,
      "learning_rate": 7.410469798657718e-05,
      "loss": 0.0661,
      "step": 1200
    },
    {
      "epoch": 9.42,
      "learning_rate": 7.356778523489933e-05,
      "loss": 0.0596,
      "step": 1300
    },
    {
      "epoch": 10.14,
      "learning_rate": 7.303087248322148e-05,
      "loss": 0.0566,
      "step": 1400
    },
    {
      "epoch": 10.87,
      "learning_rate": 7.249395973154363e-05,
      "loss": 0.0461,
      "step": 1500
    },
    {
      "epoch": 10.87,
      "eval_accuracy": 0.9004014049172102,
      "eval_loss": 0.5751976370811462,
      "eval_runtime": 3.877,
      "eval_samples_per_second": 245.034,
      "eval_steps_per_second": 30.694,
      "step": 1500
    },
    {
      "epoch": 11.59,
      "learning_rate": 7.195704697986577e-05,
      "loss": 0.0401,
      "step": 1600
    },
    {
      "epoch": 12.32,
      "learning_rate": 7.142013422818792e-05,
      "loss": 0.0363,
      "step": 1700
    },
    {
      "epoch": 13.04,
      "learning_rate": 7.088322147651007e-05,
      "loss": 0.0335,
      "step": 1800
    },
    {
      "epoch": 13.77,
      "learning_rate": 7.034630872483222e-05,
      "loss": 0.0307,
      "step": 1900
    },
    {
      "epoch": 14.49,
      "learning_rate": 6.980939597315437e-05,
      "loss": 0.0305,
      "step": 2000
    },
    {
      "epoch": 14.49,
      "eval_accuracy": 0.9002341528683726,
      "eval_loss": 0.628495991230011,
      "eval_runtime": 3.8938,
      "eval_samples_per_second": 243.98,
      "eval_steps_per_second": 30.562,
      "step": 2000
    },
    {
      "epoch": 15.22,
      "learning_rate": 6.927248322147651e-05,
      "loss": 0.0256,
      "step": 2100
    },
    {
      "epoch": 15.94,
      "learning_rate": 6.873557046979866e-05,
      "loss": 0.0243,
      "step": 2200
    },
    {
      "epoch": 16.67,
      "learning_rate": 6.819865771812081e-05,
      "loss": 0.0199,
      "step": 2300
    },
    {
      "epoch": 17.39,
      "learning_rate": 6.766174496644296e-05,
      "loss": 0.022,
      "step": 2400
    },
    {
      "epoch": 18.12,
      "learning_rate": 6.712483221476511e-05,
      "loss": 0.0189,
      "step": 2500
    },
    {
      "epoch": 18.12,
      "eval_accuracy": 0.9037464458939622,
      "eval_loss": 0.6671296954154968,
      "eval_runtime": 3.8657,
      "eval_samples_per_second": 245.753,
      "eval_steps_per_second": 30.784,
      "step": 2500
    },
    {
      "epoch": 18.84,
      "learning_rate": 6.658791946308726e-05,
      "loss": 0.0185,
      "step": 2600
    },
    {
      "epoch": 19.57,
      "learning_rate": 6.60510067114094e-05,
      "loss": 0.0173,
      "step": 2700
    },
    {
      "epoch": 20.29,
      "learning_rate": 6.551409395973155e-05,
      "loss": 0.0186,
      "step": 2800
    },
    {
      "epoch": 21.01,
      "learning_rate": 6.49771812080537e-05,
      "loss": 0.0179,
      "step": 2900
    },
    {
      "epoch": 21.74,
      "learning_rate": 6.444026845637585e-05,
      "loss": 0.0137,
      "step": 3000
    },
    {
      "epoch": 21.74,
      "eval_accuracy": 0.9047499581869878,
      "eval_loss": 0.6996186375617981,
      "eval_runtime": 3.8813,
      "eval_samples_per_second": 244.763,
      "eval_steps_per_second": 30.66,
      "step": 3000
    },
    {
      "epoch": 22.46,
      "learning_rate": 6.3903355704698e-05,
      "loss": 0.0131,
      "step": 3100
    },
    {
      "epoch": 23.19,
      "learning_rate": 6.336644295302015e-05,
      "loss": 0.0135,
      "step": 3200
    },
    {
      "epoch": 23.91,
      "learning_rate": 6.28295302013423e-05,
      "loss": 0.0138,
      "step": 3300
    },
    {
      "epoch": 24.64,
      "learning_rate": 6.229261744966444e-05,
      "loss": 0.0134,
      "step": 3400
    },
    {
      "epoch": 25.36,
      "learning_rate": 6.175570469798658e-05,
      "loss": 0.0114,
      "step": 3500
    },
    {
      "epoch": 25.36,
      "eval_accuracy": 0.9012376651613982,
      "eval_loss": 0.7291152477264404,
      "eval_runtime": 3.8811,
      "eval_samples_per_second": 244.777,
      "eval_steps_per_second": 30.662,
      "step": 3500
    },
    {
      "epoch": 26.09,
      "learning_rate": 6.121879194630873e-05,
      "loss": 0.0114,
      "step": 3600
    },
    {
      "epoch": 26.81,
      "learning_rate": 6.068187919463087e-05,
      "loss": 0.0124,
      "step": 3700
    },
    {
      "epoch": 27.54,
      "learning_rate": 6.014496644295302e-05,
      "loss": 0.0114,
      "step": 3800
    },
    {
      "epoch": 28.26,
      "learning_rate": 5.960805369127517e-05,
      "loss": 0.0095,
      "step": 3900
    },
    {
      "epoch": 28.99,
      "learning_rate": 5.907114093959732e-05,
      "loss": 0.0096,
      "step": 4000
    },
    {
      "epoch": 28.99,
      "eval_accuracy": 0.9044990801137314,
      "eval_loss": 0.755575954914093,
      "eval_runtime": 3.856,
      "eval_samples_per_second": 246.369,
      "eval_steps_per_second": 30.861,
      "step": 4000
    },
    {
      "epoch": 29.71,
      "learning_rate": 5.8534228187919466e-05,
      "loss": 0.0086,
      "step": 4100
    },
    {
      "epoch": 30.43,
      "learning_rate": 5.7997315436241614e-05,
      "loss": 0.0103,
      "step": 4200
    },
    {
      "epoch": 31.16,
      "learning_rate": 5.746040268456376e-05,
      "loss": 0.0085,
      "step": 4300
    },
    {
      "epoch": 31.88,
      "learning_rate": 5.692348993288591e-05,
      "loss": 0.0091,
      "step": 4400
    },
    {
      "epoch": 32.61,
      "learning_rate": 5.638657718120806e-05,
      "loss": 0.0078,
      "step": 4500
    },
    {
      "epoch": 32.61,
      "eval_accuracy": 0.9049172102358254,
      "eval_loss": 0.7404294013977051,
      "eval_runtime": 3.8744,
      "eval_samples_per_second": 245.198,
      "eval_steps_per_second": 30.714,
      "step": 4500
    },
    {
      "epoch": 33.33,
      "learning_rate": 5.58496644295302e-05,
      "loss": 0.0072,
      "step": 4600
    },
    {
      "epoch": 34.06,
      "learning_rate": 5.531275167785235e-05,
      "loss": 0.0086,
      "step": 4700
    },
    {
      "epoch": 34.78,
      "learning_rate": 5.47758389261745e-05,
      "loss": 0.0075,
      "step": 4800
    },
    {
      "epoch": 35.51,
      "learning_rate": 5.4238926174496645e-05,
      "loss": 0.0079,
      "step": 4900
    },
    {
      "epoch": 36.23,
      "learning_rate": 5.3702013422818794e-05,
      "loss": 0.0071,
      "step": 5000
    },
    {
      "epoch": 36.23,
      "eval_accuracy": 0.9091821374811841,
      "eval_loss": 0.7556995153427124,
      "eval_runtime": 3.9089,
      "eval_samples_per_second": 243.038,
      "eval_steps_per_second": 30.444,
      "step": 5000
    },
    {
      "epoch": 36.96,
      "learning_rate": 5.316510067114094e-05,
      "loss": 0.0068,
      "step": 5100
    },
    {
      "epoch": 37.68,
      "learning_rate": 5.262818791946309e-05,
      "loss": 0.0063,
      "step": 5200
    },
    {
      "epoch": 38.41,
      "learning_rate": 5.209127516778524e-05,
      "loss": 0.0082,
      "step": 5300
    },
    {
      "epoch": 39.13,
      "learning_rate": 5.155436241610739e-05,
      "loss": 0.0064,
      "step": 5400
    },
    {
      "epoch": 39.86,
      "learning_rate": 5.1017449664429535e-05,
      "loss": 0.0065,
      "step": 5500
    },
    {
      "epoch": 39.86,
      "eval_accuracy": 0.9068406087974578,
      "eval_loss": 0.7957345843315125,
      "eval_runtime": 3.8473,
      "eval_samples_per_second": 246.929,
      "eval_steps_per_second": 30.931,
      "step": 5500
    },
    {
      "epoch": 40.58,
      "learning_rate": 5.048053691275168e-05,
      "loss": 0.0072,
      "step": 5600
    },
    {
      "epoch": 41.3,
      "learning_rate": 4.994362416107383e-05,
      "loss": 0.0061,
      "step": 5700
    },
    {
      "epoch": 42.03,
      "learning_rate": 4.940671140939597e-05,
      "loss": 0.006,
      "step": 5800
    },
    {
      "epoch": 42.75,
      "learning_rate": 4.886979865771812e-05,
      "loss": 0.0053,
      "step": 5900
    },
    {
      "epoch": 43.48,
      "learning_rate": 4.833288590604027e-05,
      "loss": 0.0063,
      "step": 6000
    },
    {
      "epoch": 43.48,
      "eval_accuracy": 0.907175112895133,
      "eval_loss": 0.7736948728561401,
      "eval_runtime": 3.8584,
      "eval_samples_per_second": 246.219,
      "eval_steps_per_second": 30.842,
      "step": 6000
    },
    {
      "epoch": 44.2,
      "learning_rate": 4.779597315436242e-05,
      "loss": 0.0059,
      "step": 6100
    },
    {
      "epoch": 44.93,
      "learning_rate": 4.7259060402684566e-05,
      "loss": 0.0062,
      "step": 6200
    },
    {
      "epoch": 45.65,
      "learning_rate": 4.6722147651006714e-05,
      "loss": 0.0056,
      "step": 6300
    },
    {
      "epoch": 46.38,
      "learning_rate": 4.618523489932886e-05,
      "loss": 0.0052,
      "step": 6400
    },
    {
      "epoch": 47.1,
      "learning_rate": 4.564832214765101e-05,
      "loss": 0.0037,
      "step": 6500
    },
    {
      "epoch": 47.1,
      "eval_accuracy": 0.9078441210904834,
      "eval_loss": 0.8221492171287537,
      "eval_runtime": 3.8863,
      "eval_samples_per_second": 244.45,
      "eval_steps_per_second": 30.621,
      "step": 6500
    },
    {
      "epoch": 47.83,
      "learning_rate": 4.511140939597316e-05,
      "loss": 0.004,
      "step": 6600
    },
    {
      "epoch": 48.55,
      "learning_rate": 4.457449664429531e-05,
      "loss": 0.0039,
      "step": 6700
    },
    {
      "epoch": 49.28,
      "learning_rate": 4.4037583892617455e-05,
      "loss": 0.0053,
      "step": 6800
    },
    {
      "epoch": 50.0,
      "learning_rate": 4.35006711409396e-05,
      "loss": 0.0053,
      "step": 6900
    },
    {
      "epoch": 50.72,
      "learning_rate": 4.2963758389261745e-05,
      "loss": 0.004,
      "step": 7000
    },
    {
      "epoch": 50.72,
      "eval_accuracy": 0.9079277471149022,
      "eval_loss": 0.8471272587776184,
      "eval_runtime": 3.8324,
      "eval_samples_per_second": 247.888,
      "eval_steps_per_second": 31.051,
      "step": 7000
    },
    {
      "epoch": 51.45,
      "learning_rate": 4.242684563758389e-05,
      "loss": 0.0045,
      "step": 7100
    },
    {
      "epoch": 52.17,
      "learning_rate": 4.188993288590604e-05,
      "loss": 0.0031,
      "step": 7200
    },
    {
      "epoch": 52.9,
      "learning_rate": 4.135302013422819e-05,
      "loss": 0.0047,
      "step": 7300
    },
    {
      "epoch": 53.62,
      "learning_rate": 4.081610738255034e-05,
      "loss": 0.0052,
      "step": 7400
    },
    {
      "epoch": 54.35,
      "learning_rate": 4.0279194630872486e-05,
      "loss": 0.0039,
      "step": 7500
    },
    {
      "epoch": 54.35,
      "eval_accuracy": 0.9078441210904834,
      "eval_loss": 0.8163352012634277,
      "eval_runtime": 3.8952,
      "eval_samples_per_second": 243.889,
      "eval_steps_per_second": 30.55,
      "step": 7500
    },
    {
      "epoch": 54.35,
      "step": 7500,
      "total_flos": 3.927232531980288e+16,
      "train_loss": 0.06486416501204173,
      "train_runtime": 1116.591,
      "train_samples_per_second": 429.88,
      "train_steps_per_second": 13.434
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 109,
  "total_flos": 3.927232531980288e+16,
  "trial_name": null,
  "trial_params": null
}