|
{
  "best_metric": 0.5861960041064519,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Chinese-GSD/checkpoint-500",
  "epoch": 24.0,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 1.8173,
      "step": 100
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.966778523489933e-05,
      "loss": 1.3257,
      "step": 200
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.933221476510068e-05,
      "loss": 1.2541,
      "step": 300
    },
    {
      "epoch": 3.2,
      "learning_rate": 4.8996644295302016e-05,
      "loss": 1.2279,
      "step": 400
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.8661073825503355e-05,
      "loss": 1.1842,
      "step": 500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5861960041064519,
      "eval_loss": 1.2214140892028809,
      "eval_runtime": 2.5478,
      "eval_samples_per_second": 196.251,
      "eval_steps_per_second": 24.728,
      "step": 500
    },
    {
      "epoch": 4.8,
      "learning_rate": 4.83255033557047e-05,
      "loss": 1.1457,
      "step": 600
    },
    {
      "epoch": 5.6,
      "learning_rate": 4.798993288590604e-05,
      "loss": 1.1089,
      "step": 700
    },
    {
      "epoch": 6.4,
      "learning_rate": 4.765436241610739e-05,
      "loss": 1.0922,
      "step": 800
    },
    {
      "epoch": 7.2,
      "learning_rate": 4.7318791946308726e-05,
      "loss": 1.0397,
      "step": 900
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.698322147651007e-05,
      "loss": 1.0256,
      "step": 1000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5806680881307748,
      "eval_loss": 1.2595834732055664,
      "eval_runtime": 2.5388,
      "eval_samples_per_second": 196.945,
      "eval_steps_per_second": 24.815,
      "step": 1000
    },
    {
      "epoch": 8.8,
      "learning_rate": 4.664765100671141e-05,
      "loss": 0.9657,
      "step": 1100
    },
    {
      "epoch": 9.6,
      "learning_rate": 4.631208053691276e-05,
      "loss": 0.9376,
      "step": 1200
    },
    {
      "epoch": 10.4,
      "learning_rate": 4.5976510067114097e-05,
      "loss": 0.901,
      "step": 1300
    },
    {
      "epoch": 11.2,
      "learning_rate": 4.564093959731544e-05,
      "loss": 0.8583,
      "step": 1400
    },
    {
      "epoch": 12.0,
      "learning_rate": 4.5305369127516775e-05,
      "loss": 0.832,
      "step": 1500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5698491668640923,
      "eval_loss": 1.4014437198638916,
      "eval_runtime": 2.5467,
      "eval_samples_per_second": 196.333,
      "eval_steps_per_second": 24.738,
      "step": 1500
    },
    {
      "epoch": 12.8,
      "learning_rate": 4.496979865771812e-05,
      "loss": 0.7809,
      "step": 1600
    },
    {
      "epoch": 13.6,
      "learning_rate": 4.463422818791946e-05,
      "loss": 0.7387,
      "step": 1700
    },
    {
      "epoch": 14.4,
      "learning_rate": 4.4298657718120806e-05,
      "loss": 0.7135,
      "step": 1800
    },
    {
      "epoch": 15.2,
      "learning_rate": 4.3963087248322146e-05,
      "loss": 0.6757,
      "step": 1900
    },
    {
      "epoch": 16.0,
      "learning_rate": 4.362751677852349e-05,
      "loss": 0.6519,
      "step": 2000
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5552396746426597,
      "eval_loss": 1.6267313957214355,
      "eval_runtime": 2.5372,
      "eval_samples_per_second": 197.067,
      "eval_steps_per_second": 24.83,
      "step": 2000
    },
    {
      "epoch": 16.8,
      "learning_rate": 4.329194630872484e-05,
      "loss": 0.6038,
      "step": 2100
    },
    {
      "epoch": 17.6,
      "learning_rate": 4.295637583892618e-05,
      "loss": 0.5758,
      "step": 2200
    },
    {
      "epoch": 18.4,
      "learning_rate": 4.262080536912752e-05,
      "loss": 0.545,
      "step": 2300
    },
    {
      "epoch": 19.2,
      "learning_rate": 4.228523489932886e-05,
      "loss": 0.5239,
      "step": 2400
    },
    {
      "epoch": 20.0,
      "learning_rate": 4.194966442953021e-05,
      "loss": 0.5023,
      "step": 2500
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.550817341862118,
      "eval_loss": 1.8601970672607422,
      "eval_runtime": 2.5314,
      "eval_samples_per_second": 197.52,
      "eval_steps_per_second": 24.887,
      "step": 2500
    },
    {
      "epoch": 20.8,
      "learning_rate": 4.161409395973155e-05,
      "loss": 0.4627,
      "step": 2600
    },
    {
      "epoch": 21.6,
      "learning_rate": 4.127852348993289e-05,
      "loss": 0.4396,
      "step": 2700
    },
    {
      "epoch": 22.4,
      "learning_rate": 4.0942953020134226e-05,
      "loss": 0.4187,
      "step": 2800
    },
    {
      "epoch": 23.2,
      "learning_rate": 4.060738255033557e-05,
      "loss": 0.4056,
      "step": 2900
    },
    {
      "epoch": 24.0,
      "learning_rate": 4.027181208053691e-05,
      "loss": 0.3811,
      "step": 3000
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.5463160388533523,
      "eval_loss": 2.0793230533599854,
      "eval_runtime": 2.5286,
      "eval_samples_per_second": 197.741,
      "eval_steps_per_second": 24.915,
      "step": 3000
    },
    {
      "epoch": 24.0,
      "step": 3000,
      "total_flos": 1.2534537269366784e+16,
      "train_loss": 0.8378373018900553,
      "train_runtime": 774.849,
      "train_samples_per_second": 619.476,
      "train_steps_per_second": 19.359
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 120,
  "total_flos": 1.2534537269366784e+16,
  "trial_name": null,
  "trial_params": null
}
|
|