{
  "best_metric": 66.41312856742334,
  "best_model_checkpoint": "/data/jcanete/all_results/mlqa/beto_uncased/epochs_4_bs_16_lr_3e-5/checkpoint-9000",
  "epoch": 4.0,
  "global_step": 20508,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "eval_exact_match": 23.4,
      "eval_f1": 39.87710850110139,
      "step": 300
    },
    {
      "epoch": 0.1,
      "learning_rate": 2.9271503803393797e-05,
      "loss": 3.1626,
      "step": 500
    },
    {
      "epoch": 0.12,
      "eval_exact_match": 28.4,
      "eval_f1": 46.509597922029684,
      "step": 600
    },
    {
      "epoch": 0.18,
      "eval_exact_match": 33.0,
      "eval_f1": 52.66068755924698,
      "step": 900
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.8540081919251023e-05,
      "loss": 2.4525,
      "step": 1000
    },
    {
      "epoch": 0.23,
      "eval_exact_match": 35.4,
      "eval_f1": 56.28184887315056,
      "step": 1200
    },
    {
      "epoch": 0.29,
      "learning_rate": 2.780866003510825e-05,
      "loss": 2.3232,
      "step": 1500
    },
    {
      "epoch": 0.29,
      "eval_exact_match": 35.4,
      "eval_f1": 56.05406960312001,
      "step": 1500
    },
    {
      "epoch": 0.35,
      "eval_exact_match": 33.2,
      "eval_f1": 56.31194498874277,
      "step": 1800
    },
    {
      "epoch": 0.39,
      "learning_rate": 2.707723815096548e-05,
      "loss": 2.2057,
      "step": 2000
    },
    {
      "epoch": 0.41,
      "eval_exact_match": 37.6,
      "eval_f1": 60.7500417692224,
      "step": 2100
    },
    {
      "epoch": 0.47,
      "eval_exact_match": 38.6,
      "eval_f1": 61.9826845317547,
      "step": 2400
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.634727911059099e-05,
      "loss": 2.1759,
      "step": 2500
    },
    {
      "epoch": 0.53,
      "eval_exact_match": 39.8,
      "eval_f1": 64.26755886141255,
      "step": 2700
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.5615857226448216e-05,
      "loss": 2.115,
      "step": 3000
    },
    {
      "epoch": 0.59,
      "eval_exact_match": 35.6,
      "eval_f1": 58.87759205203179,
      "step": 3000
    },
    {
      "epoch": 0.64,
      "eval_exact_match": 38.0,
      "eval_f1": 63.1853755598259,
      "step": 3300
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.4884435342305442e-05,
      "loss": 2.0455,
      "step": 3500
    },
    {
      "epoch": 0.7,
      "eval_exact_match": 38.6,
      "eval_f1": 62.17141554579418,
      "step": 3600
    },
    {
      "epoch": 0.76,
      "eval_exact_match": 40.8,
      "eval_f1": 63.814839462786935,
      "step": 3900
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.415301345816267e-05,
      "loss": 2.0378,
      "step": 4000
    },
    {
      "epoch": 0.82,
      "eval_exact_match": 38.2,
      "eval_f1": 63.452901777349766,
      "step": 4200
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.3421591574019895e-05,
      "loss": 1.9839,
      "step": 4500
    },
    {
      "epoch": 0.88,
      "eval_exact_match": 40.6,
      "eval_f1": 64.348618765079,
      "step": 4500
    },
    {
      "epoch": 0.94,
      "eval_exact_match": 39.4,
      "eval_f1": 62.57530588719265,
      "step": 4800
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.269163253364541e-05,
      "loss": 1.9543,
      "step": 5000
    },
    {
      "epoch": 0.99,
      "eval_exact_match": 37.8,
      "eval_f1": 62.47954703797214,
      "step": 5100
    },
    {
      "epoch": 1.05,
      "eval_exact_match": 38.8,
      "eval_f1": 62.7759255252296,
      "step": 5400
    },
    {
      "epoch": 1.07,
      "learning_rate": 2.196167349327092e-05,
      "loss": 1.7389,
      "step": 5500
    },
    {
      "epoch": 1.11,
      "eval_exact_match": 40.0,
      "eval_f1": 63.39625245328542,
      "step": 5700
    },
    {
      "epoch": 1.17,
      "learning_rate": 2.1230251609128147e-05,
      "loss": 1.6611,
      "step": 6000
    },
    {
      "epoch": 1.17,
      "eval_exact_match": 40.0,
      "eval_f1": 63.10882532261943,
      "step": 6000
    },
    {
      "epoch": 1.23,
      "eval_exact_match": 39.0,
      "eval_f1": 63.122784024749606,
      "step": 6300
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.0498829724985373e-05,
      "loss": 1.6013,
      "step": 6500
    },
    {
      "epoch": 1.29,
      "eval_exact_match": 40.4,
      "eval_f1": 64.3832532899268,
      "step": 6600
    },
    {
      "epoch": 1.35,
      "eval_exact_match": 41.2,
      "eval_f1": 65.07150560199364,
      "step": 6900
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.97674078408426e-05,
      "loss": 1.6499,
      "step": 7000
    },
    {
      "epoch": 1.4,
      "eval_exact_match": 40.8,
      "eval_f1": 63.757805668028354,
      "step": 7200
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.9035985956699825e-05,
      "loss": 1.6441,
      "step": 7500
    },
    {
      "epoch": 1.46,
      "eval_exact_match": 40.0,
      "eval_f1": 64.51824146230119,
      "step": 7500
    },
    {
      "epoch": 1.52,
      "eval_exact_match": 41.8,
      "eval_f1": 64.5385854544337,
      "step": 7800
    },
    {
      "epoch": 1.56,
      "learning_rate": 1.830456407255705e-05,
      "loss": 1.6383,
      "step": 8000
    },
    {
      "epoch": 1.58,
      "eval_exact_match": 41.4,
      "eval_f1": 65.35549998349387,
      "step": 8100
    },
    {
      "epoch": 1.64,
      "eval_exact_match": 40.6,
      "eval_f1": 64.45859595523329,
      "step": 8400
    },
    {
      "epoch": 1.66,
      "learning_rate": 1.7573142188414278e-05,
      "loss": 1.5992,
      "step": 8500
    },
    {
      "epoch": 1.7,
      "eval_exact_match": 42.6,
      "eval_f1": 66.32210814293916,
      "step": 8700
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.6841720304271504e-05,
      "loss": 1.6092,
      "step": 9000
    },
    {
      "epoch": 1.76,
      "eval_exact_match": 44.0,
      "eval_f1": 66.41312856742334,
      "step": 9000
    },
    {
      "epoch": 1.81,
      "eval_exact_match": 42.0,
      "eval_f1": 65.70423087204887,
      "step": 9300
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.611029842012873e-05,
      "loss": 1.637,
      "step": 9500
    },
    {
      "epoch": 1.87,
      "eval_exact_match": 43.0,
      "eval_f1": 65.2546958366835,
      "step": 9600
    },
    {
      "epoch": 1.93,
      "eval_exact_match": 42.2,
      "eval_f1": 66.10494154148009,
      "step": 9900
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.538033937975424e-05,
      "loss": 1.5978,
      "step": 10000
    },
    {
      "epoch": 1.99,
      "eval_exact_match": 40.0,
      "eval_f1": 63.806088000887634,
      "step": 10200
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.4650380339379754e-05,
      "loss": 1.4462,
      "step": 10500
    },
    {
      "epoch": 2.05,
      "eval_exact_match": 40.6,
      "eval_f1": 65.01325773791683,
      "step": 10500
    },
    {
      "epoch": 2.11,
      "eval_exact_match": 41.2,
      "eval_f1": 64.50214370703992,
      "step": 10800
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.3918958455236982e-05,
      "loss": 1.2958,
      "step": 11000
    },
    {
      "epoch": 2.17,
      "eval_exact_match": 40.6,
      "eval_f1": 65.36716761169852,
      "step": 11100
    },
    {
      "epoch": 2.22,
      "eval_exact_match": 40.8,
      "eval_f1": 64.11070880878887,
      "step": 11400
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.3187536571094208e-05,
      "loss": 1.3027,
      "step": 11500
    },
    {
      "epoch": 2.28,
      "eval_exact_match": 39.2,
      "eval_f1": 63.682573407854235,
      "step": 11700
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.2456114686951432e-05,
      "loss": 1.2731,
      "step": 12000
    },
    {
      "epoch": 2.34,
      "eval_exact_match": 39.8,
      "eval_f1": 63.59941485060206,
      "step": 12000
    },
    {
      "epoch": 2.4,
      "eval_exact_match": 37.8,
      "eval_f1": 62.92318349722304,
      "step": 12300
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.172469280280866e-05,
      "loss": 1.3022,
      "step": 12500
    },
    {
      "epoch": 2.46,
      "eval_exact_match": 39.4,
      "eval_f1": 64.09272643867237,
      "step": 12600
    },
    {
      "epoch": 2.52,
      "eval_exact_match": 40.6,
      "eval_f1": 65.27753571541867,
      "step": 12900
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.0993270918665886e-05,
      "loss": 1.3036,
      "step": 13000
    },
    {
      "epoch": 2.57,
      "eval_exact_match": 40.4,
      "eval_f1": 64.22110332511052,
      "step": 13200
    },
    {
      "epoch": 2.63,
      "learning_rate": 1.0261849034523113e-05,
      "loss": 1.2821,
      "step": 13500
    },
    {
      "epoch": 2.63,
      "eval_exact_match": 39.8,
      "eval_f1": 63.332714021581985,
      "step": 13500
    },
    {
      "epoch": 2.69,
      "eval_exact_match": 39.6,
      "eval_f1": 63.578162829605446,
      "step": 13800
    },
    {
      "epoch": 2.73,
      "learning_rate": 9.531889994148626e-06,
      "loss": 1.2877,
      "step": 14000
    },
    {
      "epoch": 2.75,
      "eval_exact_match": 40.0,
      "eval_f1": 63.35184925186776,
      "step": 14100
    },
    {
      "epoch": 2.81,
      "eval_exact_match": 40.2,
      "eval_f1": 64.16356452794574,
      "step": 14400
    },
    {
      "epoch": 2.83,
      "learning_rate": 8.800468110005852e-06,
      "loss": 1.2603,
      "step": 14500
    },
    {
      "epoch": 2.87,
      "eval_exact_match": 41.2,
      "eval_f1": 64.11806249593292,
      "step": 14700
    },
    {
      "epoch": 2.93,
      "learning_rate": 8.069046225863078e-06,
      "loss": 1.2756,
      "step": 15000
    },
    {
      "epoch": 2.93,
      "eval_exact_match": 40.4,
      "eval_f1": 63.824419143757055,
      "step": 15000
    },
    {
      "epoch": 2.98,
      "eval_exact_match": 40.6,
      "eval_f1": 64.24451284277517,
      "step": 15300
    },
    {
      "epoch": 3.02,
      "learning_rate": 7.337624341720305e-06,
      "loss": 1.2121,
      "step": 15500
    },
    {
      "epoch": 3.04,
      "eval_exact_match": 40.0,
      "eval_f1": 63.53412990760953,
      "step": 15600
    },
    {
      "epoch": 3.1,
      "eval_exact_match": 39.0,
      "eval_f1": 62.80074251846947,
      "step": 15900
    },
    {
      "epoch": 3.12,
      "learning_rate": 6.606202457577531e-06,
      "loss": 1.0572,
      "step": 16000
    },
    {
      "epoch": 3.16,
      "eval_exact_match": 39.6,
      "eval_f1": 63.656775539732564,
      "step": 16200
    },
    {
      "epoch": 3.22,
      "learning_rate": 5.874780573434757e-06,
      "loss": 1.0379,
      "step": 16500
    },
    {
      "epoch": 3.22,
      "eval_exact_match": 40.2,
      "eval_f1": 63.43415180275925,
      "step": 16500
    },
    {
      "epoch": 3.28,
      "eval_exact_match": 40.6,
      "eval_f1": 64.05233541882214,
      "step": 16800
    },
    {
      "epoch": 3.32,
      "learning_rate": 5.143358689291984e-06,
      "loss": 1.0491,
      "step": 17000
    },
    {
      "epoch": 3.34,
      "eval_exact_match": 38.4,
      "eval_f1": 63.21753166648462,
      "step": 17100
    },
    {
      "epoch": 3.39,
      "eval_exact_match": 38.4,
      "eval_f1": 63.12095402408347,
      "step": 17400
    },
    {
      "epoch": 3.41,
      "learning_rate": 4.4119368051492096e-06,
      "loss": 1.0581,
      "step": 17500
    },
    {
      "epoch": 3.45,
      "eval_exact_match": 40.2,
      "eval_f1": 64.48156998952923,
      "step": 17700
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.681977764774722e-06,
      "loss": 1.0731,
      "step": 18000
    },
    {
      "epoch": 3.51,
      "eval_exact_match": 40.0,
      "eval_f1": 64.22656695334973,
      "step": 18000
    },
    {
      "epoch": 3.57,
      "eval_exact_match": 40.0,
      "eval_f1": 63.74403257605126,
      "step": 18300
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.9505558806319486e-06,
      "loss": 1.0486,
      "step": 18500
    },
    {
      "epoch": 3.63,
      "eval_exact_match": 39.8,
      "eval_f1": 63.72890744409074,
      "step": 18600
    },
    {
      "epoch": 3.69,
      "eval_exact_match": 40.6,
      "eval_f1": 64.93728425900885,
      "step": 18900
    },
    {
      "epoch": 3.71,
      "learning_rate": 2.219133996489175e-06,
      "loss": 1.0584,
      "step": 19000
    },
    {
      "epoch": 3.74,
      "eval_exact_match": 39.8,
      "eval_f1": 64.31547047147733,
      "step": 19200
    },
    {
      "epoch": 3.8,
      "learning_rate": 1.4877121123464014e-06,
      "loss": 1.0494,
      "step": 19500
    },
    {
      "epoch": 3.8,
      "eval_exact_match": 40.6,
      "eval_f1": 64.73621892117005,
      "step": 19500
    },
    {
      "epoch": 3.86,
      "eval_exact_match": 40.2,
      "eval_f1": 64.61480926659202,
      "step": 19800
    },
    {
      "epoch": 3.9,
      "learning_rate": 7.577530719719134e-07,
      "loss": 1.058,
      "step": 20000
    },
    {
      "epoch": 3.92,
      "eval_exact_match": 40.4,
      "eval_f1": 64.58939713486411,
      "step": 20100
    },
    {
      "epoch": 3.98,
      "eval_exact_match": 40.4,
      "eval_f1": 64.5227927048114,
      "step": 20400
    },
    {
      "epoch": 4.0,
      "learning_rate": 2.633118782913985e-08,
      "loss": 1.0348,
      "step": 20500
    },
    {
      "epoch": 4.0,
      "step": 20508,
      "total_flos": 5.891975668325875e+16,
      "train_loss": 1.551024980565505,
      "train_runtime": 35295.3339,
      "train_samples_per_second": 9.296,
      "train_steps_per_second": 0.581
    }
  ],
  "max_steps": 20508,
  "num_train_epochs": 4,
  "total_flos": 5.891975668325875e+16,
  "trial_name": null,
  "trial_params": null
}