josecannete's picture
adding model finetuned on QA (MLQA)
6c8d55f
raw
history blame
13.9 kB
{
"best_metric": 68.30131315047515,
"best_model_checkpoint": "/data/jcanete/all_results/mlqa/albeto_large/epochs_4_bs_16_lr_5e-6/checkpoint-9900",
"epoch": 3.9999025056059274,
"global_step": 20512,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"eval_exact_match": 29.0,
"eval_f1": 50.90509628497882,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 4.8798264430577225e-06,
"loss": 3.1446,
"step": 500
},
{
"epoch": 0.12,
"eval_exact_match": 31.8,
"eval_f1": 55.04852261744159,
"step": 600
},
{
"epoch": 0.18,
"eval_exact_match": 35.8,
"eval_f1": 59.213689806872225,
"step": 900
},
{
"epoch": 0.19,
"learning_rate": 4.757946567862715e-06,
"loss": 2.2422,
"step": 1000
},
{
"epoch": 0.23,
"eval_exact_match": 35.8,
"eval_f1": 60.27763276523416,
"step": 1200
},
{
"epoch": 0.29,
"learning_rate": 4.636066692667707e-06,
"loss": 2.0263,
"step": 1500
},
{
"epoch": 0.29,
"eval_exact_match": 37.2,
"eval_f1": 61.640978566156164,
"step": 1500
},
{
"epoch": 0.35,
"eval_exact_match": 38.8,
"eval_f1": 63.63406896655414,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 4.514186817472699e-06,
"loss": 1.9866,
"step": 2000
},
{
"epoch": 0.41,
"eval_exact_match": 38.8,
"eval_f1": 62.922385789980396,
"step": 2100
},
{
"epoch": 0.47,
"eval_exact_match": 41.0,
"eval_f1": 65.67215610318891,
"step": 2400
},
{
"epoch": 0.49,
"learning_rate": 4.3923069422776915e-06,
"loss": 1.9148,
"step": 2500
},
{
"epoch": 0.53,
"eval_exact_match": 39.8,
"eval_f1": 64.93059242091284,
"step": 2700
},
{
"epoch": 0.58,
"learning_rate": 4.270427067082684e-06,
"loss": 1.9195,
"step": 3000
},
{
"epoch": 0.58,
"eval_exact_match": 41.4,
"eval_f1": 65.8577993783837,
"step": 3000
},
{
"epoch": 0.64,
"eval_exact_match": 39.2,
"eval_f1": 63.87734802490549,
"step": 3300
},
{
"epoch": 0.68,
"learning_rate": 4.148547191887676e-06,
"loss": 1.8627,
"step": 3500
},
{
"epoch": 0.7,
"eval_exact_match": 40.2,
"eval_f1": 65.36196332302504,
"step": 3600
},
{
"epoch": 0.76,
"eval_exact_match": 41.0,
"eval_f1": 66.05076968974839,
"step": 3900
},
{
"epoch": 0.78,
"learning_rate": 4.0266673166926675e-06,
"loss": 1.8683,
"step": 4000
},
{
"epoch": 0.82,
"eval_exact_match": 40.0,
"eval_f1": 65.39746267260027,
"step": 4200
},
{
"epoch": 0.88,
"learning_rate": 3.90478744149766e-06,
"loss": 1.8329,
"step": 4500
},
{
"epoch": 0.88,
"eval_exact_match": 40.8,
"eval_f1": 65.6859788771072,
"step": 4500
},
{
"epoch": 0.94,
"eval_exact_match": 43.2,
"eval_f1": 66.51118872985614,
"step": 4800
},
{
"epoch": 0.97,
"learning_rate": 3.782907566302653e-06,
"loss": 1.8251,
"step": 5000
},
{
"epoch": 0.99,
"eval_exact_match": 43.0,
"eval_f1": 66.4592590376383,
"step": 5100
},
{
"epoch": 1.05,
"eval_exact_match": 43.4,
"eval_f1": 67.52269758839552,
"step": 5400
},
{
"epoch": 1.07,
"learning_rate": 3.6610276911076447e-06,
"loss": 1.695,
"step": 5500
},
{
"epoch": 1.11,
"eval_exact_match": 41.2,
"eval_f1": 65.42262622506561,
"step": 5700
},
{
"epoch": 1.17,
"learning_rate": 3.539147815912637e-06,
"loss": 1.6866,
"step": 6000
},
{
"epoch": 1.17,
"eval_exact_match": 43.4,
"eval_f1": 66.35194455433367,
"step": 6000
},
{
"epoch": 1.23,
"eval_exact_match": 42.4,
"eval_f1": 67.08280208802455,
"step": 6300
},
{
"epoch": 1.27,
"learning_rate": 3.4172679407176292e-06,
"loss": 1.67,
"step": 6500
},
{
"epoch": 1.29,
"eval_exact_match": 44.2,
"eval_f1": 67.91419349019203,
"step": 6600
},
{
"epoch": 1.35,
"eval_exact_match": 42.2,
"eval_f1": 66.31105684124356,
"step": 6900
},
{
"epoch": 1.37,
"learning_rate": 3.295388065522621e-06,
"loss": 1.6472,
"step": 7000
},
{
"epoch": 1.4,
"eval_exact_match": 43.8,
"eval_f1": 67.99899733387957,
"step": 7200
},
{
"epoch": 1.46,
"learning_rate": 3.1735081903276133e-06,
"loss": 1.653,
"step": 7500
},
{
"epoch": 1.46,
"eval_exact_match": 43.8,
"eval_f1": 66.75807526659699,
"step": 7500
},
{
"epoch": 1.52,
"eval_exact_match": 42.4,
"eval_f1": 66.893459242555,
"step": 7800
},
{
"epoch": 1.56,
"learning_rate": 3.051628315132605e-06,
"loss": 1.6675,
"step": 8000
},
{
"epoch": 1.58,
"eval_exact_match": 42.2,
"eval_f1": 67.17897557140992,
"step": 8100
},
{
"epoch": 1.64,
"eval_exact_match": 42.6,
"eval_f1": 66.80668914548878,
"step": 8400
},
{
"epoch": 1.66,
"learning_rate": 2.9297484399375974e-06,
"loss": 1.6521,
"step": 8500
},
{
"epoch": 1.7,
"eval_exact_match": 42.8,
"eval_f1": 66.67360447265725,
"step": 8700
},
{
"epoch": 1.75,
"learning_rate": 2.8081123244929796e-06,
"loss": 1.6663,
"step": 9000
},
{
"epoch": 1.75,
"eval_exact_match": 42.8,
"eval_f1": 67.4405587836458,
"step": 9000
},
{
"epoch": 1.81,
"eval_exact_match": 41.8,
"eval_f1": 66.47134668296808,
"step": 9300
},
{
"epoch": 1.85,
"learning_rate": 2.686232449297972e-06,
"loss": 1.6636,
"step": 9500
},
{
"epoch": 1.87,
"eval_exact_match": 43.2,
"eval_f1": 67.05153492932693,
"step": 9600
},
{
"epoch": 1.93,
"eval_exact_match": 43.4,
"eval_f1": 68.30131315047515,
"step": 9900
},
{
"epoch": 1.95,
"learning_rate": 2.5643525741029646e-06,
"loss": 1.636,
"step": 10000
},
{
"epoch": 1.99,
"eval_exact_match": 42.0,
"eval_f1": 66.57096278498234,
"step": 10200
},
{
"epoch": 2.05,
"learning_rate": 2.4424726989079564e-06,
"loss": 1.5741,
"step": 10500
},
{
"epoch": 2.05,
"eval_exact_match": 42.0,
"eval_f1": 67.26443263606592,
"step": 10500
},
{
"epoch": 2.11,
"eval_exact_match": 41.8,
"eval_f1": 65.97313951175933,
"step": 10800
},
{
"epoch": 2.15,
"learning_rate": 2.3205928237129487e-06,
"loss": 1.5013,
"step": 11000
},
{
"epoch": 2.16,
"eval_exact_match": 42.0,
"eval_f1": 66.27870125797807,
"step": 11100
},
{
"epoch": 2.22,
"eval_exact_match": 42.0,
"eval_f1": 66.81389262599433,
"step": 11400
},
{
"epoch": 2.24,
"learning_rate": 2.198712948517941e-06,
"loss": 1.5408,
"step": 11500
},
{
"epoch": 2.28,
"eval_exact_match": 42.0,
"eval_f1": 66.93349815276726,
"step": 11700
},
{
"epoch": 2.34,
"learning_rate": 2.076833073322933e-06,
"loss": 1.5204,
"step": 12000
},
{
"epoch": 2.34,
"eval_exact_match": 41.8,
"eval_f1": 66.98872203081277,
"step": 12000
},
{
"epoch": 2.4,
"eval_exact_match": 41.0,
"eval_f1": 66.69394179465107,
"step": 12300
},
{
"epoch": 2.44,
"learning_rate": 1.954953198127925e-06,
"loss": 1.5073,
"step": 12500
},
{
"epoch": 2.46,
"eval_exact_match": 41.4,
"eval_f1": 66.72832055752183,
"step": 12600
},
{
"epoch": 2.52,
"eval_exact_match": 41.8,
"eval_f1": 66.82588745690937,
"step": 12900
},
{
"epoch": 2.54,
"learning_rate": 1.8333170826833074e-06,
"loss": 1.5393,
"step": 13000
},
{
"epoch": 2.57,
"eval_exact_match": 42.0,
"eval_f1": 66.26976484353048,
"step": 13200
},
{
"epoch": 2.63,
"learning_rate": 1.7114372074882995e-06,
"loss": 1.4919,
"step": 13500
},
{
"epoch": 2.63,
"eval_exact_match": 41.6,
"eval_f1": 66.61814903434147,
"step": 13500
},
{
"epoch": 2.69,
"eval_exact_match": 43.4,
"eval_f1": 67.5204173683535,
"step": 13800
},
{
"epoch": 2.73,
"learning_rate": 1.5898010920436819e-06,
"loss": 1.5187,
"step": 14000
},
{
"epoch": 2.75,
"eval_exact_match": 42.2,
"eval_f1": 67.32150505606054,
"step": 14100
},
{
"epoch": 2.81,
"eval_exact_match": 42.0,
"eval_f1": 66.80272450453026,
"step": 14400
},
{
"epoch": 2.83,
"learning_rate": 1.468164976599064e-06,
"loss": 1.5261,
"step": 14500
},
{
"epoch": 2.87,
"eval_exact_match": 42.4,
"eval_f1": 67.28393744972873,
"step": 14700
},
{
"epoch": 2.93,
"learning_rate": 1.3462851014040563e-06,
"loss": 1.5142,
"step": 15000
},
{
"epoch": 2.93,
"eval_exact_match": 41.6,
"eval_f1": 66.34997901825885,
"step": 15000
},
{
"epoch": 2.98,
"eval_exact_match": 42.6,
"eval_f1": 67.39174453996277,
"step": 15300
},
{
"epoch": 3.02,
"learning_rate": 1.2244052262090486e-06,
"loss": 1.5121,
"step": 15500
},
{
"epoch": 3.04,
"eval_exact_match": 42.0,
"eval_f1": 67.01056668618087,
"step": 15600
},
{
"epoch": 3.1,
"eval_exact_match": 40.8,
"eval_f1": 66.54001835281765,
"step": 15900
},
{
"epoch": 3.12,
"learning_rate": 1.1025253510140406e-06,
"loss": 1.4242,
"step": 16000
},
{
"epoch": 3.16,
"eval_exact_match": 42.2,
"eval_f1": 66.76596500403501,
"step": 16200
},
{
"epoch": 3.22,
"learning_rate": 9.808892355694228e-07,
"loss": 1.4527,
"step": 16500
},
{
"epoch": 3.22,
"eval_exact_match": 40.8,
"eval_f1": 66.44298336844766,
"step": 16500
},
{
"epoch": 3.28,
"eval_exact_match": 40.8,
"eval_f1": 65.94468985507064,
"step": 16800
},
{
"epoch": 3.32,
"learning_rate": 8.59009360374415e-07,
"loss": 1.4366,
"step": 17000
},
{
"epoch": 3.33,
"eval_exact_match": 41.4,
"eval_f1": 66.15019320465619,
"step": 17100
},
{
"epoch": 3.39,
"eval_exact_match": 41.2,
"eval_f1": 66.46393475983746,
"step": 17400
},
{
"epoch": 3.41,
"learning_rate": 7.371294851794072e-07,
"loss": 1.4172,
"step": 17500
},
{
"epoch": 3.45,
"eval_exact_match": 41.4,
"eval_f1": 66.63413637625855,
"step": 17700
},
{
"epoch": 3.51,
"learning_rate": 6.152496099843995e-07,
"loss": 1.3882,
"step": 18000
},
{
"epoch": 3.51,
"eval_exact_match": 42.6,
"eval_f1": 67.05450679893755,
"step": 18000
},
{
"epoch": 3.57,
"eval_exact_match": 41.4,
"eval_f1": 66.97646758024133,
"step": 18300
},
{
"epoch": 3.61,
"learning_rate": 4.933697347893916e-07,
"loss": 1.4208,
"step": 18500
},
{
"epoch": 3.63,
"eval_exact_match": 41.8,
"eval_f1": 66.64499063945128,
"step": 18600
},
{
"epoch": 3.69,
"eval_exact_match": 42.0,
"eval_f1": 66.34641936217525,
"step": 18900
},
{
"epoch": 3.71,
"learning_rate": 3.714898595943838e-07,
"loss": 1.4226,
"step": 19000
},
{
"epoch": 3.74,
"eval_exact_match": 42.2,
"eval_f1": 66.77988878164467,
"step": 19200
},
{
"epoch": 3.8,
"learning_rate": 2.49609984399376e-07,
"loss": 1.4301,
"step": 19500
},
{
"epoch": 3.8,
"eval_exact_match": 41.8,
"eval_f1": 66.8207685935084,
"step": 19500
},
{
"epoch": 3.86,
"eval_exact_match": 42.6,
"eval_f1": 66.67617693028318,
"step": 19800
},
{
"epoch": 3.9,
"learning_rate": 1.2773010920436818e-07,
"loss": 1.4246,
"step": 20000
},
{
"epoch": 3.92,
"eval_exact_match": 42.2,
"eval_f1": 66.93786314807664,
"step": 20100
},
{
"epoch": 3.98,
"eval_exact_match": 42.0,
"eval_f1": 67.00733683228717,
"step": 20400
},
{
"epoch": 4.0,
"learning_rate": 5.850234009360375e-09,
"loss": 1.4253,
"step": 20500
},
{
"epoch": 4.0,
"step": 20512,
"total_flos": 7862366381444640.0,
"train_loss": 1.6645502970668715,
"train_runtime": 13423.2225,
"train_samples_per_second": 24.45,
"train_steps_per_second": 1.528
}
],
"max_steps": 20512,
"num_train_epochs": 4,
"total_flos": 7862366381444640.0,
"trial_name": null,
"trial_params": null
}