|
{ |
|
"best_metric": 1.7131295204162598, |
|
"best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/t5-small-e-snli-generation-explanation_only-selected-b64/checkpoint-52000", |
|
"epoch": 8.387698042870456, |
|
"global_step": 72000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004659832246039143, |
|
"loss": 1.8699, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.925290377655787, |
|
"eval_bleu": 0.3545199637679763, |
|
"eval_f1": null, |
|
"eval_loss": 1.959277629852295, |
|
"eval_rouge1": 0.5633837391336356, |
|
"eval_rouge2": 0.34674934645388344, |
|
"eval_rougeL": 0.5064883659799413, |
|
"eval_rougeLsum": 0.5086541050813081, |
|
"eval_runtime": 133.1355, |
|
"eval_samples_per_second": 73.925, |
|
"eval_steps_per_second": 1.157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0009319664492078286, |
|
"loss": 1.5318, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9287383144911022, |
|
"eval_bleu": 0.37953656724001, |
|
"eval_f1": null, |
|
"eval_loss": 1.9824762344360352, |
|
"eval_rouge1": 0.5844859677476962, |
|
"eval_rouge2": 0.36581607147006445, |
|
"eval_rougeL": 0.5259486949820125, |
|
"eval_rougeLsum": 0.527845210467109, |
|
"eval_runtime": 119.611, |
|
"eval_samples_per_second": 82.283, |
|
"eval_steps_per_second": 1.288, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0009790552803256977, |
|
"loss": 1.4554, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9303612746835797, |
|
"eval_bleu": 0.38752191605471603, |
|
"eval_f1": null, |
|
"eval_loss": 1.8406306505203247, |
|
"eval_rouge1": 0.5927229059335931, |
|
"eval_rouge2": 0.38003492861574106, |
|
"eval_rougeL": 0.5346859072251302, |
|
"eval_rougeLsum": 0.5366150157714622, |
|
"eval_runtime": 119.2462, |
|
"eval_samples_per_second": 82.535, |
|
"eval_steps_per_second": 1.291, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0009545298474518076, |
|
"loss": 1.4072, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9313987933433873, |
|
"eval_bleu": 0.38894349752263974, |
|
"eval_f1": null, |
|
"eval_loss": 1.8335192203521729, |
|
"eval_rouge1": 0.5951490622955975, |
|
"eval_rouge2": 0.38132596191494406, |
|
"eval_rougeL": 0.5372774002496402, |
|
"eval_rougeLsum": 0.5390370987815991, |
|
"eval_runtime": 124.2778, |
|
"eval_samples_per_second": 79.194, |
|
"eval_steps_per_second": 1.239, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0009300044145779173, |
|
"loss": 1.3591, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9315006548288609, |
|
"eval_bleu": 0.3865096435890867, |
|
"eval_f1": null, |
|
"eval_loss": 1.7958769798278809, |
|
"eval_rouge1": 0.6050046650220731, |
|
"eval_rouge2": 0.3969760699443571, |
|
"eval_rougeL": 0.5467681117587176, |
|
"eval_rougeLsum": 0.5491745425215071, |
|
"eval_runtime": 127.9598, |
|
"eval_samples_per_second": 76.915, |
|
"eval_steps_per_second": 1.204, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0009054789817040271, |
|
"loss": 1.3314, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9321439913486325, |
|
"eval_bleu": 0.40043826944150546, |
|
"eval_f1": null, |
|
"eval_loss": 1.8358908891677856, |
|
"eval_rouge1": 0.6033740241419494, |
|
"eval_rouge2": 0.3943871885637782, |
|
"eval_rougeL": 0.5446854297032411, |
|
"eval_rougeLsum": 0.5468056362503415, |
|
"eval_runtime": 123.4557, |
|
"eval_samples_per_second": 79.721, |
|
"eval_steps_per_second": 1.247, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0008809535488301369, |
|
"loss": 1.322, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9318598681253477, |
|
"eval_bleu": 0.3901308120455454, |
|
"eval_f1": null, |
|
"eval_loss": 1.8139214515686035, |
|
"eval_rouge1": 0.6077021217894953, |
|
"eval_rouge2": 0.39737953301577167, |
|
"eval_rougeL": 0.5487431215145148, |
|
"eval_rougeLsum": 0.5508016845003574, |
|
"eval_runtime": 128.2981, |
|
"eval_samples_per_second": 76.712, |
|
"eval_steps_per_second": 1.2, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0008564281159562467, |
|
"loss": 1.3046, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9335851898597426, |
|
"eval_bleu": 0.4074581189783245, |
|
"eval_f1": null, |
|
"eval_loss": 1.7547861337661743, |
|
"eval_rouge1": 0.6106875900448452, |
|
"eval_rouge2": 0.40481219032920934, |
|
"eval_rougeL": 0.5549309353470346, |
|
"eval_rougeLsum": 0.5570675429924621, |
|
"eval_runtime": 123.362, |
|
"eval_samples_per_second": 79.781, |
|
"eval_steps_per_second": 1.248, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0008319026830823564, |
|
"loss": 1.2811, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9323258479783458, |
|
"eval_bleu": 0.3945938807031178, |
|
"eval_f1": null, |
|
"eval_loss": 1.8054404258728027, |
|
"eval_rouge1": 0.6125229180885745, |
|
"eval_rouge2": 0.40770947555297465, |
|
"eval_rougeL": 0.5571630795127507, |
|
"eval_rougeLsum": 0.5593601401592401, |
|
"eval_runtime": 130.5972, |
|
"eval_samples_per_second": 75.361, |
|
"eval_steps_per_second": 1.179, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0008073772502084662, |
|
"loss": 1.2522, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9318601747725316, |
|
"eval_bleu": 0.3868721016074339, |
|
"eval_f1": null, |
|
"eval_loss": 1.764616847038269, |
|
"eval_rouge1": 0.6049734000636491, |
|
"eval_rouge2": 0.3941804569991841, |
|
"eval_rougeL": 0.5483693677450392, |
|
"eval_rougeLsum": 0.5501449485616742, |
|
"eval_runtime": 127.6902, |
|
"eval_samples_per_second": 77.077, |
|
"eval_steps_per_second": 1.206, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.000782851817334576, |
|
"loss": 1.2499, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.931423125071294, |
|
"eval_bleu": 0.3902916555092431, |
|
"eval_f1": null, |
|
"eval_loss": 1.8007221221923828, |
|
"eval_rouge1": 0.6006911915544173, |
|
"eval_rouge2": 0.39035654600491987, |
|
"eval_rougeL": 0.5444104696749834, |
|
"eval_rougeLsum": 0.5462500361747902, |
|
"eval_runtime": 124.7647, |
|
"eval_samples_per_second": 78.884, |
|
"eval_steps_per_second": 1.234, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0007583263844606856, |
|
"loss": 1.2518, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9337686031202891, |
|
"eval_bleu": 0.40558306708684894, |
|
"eval_f1": null, |
|
"eval_loss": 1.7533162832260132, |
|
"eval_rouge1": 0.6171082316570229, |
|
"eval_rouge2": 0.41294766957360146, |
|
"eval_rougeL": 0.5605456269621005, |
|
"eval_rougeLsum": 0.5627524417212678, |
|
"eval_runtime": 126.2473, |
|
"eval_samples_per_second": 77.958, |
|
"eval_steps_per_second": 1.22, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0007338009515867955, |
|
"loss": 1.2371, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9339726343069636, |
|
"eval_bleu": 0.40189250216565836, |
|
"eval_f1": null, |
|
"eval_loss": 1.7669899463653564, |
|
"eval_rouge1": 0.6127257914599264, |
|
"eval_rouge2": 0.40628759904088707, |
|
"eval_rougeL": 0.5562894564458545, |
|
"eval_rougeLsum": 0.5582640573742271, |
|
"eval_runtime": 126.8636, |
|
"eval_samples_per_second": 77.579, |
|
"eval_steps_per_second": 1.214, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0007092755187129053, |
|
"loss": 1.2049, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9329962301511053, |
|
"eval_bleu": 0.4018379705338028, |
|
"eval_f1": null, |
|
"eval_loss": 1.7540892362594604, |
|
"eval_rouge1": 0.6126118264923794, |
|
"eval_rouge2": 0.401721192722568, |
|
"eval_rougeL": 0.5560492232539358, |
|
"eval_rougeLsum": 0.5578878696736621, |
|
"eval_runtime": 127.0162, |
|
"eval_samples_per_second": 77.486, |
|
"eval_steps_per_second": 1.212, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0006847500858390151, |
|
"loss": 1.2022, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9332761396675715, |
|
"eval_bleu": 0.40081654671926936, |
|
"eval_f1": null, |
|
"eval_loss": 1.7528060674667358, |
|
"eval_rouge1": 0.6126150736720647, |
|
"eval_rouge2": 0.4040778291827031, |
|
"eval_rougeL": 0.5534874799735049, |
|
"eval_rougeLsum": 0.5555363649376426, |
|
"eval_runtime": 129.054, |
|
"eval_samples_per_second": 76.263, |
|
"eval_steps_per_second": 1.193, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.0006602246529651248, |
|
"loss": 1.2035, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9329427619807436, |
|
"eval_bleu": 0.39769307763442124, |
|
"eval_f1": null, |
|
"eval_loss": 1.726220726966858, |
|
"eval_rouge1": 0.6151881785872779, |
|
"eval_rouge2": 0.4071357730525864, |
|
"eval_rougeL": 0.5581296316289542, |
|
"eval_rougeLsum": 0.5599612741043438, |
|
"eval_runtime": 128.7277, |
|
"eval_samples_per_second": 76.456, |
|
"eval_steps_per_second": 1.196, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0006356992200912346, |
|
"loss": 1.2025, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9327217574600184, |
|
"eval_bleu": 0.39532580118131083, |
|
"eval_f1": null, |
|
"eval_loss": 1.7384415864944458, |
|
"eval_rouge1": 0.6151113892112338, |
|
"eval_rouge2": 0.4060270300193623, |
|
"eval_rougeL": 0.5568165666491858, |
|
"eval_rougeLsum": 0.5588803581261892, |
|
"eval_runtime": 127.62, |
|
"eval_samples_per_second": 77.12, |
|
"eval_steps_per_second": 1.207, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0006111737872173444, |
|
"loss": 1.1673, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9335783984609746, |
|
"eval_bleu": 0.40091900271325864, |
|
"eval_f1": null, |
|
"eval_loss": 1.733450174331665, |
|
"eval_rouge1": 0.6145618299704365, |
|
"eval_rouge2": 0.4081499359458297, |
|
"eval_rougeL": 0.5577521621232591, |
|
"eval_rougeLsum": 0.5597557085924665, |
|
"eval_runtime": 124.8207, |
|
"eval_samples_per_second": 78.849, |
|
"eval_steps_per_second": 1.234, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.0005866483543434542, |
|
"loss": 1.1623, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9334935503286018, |
|
"eval_bleu": 0.4019902251412491, |
|
"eval_f1": null, |
|
"eval_loss": 1.7328290939331055, |
|
"eval_rouge1": 0.6131765183291862, |
|
"eval_rouge2": 0.4016827316456552, |
|
"eval_rougeL": 0.5554221776720798, |
|
"eval_rougeLsum": 0.5570871691893517, |
|
"eval_runtime": 120.9799, |
|
"eval_samples_per_second": 81.352, |
|
"eval_steps_per_second": 1.273, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0005621229214695639, |
|
"loss": 1.165, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9347834876762903, |
|
"eval_bleu": 0.41110556909183305, |
|
"eval_f1": null, |
|
"eval_loss": 1.7455238103866577, |
|
"eval_rouge1": 0.6222414096206548, |
|
"eval_rouge2": 0.41303505089517123, |
|
"eval_rougeL": 0.5654989953242198, |
|
"eval_rougeLsum": 0.5674212008550099, |
|
"eval_runtime": 122.4394, |
|
"eval_samples_per_second": 80.383, |
|
"eval_steps_per_second": 1.258, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0005375974885956737, |
|
"loss": 1.1674, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9342498932154143, |
|
"eval_bleu": 0.4057773557512404, |
|
"eval_f1": null, |
|
"eval_loss": 1.718157410621643, |
|
"eval_rouge1": 0.6173759208611083, |
|
"eval_rouge2": 0.4116919174551955, |
|
"eval_rougeL": 0.5609282546294716, |
|
"eval_rougeLsum": 0.5629408116210994, |
|
"eval_runtime": 126.5334, |
|
"eval_samples_per_second": 77.782, |
|
"eval_steps_per_second": 1.217, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.0005130720557217835, |
|
"loss": 1.1438, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9344984365233898, |
|
"eval_bleu": 0.40721752035612685, |
|
"eval_f1": null, |
|
"eval_loss": 1.7338136434555054, |
|
"eval_rouge1": 0.6163319555760827, |
|
"eval_rouge2": 0.4104974025783942, |
|
"eval_rougeL": 0.560882495272792, |
|
"eval_rougeLsum": 0.5632657540547901, |
|
"eval_runtime": 123.093, |
|
"eval_samples_per_second": 79.956, |
|
"eval_steps_per_second": 1.251, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0004885466228478932, |
|
"loss": 1.1271, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9343265120005904, |
|
"eval_bleu": 0.4107679859085796, |
|
"eval_f1": null, |
|
"eval_loss": 1.7594177722930908, |
|
"eval_rouge1": 0.6214337711077422, |
|
"eval_rouge2": 0.415499095864183, |
|
"eval_rougeL": 0.5634811122039034, |
|
"eval_rougeLsum": 0.565609388415719, |
|
"eval_runtime": 148.0927, |
|
"eval_samples_per_second": 66.458, |
|
"eval_steps_per_second": 1.04, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00046402118997400306, |
|
"loss": 1.1347, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9346181902246392, |
|
"eval_bleu": 0.41025657015747313, |
|
"eval_f1": null, |
|
"eval_loss": 1.7323421239852905, |
|
"eval_rouge1": 0.6202134304651805, |
|
"eval_rouge2": 0.41367504192967947, |
|
"eval_rougeL": 0.563684952549099, |
|
"eval_rougeLsum": 0.5657342582828795, |
|
"eval_runtime": 141.36, |
|
"eval_samples_per_second": 69.624, |
|
"eval_steps_per_second": 1.089, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.0004394957571001128, |
|
"loss": 1.1324, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9344481716817822, |
|
"eval_bleu": 0.4110716343495672, |
|
"eval_f1": null, |
|
"eval_loss": 1.7247016429901123, |
|
"eval_rouge1": 0.6206955757960952, |
|
"eval_rouge2": 0.4147409191504623, |
|
"eval_rougeL": 0.5632791739699408, |
|
"eval_rougeLsum": 0.565299463519248, |
|
"eval_runtime": 141.5482, |
|
"eval_samples_per_second": 69.531, |
|
"eval_steps_per_second": 1.088, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.0004149703242262226, |
|
"loss": 1.1232, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9342022908821602, |
|
"eval_bleu": 0.40558909867684073, |
|
"eval_f1": null, |
|
"eval_loss": 1.7131295204162598, |
|
"eval_rouge1": 0.617381846208605, |
|
"eval_rouge2": 0.4075391372025301, |
|
"eval_rougeL": 0.5593319238191912, |
|
"eval_rougeLsum": 0.5613898662787742, |
|
"eval_runtime": 141.3568, |
|
"eval_samples_per_second": 69.625, |
|
"eval_steps_per_second": 1.089, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.0003904448913523324, |
|
"loss": 1.099, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9344165326315463, |
|
"eval_bleu": 0.4117161078695893, |
|
"eval_f1": null, |
|
"eval_loss": 1.7616441249847412, |
|
"eval_rouge1": 0.6196071982260033, |
|
"eval_rouge2": 0.41061524172454117, |
|
"eval_rougeL": 0.5611350900753369, |
|
"eval_rougeLsum": 0.5630504417819058, |
|
"eval_runtime": 140.5199, |
|
"eval_samples_per_second": 70.04, |
|
"eval_steps_per_second": 1.096, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00036591945847844215, |
|
"loss": 1.1034, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9348069845879133, |
|
"eval_bleu": 0.4108533042307904, |
|
"eval_f1": null, |
|
"eval_loss": 1.7296594381332397, |
|
"eval_rouge1": 0.6208000465641403, |
|
"eval_rouge2": 0.4153571257641863, |
|
"eval_rougeL": 0.5634391781002251, |
|
"eval_rougeLsum": 0.5653490268367348, |
|
"eval_runtime": 144.1405, |
|
"eval_samples_per_second": 68.281, |
|
"eval_steps_per_second": 1.068, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.0003413940256045519, |
|
"loss": 1.1021, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9347718441130257, |
|
"eval_bleu": 0.4145601483938376, |
|
"eval_f1": null, |
|
"eval_loss": 1.7501702308654785, |
|
"eval_rouge1": 0.6220518407228575, |
|
"eval_rouge2": 0.41616106256237295, |
|
"eval_rougeL": 0.5653493806262004, |
|
"eval_rougeLsum": 0.5674926448995362, |
|
"eval_runtime": 143.3032, |
|
"eval_samples_per_second": 68.68, |
|
"eval_steps_per_second": 1.075, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0003168685927306617, |
|
"loss": 1.1028, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9349691032866738, |
|
"eval_bleu": 0.4172329212718154, |
|
"eval_f1": null, |
|
"eval_loss": 1.7387073040008545, |
|
"eval_rouge1": 0.6208100385324387, |
|
"eval_rouge2": 0.41548384315009723, |
|
"eval_rougeL": 0.5641532320511615, |
|
"eval_rougeLsum": 0.5660333813467002, |
|
"eval_runtime": 142.0461, |
|
"eval_samples_per_second": 69.287, |
|
"eval_steps_per_second": 1.084, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00029234315985677146, |
|
"loss": 1.0711, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9346298167519491, |
|
"eval_bleu": 0.40834821818709055, |
|
"eval_f1": null, |
|
"eval_loss": 1.7485404014587402, |
|
"eval_rouge1": 0.6197656710094954, |
|
"eval_rouge2": 0.41278766530342237, |
|
"eval_rougeL": 0.5630552402252382, |
|
"eval_rougeLsum": 0.5650759439058759, |
|
"eval_runtime": 146.6193, |
|
"eval_samples_per_second": 67.126, |
|
"eval_steps_per_second": 1.05, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.0002678177269828813, |
|
"loss": 1.0755, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9339887901089875, |
|
"eval_bleu": 0.4076539996327793, |
|
"eval_f1": null, |
|
"eval_loss": 1.752874732017517, |
|
"eval_rouge1": 0.6144795036298025, |
|
"eval_rouge2": 0.4050923100648689, |
|
"eval_rougeL": 0.5573353646470276, |
|
"eval_rougeLsum": 0.5593645460867931, |
|
"eval_runtime": 141.9841, |
|
"eval_samples_per_second": 69.318, |
|
"eval_steps_per_second": 1.085, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.00024329229410899103, |
|
"loss": 1.0723, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9346891578390598, |
|
"eval_bleu": 0.4112701971087721, |
|
"eval_f1": null, |
|
"eval_loss": 1.741502046585083, |
|
"eval_rouge1": 0.6224682653237195, |
|
"eval_rouge2": 0.4152409831528609, |
|
"eval_rougeL": 0.5646046534183624, |
|
"eval_rougeLsum": 0.5666606650250839, |
|
"eval_runtime": 142.1817, |
|
"eval_samples_per_second": 69.221, |
|
"eval_steps_per_second": 1.083, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.0002187668612351008, |
|
"loss": 1.0751, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9340490757014009, |
|
"eval_bleu": 0.40620204839955465, |
|
"eval_f1": null, |
|
"eval_loss": 1.7554686069488525, |
|
"eval_rouge1": 0.6164376145373307, |
|
"eval_rouge2": 0.40876980632820026, |
|
"eval_rougeL": 0.5595624559170196, |
|
"eval_rougeLsum": 0.5614936851444701, |
|
"eval_runtime": 141.9779, |
|
"eval_samples_per_second": 69.321, |
|
"eval_steps_per_second": 1.085, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.0001942414283612106, |
|
"loss": 1.053, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9347367377463842, |
|
"eval_bleu": 0.4099625767370798, |
|
"eval_f1": null, |
|
"eval_loss": 1.767859935760498, |
|
"eval_rouge1": 0.6194533859109013, |
|
"eval_rouge2": 0.4117428622305384, |
|
"eval_rougeL": 0.5620848425359636, |
|
"eval_rougeLsum": 0.5640752528686849, |
|
"eval_runtime": 142.9599, |
|
"eval_samples_per_second": 68.844, |
|
"eval_steps_per_second": 1.077, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.00016971599548732037, |
|
"loss": 1.0488, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_accuracy": null, |
|
"eval_bertscore_f1": 0.9345181620816249, |
|
"eval_bleu": 0.4124889872953633, |
|
"eval_f1": null, |
|
"eval_loss": 1.7536756992340088, |
|
"eval_rouge1": 0.6217065929610754, |
|
"eval_rouge2": 0.4146562066585521, |
|
"eval_rougeL": 0.5638042099155474, |
|
"eval_rougeLsum": 0.5658427165750861, |
|
"eval_runtime": 141.3859, |
|
"eval_samples_per_second": 69.611, |
|
"eval_steps_per_second": 1.089, |
|
"step": 72000 |
|
} |
|
], |
|
"max_steps": 85840, |
|
"num_train_epochs": 10, |
|
"total_flos": 7.189532808609792e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|