{ "best_metric": 0.04986047372221947, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf_aragpt2-base/checkpoint-4180", "epoch": 10.0, "eval_steps": 500, "global_step": 8360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.2068207710981369, "learning_rate": 4.896424167694204e-05, "loss": 1.1245, "step": 836 }, { "epoch": 1.0, "eval_bleu": 0.00522127887673706, "eval_loss": 0.060549940913915634, "eval_rouge1": 0.16261381712821865, "eval_rouge2": 0.016191591492833397, "eval_rougeL": 0.16017425267387253, "eval_runtime": 159.9696, "eval_samples_per_second": 10.452, "eval_steps_per_second": 1.306, "step": 836 }, { "epoch": 2.0, "grad_norm": 0.19557742774486542, "learning_rate": 4.638717632552405e-05, "loss": 0.0632, "step": 1672 }, { "epoch": 2.0, "eval_bleu": 0.011943726030788412, "eval_loss": 0.05519399791955948, "eval_rouge1": 0.2541122949630313, "eval_rouge2": 0.051699799633347324, "eval_rougeL": 0.2510765914158727, "eval_runtime": 221.3727, "eval_samples_per_second": 7.553, "eval_steps_per_second": 0.944, "step": 1672 }, { "epoch": 3.0, "grad_norm": 0.14342406392097473, "learning_rate": 4.3810110974106046e-05, "loss": 0.055, "step": 2508 }, { "epoch": 3.0, "eval_bleu": 0.02252740132414782, "eval_loss": 0.05222811922430992, "eval_rouge1": 0.30471930149085674, "eval_rouge2": 0.08185853906387802, "eval_rougeL": 0.30154781860876523, "eval_runtime": 36.8724, "eval_samples_per_second": 45.346, "eval_steps_per_second": 5.668, "step": 2508 }, { "epoch": 4.0, "grad_norm": 0.16051243245601654, "learning_rate": 4.1233045622688044e-05, "loss": 0.0492, "step": 3344 }, { "epoch": 4.0, "eval_bleu": 0.032582447132208606, "eval_loss": 0.051017943769693375, "eval_rouge1": 0.3346924293421615, "eval_rouge2": 0.10192760972807041, "eval_rougeL": 0.33180504017979595, "eval_runtime": 159.805, "eval_samples_per_second": 10.463, "eval_steps_per_second": 1.308, "step": 3344 }, { "epoch": 5.0, "grad_norm": 0.15151090919971466, "learning_rate": 3.8655980271270036e-05, "loss": 0.0444, "step": 4180 }, { "epoch": 5.0, "eval_bleu": 0.03933306151343601, "eval_loss": 0.04986047372221947, "eval_rouge1": 0.35895878709405493, "eval_rouge2": 0.124247769713655, "eval_rougeL": 0.3572225001900925, "eval_runtime": 159.8295, "eval_samples_per_second": 10.461, "eval_steps_per_second": 1.308, "step": 4180 }, { "epoch": 6.0, "grad_norm": 0.11806467920541763, "learning_rate": 3.6078914919852034e-05, "loss": 0.0402, "step": 5016 }, { "epoch": 6.0, "eval_bleu": 0.04622708241683158, "eval_loss": 0.04995572566986084, "eval_rouge1": 0.3810393862934106, "eval_rouge2": 0.13792791296322293, "eval_rougeL": 0.3788340225664032, "eval_runtime": 159.7942, "eval_samples_per_second": 10.463, "eval_steps_per_second": 1.308, "step": 5016 }, { "epoch": 7.0, "grad_norm": 0.15260820090770721, "learning_rate": 3.350184956843403e-05, "loss": 0.0366, "step": 5852 }, { "epoch": 7.0, "eval_bleu": 0.04988840099834184, "eval_loss": 0.05027288198471069, "eval_rouge1": 0.3961215133733164, "eval_rouge2": 0.15246703105477005, "eval_rougeL": 0.3937993288436519, "eval_runtime": 36.8733, "eval_samples_per_second": 45.344, "eval_steps_per_second": 5.668, "step": 5852 }, { "epoch": 8.0, "grad_norm": 0.14294394850730896, "learning_rate": 3.092478421701603e-05, "loss": 0.0334, "step": 6688 }, { "epoch": 8.0, "eval_bleu": 0.05614309403616892, "eval_loss": 0.050896577537059784, "eval_rouge1": 0.407117249999222, "eval_rouge2": 0.15984144200856937, "eval_rougeL": 0.40517229812025873, "eval_runtime": 37.0371, "eval_samples_per_second": 45.144, "eval_steps_per_second": 5.643, "step": 6688 }, { "epoch": 9.0, "grad_norm": 0.20682789385318756, "learning_rate": 2.8347718865598028e-05, "loss": 0.0307, "step": 7524 }, { "epoch": 9.0, "eval_bleu": 0.0606895848585085, "eval_loss": 0.05167483910918236, "eval_rouge1": 0.41011122575047865, "eval_rouge2": 0.1731982844904956, "eval_rougeL": 0.40855415076231816, "eval_runtime": 159.7295, "eval_samples_per_second": 10.468, "eval_steps_per_second": 1.308, "step": 7524 }, { "epoch": 10.0, "grad_norm": 0.13990797102451324, "learning_rate": 2.5770653514180026e-05, "loss": 0.0283, "step": 8360 }, { "epoch": 10.0, "eval_bleu": 0.06531719425427113, "eval_loss": 0.05382031202316284, "eval_rouge1": 0.41672477670989894, "eval_rouge2": 0.17546430438593302, "eval_rougeL": 0.4150315774802549, "eval_runtime": 159.9272, "eval_samples_per_second": 10.455, "eval_steps_per_second": 1.307, "step": 8360 }, { "epoch": 10.0, "step": 8360, "total_flos": 3.49347446784e+16, "train_loss": 0.15054923290270938, "train_runtime": 5668.5001, "train_samples_per_second": 23.586, "train_steps_per_second": 2.95 } ], "logging_steps": 500, "max_steps": 16720, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.49347446784e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }