{
  "best_metric": 0.3849843144416809,
  "best_model_checkpoint": "autotrain-rj5xv-b9wsb/checkpoint-1584",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1584,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04734848484848485,
      "grad_norm": 4.511193752288818,
      "learning_rate": 7.861635220125786e-06,
      "loss": 2.8758,
      "step": 25
    },
    {
      "epoch": 0.0946969696969697,
      "grad_norm": 3.8025200366973877,
      "learning_rate": 1.572327044025157e-05,
      "loss": 2.3043,
      "step": 50
    },
    {
      "epoch": 0.14204545454545456,
      "grad_norm": 2.95987868309021,
      "learning_rate": 2.358490566037736e-05,
      "loss": 1.5836,
      "step": 75
    },
    {
      "epoch": 0.1893939393939394,
      "grad_norm": 3.758180618286133,
      "learning_rate": 3.144654088050314e-05,
      "loss": 1.4148,
      "step": 100
    },
    {
      "epoch": 0.23674242424242425,
      "grad_norm": 1.9288899898529053,
      "learning_rate": 3.9308176100628936e-05,
      "loss": 1.0447,
      "step": 125
    },
    {
      "epoch": 0.2840909090909091,
      "grad_norm": 2.4479756355285645,
      "learning_rate": 4.716981132075472e-05,
      "loss": 0.7194,
      "step": 150
    },
    {
      "epoch": 0.3314393939393939,
      "grad_norm": 3.4983065128326416,
      "learning_rate": 4.943859649122807e-05,
      "loss": 0.8141,
      "step": 175
    },
    {
      "epoch": 0.3787878787878788,
      "grad_norm": 2.1139252185821533,
      "learning_rate": 4.856140350877193e-05,
      "loss": 0.8187,
      "step": 200
    },
    {
      "epoch": 0.42613636363636365,
      "grad_norm": 2.1289165019989014,
      "learning_rate": 4.7684210526315794e-05,
      "loss": 0.8632,
      "step": 225
    },
    {
      "epoch": 0.4734848484848485,
      "grad_norm": 3.188091516494751,
      "learning_rate": 4.680701754385965e-05,
      "loss": 0.7706,
      "step": 250
    },
    {
      "epoch": 0.5208333333333334,
      "grad_norm": 3.666327714920044,
      "learning_rate": 4.592982456140351e-05,
      "loss": 0.6436,
      "step": 275
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 3.153630256652832,
      "learning_rate": 4.5052631578947366e-05,
      "loss": 0.5896,
      "step": 300
    },
    {
      "epoch": 0.615530303030303,
      "grad_norm": 1.9767557382583618,
      "learning_rate": 4.417543859649123e-05,
      "loss": 0.6594,
      "step": 325
    },
    {
      "epoch": 0.6628787878787878,
      "grad_norm": 1.0973279476165771,
      "learning_rate": 4.329824561403509e-05,
      "loss": 0.7358,
      "step": 350
    },
    {
      "epoch": 0.7102272727272727,
      "grad_norm": 3.707737922668457,
      "learning_rate": 4.242105263157895e-05,
      "loss": 0.5802,
      "step": 375
    },
    {
      "epoch": 0.7575757575757576,
      "grad_norm": 1.6052640676498413,
      "learning_rate": 4.1543859649122806e-05,
      "loss": 0.5345,
      "step": 400
    },
    {
      "epoch": 0.8049242424242424,
      "grad_norm": 2.5281245708465576,
      "learning_rate": 4.066666666666667e-05,
      "loss": 0.5426,
      "step": 425
    },
    {
      "epoch": 0.8522727272727273,
      "grad_norm": 1.1469014883041382,
      "learning_rate": 3.978947368421053e-05,
      "loss": 0.4517,
      "step": 450
    },
    {
      "epoch": 0.8996212121212122,
      "grad_norm": 2.1793298721313477,
      "learning_rate": 3.891228070175439e-05,
      "loss": 0.5795,
      "step": 475
    },
    {
      "epoch": 0.946969696969697,
      "grad_norm": 1.2433311939239502,
      "learning_rate": 3.8035087719298247e-05,
      "loss": 0.4883,
      "step": 500
    },
    {
      "epoch": 0.9943181818181818,
      "grad_norm": 1.057558536529541,
      "learning_rate": 3.715789473684211e-05,
      "loss": 0.6399,
      "step": 525
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 18.9849,
      "eval_loss": 0.45811647176742554,
      "eval_rouge1": 45.943,
      "eval_rouge2": 41.8728,
      "eval_rougeL": 45.3719,
      "eval_rougeLsum": 45.5844,
      "eval_runtime": 638.6799,
      "eval_samples_per_second": 0.415,
      "eval_steps_per_second": 0.105,
      "step": 528
    },
    {
      "epoch": 1.0416666666666667,
      "grad_norm": 0.9785796999931335,
      "learning_rate": 3.628070175438596e-05,
      "loss": 0.4255,
      "step": 550
    },
    {
      "epoch": 1.0890151515151516,
      "grad_norm": 2.2381393909454346,
      "learning_rate": 3.5403508771929825e-05,
      "loss": 0.4439,
      "step": 575
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 2.753933906555176,
      "learning_rate": 3.452631578947369e-05,
      "loss": 0.5136,
      "step": 600
    },
    {
      "epoch": 1.183712121212121,
      "grad_norm": 1.5527116060256958,
      "learning_rate": 3.364912280701755e-05,
      "loss": 0.3932,
      "step": 625
    },
    {
      "epoch": 1.231060606060606,
      "grad_norm": 0.6465177536010742,
      "learning_rate": 3.2771929824561403e-05,
      "loss": 0.5194,
      "step": 650
    },
    {
      "epoch": 1.2784090909090908,
      "grad_norm": 1.6484010219573975,
      "learning_rate": 3.1894736842105265e-05,
      "loss": 0.4773,
      "step": 675
    },
    {
      "epoch": 1.3257575757575757,
      "grad_norm": 1.9873112440109253,
      "learning_rate": 3.101754385964912e-05,
      "loss": 0.4183,
      "step": 700
    },
    {
      "epoch": 1.3731060606060606,
      "grad_norm": 2.486020803451538,
      "learning_rate": 3.0140350877192985e-05,
      "loss": 0.6344,
      "step": 725
    },
    {
      "epoch": 1.4204545454545454,
      "grad_norm": 1.8892446756362915,
      "learning_rate": 2.9263157894736844e-05,
      "loss": 0.4328,
      "step": 750
    },
    {
      "epoch": 1.4678030303030303,
      "grad_norm": 0.961203932762146,
      "learning_rate": 2.8385964912280705e-05,
      "loss": 0.5562,
      "step": 775
    },
    {
      "epoch": 1.5151515151515151,
      "grad_norm": 0.44476789236068726,
      "learning_rate": 2.750877192982456e-05,
      "loss": 0.3306,
      "step": 800
    },
    {
      "epoch": 1.5625,
      "grad_norm": 0.9097229838371277,
      "learning_rate": 2.6631578947368426e-05,
      "loss": 0.3695,
      "step": 825
    },
    {
      "epoch": 1.6098484848484849,
      "grad_norm": 1.3498631715774536,
      "learning_rate": 2.575438596491228e-05,
      "loss": 0.3705,
      "step": 850
    },
    {
      "epoch": 1.6571969696969697,
      "grad_norm": 0.6882970929145813,
      "learning_rate": 2.4877192982456142e-05,
      "loss": 0.5327,
      "step": 875
    },
    {
      "epoch": 1.7045454545454546,
      "grad_norm": 0.7746265530586243,
      "learning_rate": 2.4e-05,
      "loss": 0.3751,
      "step": 900
    },
    {
      "epoch": 1.7518939393939394,
      "grad_norm": 1.2026286125183105,
      "learning_rate": 2.312280701754386e-05,
      "loss": 0.2608,
      "step": 925
    },
    {
      "epoch": 1.7992424242424243,
      "grad_norm": 5.0856099128723145,
      "learning_rate": 2.224561403508772e-05,
      "loss": 0.4432,
      "step": 950
    },
    {
      "epoch": 1.8465909090909092,
      "grad_norm": 2.294633150100708,
      "learning_rate": 2.136842105263158e-05,
      "loss": 0.5151,
      "step": 975
    },
    {
      "epoch": 1.893939393939394,
      "grad_norm": 1.039154052734375,
      "learning_rate": 2.0491228070175437e-05,
      "loss": 0.3376,
      "step": 1000
    },
    {
      "epoch": 1.941287878787879,
      "grad_norm": 1.9636164903640747,
      "learning_rate": 1.96140350877193e-05,
      "loss": 0.4545,
      "step": 1025
    },
    {
      "epoch": 1.9886363636363638,
      "grad_norm": 1.2051464319229126,
      "learning_rate": 1.8736842105263158e-05,
      "loss": 0.5154,
      "step": 1050
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 18.9887,
      "eval_loss": 0.3989172577857971,
      "eval_rouge1": 46.097,
      "eval_rouge2": 42.2086,
      "eval_rougeL": 45.6885,
      "eval_rougeLsum": 45.7885,
      "eval_runtime": 648.8496,
      "eval_samples_per_second": 0.408,
      "eval_steps_per_second": 0.103,
      "step": 1056
    },
    {
      "epoch": 2.0359848484848486,
      "grad_norm": 0.8088118433952332,
      "learning_rate": 1.785964912280702e-05,
      "loss": 0.4052,
      "step": 1075
    },
    {
      "epoch": 2.0833333333333335,
      "grad_norm": 0.8511497974395752,
      "learning_rate": 1.6982456140350878e-05,
      "loss": 0.2922,
      "step": 1100
    },
    {
      "epoch": 2.1306818181818183,
      "grad_norm": 1.41793954372406,
      "learning_rate": 1.6105263157894736e-05,
      "loss": 0.1893,
      "step": 1125
    },
    {
      "epoch": 2.178030303030303,
      "grad_norm": 0.5501357316970825,
      "learning_rate": 1.5228070175438596e-05,
      "loss": 0.3815,
      "step": 1150
    },
    {
      "epoch": 2.225378787878788,
      "grad_norm": 0.7588028311729431,
      "learning_rate": 1.4350877192982456e-05,
      "loss": 0.3866,
      "step": 1175
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 1.9546937942504883,
      "learning_rate": 1.3473684210526316e-05,
      "loss": 0.3575,
      "step": 1200
    },
    {
      "epoch": 2.320075757575758,
      "grad_norm": 0.6162445545196533,
      "learning_rate": 1.2596491228070176e-05,
      "loss": 0.5128,
      "step": 1225
    },
    {
      "epoch": 2.367424242424242,
      "grad_norm": 2.0828588008880615,
      "learning_rate": 1.1719298245614036e-05,
      "loss": 0.4164,
      "step": 1250
    },
    {
      "epoch": 2.4147727272727275,
      "grad_norm": 1.3566968441009521,
      "learning_rate": 1.0842105263157895e-05,
      "loss": 0.3453,
      "step": 1275
    },
    {
      "epoch": 2.462121212121212,
      "grad_norm": 0.7142326235771179,
      "learning_rate": 9.964912280701755e-06,
      "loss": 0.4117,
      "step": 1300
    },
    {
      "epoch": 2.5094696969696972,
      "grad_norm": 2.7209577560424805,
      "learning_rate": 9.087719298245615e-06,
      "loss": 0.3381,
      "step": 1325
    },
    {
      "epoch": 2.5568181818181817,
      "grad_norm": 2.0031161308288574,
      "learning_rate": 8.210526315789475e-06,
      "loss": 0.2646,
      "step": 1350
    },
    {
      "epoch": 2.6041666666666665,
      "grad_norm": 0.36383265256881714,
      "learning_rate": 7.333333333333334e-06,
      "loss": 0.3025,
      "step": 1375
    },
    {
      "epoch": 2.6515151515151514,
      "grad_norm": 2.464855194091797,
      "learning_rate": 6.456140350877193e-06,
      "loss": 0.3774,
      "step": 1400
    },
    {
      "epoch": 2.6988636363636362,
      "grad_norm": 2.341742515563965,
      "learning_rate": 5.578947368421053e-06,
      "loss": 0.3929,
      "step": 1425
    },
    {
      "epoch": 2.746212121212121,
      "grad_norm": 0.6026067137718201,
      "learning_rate": 4.7017543859649125e-06,
      "loss": 0.4215,
      "step": 1450
    },
    {
      "epoch": 2.793560606060606,
      "grad_norm": 1.8426591157913208,
      "learning_rate": 3.8245614035087725e-06,
      "loss": 0.2499,
      "step": 1475
    },
    {
      "epoch": 2.840909090909091,
      "grad_norm": 1.0914125442504883,
      "learning_rate": 2.9473684210526317e-06,
      "loss": 0.3535,
      "step": 1500
    },
    {
      "epoch": 2.8882575757575757,
      "grad_norm": 0.6658899784088135,
      "learning_rate": 2.0701754385964913e-06,
      "loss": 0.3272,
      "step": 1525
    },
    {
      "epoch": 2.9356060606060606,
      "grad_norm": 0.7760673761367798,
      "learning_rate": 1.192982456140351e-06,
      "loss": 0.4175,
      "step": 1550
    },
    {
      "epoch": 2.9829545454545454,
      "grad_norm": 1.258583426475525,
      "learning_rate": 3.1578947368421055e-07,
      "loss": 0.4278,
      "step": 1575
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 18.9849,
      "eval_loss": 0.3849843144416809,
      "eval_rouge1": 46.2037,
      "eval_rouge2": 42.3541,
      "eval_rougeL": 45.8784,
      "eval_rougeLsum": 45.9787,
      "eval_runtime": 656.7129,
      "eval_samples_per_second": 0.404,
      "eval_steps_per_second": 0.102,
      "step": 1584
    }
  ],
  "logging_steps": 25,
  "max_steps": 1584,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 390532125671424.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}