|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 25, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"grad_norm": 1.6777013540267944, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.6393, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"eval_accuracy": 0.8759578544061303, |
|
"eval_auc": 0.9551645887113986, |
|
"eval_f1": 0.7757575757575758, |
|
"eval_loss": 0.4472915232181549, |
|
"eval_precision": 0.9032258064516129, |
|
"eval_recall": 0.6798179059180577, |
|
"eval_runtime": 3.1514, |
|
"eval_samples_per_second": 662.564, |
|
"eval_steps_per_second": 2.856, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 1.745863676071167, |
|
"learning_rate": 2.464764460404427e-05, |
|
"loss": 0.2377, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"eval_accuracy": 0.9439655172413793, |
|
"eval_auc": 0.9875004114850522, |
|
"eval_f1": 0.9164882226980728, |
|
"eval_loss": 0.15234950184822083, |
|
"eval_precision": 0.8652291105121294, |
|
"eval_recall": 0.9742033383915023, |
|
"eval_runtime": 3.1506, |
|
"eval_samples_per_second": 662.733, |
|
"eval_steps_per_second": 2.857, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 2.064204454421997, |
|
"learning_rate": 2.3250178002596257e-05, |
|
"loss": 0.1276, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"eval_accuracy": 0.9640804597701149, |
|
"eval_auc": 0.9937093227115326, |
|
"eval_f1": 0.9433106575963719, |
|
"eval_loss": 0.09969615936279297, |
|
"eval_precision": 0.9397590361445783, |
|
"eval_recall": 0.9468892261001517, |
|
"eval_runtime": 3.1599, |
|
"eval_samples_per_second": 660.782, |
|
"eval_steps_per_second": 2.848, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 1.5831186771392822, |
|
"learning_rate": 2.090825467126566e-05, |
|
"loss": 0.1035, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"eval_accuracy": 0.9664750957854407, |
|
"eval_auc": 0.9949464326104294, |
|
"eval_f1": 0.9470499243570348, |
|
"eval_loss": 0.08703920990228653, |
|
"eval_precision": 0.9441930618401206, |
|
"eval_recall": 0.9499241274658573, |
|
"eval_runtime": 3.1587, |
|
"eval_samples_per_second": 661.03, |
|
"eval_steps_per_second": 2.849, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8503401360544217, |
|
"grad_norm": 1.721102237701416, |
|
"learning_rate": 1.7827624249789604e-05, |
|
"loss": 0.0727, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.8503401360544217, |
|
"eval_accuracy": 0.9669540229885057, |
|
"eval_auc": 0.9950313843631433, |
|
"eval_f1": 0.9473684210526315, |
|
"eval_loss": 0.08850479125976562, |
|
"eval_precision": 0.9524539877300614, |
|
"eval_recall": 0.9423368740515933, |
|
"eval_runtime": 3.1669, |
|
"eval_samples_per_second": 659.316, |
|
"eval_steps_per_second": 2.842, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 1.1454665660858154, |
|
"learning_rate": 1.4278935478416066e-05, |
|
"loss": 0.0885, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"eval_accuracy": 0.9712643678160919, |
|
"eval_auc": 0.9956770176837693, |
|
"eval_f1": 0.9541984732824428, |
|
"eval_loss": 0.08041754364967346, |
|
"eval_precision": 0.9600614439324117, |
|
"eval_recall": 0.9484066767830045, |
|
"eval_runtime": 3.1728, |
|
"eval_samples_per_second": 658.102, |
|
"eval_steps_per_second": 2.837, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 0.6928611397743225, |
|
"learning_rate": 1.0573958356820683e-05, |
|
"loss": 0.0614, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"eval_accuracy": 0.9746168582375478, |
|
"eval_auc": 0.9961580569835119, |
|
"eval_f1": 0.9594491201224178, |
|
"eval_loss": 0.07542683929204941, |
|
"eval_precision": 0.9675925925925926, |
|
"eval_recall": 0.9514415781487102, |
|
"eval_runtime": 3.1686, |
|
"eval_samples_per_second": 658.971, |
|
"eval_steps_per_second": 2.84, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.3605442176870748, |
|
"grad_norm": 0.8894994854927063, |
|
"learning_rate": 7.038193595383008e-06, |
|
"loss": 0.0448, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3605442176870748, |
|
"eval_accuracy": 0.9712643678160919, |
|
"eval_auc": 0.9961644283649654, |
|
"eval_f1": 0.9541984732824428, |
|
"eval_loss": 0.07568268477916718, |
|
"eval_precision": 0.9600614439324117, |
|
"eval_recall": 0.9484066767830045, |
|
"eval_runtime": 3.162, |
|
"eval_samples_per_second": 660.342, |
|
"eval_steps_per_second": 2.846, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.5306122448979593, |
|
"grad_norm": 0.8013057708740234, |
|
"learning_rate": 3.98227575507636e-06, |
|
"loss": 0.0547, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.5306122448979593, |
|
"eval_accuracy": 0.9717432950191571, |
|
"eval_auc": 0.9963555698085719, |
|
"eval_f1": 0.954858454475899, |
|
"eval_loss": 0.07406975328922272, |
|
"eval_precision": 0.9629629629629629, |
|
"eval_recall": 0.9468892261001517, |
|
"eval_runtime": 3.1666, |
|
"eval_samples_per_second": 659.381, |
|
"eval_steps_per_second": 2.842, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.7006802721088436, |
|
"grad_norm": 1.3701196908950806, |
|
"learning_rate": 1.6746824526945163e-06, |
|
"loss": 0.048, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.7006802721088436, |
|
"eval_accuracy": 0.9746168582375478, |
|
"eval_auc": 0.9967229861390597, |
|
"eval_f1": 0.9601203912716328, |
|
"eval_loss": 0.07189524918794632, |
|
"eval_precision": 0.9522388059701492, |
|
"eval_recall": 0.9681335356600911, |
|
"eval_runtime": 3.1579, |
|
"eval_samples_per_second": 661.202, |
|
"eval_steps_per_second": 2.85, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.870748299319728, |
|
"grad_norm": 1.6245123147964478, |
|
"learning_rate": 3.181472637875868e-07, |
|
"loss": 0.0473, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.870748299319728, |
|
"eval_accuracy": 0.975095785440613, |
|
"eval_auc": 0.9967537811494185, |
|
"eval_f1": 0.9606060606060606, |
|
"eval_loss": 0.07112736254930496, |
|
"eval_precision": 0.9591527987897126, |
|
"eval_recall": 0.9620637329286799, |
|
"eval_runtime": 3.1642, |
|
"eval_samples_per_second": 659.887, |
|
"eval_steps_per_second": 2.844, |
|
"step": 275 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4985013284110336.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|