|
{ |
|
"best_metric": 0.31332626938819885, |
|
"best_model_checkpoint": "distilbert_lda_100_v1_qqp/checkpoint-2844", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 9954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.956484794616699, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.4043, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8511006678209251, |
|
"eval_combined_score": 0.8251500670658996, |
|
"eval_f1": 0.7991994663108739, |
|
"eval_loss": 0.32754120230674744, |
|
"eval_runtime": 17.107, |
|
"eval_samples_per_second": 2363.359, |
|
"eval_steps_per_second": 9.236, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.963893413543701, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.2918, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8599554786049963, |
|
"eval_combined_score": 0.8419983745191059, |
|
"eval_f1": 0.8240412704332154, |
|
"eval_loss": 0.31332626938819885, |
|
"eval_runtime": 17.2399, |
|
"eval_samples_per_second": 2345.134, |
|
"eval_steps_per_second": 9.165, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.0235133171081543, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.2305, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8714568389809547, |
|
"eval_combined_score": 0.852738415977335, |
|
"eval_f1": 0.8340199929737153, |
|
"eval_loss": 0.3146589696407318, |
|
"eval_runtime": 17.2803, |
|
"eval_samples_per_second": 2339.665, |
|
"eval_steps_per_second": 9.143, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.2095916271209717, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.179, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8760079149146673, |
|
"eval_combined_score": 0.8519556894083102, |
|
"eval_f1": 0.8279034639019532, |
|
"eval_loss": 0.31782716512680054, |
|
"eval_runtime": 17.1901, |
|
"eval_samples_per_second": 2351.932, |
|
"eval_steps_per_second": 9.191, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.359029531478882, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1389, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8804600544150384, |
|
"eval_combined_score": 0.8585003961999769, |
|
"eval_f1": 0.8365407379849155, |
|
"eval_loss": 0.3524511456489563, |
|
"eval_runtime": 17.2563, |
|
"eval_samples_per_second": 2342.907, |
|
"eval_steps_per_second": 9.156, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.7366631031036377, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.1067, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8782587187731882, |
|
"eval_combined_score": 0.8595780276048511, |
|
"eval_f1": 0.8408973364365141, |
|
"eval_loss": 0.3905220925807953, |
|
"eval_runtime": 17.273, |
|
"eval_samples_per_second": 2340.65, |
|
"eval_steps_per_second": 9.147, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.230625629425049, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.086, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8788276032649023, |
|
"eval_combined_score": 0.8607857401295461, |
|
"eval_f1": 0.84274387699419, |
|
"eval_loss": 0.4037318527698517, |
|
"eval_runtime": 17.1931, |
|
"eval_samples_per_second": 2351.532, |
|
"eval_steps_per_second": 9.19, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 9954, |
|
"total_flos": 1.686920659598684e+17, |
|
"train_loss": 0.20531803780299535, |
|
"train_runtime": 2492.9036, |
|
"train_samples_per_second": 7297.635, |
|
"train_steps_per_second": 28.521 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 71100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.686920659598684e+17, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|