|
{ |
|
"best_metric": 0.2688056230545044, |
|
"best_model_checkpoint": "distilbert_lda_100_v1_book_qqp/checkpoint-4266", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 11376, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.6672399044036865, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.3709, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8678456591639871, |
|
"eval_combined_score": 0.8435809405691093, |
|
"eval_f1": 0.8193162219742315, |
|
"eval_loss": 0.3001379072666168, |
|
"eval_runtime": 12.6438, |
|
"eval_samples_per_second": 3197.621, |
|
"eval_steps_per_second": 12.496, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.6408467292785645, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.2573, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8761810536730151, |
|
"eval_combined_score": 0.8594045790213551, |
|
"eval_f1": 0.842628104369695, |
|
"eval_loss": 0.28343185782432556, |
|
"eval_runtime": 12.6843, |
|
"eval_samples_per_second": 3187.412, |
|
"eval_steps_per_second": 12.456, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.3231992721557617, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.1922, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8875092752906257, |
|
"eval_combined_score": 0.8687645307992504, |
|
"eval_f1": 0.850019786307875, |
|
"eval_loss": 0.2688056230545044, |
|
"eval_runtime": 12.5615, |
|
"eval_samples_per_second": 3218.574, |
|
"eval_steps_per_second": 12.578, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.48140811920166, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.1411, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8910462527825872, |
|
"eval_combined_score": 0.8708443890637921, |
|
"eval_f1": 0.8506425253449971, |
|
"eval_loss": 0.3128622770309448, |
|
"eval_runtime": 13.017, |
|
"eval_samples_per_second": 3105.941, |
|
"eval_steps_per_second": 12.138, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.0898919105529785, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.105, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8932475884244373, |
|
"eval_combined_score": 0.8747814611674087, |
|
"eval_f1": 0.8563153339103802, |
|
"eval_loss": 0.325675368309021, |
|
"eval_runtime": 12.5089, |
|
"eval_samples_per_second": 3232.109, |
|
"eval_steps_per_second": 12.631, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 5.714954853057861, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.0794, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8901063566658421, |
|
"eval_combined_score": 0.8723434607929086, |
|
"eval_f1": 0.8545805649199751, |
|
"eval_loss": 0.36958828568458557, |
|
"eval_runtime": 12.5087, |
|
"eval_samples_per_second": 3232.151, |
|
"eval_steps_per_second": 12.631, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 6.753868579864502, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.0646, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8888943853574078, |
|
"eval_combined_score": 0.8712875835581828, |
|
"eval_f1": 0.8536807817589577, |
|
"eval_loss": 0.3887239396572113, |
|
"eval_runtime": 12.5806, |
|
"eval_samples_per_second": 3213.679, |
|
"eval_steps_per_second": 12.559, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.0667166709899902, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.0528, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.89060103883255, |
|
"eval_combined_score": 0.873538180447524, |
|
"eval_f1": 0.856475322062498, |
|
"eval_loss": 0.42925697565078735, |
|
"eval_runtime": 12.6223, |
|
"eval_samples_per_second": 3203.05, |
|
"eval_steps_per_second": 12.517, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 11376, |
|
"total_flos": 1.927909325255639e+17, |
|
"train_loss": 0.1579339621606926, |
|
"train_runtime": 2009.8047, |
|
"train_samples_per_second": 9051.775, |
|
"train_steps_per_second": 35.377 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 71100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.927909325255639e+17, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|