{ "best_metric": 0.3234252333641052, "best_model_checkpoint": "bert_base_lda_100_v1_qqp/checkpoint-4266", "epoch": 8.0, "eval_steps": 500, "global_step": 11376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.8851810693740845, "learning_rate": 4.9e-05, "loss": 0.4474, "step": 1422 }, { "epoch": 1.0, "eval_accuracy": 0.8283947563690329, "eval_combined_score": 0.7949221753238371, "eval_f1": 0.7614495942786413, "eval_loss": 0.3685195744037628, "eval_runtime": 26.2526, "eval_samples_per_second": 1540.036, "eval_steps_per_second": 6.018, "step": 1422 }, { "epoch": 2.0, "grad_norm": 2.972749710083008, "learning_rate": 4.8e-05, "loss": 0.3271, "step": 2844 }, { "epoch": 2.0, "eval_accuracy": 0.847588424437299, "eval_combined_score": 0.8289650340099699, "eval_f1": 0.8103416435826409, "eval_loss": 0.3386009633541107, "eval_runtime": 26.2178, "eval_samples_per_second": 1542.084, "eval_steps_per_second": 6.026, "step": 2844 }, { "epoch": 3.0, "grad_norm": 3.082465887069702, "learning_rate": 4.7e-05, "loss": 0.2564, "step": 4266 }, { "epoch": 3.0, "eval_accuracy": 0.8609448429384121, "eval_combined_score": 0.8397833002922801, "eval_f1": 0.818621757646148, "eval_loss": 0.3234252333641052, "eval_runtime": 26.1778, "eval_samples_per_second": 1544.441, "eval_steps_per_second": 6.036, "step": 4266 }, { "epoch": 4.0, "grad_norm": 3.214107036590576, "learning_rate": 4.600000000000001e-05, "loss": 0.1978, "step": 5688 }, { "epoch": 4.0, "eval_accuracy": 0.865322780113777, "eval_combined_score": 0.845827594247875, "eval_f1": 0.8263324083819731, "eval_loss": 0.3627840280532837, "eval_runtime": 26.0619, "eval_samples_per_second": 1551.309, "eval_steps_per_second": 6.062, "step": 5688 }, { "epoch": 5.0, "grad_norm": 4.68060302734375, "learning_rate": 4.5e-05, "loss": 0.1516, "step": 7110 }, { "epoch": 5.0, "eval_accuracy": 0.869527578530794, "eval_combined_score": 0.8474033176016561, "eval_f1": 0.8252790566725182, "eval_loss": 0.40143004059791565, "eval_runtime": 26.1999, "eval_samples_per_second": 1543.138, "eval_steps_per_second": 6.031, "step": 7110 }, { "epoch": 6.0, "grad_norm": 4.411165237426758, "learning_rate": 4.4000000000000006e-05, "loss": 0.1169, "step": 8532 }, { "epoch": 6.0, "eval_accuracy": 0.867276774672273, "eval_combined_score": 0.8475178249685433, "eval_f1": 0.8277588752648135, "eval_loss": 0.3963766396045685, "eval_runtime": 26.1361, "eval_samples_per_second": 1546.904, "eval_steps_per_second": 6.045, "step": 8532 }, { "epoch": 7.0, "grad_norm": 2.608915090560913, "learning_rate": 4.3e-05, "loss": 0.093, "step": 9954 }, { "epoch": 7.0, "eval_accuracy": 0.8676230521889686, "eval_combined_score": 0.8477775754772003, "eval_f1": 0.8279320987654322, "eval_loss": 0.4813096225261688, "eval_runtime": 25.9104, "eval_samples_per_second": 1560.379, "eval_steps_per_second": 6.098, "step": 9954 }, { "epoch": 8.0, "grad_norm": 4.501567840576172, "learning_rate": 4.2e-05, "loss": 0.076, "step": 11376 }, { "epoch": 8.0, "eval_accuracy": 0.8693297056641108, "eval_combined_score": 0.8489380492102074, "eval_f1": 0.8285463927563042, "eval_loss": 0.43455520272254944, "eval_runtime": 26.0711, "eval_samples_per_second": 1550.762, "eval_steps_per_second": 6.06, "step": 11376 }, { "epoch": 8.0, "step": 11376, "total_flos": 3.8292762019405824e+17, "train_loss": 0.2082672454469147, "train_runtime": 4927.5674, "train_samples_per_second": 3691.943, "train_steps_per_second": 14.429 } ], "logging_steps": 1, "max_steps": 71100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.8292762019405824e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }