{ "best_metric": 1.6844276189804077, "best_model_checkpoint": "bert_base_lda_100_v1_stsb/checkpoint-184", "epoch": 13.0, "eval_steps": 500, "global_step": 299, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 9.02434253692627, "learning_rate": 4.9e-05, "loss": 2.7331, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.07012747710946046, "eval_loss": 2.6188812255859375, "eval_pearson": 0.0642887160182263, "eval_runtime": 0.9624, "eval_samples_per_second": 1558.649, "eval_spearmanr": 0.0759662382006946, "eval_steps_per_second": 6.235, "step": 23 }, { "epoch": 2.0, "grad_norm": 23.63865852355957, "learning_rate": 4.8e-05, "loss": 1.9804, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.27527671204636106, "eval_loss": 2.0897152423858643, "eval_pearson": 0.28175468669023457, "eval_runtime": 0.9642, "eval_samples_per_second": 1555.678, "eval_spearmanr": 0.2687987374024875, "eval_steps_per_second": 6.223, "step": 46 }, { "epoch": 3.0, "grad_norm": 10.638742446899414, "learning_rate": 4.7e-05, "loss": 1.7486, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.41554924979667834, "eval_loss": 1.9471008777618408, "eval_pearson": 0.41582453640686257, "eval_runtime": 0.9708, "eval_samples_per_second": 1545.093, "eval_spearmanr": 0.4152739631864941, "eval_steps_per_second": 6.18, "step": 69 }, { "epoch": 4.0, "grad_norm": 16.40125846862793, "learning_rate": 4.600000000000001e-05, "loss": 1.2963, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.4597302763812634, "eval_loss": 2.3058416843414307, "eval_pearson": 0.4520132571391213, "eval_runtime": 0.993, "eval_samples_per_second": 1510.585, "eval_spearmanr": 0.4674472956234055, "eval_steps_per_second": 6.042, "step": 92 }, { "epoch": 5.0, "grad_norm": 42.770668029785156, "learning_rate": 4.5e-05, "loss": 1.0162, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.48877111938435025, "eval_loss": 1.8441771268844604, "eval_pearson": 0.4886953649858171, "eval_runtime": 0.974, "eval_samples_per_second": 1540.11, "eval_spearmanr": 0.48884687378288344, "eval_steps_per_second": 6.16, "step": 115 }, { "epoch": 6.0, "grad_norm": 22.240951538085938, "learning_rate": 4.4000000000000006e-05, "loss": 0.8446, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.5259105826273307, "eval_loss": 1.7663521766662598, "eval_pearson": 0.5228342603176177, "eval_runtime": 0.9691, "eval_samples_per_second": 1547.89, "eval_spearmanr": 0.5289869049370436, "eval_steps_per_second": 6.192, "step": 138 }, { "epoch": 7.0, "grad_norm": 9.541956901550293, "learning_rate": 4.3e-05, "loss": 0.6767, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.5168115431891785, "eval_loss": 1.7573641538619995, "eval_pearson": 0.5151591812766733, "eval_runtime": 0.9781, "eval_samples_per_second": 1533.662, "eval_spearmanr": 0.5184639051016838, "eval_steps_per_second": 6.135, "step": 161 }, { "epoch": 8.0, "grad_norm": 18.553218841552734, "learning_rate": 4.2e-05, "loss": 0.5349, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.5327909501566208, "eval_loss": 1.6844276189804077, "eval_pearson": 0.5330379395182387, "eval_runtime": 0.967, "eval_samples_per_second": 1551.234, "eval_spearmanr": 0.5325439607950028, "eval_steps_per_second": 6.205, "step": 184 }, { "epoch": 9.0, "grad_norm": 8.788359642028809, "learning_rate": 4.1e-05, "loss": 0.4606, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.5061528931646921, "eval_loss": 1.9861546754837036, "eval_pearson": 0.503941115665909, "eval_runtime": 0.9662, "eval_samples_per_second": 1552.405, "eval_spearmanr": 0.5083646706634753, "eval_steps_per_second": 6.21, "step": 207 }, { "epoch": 10.0, "grad_norm": 9.931353569030762, "learning_rate": 4e-05, "loss": 0.3951, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.5270205929961109, "eval_loss": 1.8024095296859741, "eval_pearson": 0.5265505998740807, "eval_runtime": 0.9616, "eval_samples_per_second": 1559.852, "eval_spearmanr": 0.527490586118141, "eval_steps_per_second": 6.239, "step": 230 }, { "epoch": 11.0, "grad_norm": 21.041873931884766, "learning_rate": 3.9000000000000006e-05, "loss": 0.3624, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.5382256315731109, "eval_loss": 2.015653371810913, "eval_pearson": 0.5341689579826407, "eval_runtime": 0.9659, "eval_samples_per_second": 1552.881, "eval_spearmanr": 0.5422823051635811, "eval_steps_per_second": 6.212, "step": 253 }, { "epoch": 12.0, "grad_norm": 19.475910186767578, "learning_rate": 3.8e-05, "loss": 0.3087, "step": 276 }, { "epoch": 12.0, "eval_combined_score": 0.5305901722528907, "eval_loss": 2.409419059753418, "eval_pearson": 0.5226677549863216, "eval_runtime": 0.9641, "eval_samples_per_second": 1555.776, "eval_spearmanr": 0.5385125895194598, "eval_steps_per_second": 6.223, "step": 276 }, { "epoch": 13.0, "grad_norm": 15.23548412322998, "learning_rate": 3.7e-05, "loss": 0.2879, "step": 299 }, { "epoch": 13.0, "eval_combined_score": 0.5327170657272342, "eval_loss": 2.0560126304626465, "eval_pearson": 0.5303940978069547, "eval_runtime": 0.966, "eval_samples_per_second": 1552.868, "eval_spearmanr": 0.5350400336475136, "eval_steps_per_second": 6.211, "step": 299 }, { "epoch": 13.0, "step": 299, "total_flos": 9831977194071552.0, "train_loss": 0.9727236291636592, "train_runtime": 168.3036, "train_samples_per_second": 1707.925, "train_steps_per_second": 6.833 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9831977194071552.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }