{ "best_metric": 2.302783966064453, "best_model_checkpoint": "bert_base_lda_100_v1_stsb/checkpoint-230", "epoch": 15.0, "eval_steps": 500, "global_step": 345, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.897597312927246, "learning_rate": 0.00098, "loss": 5.4876, "step": 23 }, { "epoch": 1.0, "eval_combined_score": NaN, "eval_loss": 2.5971333980560303, "eval_pearson": NaN, "eval_runtime": 0.953, "eval_samples_per_second": 1574.03, "eval_spearmanr": NaN, "eval_steps_per_second": 6.296, "step": 23 }, { "epoch": 2.0, "grad_norm": 16.058734893798828, "learning_rate": 0.00096, "loss": 2.2047, "step": 46 }, { "epoch": 2.0, "eval_combined_score": NaN, "eval_loss": 2.3758695125579834, "eval_pearson": NaN, "eval_runtime": 1.0004, "eval_samples_per_second": 1499.351, "eval_spearmanr": NaN, "eval_steps_per_second": 5.997, "step": 46 }, { "epoch": 3.0, "grad_norm": 11.513495445251465, "learning_rate": 0.00094, "loss": 2.2017, "step": 69 }, { "epoch": 3.0, "eval_combined_score": NaN, "eval_loss": 2.4511916637420654, "eval_pearson": NaN, "eval_runtime": 0.9743, "eval_samples_per_second": 1539.528, "eval_spearmanr": NaN, "eval_steps_per_second": 6.158, "step": 69 }, { "epoch": 4.0, "grad_norm": 5.998810768127441, "learning_rate": 0.00092, "loss": 2.1807, "step": 92 }, { "epoch": 4.0, "eval_combined_score": NaN, "eval_loss": 2.4511916637420654, "eval_pearson": NaN, "eval_runtime": 0.9891, "eval_samples_per_second": 1516.545, "eval_spearmanr": NaN, "eval_steps_per_second": 6.066, "step": 92 }, { "epoch": 5.0, "grad_norm": 17.433738708496094, "learning_rate": 0.0009000000000000001, "loss": 2.1807, "step": 115 }, { "epoch": 5.0, "eval_combined_score": NaN, "eval_loss": 2.511171817779541, "eval_pearson": NaN, "eval_runtime": 0.9632, "eval_samples_per_second": 1557.271, "eval_spearmanr": NaN, "eval_steps_per_second": 6.229, "step": 115 }, { "epoch": 6.0, "grad_norm": 6.638427734375, "learning_rate": 0.00088, "loss": 2.196, "step": 138 }, { "epoch": 6.0, "eval_combined_score": NaN, "eval_loss": 2.3448002338409424, "eval_pearson": NaN, "eval_runtime": 0.9594, "eval_samples_per_second": 1563.421, "eval_spearmanr": NaN, "eval_steps_per_second": 6.254, "step": 138 }, { "epoch": 7.0, "grad_norm": 16.561189651489258, "learning_rate": 0.00086, "loss": 2.1902, "step": 161 }, { "epoch": 7.0, "eval_combined_score": NaN, "eval_loss": 2.716400623321533, "eval_pearson": NaN, "eval_runtime": 0.9593, "eval_samples_per_second": 1563.575, "eval_spearmanr": NaN, "eval_steps_per_second": 6.254, "step": 161 }, { "epoch": 8.0, "grad_norm": 4.18385124206543, "learning_rate": 0.00084, "loss": 2.1899, "step": 184 }, { "epoch": 8.0, "eval_combined_score": NaN, "eval_loss": 2.6349358558654785, "eval_pearson": NaN, "eval_runtime": 0.9637, "eval_samples_per_second": 1556.487, "eval_spearmanr": NaN, "eval_steps_per_second": 6.226, "step": 184 }, { "epoch": 9.0, "grad_norm": 17.808826446533203, "learning_rate": 0.00082, "loss": 2.1962, "step": 207 }, { "epoch": 9.0, "eval_combined_score": NaN, "eval_loss": 2.3354201316833496, "eval_pearson": NaN, "eval_runtime": 0.9676, "eval_samples_per_second": 1550.299, "eval_spearmanr": NaN, "eval_steps_per_second": 6.201, "step": 207 }, { "epoch": 10.0, "grad_norm": 16.991071701049805, "learning_rate": 0.0008, "loss": 2.1802, "step": 230 }, { "epoch": 10.0, "eval_combined_score": NaN, "eval_loss": 2.302783966064453, "eval_pearson": NaN, "eval_runtime": 0.9863, "eval_samples_per_second": 1520.902, "eval_spearmanr": NaN, "eval_steps_per_second": 6.084, "step": 230 }, { "epoch": 11.0, "grad_norm": 17.86025619506836, "learning_rate": 0.0007800000000000001, "loss": 2.1945, "step": 253 }, { "epoch": 11.0, "eval_combined_score": NaN, "eval_loss": 2.716400623321533, "eval_pearson": NaN, "eval_runtime": 0.9587, "eval_samples_per_second": 1564.538, "eval_spearmanr": NaN, "eval_steps_per_second": 6.258, "step": 253 }, { "epoch": 12.0, "grad_norm": 19.099140167236328, "learning_rate": 0.00076, "loss": 2.1932, "step": 276 }, { "epoch": 12.0, "eval_combined_score": NaN, "eval_loss": 2.737987518310547, "eval_pearson": NaN, "eval_runtime": 0.9594, "eval_samples_per_second": 1563.435, "eval_spearmanr": NaN, "eval_steps_per_second": 6.254, "step": 276 }, { "epoch": 13.0, "grad_norm": 13.359597206115723, "learning_rate": 0.00074, "loss": 2.206, "step": 299 }, { "epoch": 13.0, "eval_combined_score": NaN, "eval_loss": 2.737987518310547, "eval_pearson": NaN, "eval_runtime": 0.9641, "eval_samples_per_second": 1555.805, "eval_spearmanr": NaN, "eval_steps_per_second": 6.223, "step": 299 }, { "epoch": 14.0, "grad_norm": 5.281756401062012, "learning_rate": 0.0007199999999999999, "loss": 2.1965, "step": 322 }, { "epoch": 14.0, "eval_combined_score": NaN, "eval_loss": 2.654569625854492, "eval_pearson": NaN, "eval_runtime": 0.9615, "eval_samples_per_second": 1560.105, "eval_spearmanr": NaN, "eval_steps_per_second": 6.24, "step": 322 }, { "epoch": 15.0, "grad_norm": 4.948043346405029, "learning_rate": 0.0007, "loss": 2.1794, "step": 345 }, { "epoch": 15.0, "eval_combined_score": NaN, "eval_loss": 2.4802048206329346, "eval_pearson": NaN, "eval_runtime": 0.9617, "eval_samples_per_second": 1559.791, "eval_spearmanr": NaN, "eval_steps_per_second": 6.239, "step": 345 }, { "epoch": 15.0, "step": 345, "total_flos": 1.134458907008256e+16, "train_loss": 2.4118328260338826, "train_runtime": 190.6822, "train_samples_per_second": 1507.482, "train_steps_per_second": 6.031 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.134458907008256e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }