{ "best_metric": 0.6650304198265076, "best_model_checkpoint": "bert_base_lda_20_v1_book_stsb/checkpoint-276", "epoch": 17.0, "eval_steps": 500, "global_step": 391, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.7477006912231445, "learning_rate": 4.9e-05, "loss": 2.8738, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.17561117616234614, "eval_loss": 2.4670398235321045, "eval_pearson": 0.17646745724560672, "eval_runtime": 0.9997, "eval_samples_per_second": 1500.502, "eval_spearmanr": 0.17475489507908556, "eval_steps_per_second": 6.002, "step": 23 }, { "epoch": 2.0, "grad_norm": 35.44430160522461, "learning_rate": 4.8e-05, "loss": 1.4719, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.7400782076458396, "eval_loss": 1.0279998779296875, "eval_pearson": 0.739723981540606, "eval_runtime": 1.0078, "eval_samples_per_second": 1488.42, "eval_spearmanr": 0.7404324337510731, "eval_steps_per_second": 5.954, "step": 46 }, { "epoch": 3.0, "grad_norm": 13.613875389099121, "learning_rate": 4.7e-05, "loss": 0.9801, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.7954812287252586, "eval_loss": 0.8275899887084961, "eval_pearson": 0.7955648654056936, "eval_runtime": 1.0022, "eval_samples_per_second": 1496.75, "eval_spearmanr": 0.7953975920448235, "eval_steps_per_second": 5.987, "step": 69 }, { "epoch": 4.0, "grad_norm": 16.052230834960938, "learning_rate": 4.600000000000001e-05, "loss": 0.783, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.8195001768993989, "eval_loss": 0.7431384921073914, "eval_pearson": 0.8197032146737679, "eval_runtime": 0.9931, "eval_samples_per_second": 1510.427, "eval_spearmanr": 0.8192971391250299, "eval_steps_per_second": 6.042, "step": 92 }, { "epoch": 5.0, "grad_norm": 12.761335372924805, "learning_rate": 4.5e-05, "loss": 0.5677, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.8143535964097317, "eval_loss": 0.9074862599372864, "eval_pearson": 0.8135246219484462, "eval_runtime": 1.0017, "eval_samples_per_second": 1497.517, "eval_spearmanr": 0.8151825708710173, "eval_steps_per_second": 5.99, "step": 115 }, { "epoch": 6.0, "grad_norm": 14.109099388122559, "learning_rate": 4.4000000000000006e-05, "loss": 0.4407, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.8269323052226907, "eval_loss": 0.7474443316459656, "eval_pearson": 0.8267083714336028, "eval_runtime": 1.0083, "eval_samples_per_second": 1487.66, "eval_spearmanr": 0.8271562390117786, "eval_steps_per_second": 5.951, "step": 138 }, { "epoch": 7.0, "grad_norm": 6.391584396362305, "learning_rate": 4.3e-05, "loss": 0.3821, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.838118234588908, "eval_loss": 0.6753207445144653, "eval_pearson": 0.8391483582280732, "eval_runtime": 0.9964, "eval_samples_per_second": 1505.478, "eval_spearmanr": 0.8370881109497429, "eval_steps_per_second": 6.022, "step": 161 }, { "epoch": 8.0, "grad_norm": 9.437135696411133, "learning_rate": 4.2e-05, "loss": 0.3036, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.8252995711473009, "eval_loss": 0.8726378679275513, "eval_pearson": 0.8245684237762525, "eval_runtime": 0.9644, "eval_samples_per_second": 1555.381, "eval_spearmanr": 0.8260307185183493, "eval_steps_per_second": 6.222, "step": 184 }, { "epoch": 9.0, "grad_norm": 6.426690101623535, "learning_rate": 4.1e-05, "loss": 0.269, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.8302312339843521, "eval_loss": 0.7330970168113708, "eval_pearson": 0.8311362222183013, "eval_runtime": 0.968, "eval_samples_per_second": 1549.529, "eval_spearmanr": 0.8293262457504029, "eval_steps_per_second": 6.198, "step": 207 }, { "epoch": 10.0, "grad_norm": 6.680917263031006, "learning_rate": 4e-05, "loss": 0.2191, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.8375282460597571, "eval_loss": 0.7562392354011536, "eval_pearson": 0.8382616394860685, "eval_runtime": 0.9959, "eval_samples_per_second": 1506.245, "eval_spearmanr": 0.8367948526334458, "eval_steps_per_second": 6.025, "step": 230 }, { "epoch": 11.0, "grad_norm": 3.755885124206543, "learning_rate": 3.9000000000000006e-05, "loss": 0.1854, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.8353770667011837, "eval_loss": 0.7022337317466736, "eval_pearson": 0.8364761178604215, "eval_runtime": 1.0014, "eval_samples_per_second": 1497.897, "eval_spearmanr": 0.834278015541946, "eval_steps_per_second": 5.992, "step": 253 }, { "epoch": 12.0, "grad_norm": 7.932562828063965, "learning_rate": 3.8e-05, "loss": 0.1718, "step": 276 }, { "epoch": 12.0, "eval_combined_score": 0.8394422302882345, "eval_loss": 0.6650304198265076, "eval_pearson": 0.8406845213042464, "eval_runtime": 0.9788, "eval_samples_per_second": 1532.489, "eval_spearmanr": 0.8381999392722225, "eval_steps_per_second": 6.13, "step": 276 }, { "epoch": 13.0, "grad_norm": 6.083053112030029, "learning_rate": 3.7e-05, "loss": 0.1685, "step": 299 }, { "epoch": 13.0, "eval_combined_score": 0.8341740484470688, "eval_loss": 0.7269710302352905, "eval_pearson": 0.8350427176753902, "eval_runtime": 0.9854, "eval_samples_per_second": 1522.222, "eval_spearmanr": 0.8333053792187473, "eval_steps_per_second": 6.089, "step": 299 }, { "epoch": 14.0, "grad_norm": 5.380353927612305, "learning_rate": 3.6e-05, "loss": 0.1368, "step": 322 }, { "epoch": 14.0, "eval_combined_score": 0.8384150656102521, "eval_loss": 0.7532300353050232, "eval_pearson": 0.839212923010702, "eval_runtime": 0.9703, "eval_samples_per_second": 1545.963, "eval_spearmanr": 0.8376172082098022, "eval_steps_per_second": 6.184, "step": 322 }, { "epoch": 15.0, "grad_norm": 2.213524103164673, "learning_rate": 3.5e-05, "loss": 0.1351, "step": 345 }, { "epoch": 15.0, "eval_combined_score": 0.8378988273941752, "eval_loss": 0.8710149526596069, "eval_pearson": 0.8378603538577427, "eval_runtime": 0.9708, "eval_samples_per_second": 1545.192, "eval_spearmanr": 0.8379373009306077, "eval_steps_per_second": 6.181, "step": 345 }, { "epoch": 16.0, "grad_norm": 5.186318874359131, "learning_rate": 3.4000000000000007e-05, "loss": 0.1459, "step": 368 }, { "epoch": 16.0, "eval_combined_score": 0.840746533861038, "eval_loss": 0.7801129817962646, "eval_pearson": 0.841643298465437, "eval_runtime": 0.9846, "eval_samples_per_second": 1523.477, "eval_spearmanr": 0.8398497692566391, "eval_steps_per_second": 6.094, "step": 368 }, { "epoch": 17.0, "grad_norm": 4.310553550720215, "learning_rate": 3.3e-05, "loss": 0.106, "step": 391 }, { "epoch": 17.0, "eval_combined_score": 0.8386885053587652, "eval_loss": 0.6833200454711914, "eval_pearson": 0.8393476630897322, "eval_runtime": 1.0147, "eval_samples_per_second": 1478.234, "eval_spearmanr": 0.838029347627798, "eval_steps_per_second": 5.913, "step": 391 }, { "epoch": 17.0, "step": 391, "total_flos": 1.2857200946093568e+16, "train_loss": 0.5494434967675172, "train_runtime": 221.1064, "train_samples_per_second": 1300.053, "train_steps_per_second": 5.201 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2857200946093568e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }