{
  "best_metric": 0.6650304198265076,
  "best_model_checkpoint": "bert_base_lda_20_v1_book_stsb/checkpoint-276",
  "epoch": 17.0,
  "eval_steps": 500,
  "global_step": 391,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 6.7477006912231445,
      "learning_rate": 4.9e-05,
      "loss": 2.8738,
      "step": 23
    },
    {
      "epoch": 1.0,
      "eval_combined_score": 0.17561117616234614,
      "eval_loss": 2.4670398235321045,
      "eval_pearson": 0.17646745724560672,
      "eval_runtime": 0.9997,
      "eval_samples_per_second": 1500.502,
      "eval_spearmanr": 0.17475489507908556,
      "eval_steps_per_second": 6.002,
      "step": 23
    },
    {
      "epoch": 2.0,
      "grad_norm": 35.44430160522461,
      "learning_rate": 4.8e-05,
      "loss": 1.4719,
      "step": 46
    },
    {
      "epoch": 2.0,
      "eval_combined_score": 0.7400782076458396,
      "eval_loss": 1.0279998779296875,
      "eval_pearson": 0.739723981540606,
      "eval_runtime": 1.0078,
      "eval_samples_per_second": 1488.42,
      "eval_spearmanr": 0.7404324337510731,
      "eval_steps_per_second": 5.954,
      "step": 46
    },
    {
      "epoch": 3.0,
      "grad_norm": 13.613875389099121,
      "learning_rate": 4.7e-05,
      "loss": 0.9801,
      "step": 69
    },
    {
      "epoch": 3.0,
      "eval_combined_score": 0.7954812287252586,
      "eval_loss": 0.8275899887084961,
      "eval_pearson": 0.7955648654056936,
      "eval_runtime": 1.0022,
      "eval_samples_per_second": 1496.75,
      "eval_spearmanr": 0.7953975920448235,
      "eval_steps_per_second": 5.987,
      "step": 69
    },
    {
      "epoch": 4.0,
      "grad_norm": 16.052230834960938,
      "learning_rate": 4.600000000000001e-05,
      "loss": 0.783,
      "step": 92
    },
    {
      "epoch": 4.0,
      "eval_combined_score": 0.8195001768993989,
      "eval_loss": 0.7431384921073914,
      "eval_pearson": 0.8197032146737679,
      "eval_runtime": 0.9931,
      "eval_samples_per_second": 1510.427,
      "eval_spearmanr": 0.8192971391250299,
      "eval_steps_per_second": 6.042,
      "step": 92
    },
    {
      "epoch": 5.0,
      "grad_norm": 12.761335372924805,
      "learning_rate": 4.5e-05,
      "loss": 0.5677,
      "step": 115
    },
    {
      "epoch": 5.0,
      "eval_combined_score": 0.8143535964097317,
      "eval_loss": 0.9074862599372864,
      "eval_pearson": 0.8135246219484462,
      "eval_runtime": 1.0017,
      "eval_samples_per_second": 1497.517,
      "eval_spearmanr": 0.8151825708710173,
      "eval_steps_per_second": 5.99,
      "step": 115
    },
    {
      "epoch": 6.0,
      "grad_norm": 14.109099388122559,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.4407,
      "step": 138
    },
    {
      "epoch": 6.0,
      "eval_combined_score": 0.8269323052226907,
      "eval_loss": 0.7474443316459656,
      "eval_pearson": 0.8267083714336028,
      "eval_runtime": 1.0083,
      "eval_samples_per_second": 1487.66,
      "eval_spearmanr": 0.8271562390117786,
      "eval_steps_per_second": 5.951,
      "step": 138
    },
    {
      "epoch": 7.0,
      "grad_norm": 6.391584396362305,
      "learning_rate": 4.3e-05,
      "loss": 0.3821,
      "step": 161
    },
    {
      "epoch": 7.0,
      "eval_combined_score": 0.838118234588908,
      "eval_loss": 0.6753207445144653,
      "eval_pearson": 0.8391483582280732,
      "eval_runtime": 0.9964,
      "eval_samples_per_second": 1505.478,
      "eval_spearmanr": 0.8370881109497429,
      "eval_steps_per_second": 6.022,
      "step": 161
    },
    {
      "epoch": 8.0,
      "grad_norm": 9.437135696411133,
      "learning_rate": 4.2e-05,
      "loss": 0.3036,
      "step": 184
    },
    {
      "epoch": 8.0,
      "eval_combined_score": 0.8252995711473009,
      "eval_loss": 0.8726378679275513,
      "eval_pearson": 0.8245684237762525,
      "eval_runtime": 0.9644,
      "eval_samples_per_second": 1555.381,
      "eval_spearmanr": 0.8260307185183493,
      "eval_steps_per_second": 6.222,
      "step": 184
    },
    {
      "epoch": 9.0,
      "grad_norm": 6.426690101623535,
      "learning_rate": 4.1e-05,
      "loss": 0.269,
      "step": 207
    },
    {
      "epoch": 9.0,
      "eval_combined_score": 0.8302312339843521,
      "eval_loss": 0.7330970168113708,
      "eval_pearson": 0.8311362222183013,
      "eval_runtime": 0.968,
      "eval_samples_per_second": 1549.529,
      "eval_spearmanr": 0.8293262457504029,
      "eval_steps_per_second": 6.198,
      "step": 207
    },
    {
      "epoch": 10.0,
      "grad_norm": 6.680917263031006,
      "learning_rate": 4e-05,
      "loss": 0.2191,
      "step": 230
    },
    {
      "epoch": 10.0,
      "eval_combined_score": 0.8375282460597571,
      "eval_loss": 0.7562392354011536,
      "eval_pearson": 0.8382616394860685,
      "eval_runtime": 0.9959,
      "eval_samples_per_second": 1506.245,
      "eval_spearmanr": 0.8367948526334458,
      "eval_steps_per_second": 6.025,
      "step": 230
    },
    {
      "epoch": 11.0,
      "grad_norm": 3.755885124206543,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.1854,
      "step": 253
    },
    {
      "epoch": 11.0,
      "eval_combined_score": 0.8353770667011837,
      "eval_loss": 0.7022337317466736,
      "eval_pearson": 0.8364761178604215,
      "eval_runtime": 1.0014,
      "eval_samples_per_second": 1497.897,
      "eval_spearmanr": 0.834278015541946,
      "eval_steps_per_second": 5.992,
      "step": 253
    },
    {
      "epoch": 12.0,
      "grad_norm": 7.932562828063965,
      "learning_rate": 3.8e-05,
      "loss": 0.1718,
      "step": 276
    },
    {
      "epoch": 12.0,
      "eval_combined_score": 0.8394422302882345,
      "eval_loss": 0.6650304198265076,
      "eval_pearson": 0.8406845213042464,
      "eval_runtime": 0.9788,
      "eval_samples_per_second": 1532.489,
      "eval_spearmanr": 0.8381999392722225,
      "eval_steps_per_second": 6.13,
      "step": 276
    },
    {
      "epoch": 13.0,
      "grad_norm": 6.083053112030029,
      "learning_rate": 3.7e-05,
      "loss": 0.1685,
      "step": 299
    },
    {
      "epoch": 13.0,
      "eval_combined_score": 0.8341740484470688,
      "eval_loss": 0.7269710302352905,
      "eval_pearson": 0.8350427176753902,
      "eval_runtime": 0.9854,
      "eval_samples_per_second": 1522.222,
      "eval_spearmanr": 0.8333053792187473,
      "eval_steps_per_second": 6.089,
      "step": 299
    },
    {
      "epoch": 14.0,
      "grad_norm": 5.380353927612305,
      "learning_rate": 3.6e-05,
      "loss": 0.1368,
      "step": 322
    },
    {
      "epoch": 14.0,
      "eval_combined_score": 0.8384150656102521,
      "eval_loss": 0.7532300353050232,
      "eval_pearson": 0.839212923010702,
      "eval_runtime": 0.9703,
      "eval_samples_per_second": 1545.963,
      "eval_spearmanr": 0.8376172082098022,
      "eval_steps_per_second": 6.184,
      "step": 322
    },
    {
      "epoch": 15.0,
      "grad_norm": 2.213524103164673,
      "learning_rate": 3.5e-05,
      "loss": 0.1351,
      "step": 345
    },
    {
      "epoch": 15.0,
      "eval_combined_score": 0.8378988273941752,
      "eval_loss": 0.8710149526596069,
      "eval_pearson": 0.8378603538577427,
      "eval_runtime": 0.9708,
      "eval_samples_per_second": 1545.192,
      "eval_spearmanr": 0.8379373009306077,
      "eval_steps_per_second": 6.181,
      "step": 345
    },
    {
      "epoch": 16.0,
      "grad_norm": 5.186318874359131,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.1459,
      "step": 368
    },
    {
      "epoch": 16.0,
      "eval_combined_score": 0.840746533861038,
      "eval_loss": 0.7801129817962646,
      "eval_pearson": 0.841643298465437,
      "eval_runtime": 0.9846,
      "eval_samples_per_second": 1523.477,
      "eval_spearmanr": 0.8398497692566391,
      "eval_steps_per_second": 6.094,
      "step": 368
    },
    {
      "epoch": 17.0,
      "grad_norm": 4.310553550720215,
      "learning_rate": 3.3e-05,
      "loss": 0.106,
      "step": 391
    },
    {
      "epoch": 17.0,
      "eval_combined_score": 0.8386885053587652,
      "eval_loss": 0.6833200454711914,
      "eval_pearson": 0.8393476630897322,
      "eval_runtime": 1.0147,
      "eval_samples_per_second": 1478.234,
      "eval_spearmanr": 0.838029347627798,
      "eval_steps_per_second": 5.913,
      "step": 391
    },
    {
      "epoch": 17.0,
      "step": 391,
      "total_flos": 1.2857200946093568e+16,
      "train_loss": 0.5494434967675172,
      "train_runtime": 221.1064,
      "train_samples_per_second": 1300.053,
      "train_steps_per_second": 5.201
    }
  ],
  "logging_steps": 1,
  "max_steps": 1150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2857200946093568e+16,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}