|
{
  "best_metric": 0.8188873529434204,
  "best_model_checkpoint": "bert_tiny_lda_5_v1_book_stsb/checkpoint-368",
  "epoch": 21.0,
  "eval_steps": 500,
  "global_step": 483,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 4.9694929122924805,
      "learning_rate": 4.9e-05,
      "loss": 2.8706,
      "step": 23
    },
    {
      "epoch": 1.0,
      "eval_combined_score": 0.09954146094056157,
      "eval_loss": 2.462472438812256,
      "eval_pearson": 0.09907601559786483,
      "eval_runtime": 0.4799,
      "eval_samples_per_second": 3125.334,
      "eval_spearmanr": 0.1000069062832583,
      "eval_steps_per_second": 12.501,
      "step": 23
    },
    {
      "epoch": 2.0,
      "grad_norm": 22.507034301757812,
      "learning_rate": 4.8e-05,
      "loss": 1.7766,
      "step": 46
    },
    {
      "epoch": 2.0,
      "eval_combined_score": 0.6395870539114769,
      "eval_loss": 1.3386552333831787,
      "eval_pearson": 0.6420563993485077,
      "eval_runtime": 0.5098,
      "eval_samples_per_second": 2942.208,
      "eval_spearmanr": 0.637117708474446,
      "eval_steps_per_second": 11.769,
      "step": 46
    },
    {
      "epoch": 3.0,
      "grad_norm": 20.13024139404297,
      "learning_rate": 4.7e-05,
      "loss": 1.1649,
      "step": 69
    },
    {
      "epoch": 3.0,
      "eval_combined_score": 0.7499948802864915,
      "eval_loss": 0.9883082509040833,
      "eval_pearson": 0.7530220350451059,
      "eval_runtime": 0.4998,
      "eval_samples_per_second": 3001.186,
      "eval_spearmanr": 0.746967725527877,
      "eval_steps_per_second": 12.005,
      "step": 69
    },
    {
      "epoch": 4.0,
      "grad_norm": 7.193154335021973,
      "learning_rate": 4.600000000000001e-05,
      "loss": 0.8446,
      "step": 92
    },
    {
      "epoch": 4.0,
      "eval_combined_score": 0.7814449278644269,
      "eval_loss": 0.9500728249549866,
      "eval_pearson": 0.7801592206629702,
      "eval_runtime": 0.4748,
      "eval_samples_per_second": 3159.192,
      "eval_spearmanr": 0.7827306350658837,
      "eval_steps_per_second": 12.637,
      "step": 92
    },
    {
      "epoch": 5.0,
      "grad_norm": 15.801517486572266,
      "learning_rate": 4.5e-05,
      "loss": 0.6587,
      "step": 115
    },
    {
      "epoch": 5.0,
      "eval_combined_score": 0.7831183677075164,
      "eval_loss": 0.9120289087295532,
      "eval_pearson": 0.7838836623851876,
      "eval_runtime": 0.4934,
      "eval_samples_per_second": 3040.098,
      "eval_spearmanr": 0.7823530730298451,
      "eval_steps_per_second": 12.16,
      "step": 115
    },
    {
      "epoch": 6.0,
      "grad_norm": 14.146763801574707,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.5587,
      "step": 138
    },
    {
      "epoch": 6.0,
      "eval_combined_score": 0.7878115431321093,
      "eval_loss": 0.8917849659919739,
      "eval_pearson": 0.7876118713697591,
      "eval_runtime": 0.4812,
      "eval_samples_per_second": 3117.386,
      "eval_spearmanr": 0.7880112148944596,
      "eval_steps_per_second": 12.47,
      "step": 138
    },
    {
      "epoch": 7.0,
      "grad_norm": 12.302091598510742,
      "learning_rate": 4.3e-05,
      "loss": 0.5104,
      "step": 161
    },
    {
      "epoch": 7.0,
      "eval_combined_score": 0.782515489635635,
      "eval_loss": 0.9523068070411682,
      "eval_pearson": 0.7821895116539266,
      "eval_runtime": 0.4809,
      "eval_samples_per_second": 3119.023,
      "eval_spearmanr": 0.7828414676173435,
      "eval_steps_per_second": 12.476,
      "step": 161
    },
    {
      "epoch": 8.0,
      "grad_norm": 12.979435920715332,
      "learning_rate": 4.2e-05,
      "loss": 0.4506,
      "step": 184
    },
    {
      "epoch": 8.0,
      "eval_combined_score": 0.7919260991828585,
      "eval_loss": 0.9420682191848755,
      "eval_pearson": 0.791584489351049,
      "eval_runtime": 0.4809,
      "eval_samples_per_second": 3118.926,
      "eval_spearmanr": 0.7922677090146679,
      "eval_steps_per_second": 12.476,
      "step": 184
    },
    {
      "epoch": 9.0,
      "grad_norm": 18.948219299316406,
      "learning_rate": 4.1e-05,
      "loss": 0.4174,
      "step": 207
    },
    {
      "epoch": 9.0,
      "eval_combined_score": 0.7906456677090563,
      "eval_loss": 1.0924893617630005,
      "eval_pearson": 0.7875795388915195,
      "eval_runtime": 0.4739,
      "eval_samples_per_second": 3165.183,
      "eval_spearmanr": 0.7937117965265932,
      "eval_steps_per_second": 12.661,
      "step": 207
    },
    {
      "epoch": 10.0,
      "grad_norm": 5.4617791175842285,
      "learning_rate": 4e-05,
      "loss": 0.3229,
      "step": 230
    },
    {
      "epoch": 10.0,
      "eval_combined_score": 0.7957522599726154,
      "eval_loss": 0.8792051076889038,
      "eval_pearson": 0.7969462774842241,
      "eval_runtime": 0.4767,
      "eval_samples_per_second": 3146.57,
      "eval_spearmanr": 0.7945582424610067,
      "eval_steps_per_second": 12.586,
      "step": 230
    },
    {
      "epoch": 11.0,
      "grad_norm": 6.967871189117432,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.3096,
      "step": 253
    },
    {
      "epoch": 11.0,
      "eval_combined_score": 0.7885789869359312,
      "eval_loss": 0.9665384292602539,
      "eval_pearson": 0.7887171688488032,
      "eval_runtime": 0.487,
      "eval_samples_per_second": 3079.987,
      "eval_spearmanr": 0.7884408050230591,
      "eval_steps_per_second": 12.32,
      "step": 253
    },
    {
      "epoch": 12.0,
      "grad_norm": 16.113603591918945,
      "learning_rate": 3.8e-05,
      "loss": 0.2918,
      "step": 276
    },
    {
      "epoch": 12.0,
      "eval_combined_score": 0.8004322602860193,
      "eval_loss": 0.8212088346481323,
      "eval_pearson": 0.801677826027752,
      "eval_runtime": 0.4993,
      "eval_samples_per_second": 3003.968,
      "eval_spearmanr": 0.7991866945442867,
      "eval_steps_per_second": 12.016,
      "step": 276
    },
    {
      "epoch": 13.0,
      "grad_norm": 4.451534271240234,
      "learning_rate": 3.7e-05,
      "loss": 0.2479,
      "step": 299
    },
    {
      "epoch": 13.0,
      "eval_combined_score": 0.804614899302114,
      "eval_loss": 0.8791233897209167,
      "eval_pearson": 0.8055416707360773,
      "eval_runtime": 0.4721,
      "eval_samples_per_second": 3177.294,
      "eval_spearmanr": 0.8036881278681508,
      "eval_steps_per_second": 12.709,
      "step": 299
    },
    {
      "epoch": 14.0,
      "grad_norm": 21.7775821685791,
      "learning_rate": 3.6e-05,
      "loss": 0.2409,
      "step": 322
    },
    {
      "epoch": 14.0,
      "eval_combined_score": 0.794995597443876,
      "eval_loss": 0.8217026591300964,
      "eval_pearson": 0.7969938156234373,
      "eval_runtime": 0.5148,
      "eval_samples_per_second": 2913.928,
      "eval_spearmanr": 0.7929973792643148,
      "eval_steps_per_second": 11.656,
      "step": 322
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.07222056388855,
      "learning_rate": 3.5e-05,
      "loss": 0.2071,
      "step": 345
    },
    {
      "epoch": 15.0,
      "eval_combined_score": 0.7984497715007259,
      "eval_loss": 0.8929257988929749,
      "eval_pearson": 0.7998156602749035,
      "eval_runtime": 0.4892,
      "eval_samples_per_second": 3066.293,
      "eval_spearmanr": 0.7970838827265485,
      "eval_steps_per_second": 12.265,
      "step": 345
    },
    {
      "epoch": 16.0,
      "grad_norm": 3.4134459495544434,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.1907,
      "step": 368
    },
    {
      "epoch": 16.0,
      "eval_combined_score": 0.8007289150909647,
      "eval_loss": 0.8188873529434204,
      "eval_pearson": 0.8026761278764143,
      "eval_runtime": 0.4814,
      "eval_samples_per_second": 3116.2,
      "eval_spearmanr": 0.798781702305515,
      "eval_steps_per_second": 12.465,
      "step": 368
    },
    {
      "epoch": 17.0,
      "grad_norm": 4.4020676612854,
      "learning_rate": 3.3e-05,
      "loss": 0.1825,
      "step": 391
    },
    {
      "epoch": 17.0,
      "eval_combined_score": 0.79118899198846,
      "eval_loss": 0.9392598271369934,
      "eval_pearson": 0.7924365508807105,
      "eval_runtime": 0.4766,
      "eval_samples_per_second": 3147.292,
      "eval_spearmanr": 0.7899414330962096,
      "eval_steps_per_second": 12.589,
      "step": 391
    },
    {
      "epoch": 18.0,
      "grad_norm": 5.791975498199463,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.1634,
      "step": 414
    },
    {
      "epoch": 18.0,
      "eval_combined_score": 0.8034539561261447,
      "eval_loss": 0.8721528053283691,
      "eval_pearson": 0.8045915853539458,
      "eval_runtime": 0.4845,
      "eval_samples_per_second": 3095.701,
      "eval_spearmanr": 0.8023163268983434,
      "eval_steps_per_second": 12.383,
      "step": 414
    },
    {
      "epoch": 19.0,
      "grad_norm": 2.9870147705078125,
      "learning_rate": 3.1e-05,
      "loss": 0.1531,
      "step": 437
    },
    {
      "epoch": 19.0,
      "eval_combined_score": 0.8030149651947462,
      "eval_loss": 0.9629133343696594,
      "eval_pearson": 0.8037084608596774,
      "eval_runtime": 0.4776,
      "eval_samples_per_second": 3140.565,
      "eval_spearmanr": 0.8023214695298152,
      "eval_steps_per_second": 12.562,
      "step": 437
    },
    {
      "epoch": 20.0,
      "grad_norm": 3.91269850730896,
      "learning_rate": 3e-05,
      "loss": 0.153,
      "step": 460
    },
    {
      "epoch": 20.0,
      "eval_combined_score": 0.7958772509210114,
      "eval_loss": 1.0062919855117798,
      "eval_pearson": 0.797094458559933,
      "eval_runtime": 0.485,
      "eval_samples_per_second": 3092.717,
      "eval_spearmanr": 0.7946600432820897,
      "eval_steps_per_second": 12.371,
      "step": 460
    },
    {
      "epoch": 21.0,
      "grad_norm": 4.98586893081665,
      "learning_rate": 2.9e-05,
      "loss": 0.1409,
      "step": 483
    },
    {
      "epoch": 21.0,
      "eval_combined_score": 0.7955185663928932,
      "eval_loss": 0.9756875038146973,
      "eval_pearson": 0.7968145893338887,
      "eval_runtime": 0.478,
      "eval_samples_per_second": 3138.129,
      "eval_spearmanr": 0.7942225434518977,
      "eval_steps_per_second": 12.553,
      "step": 483
    },
    {
      "epoch": 21.0,
      "step": 483,
      "total_flos": 3165851231294976.0,
      "train_loss": 0.5645940970189823,
      "train_runtime": 97.1506,
      "train_samples_per_second": 2958.809,
      "train_steps_per_second": 11.837
    }
  ],
  "logging_steps": 1,
  "max_steps": 1150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3165851231294976.0,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}