bert_tiny_lda_5_v1_book_stsb / trainer_state.json
gokulsrinivasagan's picture
End of training
5142949 verified
{
"best_metric": 0.8188873529434204,
"best_model_checkpoint": "bert_tiny_lda_5_v1_book_stsb/checkpoint-368",
"epoch": 21.0,
"eval_steps": 500,
"global_step": 483,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.9694929122924805,
"learning_rate": 4.9e-05,
"loss": 2.8706,
"step": 23
},
{
"epoch": 1.0,
"eval_combined_score": 0.09954146094056157,
"eval_loss": 2.462472438812256,
"eval_pearson": 0.09907601559786483,
"eval_runtime": 0.4799,
"eval_samples_per_second": 3125.334,
"eval_spearmanr": 0.1000069062832583,
"eval_steps_per_second": 12.501,
"step": 23
},
{
"epoch": 2.0,
"grad_norm": 22.507034301757812,
"learning_rate": 4.8e-05,
"loss": 1.7766,
"step": 46
},
{
"epoch": 2.0,
"eval_combined_score": 0.6395870539114769,
"eval_loss": 1.3386552333831787,
"eval_pearson": 0.6420563993485077,
"eval_runtime": 0.5098,
"eval_samples_per_second": 2942.208,
"eval_spearmanr": 0.637117708474446,
"eval_steps_per_second": 11.769,
"step": 46
},
{
"epoch": 3.0,
"grad_norm": 20.13024139404297,
"learning_rate": 4.7e-05,
"loss": 1.1649,
"step": 69
},
{
"epoch": 3.0,
"eval_combined_score": 0.7499948802864915,
"eval_loss": 0.9883082509040833,
"eval_pearson": 0.7530220350451059,
"eval_runtime": 0.4998,
"eval_samples_per_second": 3001.186,
"eval_spearmanr": 0.746967725527877,
"eval_steps_per_second": 12.005,
"step": 69
},
{
"epoch": 4.0,
"grad_norm": 7.193154335021973,
"learning_rate": 4.600000000000001e-05,
"loss": 0.8446,
"step": 92
},
{
"epoch": 4.0,
"eval_combined_score": 0.7814449278644269,
"eval_loss": 0.9500728249549866,
"eval_pearson": 0.7801592206629702,
"eval_runtime": 0.4748,
"eval_samples_per_second": 3159.192,
"eval_spearmanr": 0.7827306350658837,
"eval_steps_per_second": 12.637,
"step": 92
},
{
"epoch": 5.0,
"grad_norm": 15.801517486572266,
"learning_rate": 4.5e-05,
"loss": 0.6587,
"step": 115
},
{
"epoch": 5.0,
"eval_combined_score": 0.7831183677075164,
"eval_loss": 0.9120289087295532,
"eval_pearson": 0.7838836623851876,
"eval_runtime": 0.4934,
"eval_samples_per_second": 3040.098,
"eval_spearmanr": 0.7823530730298451,
"eval_steps_per_second": 12.16,
"step": 115
},
{
"epoch": 6.0,
"grad_norm": 14.146763801574707,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.5587,
"step": 138
},
{
"epoch": 6.0,
"eval_combined_score": 0.7878115431321093,
"eval_loss": 0.8917849659919739,
"eval_pearson": 0.7876118713697591,
"eval_runtime": 0.4812,
"eval_samples_per_second": 3117.386,
"eval_spearmanr": 0.7880112148944596,
"eval_steps_per_second": 12.47,
"step": 138
},
{
"epoch": 7.0,
"grad_norm": 12.302091598510742,
"learning_rate": 4.3e-05,
"loss": 0.5104,
"step": 161
},
{
"epoch": 7.0,
"eval_combined_score": 0.782515489635635,
"eval_loss": 0.9523068070411682,
"eval_pearson": 0.7821895116539266,
"eval_runtime": 0.4809,
"eval_samples_per_second": 3119.023,
"eval_spearmanr": 0.7828414676173435,
"eval_steps_per_second": 12.476,
"step": 161
},
{
"epoch": 8.0,
"grad_norm": 12.979435920715332,
"learning_rate": 4.2e-05,
"loss": 0.4506,
"step": 184
},
{
"epoch": 8.0,
"eval_combined_score": 0.7919260991828585,
"eval_loss": 0.9420682191848755,
"eval_pearson": 0.791584489351049,
"eval_runtime": 0.4809,
"eval_samples_per_second": 3118.926,
"eval_spearmanr": 0.7922677090146679,
"eval_steps_per_second": 12.476,
"step": 184
},
{
"epoch": 9.0,
"grad_norm": 18.948219299316406,
"learning_rate": 4.1e-05,
"loss": 0.4174,
"step": 207
},
{
"epoch": 9.0,
"eval_combined_score": 0.7906456677090563,
"eval_loss": 1.0924893617630005,
"eval_pearson": 0.7875795388915195,
"eval_runtime": 0.4739,
"eval_samples_per_second": 3165.183,
"eval_spearmanr": 0.7937117965265932,
"eval_steps_per_second": 12.661,
"step": 207
},
{
"epoch": 10.0,
"grad_norm": 5.4617791175842285,
"learning_rate": 4e-05,
"loss": 0.3229,
"step": 230
},
{
"epoch": 10.0,
"eval_combined_score": 0.7957522599726154,
"eval_loss": 0.8792051076889038,
"eval_pearson": 0.7969462774842241,
"eval_runtime": 0.4767,
"eval_samples_per_second": 3146.57,
"eval_spearmanr": 0.7945582424610067,
"eval_steps_per_second": 12.586,
"step": 230
},
{
"epoch": 11.0,
"grad_norm": 6.967871189117432,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.3096,
"step": 253
},
{
"epoch": 11.0,
"eval_combined_score": 0.7885789869359312,
"eval_loss": 0.9665384292602539,
"eval_pearson": 0.7887171688488032,
"eval_runtime": 0.487,
"eval_samples_per_second": 3079.987,
"eval_spearmanr": 0.7884408050230591,
"eval_steps_per_second": 12.32,
"step": 253
},
{
"epoch": 12.0,
"grad_norm": 16.113603591918945,
"learning_rate": 3.8e-05,
"loss": 0.2918,
"step": 276
},
{
"epoch": 12.0,
"eval_combined_score": 0.8004322602860193,
"eval_loss": 0.8212088346481323,
"eval_pearson": 0.801677826027752,
"eval_runtime": 0.4993,
"eval_samples_per_second": 3003.968,
"eval_spearmanr": 0.7991866945442867,
"eval_steps_per_second": 12.016,
"step": 276
},
{
"epoch": 13.0,
"grad_norm": 4.451534271240234,
"learning_rate": 3.7e-05,
"loss": 0.2479,
"step": 299
},
{
"epoch": 13.0,
"eval_combined_score": 0.804614899302114,
"eval_loss": 0.8791233897209167,
"eval_pearson": 0.8055416707360773,
"eval_runtime": 0.4721,
"eval_samples_per_second": 3177.294,
"eval_spearmanr": 0.8036881278681508,
"eval_steps_per_second": 12.709,
"step": 299
},
{
"epoch": 14.0,
"grad_norm": 21.7775821685791,
"learning_rate": 3.6e-05,
"loss": 0.2409,
"step": 322
},
{
"epoch": 14.0,
"eval_combined_score": 0.794995597443876,
"eval_loss": 0.8217026591300964,
"eval_pearson": 0.7969938156234373,
"eval_runtime": 0.5148,
"eval_samples_per_second": 2913.928,
"eval_spearmanr": 0.7929973792643148,
"eval_steps_per_second": 11.656,
"step": 322
},
{
"epoch": 15.0,
"grad_norm": 3.07222056388855,
"learning_rate": 3.5e-05,
"loss": 0.2071,
"step": 345
},
{
"epoch": 15.0,
"eval_combined_score": 0.7984497715007259,
"eval_loss": 0.8929257988929749,
"eval_pearson": 0.7998156602749035,
"eval_runtime": 0.4892,
"eval_samples_per_second": 3066.293,
"eval_spearmanr": 0.7970838827265485,
"eval_steps_per_second": 12.265,
"step": 345
},
{
"epoch": 16.0,
"grad_norm": 3.4134459495544434,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.1907,
"step": 368
},
{
"epoch": 16.0,
"eval_combined_score": 0.8007289150909647,
"eval_loss": 0.8188873529434204,
"eval_pearson": 0.8026761278764143,
"eval_runtime": 0.4814,
"eval_samples_per_second": 3116.2,
"eval_spearmanr": 0.798781702305515,
"eval_steps_per_second": 12.465,
"step": 368
},
{
"epoch": 17.0,
"grad_norm": 4.4020676612854,
"learning_rate": 3.3e-05,
"loss": 0.1825,
"step": 391
},
{
"epoch": 17.0,
"eval_combined_score": 0.79118899198846,
"eval_loss": 0.9392598271369934,
"eval_pearson": 0.7924365508807105,
"eval_runtime": 0.4766,
"eval_samples_per_second": 3147.292,
"eval_spearmanr": 0.7899414330962096,
"eval_steps_per_second": 12.589,
"step": 391
},
{
"epoch": 18.0,
"grad_norm": 5.791975498199463,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.1634,
"step": 414
},
{
"epoch": 18.0,
"eval_combined_score": 0.8034539561261447,
"eval_loss": 0.8721528053283691,
"eval_pearson": 0.8045915853539458,
"eval_runtime": 0.4845,
"eval_samples_per_second": 3095.701,
"eval_spearmanr": 0.8023163268983434,
"eval_steps_per_second": 12.383,
"step": 414
},
{
"epoch": 19.0,
"grad_norm": 2.9870147705078125,
"learning_rate": 3.1e-05,
"loss": 0.1531,
"step": 437
},
{
"epoch": 19.0,
"eval_combined_score": 0.8030149651947462,
"eval_loss": 0.9629133343696594,
"eval_pearson": 0.8037084608596774,
"eval_runtime": 0.4776,
"eval_samples_per_second": 3140.565,
"eval_spearmanr": 0.8023214695298152,
"eval_steps_per_second": 12.562,
"step": 437
},
{
"epoch": 20.0,
"grad_norm": 3.91269850730896,
"learning_rate": 3e-05,
"loss": 0.153,
"step": 460
},
{
"epoch": 20.0,
"eval_combined_score": 0.7958772509210114,
"eval_loss": 1.0062919855117798,
"eval_pearson": 0.797094458559933,
"eval_runtime": 0.485,
"eval_samples_per_second": 3092.717,
"eval_spearmanr": 0.7946600432820897,
"eval_steps_per_second": 12.371,
"step": 460
},
{
"epoch": 21.0,
"grad_norm": 4.98586893081665,
"learning_rate": 2.9e-05,
"loss": 0.1409,
"step": 483
},
{
"epoch": 21.0,
"eval_combined_score": 0.7955185663928932,
"eval_loss": 0.9756875038146973,
"eval_pearson": 0.7968145893338887,
"eval_runtime": 0.478,
"eval_samples_per_second": 3138.129,
"eval_spearmanr": 0.7942225434518977,
"eval_steps_per_second": 12.553,
"step": 483
},
{
"epoch": 21.0,
"step": 483,
"total_flos": 3165851231294976.0,
"train_loss": 0.5645940970189823,
"train_runtime": 97.1506,
"train_samples_per_second": 2958.809,
"train_steps_per_second": 11.837
}
],
"logging_steps": 1,
"max_steps": 1150,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3165851231294976.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}