distilbert_lda_100_v1_book_stsb / trainer_state.json
gokulsrinivasagan's picture
End of training
1cbc8df verified
{
"best_metric": 0.7980738282203674,
"best_model_checkpoint": "distilbert_lda_100_v1_book_stsb/checkpoint-230",
"epoch": 15.0,
"eval_steps": 500,
"global_step": 345,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 13.065649032592773,
"learning_rate": 4.9e-05,
"loss": 3.1376,
"step": 23
},
{
"epoch": 1.0,
"eval_combined_score": 0.1728512599113079,
"eval_loss": 2.3443872928619385,
"eval_pearson": 0.18002362677750725,
"eval_runtime": 0.4619,
"eval_samples_per_second": 3247.478,
"eval_spearmanr": 0.16567889304510858,
"eval_steps_per_second": 12.99,
"step": 23
},
{
"epoch": 2.0,
"grad_norm": 30.20730209350586,
"learning_rate": 4.8e-05,
"loss": 1.571,
"step": 46
},
{
"epoch": 2.0,
"eval_combined_score": 0.6510883490332047,
"eval_loss": 1.4976587295532227,
"eval_pearson": 0.6469347808355328,
"eval_runtime": 0.468,
"eval_samples_per_second": 3205.005,
"eval_spearmanr": 0.6552419172308765,
"eval_steps_per_second": 12.82,
"step": 46
},
{
"epoch": 3.0,
"grad_norm": 9.512760162353516,
"learning_rate": 4.7e-05,
"loss": 1.0298,
"step": 69
},
{
"epoch": 3.0,
"eval_combined_score": 0.7472320299511837,
"eval_loss": 0.9940208196640015,
"eval_pearson": 0.7482802487477433,
"eval_runtime": 0.5042,
"eval_samples_per_second": 2975.284,
"eval_spearmanr": 0.7461838111546238,
"eval_steps_per_second": 11.901,
"step": 69
},
{
"epoch": 4.0,
"grad_norm": 9.95678424835205,
"learning_rate": 4.600000000000001e-05,
"loss": 0.8795,
"step": 92
},
{
"epoch": 4.0,
"eval_combined_score": 0.7665966692630239,
"eval_loss": 1.0649274587631226,
"eval_pearson": 0.7621958495107538,
"eval_runtime": 0.4869,
"eval_samples_per_second": 3080.609,
"eval_spearmanr": 0.770997489015294,
"eval_steps_per_second": 12.322,
"step": 92
},
{
"epoch": 5.0,
"grad_norm": 29.694782257080078,
"learning_rate": 4.5e-05,
"loss": 0.6951,
"step": 115
},
{
"epoch": 5.0,
"eval_combined_score": 0.7678001045986155,
"eval_loss": 1.5035641193389893,
"eval_pearson": 0.750753453158977,
"eval_runtime": 0.4839,
"eval_samples_per_second": 3099.586,
"eval_spearmanr": 0.7848467560382539,
"eval_steps_per_second": 12.398,
"step": 115
},
{
"epoch": 6.0,
"grad_norm": 15.514448165893555,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.5558,
"step": 138
},
{
"epoch": 6.0,
"eval_combined_score": 0.7895822730889075,
"eval_loss": 0.9067298769950867,
"eval_pearson": 0.7877625518399618,
"eval_runtime": 0.4716,
"eval_samples_per_second": 3180.754,
"eval_spearmanr": 0.7914019943378532,
"eval_steps_per_second": 12.723,
"step": 138
},
{
"epoch": 7.0,
"grad_norm": 7.108041286468506,
"learning_rate": 4.3e-05,
"loss": 0.4306,
"step": 161
},
{
"epoch": 7.0,
"eval_combined_score": 0.8044942171535114,
"eval_loss": 0.8333074450492859,
"eval_pearson": 0.8050894731664774,
"eval_runtime": 0.4837,
"eval_samples_per_second": 3100.907,
"eval_spearmanr": 0.8038989611405454,
"eval_steps_per_second": 12.404,
"step": 161
},
{
"epoch": 8.0,
"grad_norm": 3.6502273082733154,
"learning_rate": 4.2e-05,
"loss": 0.3592,
"step": 184
},
{
"epoch": 8.0,
"eval_combined_score": 0.797110517540158,
"eval_loss": 0.9581788778305054,
"eval_pearson": 0.7966780577453428,
"eval_runtime": 0.4867,
"eval_samples_per_second": 3081.961,
"eval_spearmanr": 0.7975429773349733,
"eval_steps_per_second": 12.328,
"step": 184
},
{
"epoch": 9.0,
"grad_norm": 9.489797592163086,
"learning_rate": 4.1e-05,
"loss": 0.2847,
"step": 207
},
{
"epoch": 9.0,
"eval_combined_score": 0.7941543139172129,
"eval_loss": 1.0402296781539917,
"eval_pearson": 0.7929483799420015,
"eval_runtime": 0.4618,
"eval_samples_per_second": 3248.43,
"eval_spearmanr": 0.7953602478924242,
"eval_steps_per_second": 12.994,
"step": 207
},
{
"epoch": 10.0,
"grad_norm": 5.137160778045654,
"learning_rate": 4e-05,
"loss": 0.2689,
"step": 230
},
{
"epoch": 10.0,
"eval_combined_score": 0.8037144170123958,
"eval_loss": 0.7980738282203674,
"eval_pearson": 0.8060121051059546,
"eval_runtime": 0.4835,
"eval_samples_per_second": 3102.36,
"eval_spearmanr": 0.8014167289188371,
"eval_steps_per_second": 12.409,
"step": 230
},
{
"epoch": 11.0,
"grad_norm": 9.64781665802002,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.2368,
"step": 253
},
{
"epoch": 11.0,
"eval_combined_score": 0.8091608382603208,
"eval_loss": 0.8627774119377136,
"eval_pearson": 0.8100673949446242,
"eval_runtime": 0.4681,
"eval_samples_per_second": 3204.761,
"eval_spearmanr": 0.8082542815760174,
"eval_steps_per_second": 12.819,
"step": 253
},
{
"epoch": 12.0,
"grad_norm": 7.976446151733398,
"learning_rate": 3.8e-05,
"loss": 0.2088,
"step": 276
},
{
"epoch": 12.0,
"eval_combined_score": 0.8001157143831301,
"eval_loss": 1.0528961420059204,
"eval_pearson": 0.7990927893008337,
"eval_runtime": 0.4926,
"eval_samples_per_second": 3044.855,
"eval_spearmanr": 0.8011386394654265,
"eval_steps_per_second": 12.179,
"step": 276
},
{
"epoch": 13.0,
"grad_norm": 9.004283905029297,
"learning_rate": 3.7e-05,
"loss": 0.1912,
"step": 299
},
{
"epoch": 13.0,
"eval_combined_score": 0.8012021697636892,
"eval_loss": 0.8877846598625183,
"eval_pearson": 0.8010653292046944,
"eval_runtime": 0.4576,
"eval_samples_per_second": 3278.316,
"eval_spearmanr": 0.801339010322684,
"eval_steps_per_second": 13.113,
"step": 299
},
{
"epoch": 14.0,
"grad_norm": 8.706100463867188,
"learning_rate": 3.6e-05,
"loss": 0.1618,
"step": 322
},
{
"epoch": 14.0,
"eval_combined_score": 0.7950920104148684,
"eval_loss": 0.8756589293479919,
"eval_pearson": 0.7958563730475406,
"eval_runtime": 0.4905,
"eval_samples_per_second": 3058.076,
"eval_spearmanr": 0.7943276477821963,
"eval_steps_per_second": 12.232,
"step": 322
},
{
"epoch": 15.0,
"grad_norm": 9.078815460205078,
"learning_rate": 3.5e-05,
"loss": 0.1557,
"step": 345
},
{
"epoch": 15.0,
"eval_combined_score": 0.7990363023147462,
"eval_loss": 0.8970615267753601,
"eval_pearson": 0.8001497386998061,
"eval_runtime": 0.4666,
"eval_samples_per_second": 3215.038,
"eval_spearmanr": 0.7979228659296864,
"eval_steps_per_second": 12.86,
"step": 345
},
{
"epoch": 15.0,
"step": 345,
"total_flos": 5711561202147840.0,
"train_loss": 0.6777657121851824,
"train_runtime": 82.4709,
"train_samples_per_second": 3485.471,
"train_steps_per_second": 13.944
}
],
"logging_steps": 1,
"max_steps": 1150,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5711561202147840.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}