distilbert_lda_100_v1_stsb / trainer_state.json
gokulsrinivasagan's picture
End of training
d919325 verified
{
"best_metric": 0.9153493642807007,
"best_model_checkpoint": "distilbert_lda_100_v1_stsb/checkpoint-230",
"epoch": 15.0,
"eval_steps": 500,
"global_step": 345,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 6.528723239898682,
"learning_rate": 4.9e-05,
"loss": 2.622,
"step": 23
},
{
"epoch": 1.0,
"eval_combined_score": 0.09512879196662656,
"eval_loss": 2.529794216156006,
"eval_pearson": 0.10113703827077289,
"eval_runtime": 0.6476,
"eval_samples_per_second": 2316.18,
"eval_spearmanr": 0.08912054566248023,
"eval_steps_per_second": 9.265,
"step": 23
},
{
"epoch": 2.0,
"grad_norm": 14.141378402709961,
"learning_rate": 4.8e-05,
"loss": 1.8404,
"step": 46
},
{
"epoch": 2.0,
"eval_combined_score": 0.46451389801303666,
"eval_loss": 2.334291458129883,
"eval_pearson": 0.4642190872924477,
"eval_runtime": 0.6424,
"eval_samples_per_second": 2334.917,
"eval_spearmanr": 0.4648087087336257,
"eval_steps_per_second": 9.34,
"step": 46
},
{
"epoch": 3.0,
"grad_norm": 15.804608345031738,
"learning_rate": 4.7e-05,
"loss": 1.3143,
"step": 69
},
{
"epoch": 3.0,
"eval_combined_score": 0.6702617110676681,
"eval_loss": 1.2509400844573975,
"eval_pearson": 0.6735628956208805,
"eval_runtime": 0.7279,
"eval_samples_per_second": 2060.6,
"eval_spearmanr": 0.6669605265144558,
"eval_steps_per_second": 8.242,
"step": 69
},
{
"epoch": 4.0,
"grad_norm": 13.790794372558594,
"learning_rate": 4.600000000000001e-05,
"loss": 0.8809,
"step": 92
},
{
"epoch": 4.0,
"eval_combined_score": 0.7213381248378519,
"eval_loss": 1.3873708248138428,
"eval_pearson": 0.7172298554270424,
"eval_runtime": 0.6538,
"eval_samples_per_second": 2294.292,
"eval_spearmanr": 0.7254463942486612,
"eval_steps_per_second": 9.177,
"step": 92
},
{
"epoch": 5.0,
"grad_norm": 12.30782699584961,
"learning_rate": 4.5e-05,
"loss": 0.6317,
"step": 115
},
{
"epoch": 5.0,
"eval_combined_score": 0.7164477017307784,
"eval_loss": 1.583528757095337,
"eval_pearson": 0.7090720167762655,
"eval_runtime": 0.6363,
"eval_samples_per_second": 2357.515,
"eval_spearmanr": 0.7238233866852912,
"eval_steps_per_second": 9.43,
"step": 115
},
{
"epoch": 6.0,
"grad_norm": 10.6016845703125,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.5139,
"step": 138
},
{
"epoch": 6.0,
"eval_combined_score": 0.7456445881950656,
"eval_loss": 1.279284954071045,
"eval_pearson": 0.744296040490723,
"eval_runtime": 0.6455,
"eval_samples_per_second": 2323.754,
"eval_spearmanr": 0.7469931358994083,
"eval_steps_per_second": 9.295,
"step": 138
},
{
"epoch": 7.0,
"grad_norm": 6.149612903594971,
"learning_rate": 4.3e-05,
"loss": 0.3919,
"step": 161
},
{
"epoch": 7.0,
"eval_combined_score": 0.7555569821393648,
"eval_loss": 1.0237528085708618,
"eval_pearson": 0.7576470777149404,
"eval_runtime": 0.6495,
"eval_samples_per_second": 2309.412,
"eval_spearmanr": 0.7534668865637894,
"eval_steps_per_second": 9.238,
"step": 161
},
{
"epoch": 8.0,
"grad_norm": 5.594114303588867,
"learning_rate": 4.2e-05,
"loss": 0.3125,
"step": 184
},
{
"epoch": 8.0,
"eval_combined_score": 0.734024640227608,
"eval_loss": 1.4519073963165283,
"eval_pearson": 0.7331238450711645,
"eval_runtime": 0.6526,
"eval_samples_per_second": 2298.344,
"eval_spearmanr": 0.7349254353840515,
"eval_steps_per_second": 9.193,
"step": 184
},
{
"epoch": 9.0,
"grad_norm": 7.766717910766602,
"learning_rate": 4.1e-05,
"loss": 0.281,
"step": 207
},
{
"epoch": 9.0,
"eval_combined_score": 0.7381812541060337,
"eval_loss": 1.256384253501892,
"eval_pearson": 0.7389521632675855,
"eval_runtime": 0.6521,
"eval_samples_per_second": 2300.157,
"eval_spearmanr": 0.7374103449444821,
"eval_steps_per_second": 9.201,
"step": 207
},
{
"epoch": 10.0,
"grad_norm": 7.7253313064575195,
"learning_rate": 4e-05,
"loss": 0.2395,
"step": 230
},
{
"epoch": 10.0,
"eval_combined_score": 0.7731133282988156,
"eval_loss": 0.9153493642807007,
"eval_pearson": 0.775833107495325,
"eval_runtime": 0.654,
"eval_samples_per_second": 2293.525,
"eval_spearmanr": 0.7703935491023064,
"eval_steps_per_second": 9.174,
"step": 230
},
{
"epoch": 11.0,
"grad_norm": 8.97021484375,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.2219,
"step": 253
},
{
"epoch": 11.0,
"eval_combined_score": 0.7508687271969791,
"eval_loss": 1.2410622835159302,
"eval_pearson": 0.7508843218702125,
"eval_runtime": 0.6479,
"eval_samples_per_second": 2315.036,
"eval_spearmanr": 0.7508531325237456,
"eval_steps_per_second": 9.26,
"step": 253
},
{
"epoch": 12.0,
"grad_norm": 7.8722004890441895,
"learning_rate": 3.8e-05,
"loss": 0.1923,
"step": 276
},
{
"epoch": 12.0,
"eval_combined_score": 0.7436207323425442,
"eval_loss": 1.5144480466842651,
"eval_pearson": 0.7428768737971987,
"eval_runtime": 0.6534,
"eval_samples_per_second": 2295.818,
"eval_spearmanr": 0.7443645908878898,
"eval_steps_per_second": 9.183,
"step": 276
},
{
"epoch": 13.0,
"grad_norm": 5.461670398712158,
"learning_rate": 3.7e-05,
"loss": 0.1688,
"step": 299
},
{
"epoch": 13.0,
"eval_combined_score": 0.7493074527202994,
"eval_loss": 1.0667222738265991,
"eval_pearson": 0.7517996606156556,
"eval_runtime": 0.6697,
"eval_samples_per_second": 2239.742,
"eval_spearmanr": 0.7468152448249431,
"eval_steps_per_second": 8.959,
"step": 299
},
{
"epoch": 14.0,
"grad_norm": 4.178849220275879,
"learning_rate": 3.6e-05,
"loss": 0.1494,
"step": 322
},
{
"epoch": 14.0,
"eval_combined_score": 0.7492505592661682,
"eval_loss": 1.2371269464492798,
"eval_pearson": 0.750178139345508,
"eval_runtime": 0.6719,
"eval_samples_per_second": 2232.526,
"eval_spearmanr": 0.7483229791868286,
"eval_steps_per_second": 8.93,
"step": 322
},
{
"epoch": 15.0,
"grad_norm": 4.821505069732666,
"learning_rate": 3.5e-05,
"loss": 0.1498,
"step": 345
},
{
"epoch": 15.0,
"eval_combined_score": 0.7453157328804569,
"eval_loss": 1.1066056489944458,
"eval_pearson": 0.7473142192268427,
"eval_runtime": 0.6418,
"eval_samples_per_second": 2337.353,
"eval_spearmanr": 0.7433172465340713,
"eval_steps_per_second": 9.349,
"step": 345
},
{
"epoch": 15.0,
"step": 345,
"total_flos": 5711561202147840.0,
"train_loss": 0.6606892288595007,
"train_runtime": 116.1909,
"train_samples_per_second": 2473.946,
"train_steps_per_second": 9.898
}
],
"logging_steps": 1,
"max_steps": 1150,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5711561202147840.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}