|
{ |
|
"best_metric": 0.9153493642807007, |
|
"best_model_checkpoint": "distilbert_lda_100_v1_stsb/checkpoint-230", |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 345, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.528723239898682, |
|
"learning_rate": 4.9e-05, |
|
"loss": 2.622, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.09512879196662656, |
|
"eval_loss": 2.529794216156006, |
|
"eval_pearson": 0.10113703827077289, |
|
"eval_runtime": 0.6476, |
|
"eval_samples_per_second": 2316.18, |
|
"eval_spearmanr": 0.08912054566248023, |
|
"eval_steps_per_second": 9.265, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 14.141378402709961, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.8404, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.46451389801303666, |
|
"eval_loss": 2.334291458129883, |
|
"eval_pearson": 0.4642190872924477, |
|
"eval_runtime": 0.6424, |
|
"eval_samples_per_second": 2334.917, |
|
"eval_spearmanr": 0.4648087087336257, |
|
"eval_steps_per_second": 9.34, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 15.804608345031738, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.3143, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.6702617110676681, |
|
"eval_loss": 1.2509400844573975, |
|
"eval_pearson": 0.6735628956208805, |
|
"eval_runtime": 0.7279, |
|
"eval_samples_per_second": 2060.6, |
|
"eval_spearmanr": 0.6669605265144558, |
|
"eval_steps_per_second": 8.242, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 13.790794372558594, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.8809, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.7213381248378519, |
|
"eval_loss": 1.3873708248138428, |
|
"eval_pearson": 0.7172298554270424, |
|
"eval_runtime": 0.6538, |
|
"eval_samples_per_second": 2294.292, |
|
"eval_spearmanr": 0.7254463942486612, |
|
"eval_steps_per_second": 9.177, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 12.30782699584961, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6317, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.7164477017307784, |
|
"eval_loss": 1.583528757095337, |
|
"eval_pearson": 0.7090720167762655, |
|
"eval_runtime": 0.6363, |
|
"eval_samples_per_second": 2357.515, |
|
"eval_spearmanr": 0.7238233866852912, |
|
"eval_steps_per_second": 9.43, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 10.6016845703125, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.5139, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.7456445881950656, |
|
"eval_loss": 1.279284954071045, |
|
"eval_pearson": 0.744296040490723, |
|
"eval_runtime": 0.6455, |
|
"eval_samples_per_second": 2323.754, |
|
"eval_spearmanr": 0.7469931358994083, |
|
"eval_steps_per_second": 9.295, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 6.149612903594971, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.3919, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.7555569821393648, |
|
"eval_loss": 1.0237528085708618, |
|
"eval_pearson": 0.7576470777149404, |
|
"eval_runtime": 0.6495, |
|
"eval_samples_per_second": 2309.412, |
|
"eval_spearmanr": 0.7534668865637894, |
|
"eval_steps_per_second": 9.238, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.594114303588867, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.3125, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.734024640227608, |
|
"eval_loss": 1.4519073963165283, |
|
"eval_pearson": 0.7331238450711645, |
|
"eval_runtime": 0.6526, |
|
"eval_samples_per_second": 2298.344, |
|
"eval_spearmanr": 0.7349254353840515, |
|
"eval_steps_per_second": 9.193, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.766717910766602, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.281, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_combined_score": 0.7381812541060337, |
|
"eval_loss": 1.256384253501892, |
|
"eval_pearson": 0.7389521632675855, |
|
"eval_runtime": 0.6521, |
|
"eval_samples_per_second": 2300.157, |
|
"eval_spearmanr": 0.7374103449444821, |
|
"eval_steps_per_second": 9.201, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 7.7253313064575195, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2395, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_combined_score": 0.7731133282988156, |
|
"eval_loss": 0.9153493642807007, |
|
"eval_pearson": 0.775833107495325, |
|
"eval_runtime": 0.654, |
|
"eval_samples_per_second": 2293.525, |
|
"eval_spearmanr": 0.7703935491023064, |
|
"eval_steps_per_second": 9.174, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 8.97021484375, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.2219, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_combined_score": 0.7508687271969791, |
|
"eval_loss": 1.2410622835159302, |
|
"eval_pearson": 0.7508843218702125, |
|
"eval_runtime": 0.6479, |
|
"eval_samples_per_second": 2315.036, |
|
"eval_spearmanr": 0.7508531325237456, |
|
"eval_steps_per_second": 9.26, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 7.8722004890441895, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.1923, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_combined_score": 0.7436207323425442, |
|
"eval_loss": 1.5144480466842651, |
|
"eval_pearson": 0.7428768737971987, |
|
"eval_runtime": 0.6534, |
|
"eval_samples_per_second": 2295.818, |
|
"eval_spearmanr": 0.7443645908878898, |
|
"eval_steps_per_second": 9.183, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 5.461670398712158, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.1688, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_combined_score": 0.7493074527202994, |
|
"eval_loss": 1.0667222738265991, |
|
"eval_pearson": 0.7517996606156556, |
|
"eval_runtime": 0.6697, |
|
"eval_samples_per_second": 2239.742, |
|
"eval_spearmanr": 0.7468152448249431, |
|
"eval_steps_per_second": 8.959, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.178849220275879, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1494, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_combined_score": 0.7492505592661682, |
|
"eval_loss": 1.2371269464492798, |
|
"eval_pearson": 0.750178139345508, |
|
"eval_runtime": 0.6719, |
|
"eval_samples_per_second": 2232.526, |
|
"eval_spearmanr": 0.7483229791868286, |
|
"eval_steps_per_second": 8.93, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.821505069732666, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1498, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_combined_score": 0.7453157328804569, |
|
"eval_loss": 1.1066056489944458, |
|
"eval_pearson": 0.7473142192268427, |
|
"eval_runtime": 0.6418, |
|
"eval_samples_per_second": 2337.353, |
|
"eval_spearmanr": 0.7433172465340713, |
|
"eval_steps_per_second": 9.349, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 345, |
|
"total_flos": 5711561202147840.0, |
|
"train_loss": 0.6606892288595007, |
|
"train_runtime": 116.1909, |
|
"train_samples_per_second": 2473.946, |
|
"train_steps_per_second": 9.898 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5711561202147840.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|