{ "best_metric": 0.9153493642807007, "best_model_checkpoint": "distilbert_lda_100_v1_stsb/checkpoint-230", "epoch": 15.0, "eval_steps": 500, "global_step": 345, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.528723239898682, "learning_rate": 4.9e-05, "loss": 2.622, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.09512879196662656, "eval_loss": 2.529794216156006, "eval_pearson": 0.10113703827077289, "eval_runtime": 0.6476, "eval_samples_per_second": 2316.18, "eval_spearmanr": 0.08912054566248023, "eval_steps_per_second": 9.265, "step": 23 }, { "epoch": 2.0, "grad_norm": 14.141378402709961, "learning_rate": 4.8e-05, "loss": 1.8404, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.46451389801303666, "eval_loss": 2.334291458129883, "eval_pearson": 0.4642190872924477, "eval_runtime": 0.6424, "eval_samples_per_second": 2334.917, "eval_spearmanr": 0.4648087087336257, "eval_steps_per_second": 9.34, "step": 46 }, { "epoch": 3.0, "grad_norm": 15.804608345031738, "learning_rate": 4.7e-05, "loss": 1.3143, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.6702617110676681, "eval_loss": 1.2509400844573975, "eval_pearson": 0.6735628956208805, "eval_runtime": 0.7279, "eval_samples_per_second": 2060.6, "eval_spearmanr": 0.6669605265144558, "eval_steps_per_second": 8.242, "step": 69 }, { "epoch": 4.0, "grad_norm": 13.790794372558594, "learning_rate": 4.600000000000001e-05, "loss": 0.8809, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.7213381248378519, "eval_loss": 1.3873708248138428, "eval_pearson": 0.7172298554270424, "eval_runtime": 0.6538, "eval_samples_per_second": 2294.292, "eval_spearmanr": 0.7254463942486612, "eval_steps_per_second": 9.177, "step": 92 }, { "epoch": 5.0, "grad_norm": 12.30782699584961, "learning_rate": 4.5e-05, "loss": 0.6317, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.7164477017307784, "eval_loss": 1.583528757095337, "eval_pearson": 0.7090720167762655, "eval_runtime": 0.6363, "eval_samples_per_second": 2357.515, "eval_spearmanr": 0.7238233866852912, "eval_steps_per_second": 9.43, "step": 115 }, { "epoch": 6.0, "grad_norm": 10.6016845703125, "learning_rate": 4.4000000000000006e-05, "loss": 0.5139, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.7456445881950656, "eval_loss": 1.279284954071045, "eval_pearson": 0.744296040490723, "eval_runtime": 0.6455, "eval_samples_per_second": 2323.754, "eval_spearmanr": 0.7469931358994083, "eval_steps_per_second": 9.295, "step": 138 }, { "epoch": 7.0, "grad_norm": 6.149612903594971, "learning_rate": 4.3e-05, "loss": 0.3919, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.7555569821393648, "eval_loss": 1.0237528085708618, "eval_pearson": 0.7576470777149404, "eval_runtime": 0.6495, "eval_samples_per_second": 2309.412, "eval_spearmanr": 0.7534668865637894, "eval_steps_per_second": 9.238, "step": 161 }, { "epoch": 8.0, "grad_norm": 5.594114303588867, "learning_rate": 4.2e-05, "loss": 0.3125, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.734024640227608, "eval_loss": 1.4519073963165283, "eval_pearson": 0.7331238450711645, "eval_runtime": 0.6526, "eval_samples_per_second": 2298.344, "eval_spearmanr": 0.7349254353840515, "eval_steps_per_second": 9.193, "step": 184 }, { "epoch": 9.0, "grad_norm": 7.766717910766602, "learning_rate": 4.1e-05, "loss": 0.281, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.7381812541060337, "eval_loss": 1.256384253501892, "eval_pearson": 0.7389521632675855, "eval_runtime": 0.6521, "eval_samples_per_second": 2300.157, "eval_spearmanr": 0.7374103449444821, "eval_steps_per_second": 9.201, "step": 207 }, { "epoch": 10.0, "grad_norm": 7.7253313064575195, "learning_rate": 4e-05, "loss": 0.2395, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.7731133282988156, "eval_loss": 0.9153493642807007, "eval_pearson": 0.775833107495325, "eval_runtime": 0.654, "eval_samples_per_second": 2293.525, "eval_spearmanr": 0.7703935491023064, "eval_steps_per_second": 9.174, "step": 230 }, { "epoch": 11.0, "grad_norm": 8.97021484375, "learning_rate": 3.9000000000000006e-05, "loss": 0.2219, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.7508687271969791, "eval_loss": 1.2410622835159302, "eval_pearson": 0.7508843218702125, "eval_runtime": 0.6479, "eval_samples_per_second": 2315.036, "eval_spearmanr": 0.7508531325237456, "eval_steps_per_second": 9.26, "step": 253 }, { "epoch": 12.0, "grad_norm": 7.8722004890441895, "learning_rate": 3.8e-05, "loss": 0.1923, "step": 276 }, { "epoch": 12.0, "eval_combined_score": 0.7436207323425442, "eval_loss": 1.5144480466842651, "eval_pearson": 0.7428768737971987, "eval_runtime": 0.6534, "eval_samples_per_second": 2295.818, "eval_spearmanr": 0.7443645908878898, "eval_steps_per_second": 9.183, "step": 276 }, { "epoch": 13.0, "grad_norm": 5.461670398712158, "learning_rate": 3.7e-05, "loss": 0.1688, "step": 299 }, { "epoch": 13.0, "eval_combined_score": 0.7493074527202994, "eval_loss": 1.0667222738265991, "eval_pearson": 0.7517996606156556, "eval_runtime": 0.6697, "eval_samples_per_second": 2239.742, "eval_spearmanr": 0.7468152448249431, "eval_steps_per_second": 8.959, "step": 299 }, { "epoch": 14.0, "grad_norm": 4.178849220275879, "learning_rate": 3.6e-05, "loss": 0.1494, "step": 322 }, { "epoch": 14.0, "eval_combined_score": 0.7492505592661682, "eval_loss": 1.2371269464492798, "eval_pearson": 0.750178139345508, "eval_runtime": 0.6719, "eval_samples_per_second": 2232.526, "eval_spearmanr": 0.7483229791868286, "eval_steps_per_second": 8.93, "step": 322 }, { "epoch": 15.0, "grad_norm": 4.821505069732666, "learning_rate": 3.5e-05, "loss": 0.1498, "step": 345 }, { "epoch": 15.0, "eval_combined_score": 0.7453157328804569, "eval_loss": 1.1066056489944458, "eval_pearson": 0.7473142192268427, "eval_runtime": 0.6418, "eval_samples_per_second": 2337.353, "eval_spearmanr": 0.7433172465340713, "eval_steps_per_second": 9.349, "step": 345 }, { "epoch": 15.0, "step": 345, "total_flos": 5711561202147840.0, "train_loss": 0.6606892288595007, "train_runtime": 116.1909, "train_samples_per_second": 2473.946, "train_steps_per_second": 9.898 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5711561202147840.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }