{ "best_metric": 0.8188873529434204, "best_model_checkpoint": "bert_tiny_lda_5_v1_book_stsb/checkpoint-368", "epoch": 21.0, "eval_steps": 500, "global_step": 483, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.9694929122924805, "learning_rate": 4.9e-05, "loss": 2.8706, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.09954146094056157, "eval_loss": 2.462472438812256, "eval_pearson": 0.09907601559786483, "eval_runtime": 0.4799, "eval_samples_per_second": 3125.334, "eval_spearmanr": 0.1000069062832583, "eval_steps_per_second": 12.501, "step": 23 }, { "epoch": 2.0, "grad_norm": 22.507034301757812, "learning_rate": 4.8e-05, "loss": 1.7766, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.6395870539114769, "eval_loss": 1.3386552333831787, "eval_pearson": 0.6420563993485077, "eval_runtime": 0.5098, "eval_samples_per_second": 2942.208, "eval_spearmanr": 0.637117708474446, "eval_steps_per_second": 11.769, "step": 46 }, { "epoch": 3.0, "grad_norm": 20.13024139404297, "learning_rate": 4.7e-05, "loss": 1.1649, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.7499948802864915, "eval_loss": 0.9883082509040833, "eval_pearson": 0.7530220350451059, "eval_runtime": 0.4998, "eval_samples_per_second": 3001.186, "eval_spearmanr": 0.746967725527877, "eval_steps_per_second": 12.005, "step": 69 }, { "epoch": 4.0, "grad_norm": 7.193154335021973, "learning_rate": 4.600000000000001e-05, "loss": 0.8446, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.7814449278644269, "eval_loss": 0.9500728249549866, "eval_pearson": 0.7801592206629702, "eval_runtime": 0.4748, "eval_samples_per_second": 3159.192, "eval_spearmanr": 0.7827306350658837, "eval_steps_per_second": 12.637, "step": 92 }, { "epoch": 5.0, "grad_norm": 15.801517486572266, "learning_rate": 4.5e-05, "loss": 0.6587, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.7831183677075164, "eval_loss": 0.9120289087295532, "eval_pearson": 0.7838836623851876, "eval_runtime": 0.4934, "eval_samples_per_second": 3040.098, "eval_spearmanr": 0.7823530730298451, "eval_steps_per_second": 12.16, "step": 115 }, { "epoch": 6.0, "grad_norm": 14.146763801574707, "learning_rate": 4.4000000000000006e-05, "loss": 0.5587, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.7878115431321093, "eval_loss": 0.8917849659919739, "eval_pearson": 0.7876118713697591, "eval_runtime": 0.4812, "eval_samples_per_second": 3117.386, "eval_spearmanr": 0.7880112148944596, "eval_steps_per_second": 12.47, "step": 138 }, { "epoch": 7.0, "grad_norm": 12.302091598510742, "learning_rate": 4.3e-05, "loss": 0.5104, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.782515489635635, "eval_loss": 0.9523068070411682, "eval_pearson": 0.7821895116539266, "eval_runtime": 0.4809, "eval_samples_per_second": 3119.023, "eval_spearmanr": 0.7828414676173435, "eval_steps_per_second": 12.476, "step": 161 }, { "epoch": 8.0, "grad_norm": 12.979435920715332, "learning_rate": 4.2e-05, "loss": 0.4506, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.7919260991828585, "eval_loss": 0.9420682191848755, "eval_pearson": 0.791584489351049, "eval_runtime": 0.4809, "eval_samples_per_second": 3118.926, "eval_spearmanr": 0.7922677090146679, "eval_steps_per_second": 12.476, "step": 184 }, { "epoch": 9.0, "grad_norm": 18.948219299316406, "learning_rate": 4.1e-05, "loss": 0.4174, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.7906456677090563, "eval_loss": 1.0924893617630005, "eval_pearson": 0.7875795388915195, "eval_runtime": 0.4739, "eval_samples_per_second": 3165.183, "eval_spearmanr": 0.7937117965265932, "eval_steps_per_second": 12.661, "step": 207 }, { "epoch": 10.0, "grad_norm": 5.4617791175842285, "learning_rate": 4e-05, "loss": 0.3229, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.7957522599726154, "eval_loss": 0.8792051076889038, "eval_pearson": 0.7969462774842241, "eval_runtime": 0.4767, "eval_samples_per_second": 3146.57, "eval_spearmanr": 0.7945582424610067, "eval_steps_per_second": 12.586, "step": 230 }, { "epoch": 11.0, "grad_norm": 6.967871189117432, "learning_rate": 3.9000000000000006e-05, "loss": 0.3096, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.7885789869359312, "eval_loss": 0.9665384292602539, "eval_pearson": 0.7887171688488032, "eval_runtime": 0.487, "eval_samples_per_second": 3079.987, "eval_spearmanr": 0.7884408050230591, "eval_steps_per_second": 12.32, "step": 253 }, { "epoch": 12.0, "grad_norm": 16.113603591918945, "learning_rate": 3.8e-05, "loss": 0.2918, "step": 276 }, { "epoch": 12.0, "eval_combined_score": 0.8004322602860193, "eval_loss": 0.8212088346481323, "eval_pearson": 0.801677826027752, "eval_runtime": 0.4993, "eval_samples_per_second": 3003.968, "eval_spearmanr": 0.7991866945442867, "eval_steps_per_second": 12.016, "step": 276 }, { "epoch": 13.0, "grad_norm": 4.451534271240234, "learning_rate": 3.7e-05, "loss": 0.2479, "step": 299 }, { "epoch": 13.0, "eval_combined_score": 0.804614899302114, "eval_loss": 0.8791233897209167, "eval_pearson": 0.8055416707360773, "eval_runtime": 0.4721, "eval_samples_per_second": 3177.294, "eval_spearmanr": 0.8036881278681508, "eval_steps_per_second": 12.709, "step": 299 }, { "epoch": 14.0, "grad_norm": 21.7775821685791, "learning_rate": 3.6e-05, "loss": 0.2409, "step": 322 }, { "epoch": 14.0, "eval_combined_score": 0.794995597443876, "eval_loss": 0.8217026591300964, "eval_pearson": 0.7969938156234373, "eval_runtime": 0.5148, "eval_samples_per_second": 2913.928, "eval_spearmanr": 0.7929973792643148, "eval_steps_per_second": 11.656, "step": 322 }, { "epoch": 15.0, "grad_norm": 3.07222056388855, "learning_rate": 3.5e-05, "loss": 0.2071, "step": 345 }, { "epoch": 15.0, "eval_combined_score": 0.7984497715007259, "eval_loss": 0.8929257988929749, "eval_pearson": 0.7998156602749035, "eval_runtime": 0.4892, "eval_samples_per_second": 3066.293, "eval_spearmanr": 0.7970838827265485, "eval_steps_per_second": 12.265, "step": 345 }, { "epoch": 16.0, "grad_norm": 3.4134459495544434, "learning_rate": 3.4000000000000007e-05, "loss": 0.1907, "step": 368 }, { "epoch": 16.0, "eval_combined_score": 0.8007289150909647, "eval_loss": 0.8188873529434204, "eval_pearson": 0.8026761278764143, "eval_runtime": 0.4814, "eval_samples_per_second": 3116.2, "eval_spearmanr": 0.798781702305515, "eval_steps_per_second": 12.465, "step": 368 }, { "epoch": 17.0, "grad_norm": 4.4020676612854, "learning_rate": 3.3e-05, "loss": 0.1825, "step": 391 }, { "epoch": 17.0, "eval_combined_score": 0.79118899198846, "eval_loss": 0.9392598271369934, "eval_pearson": 0.7924365508807105, "eval_runtime": 0.4766, "eval_samples_per_second": 3147.292, "eval_spearmanr": 0.7899414330962096, "eval_steps_per_second": 12.589, "step": 391 }, { "epoch": 18.0, "grad_norm": 5.791975498199463, "learning_rate": 3.2000000000000005e-05, "loss": 0.1634, "step": 414 }, { "epoch": 18.0, "eval_combined_score": 0.8034539561261447, "eval_loss": 0.8721528053283691, "eval_pearson": 0.8045915853539458, "eval_runtime": 0.4845, "eval_samples_per_second": 3095.701, "eval_spearmanr": 0.8023163268983434, "eval_steps_per_second": 12.383, "step": 414 }, { "epoch": 19.0, "grad_norm": 2.9870147705078125, "learning_rate": 3.1e-05, "loss": 0.1531, "step": 437 }, { "epoch": 19.0, "eval_combined_score": 0.8030149651947462, "eval_loss": 0.9629133343696594, "eval_pearson": 0.8037084608596774, "eval_runtime": 0.4776, "eval_samples_per_second": 3140.565, "eval_spearmanr": 0.8023214695298152, "eval_steps_per_second": 12.562, "step": 437 }, { "epoch": 20.0, "grad_norm": 3.91269850730896, "learning_rate": 3e-05, "loss": 0.153, "step": 460 }, { "epoch": 20.0, "eval_combined_score": 0.7958772509210114, "eval_loss": 1.0062919855117798, "eval_pearson": 0.797094458559933, "eval_runtime": 0.485, "eval_samples_per_second": 3092.717, "eval_spearmanr": 0.7946600432820897, "eval_steps_per_second": 12.371, "step": 460 }, { "epoch": 21.0, "grad_norm": 4.98586893081665, "learning_rate": 2.9e-05, "loss": 0.1409, "step": 483 }, { "epoch": 21.0, "eval_combined_score": 0.7955185663928932, "eval_loss": 0.9756875038146973, "eval_pearson": 0.7968145893338887, "eval_runtime": 0.478, "eval_samples_per_second": 3138.129, "eval_spearmanr": 0.7942225434518977, "eval_steps_per_second": 12.553, "step": 483 }, { "epoch": 21.0, "step": 483, "total_flos": 3165851231294976.0, "train_loss": 0.5645940970189823, "train_runtime": 97.1506, "train_samples_per_second": 2958.809, "train_steps_per_second": 11.837 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3165851231294976.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }