{ "best_metric": 0.8317757009345794, "best_model_checkpoint": "BEiT-RHS-NDA\\checkpoint-272", "epoch": 40.0, "eval_steps": 500, "global_step": 320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5887850467289719, "eval_loss": 0.6851425170898438, "eval_runtime": 2.6682, "eval_samples_per_second": 40.103, "eval_steps_per_second": 2.624, "step": 8 }, { "epoch": 1.25, "learning_rate": 3.125e-05, "loss": 0.6911, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.5887850467289719, "eval_loss": 0.6720580458641052, "eval_runtime": 2.054, "eval_samples_per_second": 52.093, "eval_steps_per_second": 3.408, "step": 16 }, { "epoch": 2.5, "learning_rate": 4.9342105263157894e-05, "loss": 0.6739, "step": 20 }, { "epoch": 3.0, "eval_accuracy": 0.5887850467289719, "eval_loss": 0.6504218578338623, "eval_runtime": 2.4524, "eval_samples_per_second": 43.631, "eval_steps_per_second": 2.854, "step": 24 }, { "epoch": 3.75, "learning_rate": 4.769736842105263e-05, "loss": 0.6595, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.5887850467289719, "eval_loss": 0.6432350873947144, "eval_runtime": 2.1328, "eval_samples_per_second": 50.169, "eval_steps_per_second": 3.282, "step": 32 }, { "epoch": 5.0, "learning_rate": 4.605263157894737e-05, "loss": 0.646, "step": 40 }, { "epoch": 5.0, "eval_accuracy": 0.6822429906542056, "eval_loss": 0.6316895484924316, "eval_runtime": 2.4096, "eval_samples_per_second": 44.405, "eval_steps_per_second": 2.905, "step": 40 }, { "epoch": 6.0, "eval_accuracy": 0.6915887850467289, "eval_loss": 0.617514431476593, "eval_runtime": 2.0335, "eval_samples_per_second": 52.617, "eval_steps_per_second": 3.442, "step": 48 }, { "epoch": 6.25, "learning_rate": 4.440789473684211e-05, "loss": 0.6142, "step": 50 }, { "epoch": 7.0, "eval_accuracy": 0.6915887850467289, "eval_loss": 0.6269640326499939, "eval_runtime": 2.0515, "eval_samples_per_second": 52.156, "eval_steps_per_second": 3.412, "step": 56 }, { "epoch": 7.5, "learning_rate": 4.2763157894736847e-05, "loss": 0.608, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.6915887850467289, "eval_loss": 0.6617878675460815, "eval_runtime": 2.0447, "eval_samples_per_second": 52.331, "eval_steps_per_second": 3.424, "step": 64 }, { "epoch": 8.75, "learning_rate": 4.111842105263158e-05, "loss": 0.5927, "step": 70 }, { "epoch": 9.0, "eval_accuracy": 0.6915887850467289, "eval_loss": 0.5347260236740112, "eval_runtime": 2.139, "eval_samples_per_second": 50.023, "eval_steps_per_second": 3.273, "step": 72 }, { "epoch": 10.0, "learning_rate": 3.9473684210526316e-05, "loss": 0.5333, "step": 80 }, { "epoch": 10.0, "eval_accuracy": 0.6448598130841121, "eval_loss": 0.5743899941444397, "eval_runtime": 2.1225, "eval_samples_per_second": 50.412, "eval_steps_per_second": 3.298, "step": 80 }, { "epoch": 11.0, "eval_accuracy": 0.7476635514018691, "eval_loss": 0.4974236786365509, "eval_runtime": 2.059, "eval_samples_per_second": 51.967, "eval_steps_per_second": 3.4, "step": 88 }, { "epoch": 11.25, "learning_rate": 3.7828947368421054e-05, "loss": 0.4987, "step": 90 }, { "epoch": 12.0, "eval_accuracy": 0.6448598130841121, "eval_loss": 0.5970269441604614, "eval_runtime": 2.1492, "eval_samples_per_second": 49.787, "eval_steps_per_second": 3.257, "step": 96 }, { "epoch": 12.5, "learning_rate": 3.618421052631579e-05, "loss": 0.5421, "step": 100 }, { "epoch": 13.0, "eval_accuracy": 0.7383177570093458, "eval_loss": 0.5137068629264832, "eval_runtime": 2.4494, "eval_samples_per_second": 43.684, "eval_steps_per_second": 2.858, "step": 104 }, { "epoch": 13.75, "learning_rate": 3.4539473684210524e-05, "loss": 0.4881, "step": 110 }, { "epoch": 14.0, "eval_accuracy": 0.7663551401869159, "eval_loss": 0.47269827127456665, "eval_runtime": 2.4181, "eval_samples_per_second": 44.249, "eval_steps_per_second": 2.895, "step": 112 }, { "epoch": 15.0, "learning_rate": 3.289473684210527e-05, "loss": 0.4408, "step": 120 }, { "epoch": 15.0, "eval_accuracy": 0.7663551401869159, "eval_loss": 0.5161357522010803, "eval_runtime": 2.086, "eval_samples_per_second": 51.295, "eval_steps_per_second": 3.356, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.6915887850467289, "eval_loss": 0.6732468008995056, "eval_runtime": 2.0757, "eval_samples_per_second": 51.548, "eval_steps_per_second": 3.372, "step": 128 }, { "epoch": 16.25, "learning_rate": 3.125e-05, "loss": 0.4923, "step": 130 }, { "epoch": 17.0, "eval_accuracy": 0.7009345794392523, "eval_loss": 0.6567767262458801, "eval_runtime": 2.0563, "eval_samples_per_second": 52.036, "eval_steps_per_second": 3.404, "step": 136 }, { "epoch": 17.5, "learning_rate": 2.9605263157894735e-05, "loss": 0.4135, "step": 140 }, { "epoch": 18.0, "eval_accuracy": 0.7009345794392523, "eval_loss": 0.665261447429657, "eval_runtime": 2.454, "eval_samples_per_second": 43.602, "eval_steps_per_second": 2.852, "step": 144 }, { "epoch": 18.75, "learning_rate": 2.7960526315789477e-05, "loss": 0.4308, "step": 150 }, { "epoch": 19.0, "eval_accuracy": 0.719626168224299, "eval_loss": 0.6031992435455322, "eval_runtime": 2.0319, "eval_samples_per_second": 52.66, "eval_steps_per_second": 3.445, "step": 152 }, { "epoch": 20.0, "learning_rate": 2.6315789473684212e-05, "loss": 0.3837, "step": 160 }, { "epoch": 20.0, "eval_accuracy": 0.8037383177570093, "eval_loss": 0.44923561811447144, "eval_runtime": 2.1355, "eval_samples_per_second": 50.106, "eval_steps_per_second": 3.278, "step": 160 }, { "epoch": 21.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.454855740070343, "eval_runtime": 2.1041, "eval_samples_per_second": 50.854, "eval_steps_per_second": 3.327, "step": 168 }, { "epoch": 21.25, "learning_rate": 2.4671052631578947e-05, "loss": 0.3297, "step": 170 }, { "epoch": 22.0, "eval_accuracy": 0.7663551401869159, "eval_loss": 0.5525509715080261, "eval_runtime": 2.3723, "eval_samples_per_second": 45.104, "eval_steps_per_second": 2.951, "step": 176 }, { "epoch": 22.5, "learning_rate": 2.3026315789473685e-05, "loss": 0.3264, "step": 180 }, { "epoch": 23.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.5171772241592407, "eval_runtime": 2.1842, "eval_samples_per_second": 48.989, "eval_steps_per_second": 3.205, "step": 184 }, { "epoch": 23.75, "learning_rate": 2.1381578947368423e-05, "loss": 0.3487, "step": 190 }, { "epoch": 24.0, "eval_accuracy": 0.7663551401869159, "eval_loss": 0.5104933381080627, "eval_runtime": 2.1764, "eval_samples_per_second": 49.164, "eval_steps_per_second": 3.216, "step": 192 }, { "epoch": 25.0, "learning_rate": 1.9736842105263158e-05, "loss": 0.2892, "step": 200 }, { "epoch": 25.0, "eval_accuracy": 0.7757009345794392, "eval_loss": 0.4565769135951996, "eval_runtime": 2.1452, "eval_samples_per_second": 49.879, "eval_steps_per_second": 3.263, "step": 200 }, { "epoch": 26.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.523303747177124, "eval_runtime": 2.1458, "eval_samples_per_second": 49.865, "eval_steps_per_second": 3.262, "step": 208 }, { "epoch": 26.25, "learning_rate": 1.8092105263157896e-05, "loss": 0.2505, "step": 210 }, { "epoch": 27.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.4817139804363251, "eval_runtime": 2.0456, "eval_samples_per_second": 52.308, "eval_steps_per_second": 3.422, "step": 216 }, { "epoch": 27.5, "learning_rate": 1.6447368421052635e-05, "loss": 0.2542, "step": 220 }, { "epoch": 28.0, "eval_accuracy": 0.8037383177570093, "eval_loss": 0.5034652948379517, "eval_runtime": 2.3168, "eval_samples_per_second": 46.184, "eval_steps_per_second": 3.021, "step": 224 }, { "epoch": 28.75, "learning_rate": 1.4802631578947368e-05, "loss": 0.2285, "step": 230 }, { "epoch": 29.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.5281862616539001, "eval_runtime": 2.1165, "eval_samples_per_second": 50.556, "eval_steps_per_second": 3.307, "step": 232 }, { "epoch": 30.0, "learning_rate": 1.3157894736842106e-05, "loss": 0.2053, "step": 240 }, { "epoch": 30.0, "eval_accuracy": 0.8130841121495327, "eval_loss": 0.5637905597686768, "eval_runtime": 2.1396, "eval_samples_per_second": 50.009, "eval_steps_per_second": 3.272, "step": 240 }, { "epoch": 31.0, "eval_accuracy": 0.7570093457943925, "eval_loss": 0.6189974546432495, "eval_runtime": 2.2612, "eval_samples_per_second": 47.32, "eval_steps_per_second": 3.096, "step": 248 }, { "epoch": 31.25, "learning_rate": 1.1513157894736843e-05, "loss": 0.2205, "step": 250 }, { "epoch": 32.0, "eval_accuracy": 0.7850467289719626, "eval_loss": 0.614178478717804, "eval_runtime": 2.5358, "eval_samples_per_second": 42.196, "eval_steps_per_second": 2.761, "step": 256 }, { "epoch": 32.5, "learning_rate": 9.868421052631579e-06, "loss": 0.2081, "step": 260 }, { "epoch": 33.0, "eval_accuracy": 0.7850467289719626, "eval_loss": 0.575212836265564, "eval_runtime": 2.0662, "eval_samples_per_second": 51.787, "eval_steps_per_second": 3.388, "step": 264 }, { "epoch": 33.75, "learning_rate": 8.223684210526317e-06, "loss": 0.2075, "step": 270 }, { "epoch": 34.0, "eval_accuracy": 0.8317757009345794, "eval_loss": 0.5321738719940186, "eval_runtime": 2.1157, "eval_samples_per_second": 50.573, "eval_steps_per_second": 3.309, "step": 272 }, { "epoch": 35.0, "learning_rate": 6.578947368421053e-06, "loss": 0.2286, "step": 280 }, { "epoch": 35.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.5312566161155701, "eval_runtime": 2.4167, "eval_samples_per_second": 44.276, "eval_steps_per_second": 2.897, "step": 280 }, { "epoch": 36.0, "eval_accuracy": 0.8130841121495327, "eval_loss": 0.5189207792282104, "eval_runtime": 2.2397, "eval_samples_per_second": 47.773, "eval_steps_per_second": 3.125, "step": 288 }, { "epoch": 36.25, "learning_rate": 4.9342105263157895e-06, "loss": 0.2008, "step": 290 }, { "epoch": 37.0, "eval_accuracy": 0.7850467289719626, "eval_loss": 0.5589626431465149, "eval_runtime": 2.5429, "eval_samples_per_second": 42.078, "eval_steps_per_second": 2.753, "step": 296 }, { "epoch": 37.5, "learning_rate": 3.2894736842105265e-06, "loss": 0.1884, "step": 300 }, { "epoch": 38.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.5488373041152954, "eval_runtime": 2.042, "eval_samples_per_second": 52.399, "eval_steps_per_second": 3.428, "step": 304 }, { "epoch": 38.75, "learning_rate": 1.6447368421052632e-06, "loss": 0.1819, "step": 310 }, { "epoch": 39.0, "eval_accuracy": 0.8037383177570093, "eval_loss": 0.556251585483551, "eval_runtime": 2.015, "eval_samples_per_second": 53.102, "eval_steps_per_second": 3.474, "step": 312 }, { "epoch": 40.0, "learning_rate": 0.0, "loss": 0.1698, "step": 320 }, { "epoch": 40.0, "eval_accuracy": 0.794392523364486, "eval_loss": 0.5678603053092957, "eval_runtime": 2.1445, "eval_samples_per_second": 49.894, "eval_steps_per_second": 3.264, "step": 320 }, { "epoch": 40.0, "step": 320, "total_flos": 1.5429806632629043e+18, "train_loss": 0.3920826520770788, "train_runtime": 766.5439, "train_samples_per_second": 25.987, "train_steps_per_second": 0.417 } ], "logging_steps": 10, "max_steps": 320, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 1.5429806632629043e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }