{ "best_metric": 0.8225806451612904, "best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-99", "epoch": 35.55555555555556, "eval_steps": 500, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.89, "eval_accuracy": 0.14516129032258066, "eval_loss": 1.7602994441986084, "eval_runtime": 2.148, "eval_samples_per_second": 28.864, "eval_steps_per_second": 1.862, "step": 4 }, { "epoch": 2.0, "eval_accuracy": 0.14516129032258066, "eval_loss": 1.685233473777771, "eval_runtime": 2.219, "eval_samples_per_second": 27.94, "eval_steps_per_second": 1.803, "step": 9 }, { "epoch": 2.22, "learning_rate": 3.125e-05, "loss": 1.7571, "step": 10 }, { "epoch": 2.89, "eval_accuracy": 0.14516129032258066, "eval_loss": 1.5655227899551392, "eval_runtime": 2.222, "eval_samples_per_second": 27.903, "eval_steps_per_second": 1.8, "step": 13 }, { "epoch": 4.0, "eval_accuracy": 0.14516129032258066, "eval_loss": 1.3815577030181885, "eval_runtime": 2.3696, "eval_samples_per_second": 26.165, "eval_steps_per_second": 1.688, "step": 18 }, { "epoch": 4.44, "learning_rate": 4.8611111111111115e-05, "loss": 1.5255, "step": 20 }, { "epoch": 4.89, "eval_accuracy": 0.3225806451612903, "eval_loss": 1.2598901987075806, "eval_runtime": 2.178, "eval_samples_per_second": 28.466, "eval_steps_per_second": 1.837, "step": 22 }, { "epoch": 6.0, "eval_accuracy": 0.4838709677419355, "eval_loss": 1.153410792350769, "eval_runtime": 2.227, "eval_samples_per_second": 27.84, "eval_steps_per_second": 1.796, "step": 27 }, { "epoch": 6.67, "learning_rate": 4.5138888888888894e-05, "loss": 1.2245, "step": 30 }, { "epoch": 6.89, "eval_accuracy": 0.4838709677419355, "eval_loss": 1.0641188621520996, "eval_runtime": 2.203, "eval_samples_per_second": 28.143, "eval_steps_per_second": 1.816, "step": 31 }, { "epoch": 8.0, "eval_accuracy": 0.43548387096774194, "eval_loss": 1.0371758937835693, "eval_runtime": 2.2355, "eval_samples_per_second": 27.734, "eval_steps_per_second": 1.789, "step": 36 }, { "epoch": 8.89, "learning_rate": 4.166666666666667e-05, "loss": 1.0438, "step": 40 }, { "epoch": 8.89, "eval_accuracy": 0.43548387096774194, "eval_loss": 0.9987961649894714, "eval_runtime": 2.3086, "eval_samples_per_second": 26.857, "eval_steps_per_second": 1.733, "step": 40 }, { "epoch": 10.0, "eval_accuracy": 0.5161290322580645, "eval_loss": 0.9259945154190063, "eval_runtime": 2.2905, "eval_samples_per_second": 27.068, "eval_steps_per_second": 1.746, "step": 45 }, { "epoch": 10.89, "eval_accuracy": 0.7096774193548387, "eval_loss": 0.9084866046905518, "eval_runtime": 2.2545, "eval_samples_per_second": 27.5, "eval_steps_per_second": 1.774, "step": 49 }, { "epoch": 11.11, "learning_rate": 3.8194444444444444e-05, "loss": 0.9727, "step": 50 }, { "epoch": 12.0, "eval_accuracy": 0.7258064516129032, "eval_loss": 0.843325674533844, "eval_runtime": 2.1865, "eval_samples_per_second": 28.355, "eval_steps_per_second": 1.829, "step": 54 }, { "epoch": 12.89, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.7529016137123108, "eval_runtime": 2.1995, "eval_samples_per_second": 28.188, "eval_steps_per_second": 1.819, "step": 58 }, { "epoch": 13.33, "learning_rate": 3.472222222222222e-05, "loss": 0.8469, "step": 60 }, { "epoch": 14.0, "eval_accuracy": 0.7580645161290323, "eval_loss": 0.7186572551727295, "eval_runtime": 2.2225, "eval_samples_per_second": 27.896, "eval_steps_per_second": 1.8, "step": 63 }, { "epoch": 14.89, "eval_accuracy": 0.7258064516129032, "eval_loss": 0.6805762648582458, "eval_runtime": 2.1855, "eval_samples_per_second": 28.369, "eval_steps_per_second": 1.83, "step": 67 }, { "epoch": 15.56, "learning_rate": 3.125e-05, "loss": 0.6908, "step": 70 }, { "epoch": 16.0, "eval_accuracy": 0.7580645161290323, "eval_loss": 0.6575707197189331, "eval_runtime": 2.2315, "eval_samples_per_second": 27.784, "eval_steps_per_second": 1.792, "step": 72 }, { "epoch": 16.89, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.5742202997207642, "eval_runtime": 2.261, "eval_samples_per_second": 27.421, "eval_steps_per_second": 1.769, "step": 76 }, { "epoch": 17.78, "learning_rate": 2.777777777777778e-05, "loss": 0.6064, "step": 80 }, { "epoch": 18.0, "eval_accuracy": 0.7580645161290323, "eval_loss": 0.6446634531021118, "eval_runtime": 2.3311, "eval_samples_per_second": 26.597, "eval_steps_per_second": 1.716, "step": 81 }, { "epoch": 18.89, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.5602142810821533, "eval_runtime": 2.1895, "eval_samples_per_second": 28.317, "eval_steps_per_second": 1.827, "step": 85 }, { "epoch": 20.0, "learning_rate": 2.4305555555555558e-05, "loss": 0.5303, "step": 90 }, { "epoch": 20.0, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.49427932500839233, "eval_runtime": 2.2745, "eval_samples_per_second": 27.258, "eval_steps_per_second": 1.759, "step": 90 }, { "epoch": 20.89, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.530381441116333, "eval_runtime": 2.142, "eval_samples_per_second": 28.945, "eval_steps_per_second": 1.867, "step": 94 }, { "epoch": 22.0, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.48010584712028503, "eval_runtime": 2.3616, "eval_samples_per_second": 26.254, "eval_steps_per_second": 1.694, "step": 99 }, { "epoch": 22.22, "learning_rate": 2.0833333333333336e-05, "loss": 0.4903, "step": 100 }, { "epoch": 22.89, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.4848884344100952, "eval_runtime": 2.16, "eval_samples_per_second": 28.703, "eval_steps_per_second": 1.852, "step": 103 }, { "epoch": 24.0, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.5709980726242065, "eval_runtime": 2.1685, "eval_samples_per_second": 28.591, "eval_steps_per_second": 1.845, "step": 108 }, { "epoch": 24.44, "learning_rate": 1.736111111111111e-05, "loss": 0.4261, "step": 110 }, { "epoch": 24.89, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.4803168475627899, "eval_runtime": 2.2145, "eval_samples_per_second": 27.997, "eval_steps_per_second": 1.806, "step": 112 }, { "epoch": 26.0, "eval_accuracy": 0.7258064516129032, "eval_loss": 0.5670634508132935, "eval_runtime": 2.1993, "eval_samples_per_second": 28.191, "eval_steps_per_second": 1.819, "step": 117 }, { "epoch": 26.67, "learning_rate": 1.388888888888889e-05, "loss": 0.4122, "step": 120 }, { "epoch": 26.89, "eval_accuracy": 0.8064516129032258, "eval_loss": 0.4585064947605133, "eval_runtime": 2.2065, "eval_samples_per_second": 28.098, "eval_steps_per_second": 1.813, "step": 121 }, { "epoch": 28.0, "eval_accuracy": 0.7096774193548387, "eval_loss": 0.5910329222679138, "eval_runtime": 2.23, "eval_samples_per_second": 27.802, "eval_steps_per_second": 1.794, "step": 126 }, { "epoch": 28.89, "learning_rate": 1.0416666666666668e-05, "loss": 0.3739, "step": 130 }, { "epoch": 28.89, "eval_accuracy": 0.7580645161290323, "eval_loss": 0.5821260213851929, "eval_runtime": 2.2145, "eval_samples_per_second": 27.997, "eval_steps_per_second": 1.806, "step": 130 }, { "epoch": 30.0, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.5329306125640869, "eval_runtime": 2.3526, "eval_samples_per_second": 26.354, "eval_steps_per_second": 1.7, "step": 135 }, { "epoch": 30.89, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.4422537386417389, "eval_runtime": 2.1625, "eval_samples_per_second": 28.67, "eval_steps_per_second": 1.85, "step": 139 }, { "epoch": 31.11, "learning_rate": 6.944444444444445e-06, "loss": 0.3896, "step": 140 }, { "epoch": 32.0, "eval_accuracy": 0.7580645161290323, "eval_loss": 0.47155243158340454, "eval_runtime": 2.1865, "eval_samples_per_second": 28.356, "eval_steps_per_second": 1.829, "step": 144 }, { "epoch": 32.89, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.4785827100276947, "eval_runtime": 2.232, "eval_samples_per_second": 27.777, "eval_steps_per_second": 1.792, "step": 148 }, { "epoch": 33.33, "learning_rate": 3.4722222222222224e-06, "loss": 0.3472, "step": 150 }, { "epoch": 34.0, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.45382845401763916, "eval_runtime": 2.19, "eval_samples_per_second": 28.31, "eval_steps_per_second": 1.826, "step": 153 }, { "epoch": 34.89, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.455282062292099, "eval_runtime": 2.2165, "eval_samples_per_second": 27.972, "eval_steps_per_second": 1.805, "step": 157 }, { "epoch": 35.56, "learning_rate": 0.0, "loss": 0.3349, "step": 160 }, { "epoch": 35.56, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.45279815793037415, "eval_runtime": 2.4931, "eval_samples_per_second": 24.869, "eval_steps_per_second": 1.604, "step": 160 }, { "epoch": 35.56, "step": 160, "total_flos": 7.931930389512192e+17, "train_loss": 0.7482577681541442, "train_runtime": 589.7762, "train_samples_per_second": 19.533, "train_steps_per_second": 0.271 } ], "logging_steps": 10, "max_steps": 160, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 7.931930389512192e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }