{ "best_metric": 0.8071570576540755, "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-101", "epoch": 53.333333333333336, "eval_steps": 500, "global_step": 360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8888888888888888, "eval_accuracy": 0.6898608349900597, "eval_loss": 0.6376240849494934, "eval_runtime": 2.8661, "eval_samples_per_second": 175.501, "eval_steps_per_second": 5.583, "step": 6 }, { "epoch": 1.4814814814814814, "grad_norm": 9.892236709594727, "learning_rate": 1.388888888888889e-05, "loss": 0.6757, "step": 10 }, { "epoch": 1.925925925925926, "eval_accuracy": 0.6938369781312127, "eval_loss": 0.6052560806274414, "eval_runtime": 2.8417, "eval_samples_per_second": 177.006, "eval_steps_per_second": 5.63, "step": 13 }, { "epoch": 2.962962962962963, "grad_norm": 8.984474182128906, "learning_rate": 2.777777777777778e-05, "loss": 0.5472, "step": 20 }, { "epoch": 2.962962962962963, "eval_accuracy": 0.7256461232604374, "eval_loss": 0.5903410315513611, "eval_runtime": 2.7461, "eval_samples_per_second": 183.169, "eval_steps_per_second": 5.826, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.731610337972167, "eval_loss": 0.5782421827316284, "eval_runtime": 2.7061, "eval_samples_per_second": 185.878, "eval_steps_per_second": 5.913, "step": 27 }, { "epoch": 4.444444444444445, "grad_norm": 13.92780590057373, "learning_rate": 4.166666666666667e-05, "loss": 0.4628, "step": 30 }, { "epoch": 4.888888888888889, "eval_accuracy": 0.7455268389662028, "eval_loss": 0.5979239344596863, "eval_runtime": 2.6102, "eval_samples_per_second": 192.706, "eval_steps_per_second": 6.13, "step": 33 }, { "epoch": 5.925925925925926, "grad_norm": 25.355466842651367, "learning_rate": 4.938271604938271e-05, "loss": 0.4181, "step": 40 }, { "epoch": 5.925925925925926, "eval_accuracy": 0.7614314115308151, "eval_loss": 0.5735302567481995, "eval_runtime": 2.7235, "eval_samples_per_second": 184.688, "eval_steps_per_second": 5.875, "step": 40 }, { "epoch": 6.962962962962963, "eval_accuracy": 0.7495029821073559, "eval_loss": 0.5251761078834534, "eval_runtime": 2.662, "eval_samples_per_second": 188.956, "eval_steps_per_second": 6.011, "step": 47 }, { "epoch": 7.407407407407407, "grad_norm": 46.14825439453125, "learning_rate": 4.783950617283951e-05, "loss": 0.4079, "step": 50 }, { "epoch": 8.0, "eval_accuracy": 0.7475149105367793, "eval_loss": 0.5362666249275208, "eval_runtime": 2.6877, "eval_samples_per_second": 187.151, "eval_steps_per_second": 5.953, "step": 54 }, { "epoch": 8.88888888888889, "grad_norm": 22.70941925048828, "learning_rate": 4.62962962962963e-05, "loss": 0.4102, "step": 60 }, { "epoch": 8.88888888888889, "eval_accuracy": 0.7495029821073559, "eval_loss": 0.5288791060447693, "eval_runtime": 2.7002, "eval_samples_per_second": 186.285, "eval_steps_per_second": 5.926, "step": 60 }, { "epoch": 9.925925925925926, "eval_accuracy": 0.7534791252485089, "eval_loss": 0.522729754447937, "eval_runtime": 2.7539, "eval_samples_per_second": 182.652, "eval_steps_per_second": 5.81, "step": 67 }, { "epoch": 10.37037037037037, "grad_norm": 92.61900329589844, "learning_rate": 4.4753086419753084e-05, "loss": 0.373, "step": 70 }, { "epoch": 10.962962962962964, "eval_accuracy": 0.7773359840954275, "eval_loss": 0.46767404675483704, "eval_runtime": 2.7441, "eval_samples_per_second": 183.301, "eval_steps_per_second": 5.831, "step": 74 }, { "epoch": 11.851851851851851, "grad_norm": 13.839974403381348, "learning_rate": 4.3209876543209875e-05, "loss": 0.3639, "step": 80 }, { "epoch": 12.0, "eval_accuracy": 0.7813121272365805, "eval_loss": 0.4978141188621521, "eval_runtime": 2.7627, "eval_samples_per_second": 182.069, "eval_steps_per_second": 5.791, "step": 81 }, { "epoch": 12.88888888888889, "eval_accuracy": 0.7992047713717694, "eval_loss": 0.465084969997406, "eval_runtime": 2.7393, "eval_samples_per_second": 183.625, "eval_steps_per_second": 5.841, "step": 87 }, { "epoch": 13.333333333333334, "grad_norm": 12.678291320800781, "learning_rate": 4.166666666666667e-05, "loss": 0.3779, "step": 90 }, { "epoch": 13.925925925925926, "eval_accuracy": 0.7912524850894632, "eval_loss": 0.4737919569015503, "eval_runtime": 2.7321, "eval_samples_per_second": 184.107, "eval_steps_per_second": 5.856, "step": 94 }, { "epoch": 14.814814814814815, "grad_norm": 13.37376594543457, "learning_rate": 4.012345679012346e-05, "loss": 0.3476, "step": 100 }, { "epoch": 14.962962962962964, "eval_accuracy": 0.8071570576540755, "eval_loss": 0.46969088912010193, "eval_runtime": 2.704, "eval_samples_per_second": 186.017, "eval_steps_per_second": 5.917, "step": 101 }, { "epoch": 16.0, "eval_accuracy": 0.7952286282306164, "eval_loss": 0.47190144658088684, "eval_runtime": 2.651, "eval_samples_per_second": 189.741, "eval_steps_per_second": 6.036, "step": 108 }, { "epoch": 16.296296296296298, "grad_norm": 36.415714263916016, "learning_rate": 3.8580246913580246e-05, "loss": 0.3467, "step": 110 }, { "epoch": 16.88888888888889, "eval_accuracy": 0.7892644135188867, "eval_loss": 0.4551805257797241, "eval_runtime": 2.7016, "eval_samples_per_second": 186.184, "eval_steps_per_second": 5.922, "step": 114 }, { "epoch": 17.77777777777778, "grad_norm": 21.425716400146484, "learning_rate": 3.7037037037037037e-05, "loss": 0.3496, "step": 120 }, { "epoch": 17.925925925925927, "eval_accuracy": 0.7713717693836978, "eval_loss": 0.5186495780944824, "eval_runtime": 2.9189, "eval_samples_per_second": 172.324, "eval_steps_per_second": 5.481, "step": 121 }, { "epoch": 18.962962962962962, "eval_accuracy": 0.7952286282306164, "eval_loss": 0.4575484097003937, "eval_runtime": 2.8214, "eval_samples_per_second": 178.277, "eval_steps_per_second": 5.671, "step": 128 }, { "epoch": 19.25925925925926, "grad_norm": 25.632354736328125, "learning_rate": 3.5493827160493834e-05, "loss": 0.3657, "step": 130 }, { "epoch": 20.0, "eval_accuracy": 0.7793240556660039, "eval_loss": 0.476377010345459, "eval_runtime": 2.6973, "eval_samples_per_second": 186.481, "eval_steps_per_second": 5.932, "step": 135 }, { "epoch": 20.74074074074074, "grad_norm": 37.57546615600586, "learning_rate": 3.395061728395062e-05, "loss": 0.3888, "step": 140 }, { "epoch": 20.88888888888889, "eval_accuracy": 0.7713717693836978, "eval_loss": 0.5008699297904968, "eval_runtime": 2.7366, "eval_samples_per_second": 183.807, "eval_steps_per_second": 5.847, "step": 141 }, { "epoch": 21.925925925925927, "eval_accuracy": 0.7813121272365805, "eval_loss": 0.46731534600257874, "eval_runtime": 2.8548, "eval_samples_per_second": 176.192, "eval_steps_per_second": 5.605, "step": 148 }, { "epoch": 22.22222222222222, "grad_norm": 23.367645263671875, "learning_rate": 3.240740740740741e-05, "loss": 0.3236, "step": 150 }, { "epoch": 22.962962962962962, "eval_accuracy": 0.7753479125248509, "eval_loss": 0.493118554353714, "eval_runtime": 2.8057, "eval_samples_per_second": 179.277, "eval_steps_per_second": 5.703, "step": 155 }, { "epoch": 23.703703703703702, "grad_norm": 41.31706237792969, "learning_rate": 3.08641975308642e-05, "loss": 0.3179, "step": 160 }, { "epoch": 24.0, "eval_accuracy": 0.7654075546719682, "eval_loss": 0.4837174415588379, "eval_runtime": 2.8061, "eval_samples_per_second": 179.252, "eval_steps_per_second": 5.702, "step": 162 }, { "epoch": 24.88888888888889, "eval_accuracy": 0.7693836978131213, "eval_loss": 0.4651556611061096, "eval_runtime": 2.7801, "eval_samples_per_second": 180.927, "eval_steps_per_second": 5.755, "step": 168 }, { "epoch": 25.185185185185187, "grad_norm": 48.94294357299805, "learning_rate": 2.9320987654320992e-05, "loss": 0.327, "step": 170 }, { "epoch": 25.925925925925927, "eval_accuracy": 0.7495029821073559, "eval_loss": 0.5107513070106506, "eval_runtime": 2.746, "eval_samples_per_second": 183.173, "eval_steps_per_second": 5.827, "step": 175 }, { "epoch": 26.666666666666668, "grad_norm": 17.72180938720703, "learning_rate": 2.777777777777778e-05, "loss": 0.3253, "step": 180 }, { "epoch": 26.962962962962962, "eval_accuracy": 0.7833001988071571, "eval_loss": 0.4423621594905853, "eval_runtime": 2.851, "eval_samples_per_second": 176.432, "eval_steps_per_second": 5.612, "step": 182 }, { "epoch": 28.0, "eval_accuracy": 0.7335984095427436, "eval_loss": 0.5621975064277649, "eval_runtime": 2.8441, "eval_samples_per_second": 176.859, "eval_steps_per_second": 5.626, "step": 189 }, { "epoch": 28.14814814814815, "grad_norm": 26.241844177246094, "learning_rate": 2.623456790123457e-05, "loss": 0.3382, "step": 190 }, { "epoch": 28.88888888888889, "eval_accuracy": 0.7693836978131213, "eval_loss": 0.5067819952964783, "eval_runtime": 2.8675, "eval_samples_per_second": 175.415, "eval_steps_per_second": 5.58, "step": 195 }, { "epoch": 29.62962962962963, "grad_norm": 12.02319049835205, "learning_rate": 2.4691358024691357e-05, "loss": 0.331, "step": 200 }, { "epoch": 29.925925925925927, "eval_accuracy": 0.7693836978131213, "eval_loss": 0.45300325751304626, "eval_runtime": 2.7755, "eval_samples_per_second": 181.232, "eval_steps_per_second": 5.765, "step": 202 }, { "epoch": 30.962962962962962, "eval_accuracy": 0.731610337972167, "eval_loss": 0.5205386877059937, "eval_runtime": 2.8057, "eval_samples_per_second": 179.276, "eval_steps_per_second": 5.703, "step": 209 }, { "epoch": 31.11111111111111, "grad_norm": 14.40517807006836, "learning_rate": 2.314814814814815e-05, "loss": 0.3302, "step": 210 }, { "epoch": 32.0, "eval_accuracy": 0.7852882703777336, "eval_loss": 0.4385511875152588, "eval_runtime": 2.8045, "eval_samples_per_second": 179.357, "eval_steps_per_second": 5.705, "step": 216 }, { "epoch": 32.592592592592595, "grad_norm": 18.574872970581055, "learning_rate": 2.1604938271604937e-05, "loss": 0.2972, "step": 220 }, { "epoch": 32.888888888888886, "eval_accuracy": 0.7773359840954275, "eval_loss": 0.5030562281608582, "eval_runtime": 2.6994, "eval_samples_per_second": 186.336, "eval_steps_per_second": 5.927, "step": 222 }, { "epoch": 33.925925925925924, "eval_accuracy": 0.757455268389662, "eval_loss": 0.49088525772094727, "eval_runtime": 2.709, "eval_samples_per_second": 185.676, "eval_steps_per_second": 5.906, "step": 229 }, { "epoch": 34.074074074074076, "grad_norm": 21.681509017944336, "learning_rate": 2.006172839506173e-05, "loss": 0.3121, "step": 230 }, { "epoch": 34.96296296296296, "eval_accuracy": 0.7793240556660039, "eval_loss": 0.47658684849739075, "eval_runtime": 2.7374, "eval_samples_per_second": 183.751, "eval_steps_per_second": 5.845, "step": 236 }, { "epoch": 35.55555555555556, "grad_norm": 29.247716903686523, "learning_rate": 1.8518518518518518e-05, "loss": 0.2956, "step": 240 }, { "epoch": 36.0, "eval_accuracy": 0.7415506958250497, "eval_loss": 0.5262213945388794, "eval_runtime": 2.8701, "eval_samples_per_second": 175.256, "eval_steps_per_second": 5.575, "step": 243 }, { "epoch": 36.888888888888886, "eval_accuracy": 0.731610337972167, "eval_loss": 0.5373868942260742, "eval_runtime": 2.878, "eval_samples_per_second": 174.771, "eval_steps_per_second": 5.559, "step": 249 }, { "epoch": 37.03703703703704, "grad_norm": 13.55726146697998, "learning_rate": 1.697530864197531e-05, "loss": 0.2947, "step": 250 }, { "epoch": 37.925925925925924, "eval_accuracy": 0.7673956262425448, "eval_loss": 0.48880261182785034, "eval_runtime": 2.7134, "eval_samples_per_second": 185.373, "eval_steps_per_second": 5.897, "step": 256 }, { "epoch": 38.51851851851852, "grad_norm": 8.393943786621094, "learning_rate": 1.54320987654321e-05, "loss": 0.2662, "step": 260 }, { "epoch": 38.96296296296296, "eval_accuracy": 0.7693836978131213, "eval_loss": 0.4880698323249817, "eval_runtime": 2.7354, "eval_samples_per_second": 183.888, "eval_steps_per_second": 5.849, "step": 263 }, { "epoch": 40.0, "grad_norm": 24.057205200195312, "learning_rate": 1.388888888888889e-05, "loss": 0.2826, "step": 270 }, { "epoch": 40.0, "eval_accuracy": 0.7892644135188867, "eval_loss": 0.46687519550323486, "eval_runtime": 2.7865, "eval_samples_per_second": 180.514, "eval_steps_per_second": 5.742, "step": 270 }, { "epoch": 40.888888888888886, "eval_accuracy": 0.7972166998011928, "eval_loss": 0.45914533734321594, "eval_runtime": 2.8041, "eval_samples_per_second": 179.381, "eval_steps_per_second": 5.706, "step": 276 }, { "epoch": 41.48148148148148, "grad_norm": 20.68549346923828, "learning_rate": 1.2345679012345678e-05, "loss": 0.2768, "step": 280 }, { "epoch": 41.925925925925924, "eval_accuracy": 0.757455268389662, "eval_loss": 0.5089908838272095, "eval_runtime": 2.9736, "eval_samples_per_second": 169.157, "eval_steps_per_second": 5.381, "step": 283 }, { "epoch": 42.96296296296296, "grad_norm": 9.9234619140625, "learning_rate": 1.0802469135802469e-05, "loss": 0.2836, "step": 290 }, { "epoch": 42.96296296296296, "eval_accuracy": 0.7495029821073559, "eval_loss": 0.5249876379966736, "eval_runtime": 2.8985, "eval_samples_per_second": 173.54, "eval_steps_per_second": 5.52, "step": 290 }, { "epoch": 44.0, "eval_accuracy": 0.7654075546719682, "eval_loss": 0.4747855067253113, "eval_runtime": 2.9057, "eval_samples_per_second": 173.108, "eval_steps_per_second": 5.506, "step": 297 }, { "epoch": 44.44444444444444, "grad_norm": 19.76637840270996, "learning_rate": 9.259259259259259e-06, "loss": 0.2724, "step": 300 }, { "epoch": 44.888888888888886, "eval_accuracy": 0.7833001988071571, "eval_loss": 0.44288724660873413, "eval_runtime": 2.7865, "eval_samples_per_second": 180.51, "eval_steps_per_second": 5.742, "step": 303 }, { "epoch": 45.925925925925924, "grad_norm": 12.390064239501953, "learning_rate": 7.71604938271605e-06, "loss": 0.2498, "step": 310 }, { "epoch": 45.925925925925924, "eval_accuracy": 0.7892644135188867, "eval_loss": 0.4459961950778961, "eval_runtime": 2.8017, "eval_samples_per_second": 179.537, "eval_steps_per_second": 5.711, "step": 310 }, { "epoch": 46.96296296296296, "eval_accuracy": 0.7793240556660039, "eval_loss": 0.4721997082233429, "eval_runtime": 2.8302, "eval_samples_per_second": 177.729, "eval_steps_per_second": 5.653, "step": 317 }, { "epoch": 47.407407407407405, "grad_norm": 11.559773445129395, "learning_rate": 6.172839506172839e-06, "loss": 0.2893, "step": 320 }, { "epoch": 48.0, "eval_accuracy": 0.7713717693836978, "eval_loss": 0.47993555665016174, "eval_runtime": 2.9229, "eval_samples_per_second": 172.087, "eval_steps_per_second": 5.474, "step": 324 }, { "epoch": 48.888888888888886, "grad_norm": 11.171250343322754, "learning_rate": 4.6296296296296296e-06, "loss": 0.2618, "step": 330 }, { "epoch": 48.888888888888886, "eval_accuracy": 0.7713717693836978, "eval_loss": 0.4849596321582794, "eval_runtime": 2.8546, "eval_samples_per_second": 176.209, "eval_steps_per_second": 5.605, "step": 330 }, { "epoch": 49.925925925925924, "eval_accuracy": 0.7495029821073559, "eval_loss": 0.5151545405387878, "eval_runtime": 2.8154, "eval_samples_per_second": 178.658, "eval_steps_per_second": 5.683, "step": 337 }, { "epoch": 50.370370370370374, "grad_norm": 16.287992477416992, "learning_rate": 3.0864197530864196e-06, "loss": 0.2664, "step": 340 }, { "epoch": 50.96296296296296, "eval_accuracy": 0.7395626242544732, "eval_loss": 0.5347036123275757, "eval_runtime": 2.8326, "eval_samples_per_second": 177.574, "eval_steps_per_second": 5.648, "step": 344 }, { "epoch": 51.851851851851855, "grad_norm": 15.289400100708008, "learning_rate": 1.5432098765432098e-06, "loss": 0.27, "step": 350 }, { "epoch": 52.0, "eval_accuracy": 0.7415506958250497, "eval_loss": 0.5342876315116882, "eval_runtime": 2.8365, "eval_samples_per_second": 177.329, "eval_steps_per_second": 5.641, "step": 351 }, { "epoch": 52.888888888888886, "eval_accuracy": 0.7415506958250497, "eval_loss": 0.5330411195755005, "eval_runtime": 2.823, "eval_samples_per_second": 178.181, "eval_steps_per_second": 5.668, "step": 357 }, { "epoch": 53.333333333333336, "grad_norm": 15.15584945678711, "learning_rate": 0.0, "loss": 0.2539, "step": 360 }, { "epoch": 53.333333333333336, "eval_accuracy": 0.7395626242544732, "eval_loss": 0.5319550037384033, "eval_runtime": 2.8191, "eval_samples_per_second": 178.427, "eval_steps_per_second": 5.676, "step": 360 }, { "epoch": 53.333333333333336, "step": 360, "total_flos": 1.140094502803243e+18, "train_loss": 0.34168325927522447, "train_runtime": 709.0525, "train_samples_per_second": 72.773, "train_steps_per_second": 0.508 } ], "logging_steps": 10, "max_steps": 360, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.140094502803243e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }