{ "best_metric": 0.9116022099447514, "best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-51", "epoch": 18.823529411764707, "eval_steps": 500, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7843137254901961, "grad_norm": 5.9126081466674805, "learning_rate": 2.0833333333333336e-05, "loss": 1.3069, "step": 10 }, { "epoch": 0.9411764705882353, "eval_accuracy": 0.6077348066298343, "eval_loss": 0.9999544024467468, "eval_runtime": 1.4322, "eval_samples_per_second": 126.379, "eval_steps_per_second": 4.189, "step": 12 }, { "epoch": 1.5686274509803921, "grad_norm": 7.789252758026123, "learning_rate": 4.166666666666667e-05, "loss": 0.8924, "step": 20 }, { "epoch": 1.9607843137254903, "eval_accuracy": 0.8784530386740331, "eval_loss": 0.43337252736091614, "eval_runtime": 1.4151, "eval_samples_per_second": 127.902, "eval_steps_per_second": 4.24, "step": 25 }, { "epoch": 2.3529411764705883, "grad_norm": 6.7294697761535645, "learning_rate": 4.8611111111111115e-05, "loss": 0.5365, "step": 30 }, { "epoch": 2.980392156862745, "eval_accuracy": 0.9005524861878453, "eval_loss": 0.3143160939216614, "eval_runtime": 1.4092, "eval_samples_per_second": 128.446, "eval_steps_per_second": 4.258, "step": 38 }, { "epoch": 3.1372549019607843, "grad_norm": 8.53775691986084, "learning_rate": 4.62962962962963e-05, "loss": 0.4119, "step": 40 }, { "epoch": 3.9215686274509802, "grad_norm": 8.774258613586426, "learning_rate": 4.3981481481481486e-05, "loss": 0.3814, "step": 50 }, { "epoch": 4.0, "eval_accuracy": 0.9116022099447514, "eval_loss": 0.28710222244262695, "eval_runtime": 1.4257, "eval_samples_per_second": 126.96, "eval_steps_per_second": 4.209, "step": 51 }, { "epoch": 4.705882352941177, "grad_norm": 5.285433769226074, "learning_rate": 4.166666666666667e-05, "loss": 0.3336, "step": 60 }, { "epoch": 4.9411764705882355, "eval_accuracy": 0.9116022099447514, "eval_loss": 0.2963091731071472, "eval_runtime": 1.415, "eval_samples_per_second": 127.916, "eval_steps_per_second": 4.24, "step": 63 }, { "epoch": 5.490196078431373, "grad_norm": 7.04965353012085, "learning_rate": 3.935185185185186e-05, "loss": 0.353, "step": 70 }, { "epoch": 5.96078431372549, "eval_accuracy": 0.8729281767955801, "eval_loss": 0.31954672932624817, "eval_runtime": 1.4556, "eval_samples_per_second": 124.344, "eval_steps_per_second": 4.122, "step": 76 }, { "epoch": 6.2745098039215685, "grad_norm": 5.833162307739258, "learning_rate": 3.7037037037037037e-05, "loss": 0.3069, "step": 80 }, { "epoch": 6.980392156862745, "eval_accuracy": 0.9116022099447514, "eval_loss": 0.29521241784095764, "eval_runtime": 1.4158, "eval_samples_per_second": 127.846, "eval_steps_per_second": 4.238, "step": 89 }, { "epoch": 7.0588235294117645, "grad_norm": 5.050061225891113, "learning_rate": 3.472222222222222e-05, "loss": 0.2789, "step": 90 }, { "epoch": 7.8431372549019605, "grad_norm": 4.222379207611084, "learning_rate": 3.240740740740741e-05, "loss": 0.293, "step": 100 }, { "epoch": 8.0, "eval_accuracy": 0.8895027624309392, "eval_loss": 0.3174145817756653, "eval_runtime": 1.4186, "eval_samples_per_second": 127.591, "eval_steps_per_second": 4.23, "step": 102 }, { "epoch": 8.627450980392156, "grad_norm": 7.039156436920166, "learning_rate": 3.0092592592592593e-05, "loss": 0.2667, "step": 110 }, { "epoch": 8.941176470588236, "eval_accuracy": 0.8950276243093923, "eval_loss": 0.3225868344306946, "eval_runtime": 1.4137, "eval_samples_per_second": 128.03, "eval_steps_per_second": 4.244, "step": 114 }, { "epoch": 9.411764705882353, "grad_norm": 5.598822593688965, "learning_rate": 2.777777777777778e-05, "loss": 0.2424, "step": 120 }, { "epoch": 9.96078431372549, "eval_accuracy": 0.8895027624309392, "eval_loss": 0.3213161826133728, "eval_runtime": 1.4313, "eval_samples_per_second": 126.455, "eval_steps_per_second": 4.192, "step": 127 }, { "epoch": 10.196078431372548, "grad_norm": 4.785697937011719, "learning_rate": 2.5462962962962965e-05, "loss": 0.2544, "step": 130 }, { "epoch": 10.980392156862745, "grad_norm": 5.349719047546387, "learning_rate": 2.314814814814815e-05, "loss": 0.2605, "step": 140 }, { "epoch": 10.980392156862745, "eval_accuracy": 0.8895027624309392, "eval_loss": 0.31716108322143555, "eval_runtime": 1.4269, "eval_samples_per_second": 126.846, "eval_steps_per_second": 4.205, "step": 140 }, { "epoch": 11.764705882352942, "grad_norm": 6.121713161468506, "learning_rate": 2.0833333333333336e-05, "loss": 0.232, "step": 150 }, { "epoch": 12.0, "eval_accuracy": 0.8895027624309392, "eval_loss": 0.33846884965896606, "eval_runtime": 1.406, "eval_samples_per_second": 128.737, "eval_steps_per_second": 4.268, "step": 153 }, { "epoch": 12.549019607843137, "grad_norm": 7.647618770599365, "learning_rate": 1.8518518518518518e-05, "loss": 0.242, "step": 160 }, { "epoch": 12.941176470588236, "eval_accuracy": 0.8950276243093923, "eval_loss": 0.32744264602661133, "eval_runtime": 1.4273, "eval_samples_per_second": 126.813, "eval_steps_per_second": 4.204, "step": 165 }, { "epoch": 13.333333333333334, "grad_norm": 6.248785972595215, "learning_rate": 1.6203703703703704e-05, "loss": 0.215, "step": 170 }, { "epoch": 13.96078431372549, "eval_accuracy": 0.8950276243093923, "eval_loss": 0.33850720524787903, "eval_runtime": 1.4433, "eval_samples_per_second": 125.407, "eval_steps_per_second": 4.157, "step": 178 }, { "epoch": 14.117647058823529, "grad_norm": 5.345800876617432, "learning_rate": 1.388888888888889e-05, "loss": 0.2123, "step": 180 }, { "epoch": 14.901960784313726, "grad_norm": 5.421293258666992, "learning_rate": 1.1574074074074075e-05, "loss": 0.2131, "step": 190 }, { "epoch": 14.980392156862745, "eval_accuracy": 0.8950276243093923, "eval_loss": 0.34223416447639465, "eval_runtime": 1.409, "eval_samples_per_second": 128.462, "eval_steps_per_second": 4.258, "step": 191 }, { "epoch": 15.686274509803921, "grad_norm": 4.188720703125, "learning_rate": 9.259259259259259e-06, "loss": 0.201, "step": 200 }, { "epoch": 16.0, "eval_accuracy": 0.8784530386740331, "eval_loss": 0.341948539018631, "eval_runtime": 1.4191, "eval_samples_per_second": 127.549, "eval_steps_per_second": 4.228, "step": 204 }, { "epoch": 16.470588235294116, "grad_norm": 4.887516498565674, "learning_rate": 6.944444444444445e-06, "loss": 0.1976, "step": 210 }, { "epoch": 16.941176470588236, "eval_accuracy": 0.9005524861878453, "eval_loss": 0.3447644114494324, "eval_runtime": 1.4043, "eval_samples_per_second": 128.89, "eval_steps_per_second": 4.273, "step": 216 }, { "epoch": 17.254901960784313, "grad_norm": 5.946260452270508, "learning_rate": 4.6296296296296296e-06, "loss": 0.1886, "step": 220 }, { "epoch": 17.96078431372549, "eval_accuracy": 0.8895027624309392, "eval_loss": 0.3459985554218292, "eval_runtime": 1.4593, "eval_samples_per_second": 124.035, "eval_steps_per_second": 4.112, "step": 229 }, { "epoch": 18.03921568627451, "grad_norm": 6.021714210510254, "learning_rate": 2.3148148148148148e-06, "loss": 0.2, "step": 230 }, { "epoch": 18.823529411764707, "grad_norm": 3.8110241889953613, "learning_rate": 0.0, "loss": 0.1972, "step": 240 }, { "epoch": 18.823529411764707, "eval_accuracy": 0.8895027624309392, "eval_loss": 0.34530630707740784, "eval_runtime": 1.454, "eval_samples_per_second": 124.486, "eval_steps_per_second": 4.127, "step": 240 }, { "epoch": 18.823529411764707, "step": 240, "total_flos": 7.600391915087462e+17, "train_loss": 0.3423821290334066, "train_runtime": 426.6472, "train_samples_per_second": 76.128, "train_steps_per_second": 0.563 } ], "logging_steps": 10, "max_steps": 240, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "CustomEarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.600391915087462e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }