|
{ |
|
"best_metric": 0.9116022099447514, |
|
"best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-51", |
|
"epoch": 18.823529411764707, |
|
"eval_steps": 500, |
|
"global_step": 240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 5.9126081466674805, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.3069, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"eval_accuracy": 0.6077348066298343, |
|
"eval_loss": 0.9999544024467468, |
|
"eval_runtime": 1.4322, |
|
"eval_samples_per_second": 126.379, |
|
"eval_steps_per_second": 4.189, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.5686274509803921, |
|
"grad_norm": 7.789252758026123, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.8924, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.9607843137254903, |
|
"eval_accuracy": 0.8784530386740331, |
|
"eval_loss": 0.43337252736091614, |
|
"eval_runtime": 1.4151, |
|
"eval_samples_per_second": 127.902, |
|
"eval_steps_per_second": 4.24, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 6.7294697761535645, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.5365, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.980392156862745, |
|
"eval_accuracy": 0.9005524861878453, |
|
"eval_loss": 0.3143160939216614, |
|
"eval_runtime": 1.4092, |
|
"eval_samples_per_second": 128.446, |
|
"eval_steps_per_second": 4.258, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 3.1372549019607843, |
|
"grad_norm": 8.53775691986084, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.4119, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.9215686274509802, |
|
"grad_norm": 8.774258613586426, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.3814, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9116022099447514, |
|
"eval_loss": 0.28710222244262695, |
|
"eval_runtime": 1.4257, |
|
"eval_samples_per_second": 126.96, |
|
"eval_steps_per_second": 4.209, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 4.705882352941177, |
|
"grad_norm": 5.285433769226074, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.3336, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.9411764705882355, |
|
"eval_accuracy": 0.9116022099447514, |
|
"eval_loss": 0.2963091731071472, |
|
"eval_runtime": 1.415, |
|
"eval_samples_per_second": 127.916, |
|
"eval_steps_per_second": 4.24, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 5.490196078431373, |
|
"grad_norm": 7.04965353012085, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.353, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.96078431372549, |
|
"eval_accuracy": 0.8729281767955801, |
|
"eval_loss": 0.31954672932624817, |
|
"eval_runtime": 1.4556, |
|
"eval_samples_per_second": 124.344, |
|
"eval_steps_per_second": 4.122, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 6.2745098039215685, |
|
"grad_norm": 5.833162307739258, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.3069, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.980392156862745, |
|
"eval_accuracy": 0.9116022099447514, |
|
"eval_loss": 0.29521241784095764, |
|
"eval_runtime": 1.4158, |
|
"eval_samples_per_second": 127.846, |
|
"eval_steps_per_second": 4.238, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 7.0588235294117645, |
|
"grad_norm": 5.050061225891113, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.2789, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 7.8431372549019605, |
|
"grad_norm": 4.222379207611084, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.293, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8895027624309392, |
|
"eval_loss": 0.3174145817756653, |
|
"eval_runtime": 1.4186, |
|
"eval_samples_per_second": 127.591, |
|
"eval_steps_per_second": 4.23, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 8.627450980392156, |
|
"grad_norm": 7.039156436920166, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.2667, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.941176470588236, |
|
"eval_accuracy": 0.8950276243093923, |
|
"eval_loss": 0.3225868344306946, |
|
"eval_runtime": 1.4137, |
|
"eval_samples_per_second": 128.03, |
|
"eval_steps_per_second": 4.244, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 9.411764705882353, |
|
"grad_norm": 5.598822593688965, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.2424, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 9.96078431372549, |
|
"eval_accuracy": 0.8895027624309392, |
|
"eval_loss": 0.3213161826133728, |
|
"eval_runtime": 1.4313, |
|
"eval_samples_per_second": 126.455, |
|
"eval_steps_per_second": 4.192, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 10.196078431372548, |
|
"grad_norm": 4.785697937011719, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.2544, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 10.980392156862745, |
|
"grad_norm": 5.349719047546387, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.2605, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.980392156862745, |
|
"eval_accuracy": 0.8895027624309392, |
|
"eval_loss": 0.31716108322143555, |
|
"eval_runtime": 1.4269, |
|
"eval_samples_per_second": 126.846, |
|
"eval_steps_per_second": 4.205, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 11.764705882352942, |
|
"grad_norm": 6.121713161468506, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.232, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8895027624309392, |
|
"eval_loss": 0.33846884965896606, |
|
"eval_runtime": 1.406, |
|
"eval_samples_per_second": 128.737, |
|
"eval_steps_per_second": 4.268, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 12.549019607843137, |
|
"grad_norm": 7.647618770599365, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.242, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 12.941176470588236, |
|
"eval_accuracy": 0.8950276243093923, |
|
"eval_loss": 0.32744264602661133, |
|
"eval_runtime": 1.4273, |
|
"eval_samples_per_second": 126.813, |
|
"eval_steps_per_second": 4.204, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 6.248785972595215, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.215, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 13.96078431372549, |
|
"eval_accuracy": 0.8950276243093923, |
|
"eval_loss": 0.33850720524787903, |
|
"eval_runtime": 1.4433, |
|
"eval_samples_per_second": 125.407, |
|
"eval_steps_per_second": 4.157, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 14.117647058823529, |
|
"grad_norm": 5.345800876617432, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.2123, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 14.901960784313726, |
|
"grad_norm": 5.421293258666992, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.2131, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 14.980392156862745, |
|
"eval_accuracy": 0.8950276243093923, |
|
"eval_loss": 0.34223416447639465, |
|
"eval_runtime": 1.409, |
|
"eval_samples_per_second": 128.462, |
|
"eval_steps_per_second": 4.258, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 15.686274509803921, |
|
"grad_norm": 4.188720703125, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.201, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8784530386740331, |
|
"eval_loss": 0.341948539018631, |
|
"eval_runtime": 1.4191, |
|
"eval_samples_per_second": 127.549, |
|
"eval_steps_per_second": 4.228, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 16.470588235294116, |
|
"grad_norm": 4.887516498565674, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1976, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 16.941176470588236, |
|
"eval_accuracy": 0.9005524861878453, |
|
"eval_loss": 0.3447644114494324, |
|
"eval_runtime": 1.4043, |
|
"eval_samples_per_second": 128.89, |
|
"eval_steps_per_second": 4.273, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 17.254901960784313, |
|
"grad_norm": 5.946260452270508, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.1886, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 17.96078431372549, |
|
"eval_accuracy": 0.8895027624309392, |
|
"eval_loss": 0.3459985554218292, |
|
"eval_runtime": 1.4593, |
|
"eval_samples_per_second": 124.035, |
|
"eval_steps_per_second": 4.112, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 18.03921568627451, |
|
"grad_norm": 6.021714210510254, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.2, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"grad_norm": 3.8110241889953613, |
|
"learning_rate": 0.0, |
|
"loss": 0.1972, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"eval_accuracy": 0.8895027624309392, |
|
"eval_loss": 0.34530630707740784, |
|
"eval_runtime": 1.454, |
|
"eval_samples_per_second": 124.486, |
|
"eval_steps_per_second": 4.127, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"step": 240, |
|
"total_flos": 7.600391915087462e+17, |
|
"train_loss": 0.3423821290334066, |
|
"train_runtime": 426.6472, |
|
"train_samples_per_second": 76.128, |
|
"train_steps_per_second": 0.563 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"CustomEarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.600391915087462e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|