{ "best_metric": 0.4536682665348053, "best_model_checkpoint": "SavedModels/Cal_ViT-large-patch16-224_A.ipynb/checkpoint-504", "epoch": 30.0, "eval_steps": 500, "global_step": 3780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7936507936507936, "grad_norm": 7.900213241577148, "learning_rate": 0.000292063492063492, "loss": 0.9853, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.744, "eval_f1": 0.7302269434370317, "eval_loss": 0.7107657194137573, "eval_precision": 0.7712189868380369, "eval_recall": 0.7286540520411489, "eval_runtime": 12.0784, "eval_samples_per_second": 20.698, "eval_steps_per_second": 2.649, "step": 126 }, { "epoch": 1.5873015873015874, "grad_norm": 8.41816234588623, "learning_rate": 0.0002841269841269841, "loss": 0.6452, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.82, "eval_f1": 0.8043829742202345, "eval_loss": 0.4917103350162506, "eval_precision": 0.8345023770555686, "eval_recall": 0.8097303681981102, "eval_runtime": 11.4769, "eval_samples_per_second": 21.783, "eval_steps_per_second": 2.788, "step": 252 }, { "epoch": 2.380952380952381, "grad_norm": 16.99386215209961, "learning_rate": 0.00027619047619047615, "loss": 0.3935, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.816, "eval_f1": 0.7985758096573877, "eval_loss": 0.4719848334789276, "eval_precision": 0.8189957264957265, "eval_recall": 0.8055637015314435, "eval_runtime": 11.5276, "eval_samples_per_second": 21.687, "eval_steps_per_second": 2.776, "step": 378 }, { "epoch": 3.1746031746031744, "grad_norm": 15.368912696838379, "learning_rate": 0.00026825396825396825, "loss": 0.2416, "step": 400 }, { "epoch": 3.9682539682539684, "grad_norm": 19.9014949798584, "learning_rate": 0.0002603174603174603, "loss": 0.1334, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.844, "eval_f1": 0.8364854215918044, "eval_loss": 0.4536682665348053, "eval_precision": 0.8404496818970504, "eval_recall": 0.8392659312014151, "eval_runtime": 11.5071, "eval_samples_per_second": 21.726, "eval_steps_per_second": 2.781, "step": 504 }, { "epoch": 4.761904761904762, "grad_norm": 20.753414154052734, "learning_rate": 0.0002523809523809524, "loss": 0.0575, "step": 600 }, { "epoch": 5.0, "eval_accuracy": 0.836, "eval_f1": 0.8300810566073022, "eval_loss": 0.520072877407074, "eval_precision": 0.8326529759666744, "eval_recall": 0.8296722990271377, "eval_runtime": 11.5252, "eval_samples_per_second": 21.692, "eval_steps_per_second": 2.777, "step": 630 }, { "epoch": 5.555555555555555, "grad_norm": 16.8087100982666, "learning_rate": 0.00024444444444444443, "loss": 0.1109, "step": 700 }, { "epoch": 6.0, "eval_accuracy": 0.832, "eval_f1": 0.8266870743093288, "eval_loss": 0.6697499752044678, "eval_precision": 0.8294946162169833, "eval_recall": 0.8305852301820044, "eval_runtime": 11.4108, "eval_samples_per_second": 21.909, "eval_steps_per_second": 2.804, "step": 756 }, { "epoch": 6.349206349206349, "grad_norm": 14.420797348022461, "learning_rate": 0.0002365079365079365, "loss": 0.0983, "step": 800 }, { "epoch": 7.0, "eval_accuracy": 0.84, "eval_f1": 0.837053931393554, "eval_loss": 0.5976704359054565, "eval_precision": 0.8379383751169328, "eval_recall": 0.8369804031094353, "eval_runtime": 11.3574, "eval_samples_per_second": 22.012, "eval_steps_per_second": 2.818, "step": 882 }, { "epoch": 7.142857142857143, "grad_norm": 18.421493530273438, "learning_rate": 0.00022857142857142854, "loss": 0.0694, "step": 900 }, { "epoch": 7.936507936507937, "grad_norm": 16.508926391601562, "learning_rate": 0.0002206349206349206, "loss": 0.0739, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.824, "eval_f1": 0.8220051553384886, "eval_loss": 0.5506980419158936, "eval_precision": 0.8243932700223792, "eval_recall": 0.8214786109947401, "eval_runtime": 11.3601, "eval_samples_per_second": 22.007, "eval_steps_per_second": 2.817, "step": 1008 }, { "epoch": 8.73015873015873, "grad_norm": 16.224796295166016, "learning_rate": 0.00021269841269841268, "loss": 0.0395, "step": 1100 }, { "epoch": 9.0, "eval_accuracy": 0.82, "eval_f1": 0.8177316510649844, "eval_loss": 0.6578373312950134, "eval_precision": 0.8200073051100985, "eval_recall": 0.8173119443280733, "eval_runtime": 11.3675, "eval_samples_per_second": 21.992, "eval_steps_per_second": 2.815, "step": 1134 }, { "epoch": 9.523809523809524, "grad_norm": 17.235092163085938, "learning_rate": 0.00020476190476190475, "loss": 0.0416, "step": 1200 }, { "epoch": 10.0, "eval_accuracy": 0.82, "eval_f1": 0.8114134932173007, "eval_loss": 0.7492188811302185, "eval_precision": 0.8144965098919531, "eval_recall": 0.8123562817111204, "eval_runtime": 11.3856, "eval_samples_per_second": 21.958, "eval_steps_per_second": 2.811, "step": 1260 }, { "epoch": 10.317460317460318, "grad_norm": 14.80374526977539, "learning_rate": 0.0001968253968253968, "loss": 0.0332, "step": 1300 }, { "epoch": 11.0, "eval_accuracy": 0.828, "eval_f1": 0.8224526625354294, "eval_loss": 0.8895044326782227, "eval_precision": 0.8306511283758787, "eval_recall": 0.8204987664665083, "eval_runtime": 11.3762, "eval_samples_per_second": 21.976, "eval_steps_per_second": 2.813, "step": 1386 }, { "epoch": 11.11111111111111, "grad_norm": 19.538997650146484, "learning_rate": 0.00018888888888888888, "loss": 0.0184, "step": 1400 }, { "epoch": 11.904761904761905, "grad_norm": 18.353506088256836, "learning_rate": 0.00018095238095238093, "loss": 0.0324, "step": 1500 }, { "epoch": 12.0, "eval_accuracy": 0.812, "eval_f1": 0.8088087500673765, "eval_loss": 0.762988269329071, "eval_precision": 0.809127072437966, "eval_recall": 0.8106590094493319, "eval_runtime": 11.385, "eval_samples_per_second": 21.959, "eval_steps_per_second": 2.811, "step": 1512 }, { "epoch": 12.698412698412698, "grad_norm": 18.43031883239746, "learning_rate": 0.000173015873015873, "loss": 0.0335, "step": 1600 }, { "epoch": 13.0, "eval_accuracy": 0.836, "eval_f1": 0.8294732588375853, "eval_loss": 0.5692726969718933, "eval_precision": 0.8316624761497172, "eval_recall": 0.831677372806405, "eval_runtime": 11.4395, "eval_samples_per_second": 21.854, "eval_steps_per_second": 2.797, "step": 1638 }, { "epoch": 13.492063492063492, "grad_norm": 17.18678092956543, "learning_rate": 0.00016507936507936506, "loss": 0.0245, "step": 1700 }, { "epoch": 14.0, "eval_accuracy": 0.86, "eval_f1": 0.8567973521009923, "eval_loss": 0.6419259309768677, "eval_precision": 0.8568399743801923, "eval_recall": 0.8570689615044453, "eval_runtime": 11.436, "eval_samples_per_second": 21.861, "eval_steps_per_second": 2.798, "step": 1764 }, { "epoch": 14.285714285714286, "grad_norm": 15.67866325378418, "learning_rate": 0.00015714285714285713, "loss": 0.0189, "step": 1800 }, { "epoch": 15.0, "eval_accuracy": 0.86, "eval_f1": 0.8584421340648372, "eval_loss": 0.7554742693901062, "eval_precision": 0.8589289889820719, "eval_recall": 0.858138411767444, "eval_runtime": 11.4276, "eval_samples_per_second": 21.877, "eval_steps_per_second": 2.8, "step": 1890 }, { "epoch": 15.079365079365079, "grad_norm": 20.463829040527344, "learning_rate": 0.00014920634920634917, "loss": 0.0099, "step": 1900 }, { "epoch": 15.873015873015873, "grad_norm": 19.685022354125977, "learning_rate": 0.00014126984126984124, "loss": 0.0109, "step": 2000 }, { "epoch": 16.0, "eval_accuracy": 0.848, "eval_f1": 0.8440413675707793, "eval_loss": 0.7789789438247681, "eval_precision": 0.84466028545071, "eval_recall": 0.8448267234557557, "eval_runtime": 11.3821, "eval_samples_per_second": 21.964, "eval_steps_per_second": 2.811, "step": 2016 }, { "epoch": 16.666666666666668, "grad_norm": 19.673765182495117, "learning_rate": 0.0001333333333333333, "loss": 0.0083, "step": 2100 }, { "epoch": 17.0, "eval_accuracy": 0.848, "eval_f1": 0.8440413675707793, "eval_loss": 0.7907350063323975, "eval_precision": 0.84466028545071, "eval_recall": 0.8448267234557557, "eval_runtime": 11.4578, "eval_samples_per_second": 21.819, "eval_steps_per_second": 2.793, "step": 2142 }, { "epoch": 17.46031746031746, "grad_norm": 17.990713119506836, "learning_rate": 0.00012539682539682538, "loss": 0.008, "step": 2200 }, { "epoch": 18.0, "eval_accuracy": 0.856, "eval_f1": 0.8525987278543515, "eval_loss": 0.8481489419937134, "eval_precision": 0.8536676046910646, "eval_recall": 0.8527399571754412, "eval_runtime": 11.4478, "eval_samples_per_second": 21.838, "eval_steps_per_second": 2.795, "step": 2268 }, { "epoch": 18.253968253968253, "grad_norm": 20.667980194091797, "learning_rate": 0.00011746031746031744, "loss": 0.0121, "step": 2300 }, { "epoch": 19.0, "eval_accuracy": 0.852, "eval_f1": 0.8488564789790646, "eval_loss": 0.8253383040428162, "eval_precision": 0.8494105075271848, "eval_recall": 0.8491557277847601, "eval_runtime": 11.4707, "eval_samples_per_second": 21.795, "eval_steps_per_second": 2.79, "step": 2394 }, { "epoch": 19.047619047619047, "grad_norm": 19.05530548095703, "learning_rate": 0.0001095238095238095, "loss": 0.0095, "step": 2400 }, { "epoch": 19.841269841269842, "grad_norm": 19.204843521118164, "learning_rate": 0.00010158730158730157, "loss": 0.0097, "step": 2500 }, { "epoch": 20.0, "eval_accuracy": 0.856, "eval_f1": 0.8531258452442141, "eval_loss": 0.829548180103302, "eval_precision": 0.8533592253010878, "eval_recall": 0.8534847321137643, "eval_runtime": 11.4107, "eval_samples_per_second": 21.909, "eval_steps_per_second": 2.804, "step": 2520 }, { "epoch": 20.634920634920636, "grad_norm": 20.698205947875977, "learning_rate": 9.365079365079364e-05, "loss": 0.0085, "step": 2600 }, { "epoch": 21.0, "eval_accuracy": 0.852, "eval_f1": 0.8488564789790646, "eval_loss": 0.8458440899848938, "eval_precision": 0.8494105075271848, "eval_recall": 0.8491557277847601, "eval_runtime": 11.4413, "eval_samples_per_second": 21.851, "eval_steps_per_second": 2.797, "step": 2646 }, { "epoch": 21.428571428571427, "grad_norm": 19.178499221801758, "learning_rate": 8.57142857142857e-05, "loss": 0.0085, "step": 2700 }, { "epoch": 22.0, "eval_accuracy": 0.856, "eval_f1": 0.8531258452442141, "eval_loss": 0.8456778526306152, "eval_precision": 0.8533592253010878, "eval_recall": 0.8534847321137643, "eval_runtime": 11.4396, "eval_samples_per_second": 21.854, "eval_steps_per_second": 2.797, "step": 2772 }, { "epoch": 22.22222222222222, "grad_norm": 19.464889526367188, "learning_rate": 7.777777777777777e-05, "loss": 0.0091, "step": 2800 }, { "epoch": 23.0, "eval_accuracy": 0.852, "eval_f1": 0.8488564789790646, "eval_loss": 0.8525780439376831, "eval_precision": 0.8494105075271848, "eval_recall": 0.8491557277847601, "eval_runtime": 11.4614, "eval_samples_per_second": 21.812, "eval_steps_per_second": 2.792, "step": 2898 }, { "epoch": 23.015873015873016, "grad_norm": 18.641748428344727, "learning_rate": 6.984126984126984e-05, "loss": 0.0088, "step": 2900 }, { "epoch": 23.80952380952381, "grad_norm": 16.954683303833008, "learning_rate": 6.190476190476189e-05, "loss": 0.0087, "step": 3000 }, { "epoch": 24.0, "eval_accuracy": 0.852, "eval_f1": 0.849117063374952, "eval_loss": 0.8523342609405518, "eval_precision": 0.8492288344390057, "eval_recall": 0.8493180654470978, "eval_runtime": 11.4777, "eval_samples_per_second": 21.781, "eval_steps_per_second": 2.788, "step": 3024 }, { "epoch": 24.603174603174605, "grad_norm": 17.625598907470703, "learning_rate": 5.396825396825396e-05, "loss": 0.0232, "step": 3100 }, { "epoch": 25.0, "eval_accuracy": 0.856, "eval_f1": 0.8503092627516886, "eval_loss": 0.8092365860939026, "eval_precision": 0.8546918325243449, "eval_recall": 0.8510880696364568, "eval_runtime": 11.4882, "eval_samples_per_second": 21.761, "eval_steps_per_second": 2.785, "step": 3150 }, { "epoch": 25.396825396825395, "grad_norm": 17.685829162597656, "learning_rate": 4.603174603174602e-05, "loss": 0.0162, "step": 3200 }, { "epoch": 26.0, "eval_accuracy": 0.844, "eval_f1": 0.8397000162501418, "eval_loss": 0.7603457570075989, "eval_precision": 0.8405141515263873, "eval_recall": 0.8410801564027371, "eval_runtime": 11.4597, "eval_samples_per_second": 21.816, "eval_steps_per_second": 2.792, "step": 3276 }, { "epoch": 26.19047619047619, "grad_norm": 18.523897171020508, "learning_rate": 3.809523809523809e-05, "loss": 0.0112, "step": 3300 }, { "epoch": 26.984126984126984, "grad_norm": 20.7585506439209, "learning_rate": 3.0158730158730154e-05, "loss": 0.0112, "step": 3400 }, { "epoch": 27.0, "eval_accuracy": 0.848, "eval_f1": 0.8432066944548815, "eval_loss": 0.7882433533668518, "eval_precision": 0.8447052075757308, "eval_recall": 0.8440819485174323, "eval_runtime": 11.4572, "eval_samples_per_second": 21.82, "eval_steps_per_second": 2.793, "step": 3402 }, { "epoch": 27.77777777777778, "grad_norm": 19.374753952026367, "learning_rate": 2.222222222222222e-05, "loss": 0.0073, "step": 3500 }, { "epoch": 28.0, "eval_accuracy": 0.844, "eval_f1": 0.8392035860120967, "eval_loss": 0.8009940981864929, "eval_precision": 0.8402476780185758, "eval_recall": 0.8399152818507657, "eval_runtime": 11.4891, "eval_samples_per_second": 21.76, "eval_steps_per_second": 2.785, "step": 3528 }, { "epoch": 28.571428571428573, "grad_norm": 18.367250442504883, "learning_rate": 1.4285714285714284e-05, "loss": 0.0103, "step": 3600 }, { "epoch": 29.0, "eval_accuracy": 0.848, "eval_f1": 0.8445914212496674, "eval_loss": 0.7996464967727661, "eval_precision": 0.8447769509983977, "eval_recall": 0.8449890611180934, "eval_runtime": 11.5367, "eval_samples_per_second": 21.67, "eval_steps_per_second": 2.774, "step": 3654 }, { "epoch": 29.365079365079364, "grad_norm": 19.641939163208008, "learning_rate": 6.349206349206348e-06, "loss": 0.0086, "step": 3700 }, { "epoch": 30.0, "eval_accuracy": 0.848, "eval_f1": 0.8445914212496674, "eval_loss": 0.8016173839569092, "eval_precision": 0.8447769509983977, "eval_recall": 0.8449890611180934, "eval_runtime": 11.4935, "eval_samples_per_second": 21.752, "eval_steps_per_second": 2.784, "step": 3780 } ], "logging_steps": 100, "max_steps": 3780, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6444324389874913e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }