{ "best_metric": 0.6901677250862122, "best_model_checkpoint": "SavedModels/ViT-large-patch16-224_B/checkpoint-1128", "epoch": 20.0, "eval_steps": 500, "global_step": 7520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26595744680851063, "grad_norm": 16.918853759765625, "learning_rate": 0.0002920212765957447, "loss": 1.0273, "step": 100 }, { "epoch": 0.5319148936170213, "grad_norm": 8.12959098815918, "learning_rate": 0.00028404255319148934, "loss": 0.8052, "step": 200 }, { "epoch": 0.7978723404255319, "grad_norm": 4.803711891174316, "learning_rate": 0.00027606382978723404, "loss": 0.7818, "step": 300 }, { "epoch": 1.0, "eval_accuracy": 0.6693333333333333, "eval_f1": 0.6679277175440698, "eval_loss": 0.7451461553573608, "eval_precision": 0.6665516663433287, "eval_recall": 0.6708336668000534, "eval_runtime": 34.5448, "eval_samples_per_second": 21.711, "eval_steps_per_second": 2.721, "step": 376 }, { "epoch": 1.0638297872340425, "grad_norm": 5.264636516571045, "learning_rate": 0.0002680851063829787, "loss": 0.7518, "step": 400 }, { "epoch": 1.3297872340425532, "grad_norm": 6.721948146820068, "learning_rate": 0.0002601063829787234, "loss": 0.7153, "step": 500 }, { "epoch": 1.5957446808510638, "grad_norm": 5.855480670928955, "learning_rate": 0.00025212765957446806, "loss": 0.6854, "step": 600 }, { "epoch": 1.8617021276595744, "grad_norm": 5.206683158874512, "learning_rate": 0.0002441489361702127, "loss": 0.6975, "step": 700 }, { "epoch": 2.0, "eval_accuracy": 0.652, "eval_f1": 0.6527448850868924, "eval_loss": 0.737027108669281, "eval_precision": 0.6598586784457597, "eval_recall": 0.6519173002534348, "eval_runtime": 34.5417, "eval_samples_per_second": 21.713, "eval_steps_per_second": 2.721, "step": 752 }, { "epoch": 2.127659574468085, "grad_norm": 6.592344760894775, "learning_rate": 0.00023617021276595742, "loss": 0.6174, "step": 800 }, { "epoch": 2.393617021276596, "grad_norm": 4.732657432556152, "learning_rate": 0.0002281914893617021, "loss": 0.644, "step": 900 }, { "epoch": 2.6595744680851063, "grad_norm": 5.618279933929443, "learning_rate": 0.00022021276595744679, "loss": 0.6098, "step": 1000 }, { "epoch": 2.925531914893617, "grad_norm": 4.702358245849609, "learning_rate": 0.0002122340425531915, "loss": 0.5934, "step": 1100 }, { "epoch": 3.0, "eval_accuracy": 0.6826666666666666, "eval_f1": 0.6716423210919009, "eval_loss": 0.6901677250862122, "eval_precision": 0.680882951010493, "eval_recall": 0.6834541816726691, "eval_runtime": 35.1911, "eval_samples_per_second": 21.312, "eval_steps_per_second": 2.671, "step": 1128 }, { "epoch": 3.1914893617021276, "grad_norm": 4.94950008392334, "learning_rate": 0.00020425531914893615, "loss": 0.4998, "step": 1200 }, { "epoch": 3.4574468085106385, "grad_norm": 4.887601375579834, "learning_rate": 0.00019627659574468083, "loss": 0.5384, "step": 1300 }, { "epoch": 3.723404255319149, "grad_norm": 5.762096881866455, "learning_rate": 0.0001882978723404255, "loss": 0.5035, "step": 1400 }, { "epoch": 3.9893617021276597, "grad_norm": 5.000032424926758, "learning_rate": 0.0001803191489361702, "loss": 0.4943, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.68, "eval_f1": 0.6607042226524314, "eval_loss": 0.7506471872329712, "eval_precision": 0.6727589541740554, "eval_recall": 0.6823892223556088, "eval_runtime": 34.4906, "eval_samples_per_second": 21.745, "eval_steps_per_second": 2.725, "step": 1504 }, { "epoch": 4.25531914893617, "grad_norm": 7.737178325653076, "learning_rate": 0.0001723404255319149, "loss": 0.3581, "step": 1600 }, { "epoch": 4.5212765957446805, "grad_norm": 5.353149890899658, "learning_rate": 0.00016436170212765956, "loss": 0.3857, "step": 1700 }, { "epoch": 4.787234042553192, "grad_norm": 5.475671768188477, "learning_rate": 0.00015638297872340426, "loss": 0.3625, "step": 1800 }, { "epoch": 5.0, "eval_accuracy": 0.6693333333333333, "eval_f1": 0.6583454266829142, "eval_loss": 0.9006826281547546, "eval_precision": 0.6655033443649184, "eval_recall": 0.6717070828331333, "eval_runtime": 34.3021, "eval_samples_per_second": 21.865, "eval_steps_per_second": 2.74, "step": 1880 }, { "epoch": 5.053191489361702, "grad_norm": 8.006985664367676, "learning_rate": 0.00014840425531914892, "loss": 0.3767, "step": 1900 }, { "epoch": 5.319148936170213, "grad_norm": 8.51475715637207, "learning_rate": 0.0001404255319148936, "loss": 0.2824, "step": 2000 }, { "epoch": 5.585106382978723, "grad_norm": 8.186362266540527, "learning_rate": 0.00013244680851063828, "loss": 0.2434, "step": 2100 }, { "epoch": 5.851063829787234, "grad_norm": 6.735799312591553, "learning_rate": 0.00012446808510638296, "loss": 0.2717, "step": 2200 }, { "epoch": 6.0, "eval_accuracy": 0.6586666666666666, "eval_f1": 0.6614425775783358, "eval_loss": 1.0333963632583618, "eval_precision": 0.6652982915561002, "eval_recall": 0.6589942643724156, "eval_runtime": 34.3254, "eval_samples_per_second": 21.85, "eval_steps_per_second": 2.738, "step": 2256 }, { "epoch": 6.117021276595745, "grad_norm": 6.987758636474609, "learning_rate": 0.00011648936170212764, "loss": 0.2191, "step": 2300 }, { "epoch": 6.382978723404255, "grad_norm": 8.753083229064941, "learning_rate": 0.00010851063829787234, "loss": 0.1532, "step": 2400 }, { "epoch": 6.648936170212766, "grad_norm": 6.87877082824707, "learning_rate": 0.00010053191489361702, "loss": 0.172, "step": 2500 }, { "epoch": 6.914893617021277, "grad_norm": 8.248693466186523, "learning_rate": 9.25531914893617e-05, "loss": 0.188, "step": 2600 }, { "epoch": 7.0, "eval_accuracy": 0.648, "eval_f1": 0.6480604224938936, "eval_loss": 1.3646624088287354, "eval_precision": 0.6474310419368785, "eval_recall": 0.6488264639189009, "eval_runtime": 34.2754, "eval_samples_per_second": 21.882, "eval_steps_per_second": 2.742, "step": 2632 }, { "epoch": 7.180851063829787, "grad_norm": 9.565648078918457, "learning_rate": 8.457446808510637e-05, "loss": 0.1344, "step": 2700 }, { "epoch": 7.446808510638298, "grad_norm": 10.819112777709961, "learning_rate": 7.659574468085105e-05, "loss": 0.0791, "step": 2800 }, { "epoch": 7.712765957446808, "grad_norm": 11.498022079467773, "learning_rate": 6.861702127659574e-05, "loss": 0.0917, "step": 2900 }, { "epoch": 7.9787234042553195, "grad_norm": 10.500368118286133, "learning_rate": 6.063829787234042e-05, "loss": 0.0966, "step": 3000 }, { "epoch": 8.0, "eval_accuracy": 0.6506666666666666, "eval_f1": 0.6510622640996298, "eval_loss": 1.7121126651763916, "eval_precision": 0.6518027984453844, "eval_recall": 0.6519786581299186, "eval_runtime": 34.2979, "eval_samples_per_second": 21.867, "eval_steps_per_second": 2.741, "step": 3008 }, { "epoch": 8.24468085106383, "grad_norm": 10.034985542297363, "learning_rate": 5.26595744680851e-05, "loss": 0.0589, "step": 3100 }, { "epoch": 8.51063829787234, "grad_norm": 8.84762954711914, "learning_rate": 4.468085106382978e-05, "loss": 0.0444, "step": 3200 }, { "epoch": 8.77659574468085, "grad_norm": 8.717729568481445, "learning_rate": 3.670212765957446e-05, "loss": 0.0492, "step": 3300 }, { "epoch": 9.0, "eval_accuracy": 0.664, "eval_f1": 0.6634717422597086, "eval_loss": 2.092434883117676, "eval_precision": 0.6624493384142015, "eval_recall": 0.6648558089902628, "eval_runtime": 34.1372, "eval_samples_per_second": 21.97, "eval_steps_per_second": 2.754, "step": 3384 }, { "epoch": 9.042553191489361, "grad_norm": 10.311498641967773, "learning_rate": 2.8723404255319147e-05, "loss": 0.0461, "step": 3400 }, { "epoch": 9.308510638297872, "grad_norm": 9.477750778198242, "learning_rate": 2.0744680851063828e-05, "loss": 0.0215, "step": 3500 }, { "epoch": 9.574468085106384, "grad_norm": 10.069836616516113, "learning_rate": 1.276595744680851e-05, "loss": 0.0275, "step": 3600 }, { "epoch": 9.840425531914894, "grad_norm": 9.309320449829102, "learning_rate": 4.7872340425531906e-06, "loss": 0.0198, "step": 3700 }, { "epoch": 10.0, "eval_accuracy": 0.6733333333333333, "eval_f1": 0.671461456478878, "eval_loss": 2.349191427230835, "eval_precision": 0.6697804784739011, "eval_recall": 0.6742686407896491, "eval_runtime": 34.2831, "eval_samples_per_second": 21.877, "eval_steps_per_second": 2.742, "step": 3760 }, { "epoch": 10.106382978723405, "grad_norm": 4.254685878753662, "learning_rate": 0.00014840425531914892, "loss": 0.1783, "step": 3800 }, { "epoch": 10.372340425531915, "grad_norm": 6.5849409103393555, "learning_rate": 0.00014441489361702127, "loss": 0.4254, "step": 3900 }, { "epoch": 10.638297872340425, "grad_norm": 6.008872985839844, "learning_rate": 0.0001404255319148936, "loss": 0.3892, "step": 4000 }, { "epoch": 10.904255319148936, "grad_norm": 4.886096000671387, "learning_rate": 0.00013643617021276593, "loss": 0.3308, "step": 4100 }, { "epoch": 11.0, "eval_accuracy": 0.6413333333333333, "eval_f1": 0.6379586335182581, "eval_loss": 1.2812837362289429, "eval_precision": 0.6360221570265966, "eval_recall": 0.6426586634653862, "eval_runtime": 34.4135, "eval_samples_per_second": 21.794, "eval_steps_per_second": 2.731, "step": 4136 }, { "epoch": 11.170212765957446, "grad_norm": 8.697183609008789, "learning_rate": 0.00013244680851063828, "loss": 0.3418, "step": 4200 }, { "epoch": 11.436170212765958, "grad_norm": 1.831715703010559, "learning_rate": 0.00012845744680851063, "loss": 0.2892, "step": 4300 }, { "epoch": 11.702127659574469, "grad_norm": 5.119130611419678, "learning_rate": 0.00012446808510638296, "loss": 0.2597, "step": 4400 }, { "epoch": 11.96808510638298, "grad_norm": 4.450387001037598, "learning_rate": 0.00012047872340425532, "loss": 0.2468, "step": 4500 }, { "epoch": 12.0, "eval_accuracy": 0.6693333333333333, "eval_f1": 0.6644005202701936, "eval_loss": 1.343381404876709, "eval_precision": 0.6702673209494008, "eval_recall": 0.6696177137521676, "eval_runtime": 34.1977, "eval_samples_per_second": 21.931, "eval_steps_per_second": 2.749, "step": 4512 }, { "epoch": 12.23404255319149, "grad_norm": 0.5507918000221252, "learning_rate": 0.00011648936170212764, "loss": 0.161, "step": 4600 }, { "epoch": 12.5, "grad_norm": 3.424229621887207, "learning_rate": 0.0001125, "loss": 0.1698, "step": 4700 }, { "epoch": 12.76595744680851, "grad_norm": 10.304464340209961, "learning_rate": 0.00010851063829787234, "loss": 0.1992, "step": 4800 }, { "epoch": 13.0, "eval_accuracy": 0.6773333333333333, "eval_f1": 0.6757724702487522, "eval_loss": 1.5172154903411865, "eval_precision": 0.6749943823114556, "eval_recall": 0.6780813658796853, "eval_runtime": 34.5579, "eval_samples_per_second": 21.703, "eval_steps_per_second": 2.72, "step": 4888 }, { "epoch": 13.03191489361702, "grad_norm": 0.58838951587677, "learning_rate": 0.00010452127659574466, "loss": 0.153, "step": 4900 }, { "epoch": 13.297872340425531, "grad_norm": 4.339015007019043, "learning_rate": 0.00010053191489361702, "loss": 0.0729, "step": 5000 }, { "epoch": 13.563829787234042, "grad_norm": 5.353603839874268, "learning_rate": 9.654255319148935e-05, "loss": 0.1182, "step": 5100 }, { "epoch": 13.829787234042554, "grad_norm": 0.11711510270833969, "learning_rate": 9.25531914893617e-05, "loss": 0.1391, "step": 5200 }, { "epoch": 14.0, "eval_accuracy": 0.688, "eval_f1": 0.6861192542213884, "eval_loss": 1.8793449401855469, "eval_precision": 0.6887025095519409, "eval_recall": 0.6882689075630252, "eval_runtime": 35.0719, "eval_samples_per_second": 21.385, "eval_steps_per_second": 2.68, "step": 5264 }, { "epoch": 14.095744680851064, "grad_norm": 16.545928955078125, "learning_rate": 8.856382978723404e-05, "loss": 0.0919, "step": 5300 }, { "epoch": 14.361702127659575, "grad_norm": 0.11285369098186493, "learning_rate": 8.457446808510637e-05, "loss": 0.0724, "step": 5400 }, { "epoch": 14.627659574468085, "grad_norm": 1.0866966247558594, "learning_rate": 8.058510638297872e-05, "loss": 0.0615, "step": 5500 }, { "epoch": 14.893617021276595, "grad_norm": 0.0466451533138752, "learning_rate": 7.659574468085105e-05, "loss": 0.062, "step": 5600 }, { "epoch": 15.0, "eval_accuracy": 0.6826666666666666, "eval_f1": 0.6784899933134545, "eval_loss": 2.1949727535247803, "eval_precision": 0.6780378052909217, "eval_recall": 0.6832503668133919, "eval_runtime": 34.2882, "eval_samples_per_second": 21.873, "eval_steps_per_second": 2.741, "step": 5640 }, { "epoch": 15.159574468085106, "grad_norm": 1.6346490383148193, "learning_rate": 7.26063829787234e-05, "loss": 0.0477, "step": 5700 }, { "epoch": 15.425531914893616, "grad_norm": 0.004013681318610907, "learning_rate": 6.861702127659574e-05, "loss": 0.0484, "step": 5800 }, { "epoch": 15.691489361702128, "grad_norm": 0.6463019847869873, "learning_rate": 6.462765957446807e-05, "loss": 0.0389, "step": 5900 }, { "epoch": 15.957446808510639, "grad_norm": 0.4977071285247803, "learning_rate": 6.063829787234042e-05, "loss": 0.0392, "step": 6000 }, { "epoch": 16.0, "eval_accuracy": 0.6626666666666666, "eval_f1": 0.6599147266673199, "eval_loss": 2.2853217124938965, "eval_precision": 0.6593416101537274, "eval_recall": 0.6640821661998133, "eval_runtime": 34.2142, "eval_samples_per_second": 21.921, "eval_steps_per_second": 2.747, "step": 6016 }, { "epoch": 16.22340425531915, "grad_norm": 0.18373289704322815, "learning_rate": 5.6648936170212766e-05, "loss": 0.035, "step": 6100 }, { "epoch": 16.48936170212766, "grad_norm": 11.437871932983398, "learning_rate": 5.26595744680851e-05, "loss": 0.0323, "step": 6200 }, { "epoch": 16.75531914893617, "grad_norm": 0.04929669201374054, "learning_rate": 4.867021276595744e-05, "loss": 0.0286, "step": 6300 }, { "epoch": 17.0, "eval_accuracy": 0.6706666666666666, "eval_f1": 0.6665467903773287, "eval_loss": 2.356433153152466, "eval_precision": 0.6646925224621034, "eval_recall": 0.6718655462184874, "eval_runtime": 34.2754, "eval_samples_per_second": 21.882, "eval_steps_per_second": 2.742, "step": 6392 }, { "epoch": 17.02127659574468, "grad_norm": 0.003023180877789855, "learning_rate": 4.468085106382978e-05, "loss": 0.0232, "step": 6400 }, { "epoch": 17.28723404255319, "grad_norm": 0.009346798993647099, "learning_rate": 4.069148936170212e-05, "loss": 0.0195, "step": 6500 }, { "epoch": 17.5531914893617, "grad_norm": 0.0396982878446579, "learning_rate": 3.670212765957446e-05, "loss": 0.02, "step": 6600 }, { "epoch": 17.819148936170212, "grad_norm": 0.014023613184690475, "learning_rate": 3.271276595744681e-05, "loss": 0.025, "step": 6700 }, { "epoch": 18.0, "eval_accuracy": 0.676, "eval_f1": 0.6721775597644245, "eval_loss": 2.5857865810394287, "eval_precision": 0.6713915090790397, "eval_recall": 0.6767165532879819, "eval_runtime": 34.2491, "eval_samples_per_second": 21.898, "eval_steps_per_second": 2.745, "step": 6768 }, { "epoch": 18.085106382978722, "grad_norm": 0.47041425108909607, "learning_rate": 2.8723404255319147e-05, "loss": 0.0202, "step": 6800 }, { "epoch": 18.351063829787233, "grad_norm": 0.06168466433882713, "learning_rate": 2.4734042553191484e-05, "loss": 0.019, "step": 6900 }, { "epoch": 18.617021276595743, "grad_norm": 0.0014604219468310475, "learning_rate": 2.0744680851063828e-05, "loss": 0.0178, "step": 7000 }, { "epoch": 18.882978723404257, "grad_norm": 0.0028070323169231415, "learning_rate": 1.675531914893617e-05, "loss": 0.0214, "step": 7100 }, { "epoch": 19.0, "eval_accuracy": 0.668, "eval_f1": 0.6658631291530153, "eval_loss": 2.6788387298583984, "eval_precision": 0.6642259414225942, "eval_recall": 0.6688787515006003, "eval_runtime": 34.249, "eval_samples_per_second": 21.898, "eval_steps_per_second": 2.745, "step": 7144 }, { "epoch": 19.148936170212767, "grad_norm": 0.0015123536577448249, "learning_rate": 1.276595744680851e-05, "loss": 0.0204, "step": 7200 }, { "epoch": 19.414893617021278, "grad_norm": 0.0018817168893292546, "learning_rate": 8.77659574468085e-06, "loss": 0.0141, "step": 7300 }, { "epoch": 19.680851063829788, "grad_norm": 0.6008268594741821, "learning_rate": 4.7872340425531906e-06, "loss": 0.0203, "step": 7400 }, { "epoch": 19.9468085106383, "grad_norm": 1.0317440032958984, "learning_rate": 7.978723404255318e-07, "loss": 0.0155, "step": 7500 }, { "epoch": 20.0, "eval_accuracy": 0.672, "eval_f1": 0.6695170440310667, "eval_loss": 2.725527048110962, "eval_precision": 0.66779120407287, "eval_recall": 0.6728787515006003, "eval_runtime": 34.5602, "eval_samples_per_second": 21.701, "eval_steps_per_second": 2.72, "step": 7520 } ], "logging_steps": 100, "max_steps": 7520, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.288864877974981e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }