{ "best_metric": 0.7676969092721835, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-1090", "epoch": 9.954337899543379, "eval_steps": 500, "global_step": 1090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "grad_norm": 1.0251981019973755, "learning_rate": 0.004954128440366973, "loss": 1.2326, "step": 10 }, { "epoch": 0.18, "grad_norm": 0.6157691478729248, "learning_rate": 0.004908256880733945, "loss": 0.9567, "step": 20 }, { "epoch": 0.27, "grad_norm": 1.101731538772583, "learning_rate": 0.004862385321100918, "loss": 0.905, "step": 30 }, { "epoch": 0.37, "grad_norm": 0.8456130623817444, "learning_rate": 0.00481651376146789, "loss": 0.9132, "step": 40 }, { "epoch": 0.46, "grad_norm": 0.6145790219306946, "learning_rate": 0.0047706422018348625, "loss": 0.8798, "step": 50 }, { "epoch": 0.55, "grad_norm": 1.1609084606170654, "learning_rate": 0.004724770642201835, "loss": 0.7849, "step": 60 }, { "epoch": 0.64, "grad_norm": 0.7732934355735779, "learning_rate": 0.004678899082568808, "loss": 0.7901, "step": 70 }, { "epoch": 0.73, "grad_norm": 0.7393956780433655, "learning_rate": 0.00463302752293578, "loss": 0.7932, "step": 80 }, { "epoch": 0.82, "grad_norm": 0.667579174041748, "learning_rate": 0.0045871559633027525, "loss": 0.7811, "step": 90 }, { "epoch": 0.91, "grad_norm": 0.8475561141967773, "learning_rate": 0.004541284403669725, "loss": 0.7579, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.7427716849451645, "eval_f1": 0.3926656517433595, "eval_loss": 0.7045032978057861, "eval_precision": 0.5204175676877641, "eval_recall": 0.3710290953850673, "eval_runtime": 5.2715, "eval_samples_per_second": 190.268, "eval_steps_per_second": 11.951, "step": 109 }, { "epoch": 1.0, "grad_norm": 0.7959792613983154, "learning_rate": 0.004495412844036698, "loss": 0.8206, "step": 110 }, { "epoch": 1.1, "grad_norm": 0.823697566986084, "learning_rate": 0.0044495412844036695, "loss": 0.7631, "step": 120 }, { "epoch": 1.19, "grad_norm": 0.7514823079109192, "learning_rate": 0.004403669724770643, "loss": 0.7404, "step": 130 }, { "epoch": 1.28, "grad_norm": 1.4910705089569092, "learning_rate": 0.004357798165137615, "loss": 0.7533, "step": 140 }, { "epoch": 1.37, "grad_norm": 1.1586723327636719, "learning_rate": 0.004311926605504587, "loss": 0.7781, "step": 150 }, { "epoch": 1.46, "grad_norm": 1.106838583946228, "learning_rate": 0.0042660550458715595, "loss": 0.7337, "step": 160 }, { "epoch": 1.55, "grad_norm": 1.4145853519439697, "learning_rate": 0.004220183486238533, "loss": 0.7871, "step": 170 }, { "epoch": 1.64, "grad_norm": 1.0210392475128174, "learning_rate": 0.004174311926605505, "loss": 0.827, "step": 180 }, { "epoch": 1.74, "grad_norm": 1.4612568616867065, "learning_rate": 0.004128440366972477, "loss": 0.8952, "step": 190 }, { "epoch": 1.83, "grad_norm": 2.333279848098755, "learning_rate": 0.00408256880733945, "loss": 0.8019, "step": 200 }, { "epoch": 1.92, "grad_norm": 1.9485087394714355, "learning_rate": 0.004036697247706422, "loss": 0.7689, "step": 210 }, { "epoch": 2.0, "eval_accuracy": 0.7278165503489531, "eval_f1": 0.3573330762987477, "eval_loss": 0.75123530626297, "eval_precision": 0.3964031496153109, "eval_recall": 0.35266989798605725, "eval_runtime": 5.3783, "eval_samples_per_second": 186.49, "eval_steps_per_second": 11.714, "step": 219 }, { "epoch": 2.01, "grad_norm": 1.6179887056350708, "learning_rate": 0.003990825688073394, "loss": 0.8527, "step": 220 }, { "epoch": 2.1, "grad_norm": 1.0684826374053955, "learning_rate": 0.00394954128440367, "loss": 0.8174, "step": 230 }, { "epoch": 2.19, "grad_norm": 0.6670346260070801, "learning_rate": 0.003903669724770642, "loss": 0.7894, "step": 240 }, { "epoch": 2.28, "grad_norm": 1.4650906324386597, "learning_rate": 0.003857798165137615, "loss": 0.7712, "step": 250 }, { "epoch": 2.37, "grad_norm": 1.9125275611877441, "learning_rate": 0.003811926605504587, "loss": 0.8228, "step": 260 }, { "epoch": 2.47, "grad_norm": 2.175058603286743, "learning_rate": 0.0037706422018348625, "loss": 0.7592, "step": 270 }, { "epoch": 2.56, "grad_norm": 1.2160431146621704, "learning_rate": 0.003724770642201835, "loss": 0.7986, "step": 280 }, { "epoch": 2.65, "grad_norm": 0.9580293893814087, "learning_rate": 0.0036788990825688075, "loss": 0.7767, "step": 290 }, { "epoch": 2.74, "grad_norm": 1.544980525970459, "learning_rate": 0.0036330275229357802, "loss": 0.7882, "step": 300 }, { "epoch": 2.83, "grad_norm": 2.8008100986480713, "learning_rate": 0.003587155963302752, "loss": 0.8818, "step": 310 }, { "epoch": 2.92, "grad_norm": 3.061676025390625, "learning_rate": 0.003541284403669725, "loss": 0.7353, "step": 320 }, { "epoch": 3.0, "eval_accuracy": 0.7357926221335992, "eval_f1": 0.40020878247545494, "eval_loss": 0.7191066145896912, "eval_precision": 0.4630387632736769, "eval_recall": 0.4201909974743698, "eval_runtime": 5.4433, "eval_samples_per_second": 184.263, "eval_steps_per_second": 11.574, "step": 328 }, { "epoch": 3.01, "grad_norm": 6.1841206550598145, "learning_rate": 0.003495412844036697, "loss": 0.762, "step": 330 }, { "epoch": 3.11, "grad_norm": 2.103243589401245, "learning_rate": 0.00344954128440367, "loss": 0.798, "step": 340 }, { "epoch": 3.2, "grad_norm": 2.337041139602661, "learning_rate": 0.0034082568807339447, "loss": 0.7817, "step": 350 }, { "epoch": 3.29, "grad_norm": 3.2973079681396484, "learning_rate": 0.0033669724770642204, "loss": 0.7669, "step": 360 }, { "epoch": 3.38, "grad_norm": 1.7525138854980469, "learning_rate": 0.003321100917431193, "loss": 0.8788, "step": 370 }, { "epoch": 3.47, "grad_norm": 3.467615842819214, "learning_rate": 0.003275229357798165, "loss": 0.9385, "step": 380 }, { "epoch": 3.56, "grad_norm": 2.560269355773926, "learning_rate": 0.0032293577981651377, "loss": 0.9773, "step": 390 }, { "epoch": 3.65, "grad_norm": 0.9047974348068237, "learning_rate": 0.00318348623853211, "loss": 0.948, "step": 400 }, { "epoch": 3.74, "grad_norm": 0.7493894100189209, "learning_rate": 0.0031376146788990827, "loss": 0.9429, "step": 410 }, { "epoch": 3.84, "grad_norm": 0.787327229976654, "learning_rate": 0.003091743119266055, "loss": 0.9193, "step": 420 }, { "epoch": 3.93, "grad_norm": 0.5133536458015442, "learning_rate": 0.0030458715596330278, "loss": 0.8429, "step": 430 }, { "epoch": 4.0, "eval_accuracy": 0.6809571286141576, "eval_f1": 0.18506746606597574, "eval_loss": 0.7857978940010071, "eval_precision": 0.4280081257540274, "eval_recall": 0.1812976208760752, "eval_runtime": 5.4671, "eval_samples_per_second": 183.46, "eval_steps_per_second": 11.523, "step": 438 }, { "epoch": 4.02, "grad_norm": 1.170052170753479, "learning_rate": 0.003, "loss": 0.7773, "step": 440 }, { "epoch": 4.11, "grad_norm": 1.006057858467102, "learning_rate": 0.002954128440366973, "loss": 0.8049, "step": 450 }, { "epoch": 4.2, "grad_norm": 0.5809192657470703, "learning_rate": 0.002908256880733945, "loss": 0.81, "step": 460 }, { "epoch": 4.29, "grad_norm": 0.7377546429634094, "learning_rate": 0.002862385321100918, "loss": 0.7943, "step": 470 }, { "epoch": 4.38, "grad_norm": 0.5214153528213501, "learning_rate": 0.0028165137614678897, "loss": 0.7664, "step": 480 }, { "epoch": 4.47, "grad_norm": 0.615777313709259, "learning_rate": 0.0027706422018348624, "loss": 0.7769, "step": 490 }, { "epoch": 4.57, "grad_norm": 0.5365859866142273, "learning_rate": 0.0027247706422018347, "loss": 0.8232, "step": 500 }, { "epoch": 4.66, "grad_norm": 0.7110781669616699, "learning_rate": 0.0026788990825688075, "loss": 0.7755, "step": 510 }, { "epoch": 4.75, "grad_norm": 0.8415727615356445, "learning_rate": 0.0026330275229357798, "loss": 0.8154, "step": 520 }, { "epoch": 4.84, "grad_norm": 0.7670438289642334, "learning_rate": 0.0025871559633027525, "loss": 0.7386, "step": 530 }, { "epoch": 4.93, "grad_norm": 0.8495368957519531, "learning_rate": 0.002541284403669725, "loss": 0.7929, "step": 540 }, { "epoch": 5.0, "eval_accuracy": 0.7218344965104686, "eval_f1": 0.35225874228381837, "eval_loss": 0.7013418674468994, "eval_precision": 0.5157907633577941, "eval_recall": 0.397101205649215, "eval_runtime": 5.3335, "eval_samples_per_second": 188.058, "eval_steps_per_second": 11.812, "step": 547 }, { "epoch": 5.02, "grad_norm": 0.9542437791824341, "learning_rate": 0.0024954128440366975, "loss": 0.75, "step": 550 }, { "epoch": 5.11, "grad_norm": 0.7318850755691528, "learning_rate": 0.00244954128440367, "loss": 0.7665, "step": 560 }, { "epoch": 5.21, "grad_norm": 1.0321910381317139, "learning_rate": 0.0024036697247706426, "loss": 0.7015, "step": 570 }, { "epoch": 5.3, "grad_norm": 0.786632239818573, "learning_rate": 0.002357798165137615, "loss": 0.7756, "step": 580 }, { "epoch": 5.39, "grad_norm": 0.8995092511177063, "learning_rate": 0.002311926605504587, "loss": 0.79, "step": 590 }, { "epoch": 5.48, "grad_norm": 0.7932329773902893, "learning_rate": 0.00226605504587156, "loss": 0.7069, "step": 600 }, { "epoch": 5.57, "grad_norm": 0.7889924645423889, "learning_rate": 0.002220183486238532, "loss": 0.7507, "step": 610 }, { "epoch": 5.66, "grad_norm": 0.8880289793014526, "learning_rate": 0.002174311926605505, "loss": 0.7341, "step": 620 }, { "epoch": 5.75, "grad_norm": 0.5282242298126221, "learning_rate": 0.0021284403669724773, "loss": 0.685, "step": 630 }, { "epoch": 5.84, "grad_norm": 1.16853666305542, "learning_rate": 0.0020825688073394496, "loss": 0.7149, "step": 640 }, { "epoch": 5.94, "grad_norm": 0.6140002012252808, "learning_rate": 0.0020366972477064223, "loss": 0.6804, "step": 650 }, { "epoch": 6.0, "eval_accuracy": 0.7607178464606181, "eval_f1": 0.43914392933644575, "eval_loss": 0.6822256445884705, "eval_precision": 0.5010781993513719, "eval_recall": 0.4239506721942319, "eval_runtime": 5.2475, "eval_samples_per_second": 191.137, "eval_steps_per_second": 12.006, "step": 657 }, { "epoch": 6.03, "grad_norm": 0.7350448369979858, "learning_rate": 0.0019908256880733946, "loss": 0.7111, "step": 660 }, { "epoch": 6.12, "grad_norm": 0.6007147431373596, "learning_rate": 0.001944954128440367, "loss": 0.6981, "step": 670 }, { "epoch": 6.21, "grad_norm": 0.8717228770256042, "learning_rate": 0.0018990825688073396, "loss": 0.666, "step": 680 }, { "epoch": 6.3, "grad_norm": 0.5795207023620605, "learning_rate": 0.0018532110091743121, "loss": 0.7206, "step": 690 }, { "epoch": 6.39, "grad_norm": 0.9550521373748779, "learning_rate": 0.0018073394495412844, "loss": 0.7388, "step": 700 }, { "epoch": 6.48, "grad_norm": 1.932844877243042, "learning_rate": 0.001761467889908257, "loss": 0.7198, "step": 710 }, { "epoch": 6.58, "grad_norm": 0.8446419835090637, "learning_rate": 0.0017155963302752295, "loss": 0.6446, "step": 720 }, { "epoch": 6.67, "grad_norm": 1.2699838876724243, "learning_rate": 0.001669724770642202, "loss": 0.683, "step": 730 }, { "epoch": 6.76, "grad_norm": 0.8056779503822327, "learning_rate": 0.0016238532110091745, "loss": 0.7272, "step": 740 }, { "epoch": 6.85, "grad_norm": 0.8416026830673218, "learning_rate": 0.0015779816513761468, "loss": 0.7436, "step": 750 }, { "epoch": 6.94, "grad_norm": 0.8119596242904663, "learning_rate": 0.0015321100917431193, "loss": 0.6922, "step": 760 }, { "epoch": 7.0, "eval_accuracy": 0.7666999002991027, "eval_f1": 0.5226745508907684, "eval_loss": 0.653312087059021, "eval_precision": 0.6761986767002692, "eval_recall": 0.5105511763591389, "eval_runtime": 5.3357, "eval_samples_per_second": 187.978, "eval_steps_per_second": 11.807, "step": 766 }, { "epoch": 7.03, "grad_norm": 1.218758225440979, "learning_rate": 0.0014862385321100919, "loss": 0.7674, "step": 770 }, { "epoch": 7.12, "grad_norm": 1.0076816082000732, "learning_rate": 0.0014403669724770644, "loss": 0.6783, "step": 780 }, { "epoch": 7.21, "grad_norm": 1.2119933366775513, "learning_rate": 0.0013944954128440369, "loss": 0.6743, "step": 790 }, { "epoch": 7.31, "grad_norm": 0.7832847237586975, "learning_rate": 0.0013486238532110092, "loss": 0.751, "step": 800 }, { "epoch": 7.4, "grad_norm": 0.7728474140167236, "learning_rate": 0.0013027522935779817, "loss": 0.7252, "step": 810 }, { "epoch": 7.49, "grad_norm": 1.1566990613937378, "learning_rate": 0.0012568807339449542, "loss": 0.6176, "step": 820 }, { "epoch": 7.58, "grad_norm": 0.8185287117958069, "learning_rate": 0.0012110091743119267, "loss": 0.6928, "step": 830 }, { "epoch": 7.67, "grad_norm": 1.3969968557357788, "learning_rate": 0.0011651376146788993, "loss": 0.6475, "step": 840 }, { "epoch": 7.76, "grad_norm": 1.7943429946899414, "learning_rate": 0.0011192660550458716, "loss": 0.6414, "step": 850 }, { "epoch": 7.85, "grad_norm": 1.6549348831176758, "learning_rate": 0.001073394495412844, "loss": 0.717, "step": 860 }, { "epoch": 7.95, "grad_norm": 1.016310691833496, "learning_rate": 0.0010275229357798166, "loss": 0.6563, "step": 870 }, { "epoch": 8.0, "eval_accuracy": 0.7467597208374875, "eval_f1": 0.4496255647360247, "eval_loss": 0.6757794618606567, "eval_precision": 0.4547903051227771, "eval_recall": 0.4588581671719845, "eval_runtime": 5.4168, "eval_samples_per_second": 185.164, "eval_steps_per_second": 11.63, "step": 876 }, { "epoch": 8.04, "grad_norm": 1.1468579769134521, "learning_rate": 0.0009816513761467891, "loss": 0.7141, "step": 880 }, { "epoch": 8.13, "grad_norm": 3.9916632175445557, "learning_rate": 0.0009357798165137615, "loss": 0.6384, "step": 890 }, { "epoch": 8.22, "grad_norm": 1.3290263414382935, "learning_rate": 0.000889908256880734, "loss": 0.6598, "step": 900 }, { "epoch": 8.31, "grad_norm": 2.3329484462738037, "learning_rate": 0.0008440366972477065, "loss": 0.744, "step": 910 }, { "epoch": 8.4, "grad_norm": 1.5532264709472656, "learning_rate": 0.000798165137614679, "loss": 0.64, "step": 920 }, { "epoch": 8.49, "grad_norm": 0.8567171692848206, "learning_rate": 0.0007522935779816515, "loss": 0.6803, "step": 930 }, { "epoch": 8.58, "grad_norm": 0.8911744356155396, "learning_rate": 0.0007064220183486239, "loss": 0.6808, "step": 940 }, { "epoch": 8.68, "grad_norm": 0.8479019999504089, "learning_rate": 0.0006605504587155964, "loss": 0.6848, "step": 950 }, { "epoch": 8.77, "grad_norm": 1.6710033416748047, "learning_rate": 0.0006146788990825688, "loss": 0.6663, "step": 960 }, { "epoch": 8.86, "grad_norm": 0.7447875142097473, "learning_rate": 0.0005688073394495413, "loss": 0.6389, "step": 970 }, { "epoch": 8.95, "grad_norm": 1.6779547929763794, "learning_rate": 0.0005229357798165138, "loss": 0.6985, "step": 980 }, { "epoch": 9.0, "eval_accuracy": 0.7647058823529411, "eval_f1": 0.49146885132144036, "eval_loss": 0.6264088749885559, "eval_precision": 0.6450867892155376, "eval_recall": 0.4691799634422585, "eval_runtime": 5.2889, "eval_samples_per_second": 189.641, "eval_steps_per_second": 11.912, "step": 985 }, { "epoch": 9.04, "grad_norm": 0.9829887747764587, "learning_rate": 0.00047706422018348627, "loss": 0.608, "step": 990 }, { "epoch": 9.13, "grad_norm": 0.8272444009780884, "learning_rate": 0.00043119266055045873, "loss": 0.6661, "step": 1000 }, { "epoch": 9.22, "grad_norm": 1.056658387184143, "learning_rate": 0.0003853211009174312, "loss": 0.6982, "step": 1010 }, { "epoch": 9.32, "grad_norm": 0.8954740166664124, "learning_rate": 0.0003394495412844037, "loss": 0.5824, "step": 1020 }, { "epoch": 9.41, "grad_norm": 1.664057731628418, "learning_rate": 0.0002935779816513762, "loss": 0.657, "step": 1030 }, { "epoch": 9.5, "grad_norm": 0.9140777587890625, "learning_rate": 0.00024770642201834864, "loss": 0.5975, "step": 1040 }, { "epoch": 9.59, "grad_norm": 1.4085102081298828, "learning_rate": 0.0002018348623853211, "loss": 0.6377, "step": 1050 }, { "epoch": 9.68, "grad_norm": 0.9538210034370422, "learning_rate": 0.0001559633027522936, "loss": 0.6508, "step": 1060 }, { "epoch": 9.77, "grad_norm": 1.5182217359542847, "learning_rate": 0.00011009174311926606, "loss": 0.6776, "step": 1070 }, { "epoch": 9.86, "grad_norm": 2.307227849960327, "learning_rate": 6.422018348623854e-05, "loss": 0.6758, "step": 1080 }, { "epoch": 9.95, "grad_norm": 0.9896465539932251, "learning_rate": 1.834862385321101e-05, "loss": 0.6283, "step": 1090 }, { "epoch": 9.95, "eval_accuracy": 0.7676969092721835, "eval_f1": 0.50879281522548, "eval_loss": 0.6179494857788086, "eval_precision": 0.5889156507510112, "eval_recall": 0.4796382542284182, "eval_runtime": 5.4809, "eval_samples_per_second": 182.998, "eval_steps_per_second": 11.494, "step": 1090 }, { "epoch": 9.95, "step": 1090, "total_flos": 5.442882169274339e+18, "train_loss": 0.7558124568484245, "train_runtime": 825.7866, "train_samples_per_second": 84.852, "train_steps_per_second": 1.32 } ], "logging_steps": 10, "max_steps": 1090, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.442882169274339e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }