|
{ |
|
"best_metric": 0.9481739412098146, |
|
"best_model_checkpoint": "deit-tiny-patch16-224-finetuned-og-dataset-10e/checkpoint-2184", |
|
"epoch": 5.998628257887518, |
|
"global_step": 2184, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.2831050228310503e-06, |
|
"loss": 1.4902, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.566210045662101e-06, |
|
"loss": 1.3426, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6.849315068493151e-06, |
|
"loss": 1.1594, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.132420091324201e-06, |
|
"loss": 0.9353, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.1415525114155251e-05, |
|
"loss": 0.8424, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.3698630136986302e-05, |
|
"loss": 0.8113, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.5981735159817352e-05, |
|
"loss": 0.7902, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8264840182648402e-05, |
|
"loss": 0.7755, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.0547945205479453e-05, |
|
"loss": 0.7586, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.2831050228310503e-05, |
|
"loss": 0.7285, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5114155251141553e-05, |
|
"loss": 0.7206, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.7397260273972603e-05, |
|
"loss": 0.6964, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.9680365296803654e-05, |
|
"loss": 0.6705, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1963470319634704e-05, |
|
"loss": 0.6803, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.424657534246575e-05, |
|
"loss": 0.6864, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.6529680365296805e-05, |
|
"loss": 0.6818, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.881278538812785e-05, |
|
"loss": 0.6484, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1095890410958905e-05, |
|
"loss": 0.6527, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.337899543378995e-05, |
|
"loss": 0.632, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.5662100456621006e-05, |
|
"loss": 0.6276, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.794520547945205e-05, |
|
"loss": 0.6604, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.997455470737914e-05, |
|
"loss": 0.6407, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.9720101781170486e-05, |
|
"loss": 0.6526, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.9465648854961834e-05, |
|
"loss": 0.6136, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.921119592875318e-05, |
|
"loss": 0.5914, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.895674300254453e-05, |
|
"loss": 0.6173, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.8702290076335885e-05, |
|
"loss": 0.6036, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.844783715012723e-05, |
|
"loss": 0.5949, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.8193384223918575e-05, |
|
"loss": 0.5636, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.793893129770993e-05, |
|
"loss": 0.557, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.768447837150127e-05, |
|
"loss": 0.567, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.743002544529263e-05, |
|
"loss": 0.5625, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.717557251908397e-05, |
|
"loss": 0.5814, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.6921119592875317e-05, |
|
"loss": 0.5825, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.52, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.641221374045801e-05, |
|
"loss": 0.5434, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8003886954409264, |
|
"eval_loss": 0.46485939621925354, |
|
"eval_runtime": 145.162, |
|
"eval_samples_per_second": 85.07, |
|
"eval_steps_per_second": 1.777, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.615776081424937e-05, |
|
"loss": 0.5464, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.5903307888040716e-05, |
|
"loss": 0.5175, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.5648854961832065e-05, |
|
"loss": 0.542, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.539440203562341e-05, |
|
"loss": 0.5581, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.513994910941476e-05, |
|
"loss": 0.5754, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.488549618320611e-05, |
|
"loss": 0.5116, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.463104325699746e-05, |
|
"loss": 0.5006, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.4376590330788806e-05, |
|
"loss": 0.4939, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.4122137404580154e-05, |
|
"loss": 0.5006, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.38676844783715e-05, |
|
"loss": 0.4861, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.361323155216285e-05, |
|
"loss": 0.4626, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.33587786259542e-05, |
|
"loss": 0.4723, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.3104325699745554e-05, |
|
"loss": 0.4499, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.2849872773536896e-05, |
|
"loss": 0.477, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.2595419847328244e-05, |
|
"loss": 0.4804, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.23409669211196e-05, |
|
"loss": 0.4805, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.208651399491094e-05, |
|
"loss": 0.4611, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.1832061068702296e-05, |
|
"loss": 0.4573, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.157760814249364e-05, |
|
"loss": 0.4532, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.1323155216284985e-05, |
|
"loss": 0.4472, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.106870229007634e-05, |
|
"loss": 0.4563, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.081424936386768e-05, |
|
"loss": 0.4394, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.055979643765904e-05, |
|
"loss": 0.4599, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.0305343511450385e-05, |
|
"loss": 0.4466, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.0050890585241734e-05, |
|
"loss": 0.443, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.979643765903308e-05, |
|
"loss": 0.4413, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.954198473282443e-05, |
|
"loss": 0.4168, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.928753180661578e-05, |
|
"loss": 0.4134, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.903307888040713e-05, |
|
"loss": 0.4401, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.8778625954198475e-05, |
|
"loss": 0.4113, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.852417302798982e-05, |
|
"loss": 0.4051, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.826972010178117e-05, |
|
"loss": 0.435, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.801526717557252e-05, |
|
"loss": 0.4313, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.776081424936387e-05, |
|
"loss": 0.4097, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.750636132315522e-05, |
|
"loss": 0.4292, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.7251908396946565e-05, |
|
"loss": 0.3968, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8713256134099927, |
|
"eval_loss": 0.31458979845046997, |
|
"eval_runtime": 148.1663, |
|
"eval_samples_per_second": 83.346, |
|
"eval_steps_per_second": 1.741, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.699745547073791e-05, |
|
"loss": 0.4217, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.674300254452927e-05, |
|
"loss": 0.404, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.648854961832061e-05, |
|
"loss": 0.3986, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.6234096692111965e-05, |
|
"loss": 0.3785, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.5979643765903306e-05, |
|
"loss": 0.4032, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.5725190839694654e-05, |
|
"loss": 0.3763, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.547073791348601e-05, |
|
"loss": 0.3583, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.521628498727735e-05, |
|
"loss": 0.4012, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.4961832061068706e-05, |
|
"loss": 0.3448, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.4707379134860054e-05, |
|
"loss": 0.398, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.44529262086514e-05, |
|
"loss": 0.3806, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.419847328244275e-05, |
|
"loss": 0.3674, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.39440203562341e-05, |
|
"loss": 0.3559, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.368956743002545e-05, |
|
"loss": 0.3825, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.3435114503816796e-05, |
|
"loss": 0.375, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.3180661577608144e-05, |
|
"loss": 0.3558, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.292620865139949e-05, |
|
"loss": 0.3791, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.267175572519084e-05, |
|
"loss": 0.3627, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.241730279898219e-05, |
|
"loss": 0.3804, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.216284987277354e-05, |
|
"loss": 0.3591, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.190839694656489e-05, |
|
"loss": 0.3745, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.1653944020356234e-05, |
|
"loss": 0.338, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.139949109414758e-05, |
|
"loss": 0.343, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.114503816793894e-05, |
|
"loss": 0.3537, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.089058524173028e-05, |
|
"loss": 0.3347, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.0636132315521633e-05, |
|
"loss": 0.3517, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 3.038167938931298e-05, |
|
"loss": 0.336, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.0127226463104323e-05, |
|
"loss": 0.3813, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.9872773536895675e-05, |
|
"loss": 0.3449, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.9618320610687023e-05, |
|
"loss": 0.3342, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.9363867684478375e-05, |
|
"loss": 0.3299, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.910941475826972e-05, |
|
"loss": 0.3376, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.885496183206107e-05, |
|
"loss": 0.3359, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.860050890585242e-05, |
|
"loss": 0.3321, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.8346055979643765e-05, |
|
"loss": 0.3126, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.8091603053435116e-05, |
|
"loss": 0.3352, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.7837150127226465e-05, |
|
"loss": 0.3075, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.901206575431209, |
|
"eval_loss": 0.24771326780319214, |
|
"eval_runtime": 151.6412, |
|
"eval_samples_per_second": 81.436, |
|
"eval_steps_per_second": 1.701, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.7582697201017816e-05, |
|
"loss": 0.3368, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.732824427480916e-05, |
|
"loss": 0.2966, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.707379134860051e-05, |
|
"loss": 0.332, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.681933842239186e-05, |
|
"loss": 0.3006, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.6564885496183206e-05, |
|
"loss": 0.2954, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.6310432569974558e-05, |
|
"loss": 0.3323, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.6055979643765906e-05, |
|
"loss": 0.3164, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 2.580152671755725e-05, |
|
"loss": 0.3114, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 2.5547073791348602e-05, |
|
"loss": 0.3139, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 2.5292620865139947e-05, |
|
"loss": 0.322, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.50381679389313e-05, |
|
"loss": 0.2801, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.4783715012722647e-05, |
|
"loss": 0.3153, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 2.4529262086513996e-05, |
|
"loss": 0.2958, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 2.4274809160305344e-05, |
|
"loss": 0.295, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.4020356234096695e-05, |
|
"loss": 0.2996, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 2.376590330788804e-05, |
|
"loss": 0.3155, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.351145038167939e-05, |
|
"loss": 0.3116, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 2.325699745547074e-05, |
|
"loss": 0.2996, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.300254452926209e-05, |
|
"loss": 0.3183, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.2748091603053437e-05, |
|
"loss": 0.2928, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.2493638676844785e-05, |
|
"loss": 0.3158, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.2239185750636133e-05, |
|
"loss": 0.2972, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.198473282442748e-05, |
|
"loss": 0.2891, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.173027989821883e-05, |
|
"loss": 0.2935, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.1475826972010178e-05, |
|
"loss": 0.2984, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.122137404580153e-05, |
|
"loss": 0.2867, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.0966921119592875e-05, |
|
"loss": 0.2849, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.0712468193384223e-05, |
|
"loss": 0.2478, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.0458015267175575e-05, |
|
"loss": 0.2982, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 2.0203562340966923e-05, |
|
"loss": 0.2891, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.994910941475827e-05, |
|
"loss": 0.3038, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.969465648854962e-05, |
|
"loss": 0.3008, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.9440203562340968e-05, |
|
"loss": 0.2778, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.9185750636132316e-05, |
|
"loss": 0.2744, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.8931297709923664e-05, |
|
"loss": 0.2691, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.8676844783715013e-05, |
|
"loss": 0.2961, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9335168839582152, |
|
"eval_loss": 0.17737890779972076, |
|
"eval_runtime": 142.2521, |
|
"eval_samples_per_second": 86.811, |
|
"eval_steps_per_second": 1.814, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.8422391857506364e-05, |
|
"loss": 0.2954, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.816793893129771e-05, |
|
"loss": 0.2703, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.7913486005089058e-05, |
|
"loss": 0.2547, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.765903307888041e-05, |
|
"loss": 0.2583, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.7404580152671757e-05, |
|
"loss": 0.2686, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7150127226463106e-05, |
|
"loss": 0.2774, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.6895674300254454e-05, |
|
"loss": 0.2708, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.6641221374045802e-05, |
|
"loss": 0.256, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.638676844783715e-05, |
|
"loss": 0.2766, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 1.61323155216285e-05, |
|
"loss": 0.263, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.5877862595419847e-05, |
|
"loss": 0.2478, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.56234096692112e-05, |
|
"loss": 0.2548, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.5368956743002547e-05, |
|
"loss": 0.2878, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.5114503816793894e-05, |
|
"loss": 0.2646, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.4860050890585242e-05, |
|
"loss": 0.2616, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.4605597964376592e-05, |
|
"loss": 0.2469, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.435114503816794e-05, |
|
"loss": 0.2539, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.4096692111959288e-05, |
|
"loss": 0.2536, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.3842239185750635e-05, |
|
"loss": 0.2763, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.3587786259541985e-05, |
|
"loss": 0.2681, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.2618, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.3078880407124683e-05, |
|
"loss": 0.2802, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.2824427480916032e-05, |
|
"loss": 0.2462, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 1.2569974554707382e-05, |
|
"loss": 0.2621, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.2315521628498728e-05, |
|
"loss": 0.2587, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.2061068702290076e-05, |
|
"loss": 0.2762, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.1806615776081426e-05, |
|
"loss": 0.2635, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 1.1552162849872775e-05, |
|
"loss": 0.265, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.1297709923664123e-05, |
|
"loss": 0.2712, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.1043256997455471e-05, |
|
"loss": 0.2392, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.0788804071246821e-05, |
|
"loss": 0.263, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 1.0534351145038168e-05, |
|
"loss": 0.2365, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 1.0279898218829518e-05, |
|
"loss": 0.2504, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.0025445292620864e-05, |
|
"loss": 0.2498, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 9.770992366412214e-06, |
|
"loss": 0.2449, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 9.516539440203563e-06, |
|
"loss": 0.2531, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.26208651399491e-06, |
|
"loss": 0.2523, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9421815531621993, |
|
"eval_loss": 0.15593743324279785, |
|
"eval_runtime": 137.5508, |
|
"eval_samples_per_second": 89.778, |
|
"eval_steps_per_second": 1.876, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 9.00763358778626e-06, |
|
"loss": 0.2562, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 8.753180661577609e-06, |
|
"loss": 0.22, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 8.498727735368957e-06, |
|
"loss": 0.2439, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 8.244274809160306e-06, |
|
"loss": 0.2483, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 7.989821882951656e-06, |
|
"loss": 0.2437, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 7.735368956743002e-06, |
|
"loss": 0.2555, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 7.480916030534352e-06, |
|
"loss": 0.2282, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 7.2264631043257e-06, |
|
"loss": 0.243, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 6.972010178117049e-06, |
|
"loss": 0.2138, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 6.717557251908398e-06, |
|
"loss": 0.2263, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 6.463104325699745e-06, |
|
"loss": 0.2305, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 6.208651399491094e-06, |
|
"loss": 0.2165, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 5.954198473282443e-06, |
|
"loss": 0.2327, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 5.699745547073792e-06, |
|
"loss": 0.253, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 5.44529262086514e-06, |
|
"loss": 0.2206, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 5.190839694656488e-06, |
|
"loss": 0.2571, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 4.9363867684478375e-06, |
|
"loss": 0.2242, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 4.681933842239187e-06, |
|
"loss": 0.2402, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 4.427480916030535e-06, |
|
"loss": 0.2462, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 4.173027989821883e-06, |
|
"loss": 0.217, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 3.918575063613232e-06, |
|
"loss": 0.2338, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 3.6641221374045806e-06, |
|
"loss": 0.2559, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 3.409669211195929e-06, |
|
"loss": 0.2408, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 3.155216284987277e-06, |
|
"loss": 0.2546, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.900763358778626e-06, |
|
"loss": 0.2327, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 2.646310432569975e-06, |
|
"loss": 0.2258, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 2.3918575063613232e-06, |
|
"loss": 0.2406, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 2.137404580152672e-06, |
|
"loss": 0.2117, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1.8829516539440202e-06, |
|
"loss": 0.2513, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 1.6284987277353691e-06, |
|
"loss": 0.2285, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 1.3740458015267176e-06, |
|
"loss": 0.2422, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 1.119592875318066e-06, |
|
"loss": 0.2333, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 8.651399491094147e-07, |
|
"loss": 0.2206, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 6.106870229007634e-07, |
|
"loss": 0.2436, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 3.56234096692112e-07, |
|
"loss": 0.22, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 1.0178117048346057e-07, |
|
"loss": 0.2304, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9481739412098146, |
|
"eval_loss": 0.14020109176635742, |
|
"eval_runtime": 136.7445, |
|
"eval_samples_per_second": 90.307, |
|
"eval_steps_per_second": 1.887, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 2184, |
|
"total_flos": 2.0946151345503928e+18, |
|
"train_loss": 0.3889721456519413, |
|
"train_runtime": 6320.4879, |
|
"train_samples_per_second": 66.424, |
|
"train_steps_per_second": 0.346 |
|
} |
|
], |
|
"max_steps": 2184, |
|
"num_train_epochs": 6, |
|
"total_flos": 2.0946151345503928e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|