{
  "best_metric": 0.9994666666666666,
  "best_model_checkpoint": "plant-vit-model-1/checkpoint-332",
  "epoch": 10.0,
  "global_step": 830,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "learning_rate": 6.024096385542169e-06,
      "loss": 2.6748,
      "step": 10
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.2048192771084338e-05,
      "loss": 2.5895,
      "step": 20
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.8072289156626505e-05,
      "loss": 2.4413,
      "step": 30
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.4096385542168677e-05,
      "loss": 2.2745,
      "step": 40
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.012048192771085e-05,
      "loss": 2.0245,
      "step": 50
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.614457831325301e-05,
      "loss": 1.7626,
      "step": 60
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.2168674698795186e-05,
      "loss": 1.5706,
      "step": 70
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.8192771084337354e-05,
      "loss": 1.296,
      "step": 80
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9226666666666666,
      "eval_loss": 1.0360676050186157,
      "eval_runtime": 28.5267,
      "eval_samples_per_second": 65.728,
      "eval_steps_per_second": 2.068,
      "step": 83
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.953145917001339e-05,
      "loss": 1.0868,
      "step": 90
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.886211512717537e-05,
      "loss": 0.8857,
      "step": 100
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.8192771084337354e-05,
      "loss": 0.8006,
      "step": 110
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.7523427041499336e-05,
      "loss": 0.6679,
      "step": 120
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.685408299866131e-05,
      "loss": 0.5875,
      "step": 130
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.61847389558233e-05,
      "loss": 0.5278,
      "step": 140
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.5515394912985275e-05,
      "loss": 0.4966,
      "step": 150
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.484605087014726e-05,
      "loss": 0.4476,
      "step": 160
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9904,
      "eval_loss": 0.3645952343940735,
      "eval_runtime": 27.9536,
      "eval_samples_per_second": 67.075,
      "eval_steps_per_second": 2.111,
      "step": 166
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.417670682730924e-05,
      "loss": 0.4392,
      "step": 170
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.350736278447122e-05,
      "loss": 0.381,
      "step": 180
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.2838018741633203e-05,
      "loss": 0.3494,
      "step": 190
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.2168674698795186e-05,
      "loss": 0.336,
      "step": 200
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.149933065595716e-05,
      "loss": 0.3078,
      "step": 210
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.082998661311915e-05,
      "loss": 0.3163,
      "step": 220
    },
    {
      "epoch": 2.77,
      "learning_rate": 4.0160642570281125e-05,
      "loss": 0.2963,
      "step": 230
    },
    {
      "epoch": 2.89,
      "learning_rate": 3.949129852744311e-05,
      "loss": 0.2731,
      "step": 240
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9952,
      "eval_loss": 0.21737419068813324,
      "eval_runtime": 27.9589,
      "eval_samples_per_second": 67.063,
      "eval_steps_per_second": 2.11,
      "step": 249
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.882195448460509e-05,
      "loss": 0.284,
      "step": 250
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.815261044176707e-05,
      "loss": 0.2695,
      "step": 260
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.748326639892905e-05,
      "loss": 0.2428,
      "step": 270
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.6813922356091035e-05,
      "loss": 0.239,
      "step": 280
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.614457831325301e-05,
      "loss": 0.2388,
      "step": 290
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.5475234270415e-05,
      "loss": 0.2138,
      "step": 300
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.4805890227576974e-05,
      "loss": 0.2001,
      "step": 310
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.413654618473896e-05,
      "loss": 0.2144,
      "step": 320
    },
    {
      "epoch": 3.98,
      "learning_rate": 3.346720214190094e-05,
      "loss": 0.2097,
      "step": 330
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9994666666666666,
      "eval_loss": 0.15595677495002747,
      "eval_runtime": 27.9371,
      "eval_samples_per_second": 67.115,
      "eval_steps_per_second": 2.112,
      "step": 332
    },
    {
      "epoch": 4.1,
      "learning_rate": 3.279785809906292e-05,
      "loss": 0.1877,
      "step": 340
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.21285140562249e-05,
      "loss": 0.1834,
      "step": 350
    },
    {
      "epoch": 4.34,
      "learning_rate": 3.1459170013386885e-05,
      "loss": 0.1849,
      "step": 360
    },
    {
      "epoch": 4.46,
      "learning_rate": 3.078982597054887e-05,
      "loss": 0.1581,
      "step": 370
    },
    {
      "epoch": 4.58,
      "learning_rate": 3.012048192771085e-05,
      "loss": 0.1943,
      "step": 380
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.9451137884872827e-05,
      "loss": 0.1651,
      "step": 390
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.878179384203481e-05,
      "loss": 0.1896,
      "step": 400
    },
    {
      "epoch": 4.94,
      "learning_rate": 2.8112449799196788e-05,
      "loss": 0.1679,
      "step": 410
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9973333333333333,
      "eval_loss": 0.12879624962806702,
      "eval_runtime": 27.9408,
      "eval_samples_per_second": 67.106,
      "eval_steps_per_second": 2.112,
      "step": 415
    },
    {
      "epoch": 5.06,
      "learning_rate": 2.7443105756358774e-05,
      "loss": 0.1691,
      "step": 420
    },
    {
      "epoch": 5.18,
      "learning_rate": 2.6773761713520752e-05,
      "loss": 0.1524,
      "step": 430
    },
    {
      "epoch": 5.3,
      "learning_rate": 2.6104417670682734e-05,
      "loss": 0.1379,
      "step": 440
    },
    {
      "epoch": 5.42,
      "learning_rate": 2.5435073627844713e-05,
      "loss": 0.1401,
      "step": 450
    },
    {
      "epoch": 5.54,
      "learning_rate": 2.4765729585006695e-05,
      "loss": 0.1446,
      "step": 460
    },
    {
      "epoch": 5.66,
      "learning_rate": 2.4096385542168677e-05,
      "loss": 0.1384,
      "step": 470
    },
    {
      "epoch": 5.78,
      "learning_rate": 2.3427041499330656e-05,
      "loss": 0.1356,
      "step": 480
    },
    {
      "epoch": 5.9,
      "learning_rate": 2.2757697456492638e-05,
      "loss": 0.135,
      "step": 490
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9984,
      "eval_loss": 0.10523158311843872,
      "eval_runtime": 27.794,
      "eval_samples_per_second": 67.461,
      "eval_steps_per_second": 2.123,
      "step": 498
    },
    {
      "epoch": 6.02,
      "learning_rate": 2.208835341365462e-05,
      "loss": 0.135,
      "step": 500
    },
    {
      "epoch": 6.14,
      "learning_rate": 2.1419009370816602e-05,
      "loss": 0.1418,
      "step": 510
    },
    {
      "epoch": 6.27,
      "learning_rate": 2.074966532797858e-05,
      "loss": 0.1291,
      "step": 520
    },
    {
      "epoch": 6.39,
      "learning_rate": 2.0080321285140562e-05,
      "loss": 0.1321,
      "step": 530
    },
    {
      "epoch": 6.51,
      "learning_rate": 1.9410977242302544e-05,
      "loss": 0.1349,
      "step": 540
    },
    {
      "epoch": 6.63,
      "learning_rate": 1.8741633199464527e-05,
      "loss": 0.1229,
      "step": 550
    },
    {
      "epoch": 6.75,
      "learning_rate": 1.8072289156626505e-05,
      "loss": 0.1285,
      "step": 560
    },
    {
      "epoch": 6.87,
      "learning_rate": 1.7402945113788487e-05,
      "loss": 0.1246,
      "step": 570
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.673360107095047e-05,
      "loss": 0.118,
      "step": 580
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9989333333333333,
      "eval_loss": 0.09175615012645721,
      "eval_runtime": 27.7375,
      "eval_samples_per_second": 67.598,
      "eval_steps_per_second": 2.127,
      "step": 581
    },
    {
      "epoch": 7.11,
      "learning_rate": 1.606425702811245e-05,
      "loss": 0.1359,
      "step": 590
    },
    {
      "epoch": 7.23,
      "learning_rate": 1.5394912985274433e-05,
      "loss": 0.1081,
      "step": 600
    },
    {
      "epoch": 7.35,
      "learning_rate": 1.4725568942436414e-05,
      "loss": 0.1331,
      "step": 610
    },
    {
      "epoch": 7.47,
      "learning_rate": 1.4056224899598394e-05,
      "loss": 0.1095,
      "step": 620
    },
    {
      "epoch": 7.59,
      "learning_rate": 1.3386880856760376e-05,
      "loss": 0.1183,
      "step": 630
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.2717536813922356e-05,
      "loss": 0.1257,
      "step": 640
    },
    {
      "epoch": 7.83,
      "learning_rate": 1.2048192771084338e-05,
      "loss": 0.1201,
      "step": 650
    },
    {
      "epoch": 7.95,
      "learning_rate": 1.1378848728246319e-05,
      "loss": 0.1054,
      "step": 660
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9989333333333333,
      "eval_loss": 0.08257802575826645,
      "eval_runtime": 27.821,
      "eval_samples_per_second": 67.395,
      "eval_steps_per_second": 2.121,
      "step": 664
    },
    {
      "epoch": 8.07,
      "learning_rate": 1.0709504685408301e-05,
      "loss": 0.0961,
      "step": 670
    },
    {
      "epoch": 8.19,
      "learning_rate": 1.0040160642570281e-05,
      "loss": 0.1112,
      "step": 680
    },
    {
      "epoch": 8.31,
      "learning_rate": 9.370816599732263e-06,
      "loss": 0.1009,
      "step": 690
    },
    {
      "epoch": 8.43,
      "learning_rate": 8.701472556894244e-06,
      "loss": 0.0931,
      "step": 700
    },
    {
      "epoch": 8.55,
      "learning_rate": 8.032128514056226e-06,
      "loss": 0.0987,
      "step": 710
    },
    {
      "epoch": 8.67,
      "learning_rate": 7.362784471218207e-06,
      "loss": 0.1074,
      "step": 720
    },
    {
      "epoch": 8.8,
      "learning_rate": 6.693440428380188e-06,
      "loss": 0.1096,
      "step": 730
    },
    {
      "epoch": 8.92,
      "learning_rate": 6.024096385542169e-06,
      "loss": 0.1083,
      "step": 740
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9989333333333333,
      "eval_loss": 0.07765089720487595,
      "eval_runtime": 27.9635,
      "eval_samples_per_second": 67.052,
      "eval_steps_per_second": 2.11,
      "step": 747
    },
    {
      "epoch": 9.04,
      "learning_rate": 5.3547523427041504e-06,
      "loss": 0.1025,
      "step": 750
    },
    {
      "epoch": 9.16,
      "learning_rate": 4.685408299866132e-06,
      "loss": 0.0993,
      "step": 760
    },
    {
      "epoch": 9.28,
      "learning_rate": 4.016064257028113e-06,
      "loss": 0.0878,
      "step": 770
    },
    {
      "epoch": 9.4,
      "learning_rate": 3.346720214190094e-06,
      "loss": 0.1114,
      "step": 780
    },
    {
      "epoch": 9.52,
      "learning_rate": 2.6773761713520752e-06,
      "loss": 0.1085,
      "step": 790
    },
    {
      "epoch": 9.64,
      "learning_rate": 2.0080321285140564e-06,
      "loss": 0.0873,
      "step": 800
    },
    {
      "epoch": 9.76,
      "learning_rate": 1.3386880856760376e-06,
      "loss": 0.1037,
      "step": 810
    },
    {
      "epoch": 9.88,
      "learning_rate": 6.693440428380188e-07,
      "loss": 0.0947,
      "step": 820
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0,
      "loss": 0.0918,
      "step": 830
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9994666666666666,
      "eval_loss": 0.07559256255626678,
      "eval_runtime": 27.9992,
      "eval_samples_per_second": 66.966,
      "eval_steps_per_second": 2.107,
      "step": 830
    },
    {
      "epoch": 10.0,
      "step": 830,
      "total_flos": 8.232859423714222e+18,
      "train_loss": 0.4019865568861904,
      "train_runtime": 5542.9491,
      "train_samples_per_second": 19.165,
      "train_steps_per_second": 0.15
    }
  ],
  "max_steps": 830,
  "num_train_epochs": 10,
  "total_flos": 8.232859423714222e+18,
  "trial_name": null,
  "trial_params": null
}