|
{ |
|
"best_metric": 0.22263780236244202, |
|
"best_model_checkpoint": "DrishtiSharma/finetuned-SwinT-Indian-Food-Classification-v2/checkpoint-1500", |
|
"epoch": 5.0, |
|
"global_step": 1665, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019903903903903904, |
|
"loss": 2.4985, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019783783783783784, |
|
"loss": 1.4444, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019663663663663664, |
|
"loss": 1.0698, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019543543543543545, |
|
"loss": 1.0817, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019423423423423425, |
|
"loss": 0.8105, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019303303303303305, |
|
"loss": 0.8324, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019183183183183185, |
|
"loss": 0.8106, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019063063063063065, |
|
"loss": 0.6581, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018942942942942943, |
|
"loss": 0.6992, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00018822822822822823, |
|
"loss": 0.9351, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.8363443145589798, |
|
"eval_loss": 0.6016978025436401, |
|
"eval_runtime": 39.9065, |
|
"eval_samples_per_second": 23.58, |
|
"eval_steps_per_second": 2.957, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00018702702702702703, |
|
"loss": 0.9577, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00018582582582582583, |
|
"loss": 0.789, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018462462462462463, |
|
"loss": 1.0007, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00018342342342342343, |
|
"loss": 0.6186, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018222222222222224, |
|
"loss": 0.5886, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018114114114114115, |
|
"loss": 0.6685, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00017993993993993993, |
|
"loss": 0.886, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00017873873873873876, |
|
"loss": 0.7398, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00017753753753753756, |
|
"loss": 0.6541, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00017633633633633636, |
|
"loss": 0.5667, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.8767268862911796, |
|
"eval_loss": 0.43840092420578003, |
|
"eval_runtime": 39.8194, |
|
"eval_samples_per_second": 23.632, |
|
"eval_steps_per_second": 2.963, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017513513513513516, |
|
"loss": 0.5596, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017393393393393394, |
|
"loss": 0.5697, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017273273273273274, |
|
"loss": 0.6168, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017153153153153154, |
|
"loss": 0.6351, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017033033033033034, |
|
"loss": 0.578, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016912912912912911, |
|
"loss": 0.8882, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00016792792792792794, |
|
"loss": 0.6284, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016672672672672675, |
|
"loss": 0.8874, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00016552552552552555, |
|
"loss": 0.7083, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016432432432432435, |
|
"loss": 0.5548, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.8767268862911796, |
|
"eval_loss": 0.42152631282806396, |
|
"eval_runtime": 40.3274, |
|
"eval_samples_per_second": 23.334, |
|
"eval_steps_per_second": 2.926, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016312312312312312, |
|
"loss": 0.6819, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016192192192192192, |
|
"loss": 0.4283, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00016072072072072073, |
|
"loss": 0.4916, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00015951951951951953, |
|
"loss": 0.5186, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001583183183183183, |
|
"loss": 0.6319, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00015711711711711713, |
|
"loss": 0.3755, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00015591591591591593, |
|
"loss": 0.3293, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00015471471471471474, |
|
"loss": 0.4974, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00015351351351351354, |
|
"loss": 0.6046, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001523123123123123, |
|
"loss": 0.5516, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.873538788522848, |
|
"eval_loss": 0.42902085185050964, |
|
"eval_runtime": 40.2528, |
|
"eval_samples_per_second": 23.377, |
|
"eval_steps_per_second": 2.931, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001511111111111111, |
|
"loss": 0.5308, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00014990990990990991, |
|
"loss": 0.4627, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00014870870870870872, |
|
"loss": 0.4123, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00014750750750750752, |
|
"loss": 0.5095, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00014630630630630632, |
|
"loss": 0.5262, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00014510510510510512, |
|
"loss": 0.6747, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00014390390390390392, |
|
"loss": 0.3073, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00014270270270270272, |
|
"loss": 0.5508, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001415015015015015, |
|
"loss": 0.5183, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001403003003003003, |
|
"loss": 0.3782, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.89798087141339, |
|
"eval_loss": 0.35022875666618347, |
|
"eval_runtime": 39.8292, |
|
"eval_samples_per_second": 23.626, |
|
"eval_steps_per_second": 2.963, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001390990990990991, |
|
"loss": 0.4911, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001378978978978979, |
|
"loss": 0.544, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001366966966966967, |
|
"loss": 0.5958, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001354954954954955, |
|
"loss": 0.4798, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001342942942942943, |
|
"loss": 0.3317, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001330930930930931, |
|
"loss": 0.3693, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001318918918918919, |
|
"loss": 0.3432, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001306906906906907, |
|
"loss": 0.4374, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001294894894894895, |
|
"loss": 0.5319, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001282882882882883, |
|
"loss": 0.3115, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.8937300743889479, |
|
"eval_loss": 0.3780476152896881, |
|
"eval_runtime": 39.7563, |
|
"eval_samples_per_second": 23.669, |
|
"eval_steps_per_second": 2.968, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001270870870870871, |
|
"loss": 0.3055, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0001258858858858859, |
|
"loss": 0.361, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0001246846846846847, |
|
"loss": 0.5006, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0001234834834834835, |
|
"loss": 0.4293, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001222822822822823, |
|
"loss": 0.3787, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00012108108108108109, |
|
"loss": 0.4617, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00011987987987987988, |
|
"loss": 0.4291, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00011867867867867868, |
|
"loss": 0.4525, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00011747747747747748, |
|
"loss": 0.419, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00011627627627627628, |
|
"loss": 0.4229, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.8905419766206164, |
|
"eval_loss": 0.35450688004493713, |
|
"eval_runtime": 39.4995, |
|
"eval_samples_per_second": 23.823, |
|
"eval_steps_per_second": 2.987, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0001150750750750751, |
|
"loss": 0.117, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00011387387387387387, |
|
"loss": 0.3166, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00011267267267267267, |
|
"loss": 0.3709, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00011147147147147147, |
|
"loss": 0.2417, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00011027027027027029, |
|
"loss": 0.4182, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00010906906906906909, |
|
"loss": 0.3219, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00010786786786786786, |
|
"loss": 0.3879, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00010666666666666667, |
|
"loss": 0.3304, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00010546546546546547, |
|
"loss": 0.4358, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00010426426426426428, |
|
"loss": 0.3832, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.9086078639744952, |
|
"eval_loss": 0.3445776104927063, |
|
"eval_runtime": 40.5544, |
|
"eval_samples_per_second": 23.203, |
|
"eval_steps_per_second": 2.91, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00010306306306306306, |
|
"loss": 0.2202, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00010186186186186186, |
|
"loss": 0.2851, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00010066066066066066, |
|
"loss": 0.2624, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.945945945945948e-05, |
|
"loss": 0.3048, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.825825825825826e-05, |
|
"loss": 0.2088, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 9.705705705705707e-05, |
|
"loss": 0.3517, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.585585585585585e-05, |
|
"loss": 0.4147, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.465465465465466e-05, |
|
"loss": 0.3886, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 9.345345345345346e-05, |
|
"loss": 0.2345, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.225225225225226e-05, |
|
"loss": 0.2745, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy": 0.9149840595111584, |
|
"eval_loss": 0.32985183596611023, |
|
"eval_runtime": 40.5661, |
|
"eval_samples_per_second": 23.197, |
|
"eval_steps_per_second": 2.909, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.105105105105105e-05, |
|
"loss": 0.2712, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.984984984984985e-05, |
|
"loss": 0.389, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 8.864864864864866e-05, |
|
"loss": 0.4054, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 8.744744744744745e-05, |
|
"loss": 0.3054, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.624624624624625e-05, |
|
"loss": 0.2941, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.504504504504504e-05, |
|
"loss": 0.2896, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.384384384384384e-05, |
|
"loss": 0.2729, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.264264264264265e-05, |
|
"loss": 0.5281, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.144144144144145e-05, |
|
"loss": 0.2477, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.024024024024024e-05, |
|
"loss": 0.2063, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9277364505844846, |
|
"eval_loss": 0.2592255771160126, |
|
"eval_runtime": 40.3202, |
|
"eval_samples_per_second": 23.338, |
|
"eval_steps_per_second": 2.927, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 7.903903903903904e-05, |
|
"loss": 0.1643, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 7.783783783783785e-05, |
|
"loss": 0.322, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.663663663663664e-05, |
|
"loss": 0.1657, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.543543543543544e-05, |
|
"loss": 0.2675, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 7.423423423423423e-05, |
|
"loss": 0.2194, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.303303303303303e-05, |
|
"loss": 0.1154, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 7.183183183183183e-05, |
|
"loss": 0.1816, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 7.063063063063064e-05, |
|
"loss": 0.1359, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 6.942942942942944e-05, |
|
"loss": 0.3088, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 6.822822822822822e-05, |
|
"loss": 0.2077, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.9149840595111584, |
|
"eval_loss": 0.37718382477760315, |
|
"eval_runtime": 40.237, |
|
"eval_samples_per_second": 23.386, |
|
"eval_steps_per_second": 2.933, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 6.702702702702704e-05, |
|
"loss": 0.316, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.582582582582583e-05, |
|
"loss": 0.1016, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 6.462462462462463e-05, |
|
"loss": 0.1508, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.342342342342342e-05, |
|
"loss": 0.2685, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.222222222222222e-05, |
|
"loss": 0.2662, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 6.102102102102102e-05, |
|
"loss": 0.2772, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 5.981981981981982e-05, |
|
"loss": 0.1013, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 5.8618618618618625e-05, |
|
"loss": 0.1762, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.741741741741742e-05, |
|
"loss": 0.1514, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.621621621621622e-05, |
|
"loss": 0.2041, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.9213602550478215, |
|
"eval_loss": 0.2855222523212433, |
|
"eval_runtime": 39.5438, |
|
"eval_samples_per_second": 23.796, |
|
"eval_steps_per_second": 2.984, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 5.5015015015015016e-05, |
|
"loss": 0.1939, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 5.381381381381382e-05, |
|
"loss": 0.0919, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 5.261261261261261e-05, |
|
"loss": 0.2731, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 5.1411411411411414e-05, |
|
"loss": 0.1865, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5.0210210210210216e-05, |
|
"loss": 0.238, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.900900900900901e-05, |
|
"loss": 0.0804, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.7807807807807806e-05, |
|
"loss": 0.3592, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.660660660660661e-05, |
|
"loss": 0.323, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.540540540540541e-05, |
|
"loss": 0.1983, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.420420420420421e-05, |
|
"loss": 0.2541, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_accuracy": 0.9330499468650372, |
|
"eval_loss": 0.25022733211517334, |
|
"eval_runtime": 39.5082, |
|
"eval_samples_per_second": 23.818, |
|
"eval_steps_per_second": 2.987, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 4.3003003003003006e-05, |
|
"loss": 0.3586, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.180180180180181e-05, |
|
"loss": 0.2121, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.06006006006006e-05, |
|
"loss": 0.1308, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.93993993993994e-05, |
|
"loss": 0.2163, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.81981981981982e-05, |
|
"loss": 0.0668, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.6996996996996994e-05, |
|
"loss": 0.1516, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.57957957957958e-05, |
|
"loss": 0.1323, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.45945945945946e-05, |
|
"loss": 0.237, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.33933933933934e-05, |
|
"loss": 0.1337, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.2192192192192194e-05, |
|
"loss": 0.1203, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.9362380446333688, |
|
"eval_loss": 0.25774362683296204, |
|
"eval_runtime": 39.3333, |
|
"eval_samples_per_second": 23.924, |
|
"eval_steps_per_second": 3.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.0990990990990995e-05, |
|
"loss": 0.0531, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.978978978978979e-05, |
|
"loss": 0.1448, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.858858858858859e-05, |
|
"loss": 0.0946, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.7387387387387387e-05, |
|
"loss": 0.0927, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.618618618618619e-05, |
|
"loss": 0.1215, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.4984984984984987e-05, |
|
"loss": 0.1332, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.3783783783783785e-05, |
|
"loss": 0.1657, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.2582582582582583e-05, |
|
"loss": 0.2, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 2.1381381381381382e-05, |
|
"loss": 0.1323, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.018018018018018e-05, |
|
"loss": 0.1594, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_accuracy": 0.9458023379383634, |
|
"eval_loss": 0.22263780236244202, |
|
"eval_runtime": 39.564, |
|
"eval_samples_per_second": 23.784, |
|
"eval_steps_per_second": 2.983, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.8978978978978982e-05, |
|
"loss": 0.1963, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.18, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.6576576576576578e-05, |
|
"loss": 0.1519, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 1.5375375375375377e-05, |
|
"loss": 0.1375, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.4174174174174173e-05, |
|
"loss": 0.1323, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 1.2972972972972975e-05, |
|
"loss": 0.128, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.1771771771771773e-05, |
|
"loss": 0.0505, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.057057057057057e-05, |
|
"loss": 0.0957, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 9.36936936936937e-06, |
|
"loss": 0.043, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.168168168168168e-06, |
|
"loss": 0.1015, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.9436769394261424, |
|
"eval_loss": 0.236781507730484, |
|
"eval_runtime": 39.398, |
|
"eval_samples_per_second": 23.884, |
|
"eval_steps_per_second": 2.995, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 6.966966966966967e-06, |
|
"loss": 0.1853, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 5.765765765765766e-06, |
|
"loss": 0.0874, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 4.5645645645645645e-06, |
|
"loss": 0.0949, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.3633633633633636e-06, |
|
"loss": 0.1901, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.1621621621621623e-06, |
|
"loss": 0.1235, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 9.60960960960961e-07, |
|
"loss": 0.0785, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1665, |
|
"total_flos": 2.0875695162281165e+18, |
|
"train_loss": 0.38661149321375665, |
|
"train_runtime": 2052.9844, |
|
"train_samples_per_second": 12.976, |
|
"train_steps_per_second": 0.811 |
|
} |
|
], |
|
"max_steps": 1665, |
|
"num_train_epochs": 5, |
|
"total_flos": 2.0875695162281165e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|