|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 10700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.47e-05, |
|
"loss": 9.2459, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.97e-05, |
|
"loss": 4.0521, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.4699999999999996e-05, |
|
"loss": 3.4098, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 5.97e-05, |
|
"loss": 3.2533, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 7.47e-05, |
|
"loss": 3.1982, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 8.969999999999998e-05, |
|
"loss": 3.1301, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00010469999999999998, |
|
"loss": 3.0863, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.0001197, |
|
"loss": 3.0034, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.0001347, |
|
"loss": 2.5449, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 0.00014969999999999998, |
|
"loss": 2.0192, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 0.0001647, |
|
"loss": 1.846, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 0.00017969999999999998, |
|
"loss": 1.7371, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 0.0001947, |
|
"loss": 1.6727, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 0.00020969999999999997, |
|
"loss": 1.6512, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 0.0002247, |
|
"loss": 1.6324, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 0.0002397, |
|
"loss": 1.5982, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 0.00025469999999999996, |
|
"loss": 1.564, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 0.0002697, |
|
"loss": 1.5777, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 0.0002847, |
|
"loss": 1.5636, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 0.00029969999999999997, |
|
"loss": 1.5412, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 0.6161174178123474, |
|
"eval_runtime": 82.7828, |
|
"eval_samples_per_second": 19.485, |
|
"eval_steps_per_second": 19.485, |
|
"eval_wer": 0.5747115912836477, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 0.00029662068965517237, |
|
"loss": 1.5532, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"learning_rate": 0.0002931724137931034, |
|
"loss": 1.547, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"learning_rate": 0.00028975862068965515, |
|
"loss": 1.5271, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"learning_rate": 0.00028631034482758615, |
|
"loss": 1.5141, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 0.0002828620689655172, |
|
"loss": 1.492, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 0.00027941379310344826, |
|
"loss": 1.4687, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"learning_rate": 0.00027596551724137926, |
|
"loss": 1.4519, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"learning_rate": 0.0002725172413793103, |
|
"loss": 1.4215, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 0.0002690689655172414, |
|
"loss": 1.4192, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"learning_rate": 0.00026562068965517243, |
|
"loss": 1.418, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 0.00026217241379310343, |
|
"loss": 1.3969, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 29.91, |
|
"learning_rate": 0.00025875862068965515, |
|
"loss": 1.3693, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 30.84, |
|
"learning_rate": 0.0002553448275862069, |
|
"loss": 1.3633, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"learning_rate": 0.0002518965517241379, |
|
"loss": 1.3744, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 32.71, |
|
"learning_rate": 0.00024844827586206894, |
|
"loss": 1.3554, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 33.64, |
|
"learning_rate": 0.000245, |
|
"loss": 1.3262, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 34.58, |
|
"learning_rate": 0.00024155172413793102, |
|
"loss": 1.321, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 35.51, |
|
"learning_rate": 0.00023810344827586205, |
|
"loss": 1.3375, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 36.45, |
|
"learning_rate": 0.0002346551724137931, |
|
"loss": 1.3009, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 37.38, |
|
"learning_rate": 0.00023120689655172413, |
|
"loss": 1.311, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 37.38, |
|
"eval_loss": 0.5707210302352905, |
|
"eval_runtime": 86.6018, |
|
"eval_samples_per_second": 18.625, |
|
"eval_steps_per_second": 18.625, |
|
"eval_wer": 0.5070499908441677, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"learning_rate": 0.00022775862068965516, |
|
"loss": 1.2889, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 39.25, |
|
"learning_rate": 0.00022431034482758616, |
|
"loss": 1.3253, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 40.19, |
|
"learning_rate": 0.00022086206896551722, |
|
"loss": 1.2888, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"learning_rate": 0.00021741379310344825, |
|
"loss": 1.2567, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 42.06, |
|
"learning_rate": 0.0002139655172413793, |
|
"loss": 1.2445, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"learning_rate": 0.00021051724137931033, |
|
"loss": 1.2608, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"learning_rate": 0.00020706896551724136, |
|
"loss": 1.2272, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 44.86, |
|
"learning_rate": 0.0002036206896551724, |
|
"loss": 1.2186, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 45.79, |
|
"learning_rate": 0.0002001724137931034, |
|
"loss": 1.2086, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 46.73, |
|
"learning_rate": 0.00019672413793103444, |
|
"loss": 1.2042, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 47.66, |
|
"learning_rate": 0.0001932758620689655, |
|
"loss": 1.1856, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 48.6, |
|
"learning_rate": 0.00018982758620689653, |
|
"loss": 1.1965, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 49.53, |
|
"learning_rate": 0.00018637931034482758, |
|
"loss": 1.2104, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 50.47, |
|
"learning_rate": 0.0001829310344827586, |
|
"loss": 1.1631, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 51.4, |
|
"learning_rate": 0.00017948275862068964, |
|
"loss": 1.1535, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 52.34, |
|
"learning_rate": 0.0001760344827586207, |
|
"loss": 1.1615, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 53.27, |
|
"learning_rate": 0.0001725862068965517, |
|
"loss": 1.1632, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 54.21, |
|
"learning_rate": 0.00016913793103448272, |
|
"loss": 1.1431, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 55.14, |
|
"learning_rate": 0.00016568965517241378, |
|
"loss": 1.1245, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 56.07, |
|
"learning_rate": 0.0001622413793103448, |
|
"loss": 1.1367, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 56.07, |
|
"eval_loss": 0.5372028946876526, |
|
"eval_runtime": 83.9889, |
|
"eval_samples_per_second": 19.205, |
|
"eval_steps_per_second": 19.205, |
|
"eval_wer": 0.46639809558688883, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 0.00015879310344827586, |
|
"loss": 1.1242, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 57.94, |
|
"learning_rate": 0.0001553448275862069, |
|
"loss": 1.1016, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 58.88, |
|
"learning_rate": 0.00015189655172413792, |
|
"loss": 1.0946, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 59.81, |
|
"learning_rate": 0.00014844827586206895, |
|
"loss": 1.0953, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 60.75, |
|
"learning_rate": 0.000145, |
|
"loss": 1.1016, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 61.68, |
|
"learning_rate": 0.00014155172413793103, |
|
"loss": 1.0745, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 62.62, |
|
"learning_rate": 0.00013810344827586206, |
|
"loss": 1.072, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 63.55, |
|
"learning_rate": 0.00013465517241379309, |
|
"loss": 1.0621, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 64.49, |
|
"learning_rate": 0.00013120689655172414, |
|
"loss": 1.0659, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 65.42, |
|
"learning_rate": 0.00012775862068965517, |
|
"loss": 1.0271, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 66.36, |
|
"learning_rate": 0.0001243103448275862, |
|
"loss": 1.0381, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 67.29, |
|
"learning_rate": 0.00012086206896551723, |
|
"loss": 1.0332, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 68.22, |
|
"learning_rate": 0.00011741379310344827, |
|
"loss": 1.0279, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 69.16, |
|
"learning_rate": 0.00011396551724137931, |
|
"loss": 1.0174, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 70.09, |
|
"learning_rate": 0.00011051724137931032, |
|
"loss": 1.0124, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 71.03, |
|
"learning_rate": 0.00010706896551724137, |
|
"loss": 1.0142, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 71.96, |
|
"learning_rate": 0.00010362068965517241, |
|
"loss": 0.9809, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 72.9, |
|
"learning_rate": 0.00010017241379310345, |
|
"loss": 0.9609, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 73.83, |
|
"learning_rate": 9.672413793103446e-05, |
|
"loss": 0.9808, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 74.77, |
|
"learning_rate": 9.32758620689655e-05, |
|
"loss": 0.9696, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 74.77, |
|
"eval_loss": 0.5442632436752319, |
|
"eval_runtime": 84.0493, |
|
"eval_samples_per_second": 19.191, |
|
"eval_steps_per_second": 19.191, |
|
"eval_wer": 0.4327961911737777, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 75.7, |
|
"learning_rate": 8.982758620689655e-05, |
|
"loss": 0.9559, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 76.64, |
|
"learning_rate": 8.637931034482759e-05, |
|
"loss": 0.9507, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 77.57, |
|
"learning_rate": 8.29310344827586e-05, |
|
"loss": 0.9358, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 78.5, |
|
"learning_rate": 7.948275862068965e-05, |
|
"loss": 0.9367, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 79.44, |
|
"learning_rate": 7.603448275862069e-05, |
|
"loss": 0.9223, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 80.37, |
|
"learning_rate": 7.258620689655172e-05, |
|
"loss": 0.9258, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 81.31, |
|
"learning_rate": 6.913793103448276e-05, |
|
"loss": 0.8865, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 82.24, |
|
"learning_rate": 6.568965517241379e-05, |
|
"loss": 0.9062, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 83.18, |
|
"learning_rate": 6.224137931034483e-05, |
|
"loss": 0.88, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 84.11, |
|
"learning_rate": 5.8793103448275856e-05, |
|
"loss": 0.8794, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 85.05, |
|
"learning_rate": 5.534482758620689e-05, |
|
"loss": 0.8892, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 85.98, |
|
"learning_rate": 5.1896551724137926e-05, |
|
"loss": 0.8746, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 86.92, |
|
"learning_rate": 4.844827586206896e-05, |
|
"loss": 0.8465, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 87.85, |
|
"learning_rate": 4.5034482758620686e-05, |
|
"loss": 0.8453, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 88.79, |
|
"learning_rate": 4.162068965517241e-05, |
|
"loss": 0.8472, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 89.72, |
|
"learning_rate": 3.817241379310344e-05, |
|
"loss": 0.8638, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 90.65, |
|
"learning_rate": 3.4724137931034475e-05, |
|
"loss": 0.8354, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 91.59, |
|
"learning_rate": 3.127586206896551e-05, |
|
"loss": 0.825, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 92.52, |
|
"learning_rate": 2.7827586206896548e-05, |
|
"loss": 0.8395, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 93.46, |
|
"learning_rate": 2.4379310344827583e-05, |
|
"loss": 0.8163, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 93.46, |
|
"eval_loss": 0.5916153788566589, |
|
"eval_runtime": 83.5748, |
|
"eval_samples_per_second": 19.3, |
|
"eval_steps_per_second": 19.3, |
|
"eval_wer": 0.4123786852224867, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 94.39, |
|
"learning_rate": 2.093103448275862e-05, |
|
"loss": 0.8116, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 95.33, |
|
"learning_rate": 1.7482758620689653e-05, |
|
"loss": 0.8103, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 96.26, |
|
"learning_rate": 1.4034482758620688e-05, |
|
"loss": 0.8252, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 97.2, |
|
"learning_rate": 1.0586206896551723e-05, |
|
"loss": 0.805, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 98.13, |
|
"learning_rate": 7.1379310344827575e-06, |
|
"loss": 0.7983, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 99.07, |
|
"learning_rate": 3.689655172413793e-06, |
|
"loss": 0.7991, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.413793103448276e-07, |
|
"loss": 0.8191, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 10700, |
|
"total_flos": 4.070849231885728e+19, |
|
"train_loss": 1.4013342235244324, |
|
"train_runtime": 14378.2108, |
|
"train_samples_per_second": 23.647, |
|
"train_steps_per_second": 0.744 |
|
} |
|
], |
|
"max_steps": 10700, |
|
"num_train_epochs": 100, |
|
"total_flos": 4.070849231885728e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|