|
{ |
|
"best_metric": 5.539509738576612, |
|
"best_model_checkpoint": "./training/results/checkpoint-20000", |
|
"epoch": 79.13669064748201, |
|
"eval_steps": 1000, |
|
"global_step": 22000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08992805755395683, |
|
"grad_norm": 12.73649787902832, |
|
"learning_rate": 1.2500000000000002e-07, |
|
"loss": 3.2522, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17985611510791366, |
|
"grad_norm": 12.000336647033691, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"loss": 3.0617, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2697841726618705, |
|
"grad_norm": 10.76065444946289, |
|
"learning_rate": 3.75e-07, |
|
"loss": 2.7165, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3597122302158273, |
|
"grad_norm": 8.36201286315918, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 2.2607, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44964028776978415, |
|
"grad_norm": 7.234769344329834, |
|
"learning_rate": 6.25e-07, |
|
"loss": 1.8433, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.539568345323741, |
|
"grad_norm": 6.549698829650879, |
|
"learning_rate": 7.5e-07, |
|
"loss": 1.5515, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6294964028776978, |
|
"grad_norm": 7.549570083618164, |
|
"learning_rate": 8.75e-07, |
|
"loss": 1.3346, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7194244604316546, |
|
"grad_norm": 5.8322930335998535, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.0572, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8093525179856115, |
|
"grad_norm": 3.925255537033081, |
|
"learning_rate": 1.125e-06, |
|
"loss": 0.6348, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.8992805755395683, |
|
"grad_norm": 3.1902644634246826, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.4882, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9892086330935251, |
|
"grad_norm": 3.355315923690796, |
|
"learning_rate": 1.3750000000000002e-06, |
|
"loss": 0.4032, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.079136690647482, |
|
"grad_norm": 3.4707915782928467, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.3355, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.169064748201439, |
|
"grad_norm": 3.261484384536743, |
|
"learning_rate": 1.6250000000000001e-06, |
|
"loss": 0.2896, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.2589928057553956, |
|
"grad_norm": 3.3107025623321533, |
|
"learning_rate": 1.75e-06, |
|
"loss": 0.2685, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.3489208633093526, |
|
"grad_norm": 2.6028969287872314, |
|
"learning_rate": 1.8750000000000003e-06, |
|
"loss": 0.2365, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.4388489208633093, |
|
"grad_norm": 3.380187749862671, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.2333, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.5287769784172662, |
|
"grad_norm": 3.0845112800598145, |
|
"learning_rate": 2.125e-06, |
|
"loss": 0.2191, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.6187050359712232, |
|
"grad_norm": 3.15523099899292, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.1949, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.70863309352518, |
|
"grad_norm": 2.5198237895965576, |
|
"learning_rate": 2.375e-06, |
|
"loss": 0.1756, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.7985611510791366, |
|
"grad_norm": 2.7945399284362793, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1748, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.8884892086330936, |
|
"grad_norm": 3.299269199371338, |
|
"learning_rate": 2.6250000000000003e-06, |
|
"loss": 0.1711, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.9784172661870505, |
|
"grad_norm": 2.3727056980133057, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"loss": 0.1495, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.068345323741007, |
|
"grad_norm": 2.1909244060516357, |
|
"learning_rate": 2.875e-06, |
|
"loss": 0.1196, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.158273381294964, |
|
"grad_norm": 2.45758318901062, |
|
"learning_rate": 3e-06, |
|
"loss": 0.1023, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.2482014388489207, |
|
"grad_norm": 2.009880542755127, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.1019, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.338129496402878, |
|
"grad_norm": 2.2170872688293457, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 0.0948, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.4280575539568345, |
|
"grad_norm": 1.9289822578430176, |
|
"learning_rate": 3.3750000000000003e-06, |
|
"loss": 0.0934, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.5179856115107913, |
|
"grad_norm": 2.0615289211273193, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.0935, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.6079136690647484, |
|
"grad_norm": 2.231041193008423, |
|
"learning_rate": 3.625e-06, |
|
"loss": 0.0923, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.697841726618705, |
|
"grad_norm": 1.953312873840332, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.0844, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.787769784172662, |
|
"grad_norm": 2.1245667934417725, |
|
"learning_rate": 3.875e-06, |
|
"loss": 0.0831, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.8776978417266186, |
|
"grad_norm": 1.8499614000320435, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0841, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.9676258992805753, |
|
"grad_norm": 2.0503857135772705, |
|
"learning_rate": 4.125e-06, |
|
"loss": 0.0854, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.0575539568345325, |
|
"grad_norm": 2.0084242820739746, |
|
"learning_rate": 4.25e-06, |
|
"loss": 0.0621, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.147482014388489, |
|
"grad_norm": 1.3122639656066895, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.0434, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 3.237410071942446, |
|
"grad_norm": 1.3615615367889404, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.0416, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.327338129496403, |
|
"grad_norm": 1.533996343612671, |
|
"learning_rate": 4.625000000000001e-06, |
|
"loss": 0.0451, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 3.41726618705036, |
|
"grad_norm": 1.573549509048462, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.0404, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.5071942446043165, |
|
"grad_norm": 1.4288333654403687, |
|
"learning_rate": 4.875e-06, |
|
"loss": 0.044, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 3.597122302158273, |
|
"grad_norm": 1.5075387954711914, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0479, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.597122302158273, |
|
"eval_loss": 0.10350359231233597, |
|
"eval_runtime": 1344.3937, |
|
"eval_samples_per_second": 1.653, |
|
"eval_steps_per_second": 0.103, |
|
"eval_wer": 20.29178701029401, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.68705035971223, |
|
"grad_norm": 1.842606782913208, |
|
"learning_rate": 4.998737373737374e-06, |
|
"loss": 0.0467, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 3.776978417266187, |
|
"grad_norm": 1.495784044265747, |
|
"learning_rate": 4.997474747474748e-06, |
|
"loss": 0.0437, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.866906474820144, |
|
"grad_norm": 2.054900646209717, |
|
"learning_rate": 4.9962121212121216e-06, |
|
"loss": 0.0497, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 3.956834532374101, |
|
"grad_norm": 1.438658356666565, |
|
"learning_rate": 4.9949494949494956e-06, |
|
"loss": 0.0398, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.046762589928058, |
|
"grad_norm": 1.3041224479675293, |
|
"learning_rate": 4.993686868686869e-06, |
|
"loss": 0.0293, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 4.136690647482014, |
|
"grad_norm": 1.2206145524978638, |
|
"learning_rate": 4.992424242424243e-06, |
|
"loss": 0.0227, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.226618705035971, |
|
"grad_norm": 1.2926621437072754, |
|
"learning_rate": 4.991161616161617e-06, |
|
"loss": 0.0231, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 4.316546762589928, |
|
"grad_norm": 1.4683257341384888, |
|
"learning_rate": 4.98989898989899e-06, |
|
"loss": 0.023, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.406474820143885, |
|
"grad_norm": 1.3095593452453613, |
|
"learning_rate": 4.988636363636364e-06, |
|
"loss": 0.0226, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 4.496402877697841, |
|
"grad_norm": 0.7059262990951538, |
|
"learning_rate": 4.987373737373738e-06, |
|
"loss": 0.0225, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.586330935251799, |
|
"grad_norm": 1.1493045091629028, |
|
"learning_rate": 4.986111111111112e-06, |
|
"loss": 0.022, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 4.676258992805756, |
|
"grad_norm": 1.9609806537628174, |
|
"learning_rate": 4.984848484848485e-06, |
|
"loss": 0.0232, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.766187050359712, |
|
"grad_norm": 1.5463200807571411, |
|
"learning_rate": 4.983585858585859e-06, |
|
"loss": 0.0206, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 4.856115107913669, |
|
"grad_norm": 0.858127772808075, |
|
"learning_rate": 4.982323232323233e-06, |
|
"loss": 0.0222, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.946043165467626, |
|
"grad_norm": 0.8384924530982971, |
|
"learning_rate": 4.981060606060606e-06, |
|
"loss": 0.0201, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 5.0359712230215825, |
|
"grad_norm": 0.9966625571250916, |
|
"learning_rate": 4.97979797979798e-06, |
|
"loss": 0.0173, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.125899280575539, |
|
"grad_norm": 0.6609445214271545, |
|
"learning_rate": 4.978535353535353e-06, |
|
"loss": 0.0113, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 5.215827338129497, |
|
"grad_norm": 0.82105952501297, |
|
"learning_rate": 4.977272727272728e-06, |
|
"loss": 0.012, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.305755395683454, |
|
"grad_norm": 1.0994760990142822, |
|
"learning_rate": 4.976010101010101e-06, |
|
"loss": 0.0118, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 5.39568345323741, |
|
"grad_norm": 0.4543660283088684, |
|
"learning_rate": 4.974747474747475e-06, |
|
"loss": 0.0112, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.485611510791367, |
|
"grad_norm": 3.425143241882324, |
|
"learning_rate": 4.973484848484849e-06, |
|
"loss": 0.0113, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 5.575539568345324, |
|
"grad_norm": 0.7691114544868469, |
|
"learning_rate": 4.9722222222222224e-06, |
|
"loss": 0.0114, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 5.66546762589928, |
|
"grad_norm": 0.5446438789367676, |
|
"learning_rate": 4.9709595959595964e-06, |
|
"loss": 0.0121, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 5.755395683453237, |
|
"grad_norm": 0.7232896089553833, |
|
"learning_rate": 4.9696969696969696e-06, |
|
"loss": 0.0118, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.845323741007194, |
|
"grad_norm": 1.3025506734848022, |
|
"learning_rate": 4.968434343434344e-06, |
|
"loss": 0.0135, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 5.935251798561151, |
|
"grad_norm": 1.2080421447753906, |
|
"learning_rate": 4.9671717171717176e-06, |
|
"loss": 0.0126, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.025179856115108, |
|
"grad_norm": 0.4218277633190155, |
|
"learning_rate": 4.9659090909090916e-06, |
|
"loss": 0.0094, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 6.115107913669065, |
|
"grad_norm": 0.5942659378051758, |
|
"learning_rate": 4.964646464646465e-06, |
|
"loss": 0.0071, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.205035971223022, |
|
"grad_norm": 0.31671133637428284, |
|
"learning_rate": 4.963383838383839e-06, |
|
"loss": 0.008, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 6.294964028776978, |
|
"grad_norm": 0.3538670539855957, |
|
"learning_rate": 4.962121212121213e-06, |
|
"loss": 0.0066, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 6.384892086330935, |
|
"grad_norm": 0.8252100348472595, |
|
"learning_rate": 4.960858585858586e-06, |
|
"loss": 0.006, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 6.474820143884892, |
|
"grad_norm": 0.9238548278808594, |
|
"learning_rate": 4.95959595959596e-06, |
|
"loss": 0.0074, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.564748201438849, |
|
"grad_norm": 1.1760324239730835, |
|
"learning_rate": 4.958333333333334e-06, |
|
"loss": 0.0066, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 6.654676258992806, |
|
"grad_norm": 0.3382113575935364, |
|
"learning_rate": 4.957070707070708e-06, |
|
"loss": 0.0103, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 6.744604316546763, |
|
"grad_norm": 0.9418781399726868, |
|
"learning_rate": 4.955808080808081e-06, |
|
"loss": 0.0092, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 6.83453237410072, |
|
"grad_norm": 0.7677399516105652, |
|
"learning_rate": 4.954545454545455e-06, |
|
"loss": 0.009, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.924460431654676, |
|
"grad_norm": 0.32002565264701843, |
|
"learning_rate": 4.953282828282829e-06, |
|
"loss": 0.0075, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 7.014388489208633, |
|
"grad_norm": 1.0049771070480347, |
|
"learning_rate": 4.952020202020202e-06, |
|
"loss": 0.0071, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 7.10431654676259, |
|
"grad_norm": 0.513941764831543, |
|
"learning_rate": 4.950757575757576e-06, |
|
"loss": 0.0043, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 7.194244604316546, |
|
"grad_norm": 0.8406050205230713, |
|
"learning_rate": 4.94949494949495e-06, |
|
"loss": 0.005, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.194244604316546, |
|
"eval_loss": 0.09395472705364227, |
|
"eval_runtime": 1340.6412, |
|
"eval_samples_per_second": 1.657, |
|
"eval_steps_per_second": 0.104, |
|
"eval_wer": 10.419906687402799, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.284172661870503, |
|
"grad_norm": 0.47227388620376587, |
|
"learning_rate": 4.948232323232323e-06, |
|
"loss": 0.005, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 7.374100719424461, |
|
"grad_norm": 0.2972259819507599, |
|
"learning_rate": 4.946969696969697e-06, |
|
"loss": 0.0047, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 7.4640287769784175, |
|
"grad_norm": 0.580878496170044, |
|
"learning_rate": 4.945707070707071e-06, |
|
"loss": 0.0047, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 7.553956834532374, |
|
"grad_norm": 0.0858689397573471, |
|
"learning_rate": 4.944444444444445e-06, |
|
"loss": 0.0047, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.643884892086331, |
|
"grad_norm": 0.9921578168869019, |
|
"learning_rate": 4.9431818181818184e-06, |
|
"loss": 0.0049, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 7.733812949640288, |
|
"grad_norm": 0.3222315311431885, |
|
"learning_rate": 4.9419191919191924e-06, |
|
"loss": 0.0039, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 7.823741007194244, |
|
"grad_norm": 0.2401006668806076, |
|
"learning_rate": 4.940656565656566e-06, |
|
"loss": 0.0045, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 7.913669064748201, |
|
"grad_norm": 0.26786544919013977, |
|
"learning_rate": 4.93939393939394e-06, |
|
"loss": 0.0037, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.003597122302159, |
|
"grad_norm": 1.120921015739441, |
|
"learning_rate": 4.938131313131314e-06, |
|
"loss": 0.0048, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 8.093525179856115, |
|
"grad_norm": 0.7425853610038757, |
|
"learning_rate": 4.936868686868687e-06, |
|
"loss": 0.0036, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 8.183453237410072, |
|
"grad_norm": 0.19618873298168182, |
|
"learning_rate": 4.935606060606061e-06, |
|
"loss": 0.0038, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 8.273381294964029, |
|
"grad_norm": 0.41672375798225403, |
|
"learning_rate": 4.934343434343435e-06, |
|
"loss": 0.003, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.363309352517986, |
|
"grad_norm": 0.3363110423088074, |
|
"learning_rate": 4.933080808080809e-06, |
|
"loss": 0.0031, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 8.453237410071942, |
|
"grad_norm": 0.8529962301254272, |
|
"learning_rate": 4.931818181818182e-06, |
|
"loss": 0.0034, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 8.543165467625899, |
|
"grad_norm": 0.15698625147342682, |
|
"learning_rate": 4.930555555555556e-06, |
|
"loss": 0.0033, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 8.633093525179856, |
|
"grad_norm": 0.19619868695735931, |
|
"learning_rate": 4.92929292929293e-06, |
|
"loss": 0.004, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.723021582733812, |
|
"grad_norm": 0.2903304994106293, |
|
"learning_rate": 4.928030303030303e-06, |
|
"loss": 0.0034, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 8.81294964028777, |
|
"grad_norm": 0.5127314329147339, |
|
"learning_rate": 4.926767676767677e-06, |
|
"loss": 0.0035, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 8.902877697841726, |
|
"grad_norm": 1.0652037858963013, |
|
"learning_rate": 4.925505050505051e-06, |
|
"loss": 0.0045, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 8.992805755395683, |
|
"grad_norm": 0.9570706486701965, |
|
"learning_rate": 4.924242424242425e-06, |
|
"loss": 0.0042, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.082733812949641, |
|
"grad_norm": 0.5939081907272339, |
|
"learning_rate": 4.922979797979798e-06, |
|
"loss": 0.0032, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 9.172661870503598, |
|
"grad_norm": 0.25739356875419617, |
|
"learning_rate": 4.921717171717172e-06, |
|
"loss": 0.0038, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 9.262589928057555, |
|
"grad_norm": 0.17940430343151093, |
|
"learning_rate": 4.920454545454546e-06, |
|
"loss": 0.0029, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 9.352517985611511, |
|
"grad_norm": 0.33168259263038635, |
|
"learning_rate": 4.919191919191919e-06, |
|
"loss": 0.0028, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.442446043165468, |
|
"grad_norm": 0.20831653475761414, |
|
"learning_rate": 4.917929292929293e-06, |
|
"loss": 0.002, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 9.532374100719425, |
|
"grad_norm": 0.19978338479995728, |
|
"learning_rate": 4.9166666666666665e-06, |
|
"loss": 0.0025, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 9.622302158273381, |
|
"grad_norm": 0.23154591023921967, |
|
"learning_rate": 4.915404040404041e-06, |
|
"loss": 0.0033, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 9.712230215827338, |
|
"grad_norm": 0.7622235417366028, |
|
"learning_rate": 4.9141414141414145e-06, |
|
"loss": 0.0039, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.802158273381295, |
|
"grad_norm": 0.23092857003211975, |
|
"learning_rate": 4.9128787878787885e-06, |
|
"loss": 0.0044, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 9.892086330935252, |
|
"grad_norm": 0.5034282207489014, |
|
"learning_rate": 4.9116161616161625e-06, |
|
"loss": 0.0035, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 9.982014388489208, |
|
"grad_norm": 0.2582780122756958, |
|
"learning_rate": 4.910353535353536e-06, |
|
"loss": 0.0033, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 10.071942446043165, |
|
"grad_norm": 0.4610576033592224, |
|
"learning_rate": 4.90909090909091e-06, |
|
"loss": 0.0037, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.161870503597122, |
|
"grad_norm": 0.217066690325737, |
|
"learning_rate": 4.907828282828283e-06, |
|
"loss": 0.0028, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 10.251798561151078, |
|
"grad_norm": 0.05713683366775513, |
|
"learning_rate": 4.906565656565658e-06, |
|
"loss": 0.003, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 10.341726618705035, |
|
"grad_norm": 0.5356289148330688, |
|
"learning_rate": 4.905303030303031e-06, |
|
"loss": 0.0018, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 10.431654676258994, |
|
"grad_norm": 0.37969082593917847, |
|
"learning_rate": 4.904040404040405e-06, |
|
"loss": 0.0022, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 10.52158273381295, |
|
"grad_norm": 1.078008770942688, |
|
"learning_rate": 4.902777777777778e-06, |
|
"loss": 0.0032, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 10.611510791366907, |
|
"grad_norm": 0.26670244336128235, |
|
"learning_rate": 4.901515151515152e-06, |
|
"loss": 0.0027, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 10.701438848920864, |
|
"grad_norm": 0.673686683177948, |
|
"learning_rate": 4.900252525252526e-06, |
|
"loss": 0.0029, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 10.79136690647482, |
|
"grad_norm": 0.37779000401496887, |
|
"learning_rate": 4.898989898989899e-06, |
|
"loss": 0.0022, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.79136690647482, |
|
"eval_loss": 0.10011211037635803, |
|
"eval_runtime": 1344.035, |
|
"eval_samples_per_second": 1.653, |
|
"eval_steps_per_second": 0.103, |
|
"eval_wer": 9.049840776123824, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.881294964028777, |
|
"grad_norm": 0.09616148471832275, |
|
"learning_rate": 4.897727272727273e-06, |
|
"loss": 0.0041, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 10.971223021582734, |
|
"grad_norm": 0.8408087491989136, |
|
"learning_rate": 4.896464646464647e-06, |
|
"loss": 0.0046, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 11.06115107913669, |
|
"grad_norm": 0.1868293583393097, |
|
"learning_rate": 4.895202020202021e-06, |
|
"loss": 0.0027, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 11.151079136690647, |
|
"grad_norm": 0.19219942390918732, |
|
"learning_rate": 4.893939393939394e-06, |
|
"loss": 0.0024, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 11.241007194244604, |
|
"grad_norm": 3.7455391883850098, |
|
"learning_rate": 4.892676767676768e-06, |
|
"loss": 0.0027, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 11.33093525179856, |
|
"grad_norm": 0.2693164348602295, |
|
"learning_rate": 4.891414141414142e-06, |
|
"loss": 0.002, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 11.420863309352518, |
|
"grad_norm": 0.8100782632827759, |
|
"learning_rate": 4.890151515151515e-06, |
|
"loss": 0.0033, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 11.510791366906474, |
|
"grad_norm": 0.30300647020339966, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.0025, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.600719424460431, |
|
"grad_norm": 0.49988773465156555, |
|
"learning_rate": 4.887626262626263e-06, |
|
"loss": 0.002, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 11.690647482014388, |
|
"grad_norm": 0.2162599414587021, |
|
"learning_rate": 4.8863636363636365e-06, |
|
"loss": 0.0024, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 11.780575539568346, |
|
"grad_norm": 2.3612468242645264, |
|
"learning_rate": 4.8851010101010105e-06, |
|
"loss": 0.0045, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 11.870503597122303, |
|
"grad_norm": 0.4287119209766388, |
|
"learning_rate": 4.883838383838384e-06, |
|
"loss": 0.0051, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 11.96043165467626, |
|
"grad_norm": 0.46471118927001953, |
|
"learning_rate": 4.8825757575757585e-06, |
|
"loss": 0.0036, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 12.050359712230216, |
|
"grad_norm": 0.4310344159603119, |
|
"learning_rate": 4.881313131313132e-06, |
|
"loss": 0.0031, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 12.140287769784173, |
|
"grad_norm": 0.8054510951042175, |
|
"learning_rate": 4.880050505050506e-06, |
|
"loss": 0.0036, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 12.23021582733813, |
|
"grad_norm": 0.5783084630966187, |
|
"learning_rate": 4.878787878787879e-06, |
|
"loss": 0.0023, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 12.320143884892087, |
|
"grad_norm": 0.1537202149629593, |
|
"learning_rate": 4.877525252525253e-06, |
|
"loss": 0.0031, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 12.410071942446043, |
|
"grad_norm": 0.25773826241493225, |
|
"learning_rate": 4.876262626262627e-06, |
|
"loss": 0.0029, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 1.0221893787384033, |
|
"learning_rate": 4.875e-06, |
|
"loss": 0.003, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 12.589928057553957, |
|
"grad_norm": 0.2363336831331253, |
|
"learning_rate": 4.873737373737374e-06, |
|
"loss": 0.0036, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.679856115107913, |
|
"grad_norm": 0.9339852333068848, |
|
"learning_rate": 4.872474747474748e-06, |
|
"loss": 0.004, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 12.76978417266187, |
|
"grad_norm": 0.6633305549621582, |
|
"learning_rate": 4.871212121212122e-06, |
|
"loss": 0.0032, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 12.859712230215827, |
|
"grad_norm": 0.7261077761650085, |
|
"learning_rate": 4.869949494949495e-06, |
|
"loss": 0.0028, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 12.949640287769784, |
|
"grad_norm": 0.6666585803031921, |
|
"learning_rate": 4.868686868686869e-06, |
|
"loss": 0.0031, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.03956834532374, |
|
"grad_norm": 0.42198774218559265, |
|
"learning_rate": 4.867424242424243e-06, |
|
"loss": 0.0023, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 13.129496402877697, |
|
"grad_norm": 0.1100483238697052, |
|
"learning_rate": 4.866161616161616e-06, |
|
"loss": 0.002, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 13.219424460431656, |
|
"grad_norm": 0.5182665586471558, |
|
"learning_rate": 4.86489898989899e-06, |
|
"loss": 0.003, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 13.309352517985612, |
|
"grad_norm": 0.10821045190095901, |
|
"learning_rate": 4.863636363636364e-06, |
|
"loss": 0.0024, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 13.399280575539569, |
|
"grad_norm": 0.302943617105484, |
|
"learning_rate": 4.862373737373738e-06, |
|
"loss": 0.0022, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 13.489208633093526, |
|
"grad_norm": 0.34953269362449646, |
|
"learning_rate": 4.861111111111111e-06, |
|
"loss": 0.0024, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 13.579136690647482, |
|
"grad_norm": 0.3864242732524872, |
|
"learning_rate": 4.859848484848485e-06, |
|
"loss": 0.0025, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 13.66906474820144, |
|
"grad_norm": 0.23528048396110535, |
|
"learning_rate": 4.858585858585859e-06, |
|
"loss": 0.0028, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 13.758992805755396, |
|
"grad_norm": 0.31728431582450867, |
|
"learning_rate": 4.8573232323232325e-06, |
|
"loss": 0.0041, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 13.848920863309353, |
|
"grad_norm": 0.5803298950195312, |
|
"learning_rate": 4.8560606060606065e-06, |
|
"loss": 0.0028, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 13.93884892086331, |
|
"grad_norm": 0.30145183205604553, |
|
"learning_rate": 4.85479797979798e-06, |
|
"loss": 0.0022, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 14.028776978417266, |
|
"grad_norm": 0.43851757049560547, |
|
"learning_rate": 4.8535353535353545e-06, |
|
"loss": 0.0024, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 14.118705035971223, |
|
"grad_norm": 0.7910506725311279, |
|
"learning_rate": 4.852272727272728e-06, |
|
"loss": 0.0033, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 14.20863309352518, |
|
"grad_norm": 0.3168434500694275, |
|
"learning_rate": 4.851010101010102e-06, |
|
"loss": 0.0028, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 14.298561151079136, |
|
"grad_norm": 0.7242361307144165, |
|
"learning_rate": 4.849747474747475e-06, |
|
"loss": 0.0031, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 14.388489208633093, |
|
"grad_norm": 0.7368125319480896, |
|
"learning_rate": 4.848484848484849e-06, |
|
"loss": 0.0027, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.388489208633093, |
|
"eval_loss": 0.09274967014789581, |
|
"eval_runtime": 1343.7242, |
|
"eval_samples_per_second": 1.654, |
|
"eval_steps_per_second": 0.103, |
|
"eval_wer": 9.375694290157742, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.47841726618705, |
|
"grad_norm": 0.420599102973938, |
|
"learning_rate": 4.847222222222223e-06, |
|
"loss": 0.0028, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 14.568345323741006, |
|
"grad_norm": 0.3025602698326111, |
|
"learning_rate": 4.845959595959596e-06, |
|
"loss": 0.0028, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 14.658273381294965, |
|
"grad_norm": 0.7078948020935059, |
|
"learning_rate": 4.84469696969697e-06, |
|
"loss": 0.003, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 14.748201438848922, |
|
"grad_norm": 0.5534040331840515, |
|
"learning_rate": 4.843434343434344e-06, |
|
"loss": 0.0031, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 14.838129496402878, |
|
"grad_norm": 0.28715190291404724, |
|
"learning_rate": 4.842171717171718e-06, |
|
"loss": 0.0028, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 14.928057553956835, |
|
"grad_norm": 0.5861944556236267, |
|
"learning_rate": 4.840909090909091e-06, |
|
"loss": 0.0028, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 15.017985611510792, |
|
"grad_norm": 0.102662093937397, |
|
"learning_rate": 4.839646464646465e-06, |
|
"loss": 0.0057, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 15.107913669064748, |
|
"grad_norm": 0.15230265259742737, |
|
"learning_rate": 4.838383838383839e-06, |
|
"loss": 0.0023, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 15.197841726618705, |
|
"grad_norm": 0.12530238926410675, |
|
"learning_rate": 4.837121212121212e-06, |
|
"loss": 0.0017, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 15.287769784172662, |
|
"grad_norm": 0.09885858744382858, |
|
"learning_rate": 4.835858585858586e-06, |
|
"loss": 0.0022, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 15.377697841726619, |
|
"grad_norm": 0.1105910986661911, |
|
"learning_rate": 4.83459595959596e-06, |
|
"loss": 0.0026, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 15.467625899280575, |
|
"grad_norm": 0.3952260911464691, |
|
"learning_rate": 4.833333333333333e-06, |
|
"loss": 0.0021, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 15.557553956834532, |
|
"grad_norm": 0.6049605011940002, |
|
"learning_rate": 4.832070707070707e-06, |
|
"loss": 0.0021, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 15.647482014388489, |
|
"grad_norm": 0.7125779986381531, |
|
"learning_rate": 4.830808080808081e-06, |
|
"loss": 0.0015, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 15.737410071942445, |
|
"grad_norm": 0.16274645924568176, |
|
"learning_rate": 4.829545454545455e-06, |
|
"loss": 0.0019, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 15.827338129496402, |
|
"grad_norm": 0.6492106318473816, |
|
"learning_rate": 4.8282828282828285e-06, |
|
"loss": 0.0019, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.917266187050359, |
|
"grad_norm": 0.9411545991897583, |
|
"learning_rate": 4.8270202020202025e-06, |
|
"loss": 0.003, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 16.007194244604317, |
|
"grad_norm": 0.03323192521929741, |
|
"learning_rate": 4.8257575757575765e-06, |
|
"loss": 0.0018, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 16.097122302158272, |
|
"grad_norm": 0.1154596135020256, |
|
"learning_rate": 4.82449494949495e-06, |
|
"loss": 0.0015, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 16.18705035971223, |
|
"grad_norm": 0.41669028997421265, |
|
"learning_rate": 4.823232323232324e-06, |
|
"loss": 0.0016, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.276978417266186, |
|
"grad_norm": 0.25636962056159973, |
|
"learning_rate": 4.821969696969697e-06, |
|
"loss": 0.0014, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 16.366906474820144, |
|
"grad_norm": 3.250777244567871, |
|
"learning_rate": 4.820707070707072e-06, |
|
"loss": 0.0027, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 16.4568345323741, |
|
"grad_norm": 1.1029988527297974, |
|
"learning_rate": 4.819444444444445e-06, |
|
"loss": 0.0028, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 16.546762589928058, |
|
"grad_norm": 0.3530588150024414, |
|
"learning_rate": 4.818181818181819e-06, |
|
"loss": 0.0015, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 16.636690647482013, |
|
"grad_norm": 0.0861181914806366, |
|
"learning_rate": 4.816919191919192e-06, |
|
"loss": 0.0023, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 16.72661870503597, |
|
"grad_norm": 0.44006574153900146, |
|
"learning_rate": 4.815656565656566e-06, |
|
"loss": 0.0021, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 16.81654676258993, |
|
"grad_norm": 0.9688239097595215, |
|
"learning_rate": 4.81439393939394e-06, |
|
"loss": 0.0014, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 16.906474820143885, |
|
"grad_norm": 0.848913311958313, |
|
"learning_rate": 4.813131313131313e-06, |
|
"loss": 0.0021, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 16.996402877697843, |
|
"grad_norm": 0.14554986357688904, |
|
"learning_rate": 4.811868686868687e-06, |
|
"loss": 0.0013, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 17.086330935251798, |
|
"grad_norm": 0.31808871030807495, |
|
"learning_rate": 4.810606060606061e-06, |
|
"loss": 0.0019, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 17.176258992805757, |
|
"grad_norm": 0.2081349641084671, |
|
"learning_rate": 4.809343434343435e-06, |
|
"loss": 0.0018, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 17.26618705035971, |
|
"grad_norm": 0.0817071720957756, |
|
"learning_rate": 4.808080808080808e-06, |
|
"loss": 0.0011, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 17.35611510791367, |
|
"grad_norm": 0.148326575756073, |
|
"learning_rate": 4.806818181818182e-06, |
|
"loss": 0.0011, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 17.446043165467625, |
|
"grad_norm": 1.1114903688430786, |
|
"learning_rate": 4.805555555555556e-06, |
|
"loss": 0.0012, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 17.535971223021583, |
|
"grad_norm": 0.5132379531860352, |
|
"learning_rate": 4.804292929292929e-06, |
|
"loss": 0.0015, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 17.62589928057554, |
|
"grad_norm": 0.5439797043800354, |
|
"learning_rate": 4.803030303030303e-06, |
|
"loss": 0.0019, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 17.715827338129497, |
|
"grad_norm": 0.4897061586380005, |
|
"learning_rate": 4.801767676767677e-06, |
|
"loss": 0.0022, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 17.805755395683452, |
|
"grad_norm": 0.13605351746082306, |
|
"learning_rate": 4.800505050505051e-06, |
|
"loss": 0.0017, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 17.89568345323741, |
|
"grad_norm": 0.6285837888717651, |
|
"learning_rate": 4.7992424242424245e-06, |
|
"loss": 0.0014, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 17.985611510791365, |
|
"grad_norm": 0.04884183779358864, |
|
"learning_rate": 4.7979797979797985e-06, |
|
"loss": 0.0011, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.985611510791365, |
|
"eval_loss": 0.09266538918018341, |
|
"eval_runtime": 1344.6458, |
|
"eval_samples_per_second": 1.652, |
|
"eval_steps_per_second": 0.103, |
|
"eval_wer": 8.835073687328741, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.075539568345324, |
|
"grad_norm": 0.036710768938064575, |
|
"learning_rate": 4.7967171717171725e-06, |
|
"loss": 0.0024, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 18.165467625899282, |
|
"grad_norm": 0.41920551657676697, |
|
"learning_rate": 4.795454545454546e-06, |
|
"loss": 0.0011, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 18.255395683453237, |
|
"grad_norm": 0.2354598492383957, |
|
"learning_rate": 4.79419191919192e-06, |
|
"loss": 0.0018, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 18.345323741007196, |
|
"grad_norm": 0.4095918536186218, |
|
"learning_rate": 4.792929292929293e-06, |
|
"loss": 0.0015, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 18.43525179856115, |
|
"grad_norm": 0.03964778780937195, |
|
"learning_rate": 4.791666666666668e-06, |
|
"loss": 0.0019, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 18.52517985611511, |
|
"grad_norm": 0.9322590827941895, |
|
"learning_rate": 4.790404040404041e-06, |
|
"loss": 0.0014, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 18.615107913669064, |
|
"grad_norm": 0.11062884330749512, |
|
"learning_rate": 4.789141414141415e-06, |
|
"loss": 0.0015, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 18.705035971223023, |
|
"grad_norm": 0.4186955690383911, |
|
"learning_rate": 4.787878787878788e-06, |
|
"loss": 0.0013, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 18.794964028776977, |
|
"grad_norm": 0.40554943680763245, |
|
"learning_rate": 4.786616161616162e-06, |
|
"loss": 0.0017, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 18.884892086330936, |
|
"grad_norm": 0.4156556725502014, |
|
"learning_rate": 4.785353535353536e-06, |
|
"loss": 0.0016, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 18.97482014388489, |
|
"grad_norm": 0.8705348968505859, |
|
"learning_rate": 4.784090909090909e-06, |
|
"loss": 0.003, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 19.06474820143885, |
|
"grad_norm": 0.47541674971580505, |
|
"learning_rate": 4.782828282828283e-06, |
|
"loss": 0.0026, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 19.154676258992804, |
|
"grad_norm": 0.3221082389354706, |
|
"learning_rate": 4.781565656565657e-06, |
|
"loss": 0.0014, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 19.244604316546763, |
|
"grad_norm": 0.26767319440841675, |
|
"learning_rate": 4.78030303030303e-06, |
|
"loss": 0.0015, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 19.334532374100718, |
|
"grad_norm": 0.41984379291534424, |
|
"learning_rate": 4.779040404040404e-06, |
|
"loss": 0.0026, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 19.424460431654676, |
|
"grad_norm": 0.6067033410072327, |
|
"learning_rate": 4.777777777777778e-06, |
|
"loss": 0.0031, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 19.514388489208635, |
|
"grad_norm": 0.23113247752189636, |
|
"learning_rate": 4.776515151515152e-06, |
|
"loss": 0.0027, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 19.60431654676259, |
|
"grad_norm": 0.7052062153816223, |
|
"learning_rate": 4.775252525252525e-06, |
|
"loss": 0.0038, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 19.694244604316548, |
|
"grad_norm": 1.4232673645019531, |
|
"learning_rate": 4.773989898989899e-06, |
|
"loss": 0.0024, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 19.784172661870503, |
|
"grad_norm": 0.12078073620796204, |
|
"learning_rate": 4.772727272727273e-06, |
|
"loss": 0.0014, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 19.87410071942446, |
|
"grad_norm": 1.296155333518982, |
|
"learning_rate": 4.7714646464646465e-06, |
|
"loss": 0.0028, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 19.964028776978417, |
|
"grad_norm": 0.4774380922317505, |
|
"learning_rate": 4.7702020202020205e-06, |
|
"loss": 0.0039, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 20.053956834532375, |
|
"grad_norm": 0.7243533134460449, |
|
"learning_rate": 4.768939393939394e-06, |
|
"loss": 0.0038, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 20.14388489208633, |
|
"grad_norm": 0.03761635348200798, |
|
"learning_rate": 4.7676767676767685e-06, |
|
"loss": 0.0028, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 20.23381294964029, |
|
"grad_norm": 0.3167934715747833, |
|
"learning_rate": 4.766414141414142e-06, |
|
"loss": 0.0023, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 20.323741007194243, |
|
"grad_norm": 0.08072912693023682, |
|
"learning_rate": 4.765151515151516e-06, |
|
"loss": 0.0021, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 20.413669064748202, |
|
"grad_norm": 0.0809144377708435, |
|
"learning_rate": 4.763888888888889e-06, |
|
"loss": 0.0033, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 20.503597122302157, |
|
"grad_norm": 0.021725259721279144, |
|
"learning_rate": 4.762626262626263e-06, |
|
"loss": 0.0022, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 20.593525179856115, |
|
"grad_norm": 0.79271399974823, |
|
"learning_rate": 4.761363636363637e-06, |
|
"loss": 0.0015, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 20.68345323741007, |
|
"grad_norm": 0.10382846742868423, |
|
"learning_rate": 4.76010101010101e-06, |
|
"loss": 0.0019, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 20.77338129496403, |
|
"grad_norm": 0.03259812295436859, |
|
"learning_rate": 4.758838383838385e-06, |
|
"loss": 0.002, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 20.863309352517987, |
|
"grad_norm": 0.6223962306976318, |
|
"learning_rate": 4.757575757575758e-06, |
|
"loss": 0.0036, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 20.953237410071942, |
|
"grad_norm": 1.0351557731628418, |
|
"learning_rate": 4.756313131313132e-06, |
|
"loss": 0.0022, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 21.0431654676259, |
|
"grad_norm": 0.8662335276603699, |
|
"learning_rate": 4.755050505050505e-06, |
|
"loss": 0.0028, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 21.133093525179856, |
|
"grad_norm": 0.13104894757270813, |
|
"learning_rate": 4.753787878787879e-06, |
|
"loss": 0.0028, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 21.223021582733814, |
|
"grad_norm": 0.8010006546974182, |
|
"learning_rate": 4.752525252525253e-06, |
|
"loss": 0.0021, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 21.31294964028777, |
|
"grad_norm": 0.7761834263801575, |
|
"learning_rate": 4.751262626262626e-06, |
|
"loss": 0.0035, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 21.402877697841728, |
|
"grad_norm": 0.05642890930175781, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.0015, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 21.492805755395683, |
|
"grad_norm": 0.2215975672006607, |
|
"learning_rate": 4.748737373737374e-06, |
|
"loss": 0.0011, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 21.58273381294964, |
|
"grad_norm": 0.5649552345275879, |
|
"learning_rate": 4.747474747474748e-06, |
|
"loss": 0.0017, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 21.58273381294964, |
|
"eval_loss": 0.08750007301568985, |
|
"eval_runtime": 1349.1716, |
|
"eval_samples_per_second": 1.647, |
|
"eval_steps_per_second": 0.103, |
|
"eval_wer": 7.657557579797082, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 21.672661870503596, |
|
"grad_norm": 0.3567905128002167, |
|
"learning_rate": 4.746212121212121e-06, |
|
"loss": 0.0023, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 21.762589928057555, |
|
"grad_norm": 0.7165196537971497, |
|
"learning_rate": 4.744949494949495e-06, |
|
"loss": 0.0019, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 21.85251798561151, |
|
"grad_norm": 0.9009844660758972, |
|
"learning_rate": 4.743686868686869e-06, |
|
"loss": 0.0022, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 21.942446043165468, |
|
"grad_norm": 0.7037338614463806, |
|
"learning_rate": 4.7424242424242426e-06, |
|
"loss": 0.0026, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 22.032374100719423, |
|
"grad_norm": 0.2905846834182739, |
|
"learning_rate": 4.7411616161616166e-06, |
|
"loss": 0.002, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 22.12230215827338, |
|
"grad_norm": 0.7335506677627563, |
|
"learning_rate": 4.7398989898989905e-06, |
|
"loss": 0.0019, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 22.21223021582734, |
|
"grad_norm": 0.3520030677318573, |
|
"learning_rate": 4.7386363636363645e-06, |
|
"loss": 0.0016, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 22.302158273381295, |
|
"grad_norm": 0.3580196797847748, |
|
"learning_rate": 4.737373737373738e-06, |
|
"loss": 0.0014, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 22.392086330935253, |
|
"grad_norm": 0.19062575697898865, |
|
"learning_rate": 4.736111111111112e-06, |
|
"loss": 0.002, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 22.48201438848921, |
|
"grad_norm": 0.6567767858505249, |
|
"learning_rate": 4.734848484848486e-06, |
|
"loss": 0.0021, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 22.571942446043167, |
|
"grad_norm": 0.24819691479206085, |
|
"learning_rate": 4.733585858585859e-06, |
|
"loss": 0.0019, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 22.66187050359712, |
|
"grad_norm": 0.47786185145378113, |
|
"learning_rate": 4.732323232323233e-06, |
|
"loss": 0.0014, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 22.75179856115108, |
|
"grad_norm": 0.05066821351647377, |
|
"learning_rate": 4.731060606060606e-06, |
|
"loss": 0.0018, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 22.841726618705035, |
|
"grad_norm": 0.33751770853996277, |
|
"learning_rate": 4.72979797979798e-06, |
|
"loss": 0.0028, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 22.931654676258994, |
|
"grad_norm": 0.03158155083656311, |
|
"learning_rate": 4.728535353535354e-06, |
|
"loss": 0.0013, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 23.02158273381295, |
|
"grad_norm": 0.05814801901578903, |
|
"learning_rate": 4.727272727272728e-06, |
|
"loss": 0.0021, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 23.111510791366907, |
|
"grad_norm": 0.031183883547782898, |
|
"learning_rate": 4.726010101010101e-06, |
|
"loss": 0.0011, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 23.201438848920862, |
|
"grad_norm": 0.539813756942749, |
|
"learning_rate": 4.724747474747475e-06, |
|
"loss": 0.0009, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 23.29136690647482, |
|
"grad_norm": 0.14558178186416626, |
|
"learning_rate": 4.723484848484849e-06, |
|
"loss": 0.0018, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 23.381294964028775, |
|
"grad_norm": 0.10804769396781921, |
|
"learning_rate": 4.722222222222222e-06, |
|
"loss": 0.0013, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 23.471223021582734, |
|
"grad_norm": 0.3211396038532257, |
|
"learning_rate": 4.720959595959596e-06, |
|
"loss": 0.0015, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 23.56115107913669, |
|
"grad_norm": 0.16721013188362122, |
|
"learning_rate": 4.71969696969697e-06, |
|
"loss": 0.0027, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 23.651079136690647, |
|
"grad_norm": 0.3473891019821167, |
|
"learning_rate": 4.7184343434343434e-06, |
|
"loss": 0.0014, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 23.741007194244606, |
|
"grad_norm": 0.04464249685406685, |
|
"learning_rate": 4.717171717171717e-06, |
|
"loss": 0.0013, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 23.83093525179856, |
|
"grad_norm": 0.21577273309230804, |
|
"learning_rate": 4.715909090909091e-06, |
|
"loss": 0.0025, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 23.92086330935252, |
|
"grad_norm": 1.0553650856018066, |
|
"learning_rate": 4.714646464646465e-06, |
|
"loss": 0.0012, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 24.010791366906474, |
|
"grad_norm": 0.015737203881144524, |
|
"learning_rate": 4.7133838383838386e-06, |
|
"loss": 0.0018, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 24.100719424460433, |
|
"grad_norm": 0.08808793127536774, |
|
"learning_rate": 4.7121212121212126e-06, |
|
"loss": 0.0008, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 24.190647482014388, |
|
"grad_norm": 0.01893734373152256, |
|
"learning_rate": 4.7108585858585866e-06, |
|
"loss": 0.0008, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 24.280575539568346, |
|
"grad_norm": 0.032726775854825974, |
|
"learning_rate": 4.70959595959596e-06, |
|
"loss": 0.0011, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 24.3705035971223, |
|
"grad_norm": 1.2210007905960083, |
|
"learning_rate": 4.708333333333334e-06, |
|
"loss": 0.0014, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 24.46043165467626, |
|
"grad_norm": 0.21317902207374573, |
|
"learning_rate": 4.707070707070707e-06, |
|
"loss": 0.0008, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 24.550359712230215, |
|
"grad_norm": 0.02254541404545307, |
|
"learning_rate": 4.705808080808082e-06, |
|
"loss": 0.0008, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 24.640287769784173, |
|
"grad_norm": 0.19283901154994965, |
|
"learning_rate": 4.704545454545455e-06, |
|
"loss": 0.0006, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 24.730215827338128, |
|
"grad_norm": 0.1615646928548813, |
|
"learning_rate": 4.703282828282829e-06, |
|
"loss": 0.0011, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 24.820143884892087, |
|
"grad_norm": 0.04525255784392357, |
|
"learning_rate": 4.702020202020202e-06, |
|
"loss": 0.0006, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 24.91007194244604, |
|
"grad_norm": 0.17892493307590485, |
|
"learning_rate": 4.700757575757576e-06, |
|
"loss": 0.0011, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 1.5881894826889038, |
|
"learning_rate": 4.69949494949495e-06, |
|
"loss": 0.0009, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 25.08992805755396, |
|
"grad_norm": 0.028072576969861984, |
|
"learning_rate": 4.698232323232323e-06, |
|
"loss": 0.001, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 25.179856115107913, |
|
"grad_norm": 0.034753262996673584, |
|
"learning_rate": 4.696969696969698e-06, |
|
"loss": 0.001, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 25.179856115107913, |
|
"eval_loss": 0.08996064960956573, |
|
"eval_runtime": 1372.6865, |
|
"eval_samples_per_second": 1.619, |
|
"eval_steps_per_second": 0.101, |
|
"eval_wer": 6.591127897504258, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 25.269784172661872, |
|
"grad_norm": 0.047846052795648575, |
|
"learning_rate": 4.695707070707071e-06, |
|
"loss": 0.0017, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 25.359712230215827, |
|
"grad_norm": 0.08721514046192169, |
|
"learning_rate": 4.694444444444445e-06, |
|
"loss": 0.0012, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 25.449640287769785, |
|
"grad_norm": 0.488505095243454, |
|
"learning_rate": 4.693181818181818e-06, |
|
"loss": 0.001, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 25.53956834532374, |
|
"grad_norm": 0.3541705012321472, |
|
"learning_rate": 4.691919191919192e-06, |
|
"loss": 0.0009, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 25.6294964028777, |
|
"grad_norm": 1.2867228984832764, |
|
"learning_rate": 4.690656565656566e-06, |
|
"loss": 0.0009, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 25.719424460431654, |
|
"grad_norm": 0.06602492183446884, |
|
"learning_rate": 4.6893939393939394e-06, |
|
"loss": 0.001, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 25.809352517985612, |
|
"grad_norm": 0.03555336222052574, |
|
"learning_rate": 4.6881313131313134e-06, |
|
"loss": 0.0016, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 25.899280575539567, |
|
"grad_norm": 0.1011524349451065, |
|
"learning_rate": 4.6868686868686874e-06, |
|
"loss": 0.0028, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 25.989208633093526, |
|
"grad_norm": 0.14894358813762665, |
|
"learning_rate": 4.6856060606060614e-06, |
|
"loss": 0.0026, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 26.07913669064748, |
|
"grad_norm": 0.944786787033081, |
|
"learning_rate": 4.684343434343435e-06, |
|
"loss": 0.0014, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 26.16906474820144, |
|
"grad_norm": 0.4678920805454254, |
|
"learning_rate": 4.683080808080809e-06, |
|
"loss": 0.0016, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 26.258992805755394, |
|
"grad_norm": 0.0241763386875391, |
|
"learning_rate": 4.681818181818183e-06, |
|
"loss": 0.0018, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 26.348920863309353, |
|
"grad_norm": 0.1959693878889084, |
|
"learning_rate": 4.680555555555556e-06, |
|
"loss": 0.0014, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 26.43884892086331, |
|
"grad_norm": 0.05353585258126259, |
|
"learning_rate": 4.67929292929293e-06, |
|
"loss": 0.001, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 26.528776978417266, |
|
"grad_norm": 0.022708551958203316, |
|
"learning_rate": 4.678030303030303e-06, |
|
"loss": 0.0008, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 26.618705035971225, |
|
"grad_norm": 0.28148502111434937, |
|
"learning_rate": 4.676767676767677e-06, |
|
"loss": 0.0012, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 26.70863309352518, |
|
"grad_norm": 0.0556604228913784, |
|
"learning_rate": 4.675505050505051e-06, |
|
"loss": 0.0018, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 26.798561151079138, |
|
"grad_norm": 0.03789166733622551, |
|
"learning_rate": 4.674242424242425e-06, |
|
"loss": 0.0008, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 26.888489208633093, |
|
"grad_norm": 0.18029791116714478, |
|
"learning_rate": 4.672979797979799e-06, |
|
"loss": 0.001, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 26.97841726618705, |
|
"grad_norm": 0.27599871158599854, |
|
"learning_rate": 4.671717171717172e-06, |
|
"loss": 0.0008, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 27.068345323741006, |
|
"grad_norm": 0.4067777693271637, |
|
"learning_rate": 4.670454545454546e-06, |
|
"loss": 0.0017, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 27.158273381294965, |
|
"grad_norm": 0.36876606941223145, |
|
"learning_rate": 4.669191919191919e-06, |
|
"loss": 0.0011, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 27.24820143884892, |
|
"grad_norm": 0.2605381906032562, |
|
"learning_rate": 4.667929292929293e-06, |
|
"loss": 0.0014, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 27.33812949640288, |
|
"grad_norm": 0.02853270247578621, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.0008, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 27.428057553956833, |
|
"grad_norm": 0.055020011961460114, |
|
"learning_rate": 4.66540404040404e-06, |
|
"loss": 0.0009, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 27.51798561151079, |
|
"grad_norm": 0.30874237418174744, |
|
"learning_rate": 4.664141414141414e-06, |
|
"loss": 0.0018, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 27.607913669064747, |
|
"grad_norm": 0.09795974940061569, |
|
"learning_rate": 4.662878787878788e-06, |
|
"loss": 0.0014, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 27.697841726618705, |
|
"grad_norm": 0.04705384001135826, |
|
"learning_rate": 4.661616161616162e-06, |
|
"loss": 0.0015, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 27.78776978417266, |
|
"grad_norm": 0.058379877358675, |
|
"learning_rate": 4.6603535353535355e-06, |
|
"loss": 0.0008, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 27.87769784172662, |
|
"grad_norm": 0.047014497220516205, |
|
"learning_rate": 4.6590909090909095e-06, |
|
"loss": 0.0016, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 27.967625899280577, |
|
"grad_norm": 0.6353835463523865, |
|
"learning_rate": 4.6578282828282835e-06, |
|
"loss": 0.0012, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 28.057553956834532, |
|
"grad_norm": 0.13249577581882477, |
|
"learning_rate": 4.656565656565657e-06, |
|
"loss": 0.0007, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 28.14748201438849, |
|
"grad_norm": 0.16413046419620514, |
|
"learning_rate": 4.655303030303031e-06, |
|
"loss": 0.0009, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 28.237410071942445, |
|
"grad_norm": 0.21356362104415894, |
|
"learning_rate": 4.654040404040405e-06, |
|
"loss": 0.0007, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 28.327338129496404, |
|
"grad_norm": 0.0190277099609375, |
|
"learning_rate": 4.652777777777779e-06, |
|
"loss": 0.0007, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 28.41726618705036, |
|
"grad_norm": 0.12108524143695831, |
|
"learning_rate": 4.651515151515152e-06, |
|
"loss": 0.0009, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 28.507194244604317, |
|
"grad_norm": 0.026057908311486244, |
|
"learning_rate": 4.650252525252526e-06, |
|
"loss": 0.0007, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 28.597122302158272, |
|
"grad_norm": 0.09515079110860825, |
|
"learning_rate": 4.6489898989899e-06, |
|
"loss": 0.0008, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 28.68705035971223, |
|
"grad_norm": 0.48142778873443604, |
|
"learning_rate": 4.647727272727273e-06, |
|
"loss": 0.0007, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 28.776978417266186, |
|
"grad_norm": 0.46795013546943665, |
|
"learning_rate": 4.646464646464647e-06, |
|
"loss": 0.0014, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 28.776978417266186, |
|
"eval_loss": 0.09178629517555237, |
|
"eval_runtime": 1347.1747, |
|
"eval_samples_per_second": 1.649, |
|
"eval_steps_per_second": 0.103, |
|
"eval_wer": 7.139154262015849, |
|
"step": 8000 |
|
}, |
|
{
"epoch": 28.866906474820144,
"grad_norm": 0.5243809223175049,
"learning_rate": 4.64520202020202e-06,
"loss": 0.0007,
"step": 8025
},
{
"epoch": 28.9568345323741,
"grad_norm": 0.3461306095123291,
"learning_rate": 4.643939393939395e-06,
"loss": 0.001,
"step": 8050
},
{
"epoch": 29.046762589928058,
"grad_norm": 0.2795426845550537,
"learning_rate": 4.642676767676768e-06,
"loss": 0.0014,
"step": 8075
},
{
"epoch": 29.136690647482013,
"grad_norm": 0.05419691279530525,
"learning_rate": 4.641414141414142e-06,
"loss": 0.0014,
"step": 8100
},
{
"epoch": 29.22661870503597,
"grad_norm": 0.08857329189777374,
"learning_rate": 4.640151515151515e-06,
"loss": 0.0016,
"step": 8125
},
{
"epoch": 29.31654676258993,
"grad_norm": 0.05129173770546913,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0011,
"step": 8150
},
{
"epoch": 29.406474820143885,
"grad_norm": 1.0032382011413574,
"learning_rate": 4.637626262626263e-06,
"loss": 0.0023,
"step": 8175
},
{
"epoch": 29.496402877697843,
"grad_norm": 0.4335207939147949,
"learning_rate": 4.636363636363636e-06,
"loss": 0.0028,
"step": 8200
},
{
"epoch": 29.586330935251798,
"grad_norm": 0.15561847388744354,
"learning_rate": 4.63510101010101e-06,
"loss": 0.0028,
"step": 8225
},
{
"epoch": 29.676258992805757,
"grad_norm": 0.24305035173892975,
"learning_rate": 4.633838383838384e-06,
"loss": 0.0024,
"step": 8250
},
{
"epoch": 29.76618705035971,
"grad_norm": 1.3689900636672974,
"learning_rate": 4.632575757575758e-06,
"loss": 0.0036,
"step": 8275
},
{
"epoch": 29.85611510791367,
"grad_norm": 0.6511125564575195,
"learning_rate": 4.6313131313131315e-06,
"loss": 0.0025,
"step": 8300
},
{
"epoch": 29.946043165467625,
"grad_norm": 0.8534782528877258,
"learning_rate": 4.6300505050505055e-06,
"loss": 0.0029,
"step": 8325
},
{
"epoch": 30.035971223021583,
"grad_norm": 0.3412608504295349,
"learning_rate": 4.6287878787878795e-06,
"loss": 0.0028,
"step": 8350
},
{
"epoch": 30.12589928057554,
"grad_norm": 0.16232311725616455,
"learning_rate": 4.627525252525253e-06,
"loss": 0.0023,
"step": 8375
},
{
"epoch": 30.215827338129497,
"grad_norm": 0.08357956260442734,
"learning_rate": 4.626262626262627e-06,
"loss": 0.0019,
"step": 8400
},
{
"epoch": 30.305755395683452,
"grad_norm": 0.412728488445282,
"learning_rate": 4.625000000000001e-06,
"loss": 0.0015,
"step": 8425
},
{
"epoch": 30.39568345323741,
"grad_norm": 0.9784059524536133,
"learning_rate": 4.623737373737375e-06,
"loss": 0.0025,
"step": 8450
},
{
"epoch": 30.485611510791365,
"grad_norm": 0.38275232911109924,
"learning_rate": 4.622474747474748e-06,
"loss": 0.0016,
"step": 8475
},
{
"epoch": 30.575539568345324,
"grad_norm": 0.3518912196159363,
"learning_rate": 4.621212121212122e-06,
"loss": 0.0024,
"step": 8500
},
{
"epoch": 30.665467625899282,
"grad_norm": 0.8633609414100647,
"learning_rate": 4.619949494949496e-06,
"loss": 0.0022,
"step": 8525
},
{
"epoch": 30.755395683453237,
"grad_norm": 0.23257087171077728,
"learning_rate": 4.618686868686869e-06,
"loss": 0.0016,
"step": 8550
},
{
"epoch": 30.845323741007196,
"grad_norm": 1.2157853841781616,
"learning_rate": 4.617424242424243e-06,
"loss": 0.0013,
"step": 8575
},
{
"epoch": 30.93525179856115,
"grad_norm": 0.6692176461219788,
"learning_rate": 4.616161616161616e-06,
"loss": 0.0025,
"step": 8600
},
{
"epoch": 31.02517985611511,
"grad_norm": 0.08320923149585724,
"learning_rate": 4.61489898989899e-06,
"loss": 0.0015,
"step": 8625
},
{
"epoch": 31.115107913669064,
"grad_norm": 0.03867033123970032,
"learning_rate": 4.613636363636364e-06,
"loss": 0.0011,
"step": 8650
},
{
"epoch": 31.205035971223023,
"grad_norm": 0.37571918964385986,
"learning_rate": 4.612373737373737e-06,
"loss": 0.002,
"step": 8675
},
{
"epoch": 31.294964028776977,
"grad_norm": 0.023200325667858124,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0017,
"step": 8700
},
{
"epoch": 31.384892086330936,
"grad_norm": 0.025962859392166138,
"learning_rate": 4.609848484848485e-06,
"loss": 0.0025,
"step": 8725
},
{
"epoch": 31.47482014388489,
"grad_norm": 0.07832462340593338,
"learning_rate": 4.608585858585859e-06,
"loss": 0.002,
"step": 8750
},
{
"epoch": 31.56474820143885,
"grad_norm": 0.5365622043609619,
"learning_rate": 4.607323232323232e-06,
"loss": 0.0019,
"step": 8775
},
{
"epoch": 31.654676258992804,
"grad_norm": 0.042796701192855835,
"learning_rate": 4.606060606060606e-06,
"loss": 0.0012,
"step": 8800
},
{
"epoch": 31.744604316546763,
"grad_norm": 0.2298709750175476,
"learning_rate": 4.60479797979798e-06,
"loss": 0.0015,
"step": 8825
},
{
"epoch": 31.834532374100718,
"grad_norm": 0.7432539463043213,
"learning_rate": 4.6035353535353535e-06,
"loss": 0.002,
"step": 8850
},
{
"epoch": 31.924460431654676,
"grad_norm": 0.05896187201142311,
"learning_rate": 4.6022727272727275e-06,
"loss": 0.0017,
"step": 8875
},
{
"epoch": 32.014388489208635,
"grad_norm": 0.6994006633758545,
"learning_rate": 4.6010101010101015e-06,
"loss": 0.0019,
"step": 8900
},
{
"epoch": 32.10431654676259,
"grad_norm": 0.6547738909721375,
"learning_rate": 4.5997474747474755e-06,
"loss": 0.0016,
"step": 8925
},
{
"epoch": 32.194244604316545,
"grad_norm": 0.13888348639011383,
"learning_rate": 4.598484848484849e-06,
"loss": 0.0014,
"step": 8950
},
{
"epoch": 32.28417266187051,
"grad_norm": 0.09715843945741653,
"learning_rate": 4.597222222222223e-06,
"loss": 0.001,
"step": 8975
},
{
"epoch": 32.37410071942446,
"grad_norm": 0.05904947221279144,
"learning_rate": 4.595959595959597e-06,
"loss": 0.0014,
"step": 9000
},
{
"epoch": 32.37410071942446,
"eval_loss": 0.08943528681993484,
"eval_runtime": 1353.2653,
"eval_samples_per_second": 1.642,
"eval_steps_per_second": 0.103,
"eval_wer": 6.739243131156039,
"step": 9000
},
{
"epoch": 32.46402877697842,
"grad_norm": 1.000013828277588,
"learning_rate": 4.59469696969697e-06,
"loss": 0.0012,
"step": 9025
},
{
"epoch": 32.55395683453237,
"grad_norm": 0.031857941299676895,
"learning_rate": 4.593434343434344e-06,
"loss": 0.0011,
"step": 9050
},
{
"epoch": 32.643884892086334,
"grad_norm": 0.18854251503944397,
"learning_rate": 4.592171717171717e-06,
"loss": 0.0011,
"step": 9075
},
{
"epoch": 32.73381294964029,
"grad_norm": 0.06311248987913132,
"learning_rate": 4.590909090909092e-06,
"loss": 0.0009,
"step": 9100
},
{
"epoch": 32.82374100719424,
"grad_norm": 0.02462015673518181,
"learning_rate": 4.589646464646465e-06,
"loss": 0.0023,
"step": 9125
},
{
"epoch": 32.9136690647482,
"grad_norm": 0.5756279826164246,
"learning_rate": 4.588383838383839e-06,
"loss": 0.0014,
"step": 9150
},
{
"epoch": 33.00359712230216,
"grad_norm": 0.39254868030548096,
"learning_rate": 4.587121212121213e-06,
"loss": 0.0012,
"step": 9175
},
{
"epoch": 33.093525179856115,
"grad_norm": 0.05750317871570587,
"learning_rate": 4.585858585858586e-06,
"loss": 0.0016,
"step": 9200
},
{
"epoch": 33.18345323741007,
"grad_norm": 0.456665962934494,
"learning_rate": 4.58459595959596e-06,
"loss": 0.0005,
"step": 9225
},
{
"epoch": 33.273381294964025,
"grad_norm": 0.05247064307332039,
"learning_rate": 4.583333333333333e-06,
"loss": 0.0007,
"step": 9250
},
{
"epoch": 33.36330935251799,
"grad_norm": 0.1745249629020691,
"learning_rate": 4.582070707070708e-06,
"loss": 0.0011,
"step": 9275
},
{
"epoch": 33.45323741007194,
"grad_norm": 0.1702817678451538,
"learning_rate": 4.580808080808081e-06,
"loss": 0.0011,
"step": 9300
},
{
"epoch": 33.5431654676259,
"grad_norm": 0.5600733757019043,
"learning_rate": 4.579545454545455e-06,
"loss": 0.0017,
"step": 9325
},
{
"epoch": 33.63309352517986,
"grad_norm": 0.042534805834293365,
"learning_rate": 4.578282828282828e-06,
"loss": 0.002,
"step": 9350
},
{
"epoch": 33.723021582733814,
"grad_norm": 0.025305964052677155,
"learning_rate": 4.577020202020202e-06,
"loss": 0.0014,
"step": 9375
},
{
"epoch": 33.81294964028777,
"grad_norm": 0.05213531106710434,
"learning_rate": 4.575757575757576e-06,
"loss": 0.001,
"step": 9400
},
{
"epoch": 33.902877697841724,
"grad_norm": 0.02446218766272068,
"learning_rate": 4.5744949494949495e-06,
"loss": 0.0006,
"step": 9425
},
{
"epoch": 33.992805755395686,
"grad_norm": 0.009959193877875805,
"learning_rate": 4.5732323232323235e-06,
"loss": 0.0009,
"step": 9450
},
{
"epoch": 34.08273381294964,
"grad_norm": 0.4287709891796112,
"learning_rate": 4.5719696969696975e-06,
"loss": 0.0007,
"step": 9475
},
{
"epoch": 34.172661870503596,
"grad_norm": 0.011952442117035389,
"learning_rate": 4.5707070707070715e-06,
"loss": 0.0004,
"step": 9500
},
{
"epoch": 34.26258992805755,
"grad_norm": 0.1948029100894928,
"learning_rate": 4.569444444444445e-06,
"loss": 0.0007,
"step": 9525
},
{
"epoch": 34.35251798561151,
"grad_norm": 0.03538801521062851,
"learning_rate": 4.568181818181819e-06,
"loss": 0.0007,
"step": 9550
},
{
"epoch": 34.44244604316547,
"grad_norm": 0.03204001113772392,
"learning_rate": 4.566919191919193e-06,
"loss": 0.0006,
"step": 9575
},
{
"epoch": 34.53237410071942,
"grad_norm": 0.12747210264205933,
"learning_rate": 4.565656565656566e-06,
"loss": 0.0008,
"step": 9600
},
{
"epoch": 34.62230215827338,
"grad_norm": 0.009002352133393288,
"learning_rate": 4.56439393939394e-06,
"loss": 0.0003,
"step": 9625
},
{
"epoch": 34.71223021582734,
"grad_norm": 0.057965803891420364,
"learning_rate": 4.563131313131314e-06,
"loss": 0.0009,
"step": 9650
},
{
"epoch": 34.802158273381295,
"grad_norm": 0.07385562360286713,
"learning_rate": 4.561868686868687e-06,
"loss": 0.0007,
"step": 9675
},
{
"epoch": 34.89208633093525,
"grad_norm": 0.010685013607144356,
"learning_rate": 4.560606060606061e-06,
"loss": 0.0008,
"step": 9700
},
{
"epoch": 34.98201438848921,
"grad_norm": 0.038797181099653244,
"learning_rate": 4.559343434343435e-06,
"loss": 0.0003,
"step": 9725
},
{
"epoch": 35.07194244604317,
"grad_norm": 0.016779489815235138,
"learning_rate": 4.558080808080809e-06,
"loss": 0.0011,
"step": 9750
},
{
"epoch": 35.16187050359712,
"grad_norm": 0.01562959887087345,
"learning_rate": 4.556818181818182e-06,
"loss": 0.0007,
"step": 9775
},
{
"epoch": 35.25179856115108,
"grad_norm": 0.025731824338436127,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0005,
"step": 9800
},
{
"epoch": 35.34172661870504,
"grad_norm": 0.0950327217578888,
"learning_rate": 4.554292929292929e-06,
"loss": 0.0014,
"step": 9825
},
{
"epoch": 35.431654676258994,
"grad_norm": 0.015813730657100677,
"learning_rate": 4.553030303030303e-06,
"loss": 0.0011,
"step": 9850
},
{
"epoch": 35.52158273381295,
"grad_norm": 0.07395196706056595,
"learning_rate": 4.551767676767677e-06,
"loss": 0.0006,
"step": 9875
},
{
"epoch": 35.611510791366904,
"grad_norm": 0.3815157413482666,
"learning_rate": 4.55050505050505e-06,
"loss": 0.001,
"step": 9900
},
{
"epoch": 35.701438848920866,
"grad_norm": 0.028180675581097603,
"learning_rate": 4.549242424242424e-06,
"loss": 0.0007,
"step": 9925
},
{
"epoch": 35.79136690647482,
"grad_norm": 0.022708212956786156,
"learning_rate": 4.547979797979798e-06,
"loss": 0.0007,
"step": 9950
},
{
"epoch": 35.881294964028775,
"grad_norm": 0.37191152572631836,
"learning_rate": 4.546717171717172e-06,
"loss": 0.0006,
"step": 9975
},
{
"epoch": 35.97122302158273,
"grad_norm": 0.045804716646671295,
"learning_rate": 4.5454545454545455e-06,
"loss": 0.0005,
"step": 10000
},
{
"epoch": 35.97122302158273,
"eval_loss": 0.08962783217430115,
"eval_runtime": 1349.7416,
"eval_samples_per_second": 1.646,
"eval_steps_per_second": 0.103,
"eval_wer": 6.25786862178775,
"step": 10000
},
{
"epoch": 36.06115107913669,
"grad_norm": 0.016676392406225204,
"learning_rate": 4.5441919191919195e-06,
"loss": 0.0003,
"step": 10025
},
{
"epoch": 36.15107913669065,
"grad_norm": 0.15673214197158813,
"learning_rate": 4.5429292929292935e-06,
"loss": 0.0009,
"step": 10050
},
{
"epoch": 36.2410071942446,
"grad_norm": 0.032344311475753784,
"learning_rate": 4.541666666666667e-06,
"loss": 0.0015,
"step": 10075
},
{
"epoch": 36.330935251798564,
"grad_norm": 0.5042840242385864,
"learning_rate": 4.540404040404041e-06,
"loss": 0.0014,
"step": 10100
},
{
"epoch": 36.42086330935252,
"grad_norm": 0.02287839725613594,
"learning_rate": 4.539141414141415e-06,
"loss": 0.0013,
"step": 10125
},
{
"epoch": 36.510791366906474,
"grad_norm": 0.30796897411346436,
"learning_rate": 4.537878787878789e-06,
"loss": 0.0025,
"step": 10150
},
{
"epoch": 36.60071942446043,
"grad_norm": 0.11940345168113708,
"learning_rate": 4.536616161616162e-06,
"loss": 0.0009,
"step": 10175
},
{
"epoch": 36.69064748201439,
"grad_norm": 0.12890297174453735,
"learning_rate": 4.535353535353536e-06,
"loss": 0.001,
"step": 10200
},
{
"epoch": 36.780575539568346,
"grad_norm": 0.016430262476205826,
"learning_rate": 4.53409090909091e-06,
"loss": 0.0012,
"step": 10225
},
{
"epoch": 36.8705035971223,
"grad_norm": 0.08656007796525955,
"learning_rate": 4.532828282828283e-06,
"loss": 0.0015,
"step": 10250
},
{
"epoch": 36.960431654676256,
"grad_norm": 0.0869501456618309,
"learning_rate": 4.531565656565657e-06,
"loss": 0.0018,
"step": 10275
},
{
"epoch": 37.05035971223022,
"grad_norm": 0.4101605713367462,
"learning_rate": 4.53030303030303e-06,
"loss": 0.0015,
"step": 10300
},
{
"epoch": 37.14028776978417,
"grad_norm": 0.0797925516963005,
"learning_rate": 4.529040404040405e-06,
"loss": 0.0007,
"step": 10325
},
{
"epoch": 37.23021582733813,
"grad_norm": 0.025322135537862778,
"learning_rate": 4.527777777777778e-06,
"loss": 0.0006,
"step": 10350
},
{
"epoch": 37.32014388489208,
"grad_norm": 0.059909917414188385,
"learning_rate": 4.526515151515152e-06,
"loss": 0.0012,
"step": 10375
},
{
"epoch": 37.410071942446045,
"grad_norm": 0.062007270753383636,
"learning_rate": 4.525252525252526e-06,
"loss": 0.0012,
"step": 10400
},
{
"epoch": 37.5,
"grad_norm": 0.35286614298820496,
"learning_rate": 4.523989898989899e-06,
"loss": 0.0016,
"step": 10425
},
{
"epoch": 37.589928057553955,
"grad_norm": 0.1300862431526184,
"learning_rate": 4.522727272727273e-06,
"loss": 0.0006,
"step": 10450
},
{
"epoch": 37.67985611510792,
"grad_norm": 0.13838863372802734,
"learning_rate": 4.521464646464646e-06,
"loss": 0.0006,
"step": 10475
},
{
"epoch": 37.76978417266187,
"grad_norm": 0.6767460703849792,
"learning_rate": 4.520202020202021e-06,
"loss": 0.0006,
"step": 10500
},
{
"epoch": 37.85971223021583,
"grad_norm": 0.03494667634367943,
"learning_rate": 4.518939393939394e-06,
"loss": 0.0013,
"step": 10525
},
{
"epoch": 37.94964028776978,
"grad_norm": 0.14763426780700684,
"learning_rate": 4.517676767676768e-06,
"loss": 0.0022,
"step": 10550
},
{
"epoch": 38.039568345323744,
"grad_norm": 0.15873517096042633,
"learning_rate": 4.5164141414141415e-06,
"loss": 0.0019,
"step": 10575
},
{
"epoch": 38.1294964028777,
"grad_norm": 0.048420246690511703,
"learning_rate": 4.5151515151515155e-06,
"loss": 0.001,
"step": 10600
},
{
"epoch": 38.219424460431654,
"grad_norm": 0.038138266652822495,
"learning_rate": 4.5138888888888895e-06,
"loss": 0.0004,
"step": 10625
},
{
"epoch": 38.30935251798561,
"grad_norm": 0.024455932900309563,
"learning_rate": 4.512626262626263e-06,
"loss": 0.0007,
"step": 10650
},
{
"epoch": 38.39928057553957,
"grad_norm": 0.29704517126083374,
"learning_rate": 4.511363636363637e-06,
"loss": 0.0012,
"step": 10675
},
{
"epoch": 38.489208633093526,
"grad_norm": 0.23077060282230377,
"learning_rate": 4.510101010101011e-06,
"loss": 0.0006,
"step": 10700
},
{
"epoch": 38.57913669064748,
"grad_norm": 0.04493401572108269,
"learning_rate": 4.508838383838384e-06,
"loss": 0.0007,
"step": 10725
},
{
"epoch": 38.669064748201436,
"grad_norm": 0.01225815899670124,
"learning_rate": 4.507575757575758e-06,
"loss": 0.0004,
"step": 10750
},
{
"epoch": 38.7589928057554,
"grad_norm": 0.19539327919483185,
"learning_rate": 4.506313131313132e-06,
"loss": 0.0012,
"step": 10775
},
{
"epoch": 38.84892086330935,
"grad_norm": 0.4501245319843292,
"learning_rate": 4.505050505050506e-06,
"loss": 0.0016,
"step": 10800
},
{
"epoch": 38.93884892086331,
"grad_norm": 0.955757200717926,
"learning_rate": 4.503787878787879e-06,
"loss": 0.002,
"step": 10825
},
{
"epoch": 39.02877697841727,
"grad_norm": 0.4927741587162018,
"learning_rate": 4.502525252525253e-06,
"loss": 0.0009,
"step": 10850
},
{
"epoch": 39.118705035971225,
"grad_norm": 0.5250554084777832,
"learning_rate": 4.501262626262627e-06,
"loss": 0.0018,
"step": 10875
},
{
"epoch": 39.20863309352518,
"grad_norm": 0.5786688327789307,
"learning_rate": 4.5e-06,
"loss": 0.0013,
"step": 10900
},
{
"epoch": 39.298561151079134,
"grad_norm": 0.015845810994505882,
"learning_rate": 4.498737373737374e-06,
"loss": 0.0009,
"step": 10925
},
{
"epoch": 39.388489208633096,
"grad_norm": 0.01820209249854088,
"learning_rate": 4.497474747474747e-06,
"loss": 0.001,
"step": 10950
},
{
"epoch": 39.47841726618705,
"grad_norm": 0.026294970884919167,
"learning_rate": 4.496212121212122e-06,
"loss": 0.0018,
"step": 10975
},
{
"epoch": 39.568345323741006,
"grad_norm": 0.4651360511779785,
"learning_rate": 4.494949494949495e-06,
"loss": 0.0016,
"step": 11000
},
{
"epoch": 39.568345323741006,
"eval_loss": 0.09019309282302856,
"eval_runtime": 1345.7556,
"eval_samples_per_second": 1.651,
"eval_steps_per_second": 0.103,
"eval_wer": 6.331926238613642,
"step": 11000
},
{
"epoch": 39.65827338129496,
"grad_norm": 0.29995694756507874,
"learning_rate": 4.493686868686869e-06,
"loss": 0.0015,
"step": 11025
},
{
"epoch": 39.74820143884892,
"grad_norm": 0.3291122019290924,
"learning_rate": 4.492424242424242e-06,
"loss": 0.0015,
"step": 11050
},
{
"epoch": 39.83812949640288,
"grad_norm": 0.1785033792257309,
"learning_rate": 4.491161616161616e-06,
"loss": 0.0006,
"step": 11075
},
{
"epoch": 39.92805755395683,
"grad_norm": 0.020028244704008102,
"learning_rate": 4.48989898989899e-06,
"loss": 0.002,
"step": 11100
},
{
"epoch": 40.01798561151079,
"grad_norm": 0.08107150346040726,
"learning_rate": 4.4886363636363636e-06,
"loss": 0.0014,
"step": 11125
},
{
"epoch": 40.10791366906475,
"grad_norm": 0.012092849239706993,
"learning_rate": 4.4873737373737375e-06,
"loss": 0.0018,
"step": 11150
},
{
"epoch": 40.197841726618705,
"grad_norm": 0.163823664188385,
"learning_rate": 4.4861111111111115e-06,
"loss": 0.0014,
"step": 11175
},
{
"epoch": 40.28776978417266,
"grad_norm": 0.07797440141439438,
"learning_rate": 4.4848484848484855e-06,
"loss": 0.0022,
"step": 11200
},
{
"epoch": 40.37769784172662,
"grad_norm": 0.07735186815261841,
"learning_rate": 4.483585858585859e-06,
"loss": 0.0018,
"step": 11225
},
{
"epoch": 40.46762589928058,
"grad_norm": 0.3801431953907013,
"learning_rate": 4.482323232323233e-06,
"loss": 0.0013,
"step": 11250
},
{
"epoch": 40.55755395683453,
"grad_norm": 0.02574390545487404,
"learning_rate": 4.481060606060607e-06,
"loss": 0.0008,
"step": 11275
},
{
"epoch": 40.64748201438849,
"grad_norm": 0.06015799939632416,
"learning_rate": 4.47979797979798e-06,
"loss": 0.0007,
"step": 11300
},
{
"epoch": 40.73741007194245,
"grad_norm": 0.011081011034548283,
"learning_rate": 4.478535353535354e-06,
"loss": 0.0009,
"step": 11325
},
{
"epoch": 40.827338129496404,
"grad_norm": 0.14023222029209137,
"learning_rate": 4.477272727272728e-06,
"loss": 0.0009,
"step": 11350
},
{
"epoch": 40.91726618705036,
"grad_norm": 1.1734967231750488,
"learning_rate": 4.476010101010102e-06,
"loss": 0.0034,
"step": 11375
},
{
"epoch": 41.007194244604314,
"grad_norm": 0.018789170309901237,
"learning_rate": 4.474747474747475e-06,
"loss": 0.0012,
"step": 11400
},
{
"epoch": 41.097122302158276,
"grad_norm": 0.5469329953193665,
"learning_rate": 4.473484848484849e-06,
"loss": 0.0012,
"step": 11425
},
{
"epoch": 41.18705035971223,
"grad_norm": 1.0320335626602173,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0022,
"step": 11450
},
{
"epoch": 41.276978417266186,
"grad_norm": 0.13018514215946198,
"learning_rate": 4.470959595959596e-06,
"loss": 0.001,
"step": 11475
},
{
"epoch": 41.36690647482014,
"grad_norm": 0.764275848865509,
"learning_rate": 4.46969696969697e-06,
"loss": 0.0017,
"step": 11500
},
{
"epoch": 41.4568345323741,
"grad_norm": 0.037678878754377365,
"learning_rate": 4.468434343434343e-06,
"loss": 0.0012,
"step": 11525
},
{
"epoch": 41.54676258992806,
"grad_norm": 0.0776861384510994,
"learning_rate": 4.467171717171718e-06,
"loss": 0.0012,
"step": 11550
},
{
"epoch": 41.63669064748201,
"grad_norm": 0.1435922086238861,
"learning_rate": 4.465909090909091e-06,
"loss": 0.0014,
"step": 11575
},
{
"epoch": 41.726618705035975,
"grad_norm": 0.2661900520324707,
"learning_rate": 4.464646464646465e-06,
"loss": 0.0014,
"step": 11600
},
{
"epoch": 41.81654676258993,
"grad_norm": 0.014804186299443245,
"learning_rate": 4.463383838383838e-06,
"loss": 0.0013,
"step": 11625
},
{
"epoch": 41.906474820143885,
"grad_norm": 0.5918655395507812,
"learning_rate": 4.462121212121212e-06,
"loss": 0.001,
"step": 11650
},
{
"epoch": 41.99640287769784,
"grad_norm": 0.2970104217529297,
"learning_rate": 4.460858585858586e-06,
"loss": 0.0014,
"step": 11675
},
{
"epoch": 42.0863309352518,
"grad_norm": 0.24786308407783508,
"learning_rate": 4.4595959595959596e-06,
"loss": 0.0005,
"step": 11700
},
{
"epoch": 42.17625899280576,
"grad_norm": 0.39591023325920105,
"learning_rate": 4.4583333333333336e-06,
"loss": 0.0012,
"step": 11725
},
{
"epoch": 42.26618705035971,
"grad_norm": 0.014619703404605389,
"learning_rate": 4.4570707070707076e-06,
"loss": 0.0009,
"step": 11750
},
{
"epoch": 42.356115107913666,
"grad_norm": 0.014031196013092995,
"learning_rate": 4.4558080808080816e-06,
"loss": 0.0005,
"step": 11775
},
{
"epoch": 42.44604316546763,
"grad_norm": 0.0157134011387825,
"learning_rate": 4.454545454545455e-06,
"loss": 0.0005,
"step": 11800
},
{
"epoch": 42.53597122302158,
"grad_norm": 0.5443057417869568,
"learning_rate": 4.453282828282829e-06,
"loss": 0.0005,
"step": 11825
},
{
"epoch": 42.62589928057554,
"grad_norm": 0.17728668451309204,
"learning_rate": 4.452020202020203e-06,
"loss": 0.001,
"step": 11850
},
{
"epoch": 42.71582733812949,
"grad_norm": 0.06720776110887527,
"learning_rate": 4.450757575757576e-06,
"loss": 0.0008,
"step": 11875
},
{
"epoch": 42.805755395683455,
"grad_norm": 0.020302429795265198,
"learning_rate": 4.44949494949495e-06,
"loss": 0.0005,
"step": 11900
},
{
"epoch": 42.89568345323741,
"grad_norm": 0.02236667089164257,
"learning_rate": 4.448232323232324e-06,
"loss": 0.0008,
"step": 11925
},
{
"epoch": 42.985611510791365,
"grad_norm": 0.3039033114910126,
"learning_rate": 4.446969696969697e-06,
"loss": 0.0007,
"step": 11950
},
{
"epoch": 43.07553956834533,
"grad_norm": 0.019936522468924522,
"learning_rate": 4.445707070707071e-06,
"loss": 0.0004,
"step": 11975
},
{
"epoch": 43.16546762589928,
"grad_norm": 0.006646598689258099,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0007,
"step": 12000
},
{
"epoch": 43.16546762589928,
"eval_loss": 0.09005734324455261,
"eval_runtime": 1349.9657,
"eval_samples_per_second": 1.646,
"eval_steps_per_second": 0.103,
"eval_wer": 6.2208398133748055,
"step": 12000
},
{
"epoch": 43.25539568345324,
"grad_norm": 0.06663926690816879,
"learning_rate": 4.443181818181819e-06,
"loss": 0.0003,
"step": 12025
},
{
"epoch": 43.34532374100719,
"grad_norm": 0.7015880346298218,
"learning_rate": 4.441919191919192e-06,
"loss": 0.0013,
"step": 12050
},
{
"epoch": 43.435251798561154,
"grad_norm": 0.09495950490236282,
"learning_rate": 4.440656565656566e-06,
"loss": 0.0009,
"step": 12075
},
{
"epoch": 43.52517985611511,
"grad_norm": 0.010513260029256344,
"learning_rate": 4.43939393939394e-06,
"loss": 0.0007,
"step": 12100
},
{
"epoch": 43.615107913669064,
"grad_norm": 0.08924310654401779,
"learning_rate": 4.438131313131313e-06,
"loss": 0.0004,
"step": 12125
},
{
"epoch": 43.70503597122302,
"grad_norm": 0.015554459765553474,
"learning_rate": 4.436868686868687e-06,
"loss": 0.0005,
"step": 12150
},
{
"epoch": 43.79496402877698,
"grad_norm": 0.02140822261571884,
"learning_rate": 4.4356060606060604e-06,
"loss": 0.0012,
"step": 12175
},
{
"epoch": 43.884892086330936,
"grad_norm": 0.2149767279624939,
"learning_rate": 4.434343434343435e-06,
"loss": 0.0005,
"step": 12200
},
{
"epoch": 43.97482014388489,
"grad_norm": 0.009459302760660648,
"learning_rate": 4.4330808080808084e-06,
"loss": 0.0012,
"step": 12225
},
{
"epoch": 44.064748201438846,
"grad_norm": 0.05037049949169159,
"learning_rate": 4.4318181818181824e-06,
"loss": 0.0004,
"step": 12250
},
{
"epoch": 44.15467625899281,
"grad_norm": 0.006279121618717909,
"learning_rate": 4.430555555555556e-06,
"loss": 0.0006,
"step": 12275
},
{
"epoch": 44.24460431654676,
"grad_norm": 0.03591470420360565,
"learning_rate": 4.42929292929293e-06,
"loss": 0.0006,
"step": 12300
},
{
"epoch": 44.33453237410072,
"grad_norm": 0.013430873863399029,
"learning_rate": 4.428030303030304e-06,
"loss": 0.0015,
"step": 12325
},
{
"epoch": 44.42446043165468,
"grad_norm": 0.01713446155190468,
"learning_rate": 4.426767676767677e-06,
"loss": 0.0011,
"step": 12350
},
{
"epoch": 44.514388489208635,
"grad_norm": 0.6338793039321899,
"learning_rate": 4.425505050505051e-06,
"loss": 0.0023,
"step": 12375
},
{
"epoch": 44.60431654676259,
"grad_norm": 0.19725088775157928,
"learning_rate": 4.424242424242425e-06,
"loss": 0.0015,
"step": 12400
},
{
"epoch": 44.694244604316545,
"grad_norm": 0.034790072590112686,
"learning_rate": 4.422979797979799e-06,
"loss": 0.0011,
"step": 12425
},
{
"epoch": 44.78417266187051,
"grad_norm": 2.0450031757354736,
"learning_rate": 4.421717171717172e-06,
"loss": 0.0012,
"step": 12450
},
{
"epoch": 44.87410071942446,
"grad_norm": 0.25726571679115295,
"learning_rate": 4.420454545454546e-06,
"loss": 0.0008,
"step": 12475
},
{
"epoch": 44.96402877697842,
"grad_norm": 0.14911916851997375,
"learning_rate": 4.41919191919192e-06,
"loss": 0.002,
"step": 12500
},
{
"epoch": 45.05395683453237,
"grad_norm": 0.5396764278411865,
"learning_rate": 4.417929292929293e-06,
"loss": 0.0018,
"step": 12525
},
{
"epoch": 45.143884892086334,
"grad_norm": 0.21499969065189362,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0008,
"step": 12550
},
{
"epoch": 45.23381294964029,
"grad_norm": 0.12975308299064636,
"learning_rate": 4.415404040404041e-06,
"loss": 0.0011,
"step": 12575
},
{
"epoch": 45.32374100719424,
"grad_norm": 0.03521961346268654,
"learning_rate": 4.414141414141415e-06,
"loss": 0.0009,
"step": 12600
},
{
"epoch": 45.4136690647482,
"grad_norm": 0.3964645564556122,
"learning_rate": 4.412878787878788e-06,
"loss": 0.0009,
"step": 12625
},
{
"epoch": 45.50359712230216,
"grad_norm": 0.04135512188076973,
"learning_rate": 4.411616161616162e-06,
"loss": 0.0007,
"step": 12650
},
{
"epoch": 45.593525179856115,
"grad_norm": 0.11724065244197845,
"learning_rate": 4.410353535353536e-06,
"loss": 0.0013,
"step": 12675
},
{
"epoch": 45.68345323741007,
"grad_norm": 0.3066418170928955,
"learning_rate": 4.409090909090909e-06,
"loss": 0.002,
"step": 12700
},
{
"epoch": 45.773381294964025,
"grad_norm": 0.020460475236177444,
"learning_rate": 4.407828282828283e-06,
"loss": 0.0004,
"step": 12725
},
{
"epoch": 45.86330935251799,
"grad_norm": 0.021625172346830368,
"learning_rate": 4.4065656565656565e-06,
"loss": 0.0008,
"step": 12750
},
{
"epoch": 45.95323741007194,
"grad_norm": 0.01973818428814411,
"learning_rate": 4.4053030303030305e-06,
"loss": 0.0005,
"step": 12775
},
{
"epoch": 46.0431654676259,
"grad_norm": 0.3055168092250824,
"learning_rate": 4.4040404040404044e-06,
"loss": 0.0004,
"step": 12800
},
{
"epoch": 46.13309352517986,
"grad_norm": 0.11869470030069351,
"learning_rate": 4.4027777777777784e-06,
"loss": 0.0012,
"step": 12825
},
{
"epoch": 46.223021582733814,
"grad_norm": 0.5959618091583252,
"learning_rate": 4.401515151515152e-06,
"loss": 0.0007,
"step": 12850
},
{
"epoch": 46.31294964028777,
"grad_norm": 0.08037717640399933,
"learning_rate": 4.400252525252526e-06,
"loss": 0.0006,
"step": 12875
},
{
"epoch": 46.402877697841724,
"grad_norm": 0.017363494262099266,
"learning_rate": 4.3989898989899e-06,
"loss": 0.0008,
"step": 12900
},
{
"epoch": 46.492805755395686,
"grad_norm": 0.028551748022437096,
"learning_rate": 4.397727272727273e-06,
"loss": 0.001,
"step": 12925
},
{
"epoch": 46.58273381294964,
"grad_norm": 0.08840727061033249,
"learning_rate": 4.396464646464647e-06,
"loss": 0.0007,
"step": 12950
},
{
"epoch": 46.672661870503596,
"grad_norm": 0.023021990433335304,
"learning_rate": 4.395202020202021e-06,
"loss": 0.0018,
"step": 12975
},
{
"epoch": 46.76258992805755,
"grad_norm": 0.05099537596106529,
"learning_rate": 4.393939393939394e-06,
"loss": 0.001,
"step": 13000
},
{
"epoch": 46.76258992805755,
"eval_loss": 0.08809197694063187,
"eval_runtime": 1348.5762,
"eval_samples_per_second": 1.648,
"eval_steps_per_second": 0.103,
"eval_wer": 6.154187958231504,
"step": 13000
},
{
"epoch": 46.85251798561151,
"grad_norm": 0.02734680473804474,
"learning_rate": 4.392676767676768e-06,
"loss": 0.0006,
"step": 13025
},
{
"epoch": 46.94244604316547,
"grad_norm": 0.012311214581131935,
"learning_rate": 4.391414141414142e-06,
"loss": 0.0004,
"step": 13050
},
{
"epoch": 47.03237410071942,
"grad_norm": 1.1471985578536987,
"learning_rate": 4.390151515151516e-06,
"loss": 0.0006,
"step": 13075
},
{
"epoch": 47.12230215827338,
"grad_norm": 0.04378161579370499,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0009,
"step": 13100
},
{
"epoch": 47.21223021582734,
"grad_norm": 0.014206623658537865,
"learning_rate": 4.387626262626263e-06,
"loss": 0.0006,
"step": 13125
},
{
"epoch": 47.302158273381295,
"grad_norm": 0.12384720891714096,
"learning_rate": 4.386363636363637e-06,
"loss": 0.0008,
"step": 13150
},
{
"epoch": 47.39208633093525,
"grad_norm": 0.12384091317653656,
"learning_rate": 4.38510101010101e-06,
"loss": 0.0006,
"step": 13175
},
{
"epoch": 47.48201438848921,
"grad_norm": 0.05459749698638916,
"learning_rate": 4.383838383838384e-06,
"loss": 0.0017,
"step": 13200
},
{
"epoch": 47.57194244604317,
"grad_norm": 0.06376705318689346,
"learning_rate": 4.382575757575757e-06,
"loss": 0.0012,
"step": 13225
},
{
"epoch": 47.66187050359712,
"grad_norm": 0.09516707807779312,
"learning_rate": 4.381313131313132e-06,
"loss": 0.0005,
"step": 13250
},
{
"epoch": 47.75179856115108,
"grad_norm": 0.035159386694431305,
"learning_rate": 4.380050505050505e-06,
"loss": 0.0009,
"step": 13275
},
{
"epoch": 47.84172661870504,
"grad_norm": 0.13273297250270844,
"learning_rate": 4.378787878787879e-06,
"loss": 0.0011,
"step": 13300
},
{
"epoch": 47.931654676258994,
"grad_norm": 0.6526914834976196,
"learning_rate": 4.3775252525252525e-06,
"loss": 0.0017,
"step": 13325
},
{
"epoch": 48.02158273381295,
"grad_norm": 0.10989696532487869,
"learning_rate": 4.3762626262626265e-06,
"loss": 0.0013,
"step": 13350
},
{
"epoch": 48.111510791366904,
"grad_norm": 0.12258470058441162,
"learning_rate": 4.3750000000000005e-06,
"loss": 0.001,
"step": 13375
},
{
"epoch": 48.201438848920866,
"grad_norm": 0.04794065281748772,
"learning_rate": 4.373737373737374e-06,
"loss": 0.0006,
"step": 13400
},
{
"epoch": 48.29136690647482,
"grad_norm": 0.18742027878761292,
"learning_rate": 4.3724747474747485e-06,
"loss": 0.001,
"step": 13425
},
{
"epoch": 48.381294964028775,
"grad_norm": 0.047946684062480927,
"learning_rate": 4.371212121212122e-06,
"loss": 0.0008,
"step": 13450
},
{
"epoch": 48.47122302158273,
"grad_norm": 0.011459482833743095,
"learning_rate": 4.369949494949496e-06,
"loss": 0.0004,
"step": 13475
},
{
"epoch": 48.56115107913669,
"grad_norm": 0.0178390983492136,
"learning_rate": 4.368686868686869e-06,
"loss": 0.0005,
"step": 13500
},
{
"epoch": 48.65107913669065,
"grad_norm": 0.02639496698975563,
"learning_rate": 4.367424242424243e-06,
"loss": 0.0006,
"step": 13525
},
{
"epoch": 48.7410071942446,
"grad_norm": 0.9992175698280334,
"learning_rate": 4.366161616161617e-06,
"loss": 0.0006,
"step": 13550
},
{
"epoch": 48.830935251798564,
"grad_norm": 0.12613770365715027,
"learning_rate": 4.36489898989899e-06,
"loss": 0.0003,
"step": 13575
},
{
"epoch": 48.92086330935252,
"grad_norm": 0.008718474768102169,
"learning_rate": 4.363636363636364e-06,
"loss": 0.0006,
"step": 13600
},
{
"epoch": 49.010791366906474,
"grad_norm": 0.09226574003696442,
"learning_rate": 4.362373737373738e-06,
"loss": 0.001,
"step": 13625
},
{
"epoch": 49.10071942446043,
"grad_norm": 0.01371210440993309,
"learning_rate": 4.361111111111112e-06,
"loss": 0.0005,
"step": 13650
},
{
"epoch": 49.19064748201439,
"grad_norm": 0.8040596842765808,
"learning_rate": 4.359848484848485e-06,
"loss": 0.0014,
"step": 13675
},
{
"epoch": 49.280575539568346,
"grad_norm": 0.2569543123245239,
"learning_rate": 4.358585858585859e-06,
"loss": 0.0004,
"step": 13700
},
{
"epoch": 49.3705035971223,
"grad_norm": 0.04654459282755852,
"learning_rate": 4.357323232323233e-06,
"loss": 0.0003,
"step": 13725
},
{
"epoch": 49.460431654676256,
"grad_norm": 0.03116775117814541,
"learning_rate": 4.356060606060606e-06,
"loss": 0.0006,
"step": 13750
},
{
"epoch": 49.55035971223022,
"grad_norm": 0.013714387081563473,
"learning_rate": 4.35479797979798e-06,
"loss": 0.0005,
"step": 13775
},
{
"epoch": 49.64028776978417,
"grad_norm": 0.012171006761491299,
"learning_rate": 4.353535353535353e-06,
"loss": 0.0005,
"step": 13800
},
{
"epoch": 49.73021582733813,
"grad_norm": 0.39719274640083313,
"learning_rate": 4.352272727272727e-06,
"loss": 0.0002,
"step": 13825
},
{
"epoch": 49.82014388489208,
"grad_norm": 0.009979949332773685,
"learning_rate": 4.351010101010101e-06,
"loss": 0.0002,
"step": 13850
},
{
"epoch": 49.910071942446045,
"grad_norm": 0.010056397877633572,
"learning_rate": 4.349747474747475e-06,
"loss": 0.0001,
"step": 13875
},
{
"epoch": 50.0,
"grad_norm": 1.2399721145629883,
"learning_rate": 4.348484848484849e-06,
"loss": 0.0003,
"step": 13900
},
{
"epoch": 50.089928057553955,
"grad_norm": 0.008993759751319885,
"learning_rate": 4.3472222222222225e-06,
"loss": 0.0003,
"step": 13925
},
{
"epoch": 50.17985611510792,
"grad_norm": 0.0040525756776332855,
"learning_rate": 4.3459595959595965e-06,
"loss": 0.0001,
"step": 13950
},
{
"epoch": 50.26978417266187,
"grad_norm": 0.037480395287275314,
"learning_rate": 4.34469696969697e-06,
"loss": 0.0006,
"step": 13975
},
{
"epoch": 50.35971223021583,
"grad_norm": 0.011341557838022709,
"learning_rate": 4.343434343434344e-06,
"loss": 0.0001,
"step": 14000
},
{
"epoch": 50.35971223021583,
"eval_loss": 0.0883052721619606,
"eval_runtime": 1347.8354,
"eval_samples_per_second": 1.649,
"eval_steps_per_second": 0.103,
"eval_wer": 6.161593719914093,
"step": 14000
},
{
"epoch": 50.44964028776978,
"grad_norm": 0.097772017121315,
"learning_rate": 4.342171717171718e-06,
"loss": 0.0003,
"step": 14025
},
{
"epoch": 50.539568345323744,
"grad_norm": 0.22011174261569977,
"learning_rate": 4.340909090909091e-06,
"loss": 0.0004,
"step": 14050
},
{
"epoch": 50.6294964028777,
"grad_norm": 0.004608627874404192,
"learning_rate": 4.339646464646465e-06,
"loss": 0.002,
"step": 14075
},
{
"epoch": 50.719424460431654,
"grad_norm": 0.02777382917702198,
"learning_rate": 4.338383838383839e-06,
"loss": 0.0009,
"step": 14100
},
{
"epoch": 50.80935251798561,
"grad_norm": 0.3765215277671814,
"learning_rate": 4.337121212121213e-06,
"loss": 0.0015,
"step": 14125
},
{
"epoch": 50.89928057553957,
"grad_norm": 0.014906881377100945,
"learning_rate": 4.335858585858586e-06,
"loss": 0.0019,
"step": 14150
},
{
"epoch": 50.989208633093526,
"grad_norm": 0.07598377764225006,
"learning_rate": 4.33459595959596e-06,
"loss": 0.0011,
"step": 14175
},
{
"epoch": 51.07913669064748,
"grad_norm": 0.04858017340302467,
"learning_rate": 4.333333333333334e-06,
"loss": 0.002,
"step": 14200
},
{
"epoch": 51.169064748201436,
"grad_norm": 0.00848084781318903,
"learning_rate": 4.332070707070707e-06,
"loss": 0.0015,
"step": 14225
},
{
"epoch": 51.2589928057554,
"grad_norm": 0.192399799823761,
"learning_rate": 4.330808080808081e-06,
"loss": 0.0014,
"step": 14250
},
{
"epoch": 51.34892086330935,
"grad_norm": 0.17804254591464996,
"learning_rate": 4.329545454545455e-06,
"loss": 0.0009,
"step": 14275
},
{
"epoch": 51.43884892086331,
"grad_norm": 0.9404972791671753,
"learning_rate": 4.328282828282829e-06,
"loss": 0.0022,
"step": 14300
},
{
"epoch": 51.52877697841727,
"grad_norm": 0.06042027473449707,
"learning_rate": 4.327020202020202e-06,
"loss": 0.0009,
"step": 14325
},
{
"epoch": 51.618705035971225,
"grad_norm": 0.11593267321586609,
"learning_rate": 4.325757575757576e-06,
"loss": 0.001,
"step": 14350
},
{
"epoch": 51.70863309352518,
"grad_norm": 0.042370762676000595,
"learning_rate": 4.32449494949495e-06,
"loss": 0.0009,
"step": 14375
},
{
"epoch": 51.798561151079134,
"grad_norm": 0.06264758855104446,
"learning_rate": 4.323232323232323e-06,
"loss": 0.0011,
"step": 14400
},
{
"epoch": 51.888489208633096,
"grad_norm": 0.419005811214447,
"learning_rate": 4.321969696969697e-06,
"loss": 0.0013,
"step": 14425
},
{
"epoch": 51.97841726618705,
"grad_norm": 0.025492649525403976,
"learning_rate": 4.3207070707070705e-06,
"loss": 0.0008,
"step": 14450
},
{
"epoch": 52.068345323741006,
"grad_norm": 0.1695825606584549,
"learning_rate": 4.319444444444445e-06,
"loss": 0.001,
"step": 14475
},
{
"epoch": 52.15827338129496,
"grad_norm": 0.21136726438999176,
"learning_rate": 4.3181818181818185e-06,
"loss": 0.0004,
"step": 14500
},
{
"epoch": 52.24820143884892,
"grad_norm": 0.00583269540220499,
"learning_rate": 4.3169191919191925e-06,
"loss": 0.0003,
"step": 14525
},
{
"epoch": 52.33812949640288,
"grad_norm": 0.05031251907348633,
"learning_rate": 4.315656565656566e-06,
"loss": 0.0005,
"step": 14550
},
{
"epoch": 52.42805755395683,
"grad_norm": 1.4654878377914429,
"learning_rate": 4.31439393939394e-06,
"loss": 0.0011,
"step": 14575
},
{
"epoch": 52.51798561151079,
"grad_norm": 0.05035277083516121,
"learning_rate": 4.313131313131314e-06,
"loss": 0.0008,
"step": 14600
},
{
"epoch": 52.60791366906475,
"grad_norm": 0.3283204138278961,
"learning_rate": 4.311868686868687e-06,
"loss": 0.0024,
"step": 14625
},
{
"epoch": 52.697841726618705,
"grad_norm": 0.09352482855319977,
"learning_rate": 4.310606060606061e-06,
"loss": 0.0013,
"step": 14650
},
{
"epoch": 52.78776978417266,
"grad_norm": 0.4381198287010193,
"learning_rate": 4.309343434343435e-06,
"loss": 0.0014,
"step": 14675
},
{
"epoch": 52.87769784172662,
"grad_norm": 0.4195464551448822,
"learning_rate": 4.308080808080809e-06,
"loss": 0.0006,
"step": 14700
},
{
"epoch": 52.96762589928058,
"grad_norm": 0.037935055792331696,
"learning_rate": 4.306818181818182e-06,
"loss": 0.0005,
"step": 14725
},
{
"epoch": 53.05755395683453,
"grad_norm": 0.0057031637988984585,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0011,
"step": 14750
},
{
"epoch": 53.14748201438849,
"grad_norm": 0.09235268831253052,
"learning_rate": 4.30429292929293e-06,
"loss": 0.0012,
"step": 14775
},
{
"epoch": 53.23741007194245,
"grad_norm": 0.4533500075340271,
"learning_rate": 4.303030303030303e-06,
"loss": 0.0013,
"step": 14800
},
{
"epoch": 53.327338129496404,
"grad_norm": 0.14968417584896088,
"learning_rate": 4.301767676767677e-06,
"loss": 0.0009,
"step": 14825
},
{
"epoch": 53.41726618705036,
"grad_norm": 0.016032686457037926,
"learning_rate": 4.300505050505051e-06,
"loss": 0.0003,
"step": 14850
},
{
"epoch": 53.507194244604314,
"grad_norm": 0.04255020618438721,
"learning_rate": 4.299242424242425e-06,
"loss": 0.0002,
"step": 14875
},
{
"epoch": 53.597122302158276,
"grad_norm": 0.01301508117467165,
"learning_rate": 4.297979797979798e-06,
"loss": 0.0003,
"step": 14900
},
{
"epoch": 53.68705035971223,
"grad_norm": 0.007252383045852184,
"learning_rate": 4.296717171717172e-06,
"loss": 0.0005,
"step": 14925
},
{
"epoch": 53.776978417266186,
"grad_norm": 0.13183751702308655,
"learning_rate": 4.295454545454546e-06,
"loss": 0.002,
"step": 14950
},
{
"epoch": 53.86690647482014,
"grad_norm": 0.028183195739984512,
"learning_rate": 4.294191919191919e-06,
"loss": 0.0015,
"step": 14975
},
{
"epoch": 53.9568345323741,
"grad_norm": 0.1370900571346283,
"learning_rate": 4.292929292929293e-06,
"loss": 0.0007,
"step": 15000
},
{
"epoch": 53.9568345323741,
"eval_loss": 0.08864730596542358,
"eval_runtime": 1347.6756,
"eval_samples_per_second": 1.649,
"eval_steps_per_second": 0.103,
"eval_wer": 6.391172332074353,
"step": 15000
},
{
"epoch": 54.04676258992806,
"grad_norm": 0.01960013061761856,
"learning_rate": 4.2916666666666665e-06,
"loss": 0.0016,
"step": 15025
},
{
"epoch": 54.13669064748201,
"grad_norm": 0.13105234503746033,
"learning_rate": 4.2904040404040405e-06,
"loss": 0.0003,
"step": 15050
},
{
"epoch": 54.226618705035975,
"grad_norm": 2.309511423110962,
"learning_rate": 4.2891414141414145e-06,
"loss": 0.0009,
"step": 15075
},
{
"epoch": 54.31654676258993,
"grad_norm": 0.018184732645750046,
"learning_rate": 4.287878787878788e-06,
"loss": 0.001,
"step": 15100
},
{
"epoch": 54.406474820143885,
"grad_norm": 0.05596456304192543,
"learning_rate": 4.2866161616161625e-06,
"loss": 0.0012,
"step": 15125
},
{
"epoch": 54.49640287769784,
"grad_norm": 0.735536515712738,
"learning_rate": 4.285353535353536e-06,
"loss": 0.0014,
"step": 15150
},
{
"epoch": 54.5863309352518,
"grad_norm": 0.641944169998169,
"learning_rate": 4.28409090909091e-06,
"loss": 0.0017,
"step": 15175
},
{
"epoch": 54.67625899280576,
"grad_norm": 0.02818766050040722,
"learning_rate": 4.282828282828283e-06,
"loss": 0.0013,
"step": 15200
},
{
"epoch": 54.76618705035971,
"grad_norm": 0.04384085536003113,
"learning_rate": 4.281565656565657e-06,
"loss": 0.0012,
"step": 15225
},
{
"epoch": 54.856115107913666,
"grad_norm": 0.5741293430328369,
"learning_rate": 4.280303030303031e-06,
"loss": 0.0012,
"step": 15250
},
{
"epoch": 54.94604316546763,
"grad_norm": 0.5108962059020996,
"learning_rate": 4.279040404040404e-06,
"loss": 0.0013,
"step": 15275
},
{
"epoch": 55.03597122302158,
"grad_norm": 0.09613129496574402,
"learning_rate": 4.277777777777778e-06,
"loss": 0.0011,
"step": 15300
},
{
"epoch": 55.12589928057554,
"grad_norm": 0.2453729510307312,
"learning_rate": 4.276515151515152e-06,
"loss": 0.0016,
"step": 15325
},
{
"epoch": 55.21582733812949,
"grad_norm": 0.03533944860100746,
"learning_rate": 4.275252525252526e-06,
"loss": 0.0013,
"step": 15350
},
{
"epoch": 55.305755395683455,
"grad_norm": 0.02793753705918789,
"learning_rate": 4.273989898989899e-06,
"loss": 0.0011,
"step": 15375
},
{
"epoch": 55.39568345323741,
"grad_norm": 0.11208122968673706,
"learning_rate": 4.272727272727273e-06,
"loss": 0.0014,
"step": 15400
},
{
"epoch": 55.485611510791365,
"grad_norm": 0.23727653920650482,
"learning_rate": 4.271464646464647e-06,
"loss": 0.0007,
"step": 15425
},
{
"epoch": 55.57553956834532,
"grad_norm": 0.1095881313085556,
"learning_rate": 4.27020202020202e-06,
"loss": 0.0006,
"step": 15450
},
{
"epoch": 55.66546762589928,
"grad_norm": 0.026398301124572754,
"learning_rate": 4.268939393939394e-06,
"loss": 0.0003,
"step": 15475
},
{
"epoch": 55.75539568345324,
"grad_norm": 0.3764269948005676,
"learning_rate": 4.267676767676767e-06,
"loss": 0.0007,
"step": 15500
},
{
"epoch": 55.84532374100719,
"grad_norm": 0.710081160068512,
"learning_rate": 4.266414141414142e-06,
"loss": 0.0006,
"step": 15525
},
{
"epoch": 55.935251798561154,
"grad_norm": 0.01405036449432373,
"learning_rate": 4.265151515151515e-06,
"loss": 0.0009,
"step": 15550
},
{
"epoch": 56.02517985611511,
"grad_norm": 0.011654024943709373,
"learning_rate": 4.263888888888889e-06,
"loss": 0.0011,
"step": 15575
},
{
"epoch": 56.115107913669064,
"grad_norm": 0.8455324172973633,
"learning_rate": 4.262626262626263e-06,
"loss": 0.0006,
"step": 15600
},
{
"epoch": 56.20503597122302,
"grad_norm": 0.7859840989112854,
"learning_rate": 4.2613636363636365e-06,
"loss": 0.0005,
"step": 15625
},
{
"epoch": 56.29496402877698,
"grad_norm": 0.012887760065495968,
"learning_rate": 4.2601010101010105e-06,
"loss": 0.0003,
"step": 15650
},
{
"epoch": 56.384892086330936,
"grad_norm": 0.27630236744880676,
"learning_rate": 4.258838383838384e-06,
"loss": 0.0005,
"step": 15675
},
{
"epoch": 56.47482014388489,
"grad_norm": 0.23494713008403778,
"learning_rate": 4.2575757575757585e-06,
"loss": 0.0003,
"step": 15700
},
{
"epoch": 56.564748201438846,
"grad_norm": 0.04018251597881317,
"learning_rate": 4.256313131313132e-06,
"loss": 0.0009,
"step": 15725
},
{
"epoch": 56.65467625899281,
"grad_norm": 0.29447436332702637,
"learning_rate": 4.255050505050506e-06,
"loss": 0.0002,
"step": 15750
},
{
"epoch": 56.74460431654676,
"grad_norm": 0.048734016716480255,
"learning_rate": 4.253787878787879e-06,
"loss": 0.0008,
"step": 15775
},
{
"epoch": 56.83453237410072,
"grad_norm": 0.00981312245130539,
"learning_rate": 4.252525252525253e-06,
"loss": 0.0003,
"step": 15800
},
{
"epoch": 56.92446043165468,
"grad_norm": 0.029217666015028954,
"learning_rate": 4.251262626262627e-06,
"loss": 0.0002,
"step": 15825
},
{
"epoch": 57.014388489208635,
"grad_norm": 0.0892946720123291,
"learning_rate": 4.25e-06,
"loss": 0.0008,
"step": 15850
},
{
"epoch": 57.10431654676259,
"grad_norm": 0.0070861089043319225,
"learning_rate": 4.248737373737374e-06,
"loss": 0.0003,
"step": 15875
},
{
"epoch": 57.194244604316545,
"grad_norm": 0.5670444965362549,
"learning_rate": 4.247474747474748e-06,
"loss": 0.0005,
"step": 15900
},
{
"epoch": 57.28417266187051,
"grad_norm": 0.4061719477176666,
"learning_rate": 4.246212121212122e-06,
"loss": 0.0009,
"step": 15925
},
{
"epoch": 57.37410071942446,
"grad_norm": 0.2658737897872925,
"learning_rate": 4.244949494949495e-06,
"loss": 0.0011,
"step": 15950
},
{
"epoch": 57.46402877697842,
"grad_norm": 0.06908473372459412,
"learning_rate": 4.243686868686869e-06,
"loss": 0.0012,
"step": 15975
},
{
"epoch": 57.55395683453237,
"grad_norm": 0.12484970688819885,
"learning_rate": 4.242424242424243e-06,
"loss": 0.0008,
"step": 16000
},
{
"epoch": 57.55395683453237,
"eval_loss": 0.09175190329551697,
"eval_runtime": 1351.6711,
"eval_samples_per_second": 1.644,
"eval_steps_per_second": 0.103,
"eval_wer": 6.391172332074353,
"step": 16000
},
{
"epoch": 57.643884892086334,
"grad_norm": 0.10532079637050629,
"learning_rate": 4.241161616161616e-06,
"loss": 0.0009,
"step": 16025
},
{
"epoch": 57.73381294964029,
"grad_norm": 0.0082013588398695,
"learning_rate": 4.23989898989899e-06,
"loss": 0.0009,
"step": 16050
},
{
"epoch": 57.82374100719424,
"grad_norm": 0.8880343437194824,
"learning_rate": 4.238636363636364e-06,
"loss": 0.0012,
"step": 16075
},
{
"epoch": 57.9136690647482,
"grad_norm": 0.04694369435310364,
"learning_rate": 4.237373737373737e-06,
"loss": 0.0011,
"step": 16100
},
{
"epoch": 58.00359712230216,
"grad_norm": 0.4175935387611389,
"learning_rate": 4.236111111111111e-06,
"loss": 0.0007,
"step": 16125
},
{
"epoch": 58.093525179856115,
"grad_norm": 0.0991375669836998,
"learning_rate": 4.234848484848485e-06,
"loss": 0.0008,
"step": 16150
},
{
"epoch": 58.18345323741007,
"grad_norm": 0.05238619074225426,
"learning_rate": 4.233585858585859e-06,
"loss": 0.0009,
"step": 16175
},
{
"epoch": 58.273381294964025,
"grad_norm": 0.024060403928160667,
"learning_rate": 4.2323232323232325e-06,
"loss": 0.0005,
"step": 16200
},
{
"epoch": 58.36330935251799,
"grad_norm": 0.514026939868927,
"learning_rate": 4.2310606060606065e-06,
"loss": 0.0017,
"step": 16225
},
{
"epoch": 58.45323741007194,
"grad_norm": 0.9123257994651794,
"learning_rate": 4.22979797979798e-06,
"loss": 0.0009,
"step": 16250
},
{
"epoch": 58.5431654676259,
"grad_norm": 0.034488383680582047,
"learning_rate": 4.228535353535354e-06,
"loss": 0.0005,
"step": 16275
},
{
"epoch": 58.63309352517986,
"grad_norm": 0.08020392805337906,
"learning_rate": 4.227272727272728e-06,
"loss": 0.0021,
"step": 16300
},
{
"epoch": 58.723021582733814,
"grad_norm": 0.011538870632648468,
"learning_rate": 4.226010101010101e-06,
"loss": 0.001,
"step": 16325
},
{
"epoch": 58.81294964028777,
"grad_norm": 0.4130057692527771,
"learning_rate": 4.224747474747475e-06,
"loss": 0.0009,
"step": 16350
},
{
"epoch": 58.902877697841724,
"grad_norm": 0.018940504640340805,
"learning_rate": 4.223484848484849e-06,
"loss": 0.0008,
"step": 16375
},
{
"epoch": 58.992805755395686,
"grad_norm": 0.09760510176420212,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0009,
"step": 16400
},
{
"epoch": 59.08273381294964,
"grad_norm": 0.6728724241256714,
"learning_rate": 4.220959595959596e-06,
"loss": 0.0015,
"step": 16425
},
{
"epoch": 59.172661870503596,
"grad_norm": 0.01400268916040659,
"learning_rate": 4.21969696969697e-06,
"loss": 0.0006,
"step": 16450
},
{
"epoch": 59.26258992805755,
"grad_norm": 0.027168823406100273,
"learning_rate": 4.218434343434344e-06,
"loss": 0.0003,
"step": 16475
},
{
"epoch": 59.35251798561151,
"grad_norm": 0.025733735412359238,
"learning_rate": 4.217171717171717e-06,
"loss": 0.0003,
"step": 16500
},
{
"epoch": 59.44244604316547,
"grad_norm": 0.012072687968611717,
"learning_rate": 4.215909090909091e-06,
"loss": 0.0004,
"step": 16525
},
{
"epoch": 59.53237410071942,
"grad_norm": 0.03630650043487549,
"learning_rate": 4.214646464646465e-06,
"loss": 0.0013,
"step": 16550
},
{
"epoch": 59.62230215827338,
"grad_norm": 0.13875187933444977,
"learning_rate": 4.213383838383839e-06,
"loss": 0.0008,
"step": 16575
},
{
"epoch": 59.71223021582734,
"grad_norm": 0.06004035472869873,
"learning_rate": 4.212121212121212e-06,
"loss": 0.0004,
"step": 16600
},
{
"epoch": 59.802158273381295,
"grad_norm": 0.024319609627127647,
"learning_rate": 4.210858585858586e-06,
"loss": 0.0006,
"step": 16625
},
{
"epoch": 59.89208633093525,
"grad_norm": 0.0957476794719696,
"learning_rate": 4.20959595959596e-06,
"loss": 0.0016,
"step": 16650
},
{
"epoch": 59.98201438848921,
"grad_norm": 0.014447568915784359,
"learning_rate": 4.208333333333333e-06,
"loss": 0.0007,
"step": 16675
},
{
"epoch": 60.07194244604317,
"grad_norm": 0.0760221779346466,
"learning_rate": 4.207070707070707e-06,
"loss": 0.0007,
"step": 16700
},
{
"epoch": 60.16187050359712,
"grad_norm": 0.08783930540084839,
"learning_rate": 4.2058080808080806e-06,
"loss": 0.0006,
"step": 16725
},
{
"epoch": 60.25179856115108,
"grad_norm": 0.020011553540825844,
"learning_rate": 4.204545454545455e-06,
"loss": 0.0002,
"step": 16750
},
{
"epoch": 60.34172661870504,
"grad_norm": 0.004587370436638594,
"learning_rate": 4.2032828282828286e-06,
"loss": 0.0001,
"step": 16775
},
{
"epoch": 60.431654676258994,
"grad_norm": 0.05192629247903824,
"learning_rate": 4.2020202020202026e-06,
"loss": 0.0007,
"step": 16800
},
{
"epoch": 60.52158273381295,
"grad_norm": 0.0028184789698570967,
"learning_rate": 4.2007575757575766e-06,
"loss": 0.0001,
"step": 16825
},
{
"epoch": 60.611510791366904,
"grad_norm": 0.11263082921504974,
"learning_rate": 4.19949494949495e-06,
"loss": 0.0004,
"step": 16850
},
{
"epoch": 60.701438848920866,
"grad_norm": 0.020229890942573547,
"learning_rate": 4.198232323232324e-06,
"loss": 0.0002,
"step": 16875
},
{
"epoch": 60.79136690647482,
"grad_norm": 0.004258246161043644,
"learning_rate": 4.196969696969697e-06,
"loss": 0.0004,
"step": 16900
},
{
"epoch": 60.881294964028775,
"grad_norm": 0.005619137082248926,
"learning_rate": 4.195707070707072e-06,
"loss": 0.0001,
"step": 16925
},
{
"epoch": 60.97122302158273,
"grad_norm": 0.005032286513596773,
"learning_rate": 4.194444444444445e-06,
"loss": 0.0002,
"step": 16950
},
{
"epoch": 61.06115107913669,
"grad_norm": 0.02484523132443428,
"learning_rate": 4.193181818181819e-06,
"loss": 0.0003,
"step": 16975
},
{
"epoch": 61.15107913669065,
"grad_norm": 0.0017194038955494761,
"learning_rate": 4.191919191919192e-06,
"loss": 0.0002,
"step": 17000
},
{
"epoch": 61.15107913669065,
"eval_loss": 0.09027338027954102,
"eval_runtime": 1359.5537,
"eval_samples_per_second": 1.634,
"eval_steps_per_second": 0.102,
"eval_wer": 5.909797822706065,
"step": 17000
},
{ |
|
"epoch": 61.2410071942446, |
|
"grad_norm": 0.0024019062984734774, |
|
"learning_rate": 4.190656565656566e-06, |
|
"loss": 0.0002, |
|
"step": 17025 |
|
}, |
|
{ |
|
"epoch": 61.330935251798564, |
|
"grad_norm": 0.004478455055505037, |
|
"learning_rate": 4.18939393939394e-06, |
|
"loss": 0.0003, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 61.42086330935252, |
|
"grad_norm": 0.0044603836722671986, |
|
"learning_rate": 4.188131313131313e-06, |
|
"loss": 0.0004, |
|
"step": 17075 |
|
}, |
|
{ |
|
"epoch": 61.510791366906474, |
|
"grad_norm": 0.08818788081407547, |
|
"learning_rate": 4.186868686868687e-06, |
|
"loss": 0.0009, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 61.60071942446043, |
|
"grad_norm": 0.0027286384720355272, |
|
"learning_rate": 4.185606060606061e-06, |
|
"loss": 0.0002, |
|
"step": 17125 |
|
}, |
|
{ |
|
"epoch": 61.69064748201439, |
|
"grad_norm": 0.0037345695309340954, |
|
"learning_rate": 4.184343434343434e-06, |
|
"loss": 0.0003, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 61.780575539568346, |
|
"grad_norm": 0.014616015367209911, |
|
"learning_rate": 4.183080808080808e-06, |
|
"loss": 0.0004, |
|
"step": 17175 |
|
}, |
|
{ |
|
"epoch": 61.8705035971223, |
|
"grad_norm": 0.007769573014229536, |
|
"learning_rate": 4.181818181818182e-06, |
|
"loss": 0.0003, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 61.960431654676256, |
|
"grad_norm": 0.008359814994037151, |
|
"learning_rate": 4.180555555555556e-06, |
|
"loss": 0.0005, |
|
"step": 17225 |
|
}, |
|
{ |
|
"epoch": 62.05035971223022, |
|
"grad_norm": 0.0051100486889481544, |
|
"learning_rate": 4.1792929292929294e-06, |
|
"loss": 0.0005, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 62.14028776978417, |
|
"grad_norm": 0.0029563389252871275, |
|
"learning_rate": 4.1780303030303034e-06, |
|
"loss": 0.0001, |
|
"step": 17275 |
|
}, |
|
{ |
|
"epoch": 62.23021582733813, |
|
"grad_norm": 0.0030668089166283607, |
|
"learning_rate": 4.1767676767676774e-06, |
|
"loss": 0.0001, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 62.32014388489208, |
|
"grad_norm": 0.02710825577378273, |
|
"learning_rate": 4.175505050505051e-06, |
|
"loss": 0.0006, |
|
"step": 17325 |
|
}, |
|
{ |
|
"epoch": 62.410071942446045, |
|
"grad_norm": 0.0027756947092711926, |
|
"learning_rate": 4.1742424242424246e-06, |
|
"loss": 0.0001, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"grad_norm": 0.09106307476758957, |
|
"learning_rate": 4.172979797979798e-06, |
|
"loss": 0.0003, |
|
"step": 17375 |
|
}, |
|
{ |
|
"epoch": 62.589928057553955, |
|
"grad_norm": 0.005363088101148605, |
|
"learning_rate": 4.1717171717171726e-06, |
|
"loss": 0.0001, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 62.67985611510792, |
|
"grad_norm": 0.005525332409888506, |
|
"learning_rate": 4.170454545454546e-06, |
|
"loss": 0.0001, |
|
"step": 17425 |
|
}, |
|
{ |
|
"epoch": 62.76978417266187, |
|
"grad_norm": 0.007496482692658901, |
|
"learning_rate": 4.16919191919192e-06, |
|
"loss": 0.0001, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 62.85971223021583, |
|
"grad_norm": 0.026290051639080048, |
|
"learning_rate": 4.167929292929293e-06, |
|
"loss": 0.0001, |
|
"step": 17475 |
|
}, |
|
{ |
|
"epoch": 62.94964028776978, |
|
"grad_norm": 0.006395560223609209, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 63.039568345323744, |
|
"grad_norm": 0.004197731614112854, |
|
"learning_rate": 4.165404040404041e-06, |
|
"loss": 0.0001, |
|
"step": 17525 |
|
}, |
|
{ |
|
"epoch": 63.1294964028777, |
|
"grad_norm": 0.002505301032215357, |
|
"learning_rate": 4.164141414141414e-06, |
|
"loss": 0.0, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 63.219424460431654, |
|
"grad_norm": 0.0022915108129382133, |
|
"learning_rate": 4.162878787878788e-06, |
|
"loss": 0.0001, |
|
"step": 17575 |
|
}, |
|
{ |
|
"epoch": 63.30935251798561, |
|
"grad_norm": 0.0019390948582440615, |
|
"learning_rate": 4.161616161616162e-06, |
|
"loss": 0.0, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 63.39928057553957, |
|
"grad_norm": 0.001307799364440143, |
|
"learning_rate": 4.160353535353536e-06, |
|
"loss": 0.0001, |
|
"step": 17625 |
|
}, |
|
{ |
|
"epoch": 63.489208633093526, |
|
"grad_norm": 0.0016936671454459429, |
|
"learning_rate": 4.159090909090909e-06, |
|
"loss": 0.0, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 63.57913669064748, |
|
"grad_norm": 0.0017974688671529293, |
|
"learning_rate": 4.157828282828283e-06, |
|
"loss": 0.0, |
|
"step": 17675 |
|
}, |
|
{ |
|
"epoch": 63.669064748201436, |
|
"grad_norm": 0.0027852486819028854, |
|
"learning_rate": 4.156565656565657e-06, |
|
"loss": 0.0, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 63.7589928057554, |
|
"grad_norm": 0.0017096559749916196, |
|
"learning_rate": 4.15530303030303e-06, |
|
"loss": 0.0, |
|
"step": 17725 |
|
}, |
|
{ |
|
"epoch": 63.84892086330935, |
|
"grad_norm": 0.0019876237493008375, |
|
"learning_rate": 4.154040404040404e-06, |
|
"loss": 0.0, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 63.93884892086331, |
|
"grad_norm": 0.0011115281376987696, |
|
"learning_rate": 4.152777777777778e-06, |
|
"loss": 0.0, |
|
"step": 17775 |
|
}, |
|
{ |
|
"epoch": 64.02877697841727, |
|
"grad_norm": 0.0017126763705164194, |
|
"learning_rate": 4.151515151515152e-06, |
|
"loss": 0.0, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 64.11870503597122, |
|
"grad_norm": 0.0011258955346420407, |
|
"learning_rate": 4.1502525252525254e-06, |
|
"loss": 0.0, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 64.20863309352518, |
|
"grad_norm": 0.0015615399461239576, |
|
"learning_rate": 4.1489898989898994e-06, |
|
"loss": 0.0, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 64.29856115107914, |
|
"grad_norm": 0.001990539487451315, |
|
"learning_rate": 4.1477272727272734e-06, |
|
"loss": 0.0, |
|
"step": 17875 |
|
}, |
|
{ |
|
"epoch": 64.38848920863309, |
|
"grad_norm": 0.0013739466667175293, |
|
"learning_rate": 4.146464646464647e-06, |
|
"loss": 0.0, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 64.47841726618705, |
|
"grad_norm": 0.0017153042135760188, |
|
"learning_rate": 4.145202020202021e-06, |
|
"loss": 0.0, |
|
"step": 17925 |
|
}, |
|
{ |
|
"epoch": 64.56834532374101, |
|
"grad_norm": 0.0013855737634003162, |
|
"learning_rate": 4.143939393939394e-06, |
|
"loss": 0.0, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 64.65827338129496, |
|
"grad_norm": 0.0023376569151878357, |
|
"learning_rate": 4.142676767676769e-06, |
|
"loss": 0.0001, |
|
"step": 17975 |
|
}, |
|
{ |
|
"epoch": 64.74820143884892, |
|
"grad_norm": 0.0007114307954907417, |
|
"learning_rate": 4.141414141414142e-06, |
|
"loss": 0.0, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 64.74820143884892, |
|
"eval_loss": 0.09263601154088974, |
|
"eval_runtime": 1339.2527, |
|
"eval_samples_per_second": 1.659, |
|
"eval_steps_per_second": 0.104, |
|
"eval_wer": 5.658001925498037, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 64.83812949640287, |
|
"grad_norm": 0.0010609790915623307, |
|
"learning_rate": 4.140151515151516e-06, |
|
"loss": 0.0, |
|
"step": 18025 |
|
}, |
|
{ |
|
"epoch": 64.92805755395683, |
|
"grad_norm": 0.0020956743974238634, |
|
"learning_rate": 4.138888888888889e-06, |
|
"loss": 0.0, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 65.0179856115108, |
|
"grad_norm": 0.0013533415040001273, |
|
"learning_rate": 4.137626262626263e-06, |
|
"loss": 0.0001, |
|
"step": 18075 |
|
}, |
|
{ |
|
"epoch": 65.10791366906474, |
|
"grad_norm": 0.0010088173439726233, |
|
"learning_rate": 4.136363636363637e-06, |
|
"loss": 0.0001, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 65.1978417266187, |
|
"grad_norm": 0.001570379245094955, |
|
"learning_rate": 4.13510101010101e-06, |
|
"loss": 0.0, |
|
"step": 18125 |
|
}, |
|
{ |
|
"epoch": 65.28776978417267, |
|
"grad_norm": 0.0016373491380363703, |
|
"learning_rate": 4.133838383838384e-06, |
|
"loss": 0.0, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 65.37769784172662, |
|
"grad_norm": 0.0015006172470748425, |
|
"learning_rate": 4.132575757575758e-06, |
|
"loss": 0.0, |
|
"step": 18175 |
|
}, |
|
{ |
|
"epoch": 65.46762589928058, |
|
"grad_norm": 0.0011033018818125129, |
|
"learning_rate": 4.131313131313132e-06, |
|
"loss": 0.0, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 65.55755395683454, |
|
"grad_norm": 0.0013498698826879263, |
|
"learning_rate": 4.130050505050505e-06, |
|
"loss": 0.0, |
|
"step": 18225 |
|
}, |
|
{ |
|
"epoch": 65.64748201438849, |
|
"grad_norm": 0.0013445069780573249, |
|
"learning_rate": 4.128787878787879e-06, |
|
"loss": 0.0, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 65.73741007194245, |
|
"grad_norm": 0.0017605924513190985, |
|
"learning_rate": 4.127525252525253e-06, |
|
"loss": 0.0, |
|
"step": 18275 |
|
}, |
|
{ |
|
"epoch": 65.8273381294964, |
|
"grad_norm": 0.0018534163245931268, |
|
"learning_rate": 4.126262626262626e-06, |
|
"loss": 0.0, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 65.91726618705036, |
|
"grad_norm": 0.000884951208718121, |
|
"learning_rate": 4.125e-06, |
|
"loss": 0.0, |
|
"step": 18325 |
|
}, |
|
{ |
|
"epoch": 66.00719424460432, |
|
"grad_norm": 0.0011815873440355062, |
|
"learning_rate": 4.123737373737374e-06, |
|
"loss": 0.0001, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 66.09712230215827, |
|
"grad_norm": 0.001126173185184598, |
|
"learning_rate": 4.1224747474747475e-06, |
|
"loss": 0.0, |
|
"step": 18375 |
|
}, |
|
{ |
|
"epoch": 66.18705035971223, |
|
"grad_norm": 0.0011552530340850353, |
|
"learning_rate": 4.1212121212121215e-06, |
|
"loss": 0.0, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 66.27697841726619, |
|
"grad_norm": 0.001199888065457344, |
|
"learning_rate": 4.119949494949495e-06, |
|
"loss": 0.0, |
|
"step": 18425 |
|
}, |
|
{ |
|
"epoch": 66.36690647482014, |
|
"grad_norm": 0.0007247981848195195, |
|
"learning_rate": 4.1186868686868695e-06, |
|
"loss": 0.0, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 66.4568345323741, |
|
"grad_norm": 0.001124533242546022, |
|
"learning_rate": 4.117424242424243e-06, |
|
"loss": 0.0001, |
|
"step": 18475 |
|
}, |
|
{ |
|
"epoch": 66.54676258992805, |
|
"grad_norm": 0.0009603950311429799, |
|
"learning_rate": 4.116161616161617e-06, |
|
"loss": 0.0, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 66.63669064748201, |
|
"grad_norm": 0.0016920759808272123, |
|
"learning_rate": 4.114898989898991e-06, |
|
"loss": 0.0001, |
|
"step": 18525 |
|
}, |
|
{ |
|
"epoch": 66.72661870503597, |
|
"grad_norm": 0.0007674341322854161, |
|
"learning_rate": 4.113636363636364e-06, |
|
"loss": 0.0, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 66.81654676258992, |
|
"grad_norm": 0.000895792618393898, |
|
"learning_rate": 4.112373737373738e-06, |
|
"loss": 0.0, |
|
"step": 18575 |
|
}, |
|
{ |
|
"epoch": 66.90647482014388, |
|
"grad_norm": 0.0009227583650499582, |
|
"learning_rate": 4.111111111111111e-06, |
|
"loss": 0.0, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 66.99640287769785, |
|
"grad_norm": 0.0019231617916375399, |
|
"learning_rate": 4.109848484848486e-06, |
|
"loss": 0.0, |
|
"step": 18625 |
|
}, |
|
{ |
|
"epoch": 67.0863309352518, |
|
"grad_norm": 0.0010071933502331376, |
|
"learning_rate": 4.108585858585859e-06, |
|
"loss": 0.0, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 67.17625899280576, |
|
"grad_norm": 0.0009304916602559388, |
|
"learning_rate": 4.107323232323233e-06, |
|
"loss": 0.0002, |
|
"step": 18675 |
|
}, |
|
{ |
|
"epoch": 67.26618705035972, |
|
"grad_norm": 0.0008229652885347605, |
|
"learning_rate": 4.106060606060606e-06, |
|
"loss": 0.0, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 67.35611510791367, |
|
"grad_norm": 0.0006714012124575675, |
|
"learning_rate": 4.10479797979798e-06, |
|
"loss": 0.0, |
|
"step": 18725 |
|
}, |
|
{ |
|
"epoch": 67.44604316546763, |
|
"grad_norm": 0.0009734642808325589, |
|
"learning_rate": 4.103535353535354e-06, |
|
"loss": 0.0, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 67.53597122302158, |
|
"grad_norm": 0.0007786314818076789, |
|
"learning_rate": 4.102272727272727e-06, |
|
"loss": 0.0, |
|
"step": 18775 |
|
}, |
|
{ |
|
"epoch": 67.62589928057554, |
|
"grad_norm": 0.001005512080155313, |
|
"learning_rate": 4.101010101010101e-06, |
|
"loss": 0.0, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 67.7158273381295, |
|
"grad_norm": 0.001331688603386283, |
|
"learning_rate": 4.099747474747475e-06, |
|
"loss": 0.0, |
|
"step": 18825 |
|
}, |
|
{ |
|
"epoch": 67.80575539568345, |
|
"grad_norm": 0.000987470499239862, |
|
"learning_rate": 4.098484848484849e-06, |
|
"loss": 0.0, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 67.89568345323741, |
|
"grad_norm": 0.0008799554198049009, |
|
"learning_rate": 4.097222222222222e-06, |
|
"loss": 0.0001, |
|
"step": 18875 |
|
}, |
|
{ |
|
"epoch": 67.98561151079137, |
|
"grad_norm": 0.0009637974435463548, |
|
"learning_rate": 4.095959595959596e-06, |
|
"loss": 0.0, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 68.07553956834532, |
|
"grad_norm": 0.0006672360468655825, |
|
"learning_rate": 4.09469696969697e-06, |
|
"loss": 0.0, |
|
"step": 18925 |
|
}, |
|
{ |
|
"epoch": 68.16546762589928, |
|
"grad_norm": 0.0008431566529907286, |
|
"learning_rate": 4.0934343434343435e-06, |
|
"loss": 0.0001, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 68.25539568345324, |
|
"grad_norm": 0.0010287058539688587, |
|
"learning_rate": 4.0921717171717175e-06, |
|
"loss": 0.0, |
|
"step": 18975 |
|
}, |
|
{ |
|
"epoch": 68.34532374100719, |
|
"grad_norm": 0.0007457846077159047, |
|
"learning_rate": 4.0909090909090915e-06, |
|
"loss": 0.0, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 68.34532374100719, |
|
"eval_loss": 0.09562169760465622, |
|
"eval_runtime": 1339.1079, |
|
"eval_samples_per_second": 1.659, |
|
"eval_steps_per_second": 0.104, |
|
"eval_wer": 5.583944308672146, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 68.43525179856115, |
|
"grad_norm": 0.0009193470468744636, |
|
"learning_rate": 4.0896464646464655e-06, |
|
"loss": 0.0, |
|
"step": 19025 |
|
}, |
|
{ |
|
"epoch": 68.5251798561151, |
|
"grad_norm": 0.0008717461605556309, |
|
"learning_rate": 4.088383838383839e-06, |
|
"loss": 0.0, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 68.61510791366906, |
|
"grad_norm": 0.0008119108970277011, |
|
"learning_rate": 4.087121212121213e-06, |
|
"loss": 0.0, |
|
"step": 19075 |
|
}, |
|
{ |
|
"epoch": 68.70503597122303, |
|
"grad_norm": 0.0010454319417476654, |
|
"learning_rate": 4.085858585858587e-06, |
|
"loss": 0.0001, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 68.79496402877697, |
|
"grad_norm": 0.0012115614954382181, |
|
"learning_rate": 4.08459595959596e-06, |
|
"loss": 0.0, |
|
"step": 19125 |
|
}, |
|
{ |
|
"epoch": 68.88489208633094, |
|
"grad_norm": 0.001058676978573203, |
|
"learning_rate": 4.083333333333334e-06, |
|
"loss": 0.0, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 68.9748201438849, |
|
"grad_norm": 0.0009722402319312096, |
|
"learning_rate": 4.082070707070707e-06, |
|
"loss": 0.0, |
|
"step": 19175 |
|
}, |
|
{ |
|
"epoch": 69.06474820143885, |
|
"grad_norm": 0.0006609881529584527, |
|
"learning_rate": 4.080808080808081e-06, |
|
"loss": 0.0, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 69.15467625899281, |
|
"grad_norm": 0.0007030842243693769, |
|
"learning_rate": 4.079545454545455e-06, |
|
"loss": 0.0, |
|
"step": 19225 |
|
}, |
|
{ |
|
"epoch": 69.24460431654676, |
|
"grad_norm": 0.0006842823349870741, |
|
"learning_rate": 4.078282828282829e-06, |
|
"loss": 0.0001, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 69.33453237410072, |
|
"grad_norm": 0.000651550421025604, |
|
"learning_rate": 4.077020202020202e-06, |
|
"loss": 0.0, |
|
"step": 19275 |
|
}, |
|
{ |
|
"epoch": 69.42446043165468, |
|
"grad_norm": 0.0006407879409380257, |
|
"learning_rate": 4.075757575757576e-06, |
|
"loss": 0.0001, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 69.51438848920863, |
|
"grad_norm": 0.0010551882442086935, |
|
"learning_rate": 4.07449494949495e-06, |
|
"loss": 0.0, |
|
"step": 19325 |
|
}, |
|
{ |
|
"epoch": 69.60431654676259, |
|
"grad_norm": 0.0008015549392439425, |
|
"learning_rate": 4.073232323232323e-06, |
|
"loss": 0.0, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 69.69424460431655, |
|
"grad_norm": 0.0008218359434977174, |
|
"learning_rate": 4.071969696969697e-06, |
|
"loss": 0.0, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 69.7841726618705, |
|
"grad_norm": 0.0009953822009265423, |
|
"learning_rate": 4.070707070707071e-06, |
|
"loss": 0.0, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 69.87410071942446, |
|
"grad_norm": 0.0008482063421979547, |
|
"learning_rate": 4.069444444444444e-06, |
|
"loss": 0.0, |
|
"step": 19425 |
|
}, |
|
{ |
|
"epoch": 69.96402877697842, |
|
"grad_norm": 0.0008491966291330755, |
|
"learning_rate": 4.068181818181818e-06, |
|
"loss": 0.0, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 70.05395683453237, |
|
"grad_norm": 0.000667088374029845, |
|
"learning_rate": 4.066919191919192e-06, |
|
"loss": 0.0001, |
|
"step": 19475 |
|
}, |
|
{ |
|
"epoch": 70.14388489208633, |
|
"grad_norm": 0.0006748430896550417, |
|
"learning_rate": 4.065656565656566e-06, |
|
"loss": 0.0, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 70.23381294964028, |
|
"grad_norm": 0.0006421016296371818, |
|
"learning_rate": 4.0643939393939395e-06, |
|
"loss": 0.0, |
|
"step": 19525 |
|
}, |
|
{ |
|
"epoch": 70.32374100719424, |
|
"grad_norm": 0.0009323668200522661, |
|
"learning_rate": 4.0631313131313135e-06, |
|
"loss": 0.0, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 70.4136690647482, |
|
"grad_norm": 0.0008588407654315233, |
|
"learning_rate": 4.0618686868686875e-06, |
|
"loss": 0.0, |
|
"step": 19575 |
|
}, |
|
{ |
|
"epoch": 70.50359712230215, |
|
"grad_norm": 0.0006930006784386933, |
|
"learning_rate": 4.060606060606061e-06, |
|
"loss": 0.0, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 70.59352517985612, |
|
"grad_norm": 0.000734307337552309, |
|
"learning_rate": 4.059343434343435e-06, |
|
"loss": 0.0, |
|
"step": 19625 |
|
}, |
|
{ |
|
"epoch": 70.68345323741008, |
|
"grad_norm": 0.0007306214538402855, |
|
"learning_rate": 4.058080808080808e-06, |
|
"loss": 0.0001, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 70.77338129496403, |
|
"grad_norm": 0.0005738097243010998, |
|
"learning_rate": 4.056818181818183e-06, |
|
"loss": 0.0, |
|
"step": 19675 |
|
}, |
|
{ |
|
"epoch": 70.86330935251799, |
|
"grad_norm": 0.00065003422787413, |
|
"learning_rate": 4.055555555555556e-06, |
|
"loss": 0.0, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 70.95323741007195, |
|
"grad_norm": 0.0006234170868992805, |
|
"learning_rate": 4.05429292929293e-06, |
|
"loss": 0.0, |
|
"step": 19725 |
|
}, |
|
{ |
|
"epoch": 71.0431654676259, |
|
"grad_norm": 0.000607940077316016, |
|
"learning_rate": 4.053030303030303e-06, |
|
"loss": 0.0, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 71.13309352517986, |
|
"grad_norm": 0.0005851531168445945, |
|
"learning_rate": 4.051767676767677e-06, |
|
"loss": 0.0, |
|
"step": 19775 |
|
}, |
|
{ |
|
"epoch": 71.22302158273381, |
|
"grad_norm": 0.0009296953212469816, |
|
"learning_rate": 4.050505050505051e-06, |
|
"loss": 0.0, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 71.31294964028777, |
|
"grad_norm": 0.0006304428679868579, |
|
"learning_rate": 4.049242424242424e-06, |
|
"loss": 0.0001, |
|
"step": 19825 |
|
}, |
|
{ |
|
"epoch": 71.40287769784173, |
|
"grad_norm": 0.000664900871925056, |
|
"learning_rate": 4.047979797979799e-06, |
|
"loss": 0.0, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 71.49280575539568, |
|
"grad_norm": 0.0003695714403875172, |
|
"learning_rate": 4.046717171717172e-06, |
|
"loss": 0.0, |
|
"step": 19875 |
|
}, |
|
{ |
|
"epoch": 71.58273381294964, |
|
"grad_norm": 0.000516809755936265, |
|
"learning_rate": 4.045454545454546e-06, |
|
"loss": 0.0, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 71.6726618705036, |
|
"grad_norm": 0.0006113911513239145, |
|
"learning_rate": 4.044191919191919e-06, |
|
"loss": 0.0, |
|
"step": 19925 |
|
}, |
|
{ |
|
"epoch": 71.76258992805755, |
|
"grad_norm": 0.000814276107121259, |
|
"learning_rate": 4.042929292929293e-06, |
|
"loss": 0.0, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 71.85251798561151, |
|
"grad_norm": 0.0007162923575378954, |
|
"learning_rate": 4.041666666666667e-06, |
|
"loss": 0.0, |
|
"step": 19975 |
|
}, |
|
{ |
|
"epoch": 71.94244604316546, |
|
"grad_norm": 0.000519581779371947, |
|
"learning_rate": 4.04040404040404e-06, |
|
"loss": 0.0, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 71.94244604316546, |
|
"eval_loss": 0.0976732075214386, |
|
"eval_runtime": 1338.7066, |
|
"eval_samples_per_second": 1.66, |
|
"eval_steps_per_second": 0.104, |
|
"eval_wer": 5.539509738576612, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 72.03237410071942, |
|
"grad_norm": 0.0013573451433330774, |
|
"learning_rate": 4.039141414141414e-06, |
|
"loss": 0.0001, |
|
"step": 20025 |
|
}, |
|
{ |
|
"epoch": 72.12230215827338, |
|
"grad_norm": 0.0006321736145764589, |
|
"learning_rate": 4.037878787878788e-06, |
|
"loss": 0.0, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 72.21223021582733, |
|
"grad_norm": 0.00046551282866857946, |
|
"learning_rate": 4.036616161616162e-06, |
|
"loss": 0.0, |
|
"step": 20075 |
|
}, |
|
{ |
|
"epoch": 72.3021582733813, |
|
"grad_norm": 0.00047266227193176746, |
|
"learning_rate": 4.0353535353535355e-06, |
|
"loss": 0.0, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 72.39208633093526, |
|
"grad_norm": 0.0004692314541898668, |
|
"learning_rate": 4.0340909090909095e-06, |
|
"loss": 0.0, |
|
"step": 20125 |
|
}, |
|
{ |
|
"epoch": 72.4820143884892, |
|
"grad_norm": 0.0005892490735277534, |
|
"learning_rate": 4.0328282828282835e-06, |
|
"loss": 0.0, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 72.57194244604317, |
|
"grad_norm": 0.0005393667961470783, |
|
"learning_rate": 4.031565656565657e-06, |
|
"loss": 0.0001, |
|
"step": 20175 |
|
}, |
|
{ |
|
"epoch": 72.66187050359713, |
|
"grad_norm": 0.0007663563592359424, |
|
"learning_rate": 4.030303030303031e-06, |
|
"loss": 0.0, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 72.75179856115108, |
|
"grad_norm": 0.0005675546126440167, |
|
"learning_rate": 4.029040404040405e-06, |
|
"loss": 0.0, |
|
"step": 20225 |
|
}, |
|
{ |
|
"epoch": 72.84172661870504, |
|
"grad_norm": 0.0006041157757863402, |
|
"learning_rate": 4.027777777777779e-06, |
|
"loss": 0.0, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 72.93165467625899, |
|
"grad_norm": 0.0006022896850481629, |
|
"learning_rate": 4.026515151515152e-06, |
|
"loss": 0.0001, |
|
"step": 20275 |
|
}, |
|
{ |
|
"epoch": 73.02158273381295, |
|
"grad_norm": 0.0005813241587020457, |
|
"learning_rate": 4.025252525252526e-06, |
|
"loss": 0.0, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 73.11151079136691, |
|
"grad_norm": 0.0006358566461130977, |
|
"learning_rate": 4.0239898989899e-06, |
|
"loss": 0.0, |
|
"step": 20325 |
|
}, |
|
{ |
|
"epoch": 73.20143884892086, |
|
"grad_norm": 0.0006074347766116261, |
|
"learning_rate": 4.022727272727273e-06, |
|
"loss": 0.0, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 73.29136690647482, |
|
"grad_norm": 0.0005062387208454311, |
|
"learning_rate": 4.021464646464647e-06, |
|
"loss": 0.0003, |
|
"step": 20375 |
|
}, |
|
{ |
|
"epoch": 73.38129496402878, |
|
"grad_norm": 0.0010172536130994558, |
|
"learning_rate": 4.02020202020202e-06, |
|
"loss": 0.0, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 73.47122302158273, |
|
"grad_norm": 0.0006235135952010751, |
|
"learning_rate": 4.018939393939394e-06, |
|
"loss": 0.0, |
|
"step": 20425 |
|
}, |
|
{ |
|
"epoch": 73.56115107913669, |
|
"grad_norm": 0.0009783974383026361, |
|
"learning_rate": 4.017676767676768e-06, |
|
"loss": 0.0, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 73.65107913669064, |
|
"grad_norm": 0.0005355635657906532, |
|
"learning_rate": 4.016414141414141e-06, |
|
"loss": 0.0, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 73.7410071942446, |
|
"grad_norm": 0.0004634314973372966, |
|
"learning_rate": 4.015151515151515e-06, |
|
"loss": 0.0, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 73.83093525179856, |
|
"grad_norm": 0.0005511495401151478, |
|
"learning_rate": 4.013888888888889e-06, |
|
"loss": 0.0, |
|
"step": 20525 |
|
}, |
|
{ |
|
"epoch": 73.92086330935251, |
|
"grad_norm": 0.0010061068460345268, |
|
"learning_rate": 4.012626262626263e-06, |
|
"loss": 0.0, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 74.01079136690647, |
|
"grad_norm": 0.3256176710128784, |
|
"learning_rate": 4.011363636363636e-06, |
|
"loss": 0.0007, |
|
"step": 20575 |
|
}, |
|
{ |
|
"epoch": 74.10071942446044, |
|
"grad_norm": 0.17023605108261108, |
|
"learning_rate": 4.01010101010101e-06, |
|
"loss": 0.0008, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 74.19064748201438, |
|
"grad_norm": 0.8051077723503113, |
|
"learning_rate": 4.008838383838384e-06, |
|
"loss": 0.0078, |
|
"step": 20625 |
|
}, |
|
{ |
|
"epoch": 74.28057553956835, |
|
"grad_norm": 0.4720918536186218, |
|
"learning_rate": 4.0075757575757575e-06, |
|
"loss": 0.0062, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 74.37050359712231, |
|
"grad_norm": 0.4814521074295044, |
|
"learning_rate": 4.0063131313131315e-06, |
|
"loss": 0.0061, |
|
"step": 20675 |
|
}, |
|
{ |
|
"epoch": 74.46043165467626, |
|
"grad_norm": 0.7329695820808411, |
|
"learning_rate": 4.0050505050505055e-06, |
|
"loss": 0.0069, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 74.55035971223022, |
|
"grad_norm": 0.713927686214447, |
|
"learning_rate": 4.0037878787878795e-06, |
|
"loss": 0.0061, |
|
"step": 20725 |
|
}, |
|
{ |
|
"epoch": 74.64028776978417, |
|
"grad_norm": 0.6485239863395691, |
|
"learning_rate": 4.002525252525253e-06, |
|
"loss": 0.0064, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 74.73021582733813, |
|
"grad_norm": 0.8775496482849121, |
|
"learning_rate": 4.001262626262627e-06, |
|
"loss": 0.0048, |
|
"step": 20775 |
|
}, |
|
{ |
|
"epoch": 74.82014388489209, |
|
"grad_norm": 0.2677914798259735, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.004, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 74.91007194244604, |
|
"grad_norm": 0.38305044174194336, |
|
"learning_rate": 3.998737373737374e-06, |
|
"loss": 0.0028, |
|
"step": 20825 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 0.05106651037931442, |
|
"learning_rate": 3.997474747474748e-06, |
|
"loss": 0.0021, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 75.08992805755396, |
|
"grad_norm": 0.01168102491647005, |
|
"learning_rate": 3.996212121212121e-06, |
|
"loss": 0.0012, |
|
"step": 20875 |
|
}, |
|
{ |
|
"epoch": 75.17985611510791, |
|
"grad_norm": 0.22549034655094147, |
|
"learning_rate": 3.994949494949496e-06, |
|
"loss": 0.0015, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 75.26978417266187, |
|
"grad_norm": 0.022075073793530464, |
|
"learning_rate": 3.993686868686869e-06, |
|
"loss": 0.0026, |
|
"step": 20925 |
|
}, |
|
{ |
|
"epoch": 75.35971223021583, |
|
"grad_norm": 0.0188248660415411, |
|
"learning_rate": 3.992424242424243e-06, |
|
"loss": 0.0017, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 75.44964028776978, |
|
"grad_norm": 0.47026434540748596, |
|
"learning_rate": 3.991161616161616e-06, |
|
"loss": 0.0026, |
|
"step": 20975 |
|
}, |
|
{ |
|
"epoch": 75.53956834532374, |
|
"grad_norm": 0.2045595496892929, |
|
"learning_rate": 3.98989898989899e-06, |
|
"loss": 0.0019, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 75.53956834532374, |
|
"eval_loss": 0.08847362548112869, |
|
"eval_runtime": 1337.9238, |
|
"eval_samples_per_second": 1.661, |
|
"eval_steps_per_second": 0.104, |
|
"eval_wer": 6.294897430200697, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 75.62949640287769, |
|
"grad_norm": 0.0665188655257225, |
|
"learning_rate": 3.988636363636364e-06, |
|
"loss": 0.0014, |
|
"step": 21025 |
|
}, |
|
{ |
|
"epoch": 75.71942446043165, |
|
"grad_norm": 0.33609738945961, |
|
"learning_rate": 3.987373737373737e-06, |
|
"loss": 0.0011, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 75.80935251798562, |
|
"grad_norm": 0.4631134867668152, |
|
"learning_rate": 3.986111111111112e-06, |
|
"loss": 0.0023, |
|
"step": 21075 |
|
}, |
|
{ |
|
"epoch": 75.89928057553956, |
|
"grad_norm": 0.26408031582832336, |
|
"learning_rate": 3.984848484848485e-06, |
|
"loss": 0.0019, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 75.98920863309353, |
|
"grad_norm": 0.3067505657672882, |
|
"learning_rate": 3.983585858585859e-06, |
|
"loss": 0.0021, |
|
"step": 21125 |
|
}, |
|
{ |
|
"epoch": 76.07913669064749, |
|
"grad_norm": 0.0688316822052002, |
|
"learning_rate": 3.982323232323232e-06, |
|
"loss": 0.0024, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 76.16906474820144, |
|
"grad_norm": 1.5255663394927979, |
|
"learning_rate": 3.981060606060606e-06, |
|
"loss": 0.0012, |
|
"step": 21175 |
|
}, |
|
{ |
|
"epoch": 76.2589928057554, |
|
"grad_norm": 0.368730753660202, |
|
"learning_rate": 3.97979797979798e-06, |
|
"loss": 0.001, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 76.34892086330935, |
|
"grad_norm": 0.019969308748841286, |
|
"learning_rate": 3.9785353535353535e-06, |
|
"loss": 0.0006, |
|
"step": 21225 |
|
}, |
|
{ |
|
"epoch": 76.43884892086331, |
|
"grad_norm": 0.070771723985672, |
|
"learning_rate": 3.9772727272727275e-06, |
|
"loss": 0.0004, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 76.52877697841727, |
|
"grad_norm": 0.023271985352039337, |
|
"learning_rate": 3.9760101010101015e-06, |
|
"loss": 0.0007, |
|
"step": 21275 |
|
}, |
|
{ |
|
"epoch": 76.61870503597122, |
|
"grad_norm": 0.027517560869455338, |
|
"learning_rate": 3.9747474747474755e-06, |
|
"loss": 0.0004, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 76.70863309352518, |
|
"grad_norm": 0.009323998354375362, |
|
"learning_rate": 3.973484848484849e-06, |
|
"loss": 0.0007, |
|
"step": 21325 |
|
}, |
|
{ |
|
"epoch": 76.79856115107914, |
|
"grad_norm": 0.007815494202077389, |
|
"learning_rate": 3.972222222222223e-06, |
|
"loss": 0.0007, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 76.88848920863309, |
|
"grad_norm": 0.06828250735998154, |
|
"learning_rate": 3.970959595959597e-06, |
|
"loss": 0.0004, |
|
"step": 21375 |
|
}, |
|
{ |
|
"epoch": 76.97841726618705, |
|
"grad_norm": 0.4169680178165436, |
|
"learning_rate": 3.96969696969697e-06, |
|
"loss": 0.0007, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 77.06834532374101, |
|
"grad_norm": 0.010289140976965427, |
|
"learning_rate": 3.968434343434344e-06, |
|
"loss": 0.0003, |
|
"step": 21425 |
|
}, |
|
{ |
|
"epoch": 77.15827338129496, |
|
"grad_norm": 0.02134793810546398, |
|
"learning_rate": 3.967171717171717e-06, |
|
"loss": 0.0003, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 77.24820143884892, |
|
"grad_norm": 0.005463853012770414, |
|
"learning_rate": 3.965909090909091e-06, |
|
"loss": 0.0001, |
|
"step": 21475 |
|
}, |
|
{ |
|
"epoch": 77.33812949640287, |
|
"grad_norm": 0.0035135000944137573, |
|
"learning_rate": 3.964646464646465e-06, |
|
"loss": 0.0001, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 77.42805755395683, |
|
"grad_norm": 0.01657390221953392, |
|
"learning_rate": 3.963383838383839e-06, |
|
"loss": 0.0001, |
|
"step": 21525 |
|
}, |
|
{ |
|
"epoch": 77.5179856115108, |
|
"grad_norm": 0.1767745018005371, |
|
"learning_rate": 3.962121212121213e-06, |
|
"loss": 0.0007, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 77.60791366906474, |
|
"grad_norm": 0.016838785260915756, |
|
"learning_rate": 3.960858585858586e-06, |
|
"loss": 0.0001, |
|
"step": 21575 |
|
}, |
|
{ |
|
"epoch": 77.6978417266187, |
|
"grad_norm": 0.0039493367075920105, |
|
"learning_rate": 3.95959595959596e-06, |
|
"loss": 0.0001, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 77.78776978417267, |
|
"grad_norm": 0.0031421987805515528, |
|
"learning_rate": 3.958333333333333e-06, |
|
"loss": 0.0003, |
|
"step": 21625 |
|
}, |
|
{ |
|
"epoch": 77.87769784172662, |
|
"grad_norm": 0.0026466776616871357, |
|
"learning_rate": 3.957070707070707e-06, |
|
"loss": 0.0003, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 77.96762589928058, |
|
"grad_norm": 0.009947208687663078, |
|
"learning_rate": 3.955808080808081e-06, |
|
"loss": 0.0002, |
|
"step": 21675 |
|
}, |
|
{ |
|
"epoch": 78.05755395683454, |
|
"grad_norm": 0.1049116924405098, |
|
"learning_rate": 3.954545454545454e-06, |
|
"loss": 0.0002, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 78.14748201438849, |
|
"grad_norm": 0.0023068960290402174, |
|
"learning_rate": 3.953282828282828e-06, |
|
"loss": 0.0001, |
|
"step": 21725 |
|
}, |
|
{ |
|
"epoch": 78.23741007194245, |
|
"grad_norm": 0.003103764960542321, |
|
"learning_rate": 3.952020202020202e-06, |
|
"loss": 0.0001, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 78.3273381294964, |
|
"grad_norm": 0.002706879284232855, |
|
"learning_rate": 3.950757575757576e-06, |
|
"loss": 0.0001, |
|
"step": 21775 |
|
}, |
|
{ |
|
"epoch": 78.41726618705036, |
|
"grad_norm": 0.004320697858929634, |
|
"learning_rate": 3.9494949494949496e-06, |
|
"loss": 0.0001, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 78.50719424460432, |
|
"grad_norm": 0.005596183240413666, |
|
"learning_rate": 3.9482323232323236e-06, |
|
"loss": 0.0002, |
|
"step": 21825 |
|
}, |
|
{ |
|
"epoch": 78.59712230215827, |
|
"grad_norm": 0.0037838639691472054, |
|
"learning_rate": 3.9469696969696976e-06, |
|
"loss": 0.0003, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 78.68705035971223, |
|
"grad_norm": 0.00796448066830635, |
|
"learning_rate": 3.945707070707071e-06, |
|
"loss": 0.0001, |
|
"step": 21875 |
|
}, |
|
{ |
|
"epoch": 78.77697841726619, |
|
"grad_norm": 0.003022188087925315, |
|
"learning_rate": 3.944444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 78.86690647482014, |
|
"grad_norm": 0.0022381923627108335, |
|
"learning_rate": 3.943181818181819e-06, |
|
"loss": 0.0002, |
|
"step": 21925 |
|
}, |
|
{ |
|
"epoch": 78.9568345323741, |
|
"grad_norm": 0.0027954999823123217, |
|
"learning_rate": 3.941919191919193e-06, |
|
"loss": 0.0001, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 79.04676258992805, |
|
"grad_norm": 0.0016978129278868437, |
|
"learning_rate": 3.940656565656566e-06, |
|
"loss": 0.0001, |
|
"step": 21975 |
|
}, |
|
{ |
|
"epoch": 79.13669064748201, |
|
"grad_norm": 0.0017409235006198287, |
|
"learning_rate": 3.93939393939394e-06, |
|
"loss": 0.0003, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 79.13669064748201, |
|
"eval_loss": 0.0888415277004242, |
|
"eval_runtime": 1337.7919, |
|
"eval_samples_per_second": 1.661, |
|
"eval_steps_per_second": 0.104, |
|
"eval_wer": 5.598755832037325, |
|
"step": 22000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 100000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 360, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.777235958847242e+21, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|