whisper-large-v3-hsb / trainer_state.json
{
"best_metric": 5.539509738576612,
"best_model_checkpoint": "./training/results/checkpoint-20000",
"epoch": 79.13669064748201,
"eval_steps": 1000,
"global_step": 22000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08992805755395683,
"grad_norm": 12.73649787902832,
"learning_rate": 1.2500000000000002e-07,
"loss": 3.2522,
"step": 25
},
{
"epoch": 0.17985611510791366,
"grad_norm": 12.000336647033691,
"learning_rate": 2.5000000000000004e-07,
"loss": 3.0617,
"step": 50
},
{
"epoch": 0.2697841726618705,
"grad_norm": 10.76065444946289,
"learning_rate": 3.75e-07,
"loss": 2.7165,
"step": 75
},
{
"epoch": 0.3597122302158273,
"grad_norm": 8.36201286315918,
"learning_rate": 5.000000000000001e-07,
"loss": 2.2607,
"step": 100
},
{
"epoch": 0.44964028776978415,
"grad_norm": 7.234769344329834,
"learning_rate": 6.25e-07,
"loss": 1.8433,
"step": 125
},
{
"epoch": 0.539568345323741,
"grad_norm": 6.549698829650879,
"learning_rate": 7.5e-07,
"loss": 1.5515,
"step": 150
},
{
"epoch": 0.6294964028776978,
"grad_norm": 7.549570083618164,
"learning_rate": 8.75e-07,
"loss": 1.3346,
"step": 175
},
{
"epoch": 0.7194244604316546,
"grad_norm": 5.8322930335998535,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.0572,
"step": 200
},
{
"epoch": 0.8093525179856115,
"grad_norm": 3.925255537033081,
"learning_rate": 1.125e-06,
"loss": 0.6348,
"step": 225
},
{
"epoch": 0.8992805755395683,
"grad_norm": 3.1902644634246826,
"learning_rate": 1.25e-06,
"loss": 0.4882,
"step": 250
},
{
"epoch": 0.9892086330935251,
"grad_norm": 3.355315923690796,
"learning_rate": 1.3750000000000002e-06,
"loss": 0.4032,
"step": 275
},
{
"epoch": 1.079136690647482,
"grad_norm": 3.4707915782928467,
"learning_rate": 1.5e-06,
"loss": 0.3355,
"step": 300
},
{
"epoch": 1.169064748201439,
"grad_norm": 3.261484384536743,
"learning_rate": 1.6250000000000001e-06,
"loss": 0.2896,
"step": 325
},
{
"epoch": 1.2589928057553956,
"grad_norm": 3.3107025623321533,
"learning_rate": 1.75e-06,
"loss": 0.2685,
"step": 350
},
{
"epoch": 1.3489208633093526,
"grad_norm": 2.6028969287872314,
"learning_rate": 1.8750000000000003e-06,
"loss": 0.2365,
"step": 375
},
{
"epoch": 1.4388489208633093,
"grad_norm": 3.380187749862671,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.2333,
"step": 400
},
{
"epoch": 1.5287769784172662,
"grad_norm": 3.0845112800598145,
"learning_rate": 2.125e-06,
"loss": 0.2191,
"step": 425
},
{
"epoch": 1.6187050359712232,
"grad_norm": 3.15523099899292,
"learning_rate": 2.25e-06,
"loss": 0.1949,
"step": 450
},
{
"epoch": 1.70863309352518,
"grad_norm": 2.5198237895965576,
"learning_rate": 2.375e-06,
"loss": 0.1756,
"step": 475
},
{
"epoch": 1.7985611510791366,
"grad_norm": 2.7945399284362793,
"learning_rate": 2.5e-06,
"loss": 0.1748,
"step": 500
},
{
"epoch": 1.8884892086330936,
"grad_norm": 3.299269199371338,
"learning_rate": 2.6250000000000003e-06,
"loss": 0.1711,
"step": 525
},
{
"epoch": 1.9784172661870505,
"grad_norm": 2.3727056980133057,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.1495,
"step": 550
},
{
"epoch": 2.068345323741007,
"grad_norm": 2.1909244060516357,
"learning_rate": 2.875e-06,
"loss": 0.1196,
"step": 575
},
{
"epoch": 2.158273381294964,
"grad_norm": 2.45758318901062,
"learning_rate": 3e-06,
"loss": 0.1023,
"step": 600
},
{
"epoch": 2.2482014388489207,
"grad_norm": 2.009880542755127,
"learning_rate": 3.125e-06,
"loss": 0.1019,
"step": 625
},
{
"epoch": 2.338129496402878,
"grad_norm": 2.2170872688293457,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0948,
"step": 650
},
{
"epoch": 2.4280575539568345,
"grad_norm": 1.9289822578430176,
"learning_rate": 3.3750000000000003e-06,
"loss": 0.0934,
"step": 675
},
{
"epoch": 2.5179856115107913,
"grad_norm": 2.0615289211273193,
"learning_rate": 3.5e-06,
"loss": 0.0935,
"step": 700
},
{
"epoch": 2.6079136690647484,
"grad_norm": 2.231041193008423,
"learning_rate": 3.625e-06,
"loss": 0.0923,
"step": 725
},
{
"epoch": 2.697841726618705,
"grad_norm": 1.953312873840332,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0844,
"step": 750
},
{
"epoch": 2.787769784172662,
"grad_norm": 2.1245667934417725,
"learning_rate": 3.875e-06,
"loss": 0.0831,
"step": 775
},
{
"epoch": 2.8776978417266186,
"grad_norm": 1.8499614000320435,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0841,
"step": 800
},
{
"epoch": 2.9676258992805753,
"grad_norm": 2.0503857135772705,
"learning_rate": 4.125e-06,
"loss": 0.0854,
"step": 825
},
{
"epoch": 3.0575539568345325,
"grad_norm": 2.0084242820739746,
"learning_rate": 4.25e-06,
"loss": 0.0621,
"step": 850
},
{
"epoch": 3.147482014388489,
"grad_norm": 1.3122639656066895,
"learning_rate": 4.3750000000000005e-06,
"loss": 0.0434,
"step": 875
},
{
"epoch": 3.237410071942446,
"grad_norm": 1.3615615367889404,
"learning_rate": 4.5e-06,
"loss": 0.0416,
"step": 900
},
{
"epoch": 3.327338129496403,
"grad_norm": 1.533996343612671,
"learning_rate": 4.625000000000001e-06,
"loss": 0.0451,
"step": 925
},
{
"epoch": 3.41726618705036,
"grad_norm": 1.573549509048462,
"learning_rate": 4.75e-06,
"loss": 0.0404,
"step": 950
},
{
"epoch": 3.5071942446043165,
"grad_norm": 1.4288333654403687,
"learning_rate": 4.875e-06,
"loss": 0.044,
"step": 975
},
{
"epoch": 3.597122302158273,
"grad_norm": 1.5075387954711914,
"learning_rate": 5e-06,
"loss": 0.0479,
"step": 1000
},
{
"epoch": 3.597122302158273,
"eval_loss": 0.10350359231233597,
"eval_runtime": 1344.3937,
"eval_samples_per_second": 1.653,
"eval_steps_per_second": 0.103,
"eval_wer": 20.29178701029401,
"step": 1000
},
{
"epoch": 3.68705035971223,
"grad_norm": 1.842606782913208,
"learning_rate": 4.998737373737374e-06,
"loss": 0.0467,
"step": 1025
},
{
"epoch": 3.776978417266187,
"grad_norm": 1.495784044265747,
"learning_rate": 4.997474747474748e-06,
"loss": 0.0437,
"step": 1050
},
{
"epoch": 3.866906474820144,
"grad_norm": 2.054900646209717,
"learning_rate": 4.9962121212121216e-06,
"loss": 0.0497,
"step": 1075
},
{
"epoch": 3.956834532374101,
"grad_norm": 1.438658356666565,
"learning_rate": 4.9949494949494956e-06,
"loss": 0.0398,
"step": 1100
},
{
"epoch": 4.046762589928058,
"grad_norm": 1.3041224479675293,
"learning_rate": 4.993686868686869e-06,
"loss": 0.0293,
"step": 1125
},
{
"epoch": 4.136690647482014,
"grad_norm": 1.2206145524978638,
"learning_rate": 4.992424242424243e-06,
"loss": 0.0227,
"step": 1150
},
{
"epoch": 4.226618705035971,
"grad_norm": 1.2926621437072754,
"learning_rate": 4.991161616161617e-06,
"loss": 0.0231,
"step": 1175
},
{
"epoch": 4.316546762589928,
"grad_norm": 1.4683257341384888,
"learning_rate": 4.98989898989899e-06,
"loss": 0.023,
"step": 1200
},
{
"epoch": 4.406474820143885,
"grad_norm": 1.3095593452453613,
"learning_rate": 4.988636363636364e-06,
"loss": 0.0226,
"step": 1225
},
{
"epoch": 4.496402877697841,
"grad_norm": 0.7059262990951538,
"learning_rate": 4.987373737373738e-06,
"loss": 0.0225,
"step": 1250
},
{
"epoch": 4.586330935251799,
"grad_norm": 1.1493045091629028,
"learning_rate": 4.986111111111112e-06,
"loss": 0.022,
"step": 1275
},
{
"epoch": 4.676258992805756,
"grad_norm": 1.9609806537628174,
"learning_rate": 4.984848484848485e-06,
"loss": 0.0232,
"step": 1300
},
{
"epoch": 4.766187050359712,
"grad_norm": 1.5463200807571411,
"learning_rate": 4.983585858585859e-06,
"loss": 0.0206,
"step": 1325
},
{
"epoch": 4.856115107913669,
"grad_norm": 0.858127772808075,
"learning_rate": 4.982323232323233e-06,
"loss": 0.0222,
"step": 1350
},
{
"epoch": 4.946043165467626,
"grad_norm": 0.8384924530982971,
"learning_rate": 4.981060606060606e-06,
"loss": 0.0201,
"step": 1375
},
{
"epoch": 5.0359712230215825,
"grad_norm": 0.9966625571250916,
"learning_rate": 4.97979797979798e-06,
"loss": 0.0173,
"step": 1400
},
{
"epoch": 5.125899280575539,
"grad_norm": 0.6609445214271545,
"learning_rate": 4.978535353535353e-06,
"loss": 0.0113,
"step": 1425
},
{
"epoch": 5.215827338129497,
"grad_norm": 0.82105952501297,
"learning_rate": 4.977272727272728e-06,
"loss": 0.012,
"step": 1450
},
{
"epoch": 5.305755395683454,
"grad_norm": 1.0994760990142822,
"learning_rate": 4.976010101010101e-06,
"loss": 0.0118,
"step": 1475
},
{
"epoch": 5.39568345323741,
"grad_norm": 0.4543660283088684,
"learning_rate": 4.974747474747475e-06,
"loss": 0.0112,
"step": 1500
},
{
"epoch": 5.485611510791367,
"grad_norm": 3.425143241882324,
"learning_rate": 4.973484848484849e-06,
"loss": 0.0113,
"step": 1525
},
{
"epoch": 5.575539568345324,
"grad_norm": 0.7691114544868469,
"learning_rate": 4.9722222222222224e-06,
"loss": 0.0114,
"step": 1550
},
{
"epoch": 5.66546762589928,
"grad_norm": 0.5446438789367676,
"learning_rate": 4.9709595959595964e-06,
"loss": 0.0121,
"step": 1575
},
{
"epoch": 5.755395683453237,
"grad_norm": 0.7232896089553833,
"learning_rate": 4.9696969696969696e-06,
"loss": 0.0118,
"step": 1600
},
{
"epoch": 5.845323741007194,
"grad_norm": 1.3025506734848022,
"learning_rate": 4.968434343434344e-06,
"loss": 0.0135,
"step": 1625
},
{
"epoch": 5.935251798561151,
"grad_norm": 1.2080421447753906,
"learning_rate": 4.9671717171717176e-06,
"loss": 0.0126,
"step": 1650
},
{
"epoch": 6.025179856115108,
"grad_norm": 0.4218277633190155,
"learning_rate": 4.9659090909090916e-06,
"loss": 0.0094,
"step": 1675
},
{
"epoch": 6.115107913669065,
"grad_norm": 0.5942659378051758,
"learning_rate": 4.964646464646465e-06,
"loss": 0.0071,
"step": 1700
},
{
"epoch": 6.205035971223022,
"grad_norm": 0.31671133637428284,
"learning_rate": 4.963383838383839e-06,
"loss": 0.008,
"step": 1725
},
{
"epoch": 6.294964028776978,
"grad_norm": 0.3538670539855957,
"learning_rate": 4.962121212121213e-06,
"loss": 0.0066,
"step": 1750
},
{
"epoch": 6.384892086330935,
"grad_norm": 0.8252100348472595,
"learning_rate": 4.960858585858586e-06,
"loss": 0.006,
"step": 1775
},
{
"epoch": 6.474820143884892,
"grad_norm": 0.9238548278808594,
"learning_rate": 4.95959595959596e-06,
"loss": 0.0074,
"step": 1800
},
{
"epoch": 6.564748201438849,
"grad_norm": 1.1760324239730835,
"learning_rate": 4.958333333333334e-06,
"loss": 0.0066,
"step": 1825
},
{
"epoch": 6.654676258992806,
"grad_norm": 0.3382113575935364,
"learning_rate": 4.957070707070708e-06,
"loss": 0.0103,
"step": 1850
},
{
"epoch": 6.744604316546763,
"grad_norm": 0.9418781399726868,
"learning_rate": 4.955808080808081e-06,
"loss": 0.0092,
"step": 1875
},
{
"epoch": 6.83453237410072,
"grad_norm": 0.7677399516105652,
"learning_rate": 4.954545454545455e-06,
"loss": 0.009,
"step": 1900
},
{
"epoch": 6.924460431654676,
"grad_norm": 0.32002565264701843,
"learning_rate": 4.953282828282829e-06,
"loss": 0.0075,
"step": 1925
},
{
"epoch": 7.014388489208633,
"grad_norm": 1.0049771070480347,
"learning_rate": 4.952020202020202e-06,
"loss": 0.0071,
"step": 1950
},
{
"epoch": 7.10431654676259,
"grad_norm": 0.513941764831543,
"learning_rate": 4.950757575757576e-06,
"loss": 0.0043,
"step": 1975
},
{
"epoch": 7.194244604316546,
"grad_norm": 0.8406050205230713,
"learning_rate": 4.94949494949495e-06,
"loss": 0.005,
"step": 2000
},
{
"epoch": 7.194244604316546,
"eval_loss": 0.09395472705364227,
"eval_runtime": 1340.6412,
"eval_samples_per_second": 1.657,
"eval_steps_per_second": 0.104,
"eval_wer": 10.419906687402799,
"step": 2000
},
{
"epoch": 7.284172661870503,
"grad_norm": 0.47227388620376587,
"learning_rate": 4.948232323232323e-06,
"loss": 0.005,
"step": 2025
},
{
"epoch": 7.374100719424461,
"grad_norm": 0.2972259819507599,
"learning_rate": 4.946969696969697e-06,
"loss": 0.0047,
"step": 2050
},
{
"epoch": 7.4640287769784175,
"grad_norm": 0.580878496170044,
"learning_rate": 4.945707070707071e-06,
"loss": 0.0047,
"step": 2075
},
{
"epoch": 7.553956834532374,
"grad_norm": 0.0858689397573471,
"learning_rate": 4.944444444444445e-06,
"loss": 0.0047,
"step": 2100
},
{
"epoch": 7.643884892086331,
"grad_norm": 0.9921578168869019,
"learning_rate": 4.9431818181818184e-06,
"loss": 0.0049,
"step": 2125
},
{
"epoch": 7.733812949640288,
"grad_norm": 0.3222315311431885,
"learning_rate": 4.9419191919191924e-06,
"loss": 0.0039,
"step": 2150
},
{
"epoch": 7.823741007194244,
"grad_norm": 0.2401006668806076,
"learning_rate": 4.940656565656566e-06,
"loss": 0.0045,
"step": 2175
},
{
"epoch": 7.913669064748201,
"grad_norm": 0.26786544919013977,
"learning_rate": 4.93939393939394e-06,
"loss": 0.0037,
"step": 2200
},
{
"epoch": 8.003597122302159,
"grad_norm": 1.120921015739441,
"learning_rate": 4.938131313131314e-06,
"loss": 0.0048,
"step": 2225
},
{
"epoch": 8.093525179856115,
"grad_norm": 0.7425853610038757,
"learning_rate": 4.936868686868687e-06,
"loss": 0.0036,
"step": 2250
},
{
"epoch": 8.183453237410072,
"grad_norm": 0.19618873298168182,
"learning_rate": 4.935606060606061e-06,
"loss": 0.0038,
"step": 2275
},
{
"epoch": 8.273381294964029,
"grad_norm": 0.41672375798225403,
"learning_rate": 4.934343434343435e-06,
"loss": 0.003,
"step": 2300
},
{
"epoch": 8.363309352517986,
"grad_norm": 0.3363110423088074,
"learning_rate": 4.933080808080809e-06,
"loss": 0.0031,
"step": 2325
},
{
"epoch": 8.453237410071942,
"grad_norm": 0.8529962301254272,
"learning_rate": 4.931818181818182e-06,
"loss": 0.0034,
"step": 2350
},
{
"epoch": 8.543165467625899,
"grad_norm": 0.15698625147342682,
"learning_rate": 4.930555555555556e-06,
"loss": 0.0033,
"step": 2375
},
{
"epoch": 8.633093525179856,
"grad_norm": 0.19619868695735931,
"learning_rate": 4.92929292929293e-06,
"loss": 0.004,
"step": 2400
},
{
"epoch": 8.723021582733812,
"grad_norm": 0.2903304994106293,
"learning_rate": 4.928030303030303e-06,
"loss": 0.0034,
"step": 2425
},
{
"epoch": 8.81294964028777,
"grad_norm": 0.5127314329147339,
"learning_rate": 4.926767676767677e-06,
"loss": 0.0035,
"step": 2450
},
{
"epoch": 8.902877697841726,
"grad_norm": 1.0652037858963013,
"learning_rate": 4.925505050505051e-06,
"loss": 0.0045,
"step": 2475
},
{
"epoch": 8.992805755395683,
"grad_norm": 0.9570706486701965,
"learning_rate": 4.924242424242425e-06,
"loss": 0.0042,
"step": 2500
},
{
"epoch": 9.082733812949641,
"grad_norm": 0.5939081907272339,
"learning_rate": 4.922979797979798e-06,
"loss": 0.0032,
"step": 2525
},
{
"epoch": 9.172661870503598,
"grad_norm": 0.25739356875419617,
"learning_rate": 4.921717171717172e-06,
"loss": 0.0038,
"step": 2550
},
{
"epoch": 9.262589928057555,
"grad_norm": 0.17940430343151093,
"learning_rate": 4.920454545454546e-06,
"loss": 0.0029,
"step": 2575
},
{
"epoch": 9.352517985611511,
"grad_norm": 0.33168259263038635,
"learning_rate": 4.919191919191919e-06,
"loss": 0.0028,
"step": 2600
},
{
"epoch": 9.442446043165468,
"grad_norm": 0.20831653475761414,
"learning_rate": 4.917929292929293e-06,
"loss": 0.002,
"step": 2625
},
{
"epoch": 9.532374100719425,
"grad_norm": 0.19978338479995728,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.0025,
"step": 2650
},
{
"epoch": 9.622302158273381,
"grad_norm": 0.23154591023921967,
"learning_rate": 4.915404040404041e-06,
"loss": 0.0033,
"step": 2675
},
{
"epoch": 9.712230215827338,
"grad_norm": 0.7622235417366028,
"learning_rate": 4.9141414141414145e-06,
"loss": 0.0039,
"step": 2700
},
{
"epoch": 9.802158273381295,
"grad_norm": 0.23092857003211975,
"learning_rate": 4.9128787878787885e-06,
"loss": 0.0044,
"step": 2725
},
{
"epoch": 9.892086330935252,
"grad_norm": 0.5034282207489014,
"learning_rate": 4.9116161616161625e-06,
"loss": 0.0035,
"step": 2750
},
{
"epoch": 9.982014388489208,
"grad_norm": 0.2582780122756958,
"learning_rate": 4.910353535353536e-06,
"loss": 0.0033,
"step": 2775
},
{
"epoch": 10.071942446043165,
"grad_norm": 0.4610576033592224,
"learning_rate": 4.90909090909091e-06,
"loss": 0.0037,
"step": 2800
},
{
"epoch": 10.161870503597122,
"grad_norm": 0.217066690325737,
"learning_rate": 4.907828282828283e-06,
"loss": 0.0028,
"step": 2825
},
{
"epoch": 10.251798561151078,
"grad_norm": 0.05713683366775513,
"learning_rate": 4.906565656565658e-06,
"loss": 0.003,
"step": 2850
},
{
"epoch": 10.341726618705035,
"grad_norm": 0.5356289148330688,
"learning_rate": 4.905303030303031e-06,
"loss": 0.0018,
"step": 2875
},
{
"epoch": 10.431654676258994,
"grad_norm": 0.37969082593917847,
"learning_rate": 4.904040404040405e-06,
"loss": 0.0022,
"step": 2900
},
{
"epoch": 10.52158273381295,
"grad_norm": 1.078008770942688,
"learning_rate": 4.902777777777778e-06,
"loss": 0.0032,
"step": 2925
},
{
"epoch": 10.611510791366907,
"grad_norm": 0.26670244336128235,
"learning_rate": 4.901515151515152e-06,
"loss": 0.0027,
"step": 2950
},
{
"epoch": 10.701438848920864,
"grad_norm": 0.673686683177948,
"learning_rate": 4.900252525252526e-06,
"loss": 0.0029,
"step": 2975
},
{
"epoch": 10.79136690647482,
"grad_norm": 0.37779000401496887,
"learning_rate": 4.898989898989899e-06,
"loss": 0.0022,
"step": 3000
},
{
"epoch": 10.79136690647482,
"eval_loss": 0.10011211037635803,
"eval_runtime": 1344.035,
"eval_samples_per_second": 1.653,
"eval_steps_per_second": 0.103,
"eval_wer": 9.049840776123824,
"step": 3000
},
{
"epoch": 10.881294964028777,
"grad_norm": 0.09616148471832275,
"learning_rate": 4.897727272727273e-06,
"loss": 0.0041,
"step": 3025
},
{
"epoch": 10.971223021582734,
"grad_norm": 0.8408087491989136,
"learning_rate": 4.896464646464647e-06,
"loss": 0.0046,
"step": 3050
},
{
"epoch": 11.06115107913669,
"grad_norm": 0.1868293583393097,
"learning_rate": 4.895202020202021e-06,
"loss": 0.0027,
"step": 3075
},
{
"epoch": 11.151079136690647,
"grad_norm": 0.19219942390918732,
"learning_rate": 4.893939393939394e-06,
"loss": 0.0024,
"step": 3100
},
{
"epoch": 11.241007194244604,
"grad_norm": 3.7455391883850098,
"learning_rate": 4.892676767676768e-06,
"loss": 0.0027,
"step": 3125
},
{
"epoch": 11.33093525179856,
"grad_norm": 0.2693164348602295,
"learning_rate": 4.891414141414142e-06,
"loss": 0.002,
"step": 3150
},
{
"epoch": 11.420863309352518,
"grad_norm": 0.8100782632827759,
"learning_rate": 4.890151515151515e-06,
"loss": 0.0033,
"step": 3175
},
{
"epoch": 11.510791366906474,
"grad_norm": 0.30300647020339966,
"learning_rate": 4.888888888888889e-06,
"loss": 0.0025,
"step": 3200
},
{
"epoch": 11.600719424460431,
"grad_norm": 0.49988773465156555,
"learning_rate": 4.887626262626263e-06,
"loss": 0.002,
"step": 3225
},
{
"epoch": 11.690647482014388,
"grad_norm": 0.2162599414587021,
"learning_rate": 4.8863636363636365e-06,
"loss": 0.0024,
"step": 3250
},
{
"epoch": 11.780575539568346,
"grad_norm": 2.3612468242645264,
"learning_rate": 4.8851010101010105e-06,
"loss": 0.0045,
"step": 3275
},
{
"epoch": 11.870503597122303,
"grad_norm": 0.4287119209766388,
"learning_rate": 4.883838383838384e-06,
"loss": 0.0051,
"step": 3300
},
{
"epoch": 11.96043165467626,
"grad_norm": 0.46471118927001953,
"learning_rate": 4.8825757575757585e-06,
"loss": 0.0036,
"step": 3325
},
{
"epoch": 12.050359712230216,
"grad_norm": 0.4310344159603119,
"learning_rate": 4.881313131313132e-06,
"loss": 0.0031,
"step": 3350
},
{
"epoch": 12.140287769784173,
"grad_norm": 0.8054510951042175,
"learning_rate": 4.880050505050506e-06,
"loss": 0.0036,
"step": 3375
},
{
"epoch": 12.23021582733813,
"grad_norm": 0.5783084630966187,
"learning_rate": 4.878787878787879e-06,
"loss": 0.0023,
"step": 3400
},
{
"epoch": 12.320143884892087,
"grad_norm": 0.1537202149629593,
"learning_rate": 4.877525252525253e-06,
"loss": 0.0031,
"step": 3425
},
{
"epoch": 12.410071942446043,
"grad_norm": 0.25773826241493225,
"learning_rate": 4.876262626262627e-06,
"loss": 0.0029,
"step": 3450
},
{
"epoch": 12.5,
"grad_norm": 1.0221893787384033,
"learning_rate": 4.875e-06,
"loss": 0.003,
"step": 3475
},
{
"epoch": 12.589928057553957,
"grad_norm": 0.2363336831331253,
"learning_rate": 4.873737373737374e-06,
"loss": 0.0036,
"step": 3500
},
{
"epoch": 12.679856115107913,
"grad_norm": 0.9339852333068848,
"learning_rate": 4.872474747474748e-06,
"loss": 0.004,
"step": 3525
},
{
"epoch": 12.76978417266187,
"grad_norm": 0.6633305549621582,
"learning_rate": 4.871212121212122e-06,
"loss": 0.0032,
"step": 3550
},
{
"epoch": 12.859712230215827,
"grad_norm": 0.7261077761650085,
"learning_rate": 4.869949494949495e-06,
"loss": 0.0028,
"step": 3575
},
{
"epoch": 12.949640287769784,
"grad_norm": 0.6666585803031921,
"learning_rate": 4.868686868686869e-06,
"loss": 0.0031,
"step": 3600
},
{
"epoch": 13.03956834532374,
"grad_norm": 0.42198774218559265,
"learning_rate": 4.867424242424243e-06,
"loss": 0.0023,
"step": 3625
},
{
"epoch": 13.129496402877697,
"grad_norm": 0.1100483238697052,
"learning_rate": 4.866161616161616e-06,
"loss": 0.002,
"step": 3650
},
{
"epoch": 13.219424460431656,
"grad_norm": 0.5182665586471558,
"learning_rate": 4.86489898989899e-06,
"loss": 0.003,
"step": 3675
},
{
"epoch": 13.309352517985612,
"grad_norm": 0.10821045190095901,
"learning_rate": 4.863636363636364e-06,
"loss": 0.0024,
"step": 3700
},
{
"epoch": 13.399280575539569,
"grad_norm": 0.302943617105484,
"learning_rate": 4.862373737373738e-06,
"loss": 0.0022,
"step": 3725
},
{
"epoch": 13.489208633093526,
"grad_norm": 0.34953269362449646,
"learning_rate": 4.861111111111111e-06,
"loss": 0.0024,
"step": 3750
},
{
"epoch": 13.579136690647482,
"grad_norm": 0.3864242732524872,
"learning_rate": 4.859848484848485e-06,
"loss": 0.0025,
"step": 3775
},
{
"epoch": 13.66906474820144,
"grad_norm": 0.23528048396110535,
"learning_rate": 4.858585858585859e-06,
"loss": 0.0028,
"step": 3800
},
{
"epoch": 13.758992805755396,
"grad_norm": 0.31728431582450867,
"learning_rate": 4.8573232323232325e-06,
"loss": 0.0041,
"step": 3825
},
{
"epoch": 13.848920863309353,
"grad_norm": 0.5803298950195312,
"learning_rate": 4.8560606060606065e-06,
"loss": 0.0028,
"step": 3850
},
{
"epoch": 13.93884892086331,
"grad_norm": 0.30145183205604553,
"learning_rate": 4.85479797979798e-06,
"loss": 0.0022,
"step": 3875
},
{
"epoch": 14.028776978417266,
"grad_norm": 0.43851757049560547,
"learning_rate": 4.8535353535353545e-06,
"loss": 0.0024,
"step": 3900
},
{
"epoch": 14.118705035971223,
"grad_norm": 0.7910506725311279,
"learning_rate": 4.852272727272728e-06,
"loss": 0.0033,
"step": 3925
},
{
"epoch": 14.20863309352518,
"grad_norm": 0.3168434500694275,
"learning_rate": 4.851010101010102e-06,
"loss": 0.0028,
"step": 3950
},
{
"epoch": 14.298561151079136,
"grad_norm": 0.7242361307144165,
"learning_rate": 4.849747474747475e-06,
"loss": 0.0031,
"step": 3975
},
{
"epoch": 14.388489208633093,
"grad_norm": 0.7368125319480896,
"learning_rate": 4.848484848484849e-06,
"loss": 0.0027,
"step": 4000
},
{
"epoch": 14.388489208633093,
"eval_loss": 0.09274967014789581,
"eval_runtime": 1343.7242,
"eval_samples_per_second": 1.654,
"eval_steps_per_second": 0.103,
"eval_wer": 9.375694290157742,
"step": 4000
},
{
"epoch": 14.47841726618705,
"grad_norm": 0.420599102973938,
"learning_rate": 4.847222222222223e-06,
"loss": 0.0028,
"step": 4025
},
{
"epoch": 14.568345323741006,
"grad_norm": 0.3025602698326111,
"learning_rate": 4.845959595959596e-06,
"loss": 0.0028,
"step": 4050
},
{
"epoch": 14.658273381294965,
"grad_norm": 0.7078948020935059,
"learning_rate": 4.84469696969697e-06,
"loss": 0.003,
"step": 4075
},
{
"epoch": 14.748201438848922,
"grad_norm": 0.5534040331840515,
"learning_rate": 4.843434343434344e-06,
"loss": 0.0031,
"step": 4100
},
{
"epoch": 14.838129496402878,
"grad_norm": 0.28715190291404724,
"learning_rate": 4.842171717171718e-06,
"loss": 0.0028,
"step": 4125
},
{
"epoch": 14.928057553956835,
"grad_norm": 0.5861944556236267,
"learning_rate": 4.840909090909091e-06,
"loss": 0.0028,
"step": 4150
},
{
"epoch": 15.017985611510792,
"grad_norm": 0.102662093937397,
"learning_rate": 4.839646464646465e-06,
"loss": 0.0057,
"step": 4175
},
{
"epoch": 15.107913669064748,
"grad_norm": 0.15230265259742737,
"learning_rate": 4.838383838383839e-06,
"loss": 0.0023,
"step": 4200
},
{
"epoch": 15.197841726618705,
"grad_norm": 0.12530238926410675,
"learning_rate": 4.837121212121212e-06,
"loss": 0.0017,
"step": 4225
},
{
"epoch": 15.287769784172662,
"grad_norm": 0.09885858744382858,
"learning_rate": 4.835858585858586e-06,
"loss": 0.0022,
"step": 4250
},
{
"epoch": 15.377697841726619,
"grad_norm": 0.1105910986661911,
"learning_rate": 4.83459595959596e-06,
"loss": 0.0026,
"step": 4275
},
{
"epoch": 15.467625899280575,
"grad_norm": 0.3952260911464691,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0021,
"step": 4300
},
{
"epoch": 15.557553956834532,
"grad_norm": 0.6049605011940002,
"learning_rate": 4.832070707070707e-06,
"loss": 0.0021,
"step": 4325
},
{
"epoch": 15.647482014388489,
"grad_norm": 0.7125779986381531,
"learning_rate": 4.830808080808081e-06,
"loss": 0.0015,
"step": 4350
},
{
"epoch": 15.737410071942445,
"grad_norm": 0.16274645924568176,
"learning_rate": 4.829545454545455e-06,
"loss": 0.0019,
"step": 4375
},
{
"epoch": 15.827338129496402,
"grad_norm": 0.6492106318473816,
"learning_rate": 4.8282828282828285e-06,
"loss": 0.0019,
"step": 4400
},
{
"epoch": 15.917266187050359,
"grad_norm": 0.9411545991897583,
"learning_rate": 4.8270202020202025e-06,
"loss": 0.003,
"step": 4425
},
{
"epoch": 16.007194244604317,
"grad_norm": 0.03323192521929741,
"learning_rate": 4.8257575757575765e-06,
"loss": 0.0018,
"step": 4450
},
{
"epoch": 16.097122302158272,
"grad_norm": 0.1154596135020256,
"learning_rate": 4.82449494949495e-06,
"loss": 0.0015,
"step": 4475
},
{
"epoch": 16.18705035971223,
"grad_norm": 0.41669028997421265,
"learning_rate": 4.823232323232324e-06,
"loss": 0.0016,
"step": 4500
},
{
"epoch": 16.276978417266186,
"grad_norm": 0.25636962056159973,
"learning_rate": 4.821969696969697e-06,
"loss": 0.0014,
"step": 4525
},
{
"epoch": 16.366906474820144,
"grad_norm": 3.250777244567871,
"learning_rate": 4.820707070707072e-06,
"loss": 0.0027,
"step": 4550
},
{
"epoch": 16.4568345323741,
"grad_norm": 1.1029988527297974,
"learning_rate": 4.819444444444445e-06,
"loss": 0.0028,
"step": 4575
},
{
"epoch": 16.546762589928058,
"grad_norm": 0.3530588150024414,
"learning_rate": 4.818181818181819e-06,
"loss": 0.0015,
"step": 4600
},
{
"epoch": 16.636690647482013,
"grad_norm": 0.0861181914806366,
"learning_rate": 4.816919191919192e-06,
"loss": 0.0023,
"step": 4625
},
{
"epoch": 16.72661870503597,
"grad_norm": 0.44006574153900146,
"learning_rate": 4.815656565656566e-06,
"loss": 0.0021,
"step": 4650
},
{
"epoch": 16.81654676258993,
"grad_norm": 0.9688239097595215,
"learning_rate": 4.81439393939394e-06,
"loss": 0.0014,
"step": 4675
},
{
"epoch": 16.906474820143885,
"grad_norm": 0.848913311958313,
"learning_rate": 4.813131313131313e-06,
"loss": 0.0021,
"step": 4700
},
{
"epoch": 16.996402877697843,
"grad_norm": 0.14554986357688904,
"learning_rate": 4.811868686868687e-06,
"loss": 0.0013,
"step": 4725
},
{
"epoch": 17.086330935251798,
"grad_norm": 0.31808871030807495,
"learning_rate": 4.810606060606061e-06,
"loss": 0.0019,
"step": 4750
},
{
"epoch": 17.176258992805757,
"grad_norm": 0.2081349641084671,
"learning_rate": 4.809343434343435e-06,
"loss": 0.0018,
"step": 4775
},
{
"epoch": 17.26618705035971,
"grad_norm": 0.0817071720957756,
"learning_rate": 4.808080808080808e-06,
"loss": 0.0011,
"step": 4800
},
{
"epoch": 17.35611510791367,
"grad_norm": 0.148326575756073,
"learning_rate": 4.806818181818182e-06,
"loss": 0.0011,
"step": 4825
},
{
"epoch": 17.446043165467625,
"grad_norm": 1.1114903688430786,
"learning_rate": 4.805555555555556e-06,
"loss": 0.0012,
"step": 4850
},
{
"epoch": 17.535971223021583,
"grad_norm": 0.5132379531860352,
"learning_rate": 4.804292929292929e-06,
"loss": 0.0015,
"step": 4875
},
{
"epoch": 17.62589928057554,
"grad_norm": 0.5439797043800354,
"learning_rate": 4.803030303030303e-06,
"loss": 0.0019,
"step": 4900
},
{
"epoch": 17.715827338129497,
"grad_norm": 0.4897061586380005,
"learning_rate": 4.801767676767677e-06,
"loss": 0.0022,
"step": 4925
},
{
"epoch": 17.805755395683452,
"grad_norm": 0.13605351746082306,
"learning_rate": 4.800505050505051e-06,
"loss": 0.0017,
"step": 4950
},
{
"epoch": 17.89568345323741,
"grad_norm": 0.6285837888717651,
"learning_rate": 4.7992424242424245e-06,
"loss": 0.0014,
"step": 4975
},
{
"epoch": 17.985611510791365,
"grad_norm": 0.04884183779358864,
"learning_rate": 4.7979797979797985e-06,
"loss": 0.0011,
"step": 5000
},
{
"epoch": 17.985611510791365,
"eval_loss": 0.09266538918018341,
"eval_runtime": 1344.6458,
"eval_samples_per_second": 1.652,
"eval_steps_per_second": 0.103,
"eval_wer": 8.835073687328741,
"step": 5000
},
{
"epoch": 18.075539568345324,
"grad_norm": 0.036710768938064575,
"learning_rate": 4.7967171717171725e-06,
"loss": 0.0024,
"step": 5025
},
{
"epoch": 18.165467625899282,
"grad_norm": 0.41920551657676697,
"learning_rate": 4.795454545454546e-06,
"loss": 0.0011,
"step": 5050
},
{
"epoch": 18.255395683453237,
"grad_norm": 0.2354598492383957,
"learning_rate": 4.79419191919192e-06,
"loss": 0.0018,
"step": 5075
},
{
"epoch": 18.345323741007196,
"grad_norm": 0.4095918536186218,
"learning_rate": 4.792929292929293e-06,
"loss": 0.0015,
"step": 5100
},
{
"epoch": 18.43525179856115,
"grad_norm": 0.03964778780937195,
"learning_rate": 4.791666666666668e-06,
"loss": 0.0019,
"step": 5125
},
{
"epoch": 18.52517985611511,
"grad_norm": 0.9322590827941895,
"learning_rate": 4.790404040404041e-06,
"loss": 0.0014,
"step": 5150
},
{
"epoch": 18.615107913669064,
"grad_norm": 0.11062884330749512,
"learning_rate": 4.789141414141415e-06,
"loss": 0.0015,
"step": 5175
},
{
"epoch": 18.705035971223023,
"grad_norm": 0.4186955690383911,
"learning_rate": 4.787878787878788e-06,
"loss": 0.0013,
"step": 5200
},
{
"epoch": 18.794964028776977,
"grad_norm": 0.40554943680763245,
"learning_rate": 4.786616161616162e-06,
"loss": 0.0017,
"step": 5225
},
{
"epoch": 18.884892086330936,
"grad_norm": 0.4156556725502014,
"learning_rate": 4.785353535353536e-06,
"loss": 0.0016,
"step": 5250
},
{
"epoch": 18.97482014388489,
"grad_norm": 0.8705348968505859,
"learning_rate": 4.784090909090909e-06,
"loss": 0.003,
"step": 5275
},
{
"epoch": 19.06474820143885,
"grad_norm": 0.47541674971580505,
"learning_rate": 4.782828282828283e-06,
"loss": 0.0026,
"step": 5300
},
{
"epoch": 19.154676258992804,
"grad_norm": 0.3221082389354706,
"learning_rate": 4.781565656565657e-06,
"loss": 0.0014,
"step": 5325
},
{
"epoch": 19.244604316546763,
"grad_norm": 0.26767319440841675,
"learning_rate": 4.78030303030303e-06,
"loss": 0.0015,
"step": 5350
},
{
"epoch": 19.334532374100718,
"grad_norm": 0.41984379291534424,
"learning_rate": 4.779040404040404e-06,
"loss": 0.0026,
"step": 5375
},
{
"epoch": 19.424460431654676,
"grad_norm": 0.6067033410072327,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0031,
"step": 5400
},
{
"epoch": 19.514388489208635,
"grad_norm": 0.23113247752189636,
"learning_rate": 4.776515151515152e-06,
"loss": 0.0027,
"step": 5425
},
{
"epoch": 19.60431654676259,
"grad_norm": 0.7052062153816223,
"learning_rate": 4.775252525252525e-06,
"loss": 0.0038,
"step": 5450
},
{
"epoch": 19.694244604316548,
"grad_norm": 1.4232673645019531,
"learning_rate": 4.773989898989899e-06,
"loss": 0.0024,
"step": 5475
},
{
"epoch": 19.784172661870503,
"grad_norm": 0.12078073620796204,
"learning_rate": 4.772727272727273e-06,
"loss": 0.0014,
"step": 5500
},
{
"epoch": 19.87410071942446,
"grad_norm": 1.296155333518982,
"learning_rate": 4.7714646464646465e-06,
"loss": 0.0028,
"step": 5525
},
{
"epoch": 19.964028776978417,
"grad_norm": 0.4774380922317505,
"learning_rate": 4.7702020202020205e-06,
"loss": 0.0039,
"step": 5550
},
{
"epoch": 20.053956834532375,
"grad_norm": 0.7243533134460449,
"learning_rate": 4.768939393939394e-06,
"loss": 0.0038,
"step": 5575
},
{
"epoch": 20.14388489208633,
"grad_norm": 0.03761635348200798,
"learning_rate": 4.7676767676767685e-06,
"loss": 0.0028,
"step": 5600
},
{
"epoch": 20.23381294964029,
"grad_norm": 0.3167934715747833,
"learning_rate": 4.766414141414142e-06,
"loss": 0.0023,
"step": 5625
},
{
"epoch": 20.323741007194243,
"grad_norm": 0.08072912693023682,
"learning_rate": 4.765151515151516e-06,
"loss": 0.0021,
"step": 5650
},
{
"epoch": 20.413669064748202,
"grad_norm": 0.0809144377708435,
"learning_rate": 4.763888888888889e-06,
"loss": 0.0033,
"step": 5675
},
{
"epoch": 20.503597122302157,
"grad_norm": 0.021725259721279144,
"learning_rate": 4.762626262626263e-06,
"loss": 0.0022,
"step": 5700
},
{
"epoch": 20.593525179856115,
"grad_norm": 0.79271399974823,
"learning_rate": 4.761363636363637e-06,
"loss": 0.0015,
"step": 5725
},
{
"epoch": 20.68345323741007,
"grad_norm": 0.10382846742868423,
"learning_rate": 4.76010101010101e-06,
"loss": 0.0019,
"step": 5750
},
{
"epoch": 20.77338129496403,
"grad_norm": 0.03259812295436859,
"learning_rate": 4.758838383838385e-06,
"loss": 0.002,
"step": 5775
},
{
"epoch": 20.863309352517987,
"grad_norm": 0.6223962306976318,
"learning_rate": 4.757575757575758e-06,
"loss": 0.0036,
"step": 5800
},
{
"epoch": 20.953237410071942,
"grad_norm": 1.0351557731628418,
"learning_rate": 4.756313131313132e-06,
"loss": 0.0022,
"step": 5825
},
{
"epoch": 21.0431654676259,
"grad_norm": 0.8662335276603699,
"learning_rate": 4.755050505050505e-06,
"loss": 0.0028,
"step": 5850
},
{
"epoch": 21.133093525179856,
"grad_norm": 0.13104894757270813,
"learning_rate": 4.753787878787879e-06,
"loss": 0.0028,
"step": 5875
},
{
"epoch": 21.223021582733814,
"grad_norm": 0.8010006546974182,
"learning_rate": 4.752525252525253e-06,
"loss": 0.0021,
"step": 5900
},
{
"epoch": 21.31294964028777,
"grad_norm": 0.7761834263801575,
"learning_rate": 4.751262626262626e-06,
"loss": 0.0035,
"step": 5925
},
{
"epoch": 21.402877697841728,
"grad_norm": 0.05642890930175781,
"learning_rate": 4.75e-06,
"loss": 0.0015,
"step": 5950
},
{
"epoch": 21.492805755395683,
"grad_norm": 0.2215975672006607,
"learning_rate": 4.748737373737374e-06,
"loss": 0.0011,
"step": 5975
},
{
"epoch": 21.58273381294964,
"grad_norm": 0.5649552345275879,
"learning_rate": 4.747474747474748e-06,
"loss": 0.0017,
"step": 6000
},
{
"epoch": 21.58273381294964,
"eval_loss": 0.08750007301568985,
"eval_runtime": 1349.1716,
"eval_samples_per_second": 1.647,
"eval_steps_per_second": 0.103,
"eval_wer": 7.657557579797082,
"step": 6000
},
{
"epoch": 21.672661870503596,
"grad_norm": 0.3567905128002167,
"learning_rate": 4.746212121212121e-06,
"loss": 0.0023,
"step": 6025
},
{
"epoch": 21.762589928057555,
"grad_norm": 0.7165196537971497,
"learning_rate": 4.744949494949495e-06,
"loss": 0.0019,
"step": 6050
},
{
"epoch": 21.85251798561151,
"grad_norm": 0.9009844660758972,
"learning_rate": 4.743686868686869e-06,
"loss": 0.0022,
"step": 6075
},
{
"epoch": 21.942446043165468,
"grad_norm": 0.7037338614463806,
"learning_rate": 4.7424242424242426e-06,
"loss": 0.0026,
"step": 6100
},
{
"epoch": 22.032374100719423,
"grad_norm": 0.2905846834182739,
"learning_rate": 4.7411616161616166e-06,
"loss": 0.002,
"step": 6125
},
{
"epoch": 22.12230215827338,
"grad_norm": 0.7335506677627563,
"learning_rate": 4.7398989898989905e-06,
"loss": 0.0019,
"step": 6150
},
{
"epoch": 22.21223021582734,
"grad_norm": 0.3520030677318573,
"learning_rate": 4.7386363636363645e-06,
"loss": 0.0016,
"step": 6175
},
{
"epoch": 22.302158273381295,
"grad_norm": 0.3580196797847748,
"learning_rate": 4.737373737373738e-06,
"loss": 0.0014,
"step": 6200
},
{
"epoch": 22.392086330935253,
"grad_norm": 0.19062575697898865,
"learning_rate": 4.736111111111112e-06,
"loss": 0.002,
"step": 6225
},
{
"epoch": 22.48201438848921,
"grad_norm": 0.6567767858505249,
"learning_rate": 4.734848484848486e-06,
"loss": 0.0021,
"step": 6250
},
{
"epoch": 22.571942446043167,
"grad_norm": 0.24819691479206085,
"learning_rate": 4.733585858585859e-06,
"loss": 0.0019,
"step": 6275
},
{
"epoch": 22.66187050359712,
"grad_norm": 0.47786185145378113,
"learning_rate": 4.732323232323233e-06,
"loss": 0.0014,
"step": 6300
},
{
"epoch": 22.75179856115108,
"grad_norm": 0.05066821351647377,
"learning_rate": 4.731060606060606e-06,
"loss": 0.0018,
"step": 6325
},
{
"epoch": 22.841726618705035,
"grad_norm": 0.33751770853996277,
"learning_rate": 4.72979797979798e-06,
"loss": 0.0028,
"step": 6350
},
{
"epoch": 22.931654676258994,
"grad_norm": 0.03158155083656311,
"learning_rate": 4.728535353535354e-06,
"loss": 0.0013,
"step": 6375
},
{
"epoch": 23.02158273381295,
"grad_norm": 0.05814801901578903,
"learning_rate": 4.727272727272728e-06,
"loss": 0.0021,
"step": 6400
},
{
"epoch": 23.111510791366907,
"grad_norm": 0.031183883547782898,
"learning_rate": 4.726010101010101e-06,
"loss": 0.0011,
"step": 6425
},
{
"epoch": 23.201438848920862,
"grad_norm": 0.539813756942749,
"learning_rate": 4.724747474747475e-06,
"loss": 0.0009,
"step": 6450
},
{
"epoch": 23.29136690647482,
"grad_norm": 0.14558178186416626,
"learning_rate": 4.723484848484849e-06,
"loss": 0.0018,
"step": 6475
},
{
"epoch": 23.381294964028775,
"grad_norm": 0.10804769396781921,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0013,
"step": 6500
},
{
"epoch": 23.471223021582734,
"grad_norm": 0.3211396038532257,
"learning_rate": 4.720959595959596e-06,
"loss": 0.0015,
"step": 6525
},
{
"epoch": 23.56115107913669,
"grad_norm": 0.16721013188362122,
"learning_rate": 4.71969696969697e-06,
"loss": 0.0027,
"step": 6550
},
{
"epoch": 23.651079136690647,
"grad_norm": 0.3473891019821167,
"learning_rate": 4.7184343434343434e-06,
"loss": 0.0014,
"step": 6575
},
{
"epoch": 23.741007194244606,
"grad_norm": 0.04464249685406685,
"learning_rate": 4.717171717171717e-06,
"loss": 0.0013,
"step": 6600
},
{
"epoch": 23.83093525179856,
"grad_norm": 0.21577273309230804,
"learning_rate": 4.715909090909091e-06,
"loss": 0.0025,
"step": 6625
},
{
"epoch": 23.92086330935252,
"grad_norm": 1.0553650856018066,
"learning_rate": 4.714646464646465e-06,
"loss": 0.0012,
"step": 6650
},
{
"epoch": 24.010791366906474,
"grad_norm": 0.015737203881144524,
"learning_rate": 4.7133838383838386e-06,
"loss": 0.0018,
"step": 6675
},
{
"epoch": 24.100719424460433,
"grad_norm": 0.08808793127536774,
"learning_rate": 4.7121212121212126e-06,
"loss": 0.0008,
"step": 6700
},
{
"epoch": 24.190647482014388,
"grad_norm": 0.01893734373152256,
"learning_rate": 4.7108585858585866e-06,
"loss": 0.0008,
"step": 6725
},
{
"epoch": 24.280575539568346,
"grad_norm": 0.032726775854825974,
"learning_rate": 4.70959595959596e-06,
"loss": 0.0011,
"step": 6750
},
{
"epoch": 24.3705035971223,
"grad_norm": 1.2210007905960083,
"learning_rate": 4.708333333333334e-06,
"loss": 0.0014,
"step": 6775
},
{
"epoch": 24.46043165467626,
"grad_norm": 0.21317902207374573,
"learning_rate": 4.707070707070707e-06,
"loss": 0.0008,
"step": 6800
},
{
"epoch": 24.550359712230215,
"grad_norm": 0.02254541404545307,
"learning_rate": 4.705808080808082e-06,
"loss": 0.0008,
"step": 6825
},
{
"epoch": 24.640287769784173,
"grad_norm": 0.19283901154994965,
"learning_rate": 4.704545454545455e-06,
"loss": 0.0006,
"step": 6850
},
{
"epoch": 24.730215827338128,
"grad_norm": 0.1615646928548813,
"learning_rate": 4.703282828282829e-06,
"loss": 0.0011,
"step": 6875
},
{
"epoch": 24.820143884892087,
"grad_norm": 0.04525255784392357,
"learning_rate": 4.702020202020202e-06,
"loss": 0.0006,
"step": 6900
},
{
"epoch": 24.91007194244604,
"grad_norm": 0.17892493307590485,
"learning_rate": 4.700757575757576e-06,
"loss": 0.0011,
"step": 6925
},
{
"epoch": 25.0,
"grad_norm": 1.5881894826889038,
"learning_rate": 4.69949494949495e-06,
"loss": 0.0009,
"step": 6950
},
{
"epoch": 25.08992805755396,
"grad_norm": 0.028072576969861984,
"learning_rate": 4.698232323232323e-06,
"loss": 0.001,
"step": 6975
},
{
"epoch": 25.179856115107913,
"grad_norm": 0.034753262996673584,
"learning_rate": 4.696969696969698e-06,
"loss": 0.001,
"step": 7000
},
{
"epoch": 25.179856115107913,
"eval_loss": 0.08996064960956573,
"eval_runtime": 1372.6865,
"eval_samples_per_second": 1.619,
"eval_steps_per_second": 0.101,
"eval_wer": 6.591127897504258,
"step": 7000
},
{
"epoch": 25.269784172661872,
"grad_norm": 0.047846052795648575,
"learning_rate": 4.695707070707071e-06,
"loss": 0.0017,
"step": 7025
},
{
"epoch": 25.359712230215827,
"grad_norm": 0.08721514046192169,
"learning_rate": 4.694444444444445e-06,
"loss": 0.0012,
"step": 7050
},
{
"epoch": 25.449640287769785,
"grad_norm": 0.488505095243454,
"learning_rate": 4.693181818181818e-06,
"loss": 0.001,
"step": 7075
},
{
"epoch": 25.53956834532374,
"grad_norm": 0.3541705012321472,
"learning_rate": 4.691919191919192e-06,
"loss": 0.0009,
"step": 7100
},
{
"epoch": 25.6294964028777,
"grad_norm": 1.2867228984832764,
"learning_rate": 4.690656565656566e-06,
"loss": 0.0009,
"step": 7125
},
{
"epoch": 25.719424460431654,
"grad_norm": 0.06602492183446884,
"learning_rate": 4.6893939393939394e-06,
"loss": 0.001,
"step": 7150
},
{
"epoch": 25.809352517985612,
"grad_norm": 0.03555336222052574,
"learning_rate": 4.6881313131313134e-06,
"loss": 0.0016,
"step": 7175
},
{
"epoch": 25.899280575539567,
"grad_norm": 0.1011524349451065,
"learning_rate": 4.6868686868686874e-06,
"loss": 0.0028,
"step": 7200
},
{
"epoch": 25.989208633093526,
"grad_norm": 0.14894358813762665,
"learning_rate": 4.6856060606060614e-06,
"loss": 0.0026,
"step": 7225
},
{
"epoch": 26.07913669064748,
"grad_norm": 0.944786787033081,
"learning_rate": 4.684343434343435e-06,
"loss": 0.0014,
"step": 7250
},
{
"epoch": 26.16906474820144,
"grad_norm": 0.4678920805454254,
"learning_rate": 4.683080808080809e-06,
"loss": 0.0016,
"step": 7275
},
{
"epoch": 26.258992805755394,
"grad_norm": 0.0241763386875391,
"learning_rate": 4.681818181818183e-06,
"loss": 0.0018,
"step": 7300
},
{
"epoch": 26.348920863309353,
"grad_norm": 0.1959693878889084,
"learning_rate": 4.680555555555556e-06,
"loss": 0.0014,
"step": 7325
},
{
"epoch": 26.43884892086331,
"grad_norm": 0.05353585258126259,
"learning_rate": 4.67929292929293e-06,
"loss": 0.001,
"step": 7350
},
{
"epoch": 26.528776978417266,
"grad_norm": 0.022708551958203316,
"learning_rate": 4.678030303030303e-06,
"loss": 0.0008,
"step": 7375
},
{
"epoch": 26.618705035971225,
"grad_norm": 0.28148502111434937,
"learning_rate": 4.676767676767677e-06,
"loss": 0.0012,
"step": 7400
},
{
"epoch": 26.70863309352518,
"grad_norm": 0.0556604228913784,
"learning_rate": 4.675505050505051e-06,
"loss": 0.0018,
"step": 7425
},
{
"epoch": 26.798561151079138,
"grad_norm": 0.03789166733622551,
"learning_rate": 4.674242424242425e-06,
"loss": 0.0008,
"step": 7450
},
{
"epoch": 26.888489208633093,
"grad_norm": 0.18029791116714478,
"learning_rate": 4.672979797979799e-06,
"loss": 0.001,
"step": 7475
},
{
"epoch": 26.97841726618705,
"grad_norm": 0.27599871158599854,
"learning_rate": 4.671717171717172e-06,
"loss": 0.0008,
"step": 7500
},
{
"epoch": 27.068345323741006,
"grad_norm": 0.4067777693271637,
"learning_rate": 4.670454545454546e-06,
"loss": 0.0017,
"step": 7525
},
{
"epoch": 27.158273381294965,
"grad_norm": 0.36876606941223145,
"learning_rate": 4.669191919191919e-06,
"loss": 0.0011,
"step": 7550
},
{
"epoch": 27.24820143884892,
"grad_norm": 0.2605381906032562,
"learning_rate": 4.667929292929293e-06,
"loss": 0.0014,
"step": 7575
},
{
"epoch": 27.33812949640288,
"grad_norm": 0.02853270247578621,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0008,
"step": 7600
},
{
"epoch": 27.428057553956833,
"grad_norm": 0.055020011961460114,
"learning_rate": 4.66540404040404e-06,
"loss": 0.0009,
"step": 7625
},
{
"epoch": 27.51798561151079,
"grad_norm": 0.30874237418174744,
"learning_rate": 4.664141414141414e-06,
"loss": 0.0018,
"step": 7650
},
{
"epoch": 27.607913669064747,
"grad_norm": 0.09795974940061569,
"learning_rate": 4.662878787878788e-06,
"loss": 0.0014,
"step": 7675
},
{
"epoch": 27.697841726618705,
"grad_norm": 0.04705384001135826,
"learning_rate": 4.661616161616162e-06,
"loss": 0.0015,
"step": 7700
},
{
"epoch": 27.78776978417266,
"grad_norm": 0.058379877358675,
"learning_rate": 4.6603535353535355e-06,
"loss": 0.0008,
"step": 7725
},
{
"epoch": 27.87769784172662,
"grad_norm": 0.047014497220516205,
"learning_rate": 4.6590909090909095e-06,
"loss": 0.0016,
"step": 7750
},
{
"epoch": 27.967625899280577,
"grad_norm": 0.6353835463523865,
"learning_rate": 4.6578282828282835e-06,
"loss": 0.0012,
"step": 7775
},
{
"epoch": 28.057553956834532,
"grad_norm": 0.13249577581882477,
"learning_rate": 4.656565656565657e-06,
"loss": 0.0007,
"step": 7800
},
{
"epoch": 28.14748201438849,
"grad_norm": 0.16413046419620514,
"learning_rate": 4.655303030303031e-06,
"loss": 0.0009,
"step": 7825
},
{
"epoch": 28.237410071942445,
"grad_norm": 0.21356362104415894,
"learning_rate": 4.654040404040405e-06,
"loss": 0.0007,
"step": 7850
},
{
"epoch": 28.327338129496404,
"grad_norm": 0.0190277099609375,
"learning_rate": 4.652777777777779e-06,
"loss": 0.0007,
"step": 7875
},
{
"epoch": 28.41726618705036,
"grad_norm": 0.12108524143695831,
"learning_rate": 4.651515151515152e-06,
"loss": 0.0009,
"step": 7900
},
{
"epoch": 28.507194244604317,
"grad_norm": 0.026057908311486244,
"learning_rate": 4.650252525252526e-06,
"loss": 0.0007,
"step": 7925
},
{
"epoch": 28.597122302158272,
"grad_norm": 0.09515079110860825,
"learning_rate": 4.6489898989899e-06,
"loss": 0.0008,
"step": 7950
},
{
"epoch": 28.68705035971223,
"grad_norm": 0.48142778873443604,
"learning_rate": 4.647727272727273e-06,
"loss": 0.0007,
"step": 7975
},
{
"epoch": 28.776978417266186,
"grad_norm": 0.46795013546943665,
"learning_rate": 4.646464646464647e-06,
"loss": 0.0014,
"step": 8000
},
{
"epoch": 28.776978417266186,
"eval_loss": 0.09178629517555237,
"eval_runtime": 1347.1747,
"eval_samples_per_second": 1.649,
"eval_steps_per_second": 0.103,
"eval_wer": 7.139154262015849,
"step": 8000
},
{
"epoch": 28.866906474820144,
"grad_norm": 0.5243809223175049,
"learning_rate": 4.64520202020202e-06,
"loss": 0.0007,
"step": 8025
},
{
"epoch": 28.9568345323741,
"grad_norm": 0.3461306095123291,
"learning_rate": 4.643939393939395e-06,
"loss": 0.001,
"step": 8050
},
{
"epoch": 29.046762589928058,
"grad_norm": 0.2795426845550537,
"learning_rate": 4.642676767676768e-06,
"loss": 0.0014,
"step": 8075
},
{
"epoch": 29.136690647482013,
"grad_norm": 0.05419691279530525,
"learning_rate": 4.641414141414142e-06,
"loss": 0.0014,
"step": 8100
},
{
"epoch": 29.22661870503597,
"grad_norm": 0.08857329189777374,
"learning_rate": 4.640151515151515e-06,
"loss": 0.0016,
"step": 8125
},
{
"epoch": 29.31654676258993,
"grad_norm": 0.05129173770546913,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0011,
"step": 8150
},
{
"epoch": 29.406474820143885,
"grad_norm": 1.0032382011413574,
"learning_rate": 4.637626262626263e-06,
"loss": 0.0023,
"step": 8175
},
{
"epoch": 29.496402877697843,
"grad_norm": 0.4335207939147949,
"learning_rate": 4.636363636363636e-06,
"loss": 0.0028,
"step": 8200
},
{
"epoch": 29.586330935251798,
"grad_norm": 0.15561847388744354,
"learning_rate": 4.63510101010101e-06,
"loss": 0.0028,
"step": 8225
},
{
"epoch": 29.676258992805757,
"grad_norm": 0.24305035173892975,
"learning_rate": 4.633838383838384e-06,
"loss": 0.0024,
"step": 8250
},
{
"epoch": 29.76618705035971,
"grad_norm": 1.3689900636672974,
"learning_rate": 4.632575757575758e-06,
"loss": 0.0036,
"step": 8275
},
{
"epoch": 29.85611510791367,
"grad_norm": 0.6511125564575195,
"learning_rate": 4.6313131313131315e-06,
"loss": 0.0025,
"step": 8300
},
{
"epoch": 29.946043165467625,
"grad_norm": 0.8534782528877258,
"learning_rate": 4.6300505050505055e-06,
"loss": 0.0029,
"step": 8325
},
{
"epoch": 30.035971223021583,
"grad_norm": 0.3412608504295349,
"learning_rate": 4.6287878787878795e-06,
"loss": 0.0028,
"step": 8350
},
{
"epoch": 30.12589928057554,
"grad_norm": 0.16232311725616455,
"learning_rate": 4.627525252525253e-06,
"loss": 0.0023,
"step": 8375
},
{
"epoch": 30.215827338129497,
"grad_norm": 0.08357956260442734,
"learning_rate": 4.626262626262627e-06,
"loss": 0.0019,
"step": 8400
},
{
"epoch": 30.305755395683452,
"grad_norm": 0.412728488445282,
"learning_rate": 4.625000000000001e-06,
"loss": 0.0015,
"step": 8425
},
{
"epoch": 30.39568345323741,
"grad_norm": 0.9784059524536133,
"learning_rate": 4.623737373737375e-06,
"loss": 0.0025,
"step": 8450
},
{
"epoch": 30.485611510791365,
"grad_norm": 0.38275232911109924,
"learning_rate": 4.622474747474748e-06,
"loss": 0.0016,
"step": 8475
},
{
"epoch": 30.575539568345324,
"grad_norm": 0.3518912196159363,
"learning_rate": 4.621212121212122e-06,
"loss": 0.0024,
"step": 8500
},
{
"epoch": 30.665467625899282,
"grad_norm": 0.8633609414100647,
"learning_rate": 4.619949494949496e-06,
"loss": 0.0022,
"step": 8525
},
{
"epoch": 30.755395683453237,
"grad_norm": 0.23257087171077728,
"learning_rate": 4.618686868686869e-06,
"loss": 0.0016,
"step": 8550
},
{
"epoch": 30.845323741007196,
"grad_norm": 1.2157853841781616,
"learning_rate": 4.617424242424243e-06,
"loss": 0.0013,
"step": 8575
},
{
"epoch": 30.93525179856115,
"grad_norm": 0.6692176461219788,
"learning_rate": 4.616161616161616e-06,
"loss": 0.0025,
"step": 8600
},
{
"epoch": 31.02517985611511,
"grad_norm": 0.08320923149585724,
"learning_rate": 4.61489898989899e-06,
"loss": 0.0015,
"step": 8625
},
{
"epoch": 31.115107913669064,
"grad_norm": 0.03867033123970032,
"learning_rate": 4.613636363636364e-06,
"loss": 0.0011,
"step": 8650
},
{
"epoch": 31.205035971223023,
"grad_norm": 0.37571918964385986,
"learning_rate": 4.612373737373737e-06,
"loss": 0.002,
"step": 8675
},
{
"epoch": 31.294964028776977,
"grad_norm": 0.023200325667858124,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0017,
"step": 8700
},
{
"epoch": 31.384892086330936,
"grad_norm": 0.025962859392166138,
"learning_rate": 4.609848484848485e-06,
"loss": 0.0025,
"step": 8725
},
{
"epoch": 31.47482014388489,
"grad_norm": 0.07832462340593338,
"learning_rate": 4.608585858585859e-06,
"loss": 0.002,
"step": 8750
},
{
"epoch": 31.56474820143885,
"grad_norm": 0.5365622043609619,
"learning_rate": 4.607323232323232e-06,
"loss": 0.0019,
"step": 8775
},
{
"epoch": 31.654676258992804,
"grad_norm": 0.042796701192855835,
"learning_rate": 4.606060606060606e-06,
"loss": 0.0012,
"step": 8800
},
{
"epoch": 31.744604316546763,
"grad_norm": 0.2298709750175476,
"learning_rate": 4.60479797979798e-06,
"loss": 0.0015,
"step": 8825
},
{
"epoch": 31.834532374100718,
"grad_norm": 0.7432539463043213,
"learning_rate": 4.6035353535353535e-06,
"loss": 0.002,
"step": 8850
},
{
"epoch": 31.924460431654676,
"grad_norm": 0.05896187201142311,
"learning_rate": 4.6022727272727275e-06,
"loss": 0.0017,
"step": 8875
},
{
"epoch": 32.014388489208635,
"grad_norm": 0.6994006633758545,
"learning_rate": 4.6010101010101015e-06,
"loss": 0.0019,
"step": 8900
},
{
"epoch": 32.10431654676259,
"grad_norm": 0.6547738909721375,
"learning_rate": 4.5997474747474755e-06,
"loss": 0.0016,
"step": 8925
},
{
"epoch": 32.194244604316545,
"grad_norm": 0.13888348639011383,
"learning_rate": 4.598484848484849e-06,
"loss": 0.0014,
"step": 8950
},
{
"epoch": 32.28417266187051,
"grad_norm": 0.09715843945741653,
"learning_rate": 4.597222222222223e-06,
"loss": 0.001,
"step": 8975
},
{
"epoch": 32.37410071942446,
"grad_norm": 0.05904947221279144,
"learning_rate": 4.595959595959597e-06,
"loss": 0.0014,
"step": 9000
},
{
"epoch": 32.37410071942446,
"eval_loss": 0.08943528681993484,
"eval_runtime": 1353.2653,
"eval_samples_per_second": 1.642,
"eval_steps_per_second": 0.103,
"eval_wer": 6.739243131156039,
"step": 9000
},
{
"epoch": 32.46402877697842,
"grad_norm": 1.000013828277588,
"learning_rate": 4.59469696969697e-06,
"loss": 0.0012,
"step": 9025
},
{
"epoch": 32.55395683453237,
"grad_norm": 0.031857941299676895,
"learning_rate": 4.593434343434344e-06,
"loss": 0.0011,
"step": 9050
},
{
"epoch": 32.643884892086334,
"grad_norm": 0.18854251503944397,
"learning_rate": 4.592171717171717e-06,
"loss": 0.0011,
"step": 9075
},
{
"epoch": 32.73381294964029,
"grad_norm": 0.06311248987913132,
"learning_rate": 4.590909090909092e-06,
"loss": 0.0009,
"step": 9100
},
{
"epoch": 32.82374100719424,
"grad_norm": 0.02462015673518181,
"learning_rate": 4.589646464646465e-06,
"loss": 0.0023,
"step": 9125
},
{
"epoch": 32.9136690647482,
"grad_norm": 0.5756279826164246,
"learning_rate": 4.588383838383839e-06,
"loss": 0.0014,
"step": 9150
},
{
"epoch": 33.00359712230216,
"grad_norm": 0.39254868030548096,
"learning_rate": 4.587121212121213e-06,
"loss": 0.0012,
"step": 9175
},
{
"epoch": 33.093525179856115,
"grad_norm": 0.05750317871570587,
"learning_rate": 4.585858585858586e-06,
"loss": 0.0016,
"step": 9200
},
{
"epoch": 33.18345323741007,
"grad_norm": 0.456665962934494,
"learning_rate": 4.58459595959596e-06,
"loss": 0.0005,
"step": 9225
},
{
"epoch": 33.273381294964025,
"grad_norm": 0.05247064307332039,
"learning_rate": 4.583333333333333e-06,
"loss": 0.0007,
"step": 9250
},
{
"epoch": 33.36330935251799,
"grad_norm": 0.1745249629020691,
"learning_rate": 4.582070707070708e-06,
"loss": 0.0011,
"step": 9275
},
{
"epoch": 33.45323741007194,
"grad_norm": 0.1702817678451538,
"learning_rate": 4.580808080808081e-06,
"loss": 0.0011,
"step": 9300
},
{
"epoch": 33.5431654676259,
"grad_norm": 0.5600733757019043,
"learning_rate": 4.579545454545455e-06,
"loss": 0.0017,
"step": 9325
},
{
"epoch": 33.63309352517986,
"grad_norm": 0.042534805834293365,
"learning_rate": 4.578282828282828e-06,
"loss": 0.002,
"step": 9350
},
{
"epoch": 33.723021582733814,
"grad_norm": 0.025305964052677155,
"learning_rate": 4.577020202020202e-06,
"loss": 0.0014,
"step": 9375
},
{
"epoch": 33.81294964028777,
"grad_norm": 0.05213531106710434,
"learning_rate": 4.575757575757576e-06,
"loss": 0.001,
"step": 9400
},
{
"epoch": 33.902877697841724,
"grad_norm": 0.02446218766272068,
"learning_rate": 4.5744949494949495e-06,
"loss": 0.0006,
"step": 9425
},
{
"epoch": 33.992805755395686,
"grad_norm": 0.009959193877875805,
"learning_rate": 4.5732323232323235e-06,
"loss": 0.0009,
"step": 9450
},
{
"epoch": 34.08273381294964,
"grad_norm": 0.4287709891796112,
"learning_rate": 4.5719696969696975e-06,
"loss": 0.0007,
"step": 9475
},
{
"epoch": 34.172661870503596,
"grad_norm": 0.011952442117035389,
"learning_rate": 4.5707070707070715e-06,
"loss": 0.0004,
"step": 9500
},
{
"epoch": 34.26258992805755,
"grad_norm": 0.1948029100894928,
"learning_rate": 4.569444444444445e-06,
"loss": 0.0007,
"step": 9525
},
{
"epoch": 34.35251798561151,
"grad_norm": 0.03538801521062851,
"learning_rate": 4.568181818181819e-06,
"loss": 0.0007,
"step": 9550
},
{
"epoch": 34.44244604316547,
"grad_norm": 0.03204001113772392,
"learning_rate": 4.566919191919193e-06,
"loss": 0.0006,
"step": 9575
},
{
"epoch": 34.53237410071942,
"grad_norm": 0.12747210264205933,
"learning_rate": 4.565656565656566e-06,
"loss": 0.0008,
"step": 9600
},
{
"epoch": 34.62230215827338,
"grad_norm": 0.009002352133393288,
"learning_rate": 4.56439393939394e-06,
"loss": 0.0003,
"step": 9625
},
{
"epoch": 34.71223021582734,
"grad_norm": 0.057965803891420364,
"learning_rate": 4.563131313131314e-06,
"loss": 0.0009,
"step": 9650
},
{
"epoch": 34.802158273381295,
"grad_norm": 0.07385562360286713,
"learning_rate": 4.561868686868687e-06,
"loss": 0.0007,
"step": 9675
},
{
"epoch": 34.89208633093525,
"grad_norm": 0.010685013607144356,
"learning_rate": 4.560606060606061e-06,
"loss": 0.0008,
"step": 9700
},
{
"epoch": 34.98201438848921,
"grad_norm": 0.038797181099653244,
"learning_rate": 4.559343434343435e-06,
"loss": 0.0003,
"step": 9725
},
{
"epoch": 35.07194244604317,
"grad_norm": 0.016779489815235138,
"learning_rate": 4.558080808080809e-06,
"loss": 0.0011,
"step": 9750
},
{
"epoch": 35.16187050359712,
"grad_norm": 0.01562959887087345,
"learning_rate": 4.556818181818182e-06,
"loss": 0.0007,
"step": 9775
},
{
"epoch": 35.25179856115108,
"grad_norm": 0.025731824338436127,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0005,
"step": 9800
},
{
"epoch": 35.34172661870504,
"grad_norm": 0.0950327217578888,
"learning_rate": 4.554292929292929e-06,
"loss": 0.0014,
"step": 9825
},
{
"epoch": 35.431654676258994,
"grad_norm": 0.015813730657100677,
"learning_rate": 4.553030303030303e-06,
"loss": 0.0011,
"step": 9850
},
{
"epoch": 35.52158273381295,
"grad_norm": 0.07395196706056595,
"learning_rate": 4.551767676767677e-06,
"loss": 0.0006,
"step": 9875
},
{
"epoch": 35.611510791366904,
"grad_norm": 0.3815157413482666,
"learning_rate": 4.55050505050505e-06,
"loss": 0.001,
"step": 9900
},
{
"epoch": 35.701438848920866,
"grad_norm": 0.028180675581097603,
"learning_rate": 4.549242424242424e-06,
"loss": 0.0007,
"step": 9925
},
{
"epoch": 35.79136690647482,
"grad_norm": 0.022708212956786156,
"learning_rate": 4.547979797979798e-06,
"loss": 0.0007,
"step": 9950
},
{
"epoch": 35.881294964028775,
"grad_norm": 0.37191152572631836,
"learning_rate": 4.546717171717172e-06,
"loss": 0.0006,
"step": 9975
},
{
"epoch": 35.97122302158273,
"grad_norm": 0.045804716646671295,
"learning_rate": 4.5454545454545455e-06,
"loss": 0.0005,
"step": 10000
},
{
"epoch": 35.97122302158273,
"eval_loss": 0.08962783217430115,
"eval_runtime": 1349.7416,
"eval_samples_per_second": 1.646,
"eval_steps_per_second": 0.103,
"eval_wer": 6.25786862178775,
"step": 10000
},
{
"epoch": 36.06115107913669,
"grad_norm": 0.016676392406225204,
"learning_rate": 4.5441919191919195e-06,
"loss": 0.0003,
"step": 10025
},
{
"epoch": 36.15107913669065,
"grad_norm": 0.15673214197158813,
"learning_rate": 4.5429292929292935e-06,
"loss": 0.0009,
"step": 10050
},
{
"epoch": 36.2410071942446,
"grad_norm": 0.032344311475753784,
"learning_rate": 4.541666666666667e-06,
"loss": 0.0015,
"step": 10075
},
{
"epoch": 36.330935251798564,
"grad_norm": 0.5042840242385864,
"learning_rate": 4.540404040404041e-06,
"loss": 0.0014,
"step": 10100
},
{
"epoch": 36.42086330935252,
"grad_norm": 0.02287839725613594,
"learning_rate": 4.539141414141415e-06,
"loss": 0.0013,
"step": 10125
},
{
"epoch": 36.510791366906474,
"grad_norm": 0.30796897411346436,
"learning_rate": 4.537878787878789e-06,
"loss": 0.0025,
"step": 10150
},
{
"epoch": 36.60071942446043,
"grad_norm": 0.11940345168113708,
"learning_rate": 4.536616161616162e-06,
"loss": 0.0009,
"step": 10175
},
{
"epoch": 36.69064748201439,
"grad_norm": 0.12890297174453735,
"learning_rate": 4.535353535353536e-06,
"loss": 0.001,
"step": 10200
},
{
"epoch": 36.780575539568346,
"grad_norm": 0.016430262476205826,
"learning_rate": 4.53409090909091e-06,
"loss": 0.0012,
"step": 10225
},
{
"epoch": 36.8705035971223,
"grad_norm": 0.08656007796525955,
"learning_rate": 4.532828282828283e-06,
"loss": 0.0015,
"step": 10250
},
{
"epoch": 36.960431654676256,
"grad_norm": 0.0869501456618309,
"learning_rate": 4.531565656565657e-06,
"loss": 0.0018,
"step": 10275
},
{
"epoch": 37.05035971223022,
"grad_norm": 0.4101605713367462,
"learning_rate": 4.53030303030303e-06,
"loss": 0.0015,
"step": 10300
},
{
"epoch": 37.14028776978417,
"grad_norm": 0.0797925516963005,
"learning_rate": 4.529040404040405e-06,
"loss": 0.0007,
"step": 10325
},
{
"epoch": 37.23021582733813,
"grad_norm": 0.025322135537862778,
"learning_rate": 4.527777777777778e-06,
"loss": 0.0006,
"step": 10350
},
{
"epoch": 37.32014388489208,
"grad_norm": 0.059909917414188385,
"learning_rate": 4.526515151515152e-06,
"loss": 0.0012,
"step": 10375
},
{
"epoch": 37.410071942446045,
"grad_norm": 0.062007270753383636,
"learning_rate": 4.525252525252526e-06,
"loss": 0.0012,
"step": 10400
},
{
"epoch": 37.5,
"grad_norm": 0.35286614298820496,
"learning_rate": 4.523989898989899e-06,
"loss": 0.0016,
"step": 10425
},
{
"epoch": 37.589928057553955,
"grad_norm": 0.1300862431526184,
"learning_rate": 4.522727272727273e-06,
"loss": 0.0006,
"step": 10450
},
{
"epoch": 37.67985611510792,
"grad_norm": 0.13838863372802734,
"learning_rate": 4.521464646464646e-06,
"loss": 0.0006,
"step": 10475
},
{
"epoch": 37.76978417266187,
"grad_norm": 0.6767460703849792,
"learning_rate": 4.520202020202021e-06,
"loss": 0.0006,
"step": 10500
},
{
"epoch": 37.85971223021583,
"grad_norm": 0.03494667634367943,
"learning_rate": 4.518939393939394e-06,
"loss": 0.0013,
"step": 10525
},
{
"epoch": 37.94964028776978,
"grad_norm": 0.14763426780700684,
"learning_rate": 4.517676767676768e-06,
"loss": 0.0022,
"step": 10550
},
{
"epoch": 38.039568345323744,
"grad_norm": 0.15873517096042633,
"learning_rate": 4.5164141414141415e-06,
"loss": 0.0019,
"step": 10575
},
{
"epoch": 38.1294964028777,
"grad_norm": 0.048420246690511703,
"learning_rate": 4.5151515151515155e-06,
"loss": 0.001,
"step": 10600
},
{
"epoch": 38.219424460431654,
"grad_norm": 0.038138266652822495,
"learning_rate": 4.5138888888888895e-06,
"loss": 0.0004,
"step": 10625
},
{
"epoch": 38.30935251798561,
"grad_norm": 0.024455932900309563,
"learning_rate": 4.512626262626263e-06,
"loss": 0.0007,
"step": 10650
},
{
"epoch": 38.39928057553957,
"grad_norm": 0.29704517126083374,
"learning_rate": 4.511363636363637e-06,
"loss": 0.0012,
"step": 10675
},
{
"epoch": 38.489208633093526,
"grad_norm": 0.23077060282230377,
"learning_rate": 4.510101010101011e-06,
"loss": 0.0006,
"step": 10700
},
{
"epoch": 38.57913669064748,
"grad_norm": 0.04493401572108269,
"learning_rate": 4.508838383838384e-06,
"loss": 0.0007,
"step": 10725
},
{
"epoch": 38.669064748201436,
"grad_norm": 0.01225815899670124,
"learning_rate": 4.507575757575758e-06,
"loss": 0.0004,
"step": 10750
},
{
"epoch": 38.7589928057554,
"grad_norm": 0.19539327919483185,
"learning_rate": 4.506313131313132e-06,
"loss": 0.0012,
"step": 10775
},
{
"epoch": 38.84892086330935,
"grad_norm": 0.4501245319843292,
"learning_rate": 4.505050505050506e-06,
"loss": 0.0016,
"step": 10800
},
{
"epoch": 38.93884892086331,
"grad_norm": 0.955757200717926,
"learning_rate": 4.503787878787879e-06,
"loss": 0.002,
"step": 10825
},
{
"epoch": 39.02877697841727,
"grad_norm": 0.4927741587162018,
"learning_rate": 4.502525252525253e-06,
"loss": 0.0009,
"step": 10850
},
{
"epoch": 39.118705035971225,
"grad_norm": 0.5250554084777832,
"learning_rate": 4.501262626262627e-06,
"loss": 0.0018,
"step": 10875
},
{
"epoch": 39.20863309352518,
"grad_norm": 0.5786688327789307,
"learning_rate": 4.5e-06,
"loss": 0.0013,
"step": 10900
},
{
"epoch": 39.298561151079134,
"grad_norm": 0.015845810994505882,
"learning_rate": 4.498737373737374e-06,
"loss": 0.0009,
"step": 10925
},
{
"epoch": 39.388489208633096,
"grad_norm": 0.01820209249854088,
"learning_rate": 4.497474747474747e-06,
"loss": 0.001,
"step": 10950
},
{
"epoch": 39.47841726618705,
"grad_norm": 0.026294970884919167,
"learning_rate": 4.496212121212122e-06,
"loss": 0.0018,
"step": 10975
},
{
"epoch": 39.568345323741006,
"grad_norm": 0.4651360511779785,
"learning_rate": 4.494949494949495e-06,
"loss": 0.0016,
"step": 11000
},
{
"epoch": 39.568345323741006,
"eval_loss": 0.09019309282302856,
"eval_runtime": 1345.7556,
"eval_samples_per_second": 1.651,
"eval_steps_per_second": 0.103,
"eval_wer": 6.331926238613642,
"step": 11000
},
{
"epoch": 39.65827338129496,
"grad_norm": 0.29995694756507874,
"learning_rate": 4.493686868686869e-06,
"loss": 0.0015,
"step": 11025
},
{
"epoch": 39.74820143884892,
"grad_norm": 0.3291122019290924,
"learning_rate": 4.492424242424242e-06,
"loss": 0.0015,
"step": 11050
},
{
"epoch": 39.83812949640288,
"grad_norm": 0.1785033792257309,
"learning_rate": 4.491161616161616e-06,
"loss": 0.0006,
"step": 11075
},
{
"epoch": 39.92805755395683,
"grad_norm": 0.020028244704008102,
"learning_rate": 4.48989898989899e-06,
"loss": 0.002,
"step": 11100
},
{
"epoch": 40.01798561151079,
"grad_norm": 0.08107150346040726,
"learning_rate": 4.4886363636363636e-06,
"loss": 0.0014,
"step": 11125
},
{
"epoch": 40.10791366906475,
"grad_norm": 0.012092849239706993,
"learning_rate": 4.4873737373737375e-06,
"loss": 0.0018,
"step": 11150
},
{
"epoch": 40.197841726618705,
"grad_norm": 0.163823664188385,
"learning_rate": 4.4861111111111115e-06,
"loss": 0.0014,
"step": 11175
},
{
"epoch": 40.28776978417266,
"grad_norm": 0.07797440141439438,
"learning_rate": 4.4848484848484855e-06,
"loss": 0.0022,
"step": 11200
},
{
"epoch": 40.37769784172662,
"grad_norm": 0.07735186815261841,
"learning_rate": 4.483585858585859e-06,
"loss": 0.0018,
"step": 11225
},
{
"epoch": 40.46762589928058,
"grad_norm": 0.3801431953907013,
"learning_rate": 4.482323232323233e-06,
"loss": 0.0013,
"step": 11250
},
{
"epoch": 40.55755395683453,
"grad_norm": 0.02574390545487404,
"learning_rate": 4.481060606060607e-06,
"loss": 0.0008,
"step": 11275
},
{
"epoch": 40.64748201438849,
"grad_norm": 0.06015799939632416,
"learning_rate": 4.47979797979798e-06,
"loss": 0.0007,
"step": 11300
},
{
"epoch": 40.73741007194245,
"grad_norm": 0.011081011034548283,
"learning_rate": 4.478535353535354e-06,
"loss": 0.0009,
"step": 11325
},
{
"epoch": 40.827338129496404,
"grad_norm": 0.14023222029209137,
"learning_rate": 4.477272727272728e-06,
"loss": 0.0009,
"step": 11350
},
{
"epoch": 40.91726618705036,
"grad_norm": 1.1734967231750488,
"learning_rate": 4.476010101010102e-06,
"loss": 0.0034,
"step": 11375
},
{
"epoch": 41.007194244604314,
"grad_norm": 0.018789170309901237,
"learning_rate": 4.474747474747475e-06,
"loss": 0.0012,
"step": 11400
},
{
"epoch": 41.097122302158276,
"grad_norm": 0.5469329953193665,
"learning_rate": 4.473484848484849e-06,
"loss": 0.0012,
"step": 11425
},
{
"epoch": 41.18705035971223,
"grad_norm": 1.0320335626602173,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0022,
"step": 11450
},
{
"epoch": 41.276978417266186,
"grad_norm": 0.13018514215946198,
"learning_rate": 4.470959595959596e-06,
"loss": 0.001,
"step": 11475
},
{
"epoch": 41.36690647482014,
"grad_norm": 0.764275848865509,
"learning_rate": 4.46969696969697e-06,
"loss": 0.0017,
"step": 11500
},
{
"epoch": 41.4568345323741,
"grad_norm": 0.037678878754377365,
"learning_rate": 4.468434343434343e-06,
"loss": 0.0012,
"step": 11525
},
{
"epoch": 41.54676258992806,
"grad_norm": 0.0776861384510994,
"learning_rate": 4.467171717171718e-06,
"loss": 0.0012,
"step": 11550
},
{
"epoch": 41.63669064748201,
"grad_norm": 0.1435922086238861,
"learning_rate": 4.465909090909091e-06,
"loss": 0.0014,
"step": 11575
},
{
"epoch": 41.726618705035975,
"grad_norm": 0.2661900520324707,
"learning_rate": 4.464646464646465e-06,
"loss": 0.0014,
"step": 11600
},
{
"epoch": 41.81654676258993,
"grad_norm": 0.014804186299443245,
"learning_rate": 4.463383838383838e-06,
"loss": 0.0013,
"step": 11625
},
{
"epoch": 41.906474820143885,
"grad_norm": 0.5918655395507812,
"learning_rate": 4.462121212121212e-06,
"loss": 0.001,
"step": 11650
},
{
"epoch": 41.99640287769784,
"grad_norm": 0.2970104217529297,
"learning_rate": 4.460858585858586e-06,
"loss": 0.0014,
"step": 11675
},
{
"epoch": 42.0863309352518,
"grad_norm": 0.24786308407783508,
"learning_rate": 4.4595959595959596e-06,
"loss": 0.0005,
"step": 11700
},
{
"epoch": 42.17625899280576,
"grad_norm": 0.39591023325920105,
"learning_rate": 4.4583333333333336e-06,
"loss": 0.0012,
"step": 11725
},
{
"epoch": 42.26618705035971,
"grad_norm": 0.014619703404605389,
"learning_rate": 4.4570707070707076e-06,
"loss": 0.0009,
"step": 11750
},
{
"epoch": 42.356115107913666,
"grad_norm": 0.014031196013092995,
"learning_rate": 4.4558080808080816e-06,
"loss": 0.0005,
"step": 11775
},
{
"epoch": 42.44604316546763,
"grad_norm": 0.0157134011387825,
"learning_rate": 4.454545454545455e-06,
"loss": 0.0005,
"step": 11800
},
{
"epoch": 42.53597122302158,
"grad_norm": 0.5443057417869568,
"learning_rate": 4.453282828282829e-06,
"loss": 0.0005,
"step": 11825
},
{
"epoch": 42.62589928057554,
"grad_norm": 0.17728668451309204,
"learning_rate": 4.452020202020203e-06,
"loss": 0.001,
"step": 11850
},
{
"epoch": 42.71582733812949,
"grad_norm": 0.06720776110887527,
"learning_rate": 4.450757575757576e-06,
"loss": 0.0008,
"step": 11875
},
{
"epoch": 42.805755395683455,
"grad_norm": 0.020302429795265198,
"learning_rate": 4.44949494949495e-06,
"loss": 0.0005,
"step": 11900
},
{
"epoch": 42.89568345323741,
"grad_norm": 0.02236667089164257,
"learning_rate": 4.448232323232324e-06,
"loss": 0.0008,
"step": 11925
},
{
"epoch": 42.985611510791365,
"grad_norm": 0.3039033114910126,
"learning_rate": 4.446969696969697e-06,
"loss": 0.0007,
"step": 11950
},
{
"epoch": 43.07553956834533,
"grad_norm": 0.019936522468924522,
"learning_rate": 4.445707070707071e-06,
"loss": 0.0004,
"step": 11975
},
{
"epoch": 43.16546762589928,
"grad_norm": 0.006646598689258099,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0007,
"step": 12000
},
{
"epoch": 43.16546762589928,
"eval_loss": 0.09005734324455261,
"eval_runtime": 1349.9657,
"eval_samples_per_second": 1.646,
"eval_steps_per_second": 0.103,
"eval_wer": 6.2208398133748055,
"step": 12000
},
{
"epoch": 43.25539568345324,
"grad_norm": 0.06663926690816879,
"learning_rate": 4.443181818181819e-06,
"loss": 0.0003,
"step": 12025
},
{
"epoch": 43.34532374100719,
"grad_norm": 0.7015880346298218,
"learning_rate": 4.441919191919192e-06,
"loss": 0.0013,
"step": 12050
},
{
"epoch": 43.435251798561154,
"grad_norm": 0.09495950490236282,
"learning_rate": 4.440656565656566e-06,
"loss": 0.0009,
"step": 12075
},
{
"epoch": 43.52517985611511,
"grad_norm": 0.010513260029256344,
"learning_rate": 4.43939393939394e-06,
"loss": 0.0007,
"step": 12100
},
{
"epoch": 43.615107913669064,
"grad_norm": 0.08924310654401779,
"learning_rate": 4.438131313131313e-06,
"loss": 0.0004,
"step": 12125
},
{
"epoch": 43.70503597122302,
"grad_norm": 0.015554459765553474,
"learning_rate": 4.436868686868687e-06,
"loss": 0.0005,
"step": 12150
},
{
"epoch": 43.79496402877698,
"grad_norm": 0.02140822261571884,
"learning_rate": 4.4356060606060604e-06,
"loss": 0.0012,
"step": 12175
},
{
"epoch": 43.884892086330936,
"grad_norm": 0.2149767279624939,
"learning_rate": 4.434343434343435e-06,
"loss": 0.0005,
"step": 12200
},
{
"epoch": 43.97482014388489,
"grad_norm": 0.009459302760660648,
"learning_rate": 4.4330808080808084e-06,
"loss": 0.0012,
"step": 12225
},
{
"epoch": 44.064748201438846,
"grad_norm": 0.05037049949169159,
"learning_rate": 4.4318181818181824e-06,
"loss": 0.0004,
"step": 12250
},
{
"epoch": 44.15467625899281,
"grad_norm": 0.006279121618717909,
"learning_rate": 4.430555555555556e-06,
"loss": 0.0006,
"step": 12275
},
{
"epoch": 44.24460431654676,
"grad_norm": 0.03591470420360565,
"learning_rate": 4.42929292929293e-06,
"loss": 0.0006,
"step": 12300
},
{
"epoch": 44.33453237410072,
"grad_norm": 0.013430873863399029,
"learning_rate": 4.428030303030304e-06,
"loss": 0.0015,
"step": 12325
},
{
"epoch": 44.42446043165468,
"grad_norm": 0.01713446155190468,
"learning_rate": 4.426767676767677e-06,
"loss": 0.0011,
"step": 12350
},
{
"epoch": 44.514388489208635,
"grad_norm": 0.6338793039321899,
"learning_rate": 4.425505050505051e-06,
"loss": 0.0023,
"step": 12375
},
{
"epoch": 44.60431654676259,
"grad_norm": 0.19725088775157928,
"learning_rate": 4.424242424242425e-06,
"loss": 0.0015,
"step": 12400
},
{
"epoch": 44.694244604316545,
"grad_norm": 0.034790072590112686,
"learning_rate": 4.422979797979799e-06,
"loss": 0.0011,
"step": 12425
},
{
"epoch": 44.78417266187051,
"grad_norm": 2.0450031757354736,
"learning_rate": 4.421717171717172e-06,
"loss": 0.0012,
"step": 12450
},
{
"epoch": 44.87410071942446,
"grad_norm": 0.25726571679115295,
"learning_rate": 4.420454545454546e-06,
"loss": 0.0008,
"step": 12475
},
{
"epoch": 44.96402877697842,
"grad_norm": 0.14911916851997375,
"learning_rate": 4.41919191919192e-06,
"loss": 0.002,
"step": 12500
},
{
"epoch": 45.05395683453237,
"grad_norm": 0.5396764278411865,
"learning_rate": 4.417929292929293e-06,
"loss": 0.0018,
"step": 12525
},
{
"epoch": 45.143884892086334,
"grad_norm": 0.21499969065189362,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0008,
"step": 12550
},
{
"epoch": 45.23381294964029,
"grad_norm": 0.12975308299064636,
"learning_rate": 4.415404040404041e-06,
"loss": 0.0011,
"step": 12575
},
{
"epoch": 45.32374100719424,
"grad_norm": 0.03521961346268654,
"learning_rate": 4.414141414141415e-06,
"loss": 0.0009,
"step": 12600
},
{
"epoch": 45.4136690647482,
"grad_norm": 0.3964645564556122,
"learning_rate": 4.412878787878788e-06,
"loss": 0.0009,
"step": 12625
},
{
"epoch": 45.50359712230216,
"grad_norm": 0.04135512188076973,
"learning_rate": 4.411616161616162e-06,
"loss": 0.0007,
"step": 12650
},
{
"epoch": 45.593525179856115,
"grad_norm": 0.11724065244197845,
"learning_rate": 4.410353535353536e-06,
"loss": 0.0013,
"step": 12675
},
{
"epoch": 45.68345323741007,
"grad_norm": 0.3066418170928955,
"learning_rate": 4.409090909090909e-06,
"loss": 0.002,
"step": 12700
},
{
"epoch": 45.773381294964025,
"grad_norm": 0.020460475236177444,
"learning_rate": 4.407828282828283e-06,
"loss": 0.0004,
"step": 12725
},
{
"epoch": 45.86330935251799,
"grad_norm": 0.021625172346830368,
"learning_rate": 4.4065656565656565e-06,
"loss": 0.0008,
"step": 12750
},
{
"epoch": 45.95323741007194,
"grad_norm": 0.01973818428814411,
"learning_rate": 4.4053030303030305e-06,
"loss": 0.0005,
"step": 12775
},
{
"epoch": 46.0431654676259,
"grad_norm": 0.3055168092250824,
"learning_rate": 4.4040404040404044e-06,
"loss": 0.0004,
"step": 12800
},
{
"epoch": 46.13309352517986,
"grad_norm": 0.11869470030069351,
"learning_rate": 4.4027777777777784e-06,
"loss": 0.0012,
"step": 12825
},
{
"epoch": 46.223021582733814,
"grad_norm": 0.5959618091583252,
"learning_rate": 4.401515151515152e-06,
"loss": 0.0007,
"step": 12850
},
{
"epoch": 46.31294964028777,
"grad_norm": 0.08037717640399933,
"learning_rate": 4.400252525252526e-06,
"loss": 0.0006,
"step": 12875
},
{
"epoch": 46.402877697841724,
"grad_norm": 0.017363494262099266,
"learning_rate": 4.3989898989899e-06,
"loss": 0.0008,
"step": 12900
},
{
"epoch": 46.492805755395686,
"grad_norm": 0.028551748022437096,
"learning_rate": 4.397727272727273e-06,
"loss": 0.001,
"step": 12925
},
{
"epoch": 46.58273381294964,
"grad_norm": 0.08840727061033249,
"learning_rate": 4.396464646464647e-06,
"loss": 0.0007,
"step": 12950
},
{
"epoch": 46.672661870503596,
"grad_norm": 0.023021990433335304,
"learning_rate": 4.395202020202021e-06,
"loss": 0.0018,
"step": 12975
},
{
"epoch": 46.76258992805755,
"grad_norm": 0.05099537596106529,
"learning_rate": 4.393939393939394e-06,
"loss": 0.001,
"step": 13000
},
{
"epoch": 46.76258992805755,
"eval_loss": 0.08809197694063187,
"eval_runtime": 1348.5762,
"eval_samples_per_second": 1.648,
"eval_steps_per_second": 0.103,
"eval_wer": 6.154187958231504,
"step": 13000
},
{
"epoch": 46.85251798561151,
"grad_norm": 0.02734680473804474,
"learning_rate": 4.392676767676768e-06,
"loss": 0.0006,
"step": 13025
},
{
"epoch": 46.94244604316547,
"grad_norm": 0.012311214581131935,
"learning_rate": 4.391414141414142e-06,
"loss": 0.0004,
"step": 13050
},
{
"epoch": 47.03237410071942,
"grad_norm": 1.1471985578536987,
"learning_rate": 4.390151515151516e-06,
"loss": 0.0006,
"step": 13075
},
{
"epoch": 47.12230215827338,
"grad_norm": 0.04378161579370499,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0009,
"step": 13100
},
{
"epoch": 47.21223021582734,
"grad_norm": 0.014206623658537865,
"learning_rate": 4.387626262626263e-06,
"loss": 0.0006,
"step": 13125
},
{
"epoch": 47.302158273381295,
"grad_norm": 0.12384720891714096,
"learning_rate": 4.386363636363637e-06,
"loss": 0.0008,
"step": 13150
},
{
"epoch": 47.39208633093525,
"grad_norm": 0.12384091317653656,
"learning_rate": 4.38510101010101e-06,
"loss": 0.0006,
"step": 13175
},
{
"epoch": 47.48201438848921,
"grad_norm": 0.05459749698638916,
"learning_rate": 4.383838383838384e-06,
"loss": 0.0017,
"step": 13200
},
{
"epoch": 47.57194244604317,
"grad_norm": 0.06376705318689346,
"learning_rate": 4.382575757575757e-06,
"loss": 0.0012,
"step": 13225
},
{
"epoch": 47.66187050359712,
"grad_norm": 0.09516707807779312,
"learning_rate": 4.381313131313132e-06,
"loss": 0.0005,
"step": 13250
},
{
"epoch": 47.75179856115108,
"grad_norm": 0.035159386694431305,
"learning_rate": 4.380050505050505e-06,
"loss": 0.0009,
"step": 13275
},
{
"epoch": 47.84172661870504,
"grad_norm": 0.13273297250270844,
"learning_rate": 4.378787878787879e-06,
"loss": 0.0011,
"step": 13300
},
{
"epoch": 47.931654676258994,
"grad_norm": 0.6526914834976196,
"learning_rate": 4.3775252525252525e-06,
"loss": 0.0017,
"step": 13325
},
{
"epoch": 48.02158273381295,
"grad_norm": 0.10989696532487869,
"learning_rate": 4.3762626262626265e-06,
"loss": 0.0013,
"step": 13350
},
{
"epoch": 48.111510791366904,
"grad_norm": 0.12258470058441162,
"learning_rate": 4.3750000000000005e-06,
"loss": 0.001,
"step": 13375
},
{
"epoch": 48.201438848920866,
"grad_norm": 0.04794065281748772,
"learning_rate": 4.373737373737374e-06,
"loss": 0.0006,
"step": 13400
},
{
"epoch": 48.29136690647482,
"grad_norm": 0.18742027878761292,
"learning_rate": 4.3724747474747485e-06,
"loss": 0.001,
"step": 13425
},
{
"epoch": 48.381294964028775,
"grad_norm": 0.047946684062480927,
"learning_rate": 4.371212121212122e-06,
"loss": 0.0008,
"step": 13450
},
{
"epoch": 48.47122302158273,
"grad_norm": 0.011459482833743095,
"learning_rate": 4.369949494949496e-06,
"loss": 0.0004,
"step": 13475
},
{
"epoch": 48.56115107913669,
"grad_norm": 0.0178390983492136,
"learning_rate": 4.368686868686869e-06,
"loss": 0.0005,
"step": 13500
},
{
"epoch": 48.65107913669065,
"grad_norm": 0.02639496698975563,
"learning_rate": 4.367424242424243e-06,
"loss": 0.0006,
"step": 13525
},
{
"epoch": 48.7410071942446,
"grad_norm": 0.9992175698280334,
"learning_rate": 4.366161616161617e-06,
"loss": 0.0006,
"step": 13550
},
{
"epoch": 48.830935251798564,
"grad_norm": 0.12613770365715027,
"learning_rate": 4.36489898989899e-06,
"loss": 0.0003,
"step": 13575
},
{
"epoch": 48.92086330935252,
"grad_norm": 0.008718474768102169,
"learning_rate": 4.363636363636364e-06,
"loss": 0.0006,
"step": 13600
},
{
"epoch": 49.010791366906474,
"grad_norm": 0.09226574003696442,
"learning_rate": 4.362373737373738e-06,
"loss": 0.001,
"step": 13625
},
{
"epoch": 49.10071942446043,
"grad_norm": 0.01371210440993309,
"learning_rate": 4.361111111111112e-06,
"loss": 0.0005,
"step": 13650
},
{
"epoch": 49.19064748201439,
"grad_norm": 0.8040596842765808,
"learning_rate": 4.359848484848485e-06,
"loss": 0.0014,
"step": 13675
},
{
"epoch": 49.280575539568346,
"grad_norm": 0.2569543123245239,
"learning_rate": 4.358585858585859e-06,
"loss": 0.0004,
"step": 13700
},
{
"epoch": 49.3705035971223,
"grad_norm": 0.04654459282755852,
"learning_rate": 4.357323232323233e-06,
"loss": 0.0003,
"step": 13725
},
{
"epoch": 49.460431654676256,
"grad_norm": 0.03116775117814541,
"learning_rate": 4.356060606060606e-06,
"loss": 0.0006,
"step": 13750
},
{
"epoch": 49.55035971223022,
"grad_norm": 0.013714387081563473,
"learning_rate": 4.35479797979798e-06,
"loss": 0.0005,
"step": 13775
},
{
"epoch": 49.64028776978417,
"grad_norm": 0.012171006761491299,
"learning_rate": 4.353535353535353e-06,
"loss": 0.0005,
"step": 13800
},
{
"epoch": 49.73021582733813,
"grad_norm": 0.39719274640083313,
"learning_rate": 4.352272727272727e-06,
"loss": 0.0002,
"step": 13825
},
{
"epoch": 49.82014388489208,
"grad_norm": 0.009979949332773685,
"learning_rate": 4.351010101010101e-06,
"loss": 0.0002,
"step": 13850
},
{
"epoch": 49.910071942446045,
"grad_norm": 0.010056397877633572,
"learning_rate": 4.349747474747475e-06,
"loss": 0.0001,
"step": 13875
},
{
"epoch": 50.0,
"grad_norm": 1.2399721145629883,
"learning_rate": 4.348484848484849e-06,
"loss": 0.0003,
"step": 13900
},
{
"epoch": 50.089928057553955,
"grad_norm": 0.008993759751319885,
"learning_rate": 4.3472222222222225e-06,
"loss": 0.0003,
"step": 13925
},
{
"epoch": 50.17985611510792,
"grad_norm": 0.0040525756776332855,
"learning_rate": 4.3459595959595965e-06,
"loss": 0.0001,
"step": 13950
},
{
"epoch": 50.26978417266187,
"grad_norm": 0.037480395287275314,
"learning_rate": 4.34469696969697e-06,
"loss": 0.0006,
"step": 13975
},
{
"epoch": 50.35971223021583,
"grad_norm": 0.011341557838022709,
"learning_rate": 4.343434343434344e-06,
"loss": 0.0001,
"step": 14000
},
{
"epoch": 50.35971223021583,
"eval_loss": 0.0883052721619606,
"eval_runtime": 1347.8354,
"eval_samples_per_second": 1.649,
"eval_steps_per_second": 0.103,
"eval_wer": 6.161593719914093,
"step": 14000
},
{
"epoch": 50.44964028776978,
"grad_norm": 0.097772017121315,
"learning_rate": 4.342171717171718e-06,
"loss": 0.0003,
"step": 14025
},
{
"epoch": 50.539568345323744,
"grad_norm": 0.22011174261569977,
"learning_rate": 4.340909090909091e-06,
"loss": 0.0004,
"step": 14050
},
{
"epoch": 50.6294964028777,
"grad_norm": 0.004608627874404192,
"learning_rate": 4.339646464646465e-06,
"loss": 0.002,
"step": 14075
},
{
"epoch": 50.719424460431654,
"grad_norm": 0.02777382917702198,
"learning_rate": 4.338383838383839e-06,
"loss": 0.0009,
"step": 14100
},
{
"epoch": 50.80935251798561,
"grad_norm": 0.3765215277671814,
"learning_rate": 4.337121212121213e-06,
"loss": 0.0015,
"step": 14125
},
{
"epoch": 50.89928057553957,
"grad_norm": 0.014906881377100945,
"learning_rate": 4.335858585858586e-06,
"loss": 0.0019,
"step": 14150
},
{
"epoch": 50.989208633093526,
"grad_norm": 0.07598377764225006,
"learning_rate": 4.33459595959596e-06,
"loss": 0.0011,
"step": 14175
},
{
"epoch": 51.07913669064748,
"grad_norm": 0.04858017340302467,
"learning_rate": 4.333333333333334e-06,
"loss": 0.002,
"step": 14200
},
{
"epoch": 51.169064748201436,
"grad_norm": 0.00848084781318903,
"learning_rate": 4.332070707070707e-06,
"loss": 0.0015,
"step": 14225
},
{
"epoch": 51.2589928057554,
"grad_norm": 0.192399799823761,
"learning_rate": 4.330808080808081e-06,
"loss": 0.0014,
"step": 14250
},
{
"epoch": 51.34892086330935,
"grad_norm": 0.17804254591464996,
"learning_rate": 4.329545454545455e-06,
"loss": 0.0009,
"step": 14275
},
{
"epoch": 51.43884892086331,
"grad_norm": 0.9404972791671753,
"learning_rate": 4.328282828282829e-06,
"loss": 0.0022,
"step": 14300
},
{
"epoch": 51.52877697841727,
"grad_norm": 0.06042027473449707,
"learning_rate": 4.327020202020202e-06,
"loss": 0.0009,
"step": 14325
},
{
"epoch": 51.618705035971225,
"grad_norm": 0.11593267321586609,
"learning_rate": 4.325757575757576e-06,
"loss": 0.001,
"step": 14350
},
{
"epoch": 51.70863309352518,
"grad_norm": 0.042370762676000595,
"learning_rate": 4.32449494949495e-06,
"loss": 0.0009,
"step": 14375
},
{
"epoch": 51.798561151079134,
"grad_norm": 0.06264758855104446,
"learning_rate": 4.323232323232323e-06,
"loss": 0.0011,
"step": 14400
},
{
"epoch": 51.888489208633096,
"grad_norm": 0.419005811214447,
"learning_rate": 4.321969696969697e-06,
"loss": 0.0013,
"step": 14425
},
{
"epoch": 51.97841726618705,
"grad_norm": 0.025492649525403976,
"learning_rate": 4.3207070707070705e-06,
"loss": 0.0008,
"step": 14450
},
{
"epoch": 52.068345323741006,
"grad_norm": 0.1695825606584549,
"learning_rate": 4.319444444444445e-06,
"loss": 0.001,
"step": 14475
},
{
"epoch": 52.15827338129496,
"grad_norm": 0.21136726438999176,
"learning_rate": 4.3181818181818185e-06,
"loss": 0.0004,
"step": 14500
},
{
"epoch": 52.24820143884892,
"grad_norm": 0.00583269540220499,
"learning_rate": 4.3169191919191925e-06,
"loss": 0.0003,
"step": 14525
},
{
"epoch": 52.33812949640288,
"grad_norm": 0.05031251907348633,
"learning_rate": 4.315656565656566e-06,
"loss": 0.0005,
"step": 14550
},
{
"epoch": 52.42805755395683,
"grad_norm": 1.4654878377914429,
"learning_rate": 4.31439393939394e-06,
"loss": 0.0011,
"step": 14575
},
{
"epoch": 52.51798561151079,
"grad_norm": 0.05035277083516121,
"learning_rate": 4.313131313131314e-06,
"loss": 0.0008,
"step": 14600
},
{
"epoch": 52.60791366906475,
"grad_norm": 0.3283204138278961,
"learning_rate": 4.311868686868687e-06,
"loss": 0.0024,
"step": 14625
},
{
"epoch": 52.697841726618705,
"grad_norm": 0.09352482855319977,
"learning_rate": 4.310606060606061e-06,
"loss": 0.0013,
"step": 14650
},
{
"epoch": 52.78776978417266,
"grad_norm": 0.4381198287010193,
"learning_rate": 4.309343434343435e-06,
"loss": 0.0014,
"step": 14675
},
{
"epoch": 52.87769784172662,
"grad_norm": 0.4195464551448822,
"learning_rate": 4.308080808080809e-06,
"loss": 0.0006,
"step": 14700
},
{
"epoch": 52.96762589928058,
"grad_norm": 0.037935055792331696,
"learning_rate": 4.306818181818182e-06,
"loss": 0.0005,
"step": 14725
},
{
"epoch": 53.05755395683453,
"grad_norm": 0.0057031637988984585,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0011,
"step": 14750
},
{
"epoch": 53.14748201438849,
"grad_norm": 0.09235268831253052,
"learning_rate": 4.30429292929293e-06,
"loss": 0.0012,
"step": 14775
},
{
"epoch": 53.23741007194245,
"grad_norm": 0.4533500075340271,
"learning_rate": 4.303030303030303e-06,
"loss": 0.0013,
"step": 14800
},
{
"epoch": 53.327338129496404,
"grad_norm": 0.14968417584896088,
"learning_rate": 4.301767676767677e-06,
"loss": 0.0009,
"step": 14825
},
{
"epoch": 53.41726618705036,
"grad_norm": 0.016032686457037926,
"learning_rate": 4.300505050505051e-06,
"loss": 0.0003,
"step": 14850
},
{
"epoch": 53.507194244604314,
"grad_norm": 0.04255020618438721,
"learning_rate": 4.299242424242425e-06,
"loss": 0.0002,
"step": 14875
},
{
"epoch": 53.597122302158276,
"grad_norm": 0.01301508117467165,
"learning_rate": 4.297979797979798e-06,
"loss": 0.0003,
"step": 14900
},
{
"epoch": 53.68705035971223,
"grad_norm": 0.007252383045852184,
"learning_rate": 4.296717171717172e-06,
"loss": 0.0005,
"step": 14925
},
{
"epoch": 53.776978417266186,
"grad_norm": 0.13183751702308655,
"learning_rate": 4.295454545454546e-06,
"loss": 0.002,
"step": 14950
},
{
"epoch": 53.86690647482014,
"grad_norm": 0.028183195739984512,
"learning_rate": 4.294191919191919e-06,
"loss": 0.0015,
"step": 14975
},
{
"epoch": 53.9568345323741,
"grad_norm": 0.1370900571346283,
"learning_rate": 4.292929292929293e-06,
"loss": 0.0007,
"step": 15000
},
{
"epoch": 53.9568345323741,
"eval_loss": 0.08864730596542358,
"eval_runtime": 1347.6756,
"eval_samples_per_second": 1.649,
"eval_steps_per_second": 0.103,
"eval_wer": 6.391172332074353,
"step": 15000
},
{
"epoch": 54.04676258992806,
"grad_norm": 0.01960013061761856,
"learning_rate": 4.2916666666666665e-06,
"loss": 0.0016,
"step": 15025
},
{
"epoch": 54.13669064748201,
"grad_norm": 0.13105234503746033,
"learning_rate": 4.2904040404040405e-06,
"loss": 0.0003,
"step": 15050
},
{
"epoch": 54.226618705035975,
"grad_norm": 2.309511423110962,
"learning_rate": 4.2891414141414145e-06,
"loss": 0.0009,
"step": 15075
},
{
"epoch": 54.31654676258993,
"grad_norm": 0.018184732645750046,
"learning_rate": 4.287878787878788e-06,
"loss": 0.001,
"step": 15100
},
{
"epoch": 54.406474820143885,
"grad_norm": 0.05596456304192543,
"learning_rate": 4.2866161616161625e-06,
"loss": 0.0012,
"step": 15125
},
{
"epoch": 54.49640287769784,
"grad_norm": 0.735536515712738,
"learning_rate": 4.285353535353536e-06,
"loss": 0.0014,
"step": 15150
},
{
"epoch": 54.5863309352518,
"grad_norm": 0.641944169998169,
"learning_rate": 4.28409090909091e-06,
"loss": 0.0017,
"step": 15175
},
{
"epoch": 54.67625899280576,
"grad_norm": 0.02818766050040722,
"learning_rate": 4.282828282828283e-06,
"loss": 0.0013,
"step": 15200
},
{
"epoch": 54.76618705035971,
"grad_norm": 0.04384085536003113,
"learning_rate": 4.281565656565657e-06,
"loss": 0.0012,
"step": 15225
},
{
"epoch": 54.856115107913666,
"grad_norm": 0.5741293430328369,
"learning_rate": 4.280303030303031e-06,
"loss": 0.0012,
"step": 15250
},
{
"epoch": 54.94604316546763,
"grad_norm": 0.5108962059020996,
"learning_rate": 4.279040404040404e-06,
"loss": 0.0013,
"step": 15275
},
{
"epoch": 55.03597122302158,
"grad_norm": 0.09613129496574402,
"learning_rate": 4.277777777777778e-06,
"loss": 0.0011,
"step": 15300
},
{
"epoch": 55.12589928057554,
"grad_norm": 0.2453729510307312,
"learning_rate": 4.276515151515152e-06,
"loss": 0.0016,
"step": 15325
},
{
"epoch": 55.21582733812949,
"grad_norm": 0.03533944860100746,
"learning_rate": 4.275252525252526e-06,
"loss": 0.0013,
"step": 15350
},
{
"epoch": 55.305755395683455,
"grad_norm": 0.02793753705918789,
"learning_rate": 4.273989898989899e-06,
"loss": 0.0011,
"step": 15375
},
{
"epoch": 55.39568345323741,
"grad_norm": 0.11208122968673706,
"learning_rate": 4.272727272727273e-06,
"loss": 0.0014,
"step": 15400
},
{
"epoch": 55.485611510791365,
"grad_norm": 0.23727653920650482,
"learning_rate": 4.271464646464647e-06,
"loss": 0.0007,
"step": 15425
},
{
"epoch": 55.57553956834532,
"grad_norm": 0.1095881313085556,
"learning_rate": 4.27020202020202e-06,
"loss": 0.0006,
"step": 15450
},
{
"epoch": 55.66546762589928,
"grad_norm": 0.026398301124572754,
"learning_rate": 4.268939393939394e-06,
"loss": 0.0003,
"step": 15475
},
{
"epoch": 55.75539568345324,
"grad_norm": 0.3764269948005676,
"learning_rate": 4.267676767676767e-06,
"loss": 0.0007,
"step": 15500
},
{
"epoch": 55.84532374100719,
"grad_norm": 0.710081160068512,
"learning_rate": 4.266414141414142e-06,
"loss": 0.0006,
"step": 15525
},
{
"epoch": 55.935251798561154,
"grad_norm": 0.01405036449432373,
"learning_rate": 4.265151515151515e-06,
"loss": 0.0009,
"step": 15550
},
{
"epoch": 56.02517985611511,
"grad_norm": 0.011654024943709373,
"learning_rate": 4.263888888888889e-06,
"loss": 0.0011,
"step": 15575
},
{
"epoch": 56.115107913669064,
"grad_norm": 0.8455324172973633,
"learning_rate": 4.262626262626263e-06,
"loss": 0.0006,
"step": 15600
},
{
"epoch": 56.20503597122302,
"grad_norm": 0.7859840989112854,
"learning_rate": 4.2613636363636365e-06,
"loss": 0.0005,
"step": 15625
},
{
"epoch": 56.29496402877698,
"grad_norm": 0.012887760065495968,
"learning_rate": 4.2601010101010105e-06,
"loss": 0.0003,
"step": 15650
},
{
"epoch": 56.384892086330936,
"grad_norm": 0.27630236744880676,
"learning_rate": 4.258838383838384e-06,
"loss": 0.0005,
"step": 15675
},
{
"epoch": 56.47482014388489,
"grad_norm": 0.23494713008403778,
"learning_rate": 4.2575757575757585e-06,
"loss": 0.0003,
"step": 15700
},
{
"epoch": 56.564748201438846,
"grad_norm": 0.04018251597881317,
"learning_rate": 4.256313131313132e-06,
"loss": 0.0009,
"step": 15725
},
{
"epoch": 56.65467625899281,
"grad_norm": 0.29447436332702637,
"learning_rate": 4.255050505050506e-06,
"loss": 0.0002,
"step": 15750
},
{
"epoch": 56.74460431654676,
"grad_norm": 0.048734016716480255,
"learning_rate": 4.253787878787879e-06,
"loss": 0.0008,
"step": 15775
},
{
"epoch": 56.83453237410072,
"grad_norm": 0.00981312245130539,
"learning_rate": 4.252525252525253e-06,
"loss": 0.0003,
"step": 15800
},
{
"epoch": 56.92446043165468,
"grad_norm": 0.029217666015028954,
"learning_rate": 4.251262626262627e-06,
"loss": 0.0002,
"step": 15825
},
{
"epoch": 57.014388489208635,
"grad_norm": 0.0892946720123291,
"learning_rate": 4.25e-06,
"loss": 0.0008,
"step": 15850
},
{
"epoch": 57.10431654676259,
"grad_norm": 0.0070861089043319225,
"learning_rate": 4.248737373737374e-06,
"loss": 0.0003,
"step": 15875
},
{
"epoch": 57.194244604316545,
"grad_norm": 0.5670444965362549,
"learning_rate": 4.247474747474748e-06,
"loss": 0.0005,
"step": 15900
},
{
"epoch": 57.28417266187051,
"grad_norm": 0.4061719477176666,
"learning_rate": 4.246212121212122e-06,
"loss": 0.0009,
"step": 15925
},
{
"epoch": 57.37410071942446,
"grad_norm": 0.2658737897872925,
"learning_rate": 4.244949494949495e-06,
"loss": 0.0011,
"step": 15950
},
{
"epoch": 57.46402877697842,
"grad_norm": 0.06908473372459412,
"learning_rate": 4.243686868686869e-06,
"loss": 0.0012,
"step": 15975
},
{
"epoch": 57.55395683453237,
"grad_norm": 0.12484970688819885,
"learning_rate": 4.242424242424243e-06,
"loss": 0.0008,
"step": 16000
},
{
"epoch": 57.55395683453237,
"eval_loss": 0.09175190329551697,
"eval_runtime": 1351.6711,
"eval_samples_per_second": 1.644,
"eval_steps_per_second": 0.103,
"eval_wer": 6.391172332074353,
"step": 16000
},
{
"epoch": 57.643884892086334,
"grad_norm": 0.10532079637050629,
"learning_rate": 4.241161616161616e-06,
"loss": 0.0009,
"step": 16025
},
{
"epoch": 57.73381294964029,
"grad_norm": 0.0082013588398695,
"learning_rate": 4.23989898989899e-06,
"loss": 0.0009,
"step": 16050
},
{
"epoch": 57.82374100719424,
"grad_norm": 0.8880343437194824,
"learning_rate": 4.238636363636364e-06,
"loss": 0.0012,
"step": 16075
},
{
"epoch": 57.9136690647482,
"grad_norm": 0.04694369435310364,
"learning_rate": 4.237373737373737e-06,
"loss": 0.0011,
"step": 16100
},
{
"epoch": 58.00359712230216,
"grad_norm": 0.4175935387611389,
"learning_rate": 4.236111111111111e-06,
"loss": 0.0007,
"step": 16125
},
{
"epoch": 58.093525179856115,
"grad_norm": 0.0991375669836998,
"learning_rate": 4.234848484848485e-06,
"loss": 0.0008,
"step": 16150
},
{
"epoch": 58.18345323741007,
"grad_norm": 0.05238619074225426,
"learning_rate": 4.233585858585859e-06,
"loss": 0.0009,
"step": 16175
},
{
"epoch": 58.273381294964025,
"grad_norm": 0.024060403928160667,
"learning_rate": 4.2323232323232325e-06,
"loss": 0.0005,
"step": 16200
},
{
"epoch": 58.36330935251799,
"grad_norm": 0.514026939868927,
"learning_rate": 4.2310606060606065e-06,
"loss": 0.0017,
"step": 16225
},
{
"epoch": 58.45323741007194,
"grad_norm": 0.9123257994651794,
"learning_rate": 4.22979797979798e-06,
"loss": 0.0009,
"step": 16250
},
{
"epoch": 58.5431654676259,
"grad_norm": 0.034488383680582047,
"learning_rate": 4.228535353535354e-06,
"loss": 0.0005,
"step": 16275
},
{
"epoch": 58.63309352517986,
"grad_norm": 0.08020392805337906,
"learning_rate": 4.227272727272728e-06,
"loss": 0.0021,
"step": 16300
},
{
"epoch": 58.723021582733814,
"grad_norm": 0.011538870632648468,
"learning_rate": 4.226010101010101e-06,
"loss": 0.001,
"step": 16325
},
{
"epoch": 58.81294964028777,
"grad_norm": 0.4130057692527771,
"learning_rate": 4.224747474747475e-06,
"loss": 0.0009,
"step": 16350
},
{
"epoch": 58.902877697841724,
"grad_norm": 0.018940504640340805,
"learning_rate": 4.223484848484849e-06,
"loss": 0.0008,
"step": 16375
},
{
"epoch": 58.992805755395686,
"grad_norm": 0.09760510176420212,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0009,
"step": 16400
},
{
"epoch": 59.08273381294964,
"grad_norm": 0.6728724241256714,
"learning_rate": 4.220959595959596e-06,
"loss": 0.0015,
"step": 16425
},
{
"epoch": 59.172661870503596,
"grad_norm": 0.01400268916040659,
"learning_rate": 4.21969696969697e-06,
"loss": 0.0006,
"step": 16450
},
{
"epoch": 59.26258992805755,
"grad_norm": 0.027168823406100273,
"learning_rate": 4.218434343434344e-06,
"loss": 0.0003,
"step": 16475
},
{
"epoch": 59.35251798561151,
"grad_norm": 0.025733735412359238,
"learning_rate": 4.217171717171717e-06,
"loss": 0.0003,
"step": 16500
},
{
"epoch": 59.44244604316547,
"grad_norm": 0.012072687968611717,
"learning_rate": 4.215909090909091e-06,
"loss": 0.0004,
"step": 16525
},
{
"epoch": 59.53237410071942,
"grad_norm": 0.03630650043487549,
"learning_rate": 4.214646464646465e-06,
"loss": 0.0013,
"step": 16550
},
{
"epoch": 59.62230215827338,
"grad_norm": 0.13875187933444977,
"learning_rate": 4.213383838383839e-06,
"loss": 0.0008,
"step": 16575
},
{
"epoch": 59.71223021582734,
"grad_norm": 0.06004035472869873,
"learning_rate": 4.212121212121212e-06,
"loss": 0.0004,
"step": 16600
},
{
"epoch": 59.802158273381295,
"grad_norm": 0.024319609627127647,
"learning_rate": 4.210858585858586e-06,
"loss": 0.0006,
"step": 16625
},
{
"epoch": 59.89208633093525,
"grad_norm": 0.0957476794719696,
"learning_rate": 4.20959595959596e-06,
"loss": 0.0016,
"step": 16650
},
{
"epoch": 59.98201438848921,
"grad_norm": 0.014447568915784359,
"learning_rate": 4.208333333333333e-06,
"loss": 0.0007,
"step": 16675
},
{
"epoch": 60.07194244604317,
"grad_norm": 0.0760221779346466,
"learning_rate": 4.207070707070707e-06,
"loss": 0.0007,
"step": 16700
},
{
"epoch": 60.16187050359712,
"grad_norm": 0.08783930540084839,
"learning_rate": 4.2058080808080806e-06,
"loss": 0.0006,
"step": 16725
},
{
"epoch": 60.25179856115108,
"grad_norm": 0.020011553540825844,
"learning_rate": 4.204545454545455e-06,
"loss": 0.0002,
"step": 16750
},
{
"epoch": 60.34172661870504,
"grad_norm": 0.004587370436638594,
"learning_rate": 4.2032828282828286e-06,
"loss": 0.0001,
"step": 16775
},
{
"epoch": 60.431654676258994,
"grad_norm": 0.05192629247903824,
"learning_rate": 4.2020202020202026e-06,
"loss": 0.0007,
"step": 16800
},
{
"epoch": 60.52158273381295,
"grad_norm": 0.0028184789698570967,
"learning_rate": 4.2007575757575766e-06,
"loss": 0.0001,
"step": 16825
},
{
"epoch": 60.611510791366904,
"grad_norm": 0.11263082921504974,
"learning_rate": 4.19949494949495e-06,
"loss": 0.0004,
"step": 16850
},
{
"epoch": 60.701438848920866,
"grad_norm": 0.020229890942573547,
"learning_rate": 4.198232323232324e-06,
"loss": 0.0002,
"step": 16875
},
{
"epoch": 60.79136690647482,
"grad_norm": 0.004258246161043644,
"learning_rate": 4.196969696969697e-06,
"loss": 0.0004,
"step": 16900
},
{
"epoch": 60.881294964028775,
"grad_norm": 0.005619137082248926,
"learning_rate": 4.195707070707072e-06,
"loss": 0.0001,
"step": 16925
},
{
"epoch": 60.97122302158273,
"grad_norm": 0.005032286513596773,
"learning_rate": 4.194444444444445e-06,
"loss": 0.0002,
"step": 16950
},
{
"epoch": 61.06115107913669,
"grad_norm": 0.02484523132443428,
"learning_rate": 4.193181818181819e-06,
"loss": 0.0003,
"step": 16975
},
{
"epoch": 61.15107913669065,
"grad_norm": 0.0017194038955494761,
"learning_rate": 4.191919191919192e-06,
"loss": 0.0002,
"step": 17000
},
{
"epoch": 61.15107913669065,
"eval_loss": 0.09027338027954102,
"eval_runtime": 1359.5537,
"eval_samples_per_second": 1.634,
"eval_steps_per_second": 0.102,
"eval_wer": 5.909797822706065,
"step": 17000
},
{
"epoch": 61.2410071942446,
"grad_norm": 0.0024019062984734774,
"learning_rate": 4.190656565656566e-06,
"loss": 0.0002,
"step": 17025
},
{
"epoch": 61.330935251798564,
"grad_norm": 0.004478455055505037,
"learning_rate": 4.18939393939394e-06,
"loss": 0.0003,
"step": 17050
},
{
"epoch": 61.42086330935252,
"grad_norm": 0.0044603836722671986,
"learning_rate": 4.188131313131313e-06,
"loss": 0.0004,
"step": 17075
},
{
"epoch": 61.510791366906474,
"grad_norm": 0.08818788081407547,
"learning_rate": 4.186868686868687e-06,
"loss": 0.0009,
"step": 17100
},
{
"epoch": 61.60071942446043,
"grad_norm": 0.0027286384720355272,
"learning_rate": 4.185606060606061e-06,
"loss": 0.0002,
"step": 17125
},
{
"epoch": 61.69064748201439,
"grad_norm": 0.0037345695309340954,
"learning_rate": 4.184343434343434e-06,
"loss": 0.0003,
"step": 17150
},
{
"epoch": 61.780575539568346,
"grad_norm": 0.014616015367209911,
"learning_rate": 4.183080808080808e-06,
"loss": 0.0004,
"step": 17175
},
{
"epoch": 61.8705035971223,
"grad_norm": 0.007769573014229536,
"learning_rate": 4.181818181818182e-06,
"loss": 0.0003,
"step": 17200
},
{
"epoch": 61.960431654676256,
"grad_norm": 0.008359814994037151,
"learning_rate": 4.180555555555556e-06,
"loss": 0.0005,
"step": 17225
},
{
"epoch": 62.05035971223022,
"grad_norm": 0.0051100486889481544,
"learning_rate": 4.1792929292929294e-06,
"loss": 0.0005,
"step": 17250
},
{
"epoch": 62.14028776978417,
"grad_norm": 0.0029563389252871275,
"learning_rate": 4.1780303030303034e-06,
"loss": 0.0001,
"step": 17275
},
{
"epoch": 62.23021582733813,
"grad_norm": 0.0030668089166283607,
"learning_rate": 4.1767676767676774e-06,
"loss": 0.0001,
"step": 17300
},
{
"epoch": 62.32014388489208,
"grad_norm": 0.02710825577378273,
"learning_rate": 4.175505050505051e-06,
"loss": 0.0006,
"step": 17325
},
{
"epoch": 62.410071942446045,
"grad_norm": 0.0027756947092711926,
"learning_rate": 4.1742424242424246e-06,
"loss": 0.0001,
"step": 17350
},
{
"epoch": 62.5,
"grad_norm": 0.09106307476758957,
"learning_rate": 4.172979797979798e-06,
"loss": 0.0003,
"step": 17375
},
{
"epoch": 62.589928057553955,
"grad_norm": 0.005363088101148605,
"learning_rate": 4.1717171717171726e-06,
"loss": 0.0001,
"step": 17400
},
{
"epoch": 62.67985611510792,
"grad_norm": 0.005525332409888506,
"learning_rate": 4.170454545454546e-06,
"loss": 0.0001,
"step": 17425
},
{
"epoch": 62.76978417266187,
"grad_norm": 0.007496482692658901,
"learning_rate": 4.16919191919192e-06,
"loss": 0.0001,
"step": 17450
},
{
"epoch": 62.85971223021583,
"grad_norm": 0.026290051639080048,
"learning_rate": 4.167929292929293e-06,
"loss": 0.0001,
"step": 17475
},
{
"epoch": 62.94964028776978,
"grad_norm": 0.006395560223609209,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0001,
"step": 17500
},
{
"epoch": 63.039568345323744,
"grad_norm": 0.004197731614112854,
"learning_rate": 4.165404040404041e-06,
"loss": 0.0001,
"step": 17525
},
{
"epoch": 63.1294964028777,
"grad_norm": 0.002505301032215357,
"learning_rate": 4.164141414141414e-06,
"loss": 0.0,
"step": 17550
},
{
"epoch": 63.219424460431654,
"grad_norm": 0.0022915108129382133,
"learning_rate": 4.162878787878788e-06,
"loss": 0.0001,
"step": 17575
},
{
"epoch": 63.30935251798561,
"grad_norm": 0.0019390948582440615,
"learning_rate": 4.161616161616162e-06,
"loss": 0.0,
"step": 17600
},
{
"epoch": 63.39928057553957,
"grad_norm": 0.001307799364440143,
"learning_rate": 4.160353535353536e-06,
"loss": 0.0001,
"step": 17625
},
{
"epoch": 63.489208633093526,
"grad_norm": 0.0016936671454459429,
"learning_rate": 4.159090909090909e-06,
"loss": 0.0,
"step": 17650
},
{
"epoch": 63.57913669064748,
"grad_norm": 0.0017974688671529293,
"learning_rate": 4.157828282828283e-06,
"loss": 0.0,
"step": 17675
},
{
"epoch": 63.669064748201436,
"grad_norm": 0.0027852486819028854,
"learning_rate": 4.156565656565657e-06,
"loss": 0.0,
"step": 17700
},
{
"epoch": 63.7589928057554,
"grad_norm": 0.0017096559749916196,
"learning_rate": 4.15530303030303e-06,
"loss": 0.0,
"step": 17725
},
{
"epoch": 63.84892086330935,
"grad_norm": 0.0019876237493008375,
"learning_rate": 4.154040404040404e-06,
"loss": 0.0,
"step": 17750
},
{
"epoch": 63.93884892086331,
"grad_norm": 0.0011115281376987696,
"learning_rate": 4.152777777777778e-06,
"loss": 0.0,
"step": 17775
},
{
"epoch": 64.02877697841727,
"grad_norm": 0.0017126763705164194,
"learning_rate": 4.151515151515152e-06,
"loss": 0.0,
"step": 17800
},
{
"epoch": 64.11870503597122,
"grad_norm": 0.0011258955346420407,
"learning_rate": 4.1502525252525254e-06,
"loss": 0.0,
"step": 17825
},
{
"epoch": 64.20863309352518,
"grad_norm": 0.0015615399461239576,
"learning_rate": 4.1489898989898994e-06,
"loss": 0.0,
"step": 17850
},
{
"epoch": 64.29856115107914,
"grad_norm": 0.001990539487451315,
"learning_rate": 4.1477272727272734e-06,
"loss": 0.0,
"step": 17875
},
{
"epoch": 64.38848920863309,
"grad_norm": 0.0013739466667175293,
"learning_rate": 4.146464646464647e-06,
"loss": 0.0,
"step": 17900
},
{
"epoch": 64.47841726618705,
"grad_norm": 0.0017153042135760188,
"learning_rate": 4.145202020202021e-06,
"loss": 0.0,
"step": 17925
},
{
"epoch": 64.56834532374101,
"grad_norm": 0.0013855737634003162,
"learning_rate": 4.143939393939394e-06,
"loss": 0.0,
"step": 17950
},
{
"epoch": 64.65827338129496,
"grad_norm": 0.0023376569151878357,
"learning_rate": 4.142676767676769e-06,
"loss": 0.0001,
"step": 17975
},
{
"epoch": 64.74820143884892,
"grad_norm": 0.0007114307954907417,
"learning_rate": 4.141414141414142e-06,
"loss": 0.0,
"step": 18000
},
{
"epoch": 64.74820143884892,
"eval_loss": 0.09263601154088974,
"eval_runtime": 1339.2527,
"eval_samples_per_second": 1.659,
"eval_steps_per_second": 0.104,
"eval_wer": 5.658001925498037,
"step": 18000
},
{
"epoch": 64.83812949640287,
"grad_norm": 0.0010609790915623307,
"learning_rate": 4.140151515151516e-06,
"loss": 0.0,
"step": 18025
},
{
"epoch": 64.92805755395683,
"grad_norm": 0.0020956743974238634,
"learning_rate": 4.138888888888889e-06,
"loss": 0.0,
"step": 18050
},
{
"epoch": 65.0179856115108,
"grad_norm": 0.0013533415040001273,
"learning_rate": 4.137626262626263e-06,
"loss": 0.0001,
"step": 18075
},
{
"epoch": 65.10791366906474,
"grad_norm": 0.0010088173439726233,
"learning_rate": 4.136363636363637e-06,
"loss": 0.0001,
"step": 18100
},
{
"epoch": 65.1978417266187,
"grad_norm": 0.001570379245094955,
"learning_rate": 4.13510101010101e-06,
"loss": 0.0,
"step": 18125
},
{
"epoch": 65.28776978417267,
"grad_norm": 0.0016373491380363703,
"learning_rate": 4.133838383838384e-06,
"loss": 0.0,
"step": 18150
},
{
"epoch": 65.37769784172662,
"grad_norm": 0.0015006172470748425,
"learning_rate": 4.132575757575758e-06,
"loss": 0.0,
"step": 18175
},
{
"epoch": 65.46762589928058,
"grad_norm": 0.0011033018818125129,
"learning_rate": 4.131313131313132e-06,
"loss": 0.0,
"step": 18200
},
{
"epoch": 65.55755395683454,
"grad_norm": 0.0013498698826879263,
"learning_rate": 4.130050505050505e-06,
"loss": 0.0,
"step": 18225
},
{
"epoch": 65.64748201438849,
"grad_norm": 0.0013445069780573249,
"learning_rate": 4.128787878787879e-06,
"loss": 0.0,
"step": 18250
},
{
"epoch": 65.73741007194245,
"grad_norm": 0.0017605924513190985,
"learning_rate": 4.127525252525253e-06,
"loss": 0.0,
"step": 18275
},
{
"epoch": 65.8273381294964,
"grad_norm": 0.0018534163245931268,
"learning_rate": 4.126262626262626e-06,
"loss": 0.0,
"step": 18300
},
{
"epoch": 65.91726618705036,
"grad_norm": 0.000884951208718121,
"learning_rate": 4.125e-06,
"loss": 0.0,
"step": 18325
},
{
"epoch": 66.00719424460432,
"grad_norm": 0.0011815873440355062,
"learning_rate": 4.123737373737374e-06,
"loss": 0.0001,
"step": 18350
},
{
"epoch": 66.09712230215827,
"grad_norm": 0.001126173185184598,
"learning_rate": 4.1224747474747475e-06,
"loss": 0.0,
"step": 18375
},
{
"epoch": 66.18705035971223,
"grad_norm": 0.0011552530340850353,
"learning_rate": 4.1212121212121215e-06,
"loss": 0.0,
"step": 18400
},
{
"epoch": 66.27697841726619,
"grad_norm": 0.001199888065457344,
"learning_rate": 4.119949494949495e-06,
"loss": 0.0,
"step": 18425
},
{
"epoch": 66.36690647482014,
"grad_norm": 0.0007247981848195195,
"learning_rate": 4.1186868686868695e-06,
"loss": 0.0,
"step": 18450
},
{
"epoch": 66.4568345323741,
"grad_norm": 0.001124533242546022,
"learning_rate": 4.117424242424243e-06,
"loss": 0.0001,
"step": 18475
},
{
"epoch": 66.54676258992805,
"grad_norm": 0.0009603950311429799,
"learning_rate": 4.116161616161617e-06,
"loss": 0.0,
"step": 18500
},
{
"epoch": 66.63669064748201,
"grad_norm": 0.0016920759808272123,
"learning_rate": 4.114898989898991e-06,
"loss": 0.0001,
"step": 18525
},
{
"epoch": 66.72661870503597,
"grad_norm": 0.0007674341322854161,
"learning_rate": 4.113636363636364e-06,
"loss": 0.0,
"step": 18550
},
{
"epoch": 66.81654676258992,
"grad_norm": 0.000895792618393898,
"learning_rate": 4.112373737373738e-06,
"loss": 0.0,
"step": 18575
},
{
"epoch": 66.90647482014388,
"grad_norm": 0.0009227583650499582,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0,
"step": 18600
},
{
"epoch": 66.99640287769785,
"grad_norm": 0.0019231617916375399,
"learning_rate": 4.109848484848486e-06,
"loss": 0.0,
"step": 18625
},
{
"epoch": 67.0863309352518,
"grad_norm": 0.0010071933502331376,
"learning_rate": 4.108585858585859e-06,
"loss": 0.0,
"step": 18650
},
{
"epoch": 67.17625899280576,
"grad_norm": 0.0009304916602559388,
"learning_rate": 4.107323232323233e-06,
"loss": 0.0002,
"step": 18675
},
{
"epoch": 67.26618705035972,
"grad_norm": 0.0008229652885347605,
"learning_rate": 4.106060606060606e-06,
"loss": 0.0,
"step": 18700
},
{
"epoch": 67.35611510791367,
"grad_norm": 0.0006714012124575675,
"learning_rate": 4.10479797979798e-06,
"loss": 0.0,
"step": 18725
},
{
"epoch": 67.44604316546763,
"grad_norm": 0.0009734642808325589,
"learning_rate": 4.103535353535354e-06,
"loss": 0.0,
"step": 18750
},
{
"epoch": 67.53597122302158,
"grad_norm": 0.0007786314818076789,
"learning_rate": 4.102272727272727e-06,
"loss": 0.0,
"step": 18775
},
{
"epoch": 67.62589928057554,
"grad_norm": 0.001005512080155313,
"learning_rate": 4.101010101010101e-06,
"loss": 0.0,
"step": 18800
},
{
"epoch": 67.7158273381295,
"grad_norm": 0.001331688603386283,
"learning_rate": 4.099747474747475e-06,
"loss": 0.0,
"step": 18825
},
{
"epoch": 67.80575539568345,
"grad_norm": 0.000987470499239862,
"learning_rate": 4.098484848484849e-06,
"loss": 0.0,
"step": 18850
},
{
"epoch": 67.89568345323741,
"grad_norm": 0.0008799554198049009,
"learning_rate": 4.097222222222222e-06,
"loss": 0.0001,
"step": 18875
},
{
"epoch": 67.98561151079137,
"grad_norm": 0.0009637974435463548,
"learning_rate": 4.095959595959596e-06,
"loss": 0.0,
"step": 18900
},
{
"epoch": 68.07553956834532,
"grad_norm": 0.0006672360468655825,
"learning_rate": 4.09469696969697e-06,
"loss": 0.0,
"step": 18925
},
{
"epoch": 68.16546762589928,
"grad_norm": 0.0008431566529907286,
"learning_rate": 4.0934343434343435e-06,
"loss": 0.0001,
"step": 18950
},
{
"epoch": 68.25539568345324,
"grad_norm": 0.0010287058539688587,
"learning_rate": 4.0921717171717175e-06,
"loss": 0.0,
"step": 18975
},
{
"epoch": 68.34532374100719,
"grad_norm": 0.0007457846077159047,
"learning_rate": 4.0909090909090915e-06,
"loss": 0.0,
"step": 19000
},
{
"epoch": 68.34532374100719,
"eval_loss": 0.09562169760465622,
"eval_runtime": 1339.1079,
"eval_samples_per_second": 1.659,
"eval_steps_per_second": 0.104,
"eval_wer": 5.583944308672146,
"step": 19000
},
{
"epoch": 68.43525179856115,
"grad_norm": 0.0009193470468744636,
"learning_rate": 4.0896464646464655e-06,
"loss": 0.0,
"step": 19025
},
{
"epoch": 68.5251798561151,
"grad_norm": 0.0008717461605556309,
"learning_rate": 4.088383838383839e-06,
"loss": 0.0,
"step": 19050
},
{
"epoch": 68.61510791366906,
"grad_norm": 0.0008119108970277011,
"learning_rate": 4.087121212121213e-06,
"loss": 0.0,
"step": 19075
},
{
"epoch": 68.70503597122303,
"grad_norm": 0.0010454319417476654,
"learning_rate": 4.085858585858587e-06,
"loss": 0.0001,
"step": 19100
},
{
"epoch": 68.79496402877697,
"grad_norm": 0.0012115614954382181,
"learning_rate": 4.08459595959596e-06,
"loss": 0.0,
"step": 19125
},
{
"epoch": 68.88489208633094,
"grad_norm": 0.001058676978573203,
"learning_rate": 4.083333333333334e-06,
"loss": 0.0,
"step": 19150
},
{
"epoch": 68.9748201438849,
"grad_norm": 0.0009722402319312096,
"learning_rate": 4.082070707070707e-06,
"loss": 0.0,
"step": 19175
},
{
"epoch": 69.06474820143885,
"grad_norm": 0.0006609881529584527,
"learning_rate": 4.080808080808081e-06,
"loss": 0.0,
"step": 19200
},
{
"epoch": 69.15467625899281,
"grad_norm": 0.0007030842243693769,
"learning_rate": 4.079545454545455e-06,
"loss": 0.0,
"step": 19225
},
{
"epoch": 69.24460431654676,
"grad_norm": 0.0006842823349870741,
"learning_rate": 4.078282828282829e-06,
"loss": 0.0001,
"step": 19250
},
{
"epoch": 69.33453237410072,
"grad_norm": 0.000651550421025604,
"learning_rate": 4.077020202020202e-06,
"loss": 0.0,
"step": 19275
},
{
"epoch": 69.42446043165468,
"grad_norm": 0.0006407879409380257,
"learning_rate": 4.075757575757576e-06,
"loss": 0.0001,
"step": 19300
},
{
"epoch": 69.51438848920863,
"grad_norm": 0.0010551882442086935,
"learning_rate": 4.07449494949495e-06,
"loss": 0.0,
"step": 19325
},
{
"epoch": 69.60431654676259,
"grad_norm": 0.0008015549392439425,
"learning_rate": 4.073232323232323e-06,
"loss": 0.0,
"step": 19350
},
{
"epoch": 69.69424460431655,
"grad_norm": 0.0008218359434977174,
"learning_rate": 4.071969696969697e-06,
"loss": 0.0,
"step": 19375
},
{
"epoch": 69.7841726618705,
"grad_norm": 0.0009953822009265423,
"learning_rate": 4.070707070707071e-06,
"loss": 0.0,
"step": 19400
},
{
"epoch": 69.87410071942446,
"grad_norm": 0.0008482063421979547,
"learning_rate": 4.069444444444444e-06,
"loss": 0.0,
"step": 19425
},
{
"epoch": 69.96402877697842,
"grad_norm": 0.0008491966291330755,
"learning_rate": 4.068181818181818e-06,
"loss": 0.0,
"step": 19450
},
{
"epoch": 70.05395683453237,
"grad_norm": 0.000667088374029845,
"learning_rate": 4.066919191919192e-06,
"loss": 0.0001,
"step": 19475
},
{
"epoch": 70.14388489208633,
"grad_norm": 0.0006748430896550417,
"learning_rate": 4.065656565656566e-06,
"loss": 0.0,
"step": 19500
},
{
"epoch": 70.23381294964028,
"grad_norm": 0.0006421016296371818,
"learning_rate": 4.0643939393939395e-06,
"loss": 0.0,
"step": 19525
},
{
"epoch": 70.32374100719424,
"grad_norm": 0.0009323668200522661,
"learning_rate": 4.0631313131313135e-06,
"loss": 0.0,
"step": 19550
},
{
"epoch": 70.4136690647482,
"grad_norm": 0.0008588407654315233,
"learning_rate": 4.0618686868686875e-06,
"loss": 0.0,
"step": 19575
},
{
"epoch": 70.50359712230215,
"grad_norm": 0.0006930006784386933,
"learning_rate": 4.060606060606061e-06,
"loss": 0.0,
"step": 19600
},
{
"epoch": 70.59352517985612,
"grad_norm": 0.000734307337552309,
"learning_rate": 4.059343434343435e-06,
"loss": 0.0,
"step": 19625
},
{
"epoch": 70.68345323741008,
"grad_norm": 0.0007306214538402855,
"learning_rate": 4.058080808080808e-06,
"loss": 0.0001,
"step": 19650
},
{
"epoch": 70.77338129496403,
"grad_norm": 0.0005738097243010998,
"learning_rate": 4.056818181818183e-06,
"loss": 0.0,
"step": 19675
},
{
"epoch": 70.86330935251799,
"grad_norm": 0.00065003422787413,
"learning_rate": 4.055555555555556e-06,
"loss": 0.0,
"step": 19700
},
{
"epoch": 70.95323741007195,
"grad_norm": 0.0006234170868992805,
"learning_rate": 4.05429292929293e-06,
"loss": 0.0,
"step": 19725
},
{
"epoch": 71.0431654676259,
"grad_norm": 0.000607940077316016,
"learning_rate": 4.053030303030303e-06,
"loss": 0.0,
"step": 19750
},
{
"epoch": 71.13309352517986,
"grad_norm": 0.0005851531168445945,
"learning_rate": 4.051767676767677e-06,
"loss": 0.0,
"step": 19775
},
{
"epoch": 71.22302158273381,
"grad_norm": 0.0009296953212469816,
"learning_rate": 4.050505050505051e-06,
"loss": 0.0,
"step": 19800
},
{
"epoch": 71.31294964028777,
"grad_norm": 0.0006304428679868579,
"learning_rate": 4.049242424242424e-06,
"loss": 0.0001,
"step": 19825
},
{
"epoch": 71.40287769784173,
"grad_norm": 0.000664900871925056,
"learning_rate": 4.047979797979799e-06,
"loss": 0.0,
"step": 19850
},
{
"epoch": 71.49280575539568,
"grad_norm": 0.0003695714403875172,
"learning_rate": 4.046717171717172e-06,
"loss": 0.0,
"step": 19875
},
{
"epoch": 71.58273381294964,
"grad_norm": 0.000516809755936265,
"learning_rate": 4.045454545454546e-06,
"loss": 0.0,
"step": 19900
},
{
"epoch": 71.6726618705036,
"grad_norm": 0.0006113911513239145,
"learning_rate": 4.044191919191919e-06,
"loss": 0.0,
"step": 19925
},
{
"epoch": 71.76258992805755,
"grad_norm": 0.000814276107121259,
"learning_rate": 4.042929292929293e-06,
"loss": 0.0,
"step": 19950
},
{
"epoch": 71.85251798561151,
"grad_norm": 0.0007162923575378954,
"learning_rate": 4.041666666666667e-06,
"loss": 0.0,
"step": 19975
},
{
"epoch": 71.94244604316546,
"grad_norm": 0.000519581779371947,
"learning_rate": 4.04040404040404e-06,
"loss": 0.0,
"step": 20000
},
{
"epoch": 71.94244604316546,
"eval_loss": 0.0976732075214386,
"eval_runtime": 1338.7066,
"eval_samples_per_second": 1.66,
"eval_steps_per_second": 0.104,
"eval_wer": 5.539509738576612,
"step": 20000
},
{
"epoch": 72.03237410071942,
"grad_norm": 0.0013573451433330774,
"learning_rate": 4.039141414141414e-06,
"loss": 0.0001,
"step": 20025
},
{
"epoch": 72.12230215827338,
"grad_norm": 0.0006321736145764589,
"learning_rate": 4.037878787878788e-06,
"loss": 0.0,
"step": 20050
},
{
"epoch": 72.21223021582733,
"grad_norm": 0.00046551282866857946,
"learning_rate": 4.036616161616162e-06,
"loss": 0.0,
"step": 20075
},
{
"epoch": 72.3021582733813,
"grad_norm": 0.00047266227193176746,
"learning_rate": 4.0353535353535355e-06,
"loss": 0.0,
"step": 20100
},
{
"epoch": 72.39208633093526,
"grad_norm": 0.0004692314541898668,
"learning_rate": 4.0340909090909095e-06,
"loss": 0.0,
"step": 20125
},
{
"epoch": 72.4820143884892,
"grad_norm": 0.0005892490735277534,
"learning_rate": 4.0328282828282835e-06,
"loss": 0.0,
"step": 20150
},
{
"epoch": 72.57194244604317,
"grad_norm": 0.0005393667961470783,
"learning_rate": 4.031565656565657e-06,
"loss": 0.0001,
"step": 20175
},
{
"epoch": 72.66187050359713,
"grad_norm": 0.0007663563592359424,
"learning_rate": 4.030303030303031e-06,
"loss": 0.0,
"step": 20200
},
{
"epoch": 72.75179856115108,
"grad_norm": 0.0005675546126440167,
"learning_rate": 4.029040404040405e-06,
"loss": 0.0,
"step": 20225
},
{
"epoch": 72.84172661870504,
"grad_norm": 0.0006041157757863402,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0,
"step": 20250
},
{
"epoch": 72.93165467625899,
"grad_norm": 0.0006022896850481629,
"learning_rate": 4.026515151515152e-06,
"loss": 0.0001,
"step": 20275
},
{
"epoch": 73.02158273381295,
"grad_norm": 0.0005813241587020457,
"learning_rate": 4.025252525252526e-06,
"loss": 0.0,
"step": 20300
},
{
"epoch": 73.11151079136691,
"grad_norm": 0.0006358566461130977,
"learning_rate": 4.0239898989899e-06,
"loss": 0.0,
"step": 20325
},
{
"epoch": 73.20143884892086,
"grad_norm": 0.0006074347766116261,
"learning_rate": 4.022727272727273e-06,
"loss": 0.0,
"step": 20350
},
{
"epoch": 73.29136690647482,
"grad_norm": 0.0005062387208454311,
"learning_rate": 4.021464646464647e-06,
"loss": 0.0003,
"step": 20375
},
{
"epoch": 73.38129496402878,
"grad_norm": 0.0010172536130994558,
"learning_rate": 4.02020202020202e-06,
"loss": 0.0,
"step": 20400
},
{
"epoch": 73.47122302158273,
"grad_norm": 0.0006235135952010751,
"learning_rate": 4.018939393939394e-06,
"loss": 0.0,
"step": 20425
},
{
"epoch": 73.56115107913669,
"grad_norm": 0.0009783974383026361,
"learning_rate": 4.017676767676768e-06,
"loss": 0.0,
"step": 20450
},
{
"epoch": 73.65107913669064,
"grad_norm": 0.0005355635657906532,
"learning_rate": 4.016414141414141e-06,
"loss": 0.0,
"step": 20475
},
{
"epoch": 73.7410071942446,
"grad_norm": 0.0004634314973372966,
"learning_rate": 4.015151515151515e-06,
"loss": 0.0,
"step": 20500
},
{
"epoch": 73.83093525179856,
"grad_norm": 0.0005511495401151478,
"learning_rate": 4.013888888888889e-06,
"loss": 0.0,
"step": 20525
},
{
"epoch": 73.92086330935251,
"grad_norm": 0.0010061068460345268,
"learning_rate": 4.012626262626263e-06,
"loss": 0.0,
"step": 20550
},
{
"epoch": 74.01079136690647,
"grad_norm": 0.3256176710128784,
"learning_rate": 4.011363636363636e-06,
"loss": 0.0007,
"step": 20575
},
{
"epoch": 74.10071942446044,
"grad_norm": 0.17023605108261108,
"learning_rate": 4.01010101010101e-06,
"loss": 0.0008,
"step": 20600
},
{
"epoch": 74.19064748201438,
"grad_norm": 0.8051077723503113,
"learning_rate": 4.008838383838384e-06,
"loss": 0.0078,
"step": 20625
},
{
"epoch": 74.28057553956835,
"grad_norm": 0.4720918536186218,
"learning_rate": 4.0075757575757575e-06,
"loss": 0.0062,
"step": 20650
},
{
"epoch": 74.37050359712231,
"grad_norm": 0.4814521074295044,
"learning_rate": 4.0063131313131315e-06,
"loss": 0.0061,
"step": 20675
},
{
"epoch": 74.46043165467626,
"grad_norm": 0.7329695820808411,
"learning_rate": 4.0050505050505055e-06,
"loss": 0.0069,
"step": 20700
},
{
"epoch": 74.55035971223022,
"grad_norm": 0.713927686214447,
"learning_rate": 4.0037878787878795e-06,
"loss": 0.0061,
"step": 20725
},
{
"epoch": 74.64028776978417,
"grad_norm": 0.6485239863395691,
"learning_rate": 4.002525252525253e-06,
"loss": 0.0064,
"step": 20750
},
{
"epoch": 74.73021582733813,
"grad_norm": 0.8775496482849121,
"learning_rate": 4.001262626262627e-06,
"loss": 0.0048,
"step": 20775
},
{
"epoch": 74.82014388489209,
"grad_norm": 0.2677914798259735,
"learning_rate": 4.000000000000001e-06,
"loss": 0.004,
"step": 20800
},
{
"epoch": 74.91007194244604,
"grad_norm": 0.38305044174194336,
"learning_rate": 3.998737373737374e-06,
"loss": 0.0028,
"step": 20825
},
{
"epoch": 75.0,
"grad_norm": 0.05106651037931442,
"learning_rate": 3.997474747474748e-06,
"loss": 0.0021,
"step": 20850
},
{
"epoch": 75.08992805755396,
"grad_norm": 0.01168102491647005,
"learning_rate": 3.996212121212121e-06,
"loss": 0.0012,
"step": 20875
},
{
"epoch": 75.17985611510791,
"grad_norm": 0.22549034655094147,
"learning_rate": 3.994949494949496e-06,
"loss": 0.0015,
"step": 20900
},
{
"epoch": 75.26978417266187,
"grad_norm": 0.022075073793530464,
"learning_rate": 3.993686868686869e-06,
"loss": 0.0026,
"step": 20925
},
{
"epoch": 75.35971223021583,
"grad_norm": 0.0188248660415411,
"learning_rate": 3.992424242424243e-06,
"loss": 0.0017,
"step": 20950
},
{
"epoch": 75.44964028776978,
"grad_norm": 0.47026434540748596,
"learning_rate": 3.991161616161616e-06,
"loss": 0.0026,
"step": 20975
},
{
"epoch": 75.53956834532374,
"grad_norm": 0.2045595496892929,
"learning_rate": 3.98989898989899e-06,
"loss": 0.0019,
"step": 21000
},
{
"epoch": 75.53956834532374,
"eval_loss": 0.08847362548112869,
"eval_runtime": 1337.9238,
"eval_samples_per_second": 1.661,
"eval_steps_per_second": 0.104,
"eval_wer": 6.294897430200697,
"step": 21000
},
{
"epoch": 75.62949640287769,
"grad_norm": 0.0665188655257225,
"learning_rate": 3.988636363636364e-06,
"loss": 0.0014,
"step": 21025
},
{
"epoch": 75.71942446043165,
"grad_norm": 0.33609738945961,
"learning_rate": 3.987373737373737e-06,
"loss": 0.0011,
"step": 21050
},
{
"epoch": 75.80935251798562,
"grad_norm": 0.4631134867668152,
"learning_rate": 3.986111111111112e-06,
"loss": 0.0023,
"step": 21075
},
{
"epoch": 75.89928057553956,
"grad_norm": 0.26408031582832336,
"learning_rate": 3.984848484848485e-06,
"loss": 0.0019,
"step": 21100
},
{
"epoch": 75.98920863309353,
"grad_norm": 0.3067505657672882,
"learning_rate": 3.983585858585859e-06,
"loss": 0.0021,
"step": 21125
},
{
"epoch": 76.07913669064749,
"grad_norm": 0.0688316822052002,
"learning_rate": 3.982323232323232e-06,
"loss": 0.0024,
"step": 21150
},
{
"epoch": 76.16906474820144,
"grad_norm": 1.5255663394927979,
"learning_rate": 3.981060606060606e-06,
"loss": 0.0012,
"step": 21175
},
{
"epoch": 76.2589928057554,
"grad_norm": 0.368730753660202,
"learning_rate": 3.97979797979798e-06,
"loss": 0.001,
"step": 21200
},
{
"epoch": 76.34892086330935,
"grad_norm": 0.019969308748841286,
"learning_rate": 3.9785353535353535e-06,
"loss": 0.0006,
"step": 21225
},
{
"epoch": 76.43884892086331,
"grad_norm": 0.070771723985672,
"learning_rate": 3.9772727272727275e-06,
"loss": 0.0004,
"step": 21250
},
{
"epoch": 76.52877697841727,
"grad_norm": 0.023271985352039337,
"learning_rate": 3.9760101010101015e-06,
"loss": 0.0007,
"step": 21275
},
{
"epoch": 76.61870503597122,
"grad_norm": 0.027517560869455338,
"learning_rate": 3.9747474747474755e-06,
"loss": 0.0004,
"step": 21300
},
{
"epoch": 76.70863309352518,
"grad_norm": 0.009323998354375362,
"learning_rate": 3.973484848484849e-06,
"loss": 0.0007,
"step": 21325
},
{
"epoch": 76.79856115107914,
"grad_norm": 0.007815494202077389,
"learning_rate": 3.972222222222223e-06,
"loss": 0.0007,
"step": 21350
},
{
"epoch": 76.88848920863309,
"grad_norm": 0.06828250735998154,
"learning_rate": 3.970959595959597e-06,
"loss": 0.0004,
"step": 21375
},
{
"epoch": 76.97841726618705,
"grad_norm": 0.4169680178165436,
"learning_rate": 3.96969696969697e-06,
"loss": 0.0007,
"step": 21400
},
{
"epoch": 77.06834532374101,
"grad_norm": 0.010289140976965427,
"learning_rate": 3.968434343434344e-06,
"loss": 0.0003,
"step": 21425
},
{
"epoch": 77.15827338129496,
"grad_norm": 0.02134793810546398,
"learning_rate": 3.967171717171717e-06,
"loss": 0.0003,
"step": 21450
},
{
"epoch": 77.24820143884892,
"grad_norm": 0.005463853012770414,
"learning_rate": 3.965909090909091e-06,
"loss": 0.0001,
"step": 21475
},
{
"epoch": 77.33812949640287,
"grad_norm": 0.0035135000944137573,
"learning_rate": 3.964646464646465e-06,
"loss": 0.0001,
"step": 21500
},
{
"epoch": 77.42805755395683,
"grad_norm": 0.01657390221953392,
"learning_rate": 3.963383838383839e-06,
"loss": 0.0001,
"step": 21525
},
{
"epoch": 77.5179856115108,
"grad_norm": 0.1767745018005371,
"learning_rate": 3.962121212121213e-06,
"loss": 0.0007,
"step": 21550
},
{
"epoch": 77.60791366906474,
"grad_norm": 0.016838785260915756,
"learning_rate": 3.960858585858586e-06,
"loss": 0.0001,
"step": 21575
},
{
"epoch": 77.6978417266187,
"grad_norm": 0.0039493367075920105,
"learning_rate": 3.95959595959596e-06,
"loss": 0.0001,
"step": 21600
},
{
"epoch": 77.78776978417267,
"grad_norm": 0.0031421987805515528,
"learning_rate": 3.958333333333333e-06,
"loss": 0.0003,
"step": 21625
},
{
"epoch": 77.87769784172662,
"grad_norm": 0.0026466776616871357,
"learning_rate": 3.957070707070707e-06,
"loss": 0.0003,
"step": 21650
},
{
"epoch": 77.96762589928058,
"grad_norm": 0.009947208687663078,
"learning_rate": 3.955808080808081e-06,
"loss": 0.0002,
"step": 21675
},
{
"epoch": 78.05755395683454,
"grad_norm": 0.1049116924405098,
"learning_rate": 3.954545454545454e-06,
"loss": 0.0002,
"step": 21700
},
{
"epoch": 78.14748201438849,
"grad_norm": 0.0023068960290402174,
"learning_rate": 3.953282828282828e-06,
"loss": 0.0001,
"step": 21725
},
{
"epoch": 78.23741007194245,
"grad_norm": 0.003103764960542321,
"learning_rate": 3.952020202020202e-06,
"loss": 0.0001,
"step": 21750
},
{
"epoch": 78.3273381294964,
"grad_norm": 0.002706879284232855,
"learning_rate": 3.950757575757576e-06,
"loss": 0.0001,
"step": 21775
},
{
"epoch": 78.41726618705036,
"grad_norm": 0.004320697858929634,
"learning_rate": 3.9494949494949496e-06,
"loss": 0.0001,
"step": 21800
},
{
"epoch": 78.50719424460432,
"grad_norm": 0.005596183240413666,
"learning_rate": 3.9482323232323236e-06,
"loss": 0.0002,
"step": 21825
},
{
"epoch": 78.59712230215827,
"grad_norm": 0.0037838639691472054,
"learning_rate": 3.9469696969696976e-06,
"loss": 0.0003,
"step": 21850
},
{
"epoch": 78.68705035971223,
"grad_norm": 0.00796448066830635,
"learning_rate": 3.945707070707071e-06,
"loss": 0.0001,
"step": 21875
},
{
"epoch": 78.77697841726619,
"grad_norm": 0.003022188087925315,
"learning_rate": 3.944444444444445e-06,
"loss": 0.0001,
"step": 21900
},
{
"epoch": 78.86690647482014,
"grad_norm": 0.0022381923627108335,
"learning_rate": 3.943181818181819e-06,
"loss": 0.0002,
"step": 21925
},
{
"epoch": 78.9568345323741,
"grad_norm": 0.0027954999823123217,
"learning_rate": 3.941919191919193e-06,
"loss": 0.0001,
"step": 21950
},
{
"epoch": 79.04676258992805,
"grad_norm": 0.0016978129278868437,
"learning_rate": 3.940656565656566e-06,
"loss": 0.0001,
"step": 21975
},
{
"epoch": 79.13669064748201,
"grad_norm": 0.0017409235006198287,
"learning_rate": 3.93939393939394e-06,
"loss": 0.0003,
"step": 22000
},
{
"epoch": 79.13669064748201,
"eval_loss": 0.0888415277004242,
"eval_runtime": 1337.7919,
"eval_samples_per_second": 1.661,
"eval_steps_per_second": 0.104,
"eval_wer": 5.598755832037325,
"step": 22000
}
],
"logging_steps": 25,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 360,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.777235958847242e+21,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}