{ "best_metric": 5.539509738576612, "best_model_checkpoint": "./training/results/checkpoint-20000", "epoch": 79.13669064748201, "eval_steps": 1000, "global_step": 22000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08992805755395683, "grad_norm": 12.73649787902832, "learning_rate": 1.2500000000000002e-07, "loss": 3.2522, "step": 25 }, { "epoch": 0.17985611510791366, "grad_norm": 12.000336647033691, "learning_rate": 2.5000000000000004e-07, "loss": 3.0617, "step": 50 }, { "epoch": 0.2697841726618705, "grad_norm": 10.76065444946289, "learning_rate": 3.75e-07, "loss": 2.7165, "step": 75 }, { "epoch": 0.3597122302158273, "grad_norm": 8.36201286315918, "learning_rate": 5.000000000000001e-07, "loss": 2.2607, "step": 100 }, { "epoch": 0.44964028776978415, "grad_norm": 7.234769344329834, "learning_rate": 6.25e-07, "loss": 1.8433, "step": 125 }, { "epoch": 0.539568345323741, "grad_norm": 6.549698829650879, "learning_rate": 7.5e-07, "loss": 1.5515, "step": 150 }, { "epoch": 0.6294964028776978, "grad_norm": 7.549570083618164, "learning_rate": 8.75e-07, "loss": 1.3346, "step": 175 }, { "epoch": 0.7194244604316546, "grad_norm": 5.8322930335998535, "learning_rate": 1.0000000000000002e-06, "loss": 1.0572, "step": 200 }, { "epoch": 0.8093525179856115, "grad_norm": 3.925255537033081, "learning_rate": 1.125e-06, "loss": 0.6348, "step": 225 }, { "epoch": 0.8992805755395683, "grad_norm": 3.1902644634246826, "learning_rate": 1.25e-06, "loss": 0.4882, "step": 250 }, { "epoch": 0.9892086330935251, "grad_norm": 3.355315923690796, "learning_rate": 1.3750000000000002e-06, "loss": 0.4032, "step": 275 }, { "epoch": 1.079136690647482, "grad_norm": 3.4707915782928467, "learning_rate": 1.5e-06, "loss": 0.3355, "step": 300 }, { "epoch": 1.169064748201439, "grad_norm": 3.261484384536743, "learning_rate": 1.6250000000000001e-06, "loss": 0.2896, "step": 325 }, { "epoch": 1.2589928057553956, "grad_norm": 3.3107025623321533, "learning_rate": 1.75e-06, "loss": 0.2685, "step": 350 }, { "epoch": 1.3489208633093526, "grad_norm": 2.6028969287872314, "learning_rate": 1.8750000000000003e-06, "loss": 0.2365, "step": 375 }, { "epoch": 1.4388489208633093, "grad_norm": 3.380187749862671, "learning_rate": 2.0000000000000003e-06, "loss": 0.2333, "step": 400 }, { "epoch": 1.5287769784172662, "grad_norm": 3.0845112800598145, "learning_rate": 2.125e-06, "loss": 0.2191, "step": 425 }, { "epoch": 1.6187050359712232, "grad_norm": 3.15523099899292, "learning_rate": 2.25e-06, "loss": 0.1949, "step": 450 }, { "epoch": 1.70863309352518, "grad_norm": 2.5198237895965576, "learning_rate": 2.375e-06, "loss": 0.1756, "step": 475 }, { "epoch": 1.7985611510791366, "grad_norm": 2.7945399284362793, "learning_rate": 2.5e-06, "loss": 0.1748, "step": 500 }, { "epoch": 1.8884892086330936, "grad_norm": 3.299269199371338, "learning_rate": 2.6250000000000003e-06, "loss": 0.1711, "step": 525 }, { "epoch": 1.9784172661870505, "grad_norm": 2.3727056980133057, "learning_rate": 2.7500000000000004e-06, "loss": 0.1495, "step": 550 }, { "epoch": 2.068345323741007, "grad_norm": 2.1909244060516357, "learning_rate": 2.875e-06, "loss": 0.1196, "step": 575 }, { "epoch": 2.158273381294964, "grad_norm": 2.45758318901062, "learning_rate": 3e-06, "loss": 0.1023, "step": 600 }, { "epoch": 2.2482014388489207, "grad_norm": 2.009880542755127, "learning_rate": 3.125e-06, "loss": 0.1019, "step": 625 }, { "epoch": 2.338129496402878, "grad_norm": 2.2170872688293457, "learning_rate": 3.2500000000000002e-06, "loss": 0.0948, "step": 650 }, { "epoch": 2.4280575539568345, "grad_norm": 1.9289822578430176, "learning_rate": 3.3750000000000003e-06, "loss": 0.0934, "step": 675 }, { "epoch": 2.5179856115107913, "grad_norm": 2.0615289211273193, "learning_rate": 3.5e-06, "loss": 0.0935, "step": 700 }, { "epoch": 2.6079136690647484, "grad_norm": 2.231041193008423, "learning_rate": 3.625e-06, "loss": 0.0923, "step": 725 }, { "epoch": 2.697841726618705, "grad_norm": 1.953312873840332, "learning_rate": 3.7500000000000005e-06, "loss": 0.0844, "step": 750 }, { "epoch": 2.787769784172662, "grad_norm": 2.1245667934417725, "learning_rate": 3.875e-06, "loss": 0.0831, "step": 775 }, { "epoch": 2.8776978417266186, "grad_norm": 1.8499614000320435, "learning_rate": 4.000000000000001e-06, "loss": 0.0841, "step": 800 }, { "epoch": 2.9676258992805753, "grad_norm": 2.0503857135772705, "learning_rate": 4.125e-06, "loss": 0.0854, "step": 825 }, { "epoch": 3.0575539568345325, "grad_norm": 2.0084242820739746, "learning_rate": 4.25e-06, "loss": 0.0621, "step": 850 }, { "epoch": 3.147482014388489, "grad_norm": 1.3122639656066895, "learning_rate": 4.3750000000000005e-06, "loss": 0.0434, "step": 875 }, { "epoch": 3.237410071942446, "grad_norm": 1.3615615367889404, "learning_rate": 4.5e-06, "loss": 0.0416, "step": 900 }, { "epoch": 3.327338129496403, "grad_norm": 1.533996343612671, "learning_rate": 4.625000000000001e-06, "loss": 0.0451, "step": 925 }, { "epoch": 3.41726618705036, "grad_norm": 1.573549509048462, "learning_rate": 4.75e-06, "loss": 0.0404, "step": 950 }, { "epoch": 3.5071942446043165, "grad_norm": 1.4288333654403687, "learning_rate": 4.875e-06, "loss": 0.044, "step": 975 }, { "epoch": 3.597122302158273, "grad_norm": 1.5075387954711914, "learning_rate": 5e-06, "loss": 0.0479, "step": 1000 }, { "epoch": 3.597122302158273, "eval_loss": 0.10350359231233597, "eval_runtime": 1344.3937, "eval_samples_per_second": 1.653, "eval_steps_per_second": 0.103, "eval_wer": 20.29178701029401, "step": 1000 }, { "epoch": 3.68705035971223, "grad_norm": 1.842606782913208, "learning_rate": 4.998737373737374e-06, "loss": 0.0467, "step": 1025 }, { "epoch": 3.776978417266187, "grad_norm": 1.495784044265747, "learning_rate": 4.997474747474748e-06, "loss": 0.0437, "step": 1050 }, { "epoch": 3.866906474820144, "grad_norm": 2.054900646209717, "learning_rate": 4.9962121212121216e-06, "loss": 0.0497, "step": 1075 }, { "epoch": 3.956834532374101, "grad_norm": 1.438658356666565, "learning_rate": 4.9949494949494956e-06, "loss": 0.0398, "step": 1100 }, { "epoch": 4.046762589928058, "grad_norm": 1.3041224479675293, "learning_rate": 4.993686868686869e-06, "loss": 0.0293, "step": 1125 }, { "epoch": 4.136690647482014, "grad_norm": 1.2206145524978638, "learning_rate": 4.992424242424243e-06, "loss": 0.0227, "step": 1150 }, { "epoch": 4.226618705035971, "grad_norm": 1.2926621437072754, "learning_rate": 4.991161616161617e-06, "loss": 0.0231, "step": 1175 }, { "epoch": 4.316546762589928, "grad_norm": 1.4683257341384888, "learning_rate": 4.98989898989899e-06, "loss": 0.023, "step": 1200 }, { "epoch": 4.406474820143885, "grad_norm": 1.3095593452453613, "learning_rate": 4.988636363636364e-06, "loss": 0.0226, "step": 1225 }, { "epoch": 4.496402877697841, "grad_norm": 0.7059262990951538, "learning_rate": 4.987373737373738e-06, "loss": 0.0225, "step": 1250 }, { "epoch": 4.586330935251799, "grad_norm": 1.1493045091629028, "learning_rate": 4.986111111111112e-06, "loss": 0.022, "step": 1275 }, { "epoch": 4.676258992805756, "grad_norm": 1.9609806537628174, "learning_rate": 4.984848484848485e-06, "loss": 0.0232, "step": 1300 }, { "epoch": 4.766187050359712, "grad_norm": 1.5463200807571411, "learning_rate": 4.983585858585859e-06, "loss": 0.0206, "step": 1325 }, { "epoch": 4.856115107913669, "grad_norm": 0.858127772808075, "learning_rate": 4.982323232323233e-06, "loss": 0.0222, "step": 1350 }, { "epoch": 4.946043165467626, "grad_norm": 0.8384924530982971, "learning_rate": 4.981060606060606e-06, "loss": 0.0201, "step": 1375 }, { "epoch": 5.0359712230215825, "grad_norm": 0.9966625571250916, "learning_rate": 4.97979797979798e-06, "loss": 0.0173, "step": 1400 }, { "epoch": 5.125899280575539, "grad_norm": 0.6609445214271545, "learning_rate": 4.978535353535353e-06, "loss": 0.0113, "step": 1425 }, { "epoch": 5.215827338129497, "grad_norm": 0.82105952501297, "learning_rate": 4.977272727272728e-06, "loss": 0.012, "step": 1450 }, { "epoch": 5.305755395683454, "grad_norm": 1.0994760990142822, "learning_rate": 4.976010101010101e-06, "loss": 0.0118, "step": 1475 }, { "epoch": 5.39568345323741, "grad_norm": 0.4543660283088684, "learning_rate": 4.974747474747475e-06, "loss": 0.0112, "step": 1500 }, { "epoch": 5.485611510791367, "grad_norm": 3.425143241882324, "learning_rate": 4.973484848484849e-06, "loss": 0.0113, "step": 1525 }, { "epoch": 5.575539568345324, "grad_norm": 0.7691114544868469, "learning_rate": 4.9722222222222224e-06, "loss": 0.0114, "step": 1550 }, { "epoch": 5.66546762589928, "grad_norm": 0.5446438789367676, "learning_rate": 4.9709595959595964e-06, "loss": 0.0121, "step": 1575 }, { "epoch": 5.755395683453237, "grad_norm": 0.7232896089553833, "learning_rate": 4.9696969696969696e-06, "loss": 0.0118, "step": 1600 }, { "epoch": 5.845323741007194, "grad_norm": 1.3025506734848022, "learning_rate": 4.968434343434344e-06, "loss": 0.0135, "step": 1625 }, { "epoch": 5.935251798561151, "grad_norm": 1.2080421447753906, "learning_rate": 4.9671717171717176e-06, "loss": 0.0126, "step": 1650 }, { "epoch": 6.025179856115108, "grad_norm": 0.4218277633190155, "learning_rate": 4.9659090909090916e-06, "loss": 0.0094, "step": 1675 }, { "epoch": 6.115107913669065, "grad_norm": 0.5942659378051758, "learning_rate": 4.964646464646465e-06, "loss": 0.0071, "step": 1700 }, { "epoch": 6.205035971223022, "grad_norm": 0.31671133637428284, "learning_rate": 4.963383838383839e-06, "loss": 0.008, "step": 1725 }, { "epoch": 6.294964028776978, "grad_norm": 0.3538670539855957, "learning_rate": 4.962121212121213e-06, "loss": 0.0066, "step": 1750 }, { "epoch": 6.384892086330935, "grad_norm": 0.8252100348472595, "learning_rate": 4.960858585858586e-06, "loss": 0.006, "step": 1775 }, { "epoch": 6.474820143884892, "grad_norm": 0.9238548278808594, "learning_rate": 4.95959595959596e-06, "loss": 0.0074, "step": 1800 }, { "epoch": 6.564748201438849, "grad_norm": 1.1760324239730835, "learning_rate": 4.958333333333334e-06, "loss": 0.0066, "step": 1825 }, { "epoch": 6.654676258992806, "grad_norm": 0.3382113575935364, "learning_rate": 4.957070707070708e-06, "loss": 0.0103, "step": 1850 }, { "epoch": 6.744604316546763, "grad_norm": 0.9418781399726868, "learning_rate": 4.955808080808081e-06, "loss": 0.0092, "step": 1875 }, { "epoch": 6.83453237410072, "grad_norm": 0.7677399516105652, "learning_rate": 4.954545454545455e-06, "loss": 0.009, "step": 1900 }, { "epoch": 6.924460431654676, "grad_norm": 0.32002565264701843, "learning_rate": 4.953282828282829e-06, "loss": 0.0075, "step": 1925 }, { "epoch": 7.014388489208633, "grad_norm": 1.0049771070480347, "learning_rate": 4.952020202020202e-06, "loss": 0.0071, "step": 1950 }, { "epoch": 7.10431654676259, "grad_norm": 0.513941764831543, "learning_rate": 4.950757575757576e-06, "loss": 0.0043, "step": 1975 }, { "epoch": 7.194244604316546, "grad_norm": 0.8406050205230713, "learning_rate": 4.94949494949495e-06, "loss": 0.005, "step": 2000 }, { "epoch": 7.194244604316546, "eval_loss": 0.09395472705364227, "eval_runtime": 1340.6412, "eval_samples_per_second": 1.657, "eval_steps_per_second": 0.104, "eval_wer": 10.419906687402799, "step": 2000 }, { "epoch": 7.284172661870503, "grad_norm": 0.47227388620376587, "learning_rate": 4.948232323232323e-06, "loss": 0.005, "step": 2025 }, { "epoch": 7.374100719424461, "grad_norm": 0.2972259819507599, "learning_rate": 4.946969696969697e-06, "loss": 0.0047, "step": 2050 }, { "epoch": 7.4640287769784175, "grad_norm": 0.580878496170044, "learning_rate": 4.945707070707071e-06, "loss": 0.0047, "step": 2075 }, { "epoch": 7.553956834532374, "grad_norm": 0.0858689397573471, "learning_rate": 4.944444444444445e-06, "loss": 0.0047, "step": 2100 }, { "epoch": 7.643884892086331, "grad_norm": 0.9921578168869019, "learning_rate": 4.9431818181818184e-06, "loss": 0.0049, "step": 2125 }, { "epoch": 7.733812949640288, "grad_norm": 0.3222315311431885, "learning_rate": 4.9419191919191924e-06, "loss": 0.0039, "step": 2150 }, { "epoch": 7.823741007194244, "grad_norm": 0.2401006668806076, "learning_rate": 4.940656565656566e-06, "loss": 0.0045, "step": 2175 }, { "epoch": 7.913669064748201, "grad_norm": 0.26786544919013977, "learning_rate": 4.93939393939394e-06, "loss": 0.0037, "step": 2200 }, { "epoch": 8.003597122302159, "grad_norm": 1.120921015739441, "learning_rate": 4.938131313131314e-06, "loss": 0.0048, "step": 2225 }, { "epoch": 8.093525179856115, "grad_norm": 0.7425853610038757, "learning_rate": 4.936868686868687e-06, "loss": 0.0036, "step": 2250 }, { "epoch": 8.183453237410072, "grad_norm": 0.19618873298168182, "learning_rate": 4.935606060606061e-06, "loss": 0.0038, "step": 2275 }, { "epoch": 8.273381294964029, "grad_norm": 0.41672375798225403, "learning_rate": 4.934343434343435e-06, "loss": 0.003, "step": 2300 }, { "epoch": 8.363309352517986, "grad_norm": 0.3363110423088074, "learning_rate": 4.933080808080809e-06, "loss": 0.0031, "step": 2325 }, { "epoch": 8.453237410071942, "grad_norm": 0.8529962301254272, "learning_rate": 4.931818181818182e-06, "loss": 0.0034, "step": 2350 }, { "epoch": 8.543165467625899, "grad_norm": 0.15698625147342682, "learning_rate": 4.930555555555556e-06, "loss": 0.0033, "step": 2375 }, { "epoch": 8.633093525179856, "grad_norm": 0.19619868695735931, "learning_rate": 4.92929292929293e-06, "loss": 0.004, "step": 2400 }, { "epoch": 8.723021582733812, "grad_norm": 0.2903304994106293, "learning_rate": 4.928030303030303e-06, "loss": 0.0034, "step": 2425 }, { "epoch": 8.81294964028777, "grad_norm": 0.5127314329147339, "learning_rate": 4.926767676767677e-06, "loss": 0.0035, "step": 2450 }, { "epoch": 8.902877697841726, "grad_norm": 1.0652037858963013, "learning_rate": 4.925505050505051e-06, "loss": 0.0045, "step": 2475 }, { "epoch": 8.992805755395683, "grad_norm": 0.9570706486701965, "learning_rate": 4.924242424242425e-06, "loss": 0.0042, "step": 2500 }, { "epoch": 9.082733812949641, "grad_norm": 0.5939081907272339, "learning_rate": 4.922979797979798e-06, "loss": 0.0032, "step": 2525 }, { "epoch": 9.172661870503598, "grad_norm": 0.25739356875419617, "learning_rate": 4.921717171717172e-06, "loss": 0.0038, "step": 2550 }, { "epoch": 9.262589928057555, "grad_norm": 0.17940430343151093, "learning_rate": 4.920454545454546e-06, "loss": 0.0029, "step": 2575 }, { "epoch": 9.352517985611511, "grad_norm": 0.33168259263038635, "learning_rate": 4.919191919191919e-06, "loss": 0.0028, "step": 2600 }, { "epoch": 9.442446043165468, "grad_norm": 0.20831653475761414, "learning_rate": 4.917929292929293e-06, "loss": 0.002, "step": 2625 }, { "epoch": 9.532374100719425, "grad_norm": 0.19978338479995728, "learning_rate": 4.9166666666666665e-06, "loss": 0.0025, "step": 2650 }, { "epoch": 9.622302158273381, "grad_norm": 0.23154591023921967, "learning_rate": 4.915404040404041e-06, "loss": 0.0033, "step": 2675 }, { "epoch": 9.712230215827338, "grad_norm": 0.7622235417366028, "learning_rate": 4.9141414141414145e-06, "loss": 0.0039, "step": 2700 }, { "epoch": 9.802158273381295, "grad_norm": 0.23092857003211975, "learning_rate": 4.9128787878787885e-06, "loss": 0.0044, "step": 2725 }, { "epoch": 9.892086330935252, "grad_norm": 0.5034282207489014, "learning_rate": 4.9116161616161625e-06, "loss": 0.0035, "step": 2750 }, { "epoch": 9.982014388489208, "grad_norm": 0.2582780122756958, "learning_rate": 4.910353535353536e-06, "loss": 0.0033, "step": 2775 }, { "epoch": 10.071942446043165, "grad_norm": 0.4610576033592224, "learning_rate": 4.90909090909091e-06, "loss": 0.0037, "step": 2800 }, { "epoch": 10.161870503597122, "grad_norm": 0.217066690325737, "learning_rate": 4.907828282828283e-06, "loss": 0.0028, "step": 2825 }, { "epoch": 10.251798561151078, "grad_norm": 0.05713683366775513, "learning_rate": 4.906565656565658e-06, "loss": 0.003, "step": 2850 }, { "epoch": 10.341726618705035, "grad_norm": 0.5356289148330688, "learning_rate": 4.905303030303031e-06, "loss": 0.0018, "step": 2875 }, { "epoch": 10.431654676258994, "grad_norm": 0.37969082593917847, "learning_rate": 4.904040404040405e-06, "loss": 0.0022, "step": 2900 }, { "epoch": 10.52158273381295, "grad_norm": 1.078008770942688, "learning_rate": 4.902777777777778e-06, "loss": 0.0032, "step": 2925 }, { "epoch": 10.611510791366907, "grad_norm": 0.26670244336128235, "learning_rate": 4.901515151515152e-06, "loss": 0.0027, "step": 2950 }, { "epoch": 10.701438848920864, "grad_norm": 0.673686683177948, "learning_rate": 4.900252525252526e-06, "loss": 0.0029, "step": 2975 }, { "epoch": 10.79136690647482, "grad_norm": 0.37779000401496887, "learning_rate": 4.898989898989899e-06, "loss": 0.0022, "step": 3000 }, { "epoch": 10.79136690647482, "eval_loss": 0.10011211037635803, "eval_runtime": 1344.035, "eval_samples_per_second": 1.653, "eval_steps_per_second": 0.103, "eval_wer": 9.049840776123824, "step": 3000 }, { "epoch": 10.881294964028777, "grad_norm": 0.09616148471832275, "learning_rate": 4.897727272727273e-06, "loss": 0.0041, "step": 3025 }, { "epoch": 10.971223021582734, "grad_norm": 0.8408087491989136, "learning_rate": 4.896464646464647e-06, "loss": 0.0046, "step": 3050 }, { "epoch": 11.06115107913669, "grad_norm": 0.1868293583393097, "learning_rate": 4.895202020202021e-06, "loss": 0.0027, "step": 3075 }, { "epoch": 11.151079136690647, "grad_norm": 0.19219942390918732, "learning_rate": 4.893939393939394e-06, "loss": 0.0024, "step": 3100 }, { "epoch": 11.241007194244604, "grad_norm": 3.7455391883850098, "learning_rate": 4.892676767676768e-06, "loss": 0.0027, "step": 3125 }, { "epoch": 11.33093525179856, "grad_norm": 0.2693164348602295, "learning_rate": 4.891414141414142e-06, "loss": 0.002, "step": 3150 }, { "epoch": 11.420863309352518, "grad_norm": 0.8100782632827759, "learning_rate": 4.890151515151515e-06, "loss": 0.0033, "step": 3175 }, { "epoch": 11.510791366906474, "grad_norm": 0.30300647020339966, "learning_rate": 4.888888888888889e-06, "loss": 0.0025, "step": 3200 }, { "epoch": 11.600719424460431, "grad_norm": 0.49988773465156555, "learning_rate": 4.887626262626263e-06, "loss": 0.002, "step": 3225 }, { "epoch": 11.690647482014388, "grad_norm": 0.2162599414587021, "learning_rate": 4.8863636363636365e-06, "loss": 0.0024, "step": 3250 }, { "epoch": 11.780575539568346, "grad_norm": 2.3612468242645264, "learning_rate": 4.8851010101010105e-06, "loss": 0.0045, "step": 3275 }, { "epoch": 11.870503597122303, "grad_norm": 0.4287119209766388, "learning_rate": 4.883838383838384e-06, "loss": 0.0051, "step": 3300 }, { "epoch": 11.96043165467626, "grad_norm": 0.46471118927001953, "learning_rate": 4.8825757575757585e-06, "loss": 0.0036, "step": 3325 }, { "epoch": 12.050359712230216, "grad_norm": 0.4310344159603119, "learning_rate": 4.881313131313132e-06, "loss": 0.0031, "step": 3350 }, { "epoch": 12.140287769784173, "grad_norm": 0.8054510951042175, "learning_rate": 4.880050505050506e-06, "loss": 0.0036, "step": 3375 }, { "epoch": 12.23021582733813, "grad_norm": 0.5783084630966187, "learning_rate": 4.878787878787879e-06, "loss": 0.0023, "step": 3400 }, { "epoch": 12.320143884892087, "grad_norm": 0.1537202149629593, "learning_rate": 4.877525252525253e-06, "loss": 0.0031, "step": 3425 }, { "epoch": 12.410071942446043, "grad_norm": 0.25773826241493225, "learning_rate": 4.876262626262627e-06, "loss": 0.0029, "step": 3450 }, { "epoch": 12.5, "grad_norm": 1.0221893787384033, "learning_rate": 4.875e-06, "loss": 0.003, "step": 3475 }, { "epoch": 12.589928057553957, "grad_norm": 0.2363336831331253, "learning_rate": 4.873737373737374e-06, "loss": 0.0036, "step": 3500 }, { "epoch": 12.679856115107913, "grad_norm": 0.9339852333068848, "learning_rate": 4.872474747474748e-06, "loss": 0.004, "step": 3525 }, { "epoch": 12.76978417266187, "grad_norm": 0.6633305549621582, "learning_rate": 4.871212121212122e-06, "loss": 0.0032, "step": 3550 }, { "epoch": 12.859712230215827, "grad_norm": 0.7261077761650085, "learning_rate": 4.869949494949495e-06, "loss": 0.0028, "step": 3575 }, { "epoch": 12.949640287769784, "grad_norm": 0.6666585803031921, "learning_rate": 4.868686868686869e-06, "loss": 0.0031, "step": 3600 }, { "epoch": 13.03956834532374, "grad_norm": 0.42198774218559265, "learning_rate": 4.867424242424243e-06, "loss": 0.0023, "step": 3625 }, { "epoch": 13.129496402877697, "grad_norm": 0.1100483238697052, "learning_rate": 4.866161616161616e-06, "loss": 0.002, "step": 3650 }, { "epoch": 13.219424460431656, "grad_norm": 0.5182665586471558, "learning_rate": 4.86489898989899e-06, "loss": 0.003, "step": 3675 }, { "epoch": 13.309352517985612, "grad_norm": 0.10821045190095901, "learning_rate": 4.863636363636364e-06, "loss": 0.0024, "step": 3700 }, { "epoch": 13.399280575539569, "grad_norm": 0.302943617105484, "learning_rate": 4.862373737373738e-06, "loss": 0.0022, "step": 3725 }, { "epoch": 13.489208633093526, "grad_norm": 0.34953269362449646, "learning_rate": 4.861111111111111e-06, "loss": 0.0024, "step": 3750 }, { "epoch": 13.579136690647482, "grad_norm": 0.3864242732524872, "learning_rate": 4.859848484848485e-06, "loss": 0.0025, "step": 3775 }, { "epoch": 13.66906474820144, "grad_norm": 0.23528048396110535, "learning_rate": 4.858585858585859e-06, "loss": 0.0028, "step": 3800 }, { "epoch": 13.758992805755396, "grad_norm": 0.31728431582450867, "learning_rate": 4.8573232323232325e-06, "loss": 0.0041, "step": 3825 }, { "epoch": 13.848920863309353, "grad_norm": 0.5803298950195312, "learning_rate": 4.8560606060606065e-06, "loss": 0.0028, "step": 3850 }, { "epoch": 13.93884892086331, "grad_norm": 0.30145183205604553, "learning_rate": 4.85479797979798e-06, "loss": 0.0022, "step": 3875 }, { "epoch": 14.028776978417266, "grad_norm": 0.43851757049560547, "learning_rate": 4.8535353535353545e-06, "loss": 0.0024, "step": 3900 }, { "epoch": 14.118705035971223, "grad_norm": 0.7910506725311279, "learning_rate": 4.852272727272728e-06, "loss": 0.0033, "step": 3925 }, { "epoch": 14.20863309352518, "grad_norm": 0.3168434500694275, "learning_rate": 4.851010101010102e-06, "loss": 0.0028, "step": 3950 }, { "epoch": 14.298561151079136, "grad_norm": 0.7242361307144165, "learning_rate": 4.849747474747475e-06, "loss": 0.0031, "step": 3975 }, { "epoch": 14.388489208633093, "grad_norm": 0.7368125319480896, "learning_rate": 4.848484848484849e-06, "loss": 0.0027, "step": 4000 }, { "epoch": 14.388489208633093, "eval_loss": 0.09274967014789581, "eval_runtime": 1343.7242, "eval_samples_per_second": 1.654, "eval_steps_per_second": 0.103, "eval_wer": 9.375694290157742, "step": 4000 }, { "epoch": 14.47841726618705, "grad_norm": 0.420599102973938, "learning_rate": 4.847222222222223e-06, "loss": 0.0028, "step": 4025 }, { "epoch": 14.568345323741006, "grad_norm": 0.3025602698326111, "learning_rate": 4.845959595959596e-06, "loss": 0.0028, "step": 4050 }, { "epoch": 14.658273381294965, "grad_norm": 0.7078948020935059, "learning_rate": 4.84469696969697e-06, "loss": 0.003, "step": 4075 }, { "epoch": 14.748201438848922, "grad_norm": 0.5534040331840515, "learning_rate": 4.843434343434344e-06, "loss": 0.0031, "step": 4100 }, { "epoch": 14.838129496402878, "grad_norm": 0.28715190291404724, "learning_rate": 4.842171717171718e-06, "loss": 0.0028, "step": 4125 }, { "epoch": 14.928057553956835, "grad_norm": 0.5861944556236267, "learning_rate": 4.840909090909091e-06, "loss": 0.0028, "step": 4150 }, { "epoch": 15.017985611510792, "grad_norm": 0.102662093937397, "learning_rate": 4.839646464646465e-06, "loss": 0.0057, "step": 4175 }, { "epoch": 15.107913669064748, "grad_norm": 0.15230265259742737, "learning_rate": 4.838383838383839e-06, "loss": 0.0023, "step": 4200 }, { "epoch": 15.197841726618705, "grad_norm": 0.12530238926410675, "learning_rate": 4.837121212121212e-06, "loss": 0.0017, "step": 4225 }, { "epoch": 15.287769784172662, "grad_norm": 0.09885858744382858, "learning_rate": 4.835858585858586e-06, "loss": 0.0022, "step": 4250 }, { "epoch": 15.377697841726619, "grad_norm": 0.1105910986661911, "learning_rate": 4.83459595959596e-06, "loss": 0.0026, "step": 4275 }, { "epoch": 15.467625899280575, "grad_norm": 0.3952260911464691, "learning_rate": 4.833333333333333e-06, "loss": 0.0021, "step": 4300 }, { "epoch": 15.557553956834532, "grad_norm": 0.6049605011940002, "learning_rate": 4.832070707070707e-06, "loss": 0.0021, "step": 4325 }, { "epoch": 15.647482014388489, "grad_norm": 0.7125779986381531, "learning_rate": 4.830808080808081e-06, "loss": 0.0015, "step": 4350 }, { "epoch": 15.737410071942445, "grad_norm": 0.16274645924568176, "learning_rate": 4.829545454545455e-06, "loss": 0.0019, "step": 4375 }, { "epoch": 15.827338129496402, "grad_norm": 0.6492106318473816, "learning_rate": 4.8282828282828285e-06, "loss": 0.0019, "step": 4400 }, { "epoch": 15.917266187050359, "grad_norm": 0.9411545991897583, "learning_rate": 4.8270202020202025e-06, "loss": 0.003, "step": 4425 }, { "epoch": 16.007194244604317, "grad_norm": 0.03323192521929741, "learning_rate": 4.8257575757575765e-06, "loss": 0.0018, "step": 4450 }, { "epoch": 16.097122302158272, "grad_norm": 0.1154596135020256, "learning_rate": 4.82449494949495e-06, "loss": 0.0015, "step": 4475 }, { "epoch": 16.18705035971223, "grad_norm": 0.41669028997421265, "learning_rate": 4.823232323232324e-06, "loss": 0.0016, "step": 4500 }, { "epoch": 16.276978417266186, "grad_norm": 0.25636962056159973, "learning_rate": 4.821969696969697e-06, "loss": 0.0014, "step": 4525 }, { "epoch": 16.366906474820144, "grad_norm": 3.250777244567871, "learning_rate": 4.820707070707072e-06, "loss": 0.0027, "step": 4550 }, { "epoch": 16.4568345323741, "grad_norm": 1.1029988527297974, "learning_rate": 4.819444444444445e-06, "loss": 0.0028, "step": 4575 }, { "epoch": 16.546762589928058, "grad_norm": 0.3530588150024414, "learning_rate": 4.818181818181819e-06, "loss": 0.0015, "step": 4600 }, { "epoch": 16.636690647482013, "grad_norm": 0.0861181914806366, "learning_rate": 4.816919191919192e-06, "loss": 0.0023, "step": 4625 }, { "epoch": 16.72661870503597, "grad_norm": 0.44006574153900146, "learning_rate": 4.815656565656566e-06, "loss": 0.0021, "step": 4650 }, { "epoch": 16.81654676258993, "grad_norm": 0.9688239097595215, "learning_rate": 4.81439393939394e-06, "loss": 0.0014, "step": 4675 }, { "epoch": 16.906474820143885, "grad_norm": 0.848913311958313, "learning_rate": 4.813131313131313e-06, "loss": 0.0021, "step": 4700 }, { "epoch": 16.996402877697843, "grad_norm": 0.14554986357688904, "learning_rate": 4.811868686868687e-06, "loss": 0.0013, "step": 4725 }, { "epoch": 17.086330935251798, "grad_norm": 0.31808871030807495, "learning_rate": 4.810606060606061e-06, "loss": 0.0019, "step": 4750 }, { "epoch": 17.176258992805757, "grad_norm": 0.2081349641084671, "learning_rate": 4.809343434343435e-06, "loss": 0.0018, "step": 4775 }, { "epoch": 17.26618705035971, "grad_norm": 0.0817071720957756, "learning_rate": 4.808080808080808e-06, "loss": 0.0011, "step": 4800 }, { "epoch": 17.35611510791367, "grad_norm": 0.148326575756073, "learning_rate": 4.806818181818182e-06, "loss": 0.0011, "step": 4825 }, { "epoch": 17.446043165467625, "grad_norm": 1.1114903688430786, "learning_rate": 4.805555555555556e-06, "loss": 0.0012, "step": 4850 }, { "epoch": 17.535971223021583, "grad_norm": 0.5132379531860352, "learning_rate": 4.804292929292929e-06, "loss": 0.0015, "step": 4875 }, { "epoch": 17.62589928057554, "grad_norm": 0.5439797043800354, "learning_rate": 4.803030303030303e-06, "loss": 0.0019, "step": 4900 }, { "epoch": 17.715827338129497, "grad_norm": 0.4897061586380005, "learning_rate": 4.801767676767677e-06, "loss": 0.0022, "step": 4925 }, { "epoch": 17.805755395683452, "grad_norm": 0.13605351746082306, "learning_rate": 4.800505050505051e-06, "loss": 0.0017, "step": 4950 }, { "epoch": 17.89568345323741, "grad_norm": 0.6285837888717651, "learning_rate": 4.7992424242424245e-06, "loss": 0.0014, "step": 4975 }, { "epoch": 17.985611510791365, "grad_norm": 0.04884183779358864, "learning_rate": 4.7979797979797985e-06, "loss": 0.0011, "step": 5000 }, { "epoch": 17.985611510791365, "eval_loss": 0.09266538918018341, "eval_runtime": 1344.6458, "eval_samples_per_second": 1.652, "eval_steps_per_second": 0.103, "eval_wer": 8.835073687328741, "step": 5000 }, { "epoch": 18.075539568345324, "grad_norm": 0.036710768938064575, "learning_rate": 4.7967171717171725e-06, "loss": 0.0024, "step": 5025 }, { "epoch": 18.165467625899282, "grad_norm": 0.41920551657676697, "learning_rate": 4.795454545454546e-06, "loss": 0.0011, "step": 5050 }, { "epoch": 18.255395683453237, "grad_norm": 0.2354598492383957, "learning_rate": 4.79419191919192e-06, "loss": 0.0018, "step": 5075 }, { "epoch": 18.345323741007196, "grad_norm": 0.4095918536186218, "learning_rate": 4.792929292929293e-06, "loss": 0.0015, "step": 5100 }, { "epoch": 18.43525179856115, "grad_norm": 0.03964778780937195, "learning_rate": 4.791666666666668e-06, "loss": 0.0019, "step": 5125 }, { "epoch": 18.52517985611511, "grad_norm": 0.9322590827941895, "learning_rate": 4.790404040404041e-06, "loss": 0.0014, "step": 5150 }, { "epoch": 18.615107913669064, "grad_norm": 0.11062884330749512, "learning_rate": 4.789141414141415e-06, "loss": 0.0015, "step": 5175 }, { "epoch": 18.705035971223023, "grad_norm": 0.4186955690383911, "learning_rate": 4.787878787878788e-06, "loss": 0.0013, "step": 5200 }, { "epoch": 18.794964028776977, "grad_norm": 0.40554943680763245, "learning_rate": 4.786616161616162e-06, "loss": 0.0017, "step": 5225 }, { "epoch": 18.884892086330936, "grad_norm": 0.4156556725502014, "learning_rate": 4.785353535353536e-06, "loss": 0.0016, "step": 5250 }, { "epoch": 18.97482014388489, "grad_norm": 0.8705348968505859, "learning_rate": 4.784090909090909e-06, "loss": 0.003, "step": 5275 }, { "epoch": 19.06474820143885, "grad_norm": 0.47541674971580505, "learning_rate": 4.782828282828283e-06, "loss": 0.0026, "step": 5300 }, { "epoch": 19.154676258992804, "grad_norm": 0.3221082389354706, "learning_rate": 4.781565656565657e-06, "loss": 0.0014, "step": 5325 }, { "epoch": 19.244604316546763, "grad_norm": 0.26767319440841675, "learning_rate": 4.78030303030303e-06, "loss": 0.0015, "step": 5350 }, { "epoch": 19.334532374100718, "grad_norm": 0.41984379291534424, "learning_rate": 4.779040404040404e-06, "loss": 0.0026, "step": 5375 }, { "epoch": 19.424460431654676, "grad_norm": 0.6067033410072327, "learning_rate": 4.777777777777778e-06, "loss": 0.0031, "step": 5400 }, { "epoch": 19.514388489208635, "grad_norm": 0.23113247752189636, "learning_rate": 4.776515151515152e-06, "loss": 0.0027, "step": 5425 }, { "epoch": 19.60431654676259, "grad_norm": 0.7052062153816223, "learning_rate": 4.775252525252525e-06, "loss": 0.0038, "step": 5450 }, { "epoch": 19.694244604316548, "grad_norm": 1.4232673645019531, "learning_rate": 4.773989898989899e-06, "loss": 0.0024, "step": 5475 }, { "epoch": 19.784172661870503, "grad_norm": 0.12078073620796204, "learning_rate": 4.772727272727273e-06, "loss": 0.0014, "step": 5500 }, { "epoch": 19.87410071942446, "grad_norm": 1.296155333518982, "learning_rate": 4.7714646464646465e-06, "loss": 0.0028, "step": 5525 }, { "epoch": 19.964028776978417, "grad_norm": 0.4774380922317505, "learning_rate": 4.7702020202020205e-06, "loss": 0.0039, "step": 5550 }, { "epoch": 20.053956834532375, "grad_norm": 0.7243533134460449, "learning_rate": 4.768939393939394e-06, "loss": 0.0038, "step": 5575 }, { "epoch": 20.14388489208633, "grad_norm": 0.03761635348200798, "learning_rate": 4.7676767676767685e-06, "loss": 0.0028, "step": 5600 }, { "epoch": 20.23381294964029, "grad_norm": 0.3167934715747833, "learning_rate": 4.766414141414142e-06, "loss": 0.0023, "step": 5625 }, { "epoch": 20.323741007194243, "grad_norm": 0.08072912693023682, "learning_rate": 4.765151515151516e-06, "loss": 0.0021, "step": 5650 }, { "epoch": 20.413669064748202, "grad_norm": 0.0809144377708435, "learning_rate": 4.763888888888889e-06, "loss": 0.0033, "step": 5675 }, { "epoch": 20.503597122302157, "grad_norm": 0.021725259721279144, "learning_rate": 4.762626262626263e-06, "loss": 0.0022, "step": 5700 }, { "epoch": 20.593525179856115, "grad_norm": 0.79271399974823, "learning_rate": 4.761363636363637e-06, "loss": 0.0015, "step": 5725 }, { "epoch": 20.68345323741007, "grad_norm": 0.10382846742868423, "learning_rate": 4.76010101010101e-06, "loss": 0.0019, "step": 5750 }, { "epoch": 20.77338129496403, "grad_norm": 0.03259812295436859, "learning_rate": 4.758838383838385e-06, "loss": 0.002, "step": 5775 }, { "epoch": 20.863309352517987, "grad_norm": 0.6223962306976318, "learning_rate": 4.757575757575758e-06, "loss": 0.0036, "step": 5800 }, { "epoch": 20.953237410071942, "grad_norm": 1.0351557731628418, "learning_rate": 4.756313131313132e-06, "loss": 0.0022, "step": 5825 }, { "epoch": 21.0431654676259, "grad_norm": 0.8662335276603699, "learning_rate": 4.755050505050505e-06, "loss": 0.0028, "step": 5850 }, { "epoch": 21.133093525179856, "grad_norm": 0.13104894757270813, "learning_rate": 4.753787878787879e-06, "loss": 0.0028, "step": 5875 }, { "epoch": 21.223021582733814, "grad_norm": 0.8010006546974182, "learning_rate": 4.752525252525253e-06, "loss": 0.0021, "step": 5900 }, { "epoch": 21.31294964028777, "grad_norm": 0.7761834263801575, "learning_rate": 4.751262626262626e-06, "loss": 0.0035, "step": 5925 }, { "epoch": 21.402877697841728, "grad_norm": 0.05642890930175781, "learning_rate": 4.75e-06, "loss": 0.0015, "step": 5950 }, { "epoch": 21.492805755395683, "grad_norm": 0.2215975672006607, "learning_rate": 4.748737373737374e-06, "loss": 0.0011, "step": 5975 }, { "epoch": 21.58273381294964, "grad_norm": 0.5649552345275879, "learning_rate": 4.747474747474748e-06, "loss": 0.0017, "step": 6000 }, { "epoch": 21.58273381294964, "eval_loss": 0.08750007301568985, "eval_runtime": 1349.1716, "eval_samples_per_second": 1.647, "eval_steps_per_second": 0.103, "eval_wer": 7.657557579797082, "step": 6000 }, { "epoch": 21.672661870503596, "grad_norm": 0.3567905128002167, "learning_rate": 4.746212121212121e-06, "loss": 0.0023, "step": 6025 }, { "epoch": 21.762589928057555, "grad_norm": 0.7165196537971497, "learning_rate": 4.744949494949495e-06, "loss": 0.0019, "step": 6050 }, { "epoch": 21.85251798561151, "grad_norm": 0.9009844660758972, "learning_rate": 4.743686868686869e-06, "loss": 0.0022, "step": 6075 }, { "epoch": 21.942446043165468, "grad_norm": 0.7037338614463806, "learning_rate": 4.7424242424242426e-06, "loss": 0.0026, "step": 6100 }, { "epoch": 22.032374100719423, "grad_norm": 0.2905846834182739, "learning_rate": 4.7411616161616166e-06, "loss": 0.002, "step": 6125 }, { "epoch": 22.12230215827338, "grad_norm": 0.7335506677627563, "learning_rate": 4.7398989898989905e-06, "loss": 0.0019, "step": 6150 }, { "epoch": 22.21223021582734, "grad_norm": 0.3520030677318573, "learning_rate": 4.7386363636363645e-06, "loss": 0.0016, "step": 6175 }, { "epoch": 22.302158273381295, "grad_norm": 0.3580196797847748, "learning_rate": 4.737373737373738e-06, "loss": 0.0014, "step": 6200 }, { "epoch": 22.392086330935253, "grad_norm": 0.19062575697898865, "learning_rate": 4.736111111111112e-06, "loss": 0.002, "step": 6225 }, { "epoch": 22.48201438848921, "grad_norm": 0.6567767858505249, "learning_rate": 4.734848484848486e-06, "loss": 0.0021, "step": 6250 }, { "epoch": 22.571942446043167, "grad_norm": 0.24819691479206085, "learning_rate": 4.733585858585859e-06, "loss": 0.0019, "step": 6275 }, { "epoch": 22.66187050359712, "grad_norm": 0.47786185145378113, "learning_rate": 4.732323232323233e-06, "loss": 0.0014, "step": 6300 }, { "epoch": 22.75179856115108, "grad_norm": 0.05066821351647377, "learning_rate": 4.731060606060606e-06, "loss": 0.0018, "step": 6325 }, { "epoch": 22.841726618705035, "grad_norm": 0.33751770853996277, "learning_rate": 4.72979797979798e-06, "loss": 0.0028, "step": 6350 }, { "epoch": 22.931654676258994, "grad_norm": 0.03158155083656311, "learning_rate": 4.728535353535354e-06, "loss": 0.0013, "step": 6375 }, { "epoch": 23.02158273381295, "grad_norm": 0.05814801901578903, "learning_rate": 4.727272727272728e-06, "loss": 0.0021, "step": 6400 }, { "epoch": 23.111510791366907, "grad_norm": 0.031183883547782898, "learning_rate": 4.726010101010101e-06, "loss": 0.0011, "step": 6425 }, { "epoch": 23.201438848920862, "grad_norm": 0.539813756942749, "learning_rate": 4.724747474747475e-06, "loss": 0.0009, "step": 6450 }, { "epoch": 23.29136690647482, "grad_norm": 0.14558178186416626, "learning_rate": 4.723484848484849e-06, "loss": 0.0018, "step": 6475 }, { "epoch": 23.381294964028775, "grad_norm": 0.10804769396781921, "learning_rate": 4.722222222222222e-06, "loss": 0.0013, "step": 6500 }, { "epoch": 23.471223021582734, "grad_norm": 0.3211396038532257, "learning_rate": 4.720959595959596e-06, "loss": 0.0015, "step": 6525 }, { "epoch": 23.56115107913669, "grad_norm": 0.16721013188362122, "learning_rate": 4.71969696969697e-06, "loss": 0.0027, "step": 6550 }, { "epoch": 23.651079136690647, "grad_norm": 0.3473891019821167, "learning_rate": 4.7184343434343434e-06, "loss": 0.0014, "step": 6575 }, { "epoch": 23.741007194244606, "grad_norm": 0.04464249685406685, "learning_rate": 4.717171717171717e-06, "loss": 0.0013, "step": 6600 }, { "epoch": 23.83093525179856, "grad_norm": 0.21577273309230804, "learning_rate": 4.715909090909091e-06, "loss": 0.0025, "step": 6625 }, { "epoch": 23.92086330935252, "grad_norm": 1.0553650856018066, "learning_rate": 4.714646464646465e-06, "loss": 0.0012, "step": 6650 }, { "epoch": 24.010791366906474, "grad_norm": 0.015737203881144524, "learning_rate": 4.7133838383838386e-06, "loss": 0.0018, "step": 6675 }, { "epoch": 24.100719424460433, "grad_norm": 0.08808793127536774, "learning_rate": 4.7121212121212126e-06, "loss": 0.0008, "step": 6700 }, { "epoch": 24.190647482014388, "grad_norm": 0.01893734373152256, "learning_rate": 4.7108585858585866e-06, "loss": 0.0008, "step": 6725 }, { "epoch": 24.280575539568346, "grad_norm": 0.032726775854825974, "learning_rate": 4.70959595959596e-06, "loss": 0.0011, "step": 6750 }, { "epoch": 24.3705035971223, "grad_norm": 1.2210007905960083, "learning_rate": 4.708333333333334e-06, "loss": 0.0014, "step": 6775 }, { "epoch": 24.46043165467626, "grad_norm": 0.21317902207374573, "learning_rate": 4.707070707070707e-06, "loss": 0.0008, "step": 6800 }, { "epoch": 24.550359712230215, "grad_norm": 0.02254541404545307, "learning_rate": 4.705808080808082e-06, "loss": 0.0008, "step": 6825 }, { "epoch": 24.640287769784173, "grad_norm": 0.19283901154994965, "learning_rate": 4.704545454545455e-06, "loss": 0.0006, "step": 6850 }, { "epoch": 24.730215827338128, "grad_norm": 0.1615646928548813, "learning_rate": 4.703282828282829e-06, "loss": 0.0011, "step": 6875 }, { "epoch": 24.820143884892087, "grad_norm": 0.04525255784392357, "learning_rate": 4.702020202020202e-06, "loss": 0.0006, "step": 6900 }, { "epoch": 24.91007194244604, "grad_norm": 0.17892493307590485, "learning_rate": 4.700757575757576e-06, "loss": 0.0011, "step": 6925 }, { "epoch": 25.0, "grad_norm": 1.5881894826889038, "learning_rate": 4.69949494949495e-06, "loss": 0.0009, "step": 6950 }, { "epoch": 25.08992805755396, "grad_norm": 0.028072576969861984, "learning_rate": 4.698232323232323e-06, "loss": 0.001, "step": 6975 }, { "epoch": 25.179856115107913, "grad_norm": 0.034753262996673584, "learning_rate": 4.696969696969698e-06, "loss": 0.001, "step": 7000 }, { "epoch": 25.179856115107913, "eval_loss": 0.08996064960956573, "eval_runtime": 1372.6865, "eval_samples_per_second": 1.619, "eval_steps_per_second": 0.101, "eval_wer": 6.591127897504258, "step": 7000 }, { "epoch": 25.269784172661872, "grad_norm": 0.047846052795648575, "learning_rate": 4.695707070707071e-06, "loss": 0.0017, "step": 7025 }, { "epoch": 25.359712230215827, "grad_norm": 0.08721514046192169, "learning_rate": 4.694444444444445e-06, "loss": 0.0012, "step": 7050 }, { "epoch": 25.449640287769785, "grad_norm": 0.488505095243454, "learning_rate": 4.693181818181818e-06, "loss": 0.001, "step": 7075 }, { "epoch": 25.53956834532374, "grad_norm": 0.3541705012321472, "learning_rate": 4.691919191919192e-06, "loss": 0.0009, "step": 7100 }, { "epoch": 25.6294964028777, "grad_norm": 1.2867228984832764, "learning_rate": 4.690656565656566e-06, "loss": 0.0009, "step": 7125 }, { "epoch": 25.719424460431654, "grad_norm": 0.06602492183446884, "learning_rate": 4.6893939393939394e-06, "loss": 0.001, "step": 7150 }, { "epoch": 25.809352517985612, "grad_norm": 0.03555336222052574, "learning_rate": 4.6881313131313134e-06, "loss": 0.0016, "step": 7175 }, { "epoch": 25.899280575539567, "grad_norm": 0.1011524349451065, "learning_rate": 4.6868686868686874e-06, "loss": 0.0028, "step": 7200 }, { "epoch": 25.989208633093526, "grad_norm": 0.14894358813762665, "learning_rate": 4.6856060606060614e-06, "loss": 0.0026, "step": 7225 }, { "epoch": 26.07913669064748, "grad_norm": 0.944786787033081, "learning_rate": 4.684343434343435e-06, "loss": 0.0014, "step": 7250 }, { "epoch": 26.16906474820144, "grad_norm": 0.4678920805454254, "learning_rate": 4.683080808080809e-06, "loss": 0.0016, "step": 7275 }, { "epoch": 26.258992805755394, "grad_norm": 0.0241763386875391, "learning_rate": 4.681818181818183e-06, "loss": 0.0018, "step": 7300 }, { "epoch": 26.348920863309353, "grad_norm": 0.1959693878889084, "learning_rate": 4.680555555555556e-06, "loss": 0.0014, "step": 7325 }, { "epoch": 26.43884892086331, "grad_norm": 0.05353585258126259, "learning_rate": 4.67929292929293e-06, "loss": 0.001, "step": 7350 }, { "epoch": 26.528776978417266, "grad_norm": 0.022708551958203316, "learning_rate": 4.678030303030303e-06, "loss": 0.0008, "step": 7375 }, { "epoch": 26.618705035971225, "grad_norm": 0.28148502111434937, "learning_rate": 4.676767676767677e-06, "loss": 0.0012, "step": 7400 }, { "epoch": 26.70863309352518, "grad_norm": 0.0556604228913784, "learning_rate": 4.675505050505051e-06, "loss": 0.0018, "step": 7425 }, { "epoch": 26.798561151079138, "grad_norm": 0.03789166733622551, "learning_rate": 4.674242424242425e-06, "loss": 0.0008, "step": 7450 }, { "epoch": 26.888489208633093, "grad_norm": 0.18029791116714478, "learning_rate": 4.672979797979799e-06, "loss": 0.001, "step": 7475 }, { "epoch": 26.97841726618705, "grad_norm": 0.27599871158599854, "learning_rate": 4.671717171717172e-06, "loss": 0.0008, "step": 7500 }, { "epoch": 27.068345323741006, "grad_norm": 0.4067777693271637, "learning_rate": 4.670454545454546e-06, "loss": 0.0017, "step": 7525 }, { "epoch": 27.158273381294965, "grad_norm": 0.36876606941223145, "learning_rate": 4.669191919191919e-06, "loss": 0.0011, "step": 7550 }, { "epoch": 27.24820143884892, "grad_norm": 0.2605381906032562, "learning_rate": 4.667929292929293e-06, "loss": 0.0014, "step": 7575 }, { "epoch": 27.33812949640288, "grad_norm": 0.02853270247578621, "learning_rate": 4.666666666666667e-06, "loss": 0.0008, "step": 7600 }, { "epoch": 27.428057553956833, "grad_norm": 0.055020011961460114, "learning_rate": 4.66540404040404e-06, "loss": 0.0009, "step": 7625 }, { "epoch": 27.51798561151079, "grad_norm": 0.30874237418174744, "learning_rate": 4.664141414141414e-06, "loss": 0.0018, "step": 7650 }, { "epoch": 27.607913669064747, "grad_norm": 0.09795974940061569, "learning_rate": 4.662878787878788e-06, "loss": 0.0014, "step": 7675 }, { "epoch": 27.697841726618705, "grad_norm": 0.04705384001135826, "learning_rate": 4.661616161616162e-06, "loss": 0.0015, "step": 7700 }, { "epoch": 27.78776978417266, "grad_norm": 0.058379877358675, "learning_rate": 4.6603535353535355e-06, "loss": 0.0008, "step": 7725 }, { "epoch": 27.87769784172662, "grad_norm": 0.047014497220516205, "learning_rate": 4.6590909090909095e-06, "loss": 0.0016, "step": 7750 }, { "epoch": 27.967625899280577, "grad_norm": 0.6353835463523865, "learning_rate": 4.6578282828282835e-06, "loss": 0.0012, "step": 7775 }, { "epoch": 28.057553956834532, "grad_norm": 0.13249577581882477, "learning_rate": 4.656565656565657e-06, "loss": 0.0007, "step": 7800 }, { "epoch": 28.14748201438849, "grad_norm": 0.16413046419620514, "learning_rate": 4.655303030303031e-06, "loss": 0.0009, "step": 7825 }, { "epoch": 28.237410071942445, "grad_norm": 0.21356362104415894, "learning_rate": 4.654040404040405e-06, "loss": 0.0007, "step": 7850 }, { "epoch": 28.327338129496404, "grad_norm": 0.0190277099609375, "learning_rate": 4.652777777777779e-06, "loss": 0.0007, "step": 7875 }, { "epoch": 28.41726618705036, "grad_norm": 0.12108524143695831, "learning_rate": 4.651515151515152e-06, "loss": 0.0009, "step": 7900 }, { "epoch": 28.507194244604317, "grad_norm": 0.026057908311486244, "learning_rate": 4.650252525252526e-06, "loss": 0.0007, "step": 7925 }, { "epoch": 28.597122302158272, "grad_norm": 0.09515079110860825, "learning_rate": 4.6489898989899e-06, "loss": 0.0008, "step": 7950 }, { "epoch": 28.68705035971223, "grad_norm": 0.48142778873443604, "learning_rate": 4.647727272727273e-06, "loss": 0.0007, "step": 7975 }, { "epoch": 28.776978417266186, "grad_norm": 0.46795013546943665, "learning_rate": 4.646464646464647e-06, "loss": 0.0014, "step": 8000 }, { "epoch": 28.776978417266186, "eval_loss": 0.09178629517555237, "eval_runtime": 1347.1747, "eval_samples_per_second": 1.649, "eval_steps_per_second": 0.103, "eval_wer": 7.139154262015849, "step": 8000 }, { "epoch": 28.866906474820144, "grad_norm": 0.5243809223175049, "learning_rate": 4.64520202020202e-06, "loss": 0.0007, "step": 8025 }, { "epoch": 28.9568345323741, "grad_norm": 0.3461306095123291, "learning_rate": 4.643939393939395e-06, "loss": 0.001, "step": 8050 }, { "epoch": 29.046762589928058, "grad_norm": 0.2795426845550537, "learning_rate": 4.642676767676768e-06, "loss": 0.0014, "step": 8075 }, { "epoch": 29.136690647482013, "grad_norm": 0.05419691279530525, "learning_rate": 4.641414141414142e-06, "loss": 0.0014, "step": 8100 }, { "epoch": 29.22661870503597, "grad_norm": 0.08857329189777374, "learning_rate": 4.640151515151515e-06, "loss": 0.0016, "step": 8125 }, { "epoch": 29.31654676258993, "grad_norm": 0.05129173770546913, "learning_rate": 4.638888888888889e-06, "loss": 0.0011, "step": 8150 }, { "epoch": 29.406474820143885, "grad_norm": 1.0032382011413574, "learning_rate": 4.637626262626263e-06, "loss": 0.0023, "step": 8175 }, { "epoch": 29.496402877697843, "grad_norm": 0.4335207939147949, "learning_rate": 4.636363636363636e-06, "loss": 0.0028, "step": 8200 }, { "epoch": 29.586330935251798, "grad_norm": 0.15561847388744354, "learning_rate": 4.63510101010101e-06, "loss": 0.0028, "step": 8225 }, { "epoch": 29.676258992805757, "grad_norm": 0.24305035173892975, "learning_rate": 4.633838383838384e-06, "loss": 0.0024, "step": 8250 }, { "epoch": 29.76618705035971, "grad_norm": 1.3689900636672974, "learning_rate": 4.632575757575758e-06, "loss": 0.0036, "step": 8275 }, { "epoch": 29.85611510791367, "grad_norm": 0.6511125564575195, "learning_rate": 4.6313131313131315e-06, "loss": 0.0025, "step": 8300 }, { "epoch": 29.946043165467625, "grad_norm": 0.8534782528877258, "learning_rate": 4.6300505050505055e-06, "loss": 0.0029, "step": 8325 }, { "epoch": 30.035971223021583, "grad_norm": 0.3412608504295349, "learning_rate": 4.6287878787878795e-06, "loss": 0.0028, "step": 8350 }, { "epoch": 30.12589928057554, "grad_norm": 0.16232311725616455, "learning_rate": 4.627525252525253e-06, "loss": 0.0023, "step": 8375 }, { "epoch": 30.215827338129497, "grad_norm": 0.08357956260442734, "learning_rate": 4.626262626262627e-06, "loss": 0.0019, "step": 8400 }, { "epoch": 30.305755395683452, "grad_norm": 0.412728488445282, "learning_rate": 4.625000000000001e-06, "loss": 0.0015, "step": 8425 }, { "epoch": 30.39568345323741, "grad_norm": 0.9784059524536133, "learning_rate": 4.623737373737375e-06, "loss": 0.0025, "step": 8450 }, { "epoch": 30.485611510791365, "grad_norm": 0.38275232911109924, "learning_rate": 4.622474747474748e-06, "loss": 0.0016, "step": 8475 }, { "epoch": 30.575539568345324, "grad_norm": 0.3518912196159363, "learning_rate": 4.621212121212122e-06, "loss": 0.0024, "step": 8500 }, { "epoch": 30.665467625899282, "grad_norm": 0.8633609414100647, "learning_rate": 4.619949494949496e-06, "loss": 0.0022, "step": 8525 }, { "epoch": 30.755395683453237, "grad_norm": 0.23257087171077728, "learning_rate": 4.618686868686869e-06, "loss": 0.0016, "step": 8550 }, { "epoch": 30.845323741007196, "grad_norm": 1.2157853841781616, "learning_rate": 4.617424242424243e-06, "loss": 0.0013, "step": 8575 }, { "epoch": 30.93525179856115, "grad_norm": 0.6692176461219788, "learning_rate": 4.616161616161616e-06, "loss": 0.0025, "step": 8600 }, { "epoch": 31.02517985611511, "grad_norm": 0.08320923149585724, "learning_rate": 4.61489898989899e-06, "loss": 0.0015, "step": 8625 }, { "epoch": 31.115107913669064, "grad_norm": 0.03867033123970032, "learning_rate": 4.613636363636364e-06, "loss": 0.0011, "step": 8650 }, { "epoch": 31.205035971223023, "grad_norm": 0.37571918964385986, "learning_rate": 4.612373737373737e-06, "loss": 0.002, "step": 8675 }, { "epoch": 31.294964028776977, "grad_norm": 0.023200325667858124, "learning_rate": 4.611111111111112e-06, "loss": 0.0017, "step": 8700 }, { "epoch": 31.384892086330936, "grad_norm": 0.025962859392166138, "learning_rate": 4.609848484848485e-06, "loss": 0.0025, "step": 8725 }, { "epoch": 31.47482014388489, "grad_norm": 0.07832462340593338, "learning_rate": 4.608585858585859e-06, "loss": 0.002, "step": 8750 }, { "epoch": 31.56474820143885, "grad_norm": 0.5365622043609619, "learning_rate": 4.607323232323232e-06, "loss": 0.0019, "step": 8775 }, { "epoch": 31.654676258992804, "grad_norm": 0.042796701192855835, "learning_rate": 4.606060606060606e-06, "loss": 0.0012, "step": 8800 }, { "epoch": 31.744604316546763, "grad_norm": 0.2298709750175476, "learning_rate": 4.60479797979798e-06, "loss": 0.0015, "step": 8825 }, { "epoch": 31.834532374100718, "grad_norm": 0.7432539463043213, "learning_rate": 4.6035353535353535e-06, "loss": 0.002, "step": 8850 }, { "epoch": 31.924460431654676, "grad_norm": 0.05896187201142311, "learning_rate": 4.6022727272727275e-06, "loss": 0.0017, "step": 8875 }, { "epoch": 32.014388489208635, "grad_norm": 0.6994006633758545, "learning_rate": 4.6010101010101015e-06, "loss": 0.0019, "step": 8900 }, { "epoch": 32.10431654676259, "grad_norm": 0.6547738909721375, "learning_rate": 4.5997474747474755e-06, "loss": 0.0016, "step": 8925 }, { "epoch": 32.194244604316545, "grad_norm": 0.13888348639011383, "learning_rate": 4.598484848484849e-06, "loss": 0.0014, "step": 8950 }, { "epoch": 32.28417266187051, "grad_norm": 0.09715843945741653, "learning_rate": 4.597222222222223e-06, "loss": 0.001, "step": 8975 }, { "epoch": 32.37410071942446, "grad_norm": 0.05904947221279144, "learning_rate": 4.595959595959597e-06, "loss": 0.0014, "step": 9000 }, { "epoch": 32.37410071942446, "eval_loss": 0.08943528681993484, "eval_runtime": 1353.2653, "eval_samples_per_second": 1.642, "eval_steps_per_second": 0.103, "eval_wer": 6.739243131156039, "step": 9000 }, { "epoch": 32.46402877697842, "grad_norm": 1.000013828277588, "learning_rate": 4.59469696969697e-06, "loss": 0.0012, "step": 9025 }, { "epoch": 32.55395683453237, "grad_norm": 0.031857941299676895, "learning_rate": 4.593434343434344e-06, "loss": 0.0011, "step": 9050 }, { "epoch": 32.643884892086334, "grad_norm": 0.18854251503944397, "learning_rate": 4.592171717171717e-06, "loss": 0.0011, "step": 9075 }, { "epoch": 32.73381294964029, "grad_norm": 0.06311248987913132, "learning_rate": 4.590909090909092e-06, "loss": 0.0009, "step": 9100 }, { "epoch": 32.82374100719424, "grad_norm": 0.02462015673518181, "learning_rate": 4.589646464646465e-06, "loss": 0.0023, "step": 9125 }, { "epoch": 32.9136690647482, "grad_norm": 0.5756279826164246, "learning_rate": 4.588383838383839e-06, "loss": 0.0014, "step": 9150 }, { "epoch": 33.00359712230216, "grad_norm": 0.39254868030548096, "learning_rate": 4.587121212121213e-06, "loss": 0.0012, "step": 9175 }, { "epoch": 33.093525179856115, "grad_norm": 0.05750317871570587, "learning_rate": 4.585858585858586e-06, "loss": 0.0016, "step": 9200 }, { "epoch": 33.18345323741007, "grad_norm": 0.456665962934494, "learning_rate": 4.58459595959596e-06, "loss": 0.0005, "step": 9225 }, { "epoch": 33.273381294964025, "grad_norm": 0.05247064307332039, "learning_rate": 4.583333333333333e-06, "loss": 0.0007, "step": 9250 }, { "epoch": 33.36330935251799, "grad_norm": 0.1745249629020691, "learning_rate": 4.582070707070708e-06, "loss": 0.0011, "step": 9275 }, { "epoch": 33.45323741007194, "grad_norm": 0.1702817678451538, "learning_rate": 4.580808080808081e-06, "loss": 0.0011, "step": 9300 }, { "epoch": 33.5431654676259, "grad_norm": 0.5600733757019043, "learning_rate": 4.579545454545455e-06, "loss": 0.0017, "step": 9325 }, { "epoch": 33.63309352517986, "grad_norm": 0.042534805834293365, "learning_rate": 4.578282828282828e-06, "loss": 0.002, "step": 9350 }, { "epoch": 33.723021582733814, "grad_norm": 0.025305964052677155, "learning_rate": 4.577020202020202e-06, "loss": 0.0014, "step": 9375 }, { "epoch": 33.81294964028777, "grad_norm": 0.05213531106710434, "learning_rate": 4.575757575757576e-06, "loss": 0.001, "step": 9400 }, { "epoch": 33.902877697841724, "grad_norm": 0.02446218766272068, "learning_rate": 4.5744949494949495e-06, "loss": 0.0006, "step": 9425 }, { "epoch": 33.992805755395686, "grad_norm": 0.009959193877875805, "learning_rate": 4.5732323232323235e-06, "loss": 0.0009, "step": 9450 }, { "epoch": 34.08273381294964, "grad_norm": 0.4287709891796112, "learning_rate": 4.5719696969696975e-06, "loss": 0.0007, "step": 9475 }, { "epoch": 34.172661870503596, "grad_norm": 0.011952442117035389, "learning_rate": 4.5707070707070715e-06, "loss": 0.0004, "step": 9500 }, { "epoch": 34.26258992805755, "grad_norm": 0.1948029100894928, "learning_rate": 4.569444444444445e-06, "loss": 0.0007, "step": 9525 }, { "epoch": 34.35251798561151, "grad_norm": 0.03538801521062851, "learning_rate": 4.568181818181819e-06, "loss": 0.0007, "step": 9550 }, { "epoch": 34.44244604316547, "grad_norm": 0.03204001113772392, "learning_rate": 4.566919191919193e-06, "loss": 0.0006, "step": 9575 }, { "epoch": 34.53237410071942, "grad_norm": 0.12747210264205933, "learning_rate": 4.565656565656566e-06, "loss": 0.0008, "step": 9600 }, { "epoch": 34.62230215827338, "grad_norm": 0.009002352133393288, "learning_rate": 4.56439393939394e-06, "loss": 0.0003, "step": 9625 }, { "epoch": 34.71223021582734, "grad_norm": 0.057965803891420364, "learning_rate": 4.563131313131314e-06, "loss": 0.0009, "step": 9650 }, { "epoch": 34.802158273381295, "grad_norm": 0.07385562360286713, "learning_rate": 4.561868686868687e-06, "loss": 0.0007, "step": 9675 }, { "epoch": 34.89208633093525, "grad_norm": 0.010685013607144356, "learning_rate": 4.560606060606061e-06, "loss": 0.0008, "step": 9700 }, { "epoch": 34.98201438848921, "grad_norm": 0.038797181099653244, "learning_rate": 4.559343434343435e-06, "loss": 0.0003, "step": 9725 }, { "epoch": 35.07194244604317, "grad_norm": 0.016779489815235138, "learning_rate": 4.558080808080809e-06, "loss": 0.0011, "step": 9750 }, { "epoch": 35.16187050359712, "grad_norm": 0.01562959887087345, "learning_rate": 4.556818181818182e-06, "loss": 0.0007, "step": 9775 }, { "epoch": 35.25179856115108, "grad_norm": 0.025731824338436127, "learning_rate": 4.555555555555556e-06, "loss": 0.0005, "step": 9800 }, { "epoch": 35.34172661870504, "grad_norm": 0.0950327217578888, "learning_rate": 4.554292929292929e-06, "loss": 0.0014, "step": 9825 }, { "epoch": 35.431654676258994, "grad_norm": 0.015813730657100677, "learning_rate": 4.553030303030303e-06, "loss": 0.0011, "step": 9850 }, { "epoch": 35.52158273381295, "grad_norm": 0.07395196706056595, "learning_rate": 4.551767676767677e-06, "loss": 0.0006, "step": 9875 }, { "epoch": 35.611510791366904, "grad_norm": 0.3815157413482666, "learning_rate": 4.55050505050505e-06, "loss": 0.001, "step": 9900 }, { "epoch": 35.701438848920866, "grad_norm": 0.028180675581097603, "learning_rate": 4.549242424242424e-06, "loss": 0.0007, "step": 9925 }, { "epoch": 35.79136690647482, "grad_norm": 0.022708212956786156, "learning_rate": 4.547979797979798e-06, "loss": 0.0007, "step": 9950 }, { "epoch": 35.881294964028775, "grad_norm": 0.37191152572631836, "learning_rate": 4.546717171717172e-06, "loss": 0.0006, "step": 9975 }, { "epoch": 35.97122302158273, "grad_norm": 0.045804716646671295, "learning_rate": 4.5454545454545455e-06, "loss": 0.0005, "step": 10000 }, { "epoch": 35.97122302158273, "eval_loss": 0.08962783217430115, "eval_runtime": 1349.7416, "eval_samples_per_second": 1.646, "eval_steps_per_second": 0.103, "eval_wer": 6.25786862178775, "step": 10000 }, { "epoch": 36.06115107913669, "grad_norm": 0.016676392406225204, "learning_rate": 4.5441919191919195e-06, "loss": 0.0003, "step": 10025 }, { "epoch": 36.15107913669065, "grad_norm": 0.15673214197158813, "learning_rate": 4.5429292929292935e-06, "loss": 0.0009, "step": 10050 }, { "epoch": 36.2410071942446, "grad_norm": 0.032344311475753784, "learning_rate": 4.541666666666667e-06, "loss": 0.0015, "step": 10075 }, { "epoch": 36.330935251798564, "grad_norm": 0.5042840242385864, "learning_rate": 4.540404040404041e-06, "loss": 0.0014, "step": 10100 }, { "epoch": 36.42086330935252, "grad_norm": 0.02287839725613594, "learning_rate": 4.539141414141415e-06, "loss": 0.0013, "step": 10125 }, { "epoch": 36.510791366906474, "grad_norm": 0.30796897411346436, "learning_rate": 4.537878787878789e-06, "loss": 0.0025, "step": 10150 }, { "epoch": 36.60071942446043, "grad_norm": 0.11940345168113708, "learning_rate": 4.536616161616162e-06, "loss": 0.0009, "step": 10175 }, { "epoch": 36.69064748201439, "grad_norm": 0.12890297174453735, "learning_rate": 4.535353535353536e-06, "loss": 0.001, "step": 10200 }, { "epoch": 36.780575539568346, "grad_norm": 0.016430262476205826, "learning_rate": 4.53409090909091e-06, "loss": 0.0012, "step": 10225 }, { "epoch": 36.8705035971223, "grad_norm": 0.08656007796525955, "learning_rate": 4.532828282828283e-06, "loss": 0.0015, "step": 10250 }, { "epoch": 36.960431654676256, "grad_norm": 0.0869501456618309, "learning_rate": 4.531565656565657e-06, "loss": 0.0018, "step": 10275 }, { "epoch": 37.05035971223022, "grad_norm": 0.4101605713367462, "learning_rate": 4.53030303030303e-06, "loss": 0.0015, "step": 10300 }, { "epoch": 37.14028776978417, "grad_norm": 0.0797925516963005, "learning_rate": 4.529040404040405e-06, "loss": 0.0007, "step": 10325 }, { "epoch": 37.23021582733813, "grad_norm": 0.025322135537862778, "learning_rate": 4.527777777777778e-06, "loss": 0.0006, "step": 10350 }, { "epoch": 37.32014388489208, "grad_norm": 0.059909917414188385, "learning_rate": 4.526515151515152e-06, "loss": 0.0012, "step": 10375 }, { "epoch": 37.410071942446045, "grad_norm": 0.062007270753383636, "learning_rate": 4.525252525252526e-06, "loss": 0.0012, "step": 10400 }, { "epoch": 37.5, "grad_norm": 0.35286614298820496, "learning_rate": 4.523989898989899e-06, "loss": 0.0016, "step": 10425 }, { "epoch": 37.589928057553955, "grad_norm": 0.1300862431526184, "learning_rate": 4.522727272727273e-06, "loss": 0.0006, "step": 10450 }, { "epoch": 37.67985611510792, "grad_norm": 0.13838863372802734, "learning_rate": 4.521464646464646e-06, "loss": 0.0006, "step": 10475 }, { "epoch": 37.76978417266187, "grad_norm": 0.6767460703849792, "learning_rate": 4.520202020202021e-06, "loss": 0.0006, "step": 10500 }, { "epoch": 37.85971223021583, "grad_norm": 0.03494667634367943, "learning_rate": 4.518939393939394e-06, "loss": 0.0013, "step": 10525 }, { "epoch": 37.94964028776978, "grad_norm": 0.14763426780700684, "learning_rate": 4.517676767676768e-06, "loss": 0.0022, "step": 10550 }, { "epoch": 38.039568345323744, "grad_norm": 0.15873517096042633, "learning_rate": 4.5164141414141415e-06, "loss": 0.0019, "step": 10575 }, { "epoch": 38.1294964028777, "grad_norm": 0.048420246690511703, "learning_rate": 4.5151515151515155e-06, "loss": 0.001, "step": 10600 }, { "epoch": 38.219424460431654, "grad_norm": 0.038138266652822495, "learning_rate": 4.5138888888888895e-06, "loss": 0.0004, "step": 10625 }, { "epoch": 38.30935251798561, "grad_norm": 0.024455932900309563, "learning_rate": 4.512626262626263e-06, "loss": 0.0007, "step": 10650 }, { "epoch": 38.39928057553957, "grad_norm": 0.29704517126083374, "learning_rate": 4.511363636363637e-06, "loss": 0.0012, "step": 10675 }, { "epoch": 38.489208633093526, "grad_norm": 0.23077060282230377, "learning_rate": 4.510101010101011e-06, "loss": 0.0006, "step": 10700 }, { "epoch": 38.57913669064748, "grad_norm": 0.04493401572108269, "learning_rate": 4.508838383838384e-06, "loss": 0.0007, "step": 10725 }, { "epoch": 38.669064748201436, "grad_norm": 0.01225815899670124, "learning_rate": 4.507575757575758e-06, "loss": 0.0004, "step": 10750 }, { "epoch": 38.7589928057554, "grad_norm": 0.19539327919483185, "learning_rate": 4.506313131313132e-06, "loss": 0.0012, "step": 10775 }, { "epoch": 38.84892086330935, "grad_norm": 0.4501245319843292, "learning_rate": 4.505050505050506e-06, "loss": 0.0016, "step": 10800 }, { "epoch": 38.93884892086331, "grad_norm": 0.955757200717926, "learning_rate": 4.503787878787879e-06, "loss": 0.002, "step": 10825 }, { "epoch": 39.02877697841727, "grad_norm": 0.4927741587162018, "learning_rate": 4.502525252525253e-06, "loss": 0.0009, "step": 10850 }, { "epoch": 39.118705035971225, "grad_norm": 0.5250554084777832, "learning_rate": 4.501262626262627e-06, "loss": 0.0018, "step": 10875 }, { "epoch": 39.20863309352518, "grad_norm": 0.5786688327789307, "learning_rate": 4.5e-06, "loss": 0.0013, "step": 10900 }, { "epoch": 39.298561151079134, "grad_norm": 0.015845810994505882, "learning_rate": 4.498737373737374e-06, "loss": 0.0009, "step": 10925 }, { "epoch": 39.388489208633096, "grad_norm": 0.01820209249854088, "learning_rate": 4.497474747474747e-06, "loss": 0.001, "step": 10950 }, { "epoch": 39.47841726618705, "grad_norm": 0.026294970884919167, "learning_rate": 4.496212121212122e-06, "loss": 0.0018, "step": 10975 }, { "epoch": 39.568345323741006, "grad_norm": 0.4651360511779785, "learning_rate": 4.494949494949495e-06, "loss": 0.0016, "step": 11000 }, { "epoch": 39.568345323741006, "eval_loss": 0.09019309282302856, "eval_runtime": 1345.7556, "eval_samples_per_second": 1.651, "eval_steps_per_second": 0.103, "eval_wer": 6.331926238613642, "step": 11000 }, { "epoch": 39.65827338129496, "grad_norm": 0.29995694756507874, "learning_rate": 4.493686868686869e-06, "loss": 0.0015, "step": 11025 }, { "epoch": 39.74820143884892, "grad_norm": 0.3291122019290924, "learning_rate": 4.492424242424242e-06, "loss": 0.0015, "step": 11050 }, { "epoch": 39.83812949640288, "grad_norm": 0.1785033792257309, "learning_rate": 4.491161616161616e-06, "loss": 0.0006, "step": 11075 }, { "epoch": 39.92805755395683, "grad_norm": 0.020028244704008102, "learning_rate": 4.48989898989899e-06, "loss": 0.002, "step": 11100 }, { "epoch": 40.01798561151079, "grad_norm": 0.08107150346040726, "learning_rate": 4.4886363636363636e-06, "loss": 0.0014, "step": 11125 }, { "epoch": 40.10791366906475, "grad_norm": 0.012092849239706993, "learning_rate": 4.4873737373737375e-06, "loss": 0.0018, "step": 11150 }, { "epoch": 40.197841726618705, "grad_norm": 0.163823664188385, "learning_rate": 4.4861111111111115e-06, "loss": 0.0014, "step": 11175 }, { "epoch": 40.28776978417266, "grad_norm": 0.07797440141439438, "learning_rate": 4.4848484848484855e-06, "loss": 0.0022, "step": 11200 }, { "epoch": 40.37769784172662, "grad_norm": 0.07735186815261841, "learning_rate": 4.483585858585859e-06, "loss": 0.0018, "step": 11225 }, { "epoch": 40.46762589928058, "grad_norm": 0.3801431953907013, "learning_rate": 4.482323232323233e-06, "loss": 0.0013, "step": 11250 }, { "epoch": 40.55755395683453, "grad_norm": 0.02574390545487404, "learning_rate": 4.481060606060607e-06, "loss": 0.0008, "step": 11275 }, { "epoch": 40.64748201438849, "grad_norm": 0.06015799939632416, "learning_rate": 4.47979797979798e-06, "loss": 0.0007, "step": 11300 }, { "epoch": 40.73741007194245, "grad_norm": 0.011081011034548283, "learning_rate": 4.478535353535354e-06, "loss": 0.0009, "step": 11325 }, { "epoch": 40.827338129496404, "grad_norm": 0.14023222029209137, "learning_rate": 4.477272727272728e-06, "loss": 0.0009, "step": 11350 }, { "epoch": 40.91726618705036, "grad_norm": 1.1734967231750488, "learning_rate": 4.476010101010102e-06, "loss": 0.0034, "step": 11375 }, { "epoch": 41.007194244604314, "grad_norm": 0.018789170309901237, "learning_rate": 4.474747474747475e-06, "loss": 0.0012, "step": 11400 }, { "epoch": 41.097122302158276, "grad_norm": 0.5469329953193665, "learning_rate": 4.473484848484849e-06, "loss": 0.0012, "step": 11425 }, { "epoch": 41.18705035971223, "grad_norm": 1.0320335626602173, "learning_rate": 4.472222222222223e-06, "loss": 0.0022, "step": 11450 }, { "epoch": 41.276978417266186, "grad_norm": 0.13018514215946198, "learning_rate": 4.470959595959596e-06, "loss": 0.001, "step": 11475 }, { "epoch": 41.36690647482014, "grad_norm": 0.764275848865509, "learning_rate": 4.46969696969697e-06, "loss": 0.0017, "step": 11500 }, { "epoch": 41.4568345323741, "grad_norm": 0.037678878754377365, "learning_rate": 4.468434343434343e-06, "loss": 0.0012, "step": 11525 }, { "epoch": 41.54676258992806, "grad_norm": 0.0776861384510994, "learning_rate": 4.467171717171718e-06, "loss": 0.0012, "step": 11550 }, { "epoch": 41.63669064748201, "grad_norm": 0.1435922086238861, "learning_rate": 4.465909090909091e-06, "loss": 0.0014, "step": 11575 }, { "epoch": 41.726618705035975, "grad_norm": 0.2661900520324707, "learning_rate": 4.464646464646465e-06, "loss": 0.0014, "step": 11600 }, { "epoch": 41.81654676258993, "grad_norm": 0.014804186299443245, "learning_rate": 4.463383838383838e-06, "loss": 0.0013, "step": 11625 }, { "epoch": 41.906474820143885, "grad_norm": 0.5918655395507812, "learning_rate": 4.462121212121212e-06, "loss": 0.001, "step": 11650 }, { "epoch": 41.99640287769784, "grad_norm": 0.2970104217529297, "learning_rate": 4.460858585858586e-06, "loss": 0.0014, "step": 11675 }, { "epoch": 42.0863309352518, "grad_norm": 0.24786308407783508, "learning_rate": 4.4595959595959596e-06, "loss": 0.0005, "step": 11700 }, { "epoch": 42.17625899280576, "grad_norm": 0.39591023325920105, "learning_rate": 4.4583333333333336e-06, "loss": 0.0012, "step": 11725 }, { "epoch": 42.26618705035971, "grad_norm": 0.014619703404605389, "learning_rate": 4.4570707070707076e-06, "loss": 0.0009, "step": 11750 }, { "epoch": 42.356115107913666, "grad_norm": 0.014031196013092995, "learning_rate": 4.4558080808080816e-06, "loss": 0.0005, "step": 11775 }, { "epoch": 42.44604316546763, "grad_norm": 0.0157134011387825, "learning_rate": 4.454545454545455e-06, "loss": 0.0005, "step": 11800 }, { "epoch": 42.53597122302158, "grad_norm": 0.5443057417869568, "learning_rate": 4.453282828282829e-06, "loss": 0.0005, "step": 11825 }, { "epoch": 42.62589928057554, "grad_norm": 0.17728668451309204, "learning_rate": 4.452020202020203e-06, "loss": 0.001, "step": 11850 }, { "epoch": 42.71582733812949, "grad_norm": 0.06720776110887527, "learning_rate": 4.450757575757576e-06, "loss": 0.0008, "step": 11875 }, { "epoch": 42.805755395683455, "grad_norm": 0.020302429795265198, "learning_rate": 4.44949494949495e-06, "loss": 0.0005, "step": 11900 }, { "epoch": 42.89568345323741, "grad_norm": 0.02236667089164257, "learning_rate": 4.448232323232324e-06, "loss": 0.0008, "step": 11925 }, { "epoch": 42.985611510791365, "grad_norm": 0.3039033114910126, "learning_rate": 4.446969696969697e-06, "loss": 0.0007, "step": 11950 }, { "epoch": 43.07553956834533, "grad_norm": 0.019936522468924522, "learning_rate": 4.445707070707071e-06, "loss": 0.0004, "step": 11975 }, { "epoch": 43.16546762589928, "grad_norm": 0.006646598689258099, "learning_rate": 4.444444444444444e-06, "loss": 0.0007, "step": 12000 }, { "epoch": 43.16546762589928, "eval_loss": 0.09005734324455261, "eval_runtime": 1349.9657, "eval_samples_per_second": 1.646, "eval_steps_per_second": 0.103, "eval_wer": 6.2208398133748055, "step": 12000 }, { "epoch": 43.25539568345324, "grad_norm": 0.06663926690816879, "learning_rate": 4.443181818181819e-06, "loss": 0.0003, "step": 12025 }, { "epoch": 43.34532374100719, "grad_norm": 0.7015880346298218, "learning_rate": 4.441919191919192e-06, "loss": 0.0013, "step": 12050 }, { "epoch": 43.435251798561154, "grad_norm": 0.09495950490236282, "learning_rate": 4.440656565656566e-06, "loss": 0.0009, "step": 12075 }, { "epoch": 43.52517985611511, "grad_norm": 0.010513260029256344, "learning_rate": 4.43939393939394e-06, "loss": 0.0007, "step": 12100 }, { "epoch": 43.615107913669064, "grad_norm": 0.08924310654401779, "learning_rate": 4.438131313131313e-06, "loss": 0.0004, "step": 12125 }, { "epoch": 43.70503597122302, "grad_norm": 0.015554459765553474, "learning_rate": 4.436868686868687e-06, "loss": 0.0005, "step": 12150 }, { "epoch": 43.79496402877698, "grad_norm": 0.02140822261571884, "learning_rate": 4.4356060606060604e-06, "loss": 0.0012, "step": 12175 }, { "epoch": 43.884892086330936, "grad_norm": 0.2149767279624939, "learning_rate": 4.434343434343435e-06, "loss": 0.0005, "step": 12200 }, { "epoch": 43.97482014388489, "grad_norm": 0.009459302760660648, "learning_rate": 4.4330808080808084e-06, "loss": 0.0012, "step": 12225 }, { "epoch": 44.064748201438846, "grad_norm": 0.05037049949169159, "learning_rate": 4.4318181818181824e-06, "loss": 0.0004, "step": 12250 }, { "epoch": 44.15467625899281, "grad_norm": 0.006279121618717909, "learning_rate": 4.430555555555556e-06, "loss": 0.0006, "step": 12275 }, { "epoch": 44.24460431654676, "grad_norm": 0.03591470420360565, "learning_rate": 4.42929292929293e-06, "loss": 0.0006, "step": 12300 }, { "epoch": 44.33453237410072, "grad_norm": 0.013430873863399029, "learning_rate": 4.428030303030304e-06, "loss": 0.0015, "step": 12325 }, { "epoch": 44.42446043165468, "grad_norm": 0.01713446155190468, "learning_rate": 4.426767676767677e-06, "loss": 0.0011, "step": 12350 }, { "epoch": 44.514388489208635, "grad_norm": 0.6338793039321899, "learning_rate": 4.425505050505051e-06, "loss": 0.0023, "step": 12375 }, { "epoch": 44.60431654676259, "grad_norm": 0.19725088775157928, "learning_rate": 4.424242424242425e-06, "loss": 0.0015, "step": 12400 }, { "epoch": 44.694244604316545, "grad_norm": 0.034790072590112686, "learning_rate": 4.422979797979799e-06, "loss": 0.0011, "step": 12425 }, { "epoch": 44.78417266187051, "grad_norm": 2.0450031757354736, "learning_rate": 4.421717171717172e-06, "loss": 0.0012, "step": 12450 }, { "epoch": 44.87410071942446, "grad_norm": 0.25726571679115295, "learning_rate": 4.420454545454546e-06, "loss": 0.0008, "step": 12475 }, { "epoch": 44.96402877697842, "grad_norm": 0.14911916851997375, "learning_rate": 4.41919191919192e-06, "loss": 0.002, "step": 12500 }, { "epoch": 45.05395683453237, "grad_norm": 0.5396764278411865, "learning_rate": 4.417929292929293e-06, "loss": 0.0018, "step": 12525 }, { "epoch": 45.143884892086334, "grad_norm": 0.21499969065189362, "learning_rate": 4.416666666666667e-06, "loss": 0.0008, "step": 12550 }, { "epoch": 45.23381294964029, "grad_norm": 0.12975308299064636, "learning_rate": 4.415404040404041e-06, "loss": 0.0011, "step": 12575 }, { "epoch": 45.32374100719424, "grad_norm": 0.03521961346268654, "learning_rate": 4.414141414141415e-06, "loss": 0.0009, "step": 12600 }, { "epoch": 45.4136690647482, "grad_norm": 0.3964645564556122, "learning_rate": 4.412878787878788e-06, "loss": 0.0009, "step": 12625 }, { "epoch": 45.50359712230216, "grad_norm": 0.04135512188076973, "learning_rate": 4.411616161616162e-06, "loss": 0.0007, "step": 12650 }, { "epoch": 45.593525179856115, "grad_norm": 0.11724065244197845, "learning_rate": 4.410353535353536e-06, "loss": 0.0013, "step": 12675 }, { "epoch": 45.68345323741007, "grad_norm": 0.3066418170928955, "learning_rate": 4.409090909090909e-06, "loss": 0.002, "step": 12700 }, { "epoch": 45.773381294964025, "grad_norm": 0.020460475236177444, "learning_rate": 4.407828282828283e-06, "loss": 0.0004, "step": 12725 }, { "epoch": 45.86330935251799, "grad_norm": 0.021625172346830368, "learning_rate": 4.4065656565656565e-06, "loss": 0.0008, "step": 12750 }, { "epoch": 45.95323741007194, "grad_norm": 0.01973818428814411, "learning_rate": 4.4053030303030305e-06, "loss": 0.0005, "step": 12775 }, { "epoch": 46.0431654676259, "grad_norm": 0.3055168092250824, "learning_rate": 4.4040404040404044e-06, "loss": 0.0004, "step": 12800 }, { "epoch": 46.13309352517986, "grad_norm": 0.11869470030069351, "learning_rate": 4.4027777777777784e-06, "loss": 0.0012, "step": 12825 }, { "epoch": 46.223021582733814, "grad_norm": 0.5959618091583252, "learning_rate": 4.401515151515152e-06, "loss": 0.0007, "step": 12850 }, { "epoch": 46.31294964028777, "grad_norm": 0.08037717640399933, "learning_rate": 4.400252525252526e-06, "loss": 0.0006, "step": 12875 }, { "epoch": 46.402877697841724, "grad_norm": 0.017363494262099266, "learning_rate": 4.3989898989899e-06, "loss": 0.0008, "step": 12900 }, { "epoch": 46.492805755395686, "grad_norm": 0.028551748022437096, "learning_rate": 4.397727272727273e-06, "loss": 0.001, "step": 12925 }, { "epoch": 46.58273381294964, "grad_norm": 0.08840727061033249, "learning_rate": 4.396464646464647e-06, "loss": 0.0007, "step": 12950 }, { "epoch": 46.672661870503596, "grad_norm": 0.023021990433335304, "learning_rate": 4.395202020202021e-06, "loss": 0.0018, "step": 12975 }, { "epoch": 46.76258992805755, "grad_norm": 0.05099537596106529, "learning_rate": 4.393939393939394e-06, "loss": 0.001, "step": 13000 }, { "epoch": 46.76258992805755, "eval_loss": 0.08809197694063187, "eval_runtime": 1348.5762, "eval_samples_per_second": 1.648, "eval_steps_per_second": 0.103, "eval_wer": 6.154187958231504, "step": 13000 }, { "epoch": 46.85251798561151, "grad_norm": 0.02734680473804474, "learning_rate": 4.392676767676768e-06, "loss": 0.0006, "step": 13025 }, { "epoch": 46.94244604316547, "grad_norm": 0.012311214581131935, "learning_rate": 4.391414141414142e-06, "loss": 0.0004, "step": 13050 }, { "epoch": 47.03237410071942, "grad_norm": 1.1471985578536987, "learning_rate": 4.390151515151516e-06, "loss": 0.0006, "step": 13075 }, { "epoch": 47.12230215827338, "grad_norm": 0.04378161579370499, "learning_rate": 4.388888888888889e-06, "loss": 0.0009, "step": 13100 }, { "epoch": 47.21223021582734, "grad_norm": 0.014206623658537865, "learning_rate": 4.387626262626263e-06, "loss": 0.0006, "step": 13125 }, { "epoch": 47.302158273381295, "grad_norm": 0.12384720891714096, "learning_rate": 4.386363636363637e-06, "loss": 0.0008, "step": 13150 }, { "epoch": 47.39208633093525, "grad_norm": 0.12384091317653656, "learning_rate": 4.38510101010101e-06, "loss": 0.0006, "step": 13175 }, { "epoch": 47.48201438848921, "grad_norm": 0.05459749698638916, "learning_rate": 4.383838383838384e-06, "loss": 0.0017, "step": 13200 }, { "epoch": 47.57194244604317, "grad_norm": 0.06376705318689346, "learning_rate": 4.382575757575757e-06, "loss": 0.0012, "step": 13225 }, { "epoch": 47.66187050359712, "grad_norm": 0.09516707807779312, "learning_rate": 4.381313131313132e-06, "loss": 0.0005, "step": 13250 }, { "epoch": 47.75179856115108, "grad_norm": 0.035159386694431305, "learning_rate": 4.380050505050505e-06, "loss": 0.0009, "step": 13275 }, { "epoch": 47.84172661870504, "grad_norm": 0.13273297250270844, "learning_rate": 4.378787878787879e-06, "loss": 0.0011, "step": 13300 }, { "epoch": 47.931654676258994, "grad_norm": 0.6526914834976196, "learning_rate": 4.3775252525252525e-06, "loss": 0.0017, "step": 13325 }, { "epoch": 48.02158273381295, "grad_norm": 0.10989696532487869, "learning_rate": 4.3762626262626265e-06, "loss": 0.0013, "step": 13350 }, { "epoch": 48.111510791366904, "grad_norm": 0.12258470058441162, "learning_rate": 4.3750000000000005e-06, "loss": 0.001, "step": 13375 }, { "epoch": 48.201438848920866, "grad_norm": 0.04794065281748772, "learning_rate": 4.373737373737374e-06, "loss": 0.0006, "step": 13400 }, { "epoch": 48.29136690647482, "grad_norm": 0.18742027878761292, "learning_rate": 4.3724747474747485e-06, "loss": 0.001, "step": 13425 }, { "epoch": 48.381294964028775, "grad_norm": 0.047946684062480927, "learning_rate": 4.371212121212122e-06, "loss": 0.0008, "step": 13450 }, { "epoch": 48.47122302158273, "grad_norm": 0.011459482833743095, "learning_rate": 4.369949494949496e-06, "loss": 0.0004, "step": 13475 }, { "epoch": 48.56115107913669, "grad_norm": 0.0178390983492136, "learning_rate": 4.368686868686869e-06, "loss": 0.0005, "step": 13500 }, { "epoch": 48.65107913669065, "grad_norm": 0.02639496698975563, "learning_rate": 4.367424242424243e-06, "loss": 0.0006, "step": 13525 }, { "epoch": 48.7410071942446, "grad_norm": 0.9992175698280334, "learning_rate": 4.366161616161617e-06, "loss": 0.0006, "step": 13550 }, { "epoch": 48.830935251798564, "grad_norm": 0.12613770365715027, "learning_rate": 4.36489898989899e-06, "loss": 0.0003, "step": 13575 }, { "epoch": 48.92086330935252, "grad_norm": 0.008718474768102169, "learning_rate": 4.363636363636364e-06, "loss": 0.0006, "step": 13600 }, { "epoch": 49.010791366906474, "grad_norm": 0.09226574003696442, "learning_rate": 4.362373737373738e-06, "loss": 0.001, "step": 13625 }, { "epoch": 49.10071942446043, "grad_norm": 0.01371210440993309, "learning_rate": 4.361111111111112e-06, "loss": 0.0005, "step": 13650 }, { "epoch": 49.19064748201439, "grad_norm": 0.8040596842765808, "learning_rate": 4.359848484848485e-06, "loss": 0.0014, "step": 13675 }, { "epoch": 49.280575539568346, "grad_norm": 0.2569543123245239, "learning_rate": 4.358585858585859e-06, "loss": 0.0004, "step": 13700 }, { "epoch": 49.3705035971223, "grad_norm": 0.04654459282755852, "learning_rate": 4.357323232323233e-06, "loss": 0.0003, "step": 13725 }, { "epoch": 49.460431654676256, "grad_norm": 0.03116775117814541, "learning_rate": 4.356060606060606e-06, "loss": 0.0006, "step": 13750 }, { "epoch": 49.55035971223022, "grad_norm": 0.013714387081563473, "learning_rate": 4.35479797979798e-06, "loss": 0.0005, "step": 13775 }, { "epoch": 49.64028776978417, "grad_norm": 0.012171006761491299, "learning_rate": 4.353535353535353e-06, "loss": 0.0005, "step": 13800 }, { "epoch": 49.73021582733813, "grad_norm": 0.39719274640083313, "learning_rate": 4.352272727272727e-06, "loss": 0.0002, "step": 13825 }, { "epoch": 49.82014388489208, "grad_norm": 0.009979949332773685, "learning_rate": 4.351010101010101e-06, "loss": 0.0002, "step": 13850 }, { "epoch": 49.910071942446045, "grad_norm": 0.010056397877633572, "learning_rate": 4.349747474747475e-06, "loss": 0.0001, "step": 13875 }, { "epoch": 50.0, "grad_norm": 1.2399721145629883, "learning_rate": 4.348484848484849e-06, "loss": 0.0003, "step": 13900 }, { "epoch": 50.089928057553955, "grad_norm": 0.008993759751319885, "learning_rate": 4.3472222222222225e-06, "loss": 0.0003, "step": 13925 }, { "epoch": 50.17985611510792, "grad_norm": 0.0040525756776332855, "learning_rate": 4.3459595959595965e-06, "loss": 0.0001, "step": 13950 }, { "epoch": 50.26978417266187, "grad_norm": 0.037480395287275314, "learning_rate": 4.34469696969697e-06, "loss": 0.0006, "step": 13975 }, { "epoch": 50.35971223021583, "grad_norm": 0.011341557838022709, "learning_rate": 4.343434343434344e-06, "loss": 0.0001, "step": 14000 }, { "epoch": 50.35971223021583, "eval_loss": 0.0883052721619606, "eval_runtime": 1347.8354, "eval_samples_per_second": 1.649, "eval_steps_per_second": 0.103, "eval_wer": 6.161593719914093, "step": 14000 }, { "epoch": 50.44964028776978, "grad_norm": 0.097772017121315, "learning_rate": 4.342171717171718e-06, "loss": 0.0003, "step": 14025 }, { "epoch": 50.539568345323744, "grad_norm": 0.22011174261569977, "learning_rate": 4.340909090909091e-06, "loss": 0.0004, "step": 14050 }, { "epoch": 50.6294964028777, "grad_norm": 0.004608627874404192, "learning_rate": 4.339646464646465e-06, "loss": 0.002, "step": 14075 }, { "epoch": 50.719424460431654, "grad_norm": 0.02777382917702198, "learning_rate": 4.338383838383839e-06, "loss": 0.0009, "step": 14100 }, { "epoch": 50.80935251798561, "grad_norm": 0.3765215277671814, "learning_rate": 4.337121212121213e-06, "loss": 0.0015, "step": 14125 }, { "epoch": 50.89928057553957, "grad_norm": 0.014906881377100945, "learning_rate": 4.335858585858586e-06, "loss": 0.0019, "step": 14150 }, { "epoch": 50.989208633093526, "grad_norm": 0.07598377764225006, "learning_rate": 4.33459595959596e-06, "loss": 0.0011, "step": 14175 }, { "epoch": 51.07913669064748, "grad_norm": 0.04858017340302467, "learning_rate": 4.333333333333334e-06, "loss": 0.002, "step": 14200 }, { "epoch": 51.169064748201436, "grad_norm": 0.00848084781318903, "learning_rate": 4.332070707070707e-06, "loss": 0.0015, "step": 14225 }, { "epoch": 51.2589928057554, "grad_norm": 0.192399799823761, "learning_rate": 4.330808080808081e-06, "loss": 0.0014, "step": 14250 }, { "epoch": 51.34892086330935, "grad_norm": 0.17804254591464996, "learning_rate": 4.329545454545455e-06, "loss": 0.0009, "step": 14275 }, { "epoch": 51.43884892086331, "grad_norm": 0.9404972791671753, "learning_rate": 4.328282828282829e-06, "loss": 0.0022, "step": 14300 }, { "epoch": 51.52877697841727, "grad_norm": 0.06042027473449707, "learning_rate": 4.327020202020202e-06, "loss": 0.0009, "step": 14325 }, { "epoch": 51.618705035971225, "grad_norm": 0.11593267321586609, "learning_rate": 4.325757575757576e-06, "loss": 0.001, "step": 14350 }, { "epoch": 51.70863309352518, "grad_norm": 0.042370762676000595, "learning_rate": 4.32449494949495e-06, "loss": 0.0009, "step": 14375 }, { "epoch": 51.798561151079134, "grad_norm": 0.06264758855104446, "learning_rate": 4.323232323232323e-06, "loss": 0.0011, "step": 14400 }, { "epoch": 51.888489208633096, "grad_norm": 0.419005811214447, "learning_rate": 4.321969696969697e-06, "loss": 0.0013, "step": 14425 }, { "epoch": 51.97841726618705, "grad_norm": 0.025492649525403976, "learning_rate": 4.3207070707070705e-06, "loss": 0.0008, "step": 14450 }, { "epoch": 52.068345323741006, "grad_norm": 0.1695825606584549, "learning_rate": 4.319444444444445e-06, "loss": 0.001, "step": 14475 }, { "epoch": 52.15827338129496, "grad_norm": 0.21136726438999176, "learning_rate": 4.3181818181818185e-06, "loss": 0.0004, "step": 14500 }, { "epoch": 52.24820143884892, "grad_norm": 0.00583269540220499, "learning_rate": 4.3169191919191925e-06, "loss": 0.0003, "step": 14525 }, { "epoch": 52.33812949640288, "grad_norm": 0.05031251907348633, "learning_rate": 4.315656565656566e-06, "loss": 0.0005, "step": 14550 }, { "epoch": 52.42805755395683, "grad_norm": 1.4654878377914429, "learning_rate": 4.31439393939394e-06, "loss": 0.0011, "step": 14575 }, { "epoch": 52.51798561151079, "grad_norm": 0.05035277083516121, "learning_rate": 4.313131313131314e-06, "loss": 0.0008, "step": 14600 }, { "epoch": 52.60791366906475, "grad_norm": 0.3283204138278961, "learning_rate": 4.311868686868687e-06, "loss": 0.0024, "step": 14625 }, { "epoch": 52.697841726618705, "grad_norm": 0.09352482855319977, "learning_rate": 4.310606060606061e-06, "loss": 0.0013, "step": 14650 }, { "epoch": 52.78776978417266, "grad_norm": 0.4381198287010193, "learning_rate": 4.309343434343435e-06, "loss": 0.0014, "step": 14675 }, { "epoch": 52.87769784172662, "grad_norm": 0.4195464551448822, "learning_rate": 4.308080808080809e-06, "loss": 0.0006, "step": 14700 }, { "epoch": 52.96762589928058, "grad_norm": 0.037935055792331696, "learning_rate": 4.306818181818182e-06, "loss": 0.0005, "step": 14725 }, { "epoch": 53.05755395683453, "grad_norm": 0.0057031637988984585, "learning_rate": 4.305555555555556e-06, "loss": 0.0011, "step": 14750 }, { "epoch": 53.14748201438849, "grad_norm": 0.09235268831253052, "learning_rate": 4.30429292929293e-06, "loss": 0.0012, "step": 14775 }, { "epoch": 53.23741007194245, "grad_norm": 0.4533500075340271, "learning_rate": 4.303030303030303e-06, "loss": 0.0013, "step": 14800 }, { "epoch": 53.327338129496404, "grad_norm": 0.14968417584896088, "learning_rate": 4.301767676767677e-06, "loss": 0.0009, "step": 14825 }, { "epoch": 53.41726618705036, "grad_norm": 0.016032686457037926, "learning_rate": 4.300505050505051e-06, "loss": 0.0003, "step": 14850 }, { "epoch": 53.507194244604314, "grad_norm": 0.04255020618438721, "learning_rate": 4.299242424242425e-06, "loss": 0.0002, "step": 14875 }, { "epoch": 53.597122302158276, "grad_norm": 0.01301508117467165, "learning_rate": 4.297979797979798e-06, "loss": 0.0003, "step": 14900 }, { "epoch": 53.68705035971223, "grad_norm": 0.007252383045852184, "learning_rate": 4.296717171717172e-06, "loss": 0.0005, "step": 14925 }, { "epoch": 53.776978417266186, "grad_norm": 0.13183751702308655, "learning_rate": 4.295454545454546e-06, "loss": 0.002, "step": 14950 }, { "epoch": 53.86690647482014, "grad_norm": 0.028183195739984512, "learning_rate": 4.294191919191919e-06, "loss": 0.0015, "step": 14975 }, { "epoch": 53.9568345323741, "grad_norm": 0.1370900571346283, "learning_rate": 4.292929292929293e-06, "loss": 0.0007, "step": 15000 }, { "epoch": 53.9568345323741, "eval_loss": 0.08864730596542358, "eval_runtime": 1347.6756, "eval_samples_per_second": 1.649, "eval_steps_per_second": 0.103, "eval_wer": 6.391172332074353, "step": 15000 }, { "epoch": 54.04676258992806, "grad_norm": 0.01960013061761856, "learning_rate": 4.2916666666666665e-06, "loss": 0.0016, "step": 15025 }, { "epoch": 54.13669064748201, "grad_norm": 0.13105234503746033, "learning_rate": 4.2904040404040405e-06, "loss": 0.0003, "step": 15050 }, { "epoch": 54.226618705035975, "grad_norm": 2.309511423110962, "learning_rate": 4.2891414141414145e-06, "loss": 0.0009, "step": 15075 }, { "epoch": 54.31654676258993, "grad_norm": 0.018184732645750046, "learning_rate": 4.287878787878788e-06, "loss": 0.001, "step": 15100 }, { "epoch": 54.406474820143885, "grad_norm": 0.05596456304192543, "learning_rate": 4.2866161616161625e-06, "loss": 0.0012, "step": 15125 }, { "epoch": 54.49640287769784, "grad_norm": 0.735536515712738, "learning_rate": 4.285353535353536e-06, "loss": 0.0014, "step": 15150 }, { "epoch": 54.5863309352518, "grad_norm": 0.641944169998169, "learning_rate": 4.28409090909091e-06, "loss": 0.0017, "step": 15175 }, { "epoch": 54.67625899280576, "grad_norm": 0.02818766050040722, "learning_rate": 4.282828282828283e-06, "loss": 0.0013, "step": 15200 }, { "epoch": 54.76618705035971, "grad_norm": 0.04384085536003113, "learning_rate": 4.281565656565657e-06, "loss": 0.0012, "step": 15225 }, { "epoch": 54.856115107913666, "grad_norm": 0.5741293430328369, "learning_rate": 4.280303030303031e-06, "loss": 0.0012, "step": 15250 }, { "epoch": 54.94604316546763, "grad_norm": 0.5108962059020996, "learning_rate": 4.279040404040404e-06, "loss": 0.0013, "step": 15275 }, { "epoch": 55.03597122302158, "grad_norm": 0.09613129496574402, "learning_rate": 4.277777777777778e-06, "loss": 0.0011, "step": 15300 }, { "epoch": 55.12589928057554, "grad_norm": 0.2453729510307312, "learning_rate": 4.276515151515152e-06, "loss": 0.0016, "step": 15325 }, { "epoch": 55.21582733812949, "grad_norm": 0.03533944860100746, "learning_rate": 4.275252525252526e-06, "loss": 0.0013, "step": 15350 }, { "epoch": 55.305755395683455, "grad_norm": 0.02793753705918789, "learning_rate": 4.273989898989899e-06, "loss": 0.0011, "step": 15375 }, { "epoch": 55.39568345323741, "grad_norm": 0.11208122968673706, "learning_rate": 4.272727272727273e-06, "loss": 0.0014, "step": 15400 }, { "epoch": 55.485611510791365, "grad_norm": 0.23727653920650482, "learning_rate": 4.271464646464647e-06, "loss": 0.0007, "step": 15425 }, { "epoch": 55.57553956834532, "grad_norm": 0.1095881313085556, "learning_rate": 4.27020202020202e-06, "loss": 0.0006, "step": 15450 }, { "epoch": 55.66546762589928, "grad_norm": 0.026398301124572754, "learning_rate": 4.268939393939394e-06, "loss": 0.0003, "step": 15475 }, { "epoch": 55.75539568345324, "grad_norm": 0.3764269948005676, "learning_rate": 4.267676767676767e-06, "loss": 0.0007, "step": 15500 }, { "epoch": 55.84532374100719, "grad_norm": 0.710081160068512, "learning_rate": 4.266414141414142e-06, "loss": 0.0006, "step": 15525 }, { "epoch": 55.935251798561154, "grad_norm": 0.01405036449432373, "learning_rate": 4.265151515151515e-06, "loss": 0.0009, "step": 15550 }, { "epoch": 56.02517985611511, "grad_norm": 0.011654024943709373, "learning_rate": 4.263888888888889e-06, "loss": 0.0011, "step": 15575 }, { "epoch": 56.115107913669064, "grad_norm": 0.8455324172973633, "learning_rate": 4.262626262626263e-06, "loss": 0.0006, "step": 15600 }, { "epoch": 56.20503597122302, "grad_norm": 0.7859840989112854, "learning_rate": 4.2613636363636365e-06, "loss": 0.0005, "step": 15625 }, { "epoch": 56.29496402877698, "grad_norm": 0.012887760065495968, "learning_rate": 4.2601010101010105e-06, "loss": 0.0003, "step": 15650 }, { "epoch": 56.384892086330936, "grad_norm": 0.27630236744880676, "learning_rate": 4.258838383838384e-06, "loss": 0.0005, "step": 15675 }, { "epoch": 56.47482014388489, "grad_norm": 0.23494713008403778, "learning_rate": 4.2575757575757585e-06, "loss": 0.0003, "step": 15700 }, { "epoch": 56.564748201438846, "grad_norm": 0.04018251597881317, "learning_rate": 4.256313131313132e-06, "loss": 0.0009, "step": 15725 }, { "epoch": 56.65467625899281, "grad_norm": 0.29447436332702637, "learning_rate": 4.255050505050506e-06, "loss": 0.0002, "step": 15750 }, { "epoch": 56.74460431654676, "grad_norm": 0.048734016716480255, "learning_rate": 4.253787878787879e-06, "loss": 0.0008, "step": 15775 }, { "epoch": 56.83453237410072, "grad_norm": 0.00981312245130539, "learning_rate": 4.252525252525253e-06, "loss": 0.0003, "step": 15800 }, { "epoch": 56.92446043165468, "grad_norm": 0.029217666015028954, "learning_rate": 4.251262626262627e-06, "loss": 0.0002, "step": 15825 }, { "epoch": 57.014388489208635, "grad_norm": 0.0892946720123291, "learning_rate": 4.25e-06, "loss": 0.0008, "step": 15850 }, { "epoch": 57.10431654676259, "grad_norm": 0.0070861089043319225, "learning_rate": 4.248737373737374e-06, "loss": 0.0003, "step": 15875 }, { "epoch": 57.194244604316545, "grad_norm": 0.5670444965362549, "learning_rate": 4.247474747474748e-06, "loss": 0.0005, "step": 15900 }, { "epoch": 57.28417266187051, "grad_norm": 0.4061719477176666, "learning_rate": 4.246212121212122e-06, "loss": 0.0009, "step": 15925 }, { "epoch": 57.37410071942446, "grad_norm": 0.2658737897872925, "learning_rate": 4.244949494949495e-06, "loss": 0.0011, "step": 15950 }, { "epoch": 57.46402877697842, "grad_norm": 0.06908473372459412, "learning_rate": 4.243686868686869e-06, "loss": 0.0012, "step": 15975 }, { "epoch": 57.55395683453237, "grad_norm": 0.12484970688819885, "learning_rate": 4.242424242424243e-06, "loss": 0.0008, "step": 16000 }, { "epoch": 57.55395683453237, "eval_loss": 0.09175190329551697, "eval_runtime": 1351.6711, "eval_samples_per_second": 1.644, "eval_steps_per_second": 0.103, "eval_wer": 6.391172332074353, "step": 16000 }, { "epoch": 57.643884892086334, "grad_norm": 0.10532079637050629, "learning_rate": 4.241161616161616e-06, "loss": 0.0009, "step": 16025 }, { "epoch": 57.73381294964029, "grad_norm": 0.0082013588398695, "learning_rate": 4.23989898989899e-06, "loss": 0.0009, "step": 16050 }, { "epoch": 57.82374100719424, "grad_norm": 0.8880343437194824, "learning_rate": 4.238636363636364e-06, "loss": 0.0012, "step": 16075 }, { "epoch": 57.9136690647482, "grad_norm": 0.04694369435310364, "learning_rate": 4.237373737373737e-06, "loss": 0.0011, "step": 16100 }, { "epoch": 58.00359712230216, "grad_norm": 0.4175935387611389, "learning_rate": 4.236111111111111e-06, "loss": 0.0007, "step": 16125 }, { "epoch": 58.093525179856115, "grad_norm": 0.0991375669836998, "learning_rate": 4.234848484848485e-06, "loss": 0.0008, "step": 16150 }, { "epoch": 58.18345323741007, "grad_norm": 0.05238619074225426, "learning_rate": 4.233585858585859e-06, "loss": 0.0009, "step": 16175 }, { "epoch": 58.273381294964025, "grad_norm": 0.024060403928160667, "learning_rate": 4.2323232323232325e-06, "loss": 0.0005, "step": 16200 }, { "epoch": 58.36330935251799, "grad_norm": 0.514026939868927, "learning_rate": 4.2310606060606065e-06, "loss": 0.0017, "step": 16225 }, { "epoch": 58.45323741007194, "grad_norm": 0.9123257994651794, "learning_rate": 4.22979797979798e-06, "loss": 0.0009, "step": 16250 }, { "epoch": 58.5431654676259, "grad_norm": 0.034488383680582047, "learning_rate": 4.228535353535354e-06, "loss": 0.0005, "step": 16275 }, { "epoch": 58.63309352517986, "grad_norm": 0.08020392805337906, "learning_rate": 4.227272727272728e-06, "loss": 0.0021, "step": 16300 }, { "epoch": 58.723021582733814, "grad_norm": 0.011538870632648468, "learning_rate": 4.226010101010101e-06, "loss": 0.001, "step": 16325 }, { "epoch": 58.81294964028777, "grad_norm": 0.4130057692527771, "learning_rate": 4.224747474747475e-06, "loss": 0.0009, "step": 16350 }, { "epoch": 58.902877697841724, "grad_norm": 0.018940504640340805, "learning_rate": 4.223484848484849e-06, "loss": 0.0008, "step": 16375 }, { "epoch": 58.992805755395686, "grad_norm": 0.09760510176420212, "learning_rate": 4.222222222222223e-06, "loss": 0.0009, "step": 16400 }, { "epoch": 59.08273381294964, "grad_norm": 0.6728724241256714, "learning_rate": 4.220959595959596e-06, "loss": 0.0015, "step": 16425 }, { "epoch": 59.172661870503596, "grad_norm": 0.01400268916040659, "learning_rate": 4.21969696969697e-06, "loss": 0.0006, "step": 16450 }, { "epoch": 59.26258992805755, "grad_norm": 0.027168823406100273, "learning_rate": 4.218434343434344e-06, "loss": 0.0003, "step": 16475 }, { "epoch": 59.35251798561151, "grad_norm": 0.025733735412359238, "learning_rate": 4.217171717171717e-06, "loss": 0.0003, "step": 16500 }, { "epoch": 59.44244604316547, "grad_norm": 0.012072687968611717, "learning_rate": 4.215909090909091e-06, "loss": 0.0004, "step": 16525 }, { "epoch": 59.53237410071942, "grad_norm": 0.03630650043487549, "learning_rate": 4.214646464646465e-06, "loss": 0.0013, "step": 16550 }, { "epoch": 59.62230215827338, "grad_norm": 0.13875187933444977, "learning_rate": 4.213383838383839e-06, "loss": 0.0008, "step": 16575 }, { "epoch": 59.71223021582734, "grad_norm": 0.06004035472869873, "learning_rate": 4.212121212121212e-06, "loss": 0.0004, "step": 16600 }, { "epoch": 59.802158273381295, "grad_norm": 0.024319609627127647, "learning_rate": 4.210858585858586e-06, "loss": 0.0006, "step": 16625 }, { "epoch": 59.89208633093525, "grad_norm": 0.0957476794719696, "learning_rate": 4.20959595959596e-06, "loss": 0.0016, "step": 16650 }, { "epoch": 59.98201438848921, "grad_norm": 0.014447568915784359, "learning_rate": 4.208333333333333e-06, "loss": 0.0007, "step": 16675 }, { "epoch": 60.07194244604317, "grad_norm": 0.0760221779346466, "learning_rate": 4.207070707070707e-06, "loss": 0.0007, "step": 16700 }, { "epoch": 60.16187050359712, "grad_norm": 0.08783930540084839, "learning_rate": 4.2058080808080806e-06, "loss": 0.0006, "step": 16725 }, { "epoch": 60.25179856115108, "grad_norm": 0.020011553540825844, "learning_rate": 4.204545454545455e-06, "loss": 0.0002, "step": 16750 }, { "epoch": 60.34172661870504, "grad_norm": 0.004587370436638594, "learning_rate": 4.2032828282828286e-06, "loss": 0.0001, "step": 16775 }, { "epoch": 60.431654676258994, "grad_norm": 0.05192629247903824, "learning_rate": 4.2020202020202026e-06, "loss": 0.0007, "step": 16800 }, { "epoch": 60.52158273381295, "grad_norm": 0.0028184789698570967, "learning_rate": 4.2007575757575766e-06, "loss": 0.0001, "step": 16825 }, { "epoch": 60.611510791366904, "grad_norm": 0.11263082921504974, "learning_rate": 4.19949494949495e-06, "loss": 0.0004, "step": 16850 }, { "epoch": 60.701438848920866, "grad_norm": 0.020229890942573547, "learning_rate": 4.198232323232324e-06, "loss": 0.0002, "step": 16875 }, { "epoch": 60.79136690647482, "grad_norm": 0.004258246161043644, "learning_rate": 4.196969696969697e-06, "loss": 0.0004, "step": 16900 }, { "epoch": 60.881294964028775, "grad_norm": 0.005619137082248926, "learning_rate": 4.195707070707072e-06, "loss": 0.0001, "step": 16925 }, { "epoch": 60.97122302158273, "grad_norm": 0.005032286513596773, "learning_rate": 4.194444444444445e-06, "loss": 0.0002, "step": 16950 }, { "epoch": 61.06115107913669, "grad_norm": 0.02484523132443428, "learning_rate": 4.193181818181819e-06, "loss": 0.0003, "step": 16975 }, { "epoch": 61.15107913669065, "grad_norm": 0.0017194038955494761, "learning_rate": 4.191919191919192e-06, "loss": 0.0002, "step": 17000 }, { "epoch": 61.15107913669065, "eval_loss": 0.09027338027954102, "eval_runtime": 1359.5537, "eval_samples_per_second": 1.634, "eval_steps_per_second": 0.102, "eval_wer": 5.909797822706065, "step": 17000 }, { "epoch": 61.2410071942446, "grad_norm": 0.0024019062984734774, "learning_rate": 4.190656565656566e-06, "loss": 0.0002, "step": 17025 }, { "epoch": 61.330935251798564, "grad_norm": 0.004478455055505037, "learning_rate": 4.18939393939394e-06, "loss": 0.0003, "step": 17050 }, { "epoch": 61.42086330935252, "grad_norm": 0.0044603836722671986, "learning_rate": 4.188131313131313e-06, "loss": 0.0004, "step": 17075 }, { "epoch": 61.510791366906474, "grad_norm": 0.08818788081407547, "learning_rate": 4.186868686868687e-06, "loss": 0.0009, "step": 17100 }, { "epoch": 61.60071942446043, "grad_norm": 0.0027286384720355272, "learning_rate": 4.185606060606061e-06, "loss": 0.0002, "step": 17125 }, { "epoch": 61.69064748201439, "grad_norm": 0.0037345695309340954, "learning_rate": 4.184343434343434e-06, "loss": 0.0003, "step": 17150 }, { "epoch": 61.780575539568346, "grad_norm": 0.014616015367209911, "learning_rate": 4.183080808080808e-06, "loss": 0.0004, "step": 17175 }, { "epoch": 61.8705035971223, "grad_norm": 0.007769573014229536, "learning_rate": 4.181818181818182e-06, "loss": 0.0003, "step": 17200 }, { "epoch": 61.960431654676256, "grad_norm": 0.008359814994037151, "learning_rate": 4.180555555555556e-06, "loss": 0.0005, "step": 17225 }, { "epoch": 62.05035971223022, "grad_norm": 0.0051100486889481544, "learning_rate": 4.1792929292929294e-06, "loss": 0.0005, "step": 17250 }, { "epoch": 62.14028776978417, "grad_norm": 0.0029563389252871275, "learning_rate": 4.1780303030303034e-06, "loss": 0.0001, "step": 17275 }, { "epoch": 62.23021582733813, "grad_norm": 0.0030668089166283607, "learning_rate": 4.1767676767676774e-06, "loss": 0.0001, "step": 17300 }, { "epoch": 62.32014388489208, "grad_norm": 0.02710825577378273, "learning_rate": 4.175505050505051e-06, "loss": 0.0006, "step": 17325 }, { "epoch": 62.410071942446045, "grad_norm": 0.0027756947092711926, "learning_rate": 4.1742424242424246e-06, "loss": 0.0001, "step": 17350 }, { "epoch": 62.5, "grad_norm": 0.09106307476758957, "learning_rate": 4.172979797979798e-06, "loss": 0.0003, "step": 17375 }, { "epoch": 62.589928057553955, "grad_norm": 0.005363088101148605, "learning_rate": 4.1717171717171726e-06, "loss": 0.0001, "step": 17400 }, { "epoch": 62.67985611510792, "grad_norm": 0.005525332409888506, "learning_rate": 4.170454545454546e-06, "loss": 0.0001, "step": 17425 }, { "epoch": 62.76978417266187, "grad_norm": 0.007496482692658901, "learning_rate": 4.16919191919192e-06, "loss": 0.0001, "step": 17450 }, { "epoch": 62.85971223021583, "grad_norm": 0.026290051639080048, "learning_rate": 4.167929292929293e-06, "loss": 0.0001, "step": 17475 }, { "epoch": 62.94964028776978, "grad_norm": 0.006395560223609209, "learning_rate": 4.166666666666667e-06, "loss": 0.0001, "step": 17500 }, { "epoch": 63.039568345323744, "grad_norm": 0.004197731614112854, "learning_rate": 4.165404040404041e-06, "loss": 0.0001, "step": 17525 }, { "epoch": 63.1294964028777, "grad_norm": 0.002505301032215357, "learning_rate": 4.164141414141414e-06, "loss": 0.0, "step": 17550 }, { "epoch": 63.219424460431654, "grad_norm": 0.0022915108129382133, "learning_rate": 4.162878787878788e-06, "loss": 0.0001, "step": 17575 }, { "epoch": 63.30935251798561, "grad_norm": 0.0019390948582440615, "learning_rate": 4.161616161616162e-06, "loss": 0.0, "step": 17600 }, { "epoch": 63.39928057553957, "grad_norm": 0.001307799364440143, "learning_rate": 4.160353535353536e-06, "loss": 0.0001, "step": 17625 }, { "epoch": 63.489208633093526, "grad_norm": 0.0016936671454459429, "learning_rate": 4.159090909090909e-06, "loss": 0.0, "step": 17650 }, { "epoch": 63.57913669064748, "grad_norm": 0.0017974688671529293, "learning_rate": 4.157828282828283e-06, "loss": 0.0, "step": 17675 }, { "epoch": 63.669064748201436, "grad_norm": 0.0027852486819028854, "learning_rate": 4.156565656565657e-06, "loss": 0.0, "step": 17700 }, { "epoch": 63.7589928057554, "grad_norm": 0.0017096559749916196, "learning_rate": 4.15530303030303e-06, "loss": 0.0, "step": 17725 }, { "epoch": 63.84892086330935, "grad_norm": 0.0019876237493008375, "learning_rate": 4.154040404040404e-06, "loss": 0.0, "step": 17750 }, { "epoch": 63.93884892086331, "grad_norm": 0.0011115281376987696, "learning_rate": 4.152777777777778e-06, "loss": 0.0, "step": 17775 }, { "epoch": 64.02877697841727, "grad_norm": 0.0017126763705164194, "learning_rate": 4.151515151515152e-06, "loss": 0.0, "step": 17800 }, { "epoch": 64.11870503597122, "grad_norm": 0.0011258955346420407, "learning_rate": 4.1502525252525254e-06, "loss": 0.0, "step": 17825 }, { "epoch": 64.20863309352518, "grad_norm": 0.0015615399461239576, "learning_rate": 4.1489898989898994e-06, "loss": 0.0, "step": 17850 }, { "epoch": 64.29856115107914, "grad_norm": 0.001990539487451315, "learning_rate": 4.1477272727272734e-06, "loss": 0.0, "step": 17875 }, { "epoch": 64.38848920863309, "grad_norm": 0.0013739466667175293, "learning_rate": 4.146464646464647e-06, "loss": 0.0, "step": 17900 }, { "epoch": 64.47841726618705, "grad_norm": 0.0017153042135760188, "learning_rate": 4.145202020202021e-06, "loss": 0.0, "step": 17925 }, { "epoch": 64.56834532374101, "grad_norm": 0.0013855737634003162, "learning_rate": 4.143939393939394e-06, "loss": 0.0, "step": 17950 }, { "epoch": 64.65827338129496, "grad_norm": 0.0023376569151878357, "learning_rate": 4.142676767676769e-06, "loss": 0.0001, "step": 17975 }, { "epoch": 64.74820143884892, "grad_norm": 0.0007114307954907417, "learning_rate": 4.141414141414142e-06, "loss": 0.0, "step": 18000 }, { "epoch": 64.74820143884892, "eval_loss": 0.09263601154088974, "eval_runtime": 1339.2527, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.104, "eval_wer": 5.658001925498037, "step": 18000 }, { "epoch": 64.83812949640287, "grad_norm": 0.0010609790915623307, "learning_rate": 4.140151515151516e-06, "loss": 0.0, "step": 18025 }, { "epoch": 64.92805755395683, "grad_norm": 0.0020956743974238634, "learning_rate": 4.138888888888889e-06, "loss": 0.0, "step": 18050 }, { "epoch": 65.0179856115108, "grad_norm": 0.0013533415040001273, "learning_rate": 4.137626262626263e-06, "loss": 0.0001, "step": 18075 }, { "epoch": 65.10791366906474, "grad_norm": 0.0010088173439726233, "learning_rate": 4.136363636363637e-06, "loss": 0.0001, "step": 18100 }, { "epoch": 65.1978417266187, "grad_norm": 0.001570379245094955, "learning_rate": 4.13510101010101e-06, "loss": 0.0, "step": 18125 }, { "epoch": 65.28776978417267, "grad_norm": 0.0016373491380363703, "learning_rate": 4.133838383838384e-06, "loss": 0.0, "step": 18150 }, { "epoch": 65.37769784172662, "grad_norm": 0.0015006172470748425, "learning_rate": 4.132575757575758e-06, "loss": 0.0, "step": 18175 }, { "epoch": 65.46762589928058, "grad_norm": 0.0011033018818125129, "learning_rate": 4.131313131313132e-06, "loss": 0.0, "step": 18200 }, { "epoch": 65.55755395683454, "grad_norm": 0.0013498698826879263, "learning_rate": 4.130050505050505e-06, "loss": 0.0, "step": 18225 }, { "epoch": 65.64748201438849, "grad_norm": 0.0013445069780573249, "learning_rate": 4.128787878787879e-06, "loss": 0.0, "step": 18250 }, { "epoch": 65.73741007194245, "grad_norm": 0.0017605924513190985, "learning_rate": 4.127525252525253e-06, "loss": 0.0, "step": 18275 }, { "epoch": 65.8273381294964, "grad_norm": 0.0018534163245931268, "learning_rate": 4.126262626262626e-06, "loss": 0.0, "step": 18300 }, { "epoch": 65.91726618705036, "grad_norm": 0.000884951208718121, "learning_rate": 4.125e-06, "loss": 0.0, "step": 18325 }, { "epoch": 66.00719424460432, "grad_norm": 0.0011815873440355062, "learning_rate": 4.123737373737374e-06, "loss": 0.0001, "step": 18350 }, { "epoch": 66.09712230215827, "grad_norm": 0.001126173185184598, "learning_rate": 4.1224747474747475e-06, "loss": 0.0, "step": 18375 }, { "epoch": 66.18705035971223, "grad_norm": 0.0011552530340850353, "learning_rate": 4.1212121212121215e-06, "loss": 0.0, "step": 18400 }, { "epoch": 66.27697841726619, "grad_norm": 0.001199888065457344, "learning_rate": 4.119949494949495e-06, "loss": 0.0, "step": 18425 }, { "epoch": 66.36690647482014, "grad_norm": 0.0007247981848195195, "learning_rate": 4.1186868686868695e-06, "loss": 0.0, "step": 18450 }, { "epoch": 66.4568345323741, "grad_norm": 0.001124533242546022, "learning_rate": 4.117424242424243e-06, "loss": 0.0001, "step": 18475 }, { "epoch": 66.54676258992805, "grad_norm": 0.0009603950311429799, "learning_rate": 4.116161616161617e-06, "loss": 0.0, "step": 18500 }, { "epoch": 66.63669064748201, "grad_norm": 0.0016920759808272123, "learning_rate": 4.114898989898991e-06, "loss": 0.0001, "step": 18525 }, { "epoch": 66.72661870503597, "grad_norm": 0.0007674341322854161, "learning_rate": 4.113636363636364e-06, "loss": 0.0, "step": 18550 }, { "epoch": 66.81654676258992, "grad_norm": 0.000895792618393898, "learning_rate": 4.112373737373738e-06, "loss": 0.0, "step": 18575 }, { "epoch": 66.90647482014388, "grad_norm": 0.0009227583650499582, "learning_rate": 4.111111111111111e-06, "loss": 0.0, "step": 18600 }, { "epoch": 66.99640287769785, "grad_norm": 0.0019231617916375399, "learning_rate": 4.109848484848486e-06, "loss": 0.0, "step": 18625 }, { "epoch": 67.0863309352518, "grad_norm": 0.0010071933502331376, "learning_rate": 4.108585858585859e-06, "loss": 0.0, "step": 18650 }, { "epoch": 67.17625899280576, "grad_norm": 0.0009304916602559388, "learning_rate": 4.107323232323233e-06, "loss": 0.0002, "step": 18675 }, { "epoch": 67.26618705035972, "grad_norm": 0.0008229652885347605, "learning_rate": 4.106060606060606e-06, "loss": 0.0, "step": 18700 }, { "epoch": 67.35611510791367, "grad_norm": 0.0006714012124575675, "learning_rate": 4.10479797979798e-06, "loss": 0.0, "step": 18725 }, { "epoch": 67.44604316546763, "grad_norm": 0.0009734642808325589, "learning_rate": 4.103535353535354e-06, "loss": 0.0, "step": 18750 }, { "epoch": 67.53597122302158, "grad_norm": 0.0007786314818076789, "learning_rate": 4.102272727272727e-06, "loss": 0.0, "step": 18775 }, { "epoch": 67.62589928057554, "grad_norm": 0.001005512080155313, "learning_rate": 4.101010101010101e-06, "loss": 0.0, "step": 18800 }, { "epoch": 67.7158273381295, "grad_norm": 0.001331688603386283, "learning_rate": 4.099747474747475e-06, "loss": 0.0, "step": 18825 }, { "epoch": 67.80575539568345, "grad_norm": 0.000987470499239862, "learning_rate": 4.098484848484849e-06, "loss": 0.0, "step": 18850 }, { "epoch": 67.89568345323741, "grad_norm": 0.0008799554198049009, "learning_rate": 4.097222222222222e-06, "loss": 0.0001, "step": 18875 }, { "epoch": 67.98561151079137, "grad_norm": 0.0009637974435463548, "learning_rate": 4.095959595959596e-06, "loss": 0.0, "step": 18900 }, { "epoch": 68.07553956834532, "grad_norm": 0.0006672360468655825, "learning_rate": 4.09469696969697e-06, "loss": 0.0, "step": 18925 }, { "epoch": 68.16546762589928, "grad_norm": 0.0008431566529907286, "learning_rate": 4.0934343434343435e-06, "loss": 0.0001, "step": 18950 }, { "epoch": 68.25539568345324, "grad_norm": 0.0010287058539688587, "learning_rate": 4.0921717171717175e-06, "loss": 0.0, "step": 18975 }, { "epoch": 68.34532374100719, "grad_norm": 0.0007457846077159047, "learning_rate": 4.0909090909090915e-06, "loss": 0.0, "step": 19000 }, { "epoch": 68.34532374100719, "eval_loss": 0.09562169760465622, "eval_runtime": 1339.1079, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.104, "eval_wer": 5.583944308672146, "step": 19000 }, { "epoch": 68.43525179856115, "grad_norm": 0.0009193470468744636, "learning_rate": 4.0896464646464655e-06, "loss": 0.0, "step": 19025 }, { "epoch": 68.5251798561151, "grad_norm": 0.0008717461605556309, "learning_rate": 4.088383838383839e-06, "loss": 0.0, "step": 19050 }, { "epoch": 68.61510791366906, "grad_norm": 0.0008119108970277011, "learning_rate": 4.087121212121213e-06, "loss": 0.0, "step": 19075 }, { "epoch": 68.70503597122303, "grad_norm": 0.0010454319417476654, "learning_rate": 4.085858585858587e-06, "loss": 0.0001, "step": 19100 }, { "epoch": 68.79496402877697, "grad_norm": 0.0012115614954382181, "learning_rate": 4.08459595959596e-06, "loss": 0.0, "step": 19125 }, { "epoch": 68.88489208633094, "grad_norm": 0.001058676978573203, "learning_rate": 4.083333333333334e-06, "loss": 0.0, "step": 19150 }, { "epoch": 68.9748201438849, "grad_norm": 0.0009722402319312096, "learning_rate": 4.082070707070707e-06, "loss": 0.0, "step": 19175 }, { "epoch": 69.06474820143885, "grad_norm": 0.0006609881529584527, "learning_rate": 4.080808080808081e-06, "loss": 0.0, "step": 19200 }, { "epoch": 69.15467625899281, "grad_norm": 0.0007030842243693769, "learning_rate": 4.079545454545455e-06, "loss": 0.0, "step": 19225 }, { "epoch": 69.24460431654676, "grad_norm": 0.0006842823349870741, "learning_rate": 4.078282828282829e-06, "loss": 0.0001, "step": 19250 }, { "epoch": 69.33453237410072, "grad_norm": 0.000651550421025604, "learning_rate": 4.077020202020202e-06, "loss": 0.0, "step": 19275 }, { "epoch": 69.42446043165468, "grad_norm": 0.0006407879409380257, "learning_rate": 4.075757575757576e-06, "loss": 0.0001, "step": 19300 }, { "epoch": 69.51438848920863, "grad_norm": 0.0010551882442086935, "learning_rate": 4.07449494949495e-06, "loss": 0.0, "step": 19325 }, { "epoch": 69.60431654676259, "grad_norm": 0.0008015549392439425, "learning_rate": 4.073232323232323e-06, "loss": 0.0, "step": 19350 }, { "epoch": 69.69424460431655, "grad_norm": 0.0008218359434977174, "learning_rate": 4.071969696969697e-06, "loss": 0.0, "step": 19375 }, { "epoch": 69.7841726618705, "grad_norm": 0.0009953822009265423, "learning_rate": 4.070707070707071e-06, "loss": 0.0, "step": 19400 }, { "epoch": 69.87410071942446, "grad_norm": 0.0008482063421979547, "learning_rate": 4.069444444444444e-06, "loss": 0.0, "step": 19425 }, { "epoch": 69.96402877697842, "grad_norm": 0.0008491966291330755, "learning_rate": 4.068181818181818e-06, "loss": 0.0, "step": 19450 }, { "epoch": 70.05395683453237, "grad_norm": 0.000667088374029845, "learning_rate": 4.066919191919192e-06, "loss": 0.0001, "step": 19475 }, { "epoch": 70.14388489208633, "grad_norm": 0.0006748430896550417, "learning_rate": 4.065656565656566e-06, "loss": 0.0, "step": 19500 }, { "epoch": 70.23381294964028, "grad_norm": 0.0006421016296371818, "learning_rate": 4.0643939393939395e-06, "loss": 0.0, "step": 19525 }, { "epoch": 70.32374100719424, "grad_norm": 0.0009323668200522661, "learning_rate": 4.0631313131313135e-06, "loss": 0.0, "step": 19550 }, { "epoch": 70.4136690647482, "grad_norm": 0.0008588407654315233, "learning_rate": 4.0618686868686875e-06, "loss": 0.0, "step": 19575 }, { "epoch": 70.50359712230215, "grad_norm": 0.0006930006784386933, "learning_rate": 4.060606060606061e-06, "loss": 0.0, "step": 19600 }, { "epoch": 70.59352517985612, "grad_norm": 0.000734307337552309, "learning_rate": 4.059343434343435e-06, "loss": 0.0, "step": 19625 }, { "epoch": 70.68345323741008, "grad_norm": 0.0007306214538402855, "learning_rate": 4.058080808080808e-06, "loss": 0.0001, "step": 19650 }, { "epoch": 70.77338129496403, "grad_norm": 0.0005738097243010998, "learning_rate": 4.056818181818183e-06, "loss": 0.0, "step": 19675 }, { "epoch": 70.86330935251799, "grad_norm": 0.00065003422787413, "learning_rate": 4.055555555555556e-06, "loss": 0.0, "step": 19700 }, { "epoch": 70.95323741007195, "grad_norm": 0.0006234170868992805, "learning_rate": 4.05429292929293e-06, "loss": 0.0, "step": 19725 }, { "epoch": 71.0431654676259, "grad_norm": 0.000607940077316016, "learning_rate": 4.053030303030303e-06, "loss": 0.0, "step": 19750 }, { "epoch": 71.13309352517986, "grad_norm": 0.0005851531168445945, "learning_rate": 4.051767676767677e-06, "loss": 0.0, "step": 19775 }, { "epoch": 71.22302158273381, "grad_norm": 0.0009296953212469816, "learning_rate": 4.050505050505051e-06, "loss": 0.0, "step": 19800 }, { "epoch": 71.31294964028777, "grad_norm": 0.0006304428679868579, "learning_rate": 4.049242424242424e-06, "loss": 0.0001, "step": 19825 }, { "epoch": 71.40287769784173, "grad_norm": 0.000664900871925056, "learning_rate": 4.047979797979799e-06, "loss": 0.0, "step": 19850 }, { "epoch": 71.49280575539568, "grad_norm": 0.0003695714403875172, "learning_rate": 4.046717171717172e-06, "loss": 0.0, "step": 19875 }, { "epoch": 71.58273381294964, "grad_norm": 0.000516809755936265, "learning_rate": 4.045454545454546e-06, "loss": 0.0, "step": 19900 }, { "epoch": 71.6726618705036, "grad_norm": 0.0006113911513239145, "learning_rate": 4.044191919191919e-06, "loss": 0.0, "step": 19925 }, { "epoch": 71.76258992805755, "grad_norm": 0.000814276107121259, "learning_rate": 4.042929292929293e-06, "loss": 0.0, "step": 19950 }, { "epoch": 71.85251798561151, "grad_norm": 0.0007162923575378954, "learning_rate": 4.041666666666667e-06, "loss": 0.0, "step": 19975 }, { "epoch": 71.94244604316546, "grad_norm": 0.000519581779371947, "learning_rate": 4.04040404040404e-06, "loss": 0.0, "step": 20000 }, { "epoch": 71.94244604316546, "eval_loss": 0.0976732075214386, "eval_runtime": 1338.7066, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.104, "eval_wer": 5.539509738576612, "step": 20000 }, { "epoch": 72.03237410071942, "grad_norm": 0.0013573451433330774, "learning_rate": 4.039141414141414e-06, "loss": 0.0001, "step": 20025 }, { "epoch": 72.12230215827338, "grad_norm": 0.0006321736145764589, "learning_rate": 4.037878787878788e-06, "loss": 0.0, "step": 20050 }, { "epoch": 72.21223021582733, "grad_norm": 0.00046551282866857946, "learning_rate": 4.036616161616162e-06, "loss": 0.0, "step": 20075 }, { "epoch": 72.3021582733813, "grad_norm": 0.00047266227193176746, "learning_rate": 4.0353535353535355e-06, "loss": 0.0, "step": 20100 }, { "epoch": 72.39208633093526, "grad_norm": 0.0004692314541898668, "learning_rate": 4.0340909090909095e-06, "loss": 0.0, "step": 20125 }, { "epoch": 72.4820143884892, "grad_norm": 0.0005892490735277534, "learning_rate": 4.0328282828282835e-06, "loss": 0.0, "step": 20150 }, { "epoch": 72.57194244604317, "grad_norm": 0.0005393667961470783, "learning_rate": 4.031565656565657e-06, "loss": 0.0001, "step": 20175 }, { "epoch": 72.66187050359713, "grad_norm": 0.0007663563592359424, "learning_rate": 4.030303030303031e-06, "loss": 0.0, "step": 20200 }, { "epoch": 72.75179856115108, "grad_norm": 0.0005675546126440167, "learning_rate": 4.029040404040405e-06, "loss": 0.0, "step": 20225 }, { "epoch": 72.84172661870504, "grad_norm": 0.0006041157757863402, "learning_rate": 4.027777777777779e-06, "loss": 0.0, "step": 20250 }, { "epoch": 72.93165467625899, "grad_norm": 0.0006022896850481629, "learning_rate": 4.026515151515152e-06, "loss": 0.0001, "step": 20275 }, { "epoch": 73.02158273381295, "grad_norm": 0.0005813241587020457, "learning_rate": 4.025252525252526e-06, "loss": 0.0, "step": 20300 }, { "epoch": 73.11151079136691, "grad_norm": 0.0006358566461130977, "learning_rate": 4.0239898989899e-06, "loss": 0.0, "step": 20325 }, { "epoch": 73.20143884892086, "grad_norm": 0.0006074347766116261, "learning_rate": 4.022727272727273e-06, "loss": 0.0, "step": 20350 }, { "epoch": 73.29136690647482, "grad_norm": 0.0005062387208454311, "learning_rate": 4.021464646464647e-06, "loss": 0.0003, "step": 20375 }, { "epoch": 73.38129496402878, "grad_norm": 0.0010172536130994558, "learning_rate": 4.02020202020202e-06, "loss": 0.0, "step": 20400 }, { "epoch": 73.47122302158273, "grad_norm": 0.0006235135952010751, "learning_rate": 4.018939393939394e-06, "loss": 0.0, "step": 20425 }, { "epoch": 73.56115107913669, "grad_norm": 0.0009783974383026361, "learning_rate": 4.017676767676768e-06, "loss": 0.0, "step": 20450 }, { "epoch": 73.65107913669064, "grad_norm": 0.0005355635657906532, "learning_rate": 4.016414141414141e-06, "loss": 0.0, "step": 20475 }, { "epoch": 73.7410071942446, "grad_norm": 0.0004634314973372966, "learning_rate": 4.015151515151515e-06, "loss": 0.0, "step": 20500 }, { "epoch": 73.83093525179856, "grad_norm": 0.0005511495401151478, "learning_rate": 4.013888888888889e-06, "loss": 0.0, "step": 20525 }, { "epoch": 73.92086330935251, "grad_norm": 0.0010061068460345268, "learning_rate": 4.012626262626263e-06, "loss": 0.0, "step": 20550 }, { "epoch": 74.01079136690647, "grad_norm": 0.3256176710128784, "learning_rate": 4.011363636363636e-06, "loss": 0.0007, "step": 20575 }, { "epoch": 74.10071942446044, "grad_norm": 0.17023605108261108, "learning_rate": 4.01010101010101e-06, "loss": 0.0008, "step": 20600 }, { "epoch": 74.19064748201438, "grad_norm": 0.8051077723503113, "learning_rate": 4.008838383838384e-06, "loss": 0.0078, "step": 20625 }, { "epoch": 74.28057553956835, "grad_norm": 0.4720918536186218, "learning_rate": 4.0075757575757575e-06, "loss": 0.0062, "step": 20650 }, { "epoch": 74.37050359712231, "grad_norm": 0.4814521074295044, "learning_rate": 4.0063131313131315e-06, "loss": 0.0061, "step": 20675 }, { "epoch": 74.46043165467626, "grad_norm": 0.7329695820808411, "learning_rate": 4.0050505050505055e-06, "loss": 0.0069, "step": 20700 }, { "epoch": 74.55035971223022, "grad_norm": 0.713927686214447, "learning_rate": 4.0037878787878795e-06, "loss": 0.0061, "step": 20725 }, { "epoch": 74.64028776978417, "grad_norm": 0.6485239863395691, "learning_rate": 4.002525252525253e-06, "loss": 0.0064, "step": 20750 }, { "epoch": 74.73021582733813, "grad_norm": 0.8775496482849121, "learning_rate": 4.001262626262627e-06, "loss": 0.0048, "step": 20775 }, { "epoch": 74.82014388489209, "grad_norm": 0.2677914798259735, "learning_rate": 4.000000000000001e-06, "loss": 0.004, "step": 20800 }, { "epoch": 74.91007194244604, "grad_norm": 0.38305044174194336, "learning_rate": 3.998737373737374e-06, "loss": 0.0028, "step": 20825 }, { "epoch": 75.0, "grad_norm": 0.05106651037931442, "learning_rate": 3.997474747474748e-06, "loss": 0.0021, "step": 20850 }, { "epoch": 75.08992805755396, "grad_norm": 0.01168102491647005, "learning_rate": 3.996212121212121e-06, "loss": 0.0012, "step": 20875 }, { "epoch": 75.17985611510791, "grad_norm": 0.22549034655094147, "learning_rate": 3.994949494949496e-06, "loss": 0.0015, "step": 20900 }, { "epoch": 75.26978417266187, "grad_norm": 0.022075073793530464, "learning_rate": 3.993686868686869e-06, "loss": 0.0026, "step": 20925 }, { "epoch": 75.35971223021583, "grad_norm": 0.0188248660415411, "learning_rate": 3.992424242424243e-06, "loss": 0.0017, "step": 20950 }, { "epoch": 75.44964028776978, "grad_norm": 0.47026434540748596, "learning_rate": 3.991161616161616e-06, "loss": 0.0026, "step": 20975 }, { "epoch": 75.53956834532374, "grad_norm": 0.2045595496892929, "learning_rate": 3.98989898989899e-06, "loss": 0.0019, "step": 21000 }, { "epoch": 75.53956834532374, "eval_loss": 0.08847362548112869, "eval_runtime": 1337.9238, "eval_samples_per_second": 1.661, "eval_steps_per_second": 0.104, "eval_wer": 6.294897430200697, "step": 21000 }, { "epoch": 75.62949640287769, "grad_norm": 0.0665188655257225, "learning_rate": 3.988636363636364e-06, "loss": 0.0014, "step": 21025 }, { "epoch": 75.71942446043165, "grad_norm": 0.33609738945961, "learning_rate": 3.987373737373737e-06, "loss": 0.0011, "step": 21050 }, { "epoch": 75.80935251798562, "grad_norm": 0.4631134867668152, "learning_rate": 3.986111111111112e-06, "loss": 0.0023, "step": 21075 }, { "epoch": 75.89928057553956, "grad_norm": 0.26408031582832336, "learning_rate": 3.984848484848485e-06, "loss": 0.0019, "step": 21100 }, { "epoch": 75.98920863309353, "grad_norm": 0.3067505657672882, "learning_rate": 3.983585858585859e-06, "loss": 0.0021, "step": 21125 }, { "epoch": 76.07913669064749, "grad_norm": 0.0688316822052002, "learning_rate": 3.982323232323232e-06, "loss": 0.0024, "step": 21150 }, { "epoch": 76.16906474820144, "grad_norm": 1.5255663394927979, "learning_rate": 3.981060606060606e-06, "loss": 0.0012, "step": 21175 }, { "epoch": 76.2589928057554, "grad_norm": 0.368730753660202, "learning_rate": 3.97979797979798e-06, "loss": 0.001, "step": 21200 }, { "epoch": 76.34892086330935, "grad_norm": 0.019969308748841286, "learning_rate": 3.9785353535353535e-06, "loss": 0.0006, "step": 21225 }, { "epoch": 76.43884892086331, "grad_norm": 0.070771723985672, "learning_rate": 3.9772727272727275e-06, "loss": 0.0004, "step": 21250 }, { "epoch": 76.52877697841727, "grad_norm": 0.023271985352039337, "learning_rate": 3.9760101010101015e-06, "loss": 0.0007, "step": 21275 }, { "epoch": 76.61870503597122, "grad_norm": 0.027517560869455338, "learning_rate": 3.9747474747474755e-06, "loss": 0.0004, "step": 21300 }, { "epoch": 76.70863309352518, "grad_norm": 0.009323998354375362, "learning_rate": 3.973484848484849e-06, "loss": 0.0007, "step": 21325 }, { "epoch": 76.79856115107914, "grad_norm": 0.007815494202077389, "learning_rate": 3.972222222222223e-06, "loss": 0.0007, "step": 21350 }, { "epoch": 76.88848920863309, "grad_norm": 0.06828250735998154, "learning_rate": 3.970959595959597e-06, "loss": 0.0004, "step": 21375 }, { "epoch": 76.97841726618705, "grad_norm": 0.4169680178165436, "learning_rate": 3.96969696969697e-06, "loss": 0.0007, "step": 21400 }, { "epoch": 77.06834532374101, "grad_norm": 0.010289140976965427, "learning_rate": 3.968434343434344e-06, "loss": 0.0003, "step": 21425 }, { "epoch": 77.15827338129496, "grad_norm": 0.02134793810546398, "learning_rate": 3.967171717171717e-06, "loss": 0.0003, "step": 21450 }, { "epoch": 77.24820143884892, "grad_norm": 0.005463853012770414, "learning_rate": 3.965909090909091e-06, "loss": 0.0001, "step": 21475 }, { "epoch": 77.33812949640287, "grad_norm": 0.0035135000944137573, "learning_rate": 3.964646464646465e-06, "loss": 0.0001, "step": 21500 }, { "epoch": 77.42805755395683, "grad_norm": 0.01657390221953392, "learning_rate": 3.963383838383839e-06, "loss": 0.0001, "step": 21525 }, { "epoch": 77.5179856115108, "grad_norm": 0.1767745018005371, "learning_rate": 3.962121212121213e-06, "loss": 0.0007, "step": 21550 }, { "epoch": 77.60791366906474, "grad_norm": 0.016838785260915756, "learning_rate": 3.960858585858586e-06, "loss": 0.0001, "step": 21575 }, { "epoch": 77.6978417266187, "grad_norm": 0.0039493367075920105, "learning_rate": 3.95959595959596e-06, "loss": 0.0001, "step": 21600 }, { "epoch": 77.78776978417267, "grad_norm": 0.0031421987805515528, "learning_rate": 3.958333333333333e-06, "loss": 0.0003, "step": 21625 }, { "epoch": 77.87769784172662, "grad_norm": 0.0026466776616871357, "learning_rate": 3.957070707070707e-06, "loss": 0.0003, "step": 21650 }, { "epoch": 77.96762589928058, "grad_norm": 0.009947208687663078, "learning_rate": 3.955808080808081e-06, "loss": 0.0002, "step": 21675 }, { "epoch": 78.05755395683454, "grad_norm": 0.1049116924405098, "learning_rate": 3.954545454545454e-06, "loss": 0.0002, "step": 21700 }, { "epoch": 78.14748201438849, "grad_norm": 0.0023068960290402174, "learning_rate": 3.953282828282828e-06, "loss": 0.0001, "step": 21725 }, { "epoch": 78.23741007194245, "grad_norm": 0.003103764960542321, "learning_rate": 3.952020202020202e-06, "loss": 0.0001, "step": 21750 }, { "epoch": 78.3273381294964, "grad_norm": 0.002706879284232855, "learning_rate": 3.950757575757576e-06, "loss": 0.0001, "step": 21775 }, { "epoch": 78.41726618705036, "grad_norm": 0.004320697858929634, "learning_rate": 3.9494949494949496e-06, "loss": 0.0001, "step": 21800 }, { "epoch": 78.50719424460432, "grad_norm": 0.005596183240413666, "learning_rate": 3.9482323232323236e-06, "loss": 0.0002, "step": 21825 }, { "epoch": 78.59712230215827, "grad_norm": 0.0037838639691472054, "learning_rate": 3.9469696969696976e-06, "loss": 0.0003, "step": 21850 }, { "epoch": 78.68705035971223, "grad_norm": 0.00796448066830635, "learning_rate": 3.945707070707071e-06, "loss": 0.0001, "step": 21875 }, { "epoch": 78.77697841726619, "grad_norm": 0.003022188087925315, "learning_rate": 3.944444444444445e-06, "loss": 0.0001, "step": 21900 }, { "epoch": 78.86690647482014, "grad_norm": 0.0022381923627108335, "learning_rate": 3.943181818181819e-06, "loss": 0.0002, "step": 21925 }, { "epoch": 78.9568345323741, "grad_norm": 0.0027954999823123217, "learning_rate": 3.941919191919193e-06, "loss": 0.0001, "step": 21950 }, { "epoch": 79.04676258992805, "grad_norm": 0.0016978129278868437, "learning_rate": 3.940656565656566e-06, "loss": 0.0001, "step": 21975 }, { "epoch": 79.13669064748201, "grad_norm": 0.0017409235006198287, "learning_rate": 3.93939393939394e-06, "loss": 0.0003, "step": 22000 }, { "epoch": 79.13669064748201, "eval_loss": 0.0888415277004242, "eval_runtime": 1337.7919, "eval_samples_per_second": 1.661, "eval_steps_per_second": 0.104, "eval_wer": 5.598755832037325, "step": 22000 } ], "logging_steps": 25, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 360, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.777235958847242e+21, "train_batch_size": 64, "trial_name": null, "trial_params": null }