whisper-base-common_voice_17_0-id / trainer_state.json
End of training (commit e75ff83, verified)
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.458447874814972,
"eval_steps": 1000,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010573059843518714,
"grad_norm": 58.068878173828125,
"learning_rate": 4.4e-07,
"loss": 3.1142,
"step": 25
},
{
"epoch": 0.02114611968703743,
"grad_norm": 37.93722152709961,
"learning_rate": 9.400000000000001e-07,
"loss": 2.5594,
"step": 50
},
{
"epoch": 0.03171917953055614,
"grad_norm": 22.363622665405273,
"learning_rate": 1.44e-06,
"loss": 1.6703,
"step": 75
},
{
"epoch": 0.04229223937407486,
"grad_norm": 15.931524276733398,
"learning_rate": 1.94e-06,
"loss": 1.0597,
"step": 100
},
{
"epoch": 0.05286529921759357,
"grad_norm": 13.918318748474121,
"learning_rate": 2.4400000000000004e-06,
"loss": 0.765,
"step": 125
},
{
"epoch": 0.06343835906111228,
"grad_norm": 14.767135620117188,
"learning_rate": 2.9400000000000002e-06,
"loss": 0.6955,
"step": 150
},
{
"epoch": 0.074011418904631,
"grad_norm": 11.596781730651855,
"learning_rate": 3.44e-06,
"loss": 0.6118,
"step": 175
},
{
"epoch": 0.08458447874814971,
"grad_norm": 10.992063522338867,
"learning_rate": 3.94e-06,
"loss": 0.6096,
"step": 200
},
{
"epoch": 0.09515753859166842,
"grad_norm": 12.467724800109863,
"learning_rate": 4.440000000000001e-06,
"loss": 0.5579,
"step": 225
},
{
"epoch": 0.10573059843518715,
"grad_norm": 11.876873016357422,
"learning_rate": 4.94e-06,
"loss": 0.5435,
"step": 250
},
{
"epoch": 0.11630365827870585,
"grad_norm": 12.889082908630371,
"learning_rate": 5.4400000000000004e-06,
"loss": 0.5693,
"step": 275
},
{
"epoch": 0.12687671812222456,
"grad_norm": 10.841497421264648,
"learning_rate": 5.94e-06,
"loss": 0.5494,
"step": 300
},
{
"epoch": 0.13744977796574329,
"grad_norm": 10.560270309448242,
"learning_rate": 6.440000000000001e-06,
"loss": 0.5065,
"step": 325
},
{
"epoch": 0.148022837809262,
"grad_norm": 13.699323654174805,
"learning_rate": 6.9400000000000005e-06,
"loss": 0.4911,
"step": 350
},
{
"epoch": 0.1585958976527807,
"grad_norm": 9.73985481262207,
"learning_rate": 7.440000000000001e-06,
"loss": 0.5293,
"step": 375
},
{
"epoch": 0.16916895749629943,
"grad_norm": 11.41083812713623,
"learning_rate": 7.94e-06,
"loss": 0.4365,
"step": 400
},
{
"epoch": 0.17974201733981815,
"grad_norm": 10.309052467346191,
"learning_rate": 8.44e-06,
"loss": 0.5207,
"step": 425
},
{
"epoch": 0.19031507718333684,
"grad_norm": 10.460412979125977,
"learning_rate": 8.94e-06,
"loss": 0.4537,
"step": 450
},
{
"epoch": 0.20088813702685557,
"grad_norm": 10.022929191589355,
"learning_rate": 9.440000000000001e-06,
"loss": 0.4395,
"step": 475
},
{
"epoch": 0.2114611968703743,
"grad_norm": 10.307358741760254,
"learning_rate": 9.940000000000001e-06,
"loss": 0.4439,
"step": 500
},
{
"epoch": 0.222034256713893,
"grad_norm": 8.121826171875,
"learning_rate": 9.98871794871795e-06,
"loss": 0.4624,
"step": 525
},
{
"epoch": 0.2326073165574117,
"grad_norm": 9.346823692321777,
"learning_rate": 9.975897435897436e-06,
"loss": 0.4533,
"step": 550
},
{
"epoch": 0.24318037640093043,
"grad_norm": 15.096284866333008,
"learning_rate": 9.963076923076925e-06,
"loss": 0.4372,
"step": 575
},
{
"epoch": 0.2537534362444491,
"grad_norm": 9.635919570922852,
"learning_rate": 9.950256410256412e-06,
"loss": 0.4149,
"step": 600
},
{
"epoch": 0.2643264960879679,
"grad_norm": 12.7789945602417,
"learning_rate": 9.937435897435898e-06,
"loss": 0.4219,
"step": 625
},
{
"epoch": 0.27489955593148657,
"grad_norm": 10.612743377685547,
"learning_rate": 9.924615384615385e-06,
"loss": 0.4103,
"step": 650
},
{
"epoch": 0.28547261577500527,
"grad_norm": 9.644041061401367,
"learning_rate": 9.911794871794874e-06,
"loss": 0.3747,
"step": 675
},
{
"epoch": 0.296045675618524,
"grad_norm": 12.278302192687988,
"learning_rate": 9.89897435897436e-06,
"loss": 0.3818,
"step": 700
},
{
"epoch": 0.3066187354620427,
"grad_norm": 11.676104545593262,
"learning_rate": 9.886153846153846e-06,
"loss": 0.3964,
"step": 725
},
{
"epoch": 0.3171917953055614,
"grad_norm": 9.235854148864746,
"learning_rate": 9.873333333333334e-06,
"loss": 0.3954,
"step": 750
},
{
"epoch": 0.32776485514908016,
"grad_norm": 10.16644287109375,
"learning_rate": 9.860512820512821e-06,
"loss": 0.4021,
"step": 775
},
{
"epoch": 0.33833791499259885,
"grad_norm": 11.959391593933105,
"learning_rate": 9.847692307692308e-06,
"loss": 0.4094,
"step": 800
},
{
"epoch": 0.34891097483611755,
"grad_norm": 9.894736289978027,
"learning_rate": 9.834871794871795e-06,
"loss": 0.386,
"step": 825
},
{
"epoch": 0.3594840346796363,
"grad_norm": 10.717971801757812,
"learning_rate": 9.822051282051283e-06,
"loss": 0.3786,
"step": 850
},
{
"epoch": 0.370057094523155,
"grad_norm": 9.43459701538086,
"learning_rate": 9.80923076923077e-06,
"loss": 0.3558,
"step": 875
},
{
"epoch": 0.3806301543666737,
"grad_norm": 8.999226570129395,
"learning_rate": 9.796410256410257e-06,
"loss": 0.4059,
"step": 900
},
{
"epoch": 0.39120321421019244,
"grad_norm": 9.803775787353516,
"learning_rate": 9.783589743589744e-06,
"loss": 0.3665,
"step": 925
},
{
"epoch": 0.40177627405371114,
"grad_norm": 6.630070209503174,
"learning_rate": 9.770769230769232e-06,
"loss": 0.3913,
"step": 950
},
{
"epoch": 0.4123493338972299,
"grad_norm": 8.892091751098633,
"learning_rate": 9.757948717948719e-06,
"loss": 0.4345,
"step": 975
},
{
"epoch": 0.4229223937407486,
"grad_norm": 11.313458442687988,
"learning_rate": 9.745128205128206e-06,
"loss": 0.3523,
"step": 1000
},
{
"epoch": 0.4229223937407486,
"eval_loss": 0.3129430413246155,
"eval_runtime": 466.842,
"eval_samples_per_second": 7.799,
"eval_steps_per_second": 0.977,
"eval_wer": 0.23648429161446008,
"step": 1000
},
{
"epoch": 0.4334954535842673,
"grad_norm": 10.663793563842773,
"learning_rate": 9.732307692307693e-06,
"loss": 0.3711,
"step": 1025
},
{
"epoch": 0.444068513427786,
"grad_norm": 8.810164451599121,
"learning_rate": 9.71948717948718e-06,
"loss": 0.355,
"step": 1050
},
{
"epoch": 0.4546415732713047,
"grad_norm": 17.50876235961914,
"learning_rate": 9.706666666666668e-06,
"loss": 0.3652,
"step": 1075
},
{
"epoch": 0.4652146331148234,
"grad_norm": 6.720116138458252,
"learning_rate": 9.693846153846155e-06,
"loss": 0.3415,
"step": 1100
},
{
"epoch": 0.47578769295834217,
"grad_norm": 6.864238739013672,
"learning_rate": 9.681025641025642e-06,
"loss": 0.3203,
"step": 1125
},
{
"epoch": 0.48636075280186086,
"grad_norm": 10.059639930725098,
"learning_rate": 9.668205128205129e-06,
"loss": 0.4017,
"step": 1150
},
{
"epoch": 0.49693381264537956,
"grad_norm": 8.964330673217773,
"learning_rate": 9.655384615384617e-06,
"loss": 0.3089,
"step": 1175
},
{
"epoch": 0.5075068724888983,
"grad_norm": 11.160856246948242,
"learning_rate": 9.642564102564104e-06,
"loss": 0.357,
"step": 1200
},
{
"epoch": 0.518079932332417,
"grad_norm": 8.594799041748047,
"learning_rate": 9.62974358974359e-06,
"loss": 0.3391,
"step": 1225
},
{
"epoch": 0.5286529921759358,
"grad_norm": 7.494054794311523,
"learning_rate": 9.616923076923077e-06,
"loss": 0.3315,
"step": 1250
},
{
"epoch": 0.5392260520194544,
"grad_norm": 7.793934345245361,
"learning_rate": 9.604102564102566e-06,
"loss": 0.3226,
"step": 1275
},
{
"epoch": 0.5497991118629731,
"grad_norm": 7.225709438323975,
"learning_rate": 9.591282051282053e-06,
"loss": 0.3317,
"step": 1300
},
{
"epoch": 0.5603721717064919,
"grad_norm": 6.296034812927246,
"learning_rate": 9.578461538461538e-06,
"loss": 0.3302,
"step": 1325
},
{
"epoch": 0.5709452315500105,
"grad_norm": 11.198543548583984,
"learning_rate": 9.565641025641026e-06,
"loss": 0.3038,
"step": 1350
},
{
"epoch": 0.5815182913935293,
"grad_norm": 6.4023542404174805,
"learning_rate": 9.552820512820513e-06,
"loss": 0.3511,
"step": 1375
},
{
"epoch": 0.592091351237048,
"grad_norm": 8.326896667480469,
"learning_rate": 9.54e-06,
"loss": 0.3257,
"step": 1400
},
{
"epoch": 0.6026644110805667,
"grad_norm": 7.079158306121826,
"learning_rate": 9.527179487179487e-06,
"loss": 0.335,
"step": 1425
},
{
"epoch": 0.6132374709240854,
"grad_norm": 8.422740936279297,
"learning_rate": 9.514358974358975e-06,
"loss": 0.3212,
"step": 1450
},
{
"epoch": 0.6238105307676042,
"grad_norm": 8.642356872558594,
"learning_rate": 9.501538461538462e-06,
"loss": 0.314,
"step": 1475
},
{
"epoch": 0.6343835906111228,
"grad_norm": 8.129634857177734,
"learning_rate": 9.488717948717949e-06,
"loss": 0.2889,
"step": 1500
},
{
"epoch": 0.6449566504546416,
"grad_norm": 7.115053653717041,
"learning_rate": 9.475897435897436e-06,
"loss": 0.3073,
"step": 1525
},
{
"epoch": 0.6555297102981603,
"grad_norm": 8.397727966308594,
"learning_rate": 9.463076923076924e-06,
"loss": 0.3212,
"step": 1550
},
{
"epoch": 0.666102770141679,
"grad_norm": 9.354975700378418,
"learning_rate": 9.450256410256411e-06,
"loss": 0.3285,
"step": 1575
},
{
"epoch": 0.6766758299851977,
"grad_norm": 7.693446159362793,
"learning_rate": 9.437435897435898e-06,
"loss": 0.3382,
"step": 1600
},
{
"epoch": 0.6872488898287165,
"grad_norm": 10.583995819091797,
"learning_rate": 9.424615384615385e-06,
"loss": 0.3198,
"step": 1625
},
{
"epoch": 0.6978219496722351,
"grad_norm": 9.70487117767334,
"learning_rate": 9.411794871794872e-06,
"loss": 0.2968,
"step": 1650
},
{
"epoch": 0.7083950095157538,
"grad_norm": 8.256582260131836,
"learning_rate": 9.39897435897436e-06,
"loss": 0.2994,
"step": 1675
},
{
"epoch": 0.7189680693592726,
"grad_norm": 8.840137481689453,
"learning_rate": 9.386153846153847e-06,
"loss": 0.347,
"step": 1700
},
{
"epoch": 0.7295411292027912,
"grad_norm": 6.581663608551025,
"learning_rate": 9.373333333333334e-06,
"loss": 0.3436,
"step": 1725
},
{
"epoch": 0.74011418904631,
"grad_norm": 10.300860404968262,
"learning_rate": 9.36051282051282e-06,
"loss": 0.2989,
"step": 1750
},
{
"epoch": 0.7506872488898287,
"grad_norm": 8.777045249938965,
"learning_rate": 9.34769230769231e-06,
"loss": 0.3212,
"step": 1775
},
{
"epoch": 0.7612603087333474,
"grad_norm": 8.94032096862793,
"learning_rate": 9.334871794871796e-06,
"loss": 0.2951,
"step": 1800
},
{
"epoch": 0.7718333685768661,
"grad_norm": 6.139760971069336,
"learning_rate": 9.322051282051283e-06,
"loss": 0.2962,
"step": 1825
},
{
"epoch": 0.7824064284203849,
"grad_norm": 6.843733310699463,
"learning_rate": 9.30923076923077e-06,
"loss": 0.3324,
"step": 1850
},
{
"epoch": 0.7929794882639035,
"grad_norm": 8.275738716125488,
"learning_rate": 9.296410256410258e-06,
"loss": 0.289,
"step": 1875
},
{
"epoch": 0.8035525481074223,
"grad_norm": 7.651088237762451,
"learning_rate": 9.283589743589745e-06,
"loss": 0.3563,
"step": 1900
},
{
"epoch": 0.814125607950941,
"grad_norm": 9.584896087646484,
"learning_rate": 9.270769230769232e-06,
"loss": 0.3009,
"step": 1925
},
{
"epoch": 0.8246986677944598,
"grad_norm": 8.920114517211914,
"learning_rate": 9.257948717948719e-06,
"loss": 0.3229,
"step": 1950
},
{
"epoch": 0.8352717276379784,
"grad_norm": 8.181923866271973,
"learning_rate": 9.245128205128206e-06,
"loss": 0.2707,
"step": 1975
},
{
"epoch": 0.8458447874814972,
"grad_norm": 6.475239276885986,
"learning_rate": 9.232307692307692e-06,
"loss": 0.3002,
"step": 2000
},
{
"epoch": 0.8458447874814972,
"eval_loss": 0.2391187697649002,
"eval_runtime": 464.6331,
"eval_samples_per_second": 7.836,
"eval_steps_per_second": 0.981,
"eval_wer": 0.19638962364842916,
"step": 2000
},
{
"epoch": 0.8564178473250159,
"grad_norm": 6.2346930503845215,
"learning_rate": 9.21948717948718e-06,
"loss": 0.3355,
"step": 2025
},
{
"epoch": 0.8669909071685346,
"grad_norm": 8.41283130645752,
"learning_rate": 9.207179487179488e-06,
"loss": 0.2924,
"step": 2050
},
{
"epoch": 0.8775639670120533,
"grad_norm": 10.027973175048828,
"learning_rate": 9.194358974358975e-06,
"loss": 0.2711,
"step": 2075
},
{
"epoch": 0.888137026855572,
"grad_norm": 6.886898040771484,
"learning_rate": 9.181538461538464e-06,
"loss": 0.2593,
"step": 2100
},
{
"epoch": 0.8987100866990907,
"grad_norm": 6.60000467300415,
"learning_rate": 9.168717948717949e-06,
"loss": 0.2818,
"step": 2125
},
{
"epoch": 0.9092831465426094,
"grad_norm": 8.06698989868164,
"learning_rate": 9.155897435897436e-06,
"loss": 0.2812,
"step": 2150
},
{
"epoch": 0.9198562063861282,
"grad_norm": 11.074329376220703,
"learning_rate": 9.143076923076924e-06,
"loss": 0.2717,
"step": 2175
},
{
"epoch": 0.9304292662296468,
"grad_norm": 7.394618511199951,
"learning_rate": 9.130256410256411e-06,
"loss": 0.2752,
"step": 2200
},
{
"epoch": 0.9410023260731656,
"grad_norm": 9.914189338684082,
"learning_rate": 9.117435897435898e-06,
"loss": 0.2842,
"step": 2225
},
{
"epoch": 0.9515753859166843,
"grad_norm": 13.050373077392578,
"learning_rate": 9.104615384615385e-06,
"loss": 0.2681,
"step": 2250
},
{
"epoch": 0.962148445760203,
"grad_norm": 9.03541088104248,
"learning_rate": 9.091794871794873e-06,
"loss": 0.2969,
"step": 2275
},
{
"epoch": 0.9727215056037217,
"grad_norm": 9.255524635314941,
"learning_rate": 9.07897435897436e-06,
"loss": 0.2558,
"step": 2300
},
{
"epoch": 0.9832945654472405,
"grad_norm": 9.215723037719727,
"learning_rate": 9.066153846153847e-06,
"loss": 0.2648,
"step": 2325
},
{
"epoch": 0.9938676252907591,
"grad_norm": 10.28739070892334,
"learning_rate": 9.053333333333334e-06,
"loss": 0.2809,
"step": 2350
},
{
"epoch": 1.0044406851342778,
"grad_norm": 5.855769157409668,
"learning_rate": 9.04051282051282e-06,
"loss": 0.2384,
"step": 2375
},
{
"epoch": 1.0150137449777965,
"grad_norm": 5.34947395324707,
"learning_rate": 9.027692307692309e-06,
"loss": 0.2049,
"step": 2400
},
{
"epoch": 1.0255868048213153,
"grad_norm": 6.5480170249938965,
"learning_rate": 9.014871794871796e-06,
"loss": 0.1907,
"step": 2425
},
{
"epoch": 1.036159864664834,
"grad_norm": 8.17578125,
"learning_rate": 9.002051282051283e-06,
"loss": 0.1948,
"step": 2450
},
{
"epoch": 1.0467329245083528,
"grad_norm": 6.4552083015441895,
"learning_rate": 8.98923076923077e-06,
"loss": 0.1703,
"step": 2475
},
{
"epoch": 1.0573059843518715,
"grad_norm": 6.099579811096191,
"learning_rate": 8.976410256410258e-06,
"loss": 0.1765,
"step": 2500
},
{
"epoch": 1.06787904419539,
"grad_norm": 7.8965301513671875,
"learning_rate": 8.963589743589745e-06,
"loss": 0.1849,
"step": 2525
},
{
"epoch": 1.0784521040389088,
"grad_norm": 6.395140171051025,
"learning_rate": 8.950769230769232e-06,
"loss": 0.2055,
"step": 2550
},
{
"epoch": 1.0890251638824275,
"grad_norm": 5.077881813049316,
"learning_rate": 8.937948717948718e-06,
"loss": 0.1857,
"step": 2575
},
{
"epoch": 1.0995982237259463,
"grad_norm": 6.063033103942871,
"learning_rate": 8.925128205128207e-06,
"loss": 0.1629,
"step": 2600
},
{
"epoch": 1.110171283569465,
"grad_norm": 5.514612197875977,
"learning_rate": 8.912307692307694e-06,
"loss": 0.1735,
"step": 2625
},
{
"epoch": 1.1207443434129838,
"grad_norm": 6.513278961181641,
"learning_rate": 8.89948717948718e-06,
"loss": 0.174,
"step": 2650
},
{
"epoch": 1.1313174032565025,
"grad_norm": 5.930003643035889,
"learning_rate": 8.886666666666667e-06,
"loss": 0.183,
"step": 2675
},
{
"epoch": 1.141890463100021,
"grad_norm": 6.062052249908447,
"learning_rate": 8.873846153846156e-06,
"loss": 0.2031,
"step": 2700
},
{
"epoch": 1.1524635229435398,
"grad_norm": 6.619091987609863,
"learning_rate": 8.861025641025641e-06,
"loss": 0.1669,
"step": 2725
},
{
"epoch": 1.1630365827870586,
"grad_norm": 6.383360385894775,
"learning_rate": 8.848205128205128e-06,
"loss": 0.1698,
"step": 2750
},
{
"epoch": 1.1736096426305773,
"grad_norm": 5.248838901519775,
"learning_rate": 8.835384615384616e-06,
"loss": 0.1783,
"step": 2775
},
{
"epoch": 1.184182702474096,
"grad_norm": 4.1173319816589355,
"learning_rate": 8.822564102564103e-06,
"loss": 0.1689,
"step": 2800
},
{
"epoch": 1.1947557623176146,
"grad_norm": 4.528750419616699,
"learning_rate": 8.80974358974359e-06,
"loss": 0.1714,
"step": 2825
},
{
"epoch": 1.2053288221611334,
"grad_norm": 7.49066686630249,
"learning_rate": 8.796923076923077e-06,
"loss": 0.1567,
"step": 2850
},
{
"epoch": 1.215901882004652,
"grad_norm": 6.05342960357666,
"learning_rate": 8.784102564102565e-06,
"loss": 0.1675,
"step": 2875
},
{
"epoch": 1.2264749418481709,
"grad_norm": 6.280456066131592,
"learning_rate": 8.771282051282052e-06,
"loss": 0.1885,
"step": 2900
},
{
"epoch": 1.2370480016916896,
"grad_norm": 6.7942657470703125,
"learning_rate": 8.758461538461539e-06,
"loss": 0.1725,
"step": 2925
},
{
"epoch": 1.2476210615352084,
"grad_norm": 5.41685152053833,
"learning_rate": 8.745641025641026e-06,
"loss": 0.178,
"step": 2950
},
{
"epoch": 1.258194121378727,
"grad_norm": 7.254810333251953,
"learning_rate": 8.732820512820513e-06,
"loss": 0.1788,
"step": 2975
},
{
"epoch": 1.2687671812222456,
"grad_norm": 5.75822639465332,
"learning_rate": 8.720000000000001e-06,
"loss": 0.1718,
"step": 3000
},
{
"epoch": 1.2687671812222456,
"eval_loss": 0.20488913357257843,
"eval_runtime": 470.0703,
"eval_samples_per_second": 7.746,
"eval_steps_per_second": 0.97,
"eval_wer": 0.16594737574829457,
"step": 3000
},
{
"epoch": 1.2793402410657644,
"grad_norm": 4.381278038024902,
"learning_rate": 8.707179487179488e-06,
"loss": 0.1656,
"step": 3025
},
{
"epoch": 1.2899133009092831,
"grad_norm": 6.3774919509887695,
"learning_rate": 8.694358974358975e-06,
"loss": 0.1987,
"step": 3050
},
{
"epoch": 1.3004863607528019,
"grad_norm": 5.695129871368408,
"learning_rate": 8.681538461538462e-06,
"loss": 0.1602,
"step": 3075
},
{
"epoch": 1.3110594205963206,
"grad_norm": 5.8820013999938965,
"learning_rate": 8.66871794871795e-06,
"loss": 0.1788,
"step": 3100
},
{
"epoch": 1.3216324804398392,
"grad_norm": 6.164638519287109,
"learning_rate": 8.655897435897437e-06,
"loss": 0.2075,
"step": 3125
},
{
"epoch": 1.3322055402833581,
"grad_norm": 6.681603908538818,
"learning_rate": 8.643076923076924e-06,
"loss": 0.1867,
"step": 3150
},
{
"epoch": 1.3427786001268767,
"grad_norm": 4.936852931976318,
"learning_rate": 8.63025641025641e-06,
"loss": 0.1917,
"step": 3175
},
{
"epoch": 1.3533516599703954,
"grad_norm": 5.827350616455078,
"learning_rate": 8.6174358974359e-06,
"loss": 0.1898,
"step": 3200
},
{
"epoch": 1.3639247198139142,
"grad_norm": 6.6198272705078125,
"learning_rate": 8.604615384615386e-06,
"loss": 0.1659,
"step": 3225
},
{
"epoch": 1.374497779657433,
"grad_norm": 8.098386764526367,
"learning_rate": 8.591794871794873e-06,
"loss": 0.1793,
"step": 3250
},
{
"epoch": 1.3850708395009517,
"grad_norm": 7.836790561676025,
"learning_rate": 8.57897435897436e-06,
"loss": 0.1551,
"step": 3275
},
{
"epoch": 1.3956438993444702,
"grad_norm": 4.521047115325928,
"learning_rate": 8.566153846153848e-06,
"loss": 0.156,
"step": 3300
},
{
"epoch": 1.406216959187989,
"grad_norm": 6.938994407653809,
"learning_rate": 8.553333333333333e-06,
"loss": 0.1722,
"step": 3325
},
{
"epoch": 1.4167900190315077,
"grad_norm": 7.659555435180664,
"learning_rate": 8.54051282051282e-06,
"loss": 0.1781,
"step": 3350
},
{
"epoch": 1.4273630788750264,
"grad_norm": 5.197014808654785,
"learning_rate": 8.527692307692309e-06,
"loss": 0.1606,
"step": 3375
},
{
"epoch": 1.4379361387185452,
"grad_norm": 4.832046031951904,
"learning_rate": 8.514871794871795e-06,
"loss": 0.1579,
"step": 3400
},
{
"epoch": 1.4485091985620637,
"grad_norm": 7.802398681640625,
"learning_rate": 8.502051282051282e-06,
"loss": 0.1922,
"step": 3425
},
{
"epoch": 1.4590822584055827,
"grad_norm": 6.803133487701416,
"learning_rate": 8.489230769230769e-06,
"loss": 0.2039,
"step": 3450
},
{
"epoch": 1.4696553182491012,
"grad_norm": 6.156517505645752,
"learning_rate": 8.476410256410258e-06,
"loss": 0.1723,
"step": 3475
},
{
"epoch": 1.48022837809262,
"grad_norm": 6.290517330169678,
"learning_rate": 8.463589743589744e-06,
"loss": 0.1739,
"step": 3500
},
{
"epoch": 1.4908014379361387,
"grad_norm": 7.008253574371338,
"learning_rate": 8.450769230769231e-06,
"loss": 0.1465,
"step": 3525
},
{
"epoch": 1.5013744977796575,
"grad_norm": 5.356746673583984,
"learning_rate": 8.437948717948718e-06,
"loss": 0.185,
"step": 3550
},
{
"epoch": 1.5119475576231762,
"grad_norm": 7.195596694946289,
"learning_rate": 8.425128205128205e-06,
"loss": 0.1968,
"step": 3575
},
{
"epoch": 1.5225206174666948,
"grad_norm": 7.071013927459717,
"learning_rate": 8.412307692307693e-06,
"loss": 0.1722,
"step": 3600
},
{
"epoch": 1.5330936773102137,
"grad_norm": 6.704313278198242,
"learning_rate": 8.39948717948718e-06,
"loss": 0.1763,
"step": 3625
},
{
"epoch": 1.5436667371537323,
"grad_norm": 3.831084966659546,
"learning_rate": 8.386666666666667e-06,
"loss": 0.1795,
"step": 3650
},
{
"epoch": 1.554239796997251,
"grad_norm": 5.931239604949951,
"learning_rate": 8.373846153846154e-06,
"loss": 0.1717,
"step": 3675
},
{
"epoch": 1.5648128568407698,
"grad_norm": 8.750226974487305,
"learning_rate": 8.361025641025642e-06,
"loss": 0.1679,
"step": 3700
},
{
"epoch": 1.5753859166842883,
"grad_norm": 5.434117317199707,
"learning_rate": 8.34820512820513e-06,
"loss": 0.1343,
"step": 3725
},
{
"epoch": 1.5859589765278073,
"grad_norm": 6.517702102661133,
"learning_rate": 8.335384615384616e-06,
"loss": 0.1641,
"step": 3750
},
{
"epoch": 1.5965320363713258,
"grad_norm": 7.22881555557251,
"learning_rate": 8.322564102564103e-06,
"loss": 0.152,
"step": 3775
},
{
"epoch": 1.6071050962148445,
"grad_norm": 9.106505393981934,
"learning_rate": 8.309743589743591e-06,
"loss": 0.168,
"step": 3800
},
{
"epoch": 1.6176781560583633,
"grad_norm": 6.742056369781494,
"learning_rate": 8.296923076923078e-06,
"loss": 0.1624,
"step": 3825
},
{
"epoch": 1.628251215901882,
"grad_norm": 5.521613597869873,
"learning_rate": 8.284102564102565e-06,
"loss": 0.1476,
"step": 3850
},
{
"epoch": 1.6388242757454008,
"grad_norm": 5.299131870269775,
"learning_rate": 8.271282051282052e-06,
"loss": 0.1576,
"step": 3875
},
{
"epoch": 1.6493973355889193,
"grad_norm": 3.890312671661377,
"learning_rate": 8.25846153846154e-06,
"loss": 0.162,
"step": 3900
},
{
"epoch": 1.6599703954324383,
"grad_norm": 5.404117584228516,
"learning_rate": 8.245641025641027e-06,
"loss": 0.1622,
"step": 3925
},
{
"epoch": 1.6705434552759568,
"grad_norm": 8.395030975341797,
"learning_rate": 8.232820512820512e-06,
"loss": 0.1473,
"step": 3950
},
{
"epoch": 1.6811165151194756,
"grad_norm": 7.276181221008301,
"learning_rate": 8.220000000000001e-06,
"loss": 0.1538,
"step": 3975
},
{
"epoch": 1.6916895749629943,
"grad_norm": 8.637175559997559,
"learning_rate": 8.207179487179488e-06,
"loss": 0.1537,
"step": 4000
},
{
"epoch": 1.6916895749629943,
"eval_loss": 0.1817464828491211,
"eval_runtime": 469.7808,
"eval_samples_per_second": 7.75,
"eval_steps_per_second": 0.971,
"eval_wer": 0.15156155738085295,
"step": 4000
},
{
"epoch": 1.7022626348065129,
"grad_norm": 2.9336845874786377,
"learning_rate": 8.194358974358975e-06,
"loss": 0.1789,
"step": 4025
},
{
"epoch": 1.7128356946500318,
"grad_norm": 4.7298688888549805,
"learning_rate": 8.181538461538461e-06,
"loss": 0.1631,
"step": 4050
},
{
"epoch": 1.7234087544935504,
"grad_norm": 6.827828407287598,
"learning_rate": 8.16871794871795e-06,
"loss": 0.1599,
"step": 4075
},
{
"epoch": 1.733981814337069,
"grad_norm": 5.460879325866699,
"learning_rate": 8.155897435897437e-06,
"loss": 0.1836,
"step": 4100
},
{
"epoch": 1.7445548741805879,
"grad_norm": 6.356947898864746,
"learning_rate": 8.143076923076924e-06,
"loss": 0.1533,
"step": 4125
},
{
"epoch": 1.7551279340241066,
"grad_norm": 7.573319435119629,
"learning_rate": 8.13025641025641e-06,
"loss": 0.1437,
"step": 4150
},
{
"epoch": 1.7657009938676254,
"grad_norm": 4.901601791381836,
"learning_rate": 8.117435897435897e-06,
"loss": 0.1371,
"step": 4175
},
{
"epoch": 1.7762740537111439,
"grad_norm": 6.051869869232178,
"learning_rate": 8.104615384615386e-06,
"loss": 0.1883,
"step": 4200
},
{
"epoch": 1.7868471135546629,
"grad_norm": 7.222878932952881,
"learning_rate": 8.091794871794873e-06,
"loss": 0.1614,
"step": 4225
},
{
"epoch": 1.7974201733981814,
"grad_norm": 6.554632663726807,
"learning_rate": 8.07897435897436e-06,
"loss": 0.1629,
"step": 4250
},
{
"epoch": 1.8079932332417001,
"grad_norm": 6.802252292633057,
"learning_rate": 8.066153846153846e-06,
"loss": 0.1526,
"step": 4275
},
{
"epoch": 1.8185662930852189,
"grad_norm": 6.444076061248779,
"learning_rate": 8.053333333333335e-06,
"loss": 0.1549,
"step": 4300
},
{
"epoch": 1.8291393529287374,
"grad_norm": 6.49000358581543,
"learning_rate": 8.040512820512822e-06,
"loss": 0.1469,
"step": 4325
},
{
"epoch": 1.8397124127722564,
"grad_norm": 5.989566802978516,
"learning_rate": 8.027692307692308e-06,
"loss": 0.1587,
"step": 4350
},
{
"epoch": 1.850285472615775,
"grad_norm": 4.621600151062012,
"learning_rate": 8.014871794871795e-06,
"loss": 0.1394,
"step": 4375
},
{
"epoch": 1.8608585324592937,
"grad_norm": 7.434966087341309,
"learning_rate": 8.002051282051284e-06,
"loss": 0.1484,
"step": 4400
},
{
"epoch": 1.8714315923028124,
"grad_norm": 4.955178737640381,
"learning_rate": 7.98923076923077e-06,
"loss": 0.1304,
"step": 4425
},
{
"epoch": 1.8820046521463312,
"grad_norm": 6.121462821960449,
"learning_rate": 7.976410256410257e-06,
"loss": 0.1676,
"step": 4450
},
{
"epoch": 1.89257771198985,
"grad_norm": 5.908482551574707,
"learning_rate": 7.963589743589744e-06,
"loss": 0.1463,
"step": 4475
},
{
"epoch": 1.9031507718333684,
"grad_norm": 4.962184906005859,
"learning_rate": 7.950769230769233e-06,
"loss": 0.1494,
"step": 4500
},
{
"epoch": 1.9137238316768874,
"grad_norm": 6.131477355957031,
"learning_rate": 7.93794871794872e-06,
"loss": 0.146,
"step": 4525
},
{
"epoch": 1.924296891520406,
"grad_norm": 7.316920280456543,
"learning_rate": 7.925128205128205e-06,
"loss": 0.1451,
"step": 4550
},
{
"epoch": 1.9348699513639247,
"grad_norm": 6.914496898651123,
"learning_rate": 7.912307692307693e-06,
"loss": 0.1263,
"step": 4575
},
{
"epoch": 1.9454430112074435,
"grad_norm": 6.739365100860596,
"learning_rate": 7.89948717948718e-06,
"loss": 0.1467,
"step": 4600
},
{
"epoch": 1.9560160710509622,
"grad_norm": 7.861861705780029,
"learning_rate": 7.886666666666667e-06,
"loss": 0.1582,
"step": 4625
},
{
"epoch": 1.966589130894481,
"grad_norm": 8.007691383361816,
"learning_rate": 7.873846153846154e-06,
"loss": 0.1569,
"step": 4650
},
{
"epoch": 1.9771621907379995,
"grad_norm": 9.34388542175293,
"learning_rate": 7.861025641025642e-06,
"loss": 0.1419,
"step": 4675
},
{
"epoch": 1.9877352505815185,
"grad_norm": 7.265613555908203,
"learning_rate": 7.848205128205129e-06,
"loss": 0.1436,
"step": 4700
},
{
"epoch": 1.998308310425037,
"grad_norm": 5.50709867477417,
"learning_rate": 7.835384615384616e-06,
"loss": 0.1467,
"step": 4725
},
{
"epoch": 2.0088813702685555,
"grad_norm": 4.5442962646484375,
"learning_rate": 7.822564102564103e-06,
"loss": 0.0813,
"step": 4750
},
{
"epoch": 2.0194544301120745,
"grad_norm": 6.023507595062256,
"learning_rate": 7.80974358974359e-06,
"loss": 0.0902,
"step": 4775
},
{
"epoch": 2.030027489955593,
"grad_norm": 2.5194263458251953,
"learning_rate": 7.796923076923078e-06,
"loss": 0.092,
"step": 4800
},
{
"epoch": 2.040600549799112,
"grad_norm": 5.59611701965332,
"learning_rate": 7.784102564102565e-06,
"loss": 0.0888,
"step": 4825
},
{
"epoch": 2.0511736096426305,
"grad_norm": 5.128935813903809,
"learning_rate": 7.771282051282052e-06,
"loss": 0.0886,
"step": 4850
},
{
"epoch": 2.0617466694861495,
"grad_norm": 4.068778038024902,
"learning_rate": 7.758461538461538e-06,
"loss": 0.0754,
"step": 4875
},
{
"epoch": 2.072319729329668,
"grad_norm": 3.985748052597046,
"learning_rate": 7.745641025641027e-06,
"loss": 0.0882,
"step": 4900
},
{
"epoch": 2.0828927891731865,
"grad_norm": 3.7527148723602295,
"learning_rate": 7.732820512820514e-06,
"loss": 0.0752,
"step": 4925
},
{
"epoch": 2.0934658490167055,
"grad_norm": 6.429442405700684,
"learning_rate": 7.72e-06,
"loss": 0.0999,
"step": 4950
},
{
"epoch": 2.104038908860224,
"grad_norm": 3.7136788368225098,
"learning_rate": 7.707179487179487e-06,
"loss": 0.0745,
"step": 4975
},
{
"epoch": 2.114611968703743,
"grad_norm": 3.5466103553771973,
"learning_rate": 7.694358974358976e-06,
"loss": 0.0807,
"step": 5000
},
{
"epoch": 2.114611968703743,
"eval_loss": 0.16425499320030212,
"eval_runtime": 474.1617,
"eval_samples_per_second": 7.679,
"eval_steps_per_second": 0.962,
"eval_wer": 0.14989094621560164,
"step": 5000
},
{
"epoch": 2.1251850285472615,
"grad_norm": 4.999958515167236,
"learning_rate": 7.681538461538463e-06,
"loss": 0.0804,
"step": 5025
},
{
"epoch": 2.13575808839078,
"grad_norm": 5.525317668914795,
"learning_rate": 7.66871794871795e-06,
"loss": 0.1002,
"step": 5050
},
{
"epoch": 2.146331148234299,
"grad_norm": 4.599583148956299,
"learning_rate": 7.655897435897436e-06,
"loss": 0.0868,
"step": 5075
},
{
"epoch": 2.1569042080778176,
"grad_norm": 5.12149715423584,
"learning_rate": 7.643076923076925e-06,
"loss": 0.0937,
"step": 5100
},
{
"epoch": 2.1674772679213365,
"grad_norm": 3.3281843662261963,
"learning_rate": 7.630256410256412e-06,
"loss": 0.0748,
"step": 5125
},
{
"epoch": 2.178050327764855,
"grad_norm": 5.142365455627441,
"learning_rate": 7.617435897435898e-06,
"loss": 0.0931,
"step": 5150
},
{
"epoch": 2.188623387608374,
"grad_norm": 3.4713194370269775,
"learning_rate": 7.604615384615385e-06,
"loss": 0.0747,
"step": 5175
},
{
"epoch": 2.1991964474518926,
"grad_norm": 3.9539084434509277,
"learning_rate": 7.591794871794872e-06,
"loss": 0.0917,
"step": 5200
},
{
"epoch": 2.209769507295411,
"grad_norm": 6.873681545257568,
"learning_rate": 7.578974358974359e-06,
"loss": 0.0842,
"step": 5225
},
{
"epoch": 2.22034256713893,
"grad_norm": 3.6090524196624756,
"learning_rate": 7.566153846153847e-06,
"loss": 0.0779,
"step": 5250
},
{
"epoch": 2.2309156269824486,
"grad_norm": 4.458488464355469,
"learning_rate": 7.553333333333334e-06,
"loss": 0.0854,
"step": 5275
},
{
"epoch": 2.2414886868259676,
"grad_norm": 4.1330718994140625,
"learning_rate": 7.540512820512821e-06,
"loss": 0.0674,
"step": 5300
},
{
"epoch": 2.252061746669486,
"grad_norm": 3.256821393966675,
"learning_rate": 7.527692307692308e-06,
"loss": 0.0787,
"step": 5325
},
{
"epoch": 2.262634806513005,
"grad_norm": 3.738051176071167,
"learning_rate": 7.514871794871795e-06,
"loss": 0.0844,
"step": 5350
},
{
"epoch": 2.2732078663565236,
"grad_norm": 6.081026554107666,
"learning_rate": 7.5020512820512826e-06,
"loss": 0.0794,
"step": 5375
},
{
"epoch": 2.283780926200042,
"grad_norm": 4.990882396697998,
"learning_rate": 7.489230769230769e-06,
"loss": 0.0806,
"step": 5400
},
{
"epoch": 2.294353986043561,
"grad_norm": 3.432616710662842,
"learning_rate": 7.476410256410257e-06,
"loss": 0.0961,
"step": 5425
},
{
"epoch": 2.3049270458870796,
"grad_norm": 9.63456916809082,
"learning_rate": 7.463589743589744e-06,
"loss": 0.0801,
"step": 5450
},
{
"epoch": 2.3155001057305986,
"grad_norm": 4.464268207550049,
"learning_rate": 7.4507692307692316e-06,
"loss": 0.0796,
"step": 5475
},
{
"epoch": 2.326073165574117,
"grad_norm": 3.0448217391967773,
"learning_rate": 7.437948717948718e-06,
"loss": 0.0718,
"step": 5500
},
{
"epoch": 2.3366462254176357,
"grad_norm": 6.034780979156494,
"learning_rate": 7.425128205128206e-06,
"loss": 0.1072,
"step": 5525
},
{
"epoch": 2.3472192852611546,
"grad_norm": 5.285013675689697,
"learning_rate": 7.412307692307693e-06,
"loss": 0.094,
"step": 5550
},
{
"epoch": 2.357792345104673,
"grad_norm": 4.443994998931885,
"learning_rate": 7.3994871794871806e-06,
"loss": 0.0897,
"step": 5575
},
{
"epoch": 2.368365404948192,
"grad_norm": 6.323433876037598,
"learning_rate": 7.386666666666667e-06,
"loss": 0.0774,
"step": 5600
},
{
"epoch": 2.3789384647917107,
"grad_norm": 7.71209192276001,
"learning_rate": 7.373846153846155e-06,
"loss": 0.097,
"step": 5625
},
{
"epoch": 2.389511524635229,
"grad_norm": 2.249687910079956,
"learning_rate": 7.361025641025642e-06,
"loss": 0.1082,
"step": 5650
},
{
"epoch": 2.400084584478748,
"grad_norm": 4.210668087005615,
"learning_rate": 7.3482051282051295e-06,
"loss": 0.0649,
"step": 5675
},
{
"epoch": 2.4106576443222667,
"grad_norm": 5.31463098526001,
"learning_rate": 7.335384615384616e-06,
"loss": 0.0924,
"step": 5700
},
{
"epoch": 2.4212307041657857,
"grad_norm": 5.068974494934082,
"learning_rate": 7.322564102564104e-06,
"loss": 0.0783,
"step": 5725
},
{
"epoch": 2.431803764009304,
"grad_norm": 5.510648727416992,
"learning_rate": 7.309743589743591e-06,
"loss": 0.0791,
"step": 5750
},
{
"epoch": 2.442376823852823,
"grad_norm": 5.710975646972656,
"learning_rate": 7.296923076923077e-06,
"loss": 0.0842,
"step": 5775
},
{
"epoch": 2.4529498836963417,
"grad_norm": 4.2290215492248535,
"learning_rate": 7.2841025641025645e-06,
"loss": 0.0827,
"step": 5800
},
{
"epoch": 2.4635229435398607,
"grad_norm": 5.621830940246582,
"learning_rate": 7.271282051282051e-06,
"loss": 0.0809,
"step": 5825
},
{
"epoch": 2.474096003383379,
"grad_norm": 2.6628761291503906,
"learning_rate": 7.258461538461539e-06,
"loss": 0.0784,
"step": 5850
},
{
"epoch": 2.4846690632268977,
"grad_norm": 5.889407634735107,
"learning_rate": 7.245641025641026e-06,
"loss": 0.0908,
"step": 5875
},
{
"epoch": 2.4952421230704167,
"grad_norm": 4.617676258087158,
"learning_rate": 7.2328205128205135e-06,
"loss": 0.0883,
"step": 5900
},
{
"epoch": 2.5058151829139352,
"grad_norm": 4.988363265991211,
"learning_rate": 7.22e-06,
"loss": 0.1109,
"step": 5925
},
{
"epoch": 2.516388242757454,
"grad_norm": 5.262870788574219,
"learning_rate": 7.207179487179487e-06,
"loss": 0.1085,
"step": 5950
},
{
"epoch": 2.5269613026009727,
"grad_norm": 19.423404693603516,
"learning_rate": 7.194358974358975e-06,
"loss": 0.0868,
"step": 5975
},
{
"epoch": 2.5375343624444913,
"grad_norm": 6.528380870819092,
"learning_rate": 7.181538461538462e-06,
"loss": 0.089,
"step": 6000
},
{
"epoch": 2.5375343624444913,
"eval_loss": 0.15615449845790863,
"eval_runtime": 479.6175,
"eval_samples_per_second": 7.591,
"eval_steps_per_second": 0.951,
"eval_wer": 0.13476263399693722,
"step": 6000
},
{
"epoch": 2.5481074222880102,
"grad_norm": 4.123988628387451,
"learning_rate": 7.168717948717949e-06,
"loss": 0.085,
"step": 6025
},
{
"epoch": 2.5586804821315288,
"grad_norm": 3.1818017959594727,
"learning_rate": 7.155897435897436e-06,
"loss": 0.0906,
"step": 6050
},
{
"epoch": 2.5692535419750477,
"grad_norm": 5.67425537109375,
"learning_rate": 7.143076923076924e-06,
"loss": 0.0928,
"step": 6075
},
{
"epoch": 2.5798266018185663,
"grad_norm": 4.55906867980957,
"learning_rate": 7.130256410256411e-06,
"loss": 0.0732,
"step": 6100
},
{
"epoch": 2.590399661662085,
"grad_norm": 3.958922863006592,
"learning_rate": 7.117435897435898e-06,
"loss": 0.0867,
"step": 6125
},
{
"epoch": 2.6009727215056038,
"grad_norm": 4.0345306396484375,
"learning_rate": 7.104615384615385e-06,
"loss": 0.0636,
"step": 6150
},
{
"epoch": 2.6115457813491223,
"grad_norm": 6.317999839782715,
"learning_rate": 7.091794871794873e-06,
"loss": 0.0816,
"step": 6175
},
{
"epoch": 2.6221188411926413,
"grad_norm": 7.474951267242432,
"learning_rate": 7.07897435897436e-06,
"loss": 0.0953,
"step": 6200
},
{
"epoch": 2.63269190103616,
"grad_norm": 4.363856315612793,
"learning_rate": 7.066153846153847e-06,
"loss": 0.0931,
"step": 6225
},
{
"epoch": 2.6432649608796783,
"grad_norm": 6.427609920501709,
"learning_rate": 7.053333333333334e-06,
"loss": 0.0794,
"step": 6250
},
{
"epoch": 2.6538380207231973,
"grad_norm": 5.55384635925293,
"learning_rate": 7.040512820512822e-06,
"loss": 0.0892,
"step": 6275
},
{
"epoch": 2.6644110805667163,
"grad_norm": 5.477039813995361,
"learning_rate": 7.027692307692309e-06,
"loss": 0.0681,
"step": 6300
},
{
"epoch": 2.674984140410235,
"grad_norm": 4.115767955780029,
"learning_rate": 7.014871794871796e-06,
"loss": 0.0673,
"step": 6325
},
{
"epoch": 2.6855572002537533,
"grad_norm": 3.343461751937866,
"learning_rate": 7.002051282051283e-06,
"loss": 0.0651,
"step": 6350
},
{
"epoch": 2.6961302600972723,
"grad_norm": 5.022345542907715,
"learning_rate": 6.989230769230769e-06,
"loss": 0.0727,
"step": 6375
},
{
"epoch": 2.706703319940791,
"grad_norm": 5.0488128662109375,
"learning_rate": 6.976410256410257e-06,
"loss": 0.0808,
"step": 6400
},
{
"epoch": 2.71727637978431,
"grad_norm": 5.218719005584717,
"learning_rate": 6.963589743589744e-06,
"loss": 0.0955,
"step": 6425
},
{
"epoch": 2.7278494396278283,
"grad_norm": 4.660473346710205,
"learning_rate": 6.950769230769231e-06,
"loss": 0.0855,
"step": 6450
},
{
"epoch": 2.738422499471347,
"grad_norm": 5.673701286315918,
"learning_rate": 6.937948717948718e-06,
"loss": 0.0899,
"step": 6475
},
{
"epoch": 2.748995559314866,
"grad_norm": 4.193291187286377,
"learning_rate": 6.925128205128206e-06,
"loss": 0.0986,
"step": 6500
},
{
"epoch": 2.7595686191583844,
"grad_norm": 3.0398550033569336,
"learning_rate": 6.912307692307693e-06,
"loss": 0.0747,
"step": 6525
},
{
"epoch": 2.7701416790019033,
"grad_norm": 4.971275806427002,
"learning_rate": 6.899487179487179e-06,
"loss": 0.0829,
"step": 6550
},
{
"epoch": 2.780714738845422,
"grad_norm": 5.15726375579834,
"learning_rate": 6.886666666666667e-06,
"loss": 0.074,
"step": 6575
},
{
"epoch": 2.7912877986889404,
"grad_norm": 4.134614944458008,
"learning_rate": 6.873846153846154e-06,
"loss": 0.0764,
"step": 6600
},
{
"epoch": 2.8018608585324594,
"grad_norm": 3.367145538330078,
"learning_rate": 6.861025641025642e-06,
"loss": 0.085,
"step": 6625
},
{
"epoch": 2.812433918375978,
"grad_norm": 5.638070106506348,
"learning_rate": 6.848205128205128e-06,
"loss": 0.0687,
"step": 6650
},
{
"epoch": 2.823006978219497,
"grad_norm": 5.024541854858398,
"learning_rate": 6.835384615384616e-06,
"loss": 0.0778,
"step": 6675
},
{
"epoch": 2.8335800380630154,
"grad_norm": 2.271406412124634,
"learning_rate": 6.822564102564103e-06,
"loss": 0.0755,
"step": 6700
},
{
"epoch": 2.844153097906534,
"grad_norm": 3.323329210281372,
"learning_rate": 6.8097435897435906e-06,
"loss": 0.0762,
"step": 6725
},
{
"epoch": 2.854726157750053,
"grad_norm": 3.916624069213867,
"learning_rate": 6.796923076923077e-06,
"loss": 0.0985,
"step": 6750
},
{
"epoch": 2.8652992175935714,
"grad_norm": 4.36145544052124,
"learning_rate": 6.784102564102565e-06,
"loss": 0.0815,
"step": 6775
},
{
"epoch": 2.8758722774370904,
"grad_norm": 3.7215347290039062,
"learning_rate": 6.771282051282052e-06,
"loss": 0.0721,
"step": 6800
},
{
"epoch": 2.886445337280609,
"grad_norm": 3.406437397003174,
"learning_rate": 6.7584615384615396e-06,
"loss": 0.0794,
"step": 6825
},
{
"epoch": 2.8970183971241275,
"grad_norm": 4.781268119812012,
"learning_rate": 6.745641025641026e-06,
"loss": 0.0831,
"step": 6850
},
{
"epoch": 2.9075914569676464,
"grad_norm": 7.577853202819824,
"learning_rate": 6.732820512820514e-06,
"loss": 0.0837,
"step": 6875
},
{
"epoch": 2.9181645168111654,
"grad_norm": 4.966701984405518,
"learning_rate": 6.720000000000001e-06,
"loss": 0.0882,
"step": 6900
},
{
"epoch": 2.928737576654684,
"grad_norm": 4.185604572296143,
"learning_rate": 6.7071794871794886e-06,
"loss": 0.0894,
"step": 6925
},
{
"epoch": 2.9393106364982025,
"grad_norm": 6.426883697509766,
"learning_rate": 6.694358974358975e-06,
"loss": 0.0762,
"step": 6950
},
{
"epoch": 2.9498836963417214,
"grad_norm": 5.418060779571533,
"learning_rate": 6.681538461538463e-06,
"loss": 0.0837,
"step": 6975
},
{
"epoch": 2.96045675618524,
"grad_norm": 7.900200366973877,
"learning_rate": 6.668717948717949e-06,
"loss": 0.0883,
"step": 7000
},
{
"epoch": 2.96045675618524,
"eval_loss": 0.1452471762895584,
"eval_runtime": 481.3751,
"eval_samples_per_second": 7.564,
"eval_steps_per_second": 0.947,
"eval_wer": 0.12678082509629218,
"step": 7000
},
{
"epoch": 2.971029816028759,
"grad_norm": 7.546687126159668,
"learning_rate": 6.655897435897436e-06,
"loss": 0.083,
"step": 7025
},
{
"epoch": 2.9816028758722775,
"grad_norm": 4.659261703491211,
"learning_rate": 6.6430769230769235e-06,
"loss": 0.0789,
"step": 7050
},
{
"epoch": 2.992175935715796,
"grad_norm": 5.171137809753418,
"learning_rate": 6.63025641025641e-06,
"loss": 0.0843,
"step": 7075
},
{
"epoch": 3.002748995559315,
"grad_norm": 2.0631213188171387,
"learning_rate": 6.617435897435898e-06,
"loss": 0.0597,
"step": 7100
},
{
"epoch": 3.0133220554028335,
"grad_norm": 1.6943330764770508,
"learning_rate": 6.604615384615385e-06,
"loss": 0.0441,
"step": 7125
},
{
"epoch": 3.0238951152463525,
"grad_norm": 2.18405818939209,
"learning_rate": 6.5917948717948725e-06,
"loss": 0.0399,
"step": 7150
},
{
"epoch": 3.034468175089871,
"grad_norm": 4.707269668579102,
"learning_rate": 6.578974358974359e-06,
"loss": 0.0497,
"step": 7175
},
{
"epoch": 3.0450412349333895,
"grad_norm": 2.237076759338379,
"learning_rate": 6.566153846153846e-06,
"loss": 0.0381,
"step": 7200
},
{
"epoch": 3.0556142947769085,
"grad_norm": 2.0544979572296143,
"learning_rate": 6.553333333333334e-06,
"loss": 0.0458,
"step": 7225
},
{
"epoch": 3.066187354620427,
"grad_norm": 3.0701844692230225,
"learning_rate": 6.540512820512821e-06,
"loss": 0.0641,
"step": 7250
},
{
"epoch": 3.076760414463946,
"grad_norm": 2.446314573287964,
"learning_rate": 6.527692307692308e-06,
"loss": 0.0398,
"step": 7275
},
{
"epoch": 3.0873334743074645,
"grad_norm": 1.1977494955062866,
"learning_rate": 6.514871794871795e-06,
"loss": 0.0347,
"step": 7300
},
{
"epoch": 3.0979065341509835,
"grad_norm": 3.8696768283843994,
"learning_rate": 6.5025641025641026e-06,
"loss": 0.0454,
"step": 7325
},
{
"epoch": 3.108479593994502,
"grad_norm": 4.348515033721924,
"learning_rate": 6.48974358974359e-06,
"loss": 0.0358,
"step": 7350
},
{
"epoch": 3.1190526538380206,
"grad_norm": 3.3741064071655273,
"learning_rate": 6.476923076923077e-06,
"loss": 0.0477,
"step": 7375
},
{
"epoch": 3.1296257136815395,
"grad_norm": 1.3603962659835815,
"learning_rate": 6.464102564102565e-06,
"loss": 0.0424,
"step": 7400
},
{
"epoch": 3.140198773525058,
"grad_norm": 2.087542772293091,
"learning_rate": 6.4512820512820516e-06,
"loss": 0.034,
"step": 7425
},
{
"epoch": 3.150771833368577,
"grad_norm": 3.6920878887176514,
"learning_rate": 6.438461538461539e-06,
"loss": 0.0441,
"step": 7450
},
{
"epoch": 3.1613448932120956,
"grad_norm": 2.65889573097229,
"learning_rate": 6.425641025641026e-06,
"loss": 0.0399,
"step": 7475
},
{
"epoch": 3.1719179530556145,
"grad_norm": 6.821660995483398,
"learning_rate": 6.412820512820514e-06,
"loss": 0.0331,
"step": 7500
},
{
"epoch": 3.182491012899133,
"grad_norm": 3.360375165939331,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.0424,
"step": 7525
},
{
"epoch": 3.1930640727426516,
"grad_norm": 2.295417547225952,
"learning_rate": 6.387179487179488e-06,
"loss": 0.0406,
"step": 7550
},
{
"epoch": 3.2036371325861706,
"grad_norm": 2.809596300125122,
"learning_rate": 6.374358974358975e-06,
"loss": 0.0536,
"step": 7575
},
{
"epoch": 3.214210192429689,
"grad_norm": 2.86702823638916,
"learning_rate": 6.361538461538463e-06,
"loss": 0.0401,
"step": 7600
},
{
"epoch": 3.224783252273208,
"grad_norm": 3.1539106369018555,
"learning_rate": 6.3487179487179495e-06,
"loss": 0.038,
"step": 7625
},
{
"epoch": 3.2353563121167266,
"grad_norm": 5.205074787139893,
"learning_rate": 6.335897435897436e-06,
"loss": 0.0402,
"step": 7650
},
{
"epoch": 3.245929371960245,
"grad_norm": 4.798672199249268,
"learning_rate": 6.323076923076924e-06,
"loss": 0.0412,
"step": 7675
},
{
"epoch": 3.256502431803764,
"grad_norm": 2.3215603828430176,
"learning_rate": 6.310256410256411e-06,
"loss": 0.0389,
"step": 7700
},
{
"epoch": 3.2670754916472826,
"grad_norm": 2.9327590465545654,
"learning_rate": 6.2974358974358985e-06,
"loss": 0.0327,
"step": 7725
},
{
"epoch": 3.2776485514908016,
"grad_norm": 2.3523976802825928,
"learning_rate": 6.284615384615385e-06,
"loss": 0.0415,
"step": 7750
},
{
"epoch": 3.28822161133432,
"grad_norm": 2.7430717945098877,
"learning_rate": 6.271794871794872e-06,
"loss": 0.038,
"step": 7775
},
{
"epoch": 3.2987946711778386,
"grad_norm": 3.922346830368042,
"learning_rate": 6.258974358974359e-06,
"loss": 0.0505,
"step": 7800
},
{
"epoch": 3.3093677310213576,
"grad_norm": 3.7691900730133057,
"learning_rate": 6.246153846153846e-06,
"loss": 0.037,
"step": 7825
},
{
"epoch": 3.319940790864876,
"grad_norm": 5.420505523681641,
"learning_rate": 6.2333333333333335e-06,
"loss": 0.0403,
"step": 7850
},
{
"epoch": 3.330513850708395,
"grad_norm": 2.5405962467193604,
"learning_rate": 6.22051282051282e-06,
"loss": 0.0481,
"step": 7875
},
{
"epoch": 3.3410869105519136,
"grad_norm": 5.486624717712402,
"learning_rate": 6.207692307692308e-06,
"loss": 0.0499,
"step": 7900
},
{
"epoch": 3.3516599703954326,
"grad_norm": 7.166432857513428,
"learning_rate": 6.194871794871795e-06,
"loss": 0.0455,
"step": 7925
},
{
"epoch": 3.362233030238951,
"grad_norm": 2.30519437789917,
"learning_rate": 6.1820512820512825e-06,
"loss": 0.0526,
"step": 7950
},
{
"epoch": 3.3728060900824697,
"grad_norm": 1.9276854991912842,
"learning_rate": 6.169230769230769e-06,
"loss": 0.0496,
"step": 7975
},
{
"epoch": 3.3833791499259886,
"grad_norm": 2.095248222351074,
"learning_rate": 6.156410256410257e-06,
"loss": 0.0368,
"step": 8000
},
{
"epoch": 3.3833791499259886,
"eval_loss": 0.14458830654621124,
"eval_runtime": 482.4832,
"eval_samples_per_second": 7.546,
"eval_steps_per_second": 0.945,
"eval_wer": 0.13239593484616455,
"step": 8000
},
{
"epoch": 3.393952209769507,
"grad_norm": 1.7885074615478516,
"learning_rate": 6.143589743589744e-06,
"loss": 0.0361,
"step": 8025
},
{
"epoch": 3.404525269613026,
"grad_norm": 4.022132873535156,
"learning_rate": 6.1307692307692315e-06,
"loss": 0.0463,
"step": 8050
},
{
"epoch": 3.4150983294565447,
"grad_norm": 2.1497411727905273,
"learning_rate": 6.117948717948718e-06,
"loss": 0.0427,
"step": 8075
},
{
"epoch": 3.4256713893000637,
"grad_norm": 1.6746113300323486,
"learning_rate": 6.105128205128206e-06,
"loss": 0.0344,
"step": 8100
},
{
"epoch": 3.436244449143582,
"grad_norm": 3.6092941761016846,
"learning_rate": 6.092307692307693e-06,
"loss": 0.0469,
"step": 8125
},
{
"epoch": 3.4468175089871007,
"grad_norm": 4.466283798217773,
"learning_rate": 6.0794871794871805e-06,
"loss": 0.0423,
"step": 8150
},
{
"epoch": 3.4573905688306197,
"grad_norm": 2.550199508666992,
"learning_rate": 6.066666666666667e-06,
"loss": 0.0542,
"step": 8175
},
{
"epoch": 3.467963628674138,
"grad_norm": 2.781538963317871,
"learning_rate": 6.053846153846155e-06,
"loss": 0.0423,
"step": 8200
},
{
"epoch": 3.478536688517657,
"grad_norm": 1.9282548427581787,
"learning_rate": 6.041025641025642e-06,
"loss": 0.0386,
"step": 8225
},
{
"epoch": 3.4891097483611757,
"grad_norm": 2.867671012878418,
"learning_rate": 6.028205128205129e-06,
"loss": 0.0364,
"step": 8250
},
{
"epoch": 3.4996828082046942,
"grad_norm": 2.200422763824463,
"learning_rate": 6.015384615384616e-06,
"loss": 0.0396,
"step": 8275
},
{
"epoch": 3.510255868048213,
"grad_norm": 2.84183931350708,
"learning_rate": 6.002564102564103e-06,
"loss": 0.0432,
"step": 8300
},
{
"epoch": 3.5208289278917317,
"grad_norm": 3.7579634189605713,
"learning_rate": 5.989743589743591e-06,
"loss": 0.0503,
"step": 8325
},
{
"epoch": 3.5314019877352507,
"grad_norm": 5.463958740234375,
"learning_rate": 5.976923076923078e-06,
"loss": 0.0367,
"step": 8350
},
{
"epoch": 3.5419750475787692,
"grad_norm": 2.921443462371826,
"learning_rate": 5.9641025641025644e-06,
"loss": 0.0474,
"step": 8375
},
{
"epoch": 3.5525481074222878,
"grad_norm": 2.1998775005340576,
"learning_rate": 5.951282051282051e-06,
"loss": 0.0393,
"step": 8400
},
{
"epoch": 3.5631211672658067,
"grad_norm": 2.0558321475982666,
"learning_rate": 5.938461538461538e-06,
"loss": 0.0434,
"step": 8425
},
{
"epoch": 3.5736942271093257,
"grad_norm": 4.614722728729248,
"learning_rate": 5.925641025641026e-06,
"loss": 0.0419,
"step": 8450
},
{
"epoch": 3.5842672869528442,
"grad_norm": 3.2002508640289307,
"learning_rate": 5.912820512820513e-06,
"loss": 0.0579,
"step": 8475
},
{
"epoch": 3.5948403467963628,
"grad_norm": 2.0797934532165527,
"learning_rate": 5.9e-06,
"loss": 0.047,
"step": 8500
},
{
"epoch": 3.6054134066398817,
"grad_norm": 1.3923156261444092,
"learning_rate": 5.887179487179487e-06,
"loss": 0.035,
"step": 8525
},
{
"epoch": 3.6159864664834003,
"grad_norm": 2.350816011428833,
"learning_rate": 5.874358974358975e-06,
"loss": 0.0678,
"step": 8550
},
{
"epoch": 3.6265595263269192,
"grad_norm": 3.1680469512939453,
"learning_rate": 5.861538461538462e-06,
"loss": 0.055,
"step": 8575
},
{
"epoch": 3.6371325861704378,
"grad_norm": 2.174107551574707,
"learning_rate": 5.848717948717949e-06,
"loss": 0.0475,
"step": 8600
},
{
"epoch": 3.6477056460139563,
"grad_norm": 3.112668037414551,
"learning_rate": 5.835897435897436e-06,
"loss": 0.0447,
"step": 8625
},
{
"epoch": 3.6582787058574753,
"grad_norm": 4.258768558502197,
"learning_rate": 5.823076923076924e-06,
"loss": 0.0322,
"step": 8650
},
{
"epoch": 3.668851765700994,
"grad_norm": 3.305208683013916,
"learning_rate": 5.8102564102564106e-06,
"loss": 0.0454,
"step": 8675
},
{
"epoch": 3.679424825544513,
"grad_norm": 2.314253568649292,
"learning_rate": 5.797435897435898e-06,
"loss": 0.0376,
"step": 8700
},
{
"epoch": 3.6899978853880313,
"grad_norm": 2.856414318084717,
"learning_rate": 5.784615384615385e-06,
"loss": 0.0396,
"step": 8725
},
{
"epoch": 3.70057094523155,
"grad_norm": 1.2516982555389404,
"learning_rate": 5.771794871794873e-06,
"loss": 0.0323,
"step": 8750
},
{
"epoch": 3.711144005075069,
"grad_norm": 3.374429225921631,
"learning_rate": 5.7589743589743596e-06,
"loss": 0.0487,
"step": 8775
},
{
"epoch": 3.7217170649185873,
"grad_norm": 3.7010583877563477,
"learning_rate": 5.746153846153847e-06,
"loss": 0.0489,
"step": 8800
},
{
"epoch": 3.7322901247621063,
"grad_norm": 1.8617634773254395,
"learning_rate": 5.733333333333334e-06,
"loss": 0.0376,
"step": 8825
},
{
"epoch": 3.742863184605625,
"grad_norm": 1.5360052585601807,
"learning_rate": 5.720512820512821e-06,
"loss": 0.0392,
"step": 8850
},
{
"epoch": 3.7534362444491434,
"grad_norm": 3.569289207458496,
"learning_rate": 5.7076923076923086e-06,
"loss": 0.0506,
"step": 8875
},
{
"epoch": 3.7640093042926623,
"grad_norm": 4.679437160491943,
"learning_rate": 5.694871794871795e-06,
"loss": 0.0397,
"step": 8900
},
{
"epoch": 3.774582364136181,
"grad_norm": 1.4421815872192383,
"learning_rate": 5.682051282051283e-06,
"loss": 0.0507,
"step": 8925
},
{
"epoch": 3.7851554239797,
"grad_norm": 4.020344257354736,
"learning_rate": 5.66923076923077e-06,
"loss": 0.0426,
"step": 8950
},
{
"epoch": 3.7957284838232184,
"grad_norm": 1.8350247144699097,
"learning_rate": 5.6564102564102575e-06,
"loss": 0.0577,
"step": 8975
},
{
"epoch": 3.806301543666737,
"grad_norm": 4.344027519226074,
"learning_rate": 5.6435897435897435e-06,
"loss": 0.0463,
"step": 9000
},
{
"epoch": 3.806301543666737,
"eval_loss": 0.1400885134935379,
"eval_runtime": 478.4392,
"eval_samples_per_second": 7.61,
"eval_steps_per_second": 0.953,
"eval_wer": 0.12863705972434916,
"step": 9000
},
{
"epoch": 3.816874603510256,
"grad_norm": 1.8994994163513184,
"learning_rate": 5.63076923076923e-06,
"loss": 0.0396,
"step": 9025
},
{
"epoch": 3.827447663353775,
"grad_norm": 2.9287285804748535,
"learning_rate": 5.617948717948718e-06,
"loss": 0.0353,
"step": 9050
},
{
"epoch": 3.8380207231972934,
"grad_norm": 5.816341876983643,
"learning_rate": 5.605128205128205e-06,
"loss": 0.0416,
"step": 9075
},
{
"epoch": 3.848593783040812,
"grad_norm": 1.939314603805542,
"learning_rate": 5.5923076923076925e-06,
"loss": 0.0356,
"step": 9100
},
{
"epoch": 3.859166842884331,
"grad_norm": 2.6920735836029053,
"learning_rate": 5.579487179487179e-06,
"loss": 0.0361,
"step": 9125
},
{
"epoch": 3.8697399027278494,
"grad_norm": 3.10347056388855,
"learning_rate": 5.566666666666667e-06,
"loss": 0.0465,
"step": 9150
},
{
"epoch": 3.8803129625713684,
"grad_norm": 3.6406362056732178,
"learning_rate": 5.553846153846154e-06,
"loss": 0.0451,
"step": 9175
},
{
"epoch": 3.890886022414887,
"grad_norm": 1.8818650245666504,
"learning_rate": 5.5410256410256415e-06,
"loss": 0.0399,
"step": 9200
},
{
"epoch": 3.9014590822584054,
"grad_norm": 2.670872688293457,
"learning_rate": 5.528205128205128e-06,
"loss": 0.0394,
"step": 9225
},
{
"epoch": 3.9120321421019244,
"grad_norm": 6.323610782623291,
"learning_rate": 5.515384615384616e-06,
"loss": 0.0469,
"step": 9250
},
{
"epoch": 3.922605201945443,
"grad_norm": 3.8592729568481445,
"learning_rate": 5.502564102564103e-06,
"loss": 0.0432,
"step": 9275
},
{
"epoch": 3.933178261788962,
"grad_norm": 1.5597033500671387,
"learning_rate": 5.4897435897435905e-06,
"loss": 0.0326,
"step": 9300
},
{
"epoch": 3.9437513216324804,
"grad_norm": 1.413038969039917,
"learning_rate": 5.476923076923077e-06,
"loss": 0.0439,
"step": 9325
},
{
"epoch": 3.954324381475999,
"grad_norm": 1.5831608772277832,
"learning_rate": 5.464102564102565e-06,
"loss": 0.0406,
"step": 9350
},
{
"epoch": 3.964897441319518,
"grad_norm": 4.640722274780273,
"learning_rate": 5.451282051282052e-06,
"loss": 0.046,
"step": 9375
},
{
"epoch": 3.9754705011630365,
"grad_norm": 2.2848503589630127,
"learning_rate": 5.4384615384615395e-06,
"loss": 0.039,
"step": 9400
},
{
"epoch": 3.9860435610065554,
"grad_norm": 3.464656352996826,
"learning_rate": 5.425641025641026e-06,
"loss": 0.0482,
"step": 9425
},
{
"epoch": 3.996616620850074,
"grad_norm": 1.6633790731430054,
"learning_rate": 5.412820512820514e-06,
"loss": 0.0458,
"step": 9450
},
{
"epoch": 4.0071896806935925,
"grad_norm": 3.9742796421051025,
"learning_rate": 5.400000000000001e-06,
"loss": 0.0301,
"step": 9475
},
{
"epoch": 4.017762740537111,
"grad_norm": 0.8984088897705078,
"learning_rate": 5.387179487179488e-06,
"loss": 0.0171,
"step": 9500
},
{
"epoch": 4.02833580038063,
"grad_norm": 1.1975101232528687,
"learning_rate": 5.374358974358975e-06,
"loss": 0.0346,
"step": 9525
},
{
"epoch": 4.038908860224149,
"grad_norm": 1.3089258670806885,
"learning_rate": 5.361538461538462e-06,
"loss": 0.0261,
"step": 9550
},
{
"epoch": 4.0494819200676675,
"grad_norm": 1.7242599725723267,
"learning_rate": 5.34871794871795e-06,
"loss": 0.024,
"step": 9575
},
{
"epoch": 4.060054979911186,
"grad_norm": 3.1359024047851562,
"learning_rate": 5.335897435897436e-06,
"loss": 0.019,
"step": 9600
},
{
"epoch": 4.070628039754705,
"grad_norm": 1.996058702468872,
"learning_rate": 5.323076923076923e-06,
"loss": 0.0165,
"step": 9625
},
{
"epoch": 4.081201099598224,
"grad_norm": 1.165366530418396,
"learning_rate": 5.31025641025641e-06,
"loss": 0.0241,
"step": 9650
},
{
"epoch": 4.0917741594417425,
"grad_norm": 1.7959648370742798,
"learning_rate": 5.297435897435897e-06,
"loss": 0.0193,
"step": 9675
},
{
"epoch": 4.102347219285261,
"grad_norm": 3.0931942462921143,
"learning_rate": 5.284615384615385e-06,
"loss": 0.0188,
"step": 9700
},
{
"epoch": 4.1129202791287796,
"grad_norm": 1.2411391735076904,
"learning_rate": 5.271794871794872e-06,
"loss": 0.0315,
"step": 9725
},
{
"epoch": 4.123493338972299,
"grad_norm": 4.090996265411377,
"learning_rate": 5.258974358974359e-06,
"loss": 0.0216,
"step": 9750
},
{
"epoch": 4.1340663988158175,
"grad_norm": 1.1179065704345703,
"learning_rate": 5.246153846153846e-06,
"loss": 0.0234,
"step": 9775
},
{
"epoch": 4.144639458659336,
"grad_norm": 1.4874383211135864,
"learning_rate": 5.233333333333334e-06,
"loss": 0.0187,
"step": 9800
},
{
"epoch": 4.155212518502855,
"grad_norm": 0.8263124823570251,
"learning_rate": 5.220512820512821e-06,
"loss": 0.0253,
"step": 9825
},
{
"epoch": 4.165785578346373,
"grad_norm": 2.70497727394104,
"learning_rate": 5.207692307692308e-06,
"loss": 0.0228,
"step": 9850
},
{
"epoch": 4.1763586381898925,
"grad_norm": 4.427598476409912,
"learning_rate": 5.194871794871795e-06,
"loss": 0.0248,
"step": 9875
},
{
"epoch": 4.186931698033411,
"grad_norm": 1.1242289543151855,
"learning_rate": 5.182051282051283e-06,
"loss": 0.0205,
"step": 9900
},
{
"epoch": 4.19750475787693,
"grad_norm": 0.6889589428901672,
"learning_rate": 5.16923076923077e-06,
"loss": 0.0152,
"step": 9925
},
{
"epoch": 4.208077817720448,
"grad_norm": 1.5763376951217651,
"learning_rate": 5.156410256410257e-06,
"loss": 0.0164,
"step": 9950
},
{
"epoch": 4.218650877563967,
"grad_norm": 3.120882034301758,
"learning_rate": 5.143589743589744e-06,
"loss": 0.0249,
"step": 9975
},
{
"epoch": 4.229223937407486,
"grad_norm": 1.5591440200805664,
"learning_rate": 5.130769230769232e-06,
"loss": 0.0278,
"step": 10000
},
{
"epoch": 4.229223937407486,
"eval_loss": 0.14359265565872192,
"eval_runtime": 472.3801,
"eval_samples_per_second": 7.708,
"eval_steps_per_second": 0.965,
"eval_wer": 0.11810292821012576,
"step": 10000
},
{
"epoch": 4.239796997251005,
"grad_norm": 1.2260849475860596,
"learning_rate": 5.1179487179487186e-06,
"loss": 0.0169,
"step": 10025
},
{
"epoch": 4.250370057094523,
"grad_norm": 0.49284470081329346,
"learning_rate": 5.105128205128206e-06,
"loss": 0.018,
"step": 10050
},
{
"epoch": 4.260943116938042,
"grad_norm": 2.0094289779663086,
"learning_rate": 5.092307692307693e-06,
"loss": 0.0218,
"step": 10075
},
{
"epoch": 4.27151617678156,
"grad_norm": 1.3108640909194946,
"learning_rate": 5.07948717948718e-06,
"loss": 0.025,
"step": 10100
},
{
"epoch": 4.28208923662508,
"grad_norm": 1.7406195402145386,
"learning_rate": 5.0666666666666676e-06,
"loss": 0.0235,
"step": 10125
},
{
"epoch": 4.292662296468598,
"grad_norm": 0.876416027545929,
"learning_rate": 5.053846153846154e-06,
"loss": 0.0195,
"step": 10150
},
{
"epoch": 4.303235356312117,
"grad_norm": 1.6580686569213867,
"learning_rate": 5.041025641025642e-06,
"loss": 0.0167,
"step": 10175
},
{
"epoch": 4.313808416155635,
"grad_norm": 0.5891424417495728,
"learning_rate": 5.028205128205128e-06,
"loss": 0.0262,
"step": 10200
},
{
"epoch": 4.324381475999155,
"grad_norm": 2.2991206645965576,
"learning_rate": 5.015384615384616e-06,
"loss": 0.0253,
"step": 10225
},
{
"epoch": 4.334954535842673,
"grad_norm": 1.3528612852096558,
"learning_rate": 5.0025641025641025e-06,
"loss": 0.0269,
"step": 10250
},
{
"epoch": 4.345527595686192,
"grad_norm": 0.8127634525299072,
"learning_rate": 4.98974358974359e-06,
"loss": 0.0225,
"step": 10275
},
{
"epoch": 4.35610065552971,
"grad_norm": 0.9224340915679932,
"learning_rate": 4.976923076923078e-06,
"loss": 0.0219,
"step": 10300
},
{
"epoch": 4.366673715373229,
"grad_norm": 1.4799253940582275,
"learning_rate": 4.964102564102565e-06,
"loss": 0.0176,
"step": 10325
},
{
"epoch": 4.377246775216748,
"grad_norm": 3.0369462966918945,
"learning_rate": 4.9512820512820515e-06,
"loss": 0.0207,
"step": 10350
},
{
"epoch": 4.387819835060267,
"grad_norm": 0.7996551990509033,
"learning_rate": 4.938461538461538e-06,
"loss": 0.0236,
"step": 10375
},
{
"epoch": 4.398392894903785,
"grad_norm": 0.9955740571022034,
"learning_rate": 4.925641025641026e-06,
"loss": 0.0274,
"step": 10400
},
{
"epoch": 4.408965954747304,
"grad_norm": 1.9452871084213257,
"learning_rate": 4.912820512820513e-06,
"loss": 0.0164,
"step": 10425
},
{
"epoch": 4.419539014590822,
"grad_norm": 10.20065975189209,
"learning_rate": 4.9000000000000005e-06,
"loss": 0.0215,
"step": 10450
},
{
"epoch": 4.430112074434342,
"grad_norm": 0.7727457880973816,
"learning_rate": 4.887179487179487e-06,
"loss": 0.0154,
"step": 10475
},
{
"epoch": 4.44068513427786,
"grad_norm": 2.406764268875122,
"learning_rate": 4.874358974358975e-06,
"loss": 0.0197,
"step": 10500
},
{
"epoch": 4.451258194121379,
"grad_norm": 1.1302127838134766,
"learning_rate": 4.861538461538462e-06,
"loss": 0.0213,
"step": 10525
},
{
"epoch": 4.461831253964897,
"grad_norm": 1.5602991580963135,
"learning_rate": 4.8487179487179495e-06,
"loss": 0.0222,
"step": 10550
},
{
"epoch": 4.472404313808416,
"grad_norm": 2.2636196613311768,
"learning_rate": 4.835897435897436e-06,
"loss": 0.0208,
"step": 10575
},
{
"epoch": 4.482977373651935,
"grad_norm": 1.560081124305725,
"learning_rate": 4.823076923076924e-06,
"loss": 0.0212,
"step": 10600
},
{
"epoch": 4.493550433495454,
"grad_norm": 1.5913350582122803,
"learning_rate": 4.810256410256411e-06,
"loss": 0.0202,
"step": 10625
},
{
"epoch": 4.504123493338972,
"grad_norm": 2.0870747566223145,
"learning_rate": 4.7974358974358985e-06,
"loss": 0.0197,
"step": 10650
},
{
"epoch": 4.514696553182491,
"grad_norm": 0.832817554473877,
"learning_rate": 4.7846153846153845e-06,
"loss": 0.0229,
"step": 10675
},
{
"epoch": 4.52526961302601,
"grad_norm": 0.8193099498748779,
"learning_rate": 4.771794871794872e-06,
"loss": 0.0325,
"step": 10700
},
{
"epoch": 4.535842672869529,
"grad_norm": 2.3790667057037354,
"learning_rate": 4.758974358974359e-06,
"loss": 0.0226,
"step": 10725
},
{
"epoch": 4.546415732713047,
"grad_norm": 4.055891990661621,
"learning_rate": 4.746153846153847e-06,
"loss": 0.0339,
"step": 10750
},
{
"epoch": 4.556988792556566,
"grad_norm": 1.3302873373031616,
"learning_rate": 4.7333333333333335e-06,
"loss": 0.0205,
"step": 10775
},
{
"epoch": 4.567561852400084,
"grad_norm": 1.9055285453796387,
"learning_rate": 4.720512820512821e-06,
"loss": 0.0246,
"step": 10800
},
{
"epoch": 4.578134912243604,
"grad_norm": 0.8478215336799622,
"learning_rate": 4.707692307692308e-06,
"loss": 0.0283,
"step": 10825
},
{
"epoch": 4.588707972087122,
"grad_norm": 1.2983384132385254,
"learning_rate": 4.694871794871796e-06,
"loss": 0.0214,
"step": 10850
},
{
"epoch": 4.599281031930641,
"grad_norm": 0.70320063829422,
"learning_rate": 4.6820512820512825e-06,
"loss": 0.0344,
"step": 10875
},
{
"epoch": 4.609854091774159,
"grad_norm": 1.7480090856552124,
"learning_rate": 4.66923076923077e-06,
"loss": 0.0198,
"step": 10900
},
{
"epoch": 4.620427151617678,
"grad_norm": 1.8584214448928833,
"learning_rate": 4.656410256410257e-06,
"loss": 0.0198,
"step": 10925
},
{
"epoch": 4.631000211461197,
"grad_norm": 3.884225368499756,
"learning_rate": 4.643589743589745e-06,
"loss": 0.0197,
"step": 10950
},
{
"epoch": 4.641573271304716,
"grad_norm": 0.8960859775543213,
"learning_rate": 4.630769230769231e-06,
"loss": 0.0212,
"step": 10975
},
{
"epoch": 4.652146331148234,
"grad_norm": 0.8120248317718506,
"learning_rate": 4.617948717948718e-06,
"loss": 0.0157,
"step": 11000
},
{
"epoch": 4.652146331148234,
"eval_loss": 0.14061123132705688,
"eval_runtime": 474.4412,
"eval_samples_per_second": 7.674,
"eval_steps_per_second": 0.961,
"eval_wer": 0.11253422432595481,
"step": 11000
},
{
"epoch": 4.662719390991753,
"grad_norm": 0.5586819648742676,
"learning_rate": 4.605128205128205e-06,
"loss": 0.0201,
"step": 11025
},
{
"epoch": 4.673292450835271,
"grad_norm": 2.2997443675994873,
"learning_rate": 4.592307692307693e-06,
"loss": 0.0207,
"step": 11050
},
{
"epoch": 4.683865510678791,
"grad_norm": 3.0606119632720947,
"learning_rate": 4.57948717948718e-06,
"loss": 0.0261,
"step": 11075
},
{
"epoch": 4.694438570522309,
"grad_norm": 1.2574974298477173,
"learning_rate": 4.566666666666667e-06,
"loss": 0.0229,
"step": 11100
},
{
"epoch": 4.705011630365828,
"grad_norm": 0.962881326675415,
"learning_rate": 4.553846153846154e-06,
"loss": 0.0198,
"step": 11125
},
{
"epoch": 4.715584690209346,
"grad_norm": 1.8664745092391968,
"learning_rate": 4.5415384615384615e-06,
"loss": 0.0185,
"step": 11150
},
{
"epoch": 4.726157750052865,
"grad_norm": 2.5618274211883545,
"learning_rate": 4.528717948717949e-06,
"loss": 0.0306,
"step": 11175
},
{
"epoch": 4.736730809896384,
"grad_norm": 0.9622751474380493,
"learning_rate": 4.515897435897436e-06,
"loss": 0.037,
"step": 11200
},
{
"epoch": 4.747303869739903,
"grad_norm": 0.5940545201301575,
"learning_rate": 4.503076923076924e-06,
"loss": 0.0178,
"step": 11225
},
{
"epoch": 4.757876929583421,
"grad_norm": 2.6667637825012207,
"learning_rate": 4.4902564102564105e-06,
"loss": 0.015,
"step": 11250
},
{
"epoch": 4.76844998942694,
"grad_norm": 0.793354868888855,
"learning_rate": 4.477435897435898e-06,
"loss": 0.0299,
"step": 11275
},
{
"epoch": 4.779023049270458,
"grad_norm": 4.2273030281066895,
"learning_rate": 4.464615384615385e-06,
"loss": 0.0165,
"step": 11300
},
{
"epoch": 4.789596109113978,
"grad_norm": 2.6417057514190674,
"learning_rate": 4.451794871794872e-06,
"loss": 0.0232,
"step": 11325
},
{
"epoch": 4.800169168957496,
"grad_norm": 2.1996712684631348,
"learning_rate": 4.4389743589743595e-06,
"loss": 0.015,
"step": 11350
},
{
"epoch": 4.810742228801015,
"grad_norm": 5.317453384399414,
"learning_rate": 4.426153846153846e-06,
"loss": 0.0199,
"step": 11375
},
{
"epoch": 4.821315288644533,
"grad_norm": 0.8886387944221497,
"learning_rate": 4.413333333333334e-06,
"loss": 0.0219,
"step": 11400
},
{
"epoch": 4.831888348488053,
"grad_norm": 1.3750237226486206,
"learning_rate": 4.400512820512821e-06,
"loss": 0.0248,
"step": 11425
},
{
"epoch": 4.842461408331571,
"grad_norm": 1.9163587093353271,
"learning_rate": 4.387692307692308e-06,
"loss": 0.0299,
"step": 11450
},
{
"epoch": 4.85303446817509,
"grad_norm": 1.0380934476852417,
"learning_rate": 4.374871794871795e-06,
"loss": 0.016,
"step": 11475
},
{
"epoch": 4.863607528018608,
"grad_norm": 2.2600953578948975,
"learning_rate": 4.362051282051282e-06,
"loss": 0.0201,
"step": 11500
},
{
"epoch": 4.874180587862127,
"grad_norm": 2.316375255584717,
"learning_rate": 4.34923076923077e-06,
"loss": 0.0244,
"step": 11525
},
{
"epoch": 4.884753647705646,
"grad_norm": 2.1449790000915527,
"learning_rate": 4.336410256410257e-06,
"loss": 0.0167,
"step": 11550
},
{
"epoch": 4.895326707549165,
"grad_norm": 2.5895638465881348,
"learning_rate": 4.323589743589744e-06,
"loss": 0.0162,
"step": 11575
},
{
"epoch": 4.905899767392683,
"grad_norm": 2.5529627799987793,
"learning_rate": 4.310769230769231e-06,
"loss": 0.0227,
"step": 11600
},
{
"epoch": 4.916472827236202,
"grad_norm": 1.019690752029419,
"learning_rate": 4.297948717948718e-06,
"loss": 0.0307,
"step": 11625
},
{
"epoch": 4.927045887079721,
"grad_norm": 1.7469873428344727,
"learning_rate": 4.285128205128206e-06,
"loss": 0.0137,
"step": 11650
},
{
"epoch": 4.93761894692324,
"grad_norm": 1.8508704900741577,
"learning_rate": 4.2723076923076925e-06,
"loss": 0.0228,
"step": 11675
},
{
"epoch": 4.948192006766758,
"grad_norm": 0.8989993333816528,
"learning_rate": 4.25948717948718e-06,
"loss": 0.0165,
"step": 11700
},
{
"epoch": 4.958765066610277,
"grad_norm": 1.5571892261505127,
"learning_rate": 4.246666666666667e-06,
"loss": 0.0234,
"step": 11725
},
{
"epoch": 4.9693381264537955,
"grad_norm": 0.6282488703727722,
"learning_rate": 4.233846153846154e-06,
"loss": 0.0194,
"step": 11750
},
{
"epoch": 4.979911186297315,
"grad_norm": 0.898673415184021,
"learning_rate": 4.2210256410256414e-06,
"loss": 0.0177,
"step": 11775
},
{
"epoch": 4.990484246140833,
"grad_norm": 0.9171255826950073,
"learning_rate": 4.208205128205128e-06,
"loss": 0.0163,
"step": 11800
},
{
"epoch": 5.001057305984352,
"grad_norm": 0.9866194128990173,
"learning_rate": 4.195384615384616e-06,
"loss": 0.0175,
"step": 11825
},
{
"epoch": 5.0116303658278705,
"grad_norm": 0.3489556312561035,
"learning_rate": 4.182564102564103e-06,
"loss": 0.0107,
"step": 11850
},
{
"epoch": 5.022203425671389,
"grad_norm": 3.7504043579101562,
"learning_rate": 4.1697435897435904e-06,
"loss": 0.0104,
"step": 11875
},
{
"epoch": 5.032776485514908,
"grad_norm": 1.138054609298706,
"learning_rate": 4.156923076923077e-06,
"loss": 0.0078,
"step": 11900
},
{
"epoch": 5.043349545358427,
"grad_norm": 2.7337746620178223,
"learning_rate": 4.144102564102564e-06,
"loss": 0.0079,
"step": 11925
},
{
"epoch": 5.0539226052019455,
"grad_norm": 0.4507981836795807,
"learning_rate": 4.131282051282052e-06,
"loss": 0.0127,
"step": 11950
},
{
"epoch": 5.064495665045464,
"grad_norm": 0.9632282853126526,
"learning_rate": 4.118461538461539e-06,
"loss": 0.0107,
"step": 11975
},
{
"epoch": 5.0750687248889825,
"grad_norm": 0.3650486171245575,
"learning_rate": 4.105641025641026e-06,
"loss": 0.0201,
"step": 12000
},
{
"epoch": 5.0750687248889825,
"eval_loss": 0.1392413079738617,
"eval_runtime": 476.9371,
"eval_samples_per_second": 7.634,
"eval_steps_per_second": 0.956,
"eval_wer": 0.11443686481971321,
"step": 12000
},
{
"epoch": 5.085641784732502,
"grad_norm": 0.6850853562355042,
"learning_rate": 4.092820512820513e-06,
"loss": 0.0168,
"step": 12025
},
{
"epoch": 5.0962148445760205,
"grad_norm": 0.9726247787475586,
"learning_rate": 4.08e-06,
"loss": 0.0156,
"step": 12050
},
{
"epoch": 5.106787904419539,
"grad_norm": 0.6085502505302429,
"learning_rate": 4.0671794871794876e-06,
"loss": 0.0092,
"step": 12075
},
{
"epoch": 5.1173609642630575,
"grad_norm": 0.3284889757633209,
"learning_rate": 4.054358974358974e-06,
"loss": 0.0128,
"step": 12100
},
{
"epoch": 5.127934024106576,
"grad_norm": 0.4705301523208618,
"learning_rate": 4.041538461538462e-06,
"loss": 0.0104,
"step": 12125
},
{
"epoch": 5.1385070839500955,
"grad_norm": 1.9244158267974854,
"learning_rate": 4.028717948717949e-06,
"loss": 0.0081,
"step": 12150
},
{
"epoch": 5.149080143793614,
"grad_norm": 2.1168432235717773,
"learning_rate": 4.0158974358974366e-06,
"loss": 0.0087,
"step": 12175
},
{
"epoch": 5.1596532036371325,
"grad_norm": 1.652687430381775,
"learning_rate": 4.003076923076923e-06,
"loss": 0.0113,
"step": 12200
},
{
"epoch": 5.170226263480651,
"grad_norm": 2.1567976474761963,
"learning_rate": 3.990256410256411e-06,
"loss": 0.0088,
"step": 12225
},
{
"epoch": 5.18079932332417,
"grad_norm": 0.7141952514648438,
"learning_rate": 3.977435897435898e-06,
"loss": 0.0108,
"step": 12250
},
{
"epoch": 5.191372383167689,
"grad_norm": 1.3821399211883545,
"learning_rate": 3.964615384615385e-06,
"loss": 0.0111,
"step": 12275
},
{
"epoch": 5.2019454430112075,
"grad_norm": 0.7689725160598755,
"learning_rate": 3.951794871794872e-06,
"loss": 0.0084,
"step": 12300
},
{
"epoch": 5.212518502854726,
"grad_norm": 0.7550517320632935,
"learning_rate": 3.938974358974359e-06,
"loss": 0.0143,
"step": 12325
},
{
"epoch": 5.223091562698245,
"grad_norm": 0.4243580102920532,
"learning_rate": 3.926153846153846e-06,
"loss": 0.008,
"step": 12350
},
{
"epoch": 5.233664622541764,
"grad_norm": 0.6995705366134644,
"learning_rate": 3.913333333333334e-06,
"loss": 0.0078,
"step": 12375
},
{
"epoch": 5.2442376823852825,
"grad_norm": 0.3228248357772827,
"learning_rate": 3.9005128205128205e-06,
"loss": 0.0089,
"step": 12400
},
{
"epoch": 5.254810742228801,
"grad_norm": 0.5981309413909912,
"learning_rate": 3.887692307692308e-06,
"loss": 0.0116,
"step": 12425
},
{
"epoch": 5.26538380207232,
"grad_norm": 1.0151786804199219,
"learning_rate": 3.874871794871795e-06,
"loss": 0.0109,
"step": 12450
},
{
"epoch": 5.275956861915838,
"grad_norm": 1.7748947143554688,
"learning_rate": 3.862051282051283e-06,
"loss": 0.0165,
"step": 12475
},
{
"epoch": 5.2865299217593575,
"grad_norm": 3.3617355823516846,
"learning_rate": 3.8492307692307695e-06,
"loss": 0.0113,
"step": 12500
},
{
"epoch": 5.297102981602876,
"grad_norm": 0.9399589896202087,
"learning_rate": 3.836410256410257e-06,
"loss": 0.0145,
"step": 12525
},
{
"epoch": 5.307676041446395,
"grad_norm": 0.4181835651397705,
"learning_rate": 3.823589743589744e-06,
"loss": 0.0132,
"step": 12550
},
{
"epoch": 5.318249101289913,
"grad_norm": 2.0555076599121094,
"learning_rate": 3.8107692307692313e-06,
"loss": 0.0081,
"step": 12575
},
{
"epoch": 5.328822161133432,
"grad_norm": 1.8507764339447021,
"learning_rate": 3.7979487179487185e-06,
"loss": 0.0108,
"step": 12600
},
{
"epoch": 5.339395220976951,
"grad_norm": 3.1068594455718994,
"learning_rate": 3.7851282051282058e-06,
"loss": 0.0146,
"step": 12625
},
{
"epoch": 5.34996828082047,
"grad_norm": 1.1594772338867188,
"learning_rate": 3.772307692307693e-06,
"loss": 0.0138,
"step": 12650
},
{
"epoch": 5.360541340663988,
"grad_norm": 0.45777344703674316,
"learning_rate": 3.7594871794871794e-06,
"loss": 0.0175,
"step": 12675
},
{
"epoch": 5.371114400507507,
"grad_norm": 0.6128593683242798,
"learning_rate": 3.7466666666666667e-06,
"loss": 0.0084,
"step": 12700
},
{
"epoch": 5.381687460351025,
"grad_norm": 0.644332230091095,
"learning_rate": 3.733846153846154e-06,
"loss": 0.0111,
"step": 12725
},
{
"epoch": 5.392260520194545,
"grad_norm": 0.603568971157074,
"learning_rate": 3.721025641025641e-06,
"loss": 0.0111,
"step": 12750
},
{
"epoch": 5.402833580038063,
"grad_norm": 0.3822285532951355,
"learning_rate": 3.7082051282051284e-06,
"loss": 0.0095,
"step": 12775
},
{
"epoch": 5.413406639881582,
"grad_norm": 0.493023157119751,
"learning_rate": 3.6953846153846156e-06,
"loss": 0.0157,
"step": 12800
},
{
"epoch": 5.4239796997251,
"grad_norm": 1.2487260103225708,
"learning_rate": 3.682564102564103e-06,
"loss": 0.0097,
"step": 12825
},
{
"epoch": 5.43455275956862,
"grad_norm": 0.8562780618667603,
"learning_rate": 3.66974358974359e-06,
"loss": 0.0139,
"step": 12850
},
{
"epoch": 5.445125819412138,
"grad_norm": 0.750851571559906,
"learning_rate": 3.6569230769230774e-06,
"loss": 0.0123,
"step": 12875
},
{
"epoch": 5.455698879255657,
"grad_norm": 0.41030699014663696,
"learning_rate": 3.6441025641025646e-06,
"loss": 0.0116,
"step": 12900
},
{
"epoch": 5.466271939099175,
"grad_norm": 0.5965930223464966,
"learning_rate": 3.631282051282052e-06,
"loss": 0.0107,
"step": 12925
},
{
"epoch": 5.476844998942694,
"grad_norm": 0.4181739389896393,
"learning_rate": 3.618461538461539e-06,
"loss": 0.0077,
"step": 12950
},
{
"epoch": 5.487418058786213,
"grad_norm": 3.0473992824554443,
"learning_rate": 3.6056410256410255e-06,
"loss": 0.0144,
"step": 12975
},
{
"epoch": 5.497991118629732,
"grad_norm": 6.770436763763428,
"learning_rate": 3.5928205128205128e-06,
"loss": 0.0121,
"step": 13000
},
{
"epoch": 5.497991118629732,
"eval_loss": 0.14054465293884277,
"eval_runtime": 479.854,
"eval_samples_per_second": 7.588,
"eval_steps_per_second": 0.95,
"eval_wer": 0.11290547125156619,
"step": 13000
},
{
"epoch": 5.50856417847325,
"grad_norm": 1.682664155960083,
"learning_rate": 3.58e-06,
"loss": 0.0098,
"step": 13025
},
{
"epoch": 5.519137238316769,
"grad_norm": 1.461300253868103,
"learning_rate": 3.5671794871794873e-06,
"loss": 0.0099,
"step": 13050
},
{
"epoch": 5.529710298160287,
"grad_norm": 0.3437669575214386,
"learning_rate": 3.5543589743589745e-06,
"loss": 0.014,
"step": 13075
},
{
"epoch": 5.540283358003807,
"grad_norm": 1.4875051975250244,
"learning_rate": 3.5415384615384618e-06,
"loss": 0.013,
"step": 13100
},
{
"epoch": 5.550856417847325,
"grad_norm": 3.039585590362549,
"learning_rate": 3.528717948717949e-06,
"loss": 0.0131,
"step": 13125
},
{
"epoch": 5.561429477690844,
"grad_norm": 0.7710385918617249,
"learning_rate": 3.5158974358974363e-06,
"loss": 0.011,
"step": 13150
},
{
"epoch": 5.572002537534362,
"grad_norm": 4.138525485992432,
"learning_rate": 3.5030769230769235e-06,
"loss": 0.0098,
"step": 13175
},
{
"epoch": 5.582575597377881,
"grad_norm": 4.096315860748291,
"learning_rate": 3.4902564102564108e-06,
"loss": 0.0143,
"step": 13200
},
{
"epoch": 5.5931486572214,
"grad_norm": 1.198435664176941,
"learning_rate": 3.477435897435898e-06,
"loss": 0.0127,
"step": 13225
},
{
"epoch": 5.603721717064919,
"grad_norm": 0.3887302875518799,
"learning_rate": 3.4646153846153853e-06,
"loss": 0.0138,
"step": 13250
},
{
"epoch": 5.614294776908437,
"grad_norm": 1.2115799188613892,
"learning_rate": 3.4517948717948717e-06,
"loss": 0.0172,
"step": 13275
},
{
"epoch": 5.624867836751956,
"grad_norm": 0.7135342955589294,
"learning_rate": 3.438974358974359e-06,
"loss": 0.0148,
"step": 13300
},
{
"epoch": 5.635440896595474,
"grad_norm": 2.1200053691864014,
"learning_rate": 3.426153846153846e-06,
"loss": 0.0084,
"step": 13325
},
{
"epoch": 5.646013956438994,
"grad_norm": 1.8395202159881592,
"learning_rate": 3.4133333333333334e-06,
"loss": 0.01,
"step": 13350
},
{
"epoch": 5.656587016282512,
"grad_norm": 4.734637260437012,
"learning_rate": 3.4005128205128207e-06,
"loss": 0.0141,
"step": 13375
},
{
"epoch": 5.667160076126031,
"grad_norm": 2.1322085857391357,
"learning_rate": 3.387692307692308e-06,
"loss": 0.0154,
"step": 13400
},
{
"epoch": 5.677733135969549,
"grad_norm": 0.8213221430778503,
"learning_rate": 3.374871794871795e-06,
"loss": 0.0065,
"step": 13425
},
{
"epoch": 5.688306195813068,
"grad_norm": 0.4765898585319519,
"learning_rate": 3.3620512820512824e-06,
"loss": 0.008,
"step": 13450
},
{
"epoch": 5.698879255656587,
"grad_norm": 0.424668550491333,
"learning_rate": 3.3492307692307696e-06,
"loss": 0.0104,
"step": 13475
},
{
"epoch": 5.709452315500106,
"grad_norm": 0.7294663786888123,
"learning_rate": 3.336410256410257e-06,
"loss": 0.0114,
"step": 13500
},
{
"epoch": 5.720025375343624,
"grad_norm": 0.4553682208061218,
"learning_rate": 3.323589743589744e-06,
"loss": 0.0133,
"step": 13525
},
{
"epoch": 5.730598435187143,
"grad_norm": 4.255309104919434,
"learning_rate": 3.3107692307692314e-06,
"loss": 0.0269,
"step": 13550
},
{
"epoch": 5.741171495030661,
"grad_norm": 0.8475791811943054,
"learning_rate": 3.297948717948718e-06,
"loss": 0.0086,
"step": 13575
},
{
"epoch": 5.751744554874181,
"grad_norm": 4.737633228302002,
"learning_rate": 3.285128205128205e-06,
"loss": 0.0193,
"step": 13600
},
{
"epoch": 5.762317614717699,
"grad_norm": 1.0523945093154907,
"learning_rate": 3.2723076923076923e-06,
"loss": 0.0124,
"step": 13625
},
{
"epoch": 5.772890674561218,
"grad_norm": 0.8394791483879089,
"learning_rate": 3.2594871794871795e-06,
"loss": 0.0109,
"step": 13650
},
{
"epoch": 5.783463734404736,
"grad_norm": 2.474153518676758,
"learning_rate": 3.2466666666666668e-06,
"loss": 0.0123,
"step": 13675
},
{
"epoch": 5.794036794248256,
"grad_norm": 0.8185378909111023,
"learning_rate": 3.233846153846154e-06,
"loss": 0.0084,
"step": 13700
},
{
"epoch": 5.804609854091774,
"grad_norm": 4.937212944030762,
"learning_rate": 3.2210256410256413e-06,
"loss": 0.0134,
"step": 13725
},
{
"epoch": 5.815182913935293,
"grad_norm": 0.6099960207939148,
"learning_rate": 3.2082051282051285e-06,
"loss": 0.0091,
"step": 13750
},
{
"epoch": 5.825755973778811,
"grad_norm": 0.43338268995285034,
"learning_rate": 3.1953846153846158e-06,
"loss": 0.0118,
"step": 13775
},
{
"epoch": 5.836329033622331,
"grad_norm": 0.47640514373779297,
"learning_rate": 3.182564102564103e-06,
"loss": 0.0072,
"step": 13800
},
{
"epoch": 5.846902093465849,
"grad_norm": 2.1803908348083496,
"learning_rate": 3.1697435897435903e-06,
"loss": 0.0119,
"step": 13825
},
{
"epoch": 5.857475153309368,
"grad_norm": 1.2694458961486816,
"learning_rate": 3.1569230769230775e-06,
"loss": 0.0102,
"step": 13850
},
{
"epoch": 5.868048213152886,
"grad_norm": 0.539225697517395,
"learning_rate": 3.144102564102564e-06,
"loss": 0.0107,
"step": 13875
},
{
"epoch": 5.878621272996405,
"grad_norm": 2.2293202877044678,
"learning_rate": 3.131282051282051e-06,
"loss": 0.0093,
"step": 13900
},
{
"epoch": 5.889194332839924,
"grad_norm": 0.3595990538597107,
"learning_rate": 3.1184615384615384e-06,
"loss": 0.0075,
"step": 13925
},
{
"epoch": 5.899767392683443,
"grad_norm": 1.155290961265564,
"learning_rate": 3.1056410256410257e-06,
"loss": 0.011,
"step": 13950
},
{
"epoch": 5.910340452526961,
"grad_norm": 6.233605861663818,
"learning_rate": 3.092820512820513e-06,
"loss": 0.0071,
"step": 13975
},
{
"epoch": 5.92091351237048,
"grad_norm": 0.6025995016098022,
"learning_rate": 3.08e-06,
"loss": 0.0074,
"step": 14000
},
{
"epoch": 5.92091351237048,
"eval_loss": 0.13853086531162262,
"eval_runtime": 478.5411,
"eval_samples_per_second": 7.609,
"eval_steps_per_second": 0.953,
"eval_wer": 0.11954151004686993,
"step": 14000
},
{
"epoch": 5.931486572213998,
"grad_norm": 0.934971034526825,
"learning_rate": 3.0671794871794874e-06,
"loss": 0.0149,
"step": 14025
},
{
"epoch": 5.942059632057518,
"grad_norm": 2.112450122833252,
"learning_rate": 3.0543589743589747e-06,
"loss": 0.0254,
"step": 14050
},
{
"epoch": 5.952632691901036,
"grad_norm": 1.504460334777832,
"learning_rate": 3.041538461538462e-06,
"loss": 0.0079,
"step": 14075
},
{
"epoch": 5.963205751744555,
"grad_norm": 3.3018078804016113,
"learning_rate": 3.028717948717949e-06,
"loss": 0.0084,
"step": 14100
},
{
"epoch": 5.9737788115880734,
"grad_norm": 0.2936100363731384,
"learning_rate": 3.0158974358974364e-06,
"loss": 0.0077,
"step": 14125
},
{
"epoch": 5.984351871431592,
"grad_norm": 2.9025211334228516,
"learning_rate": 3.0030769230769236e-06,
"loss": 0.0107,
"step": 14150
},
{
"epoch": 5.994924931275111,
"grad_norm": 0.5673441886901855,
"learning_rate": 2.99025641025641e-06,
"loss": 0.0101,
"step": 14175
},
{
"epoch": 6.00549799111863,
"grad_norm": 0.29159656167030334,
"learning_rate": 2.9774358974358973e-06,
"loss": 0.009,
"step": 14200
},
{
"epoch": 6.0160710509621484,
"grad_norm": 0.2572082579135895,
"learning_rate": 2.9646153846153845e-06,
"loss": 0.007,
"step": 14225
},
{
"epoch": 6.026644110805667,
"grad_norm": 0.2045765519142151,
"learning_rate": 2.951794871794872e-06,
"loss": 0.0058,
"step": 14250
},
{
"epoch": 6.0372171706491855,
"grad_norm": 0.34850168228149414,
"learning_rate": 2.938974358974359e-06,
"loss": 0.0067,
"step": 14275
},
{
"epoch": 6.047790230492705,
"grad_norm": 5.987231254577637,
"learning_rate": 2.9261538461538463e-06,
"loss": 0.0052,
"step": 14300
},
{
"epoch": 6.0583632903362235,
"grad_norm": 0.34981194138526917,
"learning_rate": 2.9133333333333335e-06,
"loss": 0.0059,
"step": 14325
},
{
"epoch": 6.068936350179742,
"grad_norm": 0.2722649574279785,
"learning_rate": 2.9005128205128208e-06,
"loss": 0.0105,
"step": 14350
},
{
"epoch": 6.0795094100232605,
"grad_norm": 0.33031409978866577,
"learning_rate": 2.887692307692308e-06,
"loss": 0.0144,
"step": 14375
},
{
"epoch": 6.090082469866779,
"grad_norm": 0.26611846685409546,
"learning_rate": 2.8748717948717953e-06,
"loss": 0.0053,
"step": 14400
},
{
"epoch": 6.1006555297102985,
"grad_norm": 0.29690757393836975,
"learning_rate": 2.8620512820512825e-06,
"loss": 0.0042,
"step": 14425
},
{
"epoch": 6.111228589553817,
"grad_norm": 0.2454768419265747,
"learning_rate": 2.8492307692307698e-06,
"loss": 0.0065,
"step": 14450
},
{
"epoch": 6.1218016493973355,
"grad_norm": 0.3441830575466156,
"learning_rate": 2.836410256410257e-06,
"loss": 0.0088,
"step": 14475
},
{
"epoch": 6.132374709240854,
"grad_norm": 0.4179341793060303,
"learning_rate": 2.8235897435897434e-06,
"loss": 0.0137,
"step": 14500
},
{
"epoch": 6.142947769084373,
"grad_norm": 0.3850744664669037,
"learning_rate": 2.8107692307692307e-06,
"loss": 0.0033,
"step": 14525
},
{
"epoch": 6.153520828927892,
"grad_norm": 0.2846491038799286,
"learning_rate": 2.797948717948718e-06,
"loss": 0.0061,
"step": 14550
},
{
"epoch": 6.1640938887714105,
"grad_norm": 0.3616473376750946,
"learning_rate": 2.785128205128205e-06,
"loss": 0.0052,
"step": 14575
},
{
"epoch": 6.174666948614929,
"grad_norm": 0.38456159830093384,
"learning_rate": 2.7723076923076924e-06,
"loss": 0.0046,
"step": 14600
},
{
"epoch": 6.185240008458448,
"grad_norm": 0.267269492149353,
"learning_rate": 2.7594871794871797e-06,
"loss": 0.0069,
"step": 14625
},
{
"epoch": 6.195813068301967,
"grad_norm": 0.9138497114181519,
"learning_rate": 2.746666666666667e-06,
"loss": 0.0068,
"step": 14650
},
{
"epoch": 6.2063861281454855,
"grad_norm": 0.2973681390285492,
"learning_rate": 2.733846153846154e-06,
"loss": 0.0113,
"step": 14675
},
{
"epoch": 6.216959187989004,
"grad_norm": 0.3064761757850647,
"learning_rate": 2.7210256410256414e-06,
"loss": 0.0076,
"step": 14700
},
{
"epoch": 6.227532247832523,
"grad_norm": 0.2878686785697937,
"learning_rate": 2.7082051282051287e-06,
"loss": 0.0044,
"step": 14725
},
{
"epoch": 6.238105307676041,
"grad_norm": 0.2550056576728821,
"learning_rate": 2.695384615384616e-06,
"loss": 0.0064,
"step": 14750
},
{
"epoch": 6.2486783675195605,
"grad_norm": 0.6837669610977173,
"learning_rate": 2.682564102564103e-06,
"loss": 0.0046,
"step": 14775
},
{
"epoch": 6.259251427363079,
"grad_norm": 0.244142547249794,
"learning_rate": 2.6697435897435896e-06,
"loss": 0.0033,
"step": 14800
},
{
"epoch": 6.269824487206598,
"grad_norm": 0.15427115559577942,
"learning_rate": 2.656923076923077e-06,
"loss": 0.0055,
"step": 14825
},
{
"epoch": 6.280397547050116,
"grad_norm": 0.7226958274841309,
"learning_rate": 2.644102564102564e-06,
"loss": 0.0053,
"step": 14850
},
{
"epoch": 6.290970606893635,
"grad_norm": 0.2157323956489563,
"learning_rate": 2.6312820512820513e-06,
"loss": 0.007,
"step": 14875
},
{
"epoch": 6.301543666737154,
"grad_norm": 2.2182655334472656,
"learning_rate": 2.6184615384615385e-06,
"loss": 0.0079,
"step": 14900
},
{
"epoch": 6.312116726580673,
"grad_norm": 0.17396897077560425,
"learning_rate": 2.605641025641026e-06,
"loss": 0.0059,
"step": 14925
},
{
"epoch": 6.322689786424191,
"grad_norm": 0.8146637082099915,
"learning_rate": 2.592820512820513e-06,
"loss": 0.0048,
"step": 14950
},
{
"epoch": 6.33326284626771,
"grad_norm": 0.2695443332195282,
"learning_rate": 2.5800000000000003e-06,
"loss": 0.0049,
"step": 14975
},
{
"epoch": 6.343835906111229,
"grad_norm": 0.7036349177360535,
"learning_rate": 2.5671794871794875e-06,
"loss": 0.0064,
"step": 15000
},
{
"epoch": 6.343835906111229,
"eval_loss": 0.1409808248281479,
"eval_runtime": 474.6061,
"eval_samples_per_second": 7.672,
"eval_steps_per_second": 0.961,
"eval_wer": 0.11146688941482204,
"step": 15000
},
{
"epoch": 6.354408965954748,
"grad_norm": 0.4002957344055176,
"learning_rate": 2.5543589743589748e-06,
"loss": 0.0055,
"step": 15025
},
{
"epoch": 6.364982025798266,
"grad_norm": 2.1062963008880615,
"learning_rate": 2.541538461538462e-06,
"loss": 0.0038,
"step": 15050
},
{
"epoch": 6.375555085641785,
"grad_norm": 0.4128209948539734,
"learning_rate": 2.5287179487179493e-06,
"loss": 0.0062,
"step": 15075
},
{
"epoch": 6.386128145485303,
"grad_norm": 0.24224917590618134,
"learning_rate": 2.5158974358974357e-06,
"loss": 0.0076,
"step": 15100
},
{
"epoch": 6.396701205328823,
"grad_norm": 0.31282123923301697,
"learning_rate": 2.503076923076923e-06,
"loss": 0.0066,
"step": 15125
},
{
"epoch": 6.407274265172341,
"grad_norm": 0.28958702087402344,
"learning_rate": 2.4902564102564106e-06,
"loss": 0.0054,
"step": 15150
},
{
"epoch": 6.41784732501586,
"grad_norm": 0.23962663114070892,
"learning_rate": 2.4774358974358974e-06,
"loss": 0.0043,
"step": 15175
},
{
"epoch": 6.428420384859378,
"grad_norm": 0.22258694469928741,
"learning_rate": 2.4646153846153847e-06,
"loss": 0.0098,
"step": 15200
},
{
"epoch": 6.438993444702897,
"grad_norm": 0.219313845038414,
"learning_rate": 2.451794871794872e-06,
"loss": 0.0064,
"step": 15225
},
{
"epoch": 6.449566504546416,
"grad_norm": 0.24351318180561066,
"learning_rate": 2.438974358974359e-06,
"loss": 0.0053,
"step": 15250
},
{
"epoch": 6.460139564389935,
"grad_norm": 1.7564576864242554,
"learning_rate": 2.4261538461538464e-06,
"loss": 0.0106,
"step": 15275
},
{
"epoch": 6.470712624233453,
"grad_norm": 1.4861177206039429,
"learning_rate": 2.4133333333333337e-06,
"loss": 0.0063,
"step": 15300
},
{
"epoch": 6.481285684076972,
"grad_norm": 0.41736364364624023,
"learning_rate": 2.4005128205128205e-06,
"loss": 0.0052,
"step": 15325
},
{
"epoch": 6.49185874392049,
"grad_norm": 0.21913078427314758,
"learning_rate": 2.3876923076923077e-06,
"loss": 0.0063,
"step": 15350
},
{
"epoch": 6.50243180376401,
"grad_norm": 0.18712382018566132,
"learning_rate": 2.374871794871795e-06,
"loss": 0.01,
"step": 15375
},
{
"epoch": 6.513004863607528,
"grad_norm": 0.7857060432434082,
"learning_rate": 2.3620512820512822e-06,
"loss": 0.0081,
"step": 15400
},
{
"epoch": 6.523577923451047,
"grad_norm": 0.23228535056114197,
"learning_rate": 2.3492307692307695e-06,
"loss": 0.0055,
"step": 15425
},
{
"epoch": 6.534150983294565,
"grad_norm": 0.2794516682624817,
"learning_rate": 2.3364102564102567e-06,
"loss": 0.0118,
"step": 15450
},
{
"epoch": 6.544724043138084,
"grad_norm": 0.3594117760658264,
"learning_rate": 2.3235897435897436e-06,
"loss": 0.0049,
"step": 15475
},
{
"epoch": 6.555297102981603,
"grad_norm": 0.38121312856674194,
"learning_rate": 2.310769230769231e-06,
"loss": 0.0098,
"step": 15500
},
{
"epoch": 6.565870162825122,
"grad_norm": 0.48879972100257874,
"learning_rate": 2.2984615384615386e-06,
"loss": 0.0055,
"step": 15525
},
{
"epoch": 6.57644322266864,
"grad_norm": 0.3019927442073822,
"learning_rate": 2.285641025641026e-06,
"loss": 0.0066,
"step": 15550
},
{
"epoch": 6.587016282512159,
"grad_norm": 2.143726348876953,
"learning_rate": 2.272820512820513e-06,
"loss": 0.0078,
"step": 15575
},
{
"epoch": 6.597589342355677,
"grad_norm": 0.1708030104637146,
"learning_rate": 2.2600000000000004e-06,
"loss": 0.004,
"step": 15600
},
{
"epoch": 6.608162402199197,
"grad_norm": 1.7582453489303589,
"learning_rate": 2.2471794871794876e-06,
"loss": 0.0092,
"step": 15625
},
{
"epoch": 6.618735462042715,
"grad_norm": 0.3129083514213562,
"learning_rate": 2.2343589743589745e-06,
"loss": 0.0073,
"step": 15650
},
{
"epoch": 6.629308521886234,
"grad_norm": 0.3408079743385315,
"learning_rate": 2.2215384615384617e-06,
"loss": 0.0069,
"step": 15675
},
{
"epoch": 6.639881581729752,
"grad_norm": 11.103797912597656,
"learning_rate": 2.208717948717949e-06,
"loss": 0.0105,
"step": 15700
},
{
"epoch": 6.650454641573271,
"grad_norm": 0.554865300655365,
"learning_rate": 2.195897435897436e-06,
"loss": 0.0082,
"step": 15725
},
{
"epoch": 6.66102770141679,
"grad_norm": 0.32903775572776794,
"learning_rate": 2.1830769230769234e-06,
"loss": 0.0033,
"step": 15750
},
{
"epoch": 6.671600761260309,
"grad_norm": 0.2150058150291443,
"learning_rate": 2.1702564102564107e-06,
"loss": 0.0119,
"step": 15775
},
{
"epoch": 6.682173821103827,
"grad_norm": 0.253409743309021,
"learning_rate": 2.1574358974358975e-06,
"loss": 0.0046,
"step": 15800
},
{
"epoch": 6.692746880947346,
"grad_norm": 2.6021835803985596,
"learning_rate": 2.1446153846153848e-06,
"loss": 0.0077,
"step": 15825
},
{
"epoch": 6.703319940790865,
"grad_norm": 1.3991787433624268,
"learning_rate": 2.131794871794872e-06,
"loss": 0.0072,
"step": 15850
},
{
"epoch": 6.713893000634384,
"grad_norm": 0.40495139360427856,
"learning_rate": 2.1189743589743593e-06,
"loss": 0.013,
"step": 15875
},
{
"epoch": 6.724466060477902,
"grad_norm": 0.28818759322166443,
"learning_rate": 2.1061538461538465e-06,
"loss": 0.0065,
"step": 15900
},
{
"epoch": 6.735039120321421,
"grad_norm": 0.6223933696746826,
"learning_rate": 2.0933333333333338e-06,
"loss": 0.0057,
"step": 15925
},
{
"epoch": 6.745612180164939,
"grad_norm": 0.24611905217170715,
"learning_rate": 2.0805128205128206e-06,
"loss": 0.0049,
"step": 15950
},
{
"epoch": 6.756185240008459,
"grad_norm": 0.6771820783615112,
"learning_rate": 2.067692307692308e-06,
"loss": 0.0048,
"step": 15975
},
{
"epoch": 6.766758299851977,
"grad_norm": 14.729320526123047,
"learning_rate": 2.054871794871795e-06,
"loss": 0.0066,
"step": 16000
},
{
"epoch": 6.766758299851977,
"eval_loss": 0.14149095118045807,
"eval_runtime": 479.8155,
"eval_samples_per_second": 7.588,
"eval_steps_per_second": 0.95,
"eval_wer": 0.11842776927003573,
"step": 16000
},
{
"epoch": 6.777331359695496,
"grad_norm": 0.3452744781970978,
"learning_rate": 2.0420512820512823e-06,
"loss": 0.0052,
"step": 16025
},
{
"epoch": 6.787904419539014,
"grad_norm": 0.376597136259079,
"learning_rate": 2.0292307692307696e-06,
"loss": 0.0044,
"step": 16050
},
{
"epoch": 6.798477479382534,
"grad_norm": 0.2520189583301544,
"learning_rate": 2.016410256410257e-06,
"loss": 0.0088,
"step": 16075
},
{
"epoch": 6.809050539226052,
"grad_norm": 0.19255167245864868,
"learning_rate": 2.0035897435897436e-06,
"loss": 0.0046,
"step": 16100
},
{
"epoch": 6.819623599069571,
"grad_norm": 0.38311922550201416,
"learning_rate": 1.990769230769231e-06,
"loss": 0.0028,
"step": 16125
},
{
"epoch": 6.830196658913089,
"grad_norm": 0.2503233850002289,
"learning_rate": 1.977948717948718e-06,
"loss": 0.0038,
"step": 16150
},
{
"epoch": 6.840769718756608,
"grad_norm": 0.8586848378181458,
"learning_rate": 1.9651282051282054e-06,
"loss": 0.0052,
"step": 16175
},
{
"epoch": 6.851342778600127,
"grad_norm": 0.256672203540802,
"learning_rate": 1.9523076923076926e-06,
"loss": 0.0031,
"step": 16200
},
{
"epoch": 6.861915838443646,
"grad_norm": 0.3382306396961212,
"learning_rate": 1.93948717948718e-06,
"loss": 0.0072,
"step": 16225
},
{
"epoch": 6.872488898287164,
"grad_norm": 1.6218035221099854,
"learning_rate": 1.926666666666667e-06,
"loss": 0.0071,
"step": 16250
},
{
"epoch": 6.883061958130683,
"grad_norm": 0.33794450759887695,
"learning_rate": 1.913846153846154e-06,
"loss": 0.0062,
"step": 16275
},
{
"epoch": 6.893635017974201,
"grad_norm": 0.20873773097991943,
"learning_rate": 1.9010256410256412e-06,
"loss": 0.0047,
"step": 16300
},
{
"epoch": 6.904208077817721,
"grad_norm": 0.2936864495277405,
"learning_rate": 1.8882051282051285e-06,
"loss": 0.0034,
"step": 16325
},
{
"epoch": 6.914781137661239,
"grad_norm": 0.5449936389923096,
"learning_rate": 1.8753846153846155e-06,
"loss": 0.0065,
"step": 16350
},
{
"epoch": 6.925354197504758,
"grad_norm": 0.29695820808410645,
"learning_rate": 1.8625641025641027e-06,
"loss": 0.0125,
"step": 16375
},
{
"epoch": 6.935927257348276,
"grad_norm": 0.4382512867450714,
"learning_rate": 1.84974358974359e-06,
"loss": 0.0058,
"step": 16400
},
{
"epoch": 6.946500317191795,
"grad_norm": 0.5371158719062805,
"learning_rate": 1.836923076923077e-06,
"loss": 0.0036,
"step": 16425
},
{
"epoch": 6.957073377035314,
"grad_norm": 0.21529348194599152,
"learning_rate": 1.8241025641025643e-06,
"loss": 0.0042,
"step": 16450
},
{
"epoch": 6.967646436878833,
"grad_norm": 0.25407838821411133,
"learning_rate": 1.8112820512820515e-06,
"loss": 0.0048,
"step": 16475
},
{
"epoch": 6.978219496722351,
"grad_norm": 0.41775551438331604,
"learning_rate": 1.7984615384615386e-06,
"loss": 0.0042,
"step": 16500
},
{
"epoch": 6.98879255656587,
"grad_norm": 0.13575445115566254,
"learning_rate": 1.7856410256410258e-06,
"loss": 0.0057,
"step": 16525
},
{
"epoch": 6.9993656164093885,
"grad_norm": 0.21849684417247772,
"learning_rate": 1.772820512820513e-06,
"loss": 0.0065,
"step": 16550
},
{
"epoch": 7.009938676252908,
"grad_norm": 0.17400676012039185,
"learning_rate": 1.76e-06,
"loss": 0.0069,
"step": 16575
},
{
"epoch": 7.020511736096426,
"grad_norm": 0.2058832198381424,
"learning_rate": 1.7471794871794873e-06,
"loss": 0.005,
"step": 16600
},
{
"epoch": 7.031084795939945,
"grad_norm": 0.14491191506385803,
"learning_rate": 1.7343589743589746e-06,
"loss": 0.0026,
"step": 16625
},
{
"epoch": 7.0416578557834635,
"grad_norm": 2.8785014152526855,
"learning_rate": 1.7215384615384616e-06,
"loss": 0.0053,
"step": 16650
},
{
"epoch": 7.052230915626982,
"grad_norm": 0.17464618384838104,
"learning_rate": 1.7087179487179489e-06,
"loss": 0.0024,
"step": 16675
},
{
"epoch": 7.062803975470501,
"grad_norm": 0.17902244627475739,
"learning_rate": 1.6958974358974361e-06,
"loss": 0.004,
"step": 16700
},
{
"epoch": 7.07337703531402,
"grad_norm": 1.190285563468933,
"learning_rate": 1.6830769230769232e-06,
"loss": 0.0023,
"step": 16725
},
{
"epoch": 7.0839500951575385,
"grad_norm": 0.24982137978076935,
"learning_rate": 1.6702564102564104e-06,
"loss": 0.0029,
"step": 16750
},
{
"epoch": 7.094523155001057,
"grad_norm": 0.21240869164466858,
"learning_rate": 1.6574358974358976e-06,
"loss": 0.0034,
"step": 16775
},
{
"epoch": 7.105096214844576,
"grad_norm": 0.22856955230236053,
"learning_rate": 1.6446153846153847e-06,
"loss": 0.0028,
"step": 16800
},
{
"epoch": 7.115669274688095,
"grad_norm": 0.2120029181241989,
"learning_rate": 1.631794871794872e-06,
"loss": 0.0026,
"step": 16825
},
{
"epoch": 7.1262423345316135,
"grad_norm": 0.10486655682325363,
"learning_rate": 1.6189743589743592e-06,
"loss": 0.0063,
"step": 16850
},
{
"epoch": 7.136815394375132,
"grad_norm": 0.1727411448955536,
"learning_rate": 1.6061538461538462e-06,
"loss": 0.0058,
"step": 16875
},
{
"epoch": 7.1473884542186505,
"grad_norm": 0.2189004272222519,
"learning_rate": 1.5933333333333335e-06,
"loss": 0.0043,
"step": 16900
},
{
"epoch": 7.15796151406217,
"grad_norm": 0.224544957280159,
"learning_rate": 1.5805128205128207e-06,
"loss": 0.005,
"step": 16925
},
{
"epoch": 7.1685345739056885,
"grad_norm": 0.17426727712154388,
"learning_rate": 1.5676923076923078e-06,
"loss": 0.0039,
"step": 16950
},
{
"epoch": 7.179107633749207,
"grad_norm": 0.1636071801185608,
"learning_rate": 1.554871794871795e-06,
"loss": 0.0027,
"step": 16975
},
{
"epoch": 7.1896806935927255,
"grad_norm": 0.13778026401996613,
"learning_rate": 1.5420512820512822e-06,
"loss": 0.0029,
"step": 17000
},
{
"epoch": 7.1896806935927255,
"eval_loss": 0.14258068799972534,
"eval_runtime": 481.0853,
"eval_samples_per_second": 7.568,
"eval_steps_per_second": 0.948,
"eval_wer": 0.11903104552415425,
"step": 17000
},
{
"epoch": 7.200253753436244,
"grad_norm": 0.1793927550315857,
"learning_rate": 1.5292307692307693e-06,
"loss": 0.0054,
"step": 17025
},
{
"epoch": 7.2108268132797635,
"grad_norm": 0.17130379378795624,
"learning_rate": 1.5164102564102565e-06,
"loss": 0.0036,
"step": 17050
},
{
"epoch": 7.221399873123282,
"grad_norm": 0.20788300037384033,
"learning_rate": 1.5035897435897438e-06,
"loss": 0.0023,
"step": 17075
},
{
"epoch": 7.2319729329668005,
"grad_norm": 0.28084486722946167,
"learning_rate": 1.4907692307692308e-06,
"loss": 0.0027,
"step": 17100
},
{
"epoch": 7.242545992810319,
"grad_norm": 0.1289544254541397,
"learning_rate": 1.477948717948718e-06,
"loss": 0.0074,
"step": 17125
},
{
"epoch": 7.253119052653838,
"grad_norm": 0.14958246052265167,
"learning_rate": 1.4651282051282053e-06,
"loss": 0.0063,
"step": 17150
},
{
"epoch": 7.263692112497357,
"grad_norm": 0.7993505001068115,
"learning_rate": 1.4523076923076923e-06,
"loss": 0.0069,
"step": 17175
},
{
"epoch": 7.2742651723408756,
"grad_norm": 0.30847251415252686,
"learning_rate": 1.4394871794871796e-06,
"loss": 0.0072,
"step": 17200
},
{
"epoch": 7.284838232184394,
"grad_norm": 0.17055633664131165,
"learning_rate": 1.4266666666666668e-06,
"loss": 0.0024,
"step": 17225
},
{
"epoch": 7.295411292027913,
"grad_norm": 0.13648909330368042,
"learning_rate": 1.4138461538461539e-06,
"loss": 0.0022,
"step": 17250
},
{
"epoch": 7.305984351871432,
"grad_norm": 0.17881402373313904,
"learning_rate": 1.4010256410256411e-06,
"loss": 0.0033,
"step": 17275
},
{
"epoch": 7.3165574117149506,
"grad_norm": 0.11867067217826843,
"learning_rate": 1.3882051282051284e-06,
"loss": 0.0034,
"step": 17300
},
{
"epoch": 7.327130471558469,
"grad_norm": 0.1998029202222824,
"learning_rate": 1.3753846153846154e-06,
"loss": 0.0026,
"step": 17325
},
{
"epoch": 7.337703531401988,
"grad_norm": 0.19812311232089996,
"learning_rate": 1.3625641025641027e-06,
"loss": 0.0022,
"step": 17350
},
{
"epoch": 7.348276591245506,
"grad_norm": 0.20121921598911285,
"learning_rate": 1.34974358974359e-06,
"loss": 0.0025,
"step": 17375
},
{
"epoch": 7.358849651089026,
"grad_norm": 0.11519061774015427,
"learning_rate": 1.336923076923077e-06,
"loss": 0.0038,
"step": 17400
},
{
"epoch": 7.369422710932544,
"grad_norm": 0.2508073151111603,
"learning_rate": 1.3241025641025642e-06,
"loss": 0.0033,
"step": 17425
},
{
"epoch": 7.379995770776063,
"grad_norm": 0.1964157670736313,
"learning_rate": 1.3112820512820514e-06,
"loss": 0.003,
"step": 17450
},
{
"epoch": 7.390568830619581,
"grad_norm": 0.1781347244977951,
"learning_rate": 1.2984615384615385e-06,
"loss": 0.0111,
"step": 17475
},
{
"epoch": 7.4011418904631,
"grad_norm": 2.567892551422119,
"learning_rate": 1.2856410256410257e-06,
"loss": 0.0045,
"step": 17500
},
{
"epoch": 7.411714950306619,
"grad_norm": 4.157364368438721,
"learning_rate": 1.272820512820513e-06,
"loss": 0.0084,
"step": 17525
},
{
"epoch": 7.422288010150138,
"grad_norm": 0.1634252667427063,
"learning_rate": 1.26e-06,
"loss": 0.0045,
"step": 17550
},
{
"epoch": 7.432861069993656,
"grad_norm": 0.5479081273078918,
"learning_rate": 1.2471794871794873e-06,
"loss": 0.0075,
"step": 17575
},
{
"epoch": 7.443434129837175,
"grad_norm": 0.1536071002483368,
"learning_rate": 1.2343589743589745e-06,
"loss": 0.0038,
"step": 17600
},
{
"epoch": 7.454007189680693,
"grad_norm": 2.710284948348999,
"learning_rate": 1.2215384615384618e-06,
"loss": 0.0055,
"step": 17625
},
{
"epoch": 7.464580249524213,
"grad_norm": 0.19599756598472595,
"learning_rate": 1.2087179487179488e-06,
"loss": 0.0043,
"step": 17650
},
{
"epoch": 7.475153309367731,
"grad_norm": 0.1853959560394287,
"learning_rate": 1.195897435897436e-06,
"loss": 0.0038,
"step": 17675
},
{
"epoch": 7.48572636921125,
"grad_norm": 0.223322793841362,
"learning_rate": 1.1830769230769233e-06,
"loss": 0.0073,
"step": 17700
},
{
"epoch": 7.496299429054768,
"grad_norm": 0.33646872639656067,
"learning_rate": 1.1702564102564103e-06,
"loss": 0.0044,
"step": 17725
},
{
"epoch": 7.506872488898287,
"grad_norm": 0.24336710572242737,
"learning_rate": 1.1574358974358976e-06,
"loss": 0.0034,
"step": 17750
},
{
"epoch": 7.517445548741806,
"grad_norm": 0.16662658751010895,
"learning_rate": 1.1446153846153848e-06,
"loss": 0.0041,
"step": 17775
},
{
"epoch": 7.528018608585325,
"grad_norm": 0.1463777869939804,
"learning_rate": 1.1317948717948719e-06,
"loss": 0.0128,
"step": 17800
},
{
"epoch": 7.538591668428843,
"grad_norm": 0.26682642102241516,
"learning_rate": 1.118974358974359e-06,
"loss": 0.0035,
"step": 17825
},
{
"epoch": 7.549164728272362,
"grad_norm": 0.2606651782989502,
"learning_rate": 1.1061538461538463e-06,
"loss": 0.0038,
"step": 17850
},
{
"epoch": 7.55973778811588,
"grad_norm": 0.24456697702407837,
"learning_rate": 1.0933333333333334e-06,
"loss": 0.0069,
"step": 17875
},
{
"epoch": 7.5703108479594,
"grad_norm": 0.9612045884132385,
"learning_rate": 1.0805128205128206e-06,
"loss": 0.0047,
"step": 17900
},
{
"epoch": 7.580883907802918,
"grad_norm": 0.20409923791885376,
"learning_rate": 1.0676923076923079e-06,
"loss": 0.0019,
"step": 17925
},
{
"epoch": 7.591456967646437,
"grad_norm": 0.14264066517353058,
"learning_rate": 1.054871794871795e-06,
"loss": 0.0033,
"step": 17950
},
{
"epoch": 7.602030027489955,
"grad_norm": 0.43361735343933105,
"learning_rate": 1.0420512820512822e-06,
"loss": 0.0022,
"step": 17975
},
{
"epoch": 7.612603087333475,
"grad_norm": 0.25520941615104675,
"learning_rate": 1.0292307692307694e-06,
"loss": 0.0024,
"step": 18000
},
{
"epoch": 7.612603087333475,
"eval_loss": 0.14294278621673584,
"eval_runtime": 482.1709,
"eval_samples_per_second": 7.551,
"eval_steps_per_second": 0.946,
"eval_wer": 0.11782449301591721,
"step": 18000
},
{
"epoch": 7.623176147176993,
"grad_norm": 0.2945314943790436,
"learning_rate": 1.0164102564102564e-06,
"loss": 0.0025,
"step": 18025
},
{
"epoch": 7.633749207020512,
"grad_norm": 0.1885748952627182,
"learning_rate": 1.0035897435897437e-06,
"loss": 0.0031,
"step": 18050
},
{
"epoch": 7.64432226686403,
"grad_norm": 0.16705763339996338,
"learning_rate": 9.90769230769231e-07,
"loss": 0.0081,
"step": 18075
},
{
"epoch": 7.654895326707549,
"grad_norm": 0.1523977816104889,
"learning_rate": 9.77948717948718e-07,
"loss": 0.0028,
"step": 18100
},
{
"epoch": 7.665468386551068,
"grad_norm": 0.2194071263074875,
"learning_rate": 9.651282051282052e-07,
"loss": 0.0027,
"step": 18125
},
{
"epoch": 7.676041446394587,
"grad_norm": 0.23766735196113586,
"learning_rate": 9.523076923076924e-07,
"loss": 0.0031,
"step": 18150
},
{
"epoch": 7.686614506238105,
"grad_norm": 0.43536919355392456,
"learning_rate": 9.394871794871796e-07,
"loss": 0.0059,
"step": 18175
},
{
"epoch": 7.697187566081624,
"grad_norm": 0.20056919753551483,
"learning_rate": 9.266666666666667e-07,
"loss": 0.0027,
"step": 18200
},
{
"epoch": 7.707760625925143,
"grad_norm": 0.267135888338089,
"learning_rate": 9.138461538461539e-07,
"loss": 0.0046,
"step": 18225
},
{
"epoch": 7.718333685768662,
"grad_norm": 0.12573710083961487,
"learning_rate": 9.010256410256411e-07,
"loss": 0.0043,
"step": 18250
},
{
"epoch": 7.72890674561218,
"grad_norm": 0.12320298701524734,
"learning_rate": 8.882051282051282e-07,
"loss": 0.0047,
"step": 18275
},
{
"epoch": 7.739479805455699,
"grad_norm": 0.18382315337657928,
"learning_rate": 8.753846153846154e-07,
"loss": 0.0029,
"step": 18300
},
{
"epoch": 7.750052865299217,
"grad_norm": 1.264244794845581,
"learning_rate": 8.625641025641027e-07,
"loss": 0.0029,
"step": 18325
},
{
"epoch": 7.760625925142737,
"grad_norm": 0.31755584478378296,
"learning_rate": 8.497435897435897e-07,
"loss": 0.0023,
"step": 18350
},
{
"epoch": 7.771198984986255,
"grad_norm": 0.14317964017391205,
"learning_rate": 8.36923076923077e-07,
"loss": 0.0037,
"step": 18375
},
{
"epoch": 7.781772044829774,
"grad_norm": 0.1884177327156067,
"learning_rate": 8.241025641025642e-07,
"loss": 0.0022,
"step": 18400
},
{
"epoch": 7.792345104673292,
"grad_norm": 0.24375373125076294,
"learning_rate": 8.112820512820512e-07,
"loss": 0.0048,
"step": 18425
},
{
"epoch": 7.802918164516811,
"grad_norm": 0.2112288922071457,
"learning_rate": 7.984615384615385e-07,
"loss": 0.0038,
"step": 18450
},
{
"epoch": 7.81349122436033,
"grad_norm": 0.21161673963069916,
"learning_rate": 7.856410256410257e-07,
"loss": 0.0036,
"step": 18475
},
{
"epoch": 7.824064284203849,
"grad_norm": 0.17189516127109528,
"learning_rate": 7.728205128205128e-07,
"loss": 0.0029,
"step": 18500
},
{
"epoch": 7.834637344047367,
"grad_norm": 0.28677284717559814,
"learning_rate": 7.6e-07,
"loss": 0.0069,
"step": 18525
},
{
"epoch": 7.845210403890886,
"grad_norm": 0.5086202025413513,
"learning_rate": 7.471794871794873e-07,
"loss": 0.0024,
"step": 18550
},
{
"epoch": 7.855783463734404,
"grad_norm": 0.20552664995193481,
"learning_rate": 7.343589743589743e-07,
"loss": 0.0064,
"step": 18575
},
{
"epoch": 7.866356523577924,
"grad_norm": 0.17549338936805725,
"learning_rate": 7.215384615384616e-07,
"loss": 0.0026,
"step": 18600
},
{
"epoch": 7.876929583421442,
"grad_norm": 0.28019997477531433,
"learning_rate": 7.087179487179488e-07,
"loss": 0.0037,
"step": 18625
},
{
"epoch": 7.887502643264961,
"grad_norm": 0.1256396323442459,
"learning_rate": 6.958974358974358e-07,
"loss": 0.0045,
"step": 18650
},
{
"epoch": 7.898075703108479,
"grad_norm": 0.3035779893398285,
"learning_rate": 6.830769230769231e-07,
"loss": 0.0047,
"step": 18675
},
{
"epoch": 7.908648762951998,
"grad_norm": 0.2889888882637024,
"learning_rate": 6.702564102564103e-07,
"loss": 0.0045,
"step": 18700
},
{
"epoch": 7.919221822795517,
"grad_norm": 0.17718684673309326,
"learning_rate": 6.574358974358976e-07,
"loss": 0.0028,
"step": 18725
},
{
"epoch": 7.929794882639036,
"grad_norm": 0.16636326909065247,
"learning_rate": 6.446153846153846e-07,
"loss": 0.0023,
"step": 18750
},
{
"epoch": 7.940367942482554,
"grad_norm": 0.21917343139648438,
"learning_rate": 6.317948717948719e-07,
"loss": 0.0066,
"step": 18775
},
{
"epoch": 7.950941002326073,
"grad_norm": 0.1804770529270172,
"learning_rate": 6.18974358974359e-07,
"loss": 0.0022,
"step": 18800
},
{
"epoch": 7.9615140621695915,
"grad_norm": 0.15297465026378632,
"learning_rate": 6.061538461538462e-07,
"loss": 0.0025,
"step": 18825
},
{
"epoch": 7.972087122013111,
"grad_norm": 2.2187414169311523,
"learning_rate": 5.933333333333334e-07,
"loss": 0.0036,
"step": 18850
},
{
"epoch": 7.982660181856629,
"grad_norm": 0.14911529421806335,
"learning_rate": 5.805128205128205e-07,
"loss": 0.0035,
"step": 18875
},
{
"epoch": 7.993233241700148,
"grad_norm": 0.3315187692642212,
"learning_rate": 5.676923076923077e-07,
"loss": 0.0031,
"step": 18900
},
{
"epoch": 8.003806301543667,
"grad_norm": 0.2590886950492859,
"learning_rate": 5.548717948717949e-07,
"loss": 0.0037,
"step": 18925
},
{
"epoch": 8.014379361387185,
"grad_norm": 0.15489330887794495,
"learning_rate": 5.420512820512821e-07,
"loss": 0.0018,
"step": 18950
},
{
"epoch": 8.024952421230704,
"grad_norm": 0.18797287344932556,
"learning_rate": 5.292307692307692e-07,
"loss": 0.0018,
"step": 18975
},
{
"epoch": 8.035525481074222,
"grad_norm": 0.1324182152748108,
"learning_rate": 5.164102564102565e-07,
"loss": 0.0021,
"step": 19000
},
{
"epoch": 8.035525481074222,
"eval_loss": 0.1434488594532013,
"eval_runtime": 479.0168,
"eval_samples_per_second": 7.601,
"eval_steps_per_second": 0.952,
"eval_wer": 0.1180101164787229,
"step": 19000
},
{
"epoch": 8.046098540917741,
"grad_norm": 0.15593498945236206,
"learning_rate": 5.035897435897436e-07,
"loss": 0.0018,
"step": 19025
},
{
"epoch": 8.05667160076126,
"grad_norm": 0.14166687428951263,
"learning_rate": 4.907692307692308e-07,
"loss": 0.0025,
"step": 19050
},
{
"epoch": 8.067244660604779,
"grad_norm": 0.06104918569326401,
"learning_rate": 4.77948717948718e-07,
"loss": 0.0018,
"step": 19075
},
{
"epoch": 8.077817720448298,
"grad_norm": 0.11749964207410812,
"learning_rate": 4.6512820512820514e-07,
"loss": 0.0031,
"step": 19100
},
{
"epoch": 8.088390780291816,
"grad_norm": 0.09838801622390747,
"learning_rate": 4.523076923076924e-07,
"loss": 0.0031,
"step": 19125
},
{
"epoch": 8.098963840135335,
"grad_norm": 0.1402641385793686,
"learning_rate": 4.3948717948717953e-07,
"loss": 0.0027,
"step": 19150
},
{
"epoch": 8.109536899978854,
"grad_norm": 0.16856901347637177,
"learning_rate": 4.266666666666667e-07,
"loss": 0.0023,
"step": 19175
},
{
"epoch": 8.120109959822372,
"grad_norm": 0.12997014820575714,
"learning_rate": 4.138461538461539e-07,
"loss": 0.0058,
"step": 19200
},
{
"epoch": 8.130683019665891,
"grad_norm": 0.14169169962406158,
"learning_rate": 4.0102564102564107e-07,
"loss": 0.0036,
"step": 19225
},
{
"epoch": 8.14125607950941,
"grad_norm": 0.09970966726541519,
"learning_rate": 3.882051282051282e-07,
"loss": 0.0022,
"step": 19250
},
{
"epoch": 8.151829139352929,
"grad_norm": 1.0204663276672363,
"learning_rate": 3.7538461538461546e-07,
"loss": 0.0023,
"step": 19275
},
{
"epoch": 8.162402199196448,
"grad_norm": 0.24392949044704437,
"learning_rate": 3.625641025641026e-07,
"loss": 0.0029,
"step": 19300
},
{
"epoch": 8.172975259039966,
"grad_norm": 0.12695328891277313,
"learning_rate": 3.4974358974358974e-07,
"loss": 0.0035,
"step": 19325
},
{
"epoch": 8.183548318883485,
"grad_norm": 0.15973243117332458,
"learning_rate": 3.36923076923077e-07,
"loss": 0.0019,
"step": 19350
},
{
"epoch": 8.194121378727004,
"grad_norm": 0.2357715517282486,
"learning_rate": 3.2410256410256413e-07,
"loss": 0.0019,
"step": 19375
},
{
"epoch": 8.204694438570522,
"grad_norm": 0.15530213713645935,
"learning_rate": 3.112820512820513e-07,
"loss": 0.0019,
"step": 19400
},
{
"epoch": 8.215267498414041,
"grad_norm": 3.935415267944336,
"learning_rate": 2.9846153846153847e-07,
"loss": 0.0065,
"step": 19425
},
{
"epoch": 8.225840558257559,
"grad_norm": 3.079641580581665,
"learning_rate": 2.8564102564102566e-07,
"loss": 0.004,
"step": 19450
},
{
"epoch": 8.236413618101079,
"grad_norm": 0.1831885129213333,
"learning_rate": 2.7282051282051286e-07,
"loss": 0.0018,
"step": 19475
},
{
"epoch": 8.246986677944598,
"grad_norm": 0.11785794049501419,
"learning_rate": 2.6e-07,
"loss": 0.0018,
"step": 19500
},
{
"epoch": 8.257559737788116,
"grad_norm": 0.3607770800590515,
"learning_rate": 2.471794871794872e-07,
"loss": 0.0032,
"step": 19525
},
{
"epoch": 8.268132797631635,
"grad_norm": 0.2167283445596695,
"learning_rate": 2.343589743589744e-07,
"loss": 0.0019,
"step": 19550
},
{
"epoch": 8.278705857475153,
"grad_norm": 0.1869659721851349,
"learning_rate": 2.2153846153846153e-07,
"loss": 0.0021,
"step": 19575
},
{
"epoch": 8.289278917318672,
"grad_norm": 0.16754354536533356,
"learning_rate": 2.0871794871794873e-07,
"loss": 0.002,
"step": 19600
},
{
"epoch": 8.299851977162191,
"grad_norm": 0.8075239658355713,
"learning_rate": 1.9589743589743592e-07,
"loss": 0.003,
"step": 19625
},
{
"epoch": 8.31042503700571,
"grad_norm": 2.266139507293701,
"learning_rate": 1.8307692307692306e-07,
"loss": 0.0065,
"step": 19650
},
{
"epoch": 8.320998096849229,
"grad_norm": 0.1474863439798355,
"learning_rate": 1.7025641025641026e-07,
"loss": 0.0056,
"step": 19675
},
{
"epoch": 8.331571156692746,
"grad_norm": 0.1871362030506134,
"learning_rate": 1.5743589743589745e-07,
"loss": 0.0047,
"step": 19700
},
{
"epoch": 8.342144216536266,
"grad_norm": 0.14866340160369873,
"learning_rate": 1.4461538461538462e-07,
"loss": 0.0037,
"step": 19725
},
{
"epoch": 8.352717276379785,
"grad_norm": 0.15600642561912537,
"learning_rate": 1.317948717948718e-07,
"loss": 0.0038,
"step": 19750
},
{
"epoch": 8.363290336223303,
"grad_norm": 0.18436697125434875,
"learning_rate": 1.1897435897435898e-07,
"loss": 0.0024,
"step": 19775
},
{
"epoch": 8.373863396066822,
"grad_norm": 0.12687981128692627,
"learning_rate": 1.0615384615384615e-07,
"loss": 0.0018,
"step": 19800
},
{
"epoch": 8.38443645591034,
"grad_norm": 0.11940109729766846,
"learning_rate": 9.333333333333335e-08,
"loss": 0.0025,
"step": 19825
},
{
"epoch": 8.39500951575386,
"grad_norm": 0.16202621161937714,
"learning_rate": 8.051282051282052e-08,
"loss": 0.0028,
"step": 19850
},
{
"epoch": 8.405582575597379,
"grad_norm": 0.09489905834197998,
"learning_rate": 6.76923076923077e-08,
"loss": 0.0053,
"step": 19875
},
{
"epoch": 8.416155635440896,
"grad_norm": 1.8258126974105835,
"learning_rate": 5.4871794871794874e-08,
"loss": 0.0029,
"step": 19900
},
{
"epoch": 8.426728695284416,
"grad_norm": 0.18347540497779846,
"learning_rate": 4.2051282051282056e-08,
"loss": 0.002,
"step": 19925
},
{
"epoch": 8.437301755127933,
"grad_norm": 0.19024056196212769,
"learning_rate": 2.9230769230769234e-08,
"loss": 0.0029,
"step": 19950
},
{
"epoch": 8.447874814971453,
"grad_norm": 0.21589778363704681,
"learning_rate": 1.641025641025641e-08,
"loss": 0.0031,
"step": 19975
},
{
"epoch": 8.458447874814972,
"grad_norm": 0.12975341081619263,
"learning_rate": 3.5897435897435903e-09,
"loss": 0.0018,
"step": 20000
},
{
"epoch": 8.458447874814972,
"eval_loss": 0.1440751701593399,
"eval_runtime": 481.615,
"eval_samples_per_second": 7.56,
"eval_steps_per_second": 0.947,
"eval_wer": 0.1183813634043343,
"step": 20000
},
{
"epoch": 8.458447874814972,
"step": 20000,
"total_flos": 2.07526043713536e+19,
"train_loss": 0.09297830064073205,
"train_runtime": 50820.1517,
"train_samples_per_second": 6.297,
"train_steps_per_second": 0.394
}
],
"logging_steps": 25,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.07526043713536e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}