{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 900,
  "global_step": 4500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00022222222222222223,
      "grad_norm": 2.652693033218384,
      "learning_rate": 2e-05,
      "loss": 1.3346,
      "step": 1
    },
    {
      "epoch": 0.00044444444444444447,
      "grad_norm": 2.6040241718292236,
      "learning_rate": 4e-05,
      "loss": 1.1227,
      "step": 2
    },
    {
      "epoch": 0.0006666666666666666,
      "grad_norm": 3.2344393730163574,
      "learning_rate": 6e-05,
      "loss": 0.181,
      "step": 3
    },
    {
      "epoch": 0.0008888888888888889,
      "grad_norm": 1.2059385776519775,
      "learning_rate": 8e-05,
      "loss": 1.0497,
      "step": 4
    },
    {
      "epoch": 0.0011111111111111111,
      "grad_norm": 0.9265973567962646,
      "learning_rate": 0.0001,
      "loss": 2.3025,
      "step": 5
    },
    {
      "epoch": 0.0013333333333333333,
      "grad_norm": 0.6584568023681641,
      "learning_rate": 0.00012,
      "loss": 1.3183,
      "step": 6
    },
    {
      "epoch": 0.0015555555555555555,
      "grad_norm": 1.0159577131271362,
      "learning_rate": 0.00014,
      "loss": 2.3477,
      "step": 7
    },
    {
      "epoch": 0.0017777777777777779,
      "grad_norm": 0.8150708675384521,
      "learning_rate": 0.00016,
      "loss": 1.1444,
      "step": 8
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.8650357127189636,
      "learning_rate": 0.00018,
      "loss": 0.1126,
      "step": 9
    },
    {
      "epoch": 0.0022222222222222222,
      "grad_norm": 0.5120269656181335,
      "learning_rate": 0.0002,
      "loss": 0.0703,
      "step": 10
    },
    {
      "epoch": 0.0024444444444444444,
      "grad_norm": 0.8459653258323669,
      "learning_rate": 0.0001999554565701559,
      "loss": 2.1607,
      "step": 11
    },
    {
      "epoch": 0.0026666666666666666,
      "grad_norm": 1.0426557064056396,
      "learning_rate": 0.0001999109131403118,
      "loss": 2.2511,
      "step": 12
    },
    {
      "epoch": 0.0028888888888888888,
      "grad_norm": 0.9789963960647583,
      "learning_rate": 0.0001998663697104677,
      "loss": 2.3192,
      "step": 13
    },
    {
      "epoch": 0.003111111111111111,
      "grad_norm": 0.9778504967689514,
      "learning_rate": 0.00019982182628062363,
      "loss": 2.3259,
      "step": 14
    },
    {
      "epoch": 0.0033333333333333335,
      "grad_norm": 0.9376258850097656,
      "learning_rate": 0.00019977728285077952,
      "loss": 2.3107,
      "step": 15
    },
    {
      "epoch": 0.0035555555555555557,
      "grad_norm": 0.958590567111969,
      "learning_rate": 0.00019973273942093541,
      "loss": 2.1426,
      "step": 16
    },
    {
      "epoch": 0.003777777777777778,
      "grad_norm": 1.1192786693572998,
      "learning_rate": 0.00019968819599109133,
      "loss": 1.8911,
      "step": 17
    },
    {
      "epoch": 0.004,
      "grad_norm": 1.112155556678772,
      "learning_rate": 0.00019964365256124723,
      "loss": 2.3746,
      "step": 18
    },
    {
      "epoch": 0.004222222222222222,
      "grad_norm": 1.0468113422393799,
      "learning_rate": 0.00019959910913140312,
      "loss": 2.3151,
      "step": 19
    },
    {
      "epoch": 0.0044444444444444444,
      "grad_norm": 0.907065212726593,
      "learning_rate": 0.00019955456570155904,
      "loss": 2.1126,
      "step": 20
    },
    {
      "epoch": 0.004666666666666667,
      "grad_norm": 1.0177619457244873,
      "learning_rate": 0.00019951002227171493,
      "loss": 1.7835,
      "step": 21
    },
    {
      "epoch": 0.004888888888888889,
      "grad_norm": 0.9592558741569519,
      "learning_rate": 0.00019946547884187085,
      "loss": 1.8765,
      "step": 22
    },
    {
      "epoch": 0.005111111111111111,
      "grad_norm": 1.0939193964004517,
      "learning_rate": 0.00019942093541202674,
      "loss": 1.4541,
      "step": 23
    },
    {
      "epoch": 0.005333333333333333,
      "grad_norm": 1.4333382844924927,
      "learning_rate": 0.00019937639198218263,
      "loss": 0.5901,
      "step": 24
    },
    {
      "epoch": 0.005555555555555556,
      "grad_norm": 1.2383116483688354,
      "learning_rate": 0.00019933184855233852,
      "loss": 0.4609,
      "step": 25
    },
    {
      "epoch": 0.0057777777777777775,
      "grad_norm": 1.2145708799362183,
      "learning_rate": 0.00019928730512249444,
      "loss": 0.3003,
      "step": 26
    },
    {
      "epoch": 0.006,
      "grad_norm": 1.2296050786972046,
      "learning_rate": 0.00019924276169265036,
      "loss": 1.8733,
      "step": 27
    },
    {
      "epoch": 0.006222222222222222,
      "grad_norm": 1.5386277437210083,
      "learning_rate": 0.00019919821826280625,
      "loss": 1.9739,
      "step": 28
    },
    {
      "epoch": 0.0064444444444444445,
      "grad_norm": 1.691746473312378,
      "learning_rate": 0.00019915367483296214,
      "loss": 2.3246,
      "step": 29
    },
    {
      "epoch": 0.006666666666666667,
      "grad_norm": 1.5347216129302979,
      "learning_rate": 0.00019910913140311804,
      "loss": 1.9747,
      "step": 30
    },
    {
      "epoch": 0.006888888888888889,
      "grad_norm": 1.1143240928649902,
      "learning_rate": 0.00019906458797327395,
      "loss": 1.8893,
      "step": 31
    },
    {
      "epoch": 0.0071111111111111115,
      "grad_norm": 1.443770170211792,
      "learning_rate": 0.00019902004454342987,
      "loss": 2.1026,
      "step": 32
    },
    {
      "epoch": 0.007333333333333333,
      "grad_norm": 1.1426650285720825,
      "learning_rate": 0.00019897550111358577,
      "loss": 1.1449,
      "step": 33
    },
    {
      "epoch": 0.007555555555555556,
      "grad_norm": 1.4505339860916138,
      "learning_rate": 0.00019893095768374166,
      "loss": 0.1506,
      "step": 34
    },
    {
      "epoch": 0.0077777777777777776,
      "grad_norm": 0.7057297825813293,
      "learning_rate": 0.00019888641425389755,
      "loss": 0.0606,
      "step": 35
    },
    {
      "epoch": 0.008,
      "grad_norm": 0.3842390775680542,
      "learning_rate": 0.00019884187082405347,
      "loss": 0.0288,
      "step": 36
    },
    {
      "epoch": 0.008222222222222223,
      "grad_norm": 1.8523081541061401,
      "learning_rate": 0.00019879732739420936,
      "loss": 1.0721,
      "step": 37
    },
    {
      "epoch": 0.008444444444444444,
      "grad_norm": 2.3438615798950195,
      "learning_rate": 0.00019875278396436528,
      "loss": 0.3206,
      "step": 38
    },
    {
      "epoch": 0.008666666666666666,
      "grad_norm": 1.7265911102294922,
      "learning_rate": 0.00019870824053452117,
      "loss": 0.2136,
      "step": 39
    },
    {
      "epoch": 0.008888888888888889,
      "grad_norm": 1.1597121953964233,
      "learning_rate": 0.00019866369710467706,
      "loss": 0.1069,
      "step": 40
    },
    {
      "epoch": 0.009111111111111111,
      "grad_norm": 1.5598105192184448,
      "learning_rate": 0.00019861915367483298,
      "loss": 2.1067,
      "step": 41
    },
    {
      "epoch": 0.009333333333333334,
      "grad_norm": 1.8397672176361084,
      "learning_rate": 0.00019857461024498887,
      "loss": 1.3043,
      "step": 42
    },
    {
      "epoch": 0.009555555555555555,
      "grad_norm": 1.7002127170562744,
      "learning_rate": 0.00019853006681514476,
      "loss": 1.1985,
      "step": 43
    },
    {
      "epoch": 0.009777777777777778,
      "grad_norm": 1.86135733127594,
      "learning_rate": 0.00019848552338530068,
      "loss": 1.8315,
      "step": 44
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.30124032497406,
      "learning_rate": 0.00019844097995545658,
      "loss": 1.7591,
      "step": 45
    },
    {
      "epoch": 0.010222222222222223,
      "grad_norm": 1.6460219621658325,
      "learning_rate": 0.0001983964365256125,
      "loss": 1.9981,
      "step": 46
    },
    {
      "epoch": 0.010444444444444444,
      "grad_norm": 1.2923930883407593,
      "learning_rate": 0.00019835189309576839,
      "loss": 1.2989,
      "step": 47
    },
    {
      "epoch": 0.010666666666666666,
      "grad_norm": 1.448328971862793,
      "learning_rate": 0.00019830734966592428,
      "loss": 1.2251,
      "step": 48
    },
    {
      "epoch": 0.010888888888888889,
      "grad_norm": 1.767919659614563,
      "learning_rate": 0.00019826280623608017,
      "loss": 1.1289,
      "step": 49
    },
    {
      "epoch": 0.011111111111111112,
      "grad_norm": 1.786415696144104,
      "learning_rate": 0.0001982182628062361,
      "loss": 1.3542,
      "step": 50
    },
    {
      "epoch": 0.011333333333333334,
      "grad_norm": 1.26632559299469,
      "learning_rate": 0.000198173719376392,
      "loss": 1.5781,
      "step": 51
    },
    {
      "epoch": 0.011555555555555555,
      "grad_norm": 1.0629119873046875,
      "learning_rate": 0.0001981291759465479,
      "loss": 1.2804,
      "step": 52
    },
    {
      "epoch": 0.011777777777777778,
      "grad_norm": 1.2844982147216797,
      "learning_rate": 0.0001980846325167038,
      "loss": 0.2304,
      "step": 53
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.7769795656204224,
      "learning_rate": 0.00019804008908685968,
      "loss": 1.302,
      "step": 54
    },
    {
      "epoch": 0.012222222222222223,
      "grad_norm": 0.5583917498588562,
      "learning_rate": 0.0001979955456570156,
      "loss": 0.0753,
      "step": 55
    },
    {
      "epoch": 0.012444444444444444,
      "grad_norm": 0.9252032041549683,
      "learning_rate": 0.00019795100222717152,
      "loss": 2.0616,
      "step": 56
    },
    {
      "epoch": 0.012666666666666666,
      "grad_norm": 4.016125202178955,
      "learning_rate": 0.0001979064587973274,
      "loss": 0.3289,
      "step": 57
    },
    {
      "epoch": 0.012888888888888889,
      "grad_norm": 1.1086289882659912,
      "learning_rate": 0.0001978619153674833,
      "loss": 0.9885,
      "step": 58
    },
    {
      "epoch": 0.013111111111111112,
      "grad_norm": 1.0204805135726929,
      "learning_rate": 0.0001978173719376392,
      "loss": 2.0717,
      "step": 59
    },
    {
      "epoch": 0.013333333333333334,
      "grad_norm": 1.1669329404830933,
      "learning_rate": 0.00019777282850779511,
      "loss": 2.2568,
      "step": 60
    },
    {
      "epoch": 0.013555555555555555,
      "grad_norm": 1.0386414527893066,
      "learning_rate": 0.000197728285077951,
      "loss": 2.3931,
      "step": 61
    },
    {
      "epoch": 0.013777777777777778,
      "grad_norm": 0.9788153767585754,
      "learning_rate": 0.00019768374164810693,
      "loss": 2.0631,
      "step": 62
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.9021984338760376,
      "learning_rate": 0.00019763919821826282,
      "loss": 2.3083,
      "step": 63
    },
    {
      "epoch": 0.014222222222222223,
      "grad_norm": 1.1166595220565796,
      "learning_rate": 0.0001975946547884187,
      "loss": 1.932,
      "step": 64
    },
    {
      "epoch": 0.014444444444444444,
      "grad_norm": 1.7329879999160767,
      "learning_rate": 0.00019755011135857463,
      "loss": 1.599,
      "step": 65
    },
    {
      "epoch": 0.014666666666666666,
      "grad_norm": 1.1422115564346313,
      "learning_rate": 0.00019750556792873052,
      "loss": 1.8565,
      "step": 66
    },
    {
      "epoch": 0.014888888888888889,
      "grad_norm": 1.0347861051559448,
      "learning_rate": 0.0001974610244988864,
      "loss": 2.108,
      "step": 67
    },
    {
      "epoch": 0.015111111111111112,
      "grad_norm": 1.5094088315963745,
      "learning_rate": 0.00019741648106904233,
      "loss": 1.0669,
      "step": 68
    },
    {
      "epoch": 0.015333333333333332,
      "grad_norm": 1.7448095083236694,
      "learning_rate": 0.00019737193763919822,
      "loss": 0.2599,
      "step": 69
    },
    {
      "epoch": 0.015555555555555555,
      "grad_norm": 0.9938380718231201,
      "learning_rate": 0.00019732739420935414,
      "loss": 0.1311,
      "step": 70
    },
    {
      "epoch": 0.01577777777777778,
      "grad_norm": 1.0205384492874146,
      "learning_rate": 0.00019728285077951003,
      "loss": 1.7412,
      "step": 71
    },
    {
      "epoch": 0.016,
      "grad_norm": 1.2222613096237183,
      "learning_rate": 0.00019723830734966592,
      "loss": 1.7811,
      "step": 72
    },
    {
      "epoch": 0.01622222222222222,
      "grad_norm": 1.2196162939071655,
      "learning_rate": 0.00019719376391982182,
      "loss": 1.6904,
      "step": 73
    },
    {
      "epoch": 0.016444444444444446,
      "grad_norm": 1.3248560428619385,
      "learning_rate": 0.00019714922048997774,
      "loss": 1.7129,
      "step": 74
    },
    {
      "epoch": 0.016666666666666666,
      "grad_norm": 2.0687692165374756,
      "learning_rate": 0.00019710467706013365,
      "loss": 0.1651,
      "step": 75
    },
    {
      "epoch": 0.016888888888888887,
      "grad_norm": 0.9671218395233154,
      "learning_rate": 0.00019706013363028955,
      "loss": 0.0788,
      "step": 76
    },
    {
      "epoch": 0.01711111111111111,
      "grad_norm": 0.2219647616147995,
      "learning_rate": 0.00019701559020044544,
      "loss": 0.0253,
      "step": 77
    },
    {
      "epoch": 0.017333333333333333,
      "grad_norm": 1.0968049764633179,
      "learning_rate": 0.00019697104677060133,
      "loss": 1.058,
      "step": 78
    },
    {
      "epoch": 0.017555555555555557,
      "grad_norm": 0.9246222376823425,
      "learning_rate": 0.00019692650334075725,
      "loss": 0.1111,
      "step": 79
    },
    {
      "epoch": 0.017777777777777778,
      "grad_norm": 0.7714378237724304,
      "learning_rate": 0.00019688195991091317,
      "loss": 0.9722,
      "step": 80
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.25117895007133484,
      "learning_rate": 0.00019683741648106906,
      "loss": 0.0352,
      "step": 81
    },
    {
      "epoch": 0.018222222222222223,
      "grad_norm": 1.1857657432556152,
      "learning_rate": 0.00019679287305122495,
      "loss": 1.997,
      "step": 82
    },
    {
      "epoch": 0.018444444444444444,
      "grad_norm": 1.196076512336731,
      "learning_rate": 0.00019674832962138084,
      "loss": 1.7684,
      "step": 83
    },
    {
      "epoch": 0.018666666666666668,
      "grad_norm": 1.5178613662719727,
      "learning_rate": 0.00019670378619153676,
      "loss": 1.93,
      "step": 84
    },
    {
      "epoch": 0.01888888888888889,
      "grad_norm": 1.5289626121520996,
      "learning_rate": 0.00019665924276169265,
      "loss": 1.4133,
      "step": 85
    },
    {
      "epoch": 0.01911111111111111,
      "grad_norm": 1.3246040344238281,
      "learning_rate": 0.00019661469933184855,
      "loss": 1.7251,
      "step": 86
    },
    {
      "epoch": 0.019333333333333334,
      "grad_norm": 1.296377420425415,
      "learning_rate": 0.00019657015590200446,
      "loss": 1.8894,
      "step": 87
    },
    {
      "epoch": 0.019555555555555555,
      "grad_norm": 1.5035158395767212,
      "learning_rate": 0.00019652561247216036,
      "loss": 1.6791,
      "step": 88
    },
    {
      "epoch": 0.019777777777777776,
      "grad_norm": 1.1684895753860474,
      "learning_rate": 0.00019648106904231628,
      "loss": 1.4391,
      "step": 89
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2564208507537842,
      "learning_rate": 0.00019643652561247217,
      "loss": 1.5629,
      "step": 90
    },
    {
      "epoch": 0.02022222222222222,
      "grad_norm": 1.1524401903152466,
      "learning_rate": 0.00019639198218262806,
      "loss": 1.5727,
      "step": 91
    },
    {
      "epoch": 0.020444444444444446,
      "grad_norm": 1.2944073677062988,
      "learning_rate": 0.00019634743875278395,
      "loss": 1.4461,
      "step": 92
    },
    {
      "epoch": 0.020666666666666667,
      "grad_norm": 1.0988140106201172,
      "learning_rate": 0.0001963028953229399,
      "loss": 1.7277,
      "step": 93
    },
    {
      "epoch": 0.020888888888888887,
      "grad_norm": 1.2961751222610474,
      "learning_rate": 0.0001962583518930958,
      "loss": 1.5801,
      "step": 94
    },
    {
      "epoch": 0.021111111111111112,
      "grad_norm": 1.103636622428894,
      "learning_rate": 0.00019621380846325168,
      "loss": 1.2372,
      "step": 95
    },
    {
      "epoch": 0.021333333333333333,
      "grad_norm": 1.108388900756836,
      "learning_rate": 0.00019616926503340757,
      "loss": 1.2511,
      "step": 96
    },
    {
      "epoch": 0.021555555555555557,
      "grad_norm": 1.27703857421875,
      "learning_rate": 0.00019612472160356346,
      "loss": 0.4245,
      "step": 97
    },
    {
      "epoch": 0.021777777777777778,
      "grad_norm": 1.0161255598068237,
      "learning_rate": 0.00019608017817371938,
      "loss": 0.9912,
      "step": 98
    },
    {
      "epoch": 0.022,
      "grad_norm": 1.1940312385559082,
      "learning_rate": 0.0001960356347438753,
      "loss": 1.0144,
      "step": 99
    },
    {
      "epoch": 0.022222222222222223,
      "grad_norm": 1.552917242050171,
      "learning_rate": 0.0001959910913140312,
      "loss": 0.8394,
      "step": 100
    },
    {
      "epoch": 0.022444444444444444,
      "grad_norm": 1.560513973236084,
      "learning_rate": 0.00019594654788418709,
      "loss": 1.3496,
      "step": 101
    },
    {
      "epoch": 0.02266666666666667,
      "grad_norm": 0.8733224868774414,
      "learning_rate": 0.000195902004454343,
      "loss": 1.2893,
      "step": 102
    },
    {
      "epoch": 0.02288888888888889,
      "grad_norm": 0.7647473216056824,
      "learning_rate": 0.0001958574610244989,
      "loss": 1.309,
      "step": 103
    },
    {
      "epoch": 0.02311111111111111,
      "grad_norm": 0.36057984828948975,
      "learning_rate": 0.0001958129175946548,
      "loss": 0.0338,
      "step": 104
    },
    {
      "epoch": 0.023333333333333334,
      "grad_norm": 0.6094343066215515,
      "learning_rate": 0.0001957683741648107,
      "loss": 1.1273,
      "step": 105
    },
    {
      "epoch": 0.023555555555555555,
      "grad_norm": 0.8449940085411072,
      "learning_rate": 0.0001957238307349666,
      "loss": 0.0724,
      "step": 106
    },
    {
      "epoch": 0.023777777777777776,
      "grad_norm": 0.6553965210914612,
      "learning_rate": 0.00019567928730512252,
      "loss": 0.0731,
      "step": 107
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.7489123940467834,
      "learning_rate": 0.0001956347438752784,
      "loss": 1.2152,
      "step": 108
    },
    {
      "epoch": 0.02422222222222222,
      "grad_norm": 1.0028694868087769,
      "learning_rate": 0.0001955902004454343,
      "loss": 2.3775,
      "step": 109
    },
    {
      "epoch": 0.024444444444444446,
      "grad_norm": 1.1270484924316406,
      "learning_rate": 0.0001955456570155902,
      "loss": 2.1361,
      "step": 110
    },
    {
      "epoch": 0.024666666666666667,
      "grad_norm": 1.0289149284362793,
      "learning_rate": 0.0001955011135857461,
      "loss": 1.892,
      "step": 111
    },
    {
      "epoch": 0.024888888888888887,
      "grad_norm": 1.0495026111602783,
      "learning_rate": 0.00019545657015590203,
      "loss": 1.9626,
      "step": 112
    },
    {
      "epoch": 0.025111111111111112,
      "grad_norm": 0.8400951623916626,
      "learning_rate": 0.00019541202672605792,
      "loss": 1.9681,
      "step": 113
    },
    {
      "epoch": 0.025333333333333333,
      "grad_norm": 3.4133801460266113,
      "learning_rate": 0.00019536748329621381,
      "loss": 0.8974,
      "step": 114
    },
    {
      "epoch": 0.025555555555555557,
      "grad_norm": 1.6891502141952515,
      "learning_rate": 0.0001953229398663697,
      "loss": 0.7522,
      "step": 115
    },
    {
      "epoch": 0.025777777777777778,
      "grad_norm": 0.8611025810241699,
      "learning_rate": 0.00019527839643652563,
      "loss": 2.0537,
      "step": 116
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.9971293210983276,
      "learning_rate": 0.00019523385300668154,
      "loss": 2.2649,
      "step": 117
    },
    {
      "epoch": 0.026222222222222223,
      "grad_norm": 0.9530083537101746,
      "learning_rate": 0.00019518930957683744,
      "loss": 2.0625,
      "step": 118
    },
    {
      "epoch": 0.026444444444444444,
      "grad_norm": 1.045301079750061,
      "learning_rate": 0.00019514476614699333,
      "loss": 2.006,
      "step": 119
    },
    {
      "epoch": 0.02666666666666667,
      "grad_norm": 0.9277514815330505,
      "learning_rate": 0.00019510022271714922,
      "loss": 2.0909,
      "step": 120
    },
    {
      "epoch": 0.02688888888888889,
      "grad_norm": 1.1190154552459717,
      "learning_rate": 0.00019505567928730514,
      "loss": 1.8509,
      "step": 121
    },
    {
      "epoch": 0.02711111111111111,
      "grad_norm": 1.1135308742523193,
      "learning_rate": 0.00019501113585746103,
      "loss": 2.0486,
      "step": 122
    },
    {
      "epoch": 0.027333333333333334,
      "grad_norm": 1.0613086223602295,
      "learning_rate": 0.00019496659242761695,
      "loss": 1.2045,
      "step": 123
    },
    {
      "epoch": 0.027555555555555555,
      "grad_norm": 1.2695746421813965,
      "learning_rate": 0.00019492204899777284,
      "loss": 0.133,
      "step": 124
    },
    {
      "epoch": 0.027777777777777776,
      "grad_norm": 1.5150560140609741,
      "learning_rate": 0.00019487750556792873,
      "loss": 1.3502,
      "step": 125
    },
    {
      "epoch": 0.028,
      "grad_norm": 1.473061203956604,
      "learning_rate": 0.00019483296213808465,
      "loss": 1.2591,
      "step": 126
    },
    {
      "epoch": 0.02822222222222222,
      "grad_norm": 1.287636637687683,
      "learning_rate": 0.00019478841870824054,
      "loss": 1.8108,
      "step": 127
    },
    {
      "epoch": 0.028444444444444446,
      "grad_norm": 1.033453345298767,
      "learning_rate": 0.00019474387527839644,
      "loss": 1.8811,
      "step": 128
    },
    {
      "epoch": 0.028666666666666667,
      "grad_norm": 1.2280066013336182,
      "learning_rate": 0.00019469933184855235,
      "loss": 1.9518,
      "step": 129
    },
    {
      "epoch": 0.028888888888888888,
      "grad_norm": 1.2945783138275146,
      "learning_rate": 0.00019465478841870825,
      "loss": 1.8723,
      "step": 130
    },
    {
      "epoch": 0.029111111111111112,
      "grad_norm": 1.2305806875228882,
      "learning_rate": 0.00019461024498886416,
      "loss": 1.7272,
      "step": 131
    },
    {
      "epoch": 0.029333333333333333,
      "grad_norm": 1.3530161380767822,
      "learning_rate": 0.00019456570155902006,
      "loss": 1.0613,
      "step": 132
    },
    {
      "epoch": 0.029555555555555557,
      "grad_norm": 0.6455280184745789,
      "learning_rate": 0.00019452115812917595,
      "loss": 0.0564,
      "step": 133
    },
    {
      "epoch": 0.029777777777777778,
      "grad_norm": 0.27219173312187195,
      "learning_rate": 0.00019447661469933184,
      "loss": 0.0297,
      "step": 134
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8533250093460083,
      "learning_rate": 0.00019443207126948776,
      "loss": 1.055,
      "step": 135
    },
    {
      "epoch": 0.030222222222222223,
      "grad_norm": 2.0201220512390137,
      "learning_rate": 0.00019438752783964368,
      "loss": 0.0575,
      "step": 136
    },
    {
      "epoch": 0.030444444444444444,
      "grad_norm": 0.22158296406269073,
      "learning_rate": 0.00019434298440979957,
      "loss": 0.0267,
      "step": 137
    },
    {
      "epoch": 0.030666666666666665,
      "grad_norm": 0.6912283301353455,
      "learning_rate": 0.00019429844097995546,
      "loss": 0.759,
      "step": 138
    },
    {
      "epoch": 0.03088888888888889,
      "grad_norm": 1.0244457721710205,
      "learning_rate": 0.00019425389755011135,
      "loss": 1.8124,
      "step": 139
    },
    {
      "epoch": 0.03111111111111111,
      "grad_norm": 1.0154163837432861,
      "learning_rate": 0.00019420935412026727,
      "loss": 1.2565,
      "step": 140
    },
    {
      "epoch": 0.03133333333333333,
      "grad_norm": 0.9750798940658569,
      "learning_rate": 0.0001941648106904232,
      "loss": 1.3334,
      "step": 141
    },
    {
      "epoch": 0.03155555555555556,
      "grad_norm": 1.3760290145874023,
      "learning_rate": 0.00019412026726057908,
      "loss": 1.674,
      "step": 142
    },
    {
      "epoch": 0.03177777777777778,
      "grad_norm": 1.2384613752365112,
      "learning_rate": 0.00019407572383073498,
      "loss": 1.5803,
      "step": 143
    },
    {
      "epoch": 0.032,
      "grad_norm": 1.452026128768921,
      "learning_rate": 0.00019403118040089087,
      "loss": 1.5312,
      "step": 144
    },
    {
      "epoch": 0.03222222222222222,
      "grad_norm": 1.0377565622329712,
      "learning_rate": 0.00019398663697104679,
      "loss": 1.3206,
      "step": 145
    },
    {
      "epoch": 0.03244444444444444,
      "grad_norm": 1.2706232070922852,
      "learning_rate": 0.00019394209354120268,
      "loss": 0.6841,
      "step": 146
    },
    {
      "epoch": 0.03266666666666666,
      "grad_norm": 1.1973495483398438,
      "learning_rate": 0.0001938975501113586,
      "loss": 1.3782,
      "step": 147
    },
    {
      "epoch": 0.03288888888888889,
      "grad_norm": 1.2757797241210938,
      "learning_rate": 0.0001938530066815145,
      "loss": 1.5044,
      "step": 148
    },
    {
      "epoch": 0.03311111111111111,
      "grad_norm": 1.0980628728866577,
      "learning_rate": 0.00019380846325167038,
      "loss": 0.6844,
      "step": 149
    },
    {
      "epoch": 0.03333333333333333,
      "grad_norm": 1.2653899192810059,
      "learning_rate": 0.0001937639198218263,
      "loss": 1.1744,
      "step": 150
    },
    {
      "epoch": 0.033555555555555554,
      "grad_norm": 2.2348453998565674,
      "learning_rate": 0.0001937193763919822,
      "loss": 0.1349,
      "step": 151
    },
    {
      "epoch": 0.033777777777777775,
      "grad_norm": 1.0379369258880615,
      "learning_rate": 0.00019367483296213808,
      "loss": 1.6603,
      "step": 152
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.735359251499176,
      "learning_rate": 0.000193630289532294,
      "loss": 1.0719,
      "step": 153
    },
    {
      "epoch": 0.03422222222222222,
      "grad_norm": 1.1146577596664429,
      "learning_rate": 0.0001935857461024499,
      "loss": 2.4996,
      "step": 154
    },
    {
      "epoch": 0.034444444444444444,
      "grad_norm": 0.7083627581596375,
      "learning_rate": 0.0001935412026726058,
      "loss": 1.1257,
      "step": 155
    },
    {
      "epoch": 0.034666666666666665,
      "grad_norm": 0.12564276158809662,
      "learning_rate": 0.0001934966592427617,
      "loss": 0.0168,
      "step": 156
    },
    {
      "epoch": 0.034888888888888886,
      "grad_norm": 0.7023375630378723,
      "learning_rate": 0.0001934521158129176,
      "loss": 1.0538,
      "step": 157
    },
    {
      "epoch": 0.035111111111111114,
      "grad_norm": 0.5180396437644958,
      "learning_rate": 0.0001934075723830735,
      "loss": 0.046,
      "step": 158
    },
    {
      "epoch": 0.035333333333333335,
      "grad_norm": 0.6033398509025574,
      "learning_rate": 0.0001933630289532294,
      "loss": 1.0375,
      "step": 159
    },
    {
      "epoch": 0.035555555555555556,
      "grad_norm": 0.8683068156242371,
      "learning_rate": 0.00019331848552338533,
      "loss": 2.1418,
      "step": 160
    },
    {
      "epoch": 0.035777777777777776,
      "grad_norm": 0.9552950859069824,
      "learning_rate": 0.00019327394209354122,
      "loss": 2.2581,
      "step": 161
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.9460126757621765,
      "learning_rate": 0.0001932293986636971,
      "loss": 2.012,
      "step": 162
    },
    {
      "epoch": 0.036222222222222225,
      "grad_norm": 0.9581560492515564,
      "learning_rate": 0.000193184855233853,
      "loss": 2.2566,
      "step": 163
    },
    {
      "epoch": 0.036444444444444446,
      "grad_norm": 1.3161773681640625,
      "learning_rate": 0.00019314031180400892,
      "loss": 1.3755,
      "step": 164
    },
    {
      "epoch": 0.03666666666666667,
      "grad_norm": 1.008534550666809,
      "learning_rate": 0.0001930957683741648,
      "loss": 2.1225,
      "step": 165
    },
    {
      "epoch": 0.03688888888888889,
      "grad_norm": 0.965151309967041,
      "learning_rate": 0.00019305122494432073,
      "loss": 2.0691,
      "step": 166
    },
    {
      "epoch": 0.03711111111111111,
      "grad_norm": 1.0121870040893555,
      "learning_rate": 0.00019300668151447662,
      "loss": 1.7821,
      "step": 167
    },
    {
      "epoch": 0.037333333333333336,
      "grad_norm": 1.084385871887207,
      "learning_rate": 0.00019296213808463251,
      "loss": 0.0566,
      "step": 168
    },
    {
      "epoch": 0.03755555555555556,
      "grad_norm": 0.6437819600105286,
      "learning_rate": 0.00019291759465478843,
      "loss": 0.9437,
      "step": 169
    },
    {
      "epoch": 0.03777777777777778,
      "grad_norm": 0.8647774457931519,
      "learning_rate": 0.00019287305122494432,
      "loss": 1.2514,
      "step": 170
    },
    {
      "epoch": 0.038,
      "grad_norm": 1.2839748859405518,
      "learning_rate": 0.00019282850779510022,
      "loss": 2.1381,
      "step": 171
    },
    {
      "epoch": 0.03822222222222222,
      "grad_norm": 1.1602987051010132,
      "learning_rate": 0.00019278396436525614,
      "loss": 2.0459,
      "step": 172
    },
    {
      "epoch": 0.03844444444444445,
      "grad_norm": 1.0964981317520142,
      "learning_rate": 0.00019273942093541203,
      "loss": 1.8051,
      "step": 173
    },
    {
      "epoch": 0.03866666666666667,
      "grad_norm": 0.9932026267051697,
      "learning_rate": 0.00019269487750556795,
      "loss": 2.048,
      "step": 174
    },
    {
      "epoch": 0.03888888888888889,
      "grad_norm": 0.9929348826408386,
      "learning_rate": 0.00019265033407572384,
      "loss": 1.6021,
      "step": 175
    },
    {
      "epoch": 0.03911111111111111,
      "grad_norm": 0.9955350756645203,
      "learning_rate": 0.00019260579064587973,
      "loss": 1.8276,
      "step": 176
    },
    {
      "epoch": 0.03933333333333333,
      "grad_norm": 1.1119470596313477,
      "learning_rate": 0.00019256124721603562,
      "loss": 2.2424,
      "step": 177
    },
    {
      "epoch": 0.03955555555555555,
      "grad_norm": 0.9925389885902405,
      "learning_rate": 0.00019251670378619157,
      "loss": 1.9117,
      "step": 178
    },
    {
      "epoch": 0.03977777777777778,
      "grad_norm": 0.7970108985900879,
      "learning_rate": 0.00019247216035634746,
      "loss": 0.8588,
      "step": 179
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.25734761357307434,
      "learning_rate": 0.00019242761692650335,
      "loss": 0.0239,
      "step": 180
    },
    {
      "epoch": 0.04022222222222222,
      "grad_norm": 0.13014006614685059,
      "learning_rate": 0.00019238307349665924,
      "loss": 0.0209,
      "step": 181
    },
    {
      "epoch": 0.04044444444444444,
      "grad_norm": 0.10182005167007446,
      "learning_rate": 0.00019233853006681513,
      "loss": 0.0186,
      "step": 182
    },
    {
      "epoch": 0.04066666666666666,
      "grad_norm": 0.07455065846443176,
      "learning_rate": 0.00019229398663697105,
      "loss": 0.0176,
      "step": 183
    },
    {
      "epoch": 0.04088888888888889,
      "grad_norm": 0.06727147102355957,
      "learning_rate": 0.00019224944320712697,
      "loss": 0.0166,
      "step": 184
    },
    {
      "epoch": 0.04111111111111111,
      "grad_norm": 0.7398682236671448,
      "learning_rate": 0.00019220489977728286,
      "loss": 0.7252,
      "step": 185
    },
    {
      "epoch": 0.04133333333333333,
      "grad_norm": 0.9568517804145813,
      "learning_rate": 0.00019216035634743876,
      "loss": 1.0639,
      "step": 186
    },
    {
      "epoch": 0.041555555555555554,
      "grad_norm": 0.7644314169883728,
      "learning_rate": 0.00019211581291759468,
      "loss": 1.0938,
      "step": 187
    },
    {
      "epoch": 0.041777777777777775,
      "grad_norm": 1.0712711811065674,
      "learning_rate": 0.00019207126948775057,
      "loss": 1.9997,
      "step": 188
    },
    {
      "epoch": 0.042,
      "grad_norm": 1.1801820993423462,
      "learning_rate": 0.00019202672605790646,
      "loss": 1.684,
      "step": 189
    },
    {
      "epoch": 0.042222222222222223,
      "grad_norm": 1.134307861328125,
      "learning_rate": 0.00019198218262806238,
      "loss": 1.4712,
      "step": 190
    },
    {
      "epoch": 0.042444444444444444,
      "grad_norm": 1.0281476974487305,
      "learning_rate": 0.00019193763919821827,
      "loss": 1.7053,
      "step": 191
    },
    {
      "epoch": 0.042666666666666665,
      "grad_norm": 1.0464823246002197,
      "learning_rate": 0.0001918930957683742,
      "loss": 1.4212,
      "step": 192
    },
    {
      "epoch": 0.042888888888888886,
      "grad_norm": 1.1084800958633423,
      "learning_rate": 0.00019184855233853008,
      "loss": 1.6954,
      "step": 193
    },
    {
      "epoch": 0.043111111111111114,
      "grad_norm": 1.5273072719573975,
      "learning_rate": 0.00019180400890868597,
      "loss": 1.5331,
      "step": 194
    },
    {
      "epoch": 0.043333333333333335,
      "grad_norm": 1.0163640975952148,
      "learning_rate": 0.00019175946547884186,
      "loss": 0.8302,
      "step": 195
    },
    {
      "epoch": 0.043555555555555556,
      "grad_norm": 1.1570039987564087,
      "learning_rate": 0.00019171492204899778,
      "loss": 1.1332,
      "step": 196
    },
    {
      "epoch": 0.04377777777777778,
      "grad_norm": 1.3068771362304688,
      "learning_rate": 0.0001916703786191537,
      "loss": 0.1635,
      "step": 197
    },
    {
      "epoch": 0.044,
      "grad_norm": 1.343957781791687,
      "learning_rate": 0.0001916258351893096,
      "loss": 1.3181,
      "step": 198
    },
    {
      "epoch": 0.044222222222222225,
      "grad_norm": 1.087274193763733,
      "learning_rate": 0.00019158129175946549,
      "loss": 1.2172,
      "step": 199
    },
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 1.6285357475280762,
      "learning_rate": 0.00019153674832962138,
      "loss": 0.8885,
      "step": 200
    },
    {
      "epoch": 0.04466666666666667,
      "grad_norm": 0.8212461471557617,
      "learning_rate": 0.0001914922048997773,
      "loss": 1.9517,
      "step": 201
    },
    {
      "epoch": 0.04488888888888889,
      "grad_norm": 0.8975577354431152,
      "learning_rate": 0.00019144766146993322,
      "loss": 2.0502,
      "step": 202
    },
    {
      "epoch": 0.04511111111111111,
      "grad_norm": 0.931280255317688,
      "learning_rate": 0.0001914031180400891,
      "loss": 1.0111,
      "step": 203
    },
    {
      "epoch": 0.04533333333333334,
      "grad_norm": 0.6608829498291016,
      "learning_rate": 0.000191358574610245,
      "loss": 1.1096,
      "step": 204
    },
    {
      "epoch": 0.04555555555555556,
      "grad_norm": 0.7915617227554321,
      "learning_rate": 0.0001913140311804009,
      "loss": 2.6039,
      "step": 205
    },
    {
      "epoch": 0.04577777777777778,
      "grad_norm": 0.6403900980949402,
      "learning_rate": 0.0001912694877505568,
      "loss": 1.13,
      "step": 206
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.9232172966003418,
      "learning_rate": 0.0001912249443207127,
      "loss": 2.668,
      "step": 207
    },
    {
      "epoch": 0.04622222222222222,
      "grad_norm": 0.6349806785583496,
      "learning_rate": 0.00019118040089086862,
      "loss": 1.1026,
      "step": 208
    },
    {
      "epoch": 0.04644444444444444,
      "grad_norm": 0.8131903409957886,
      "learning_rate": 0.0001911358574610245,
      "loss": 2.2254,
      "step": 209
    },
    {
      "epoch": 0.04666666666666667,
      "grad_norm": 0.2445099800825119,
      "learning_rate": 0.0001910913140311804,
      "loss": 0.0209,
      "step": 210
    },
    {
      "epoch": 0.04688888888888889,
      "grad_norm": 0.11807701736688614,
      "learning_rate": 0.00019104677060133632,
      "loss": 0.0192,
      "step": 211
    },
    {
      "epoch": 0.04711111111111111,
      "grad_norm": 0.10483487695455551,
      "learning_rate": 0.00019100222717149221,
      "loss": 0.0177,
      "step": 212
    },
    {
      "epoch": 0.04733333333333333,
      "grad_norm": 0.5437442064285278,
      "learning_rate": 0.0001909576837416481,
      "loss": 0.9199,
      "step": 213
    },
    {
      "epoch": 0.04755555555555555,
      "grad_norm": 0.8952361941337585,
      "learning_rate": 0.00019091314031180403,
      "loss": 2.6062,
      "step": 214
    },
    {
      "epoch": 0.04777777777777778,
      "grad_norm": 0.87530916929245,
      "learning_rate": 0.00019086859688195992,
      "loss": 1.98,
      "step": 215
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.9103797078132629,
      "learning_rate": 0.00019082405345211584,
      "loss": 2.021,
      "step": 216
    },
    {
      "epoch": 0.04822222222222222,
      "grad_norm": 0.8201193809509277,
      "learning_rate": 0.00019077951002227173,
      "loss": 2.0562,
      "step": 217
    },
    {
      "epoch": 0.04844444444444444,
      "grad_norm": 1.1453330516815186,
      "learning_rate": 0.00019073496659242762,
      "loss": 1.1635,
      "step": 218
    },
    {
      "epoch": 0.048666666666666664,
      "grad_norm": 1.24114990234375,
      "learning_rate": 0.0001906904231625835,
      "loss": 0.2453,
      "step": 219
    },
    {
      "epoch": 0.04888888888888889,
      "grad_norm": 0.9633159637451172,
      "learning_rate": 0.00019064587973273943,
      "loss": 2.1915,
      "step": 220
    },
    {
      "epoch": 0.04911111111111111,
      "grad_norm": 0.9862430691719055,
      "learning_rate": 0.00019060133630289535,
      "loss": 2.058,
      "step": 221
    },
    {
      "epoch": 0.04933333333333333,
      "grad_norm": 0.9856882691383362,
      "learning_rate": 0.00019055679287305124,
      "loss": 2.0013,
      "step": 222
    },
    {
      "epoch": 0.049555555555555554,
      "grad_norm": 0.8800470232963562,
      "learning_rate": 0.00019051224944320713,
      "loss": 1.7867,
      "step": 223
    },
    {
      "epoch": 0.049777777777777775,
      "grad_norm": 1.1182115077972412,
      "learning_rate": 0.00019046770601336302,
      "loss": 2.261,
      "step": 224
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9679086804389954,
      "learning_rate": 0.00019042316258351894,
      "loss": 1.9116,
      "step": 225
    },
    {
      "epoch": 0.050222222222222224,
      "grad_norm": 0.9191752076148987,
      "learning_rate": 0.00019037861915367486,
      "loss": 2.0246,
      "step": 226
    },
    {
      "epoch": 0.050444444444444445,
      "grad_norm": 1.2083994150161743,
      "learning_rate": 0.00019033407572383075,
      "loss": 1.1371,
      "step": 227
    },
    {
      "epoch": 0.050666666666666665,
      "grad_norm": 0.16673077642917633,
      "learning_rate": 0.00019028953229398665,
      "loss": 0.0239,
      "step": 228
    },
    {
      "epoch": 0.050888888888888886,
      "grad_norm": 0.6332354545593262,
      "learning_rate": 0.00019024498886414254,
      "loss": 0.8717,
      "step": 229
    },
    {
      "epoch": 0.051111111111111114,
      "grad_norm": 1.3816114664077759,
      "learning_rate": 0.00019020044543429846,
      "loss": 1.0904,
      "step": 230
    },
    {
      "epoch": 0.051333333333333335,
      "grad_norm": 0.9588896632194519,
      "learning_rate": 0.00019015590200445435,
      "loss": 1.856,
      "step": 231
    },
    {
      "epoch": 0.051555555555555556,
      "grad_norm": 1.0486406087875366,
      "learning_rate": 0.00019011135857461024,
      "loss": 2.0886,
      "step": 232
    },
    {
      "epoch": 0.05177777777777778,
      "grad_norm": 1.0114916563034058,
      "learning_rate": 0.00019006681514476616,
      "loss": 1.9553,
      "step": 233
    },
    {
      "epoch": 0.052,
      "grad_norm": 1.2497199773788452,
      "learning_rate": 0.00019002227171492205,
      "loss": 1.8981,
      "step": 234
    },
    {
      "epoch": 0.052222222222222225,
      "grad_norm": 1.0667015314102173,
      "learning_rate": 0.00018997772828507797,
      "loss": 1.8742,
      "step": 235
    },
    {
      "epoch": 0.052444444444444446,
      "grad_norm": 1.1447402238845825,
      "learning_rate": 0.00018993318485523386,
      "loss": 0.0403,
      "step": 236
    },
    {
      "epoch": 0.05266666666666667,
      "grad_norm": 0.6644120216369629,
      "learning_rate": 0.00018988864142538975,
      "loss": 0.9821,
      "step": 237
    },
    {
      "epoch": 0.05288888888888889,
      "grad_norm": 0.17127251625061035,
      "learning_rate": 0.00018984409799554565,
      "loss": 0.0277,
      "step": 238
    },
    {
      "epoch": 0.05311111111111111,
      "grad_norm": 0.22570157051086426,
      "learning_rate": 0.00018979955456570156,
      "loss": 0.0325,
      "step": 239
    },
    {
      "epoch": 0.05333333333333334,
      "grad_norm": 0.1881849616765976,
      "learning_rate": 0.00018975501113585748,
      "loss": 0.0283,
      "step": 240
    },
    {
      "epoch": 0.05355555555555556,
      "grad_norm": 0.15113665163516998,
      "learning_rate": 0.00018971046770601337,
      "loss": 0.0244,
      "step": 241
    },
    {
      "epoch": 0.05377777777777778,
      "grad_norm": 1.3109371662139893,
      "learning_rate": 0.00018966592427616927,
      "loss": 1.7582,
      "step": 242
    },
    {
      "epoch": 0.054,
      "grad_norm": 1.0388661623001099,
      "learning_rate": 0.00018962138084632516,
      "loss": 1.8784,
      "step": 243
    },
    {
      "epoch": 0.05422222222222222,
      "grad_norm": 1.4733574390411377,
      "learning_rate": 0.00018957683741648108,
      "loss": 0.5269,
      "step": 244
    },
    {
      "epoch": 0.05444444444444444,
      "grad_norm": 1.2060288190841675,
      "learning_rate": 0.000189532293986637,
      "loss": 1.9426,
      "step": 245
    },
    {
      "epoch": 0.05466666666666667,
      "grad_norm": 1.2038888931274414,
      "learning_rate": 0.0001894877505567929,
      "loss": 1.7192,
      "step": 246
    },
    {
      "epoch": 0.05488888888888889,
      "grad_norm": 1.2904036045074463,
      "learning_rate": 0.00018944320712694878,
      "loss": 1.5118,
      "step": 247
    },
    {
      "epoch": 0.05511111111111111,
      "grad_norm": 1.381375789642334,
      "learning_rate": 0.00018939866369710467,
      "loss": 0.5868,
      "step": 248
    },
    {
      "epoch": 0.05533333333333333,
      "grad_norm": 1.606933832168579,
      "learning_rate": 0.0001893541202672606,
      "loss": 0.9528,
      "step": 249
    },
    {
      "epoch": 0.05555555555555555,
      "grad_norm": 1.470360517501831,
      "learning_rate": 0.00018930957683741648,
      "loss": 1.0201,
      "step": 250
    },
    {
      "epoch": 0.05577777777777778,
      "grad_norm": 0.794981837272644,
      "learning_rate": 0.0001892650334075724,
      "loss": 1.1911,
      "step": 251
    },
    {
      "epoch": 0.056,
      "grad_norm": 0.828644871711731,
      "learning_rate": 0.0001892204899777283,
      "loss": 2.1317,
      "step": 252
    },
    {
      "epoch": 0.05622222222222222,
      "grad_norm": 0.10859879851341248,
      "learning_rate": 0.00018917594654788419,
      "loss": 0.0136,
      "step": 253
    },
    {
      "epoch": 0.05644444444444444,
      "grad_norm": 0.9162502288818359,
      "learning_rate": 0.0001891314031180401,
      "loss": 2.0738,
      "step": 254
    },
    {
      "epoch": 0.056666666666666664,
      "grad_norm": 0.7586551308631897,
      "learning_rate": 0.000189086859688196,
      "loss": 2.2056,
      "step": 255
    },
    {
      "epoch": 0.05688888888888889,
      "grad_norm": 0.07891134172677994,
      "learning_rate": 0.0001890423162583519,
      "loss": 0.0118,
      "step": 256
    },
    {
      "epoch": 0.05711111111111111,
      "grad_norm": 0.10793828219175339,
      "learning_rate": 0.0001889977728285078,
      "loss": 0.0169,
      "step": 257
    },
    {
      "epoch": 0.05733333333333333,
      "grad_norm": 0.1842554360628128,
      "learning_rate": 0.0001889532293986637,
      "loss": 0.0224,
      "step": 258
    },
    {
      "epoch": 0.057555555555555554,
      "grad_norm": 0.93645840883255,
      "learning_rate": 0.00018890868596881962,
      "loss": 2.4856,
      "step": 259
    },
    {
      "epoch": 0.057777777777777775,
      "grad_norm": 0.943806529045105,
      "learning_rate": 0.0001888641425389755,
      "loss": 2.0643,
      "step": 260
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.9614866971969604,
      "learning_rate": 0.0001888195991091314,
      "loss": 2.2795,
      "step": 261
    },
    {
      "epoch": 0.058222222222222224,
      "grad_norm": 2.3811917304992676,
      "learning_rate": 0.0001887750556792873,
      "loss": 0.1832,
      "step": 262
    },
    {
      "epoch": 0.058444444444444445,
      "grad_norm": 0.8956352472305298,
      "learning_rate": 0.00018873051224944324,
      "loss": 2.0827,
      "step": 263
    },
    {
      "epoch": 0.058666666666666666,
      "grad_norm": 1.0169123411178589,
      "learning_rate": 0.00018868596881959913,
      "loss": 1.9675,
      "step": 264
    },
    {
      "epoch": 0.058888888888888886,
      "grad_norm": 1.000707745552063,
      "learning_rate": 0.00018864142538975502,
      "loss": 2.0002,
      "step": 265
    },
    {
      "epoch": 0.059111111111111114,
      "grad_norm": 1.0070831775665283,
      "learning_rate": 0.00018859688195991091,
      "loss": 1.7638,
      "step": 266
    },
    {
      "epoch": 0.059333333333333335,
      "grad_norm": 0.9460271596908569,
      "learning_rate": 0.0001885523385300668,
      "loss": 1.8534,
      "step": 267
    },
    {
      "epoch": 0.059555555555555556,
      "grad_norm": 0.7087247371673584,
      "learning_rate": 0.00018850779510022272,
      "loss": 1.0192,
      "step": 268
    },
    {
      "epoch": 0.05977777777777778,
      "grad_norm": 0.16115489602088928,
      "learning_rate": 0.00018846325167037864,
      "loss": 0.0249,
      "step": 269
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.12376585602760315,
      "learning_rate": 0.00018841870824053454,
      "loss": 0.0232,
      "step": 270
    },
    {
      "epoch": 0.060222222222222226,
      "grad_norm": 0.1018003597855568,
      "learning_rate": 0.00018837416481069043,
      "loss": 0.0212,
      "step": 271
    },
    {
      "epoch": 0.060444444444444446,
      "grad_norm": 0.09464185684919357,
      "learning_rate": 0.00018832962138084635,
      "loss": 0.0198,
      "step": 272
    },
    {
      "epoch": 0.06066666666666667,
      "grad_norm": 0.6736340522766113,
      "learning_rate": 0.00018828507795100224,
      "loss": 0.9684,
      "step": 273
    },
    {
      "epoch": 0.06088888888888889,
      "grad_norm": 1.5240976810455322,
      "learning_rate": 0.00018824053452115813,
      "loss": 1.2137,
      "step": 274
    },
    {
      "epoch": 0.06111111111111111,
      "grad_norm": 1.1830003261566162,
      "learning_rate": 0.00018819599109131405,
      "loss": 2.0444,
      "step": 275
    },
    {
      "epoch": 0.06133333333333333,
      "grad_norm": 1.0134773254394531,
      "learning_rate": 0.00018815144766146994,
      "loss": 1.898,
      "step": 276
    },
    {
      "epoch": 0.06155555555555556,
      "grad_norm": 1.1037492752075195,
      "learning_rate": 0.00018810690423162586,
      "loss": 1.9418,
      "step": 277
    },
    {
      "epoch": 0.06177777777777778,
      "grad_norm": 1.1346395015716553,
      "learning_rate": 0.00018806236080178175,
      "loss": 1.855,
      "step": 278
    },
    {
      "epoch": 0.062,
      "grad_norm": 0.996393084526062,
      "learning_rate": 0.00018801781737193764,
      "loss": 1.6958,
      "step": 279
    },
    {
      "epoch": 0.06222222222222222,
      "grad_norm": 0.9884223937988281,
      "learning_rate": 0.00018797327394209353,
      "loss": 1.6775,
      "step": 280
    },
    {
      "epoch": 0.06244444444444444,
      "grad_norm": 1.2472928762435913,
      "learning_rate": 0.00018792873051224945,
      "loss": 1.8322,
      "step": 281
    },
    {
      "epoch": 0.06266666666666666,
      "grad_norm": 0.9096193313598633,
      "learning_rate": 0.00018788418708240537,
      "loss": 1.0656,
      "step": 282
    },
    {
      "epoch": 0.06288888888888888,
      "grad_norm": 0.1850435882806778,
      "learning_rate": 0.00018783964365256126,
      "loss": 0.0229,
      "step": 283
    },
    {
      "epoch": 0.06311111111111112,
      "grad_norm": 0.90726238489151,
      "learning_rate": 0.00018779510022271716,
      "loss": 1.882,
      "step": 284
    },
    {
      "epoch": 0.06333333333333334,
      "grad_norm": 0.9707450270652771,
      "learning_rate": 0.00018775055679287305,
      "loss": 1.5804,
      "step": 285
    },
    {
      "epoch": 0.06355555555555556,
      "grad_norm": 0.7034225463867188,
      "learning_rate": 0.00018770601336302897,
      "loss": 0.9516,
      "step": 286
    },
    {
      "epoch": 0.06377777777777778,
      "grad_norm": 0.742444634437561,
      "learning_rate": 0.00018766146993318489,
      "loss": 0.7442,
      "step": 287
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.6448124647140503,
      "learning_rate": 0.00018761692650334078,
      "loss": 0.8479,
      "step": 288
    },
    {
      "epoch": 0.06422222222222222,
      "grad_norm": 0.9502848982810974,
      "learning_rate": 0.00018757238307349667,
      "loss": 1.9858,
      "step": 289
    },
    {
      "epoch": 0.06444444444444444,
      "grad_norm": 0.966400146484375,
      "learning_rate": 0.00018752783964365256,
      "loss": 1.8273,
      "step": 290
    },
    {
      "epoch": 0.06466666666666666,
      "grad_norm": 0.9974849224090576,
      "learning_rate": 0.00018748329621380848,
      "loss": 1.0585,
      "step": 291
    },
    {
      "epoch": 0.06488888888888888,
      "grad_norm": 1.0357645750045776,
      "learning_rate": 0.00018743875278396437,
      "loss": 1.7982,
      "step": 292
    },
    {
      "epoch": 0.0651111111111111,
      "grad_norm": 1.1910970211029053,
      "learning_rate": 0.0001873942093541203,
      "loss": 1.6365,
      "step": 293
    },
    {
      "epoch": 0.06533333333333333,
      "grad_norm": 1.1395729780197144,
      "learning_rate": 0.00018734966592427618,
      "loss": 1.3202,
      "step": 294
    },
    {
      "epoch": 0.06555555555555556,
      "grad_norm": 1.0338728427886963,
      "learning_rate": 0.00018730512249443207,
      "loss": 1.6425,
      "step": 295
    },
    {
      "epoch": 0.06577777777777778,
      "grad_norm": 1.1223074197769165,
      "learning_rate": 0.000187260579064588,
      "loss": 1.7752,
      "step": 296
    },
    {
      "epoch": 0.066,
      "grad_norm": 1.4117039442062378,
      "learning_rate": 0.00018721603563474389,
      "loss": 1.4247,
      "step": 297
    },
    {
      "epoch": 0.06622222222222222,
      "grad_norm": 1.1682217121124268,
      "learning_rate": 0.00018717149220489978,
      "loss": 1.1538,
      "step": 298
    },
    {
      "epoch": 0.06644444444444444,
      "grad_norm": 0.995919942855835,
      "learning_rate": 0.0001871269487750557,
      "loss": 0.6315,
      "step": 299
    },
    {
      "epoch": 0.06666666666666667,
      "grad_norm": 1.2403305768966675,
      "learning_rate": 0.0001870824053452116,
      "loss": 1.0913,
      "step": 300
    },
    {
      "epoch": 0.06688888888888889,
      "grad_norm": 0.836203932762146,
      "learning_rate": 0.0001870378619153675,
      "loss": 1.2099,
      "step": 301
    },
    {
      "epoch": 0.06711111111111111,
      "grad_norm": 0.5399565696716309,
      "learning_rate": 0.0001869933184855234,
      "loss": 1.0186,
      "step": 302
    },
    {
      "epoch": 0.06733333333333333,
      "grad_norm": 0.5899677276611328,
      "learning_rate": 0.0001869487750556793,
      "loss": 1.0829,
      "step": 303
    },
    {
      "epoch": 0.06755555555555555,
      "grad_norm": 0.8014405965805054,
      "learning_rate": 0.00018690423162583518,
      "loss": 2.2654,
      "step": 304
    },
    {
      "epoch": 0.06777777777777778,
      "grad_norm": 0.6197260022163391,
      "learning_rate": 0.0001868596881959911,
      "loss": 1.3951,
      "step": 305
    },
    {
      "epoch": 0.068,
      "grad_norm": 0.8079295754432678,
      "learning_rate": 0.00018681514476614702,
      "loss": 2.2529,
      "step": 306
    },
    {
      "epoch": 0.06822222222222223,
      "grad_norm": 0.05890868231654167,
      "learning_rate": 0.0001867706013363029,
      "loss": 0.0114,
      "step": 307
    },
    {
      "epoch": 0.06844444444444445,
      "grad_norm": 0.7747896909713745,
      "learning_rate": 0.0001867260579064588,
      "loss": 2.1361,
      "step": 308
    },
    {
      "epoch": 0.06866666666666667,
      "grad_norm": 0.5295421481132507,
      "learning_rate": 0.0001866815144766147,
      "loss": 0.0238,
      "step": 309
    },
    {
      "epoch": 0.06888888888888889,
      "grad_norm": 0.12555040419101715,
      "learning_rate": 0.00018663697104677061,
      "loss": 0.019,
      "step": 310
    },
    {
      "epoch": 0.06911111111111111,
      "grad_norm": 0.11239951103925705,
      "learning_rate": 0.0001865924276169265,
      "loss": 0.0172,
      "step": 311
    },
    {
      "epoch": 0.06933333333333333,
      "grad_norm": 0.6421605944633484,
      "learning_rate": 0.00018654788418708243,
      "loss": 1.1458,
      "step": 312
    },
    {
      "epoch": 0.06955555555555555,
      "grad_norm": 0.9105124473571777,
      "learning_rate": 0.00018650334075723832,
      "loss": 1.9677,
      "step": 313
    },
    {
      "epoch": 0.06977777777777777,
      "grad_norm": 0.8628215789794922,
      "learning_rate": 0.0001864587973273942,
      "loss": 2.1423,
      "step": 314
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.829418957233429,
      "learning_rate": 0.00018641425389755013,
      "loss": 1.9332,
      "step": 315
    },
    {
      "epoch": 0.07022222222222223,
      "grad_norm": 0.8867872357368469,
      "learning_rate": 0.00018636971046770602,
      "loss": 1.8072,
      "step": 316
    },
    {
      "epoch": 0.07044444444444445,
      "grad_norm": 0.9867467284202576,
      "learning_rate": 0.0001863251670378619,
      "loss": 1.8567,
      "step": 317
    },
    {
      "epoch": 0.07066666666666667,
      "grad_norm": 0.9062153100967407,
      "learning_rate": 0.00018628062360801783,
      "loss": 1.8609,
      "step": 318
    },
    {
      "epoch": 0.07088888888888889,
      "grad_norm": 1.1173990964889526,
      "learning_rate": 0.00018623608017817372,
      "loss": 2.1317,
      "step": 319
    },
    {
      "epoch": 0.07111111111111111,
      "grad_norm": 0.10674909502267838,
      "learning_rate": 0.00018619153674832964,
      "loss": 0.021,
      "step": 320
    },
    {
      "epoch": 0.07133333333333333,
      "grad_norm": 0.1044670045375824,
      "learning_rate": 0.00018614699331848553,
      "loss": 0.0203,
      "step": 321
    },
    {
      "epoch": 0.07155555555555555,
      "grad_norm": 0.09776726365089417,
      "learning_rate": 0.00018610244988864142,
      "loss": 0.0192,
      "step": 322
    },
    {
      "epoch": 0.07177777777777777,
      "grad_norm": 1.127001404762268,
      "learning_rate": 0.00018605790645879732,
      "loss": 1.9981,
      "step": 323
    },
    {
      "epoch": 0.072,
      "grad_norm": 0.9693049788475037,
      "learning_rate": 0.00018601336302895324,
      "loss": 1.9003,
      "step": 324
    },
    {
      "epoch": 0.07222222222222222,
      "grad_norm": 0.9398884773254395,
      "learning_rate": 0.00018596881959910915,
      "loss": 1.791,
      "step": 325
    },
    {
      "epoch": 0.07244444444444445,
      "grad_norm": 1.1868557929992676,
      "learning_rate": 0.00018592427616926505,
      "loss": 1.7433,
      "step": 326
    },
    {
      "epoch": 0.07266666666666667,
      "grad_norm": 0.8618929982185364,
      "learning_rate": 0.00018587973273942094,
      "loss": 1.9456,
      "step": 327
    },
    {
      "epoch": 0.07288888888888889,
      "grad_norm": 0.9172239899635315,
      "learning_rate": 0.00018583518930957683,
      "loss": 1.7215,
      "step": 328
    },
    {
      "epoch": 0.07311111111111111,
      "grad_norm": 0.7097941040992737,
      "learning_rate": 0.00018579064587973275,
      "loss": 0.9636,
      "step": 329
    },
    {
      "epoch": 0.07333333333333333,
      "grad_norm": 0.7206116318702698,
      "learning_rate": 0.00018574610244988867,
      "loss": 0.9052,
      "step": 330
    },
    {
      "epoch": 0.07355555555555555,
      "grad_norm": 0.16659016907215118,
      "learning_rate": 0.00018570155902004456,
      "loss": 0.0253,
      "step": 331
    },
    {
      "epoch": 0.07377777777777778,
      "grad_norm": 1.0400179624557495,
      "learning_rate": 0.00018565701559020045,
      "loss": 1.6796,
      "step": 332
    },
    {
      "epoch": 0.074,
      "grad_norm": 0.9435672163963318,
      "learning_rate": 0.00018561247216035634,
      "loss": 0.8521,
      "step": 333
    },
    {
      "epoch": 0.07422222222222222,
      "grad_norm": 0.17228728532791138,
      "learning_rate": 0.00018556792873051226,
      "loss": 0.0243,
      "step": 334
    },
    {
      "epoch": 0.07444444444444444,
      "grad_norm": 0.7825049161911011,
      "learning_rate": 0.00018552338530066815,
      "loss": 0.8863,
      "step": 335
    },
    {
      "epoch": 0.07466666666666667,
      "grad_norm": 1.0093753337860107,
      "learning_rate": 0.00018547884187082407,
      "loss": 1.7281,
      "step": 336
    },
    {
      "epoch": 0.0748888888888889,
      "grad_norm": 1.0921690464019775,
      "learning_rate": 0.00018543429844097996,
      "loss": 1.9082,
      "step": 337
    },
    {
      "epoch": 0.07511111111111111,
      "grad_norm": 0.9549766182899475,
      "learning_rate": 0.00018538975501113586,
      "loss": 1.5888,
      "step": 338
    },
    {
      "epoch": 0.07533333333333334,
      "grad_norm": 1.225373387336731,
      "learning_rate": 0.00018534521158129177,
      "loss": 0.9245,
      "step": 339
    },
    {
      "epoch": 0.07555555555555556,
      "grad_norm": 1.1169060468673706,
      "learning_rate": 0.00018530066815144767,
      "loss": 0.1607,
      "step": 340
    },
    {
      "epoch": 0.07577777777777778,
      "grad_norm": 1.0006355047225952,
      "learning_rate": 0.00018525612472160356,
      "loss": 1.6492,
      "step": 341
    },
    {
      "epoch": 0.076,
      "grad_norm": 1.0142918825149536,
      "learning_rate": 0.00018521158129175948,
      "loss": 1.5253,
      "step": 342
    },
    {
      "epoch": 0.07622222222222222,
      "grad_norm": 0.9755372405052185,
      "learning_rate": 0.00018516703786191537,
      "loss": 1.6062,
      "step": 343
    },
    {
      "epoch": 0.07644444444444444,
      "grad_norm": 1.1996437311172485,
      "learning_rate": 0.0001851224944320713,
      "loss": 1.6435,
      "step": 344
    },
    {
      "epoch": 0.07666666666666666,
      "grad_norm": 1.0422567129135132,
      "learning_rate": 0.00018507795100222718,
      "loss": 1.6154,
      "step": 345
    },
    {
      "epoch": 0.0768888888888889,
      "grad_norm": 1.4139487743377686,
      "learning_rate": 0.00018503340757238307,
      "loss": 1.6726,
      "step": 346
    },
    {
      "epoch": 0.07711111111111112,
      "grad_norm": 1.4194035530090332,
      "learning_rate": 0.00018498886414253896,
      "loss": 0.7896,
      "step": 347
    },
    {
      "epoch": 0.07733333333333334,
      "grad_norm": 1.0651965141296387,
      "learning_rate": 0.0001849443207126949,
      "loss": 1.315,
      "step": 348
    },
    {
      "epoch": 0.07755555555555556,
      "grad_norm": 1.0679761171340942,
      "learning_rate": 0.0001848997772828508,
      "loss": 0.6501,
      "step": 349
    },
    {
      "epoch": 0.07777777777777778,
      "grad_norm": 1.3450720310211182,
      "learning_rate": 0.0001848552338530067,
      "loss": 0.7368,
      "step": 350
    },
    {
      "epoch": 0.078,
      "grad_norm": 0.6705034375190735,
      "learning_rate": 0.00018481069042316258,
      "loss": 1.0454,
      "step": 351
    },
    {
      "epoch": 0.07822222222222222,
      "grad_norm": 0.6062948703765869,
      "learning_rate": 0.00018476614699331848,
      "loss": 1.2369,
      "step": 352
    },
    {
      "epoch": 0.07844444444444444,
      "grad_norm": 0.562647819519043,
      "learning_rate": 0.0001847216035634744,
      "loss": 1.186,
      "step": 353
    },
    {
      "epoch": 0.07866666666666666,
      "grad_norm": 0.8215838670730591,
      "learning_rate": 0.00018467706013363031,
      "loss": 0.035,
      "step": 354
    },
    {
      "epoch": 0.07888888888888888,
      "grad_norm": 0.6498701572418213,
      "learning_rate": 0.0001846325167037862,
      "loss": 1.097,
      "step": 355
    },
    {
      "epoch": 0.0791111111111111,
      "grad_norm": 0.9627291560173035,
      "learning_rate": 0.0001845879732739421,
      "loss": 2.1706,
      "step": 356
    },
    {
      "epoch": 0.07933333333333334,
      "grad_norm": 0.8392488956451416,
      "learning_rate": 0.00018454342984409802,
      "loss": 2.1539,
      "step": 357
    },
    {
      "epoch": 0.07955555555555556,
      "grad_norm": 0.9004356265068054,
      "learning_rate": 0.0001844988864142539,
      "loss": 2.0944,
      "step": 358
    },
    {
      "epoch": 0.07977777777777778,
      "grad_norm": 0.8827551603317261,
      "learning_rate": 0.0001844543429844098,
      "loss": 2.1067,
      "step": 359
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9497137665748596,
      "learning_rate": 0.00018440979955456572,
      "loss": 2.0297,
      "step": 360
    },
    {
      "epoch": 0.08022222222222222,
      "grad_norm": 0.9877284169197083,
      "learning_rate": 0.0001843652561247216,
      "loss": 2.2244,
      "step": 361
    },
    {
      "epoch": 0.08044444444444444,
      "grad_norm": 0.9175549149513245,
      "learning_rate": 0.00018432071269487753,
      "loss": 1.9345,
      "step": 362
    },
    {
      "epoch": 0.08066666666666666,
      "grad_norm": 0.9076710343360901,
      "learning_rate": 0.00018427616926503342,
      "loss": 1.0216,
      "step": 363
    },
    {
      "epoch": 0.08088888888888889,
      "grad_norm": 0.9432811141014099,
      "learning_rate": 0.00018423162583518931,
      "loss": 1.9628,
      "step": 364
    },
    {
      "epoch": 0.0811111111111111,
      "grad_norm": 0.9499055743217468,
      "learning_rate": 0.0001841870824053452,
      "loss": 1.7118,
      "step": 365
    },
    {
      "epoch": 0.08133333333333333,
      "grad_norm": 0.8877860903739929,
      "learning_rate": 0.00018414253897550112,
      "loss": 1.9912,
      "step": 366
    },
    {
      "epoch": 0.08155555555555556,
      "grad_norm": 0.8878340125083923,
      "learning_rate": 0.00018409799554565704,
      "loss": 1.9927,
      "step": 367
    },
    {
      "epoch": 0.08177777777777778,
      "grad_norm": 0.9800185561180115,
      "learning_rate": 0.00018405345211581294,
      "loss": 1.9707,
      "step": 368
    },
    {
      "epoch": 0.082,
      "grad_norm": 0.7973767518997192,
      "learning_rate": 0.00018400890868596883,
      "loss": 1.0049,
      "step": 369
    },
    {
      "epoch": 0.08222222222222222,
      "grad_norm": 0.5313000679016113,
      "learning_rate": 0.00018396436525612472,
      "loss": 0.0383,
      "step": 370
    },
    {
      "epoch": 0.08244444444444445,
      "grad_norm": 0.6772900223731995,
      "learning_rate": 0.00018391982182628064,
      "loss": 0.9766,
      "step": 371
    },
    {
      "epoch": 0.08266666666666667,
      "grad_norm": 0.8833468556404114,
      "learning_rate": 0.00018387527839643656,
      "loss": 1.6439,
      "step": 372
    },
    {
      "epoch": 0.08288888888888889,
      "grad_norm": 1.4322277307510376,
      "learning_rate": 0.00018383073496659245,
      "loss": 0.9063,
      "step": 373
    },
    {
      "epoch": 0.08311111111111111,
      "grad_norm": 0.9009195566177368,
      "learning_rate": 0.00018378619153674834,
      "loss": 1.6793,
      "step": 374
    },
    {
      "epoch": 0.08333333333333333,
      "grad_norm": 1.0847877264022827,
      "learning_rate": 0.00018374164810690423,
      "loss": 1.9706,
      "step": 375
    },
    {
      "epoch": 0.08355555555555555,
      "grad_norm": 1.0555421113967896,
      "learning_rate": 0.00018369710467706015,
      "loss": 1.7983,
      "step": 376
    },
    {
      "epoch": 0.08377777777777778,
      "grad_norm": 1.0266549587249756,
      "learning_rate": 0.00018365256124721604,
      "loss": 1.5035,
      "step": 377
    },
    {
      "epoch": 0.084,
      "grad_norm": 1.0186165571212769,
      "learning_rate": 0.00018360801781737193,
      "loss": 1.6932,
      "step": 378
    },
    {
      "epoch": 0.08422222222222223,
      "grad_norm": 0.9408406019210815,
      "learning_rate": 0.00018356347438752785,
      "loss": 0.8905,
      "step": 379
    },
    {
      "epoch": 0.08444444444444445,
      "grad_norm": 0.11006919294595718,
      "learning_rate": 0.00018351893095768375,
      "loss": 0.0205,
      "step": 380
    },
    {
      "epoch": 0.08466666666666667,
      "grad_norm": 0.6134664416313171,
      "learning_rate": 0.00018347438752783966,
      "loss": 0.8471,
      "step": 381
    },
    {
      "epoch": 0.08488888888888889,
      "grad_norm": 0.9574106931686401,
      "learning_rate": 0.00018342984409799556,
      "loss": 1.5896,
      "step": 382
    },
    {
      "epoch": 0.08511111111111111,
      "grad_norm": 0.7427169680595398,
      "learning_rate": 0.00018338530066815145,
      "loss": 0.827,
      "step": 383
    },
    {
      "epoch": 0.08533333333333333,
      "grad_norm": 1.155964970588684,
      "learning_rate": 0.00018334075723830734,
      "loss": 1.7048,
      "step": 384
    },
    {
      "epoch": 0.08555555555555555,
      "grad_norm": 0.9930922389030457,
      "learning_rate": 0.00018329621380846326,
      "loss": 1.7281,
      "step": 385
    },
    {
      "epoch": 0.08577777777777777,
      "grad_norm": 1.097965955734253,
      "learning_rate": 0.00018325167037861918,
      "loss": 1.4611,
      "step": 386
    },
    {
      "epoch": 0.086,
      "grad_norm": 3.5689327716827393,
      "learning_rate": 0.00018320712694877507,
      "loss": 0.2311,
      "step": 387
    },
    {
      "epoch": 0.08622222222222223,
      "grad_norm": 1.0425963401794434,
      "learning_rate": 0.00018316258351893096,
      "loss": 1.7319,
      "step": 388
    },
    {
      "epoch": 0.08644444444444445,
      "grad_norm": 1.156557559967041,
      "learning_rate": 0.00018311804008908685,
      "loss": 1.505,
      "step": 389
    },
    {
      "epoch": 0.08666666666666667,
      "grad_norm": 1.0377581119537354,
      "learning_rate": 0.00018307349665924277,
      "loss": 1.6431,
      "step": 390
    },
    {
      "epoch": 0.08688888888888889,
      "grad_norm": 1.3024321794509888,
      "learning_rate": 0.0001830289532293987,
      "loss": 1.7298,
      "step": 391
    },
    {
      "epoch": 0.08711111111111111,
      "grad_norm": 1.0300354957580566,
      "learning_rate": 0.00018298440979955458,
      "loss": 1.6316,
      "step": 392
    },
    {
      "epoch": 0.08733333333333333,
      "grad_norm": 1.046774983406067,
      "learning_rate": 0.00018293986636971047,
      "loss": 1.5083,
      "step": 393
    },
    {
      "epoch": 0.08755555555555555,
      "grad_norm": 1.32634437084198,
      "learning_rate": 0.00018289532293986637,
      "loss": 1.6296,
      "step": 394
    },
    {
      "epoch": 0.08777777777777777,
      "grad_norm": 2.0546815395355225,
      "learning_rate": 0.00018285077951002229,
      "loss": 0.1777,
      "step": 395
    },
    {
      "epoch": 0.088,
      "grad_norm": 0.8895161151885986,
      "learning_rate": 0.00018280623608017818,
      "loss": 0.6762,
      "step": 396
    },
    {
      "epoch": 0.08822222222222222,
      "grad_norm": 0.8843004703521729,
      "learning_rate": 0.0001827616926503341,
      "loss": 0.8162,
      "step": 397
    },
    {
      "epoch": 0.08844444444444445,
      "grad_norm": 1.2154911756515503,
      "learning_rate": 0.00018271714922049,
      "loss": 1.4117,
      "step": 398
    },
    {
      "epoch": 0.08866666666666667,
      "grad_norm": 0.9329721331596375,
      "learning_rate": 0.00018267260579064588,
      "loss": 0.7163,
      "step": 399
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 1.130262017250061,
      "learning_rate": 0.0001826280623608018,
      "loss": 1.2238,
      "step": 400
    },
    {
      "epoch": 0.08911111111111111,
      "grad_norm": 0.5905479192733765,
      "learning_rate": 0.0001825835189309577,
      "loss": 1.2077,
      "step": 401
    },
    {
      "epoch": 0.08933333333333333,
      "grad_norm": 0.8485738039016724,
      "learning_rate": 0.00018253897550111358,
      "loss": 2.3062,
      "step": 402
    },
    {
      "epoch": 0.08955555555555555,
      "grad_norm": 0.16456733644008636,
      "learning_rate": 0.0001824944320712695,
      "loss": 0.0196,
      "step": 403
    },
    {
      "epoch": 0.08977777777777778,
      "grad_norm": 0.540153443813324,
      "learning_rate": 0.0001824498886414254,
      "loss": 1.0257,
      "step": 404
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.09294180572032928,
      "learning_rate": 0.0001824053452115813,
      "loss": 0.0164,
      "step": 405
    },
    {
      "epoch": 0.09022222222222222,
      "grad_norm": 0.7190051078796387,
      "learning_rate": 0.0001823608017817372,
      "loss": 2.529,
      "step": 406
    },
    {
      "epoch": 0.09044444444444444,
      "grad_norm": 0.7573959231376648,
      "learning_rate": 0.0001823162583518931,
      "loss": 0.95,
      "step": 407
    },
    {
      "epoch": 0.09066666666666667,
      "grad_norm": 0.13494077324867249,
      "learning_rate": 0.000182271714922049,
      "loss": 0.0194,
      "step": 408
    },
    {
      "epoch": 0.0908888888888889,
      "grad_norm": 0.11461230367422104,
      "learning_rate": 0.0001822271714922049,
      "loss": 0.0177,
      "step": 409
    },
    {
      "epoch": 0.09111111111111111,
      "grad_norm": 0.6490556001663208,
      "learning_rate": 0.00018218262806236082,
      "loss": 0.9504,
      "step": 410
    },
    {
      "epoch": 0.09133333333333334,
      "grad_norm": 0.8228305578231812,
      "learning_rate": 0.00018213808463251672,
      "loss": 2.0239,
      "step": 411
    },
    {
      "epoch": 0.09155555555555556,
      "grad_norm": 0.898671567440033,
      "learning_rate": 0.0001820935412026726,
      "loss": 2.1288,
      "step": 412
    },
    {
      "epoch": 0.09177777777777778,
      "grad_norm": 0.8291831016540527,
      "learning_rate": 0.0001820489977728285,
      "loss": 2.1028,
      "step": 413
    },
    {
      "epoch": 0.092,
      "grad_norm": 0.937248706817627,
      "learning_rate": 0.00018200445434298442,
      "loss": 2.1206,
      "step": 414
    },
    {
      "epoch": 0.09222222222222222,
      "grad_norm": 0.8091291785240173,
      "learning_rate": 0.00018195991091314034,
      "loss": 2.1107,
      "step": 415
    },
    {
      "epoch": 0.09244444444444444,
      "grad_norm": 0.9196256995201111,
      "learning_rate": 0.00018191536748329623,
      "loss": 2.1269,
      "step": 416
    },
    {
      "epoch": 0.09266666666666666,
      "grad_norm": 0.7829540371894836,
      "learning_rate": 0.00018187082405345212,
      "loss": 1.0987,
      "step": 417
    },
    {
      "epoch": 0.09288888888888888,
      "grad_norm": 0.9346635937690735,
      "learning_rate": 0.00018182628062360801,
      "loss": 1.8687,
      "step": 418
    },
    {
      "epoch": 0.09311111111111112,
      "grad_norm": 0.906173825263977,
      "learning_rate": 0.00018178173719376393,
      "loss": 1.9742,
      "step": 419
    },
    {
      "epoch": 0.09333333333333334,
      "grad_norm": 0.833707869052887,
      "learning_rate": 0.00018173719376391982,
      "loss": 1.8876,
      "step": 420
    },
    {
      "epoch": 0.09355555555555556,
      "grad_norm": 0.8492452502250671,
      "learning_rate": 0.00018169265033407574,
      "loss": 1.9469,
      "step": 421
    },
    {
      "epoch": 0.09377777777777778,
      "grad_norm": 0.846052885055542,
      "learning_rate": 0.00018164810690423164,
      "loss": 1.887,
      "step": 422
    },
    {
      "epoch": 0.094,
      "grad_norm": 0.7105516195297241,
      "learning_rate": 0.00018160356347438753,
      "loss": 0.8668,
      "step": 423
    },
    {
      "epoch": 0.09422222222222222,
      "grad_norm": 0.40860867500305176,
      "learning_rate": 0.00018155902004454345,
      "loss": 0.025,
      "step": 424
    },
    {
      "epoch": 0.09444444444444444,
      "grad_norm": 0.08055032789707184,
      "learning_rate": 0.00018151447661469934,
      "loss": 0.0187,
      "step": 425
    },
    {
      "epoch": 0.09466666666666666,
      "grad_norm": 0.9038375616073608,
      "learning_rate": 0.00018146993318485523,
      "loss": 1.8731,
      "step": 426
    },
    {
      "epoch": 0.09488888888888888,
      "grad_norm": 1.0034536123275757,
      "learning_rate": 0.00018142538975501115,
      "loss": 1.6233,
      "step": 427
    },
    {
      "epoch": 0.0951111111111111,
      "grad_norm": 1.02449631690979,
      "learning_rate": 0.00018138084632516704,
      "loss": 1.9286,
      "step": 428
    },
    {
      "epoch": 0.09533333333333334,
      "grad_norm": 0.9773498773574829,
      "learning_rate": 0.00018133630289532296,
      "loss": 1.7909,
      "step": 429
    },
    {
      "epoch": 0.09555555555555556,
      "grad_norm": 1.028286099433899,
      "learning_rate": 0.00018129175946547885,
      "loss": 1.5749,
      "step": 430
    },
    {
      "epoch": 0.09577777777777778,
      "grad_norm": 0.18419918417930603,
      "learning_rate": 0.00018124721603563474,
      "loss": 0.0204,
      "step": 431
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.08870963752269745,
      "learning_rate": 0.00018120267260579063,
      "loss": 0.0192,
      "step": 432
    },
    {
      "epoch": 0.09622222222222222,
      "grad_norm": 0.08107131719589233,
      "learning_rate": 0.00018115812917594658,
      "loss": 0.0187,
      "step": 433
    },
    {
      "epoch": 0.09644444444444444,
      "grad_norm": 0.07466530054807663,
      "learning_rate": 0.00018111358574610247,
      "loss": 0.0179,
      "step": 434
    },
    {
      "epoch": 0.09666666666666666,
      "grad_norm": 0.7753478288650513,
      "learning_rate": 0.00018106904231625836,
      "loss": 0.9339,
      "step": 435
    },
    {
      "epoch": 0.09688888888888889,
      "grad_norm": 1.0347474813461304,
      "learning_rate": 0.00018102449888641426,
      "loss": 1.786,
      "step": 436
    },
    {
      "epoch": 0.0971111111111111,
      "grad_norm": 1.0018888711929321,
      "learning_rate": 0.00018097995545657015,
      "loss": 1.0405,
      "step": 437
    },
    {
      "epoch": 0.09733333333333333,
      "grad_norm": 0.12741631269454956,
      "learning_rate": 0.00018093541202672607,
      "loss": 0.0246,
      "step": 438
    },
    {
      "epoch": 0.09755555555555556,
      "grad_norm": 0.720927357673645,
      "learning_rate": 0.00018089086859688199,
      "loss": 0.7423,
      "step": 439
    },
    {
      "epoch": 0.09777777777777778,
      "grad_norm": 1.0436959266662598,
      "learning_rate": 0.00018084632516703788,
      "loss": 1.7813,
      "step": 440
    },
    {
      "epoch": 0.098,
      "grad_norm": 1.33415687084198,
      "learning_rate": 0.00018080178173719377,
      "loss": 0.1569,
      "step": 441
    },
    {
      "epoch": 0.09822222222222222,
      "grad_norm": 1.0239771604537964,
      "learning_rate": 0.0001807572383073497,
      "loss": 1.4739,
      "step": 442
    },
    {
      "epoch": 0.09844444444444445,
      "grad_norm": 1.022449254989624,
      "learning_rate": 0.00018071269487750558,
      "loss": 1.4619,
      "step": 443
    },
    {
      "epoch": 0.09866666666666667,
      "grad_norm": 1.7489303350448608,
      "learning_rate": 0.00018066815144766147,
      "loss": 0.1832,
      "step": 444
    },
    {
      "epoch": 0.09888888888888889,
      "grad_norm": 0.7640416026115417,
      "learning_rate": 0.00018062360801781736,
      "loss": 0.0761,
      "step": 445
    },
    {
      "epoch": 0.09911111111111111,
      "grad_norm": 1.0594218969345093,
      "learning_rate": 0.00018057906458797328,
      "loss": 1.0882,
      "step": 446
    },
    {
      "epoch": 0.09933333333333333,
      "grad_norm": 1.0107744932174683,
      "learning_rate": 0.0001805345211581292,
      "loss": 1.2503,
      "step": 447
    },
    {
      "epoch": 0.09955555555555555,
      "grad_norm": 1.194696307182312,
      "learning_rate": 0.0001804899777282851,
      "loss": 1.3876,
      "step": 448
    },
    {
      "epoch": 0.09977777777777778,
      "grad_norm": 0.9181436896324158,
      "learning_rate": 0.00018044543429844098,
      "loss": 0.6689,
      "step": 449
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9961203932762146,
      "learning_rate": 0.00018040089086859688,
      "loss": 0.9308,
      "step": 450
    },
    {
      "epoch": 0.10022222222222223,
      "grad_norm": 0.6209729909896851,
      "learning_rate": 0.00018035634743875277,
      "loss": 1.0967,
      "step": 451
    },
    {
      "epoch": 0.10044444444444445,
      "grad_norm": 0.7032824158668518,
      "learning_rate": 0.00018031180400890871,
      "loss": 1.3153,
      "step": 452
    },
    {
      "epoch": 0.10066666666666667,
      "grad_norm": 0.6422486305236816,
      "learning_rate": 0.0001802672605790646,
      "loss": 1.1794,
      "step": 453
    },
    {
      "epoch": 0.10088888888888889,
      "grad_norm": 0.5714130401611328,
      "learning_rate": 0.0001802227171492205,
      "loss": 1.1083,
      "step": 454
    },
    {
      "epoch": 0.10111111111111111,
      "grad_norm": 0.9129486680030823,
      "learning_rate": 0.0001801781737193764,
      "loss": 2.1855,
      "step": 455
    },
    {
      "epoch": 0.10133333333333333,
      "grad_norm": 0.7295732498168945,
      "learning_rate": 0.0001801336302895323,
      "loss": 0.0452,
      "step": 456
    },
    {
      "epoch": 0.10155555555555555,
      "grad_norm": 0.5325131416320801,
      "learning_rate": 0.0001800890868596882,
      "loss": 0.0173,
      "step": 457
    },
    {
      "epoch": 0.10177777777777777,
      "grad_norm": 1.0180753469467163,
      "learning_rate": 0.00018004454342984412,
      "loss": 2.1597,
      "step": 458
    },
    {
      "epoch": 0.102,
      "grad_norm": 0.9186641573905945,
      "learning_rate": 0.00018,
      "loss": 2.1838,
      "step": 459
    },
    {
      "epoch": 0.10222222222222223,
      "grad_norm": 0.9034368991851807,
      "learning_rate": 0.0001799554565701559,
      "loss": 2.1579,
      "step": 460
    },
    {
      "epoch": 0.10244444444444445,
      "grad_norm": 1.2893496751785278,
      "learning_rate": 0.00017991091314031182,
      "loss": 2.286,
      "step": 461
    },
    {
      "epoch": 0.10266666666666667,
      "grad_norm": 0.9287530183792114,
      "learning_rate": 0.00017986636971046771,
      "loss": 2.3557,
      "step": 462
    },
    {
      "epoch": 0.10288888888888889,
      "grad_norm": 1.0746686458587646,
      "learning_rate": 0.0001798218262806236,
      "loss": 2.1398,
      "step": 463
    },
    {
      "epoch": 0.10311111111111111,
      "grad_norm": 0.9413710236549377,
      "learning_rate": 0.00017977728285077952,
      "loss": 1.0248,
      "step": 464
    },
    {
      "epoch": 0.10333333333333333,
      "grad_norm": 0.9778950810432434,
      "learning_rate": 0.00017973273942093542,
      "loss": 2.1938,
      "step": 465
    },
    {
      "epoch": 0.10355555555555555,
      "grad_norm": 0.9324243068695068,
      "learning_rate": 0.00017968819599109134,
      "loss": 2.0434,
      "step": 466
    },
    {
      "epoch": 0.10377777777777777,
      "grad_norm": 0.8695129156112671,
      "learning_rate": 0.00017964365256124723,
      "loss": 1.858,
      "step": 467
    },
    {
      "epoch": 0.104,
      "grad_norm": 1.1168073415756226,
      "learning_rate": 0.00017959910913140312,
      "loss": 2.002,
      "step": 468
    },
    {
      "epoch": 0.10422222222222222,
      "grad_norm": 0.9692973494529724,
      "learning_rate": 0.000179554565701559,
      "loss": 2.1136,
      "step": 469
    },
    {
      "epoch": 0.10444444444444445,
      "grad_norm": 0.23175086081027985,
      "learning_rate": 0.00017951002227171493,
      "loss": 0.0257,
      "step": 470
    },
    {
      "epoch": 0.10466666666666667,
      "grad_norm": 0.12664885818958282,
      "learning_rate": 0.00017946547884187085,
      "loss": 0.024,
      "step": 471
    },
    {
      "epoch": 0.10488888888888889,
      "grad_norm": 0.11350343376398087,
      "learning_rate": 0.00017942093541202674,
      "loss": 0.0225,
      "step": 472
    },
    {
      "epoch": 0.10511111111111111,
      "grad_norm": 0.0981689915060997,
      "learning_rate": 0.00017937639198218263,
      "loss": 0.021,
      "step": 473
    },
    {
      "epoch": 0.10533333333333333,
      "grad_norm": 0.7038472294807434,
      "learning_rate": 0.00017933184855233852,
      "loss": 0.8939,
      "step": 474
    },
    {
      "epoch": 0.10555555555555556,
      "grad_norm": 1.1517345905303955,
      "learning_rate": 0.00017928730512249444,
      "loss": 1.8588,
      "step": 475
    },
    {
      "epoch": 0.10577777777777778,
      "grad_norm": 1.0799177885055542,
      "learning_rate": 0.00017924276169265036,
      "loss": 1.8593,
      "step": 476
    },
    {
      "epoch": 0.106,
      "grad_norm": 1.1787912845611572,
      "learning_rate": 0.00017919821826280625,
      "loss": 1.9679,
      "step": 477
    },
    {
      "epoch": 0.10622222222222222,
      "grad_norm": 0.9503030776977539,
      "learning_rate": 0.00017915367483296215,
      "loss": 1.7726,
      "step": 478
    },
    {
      "epoch": 0.10644444444444444,
      "grad_norm": 0.9899899959564209,
      "learning_rate": 0.00017910913140311804,
      "loss": 1.6583,
      "step": 479
    },
    {
      "epoch": 0.10666666666666667,
      "grad_norm": 0.8547096848487854,
      "learning_rate": 0.00017906458797327396,
      "loss": 0.9547,
      "step": 480
    },
    {
      "epoch": 0.1068888888888889,
      "grad_norm": 0.08154784888029099,
      "learning_rate": 0.00017902004454342985,
      "loss": 0.0182,
      "step": 481
    },
    {
      "epoch": 0.10711111111111112,
      "grad_norm": 1.203589916229248,
      "learning_rate": 0.00017897550111358577,
      "loss": 2.0223,
      "step": 482
    },
    {
      "epoch": 0.10733333333333334,
      "grad_norm": 0.5660258531570435,
      "learning_rate": 0.00017893095768374166,
      "loss": 0.0261,
      "step": 483
    },
    {
      "epoch": 0.10755555555555556,
      "grad_norm": 0.12165828794240952,
      "learning_rate": 0.00017888641425389755,
      "loss": 0.0219,
      "step": 484
    },
    {
      "epoch": 0.10777777777777778,
      "grad_norm": 0.1016518846154213,
      "learning_rate": 0.00017884187082405347,
      "loss": 0.0209,
      "step": 485
    },
    {
      "epoch": 0.108,
      "grad_norm": 0.7895167469978333,
      "learning_rate": 0.00017879732739420936,
      "loss": 0.781,
      "step": 486
    },
    {
      "epoch": 0.10822222222222222,
      "grad_norm": 0.9849477410316467,
      "learning_rate": 0.00017875278396436525,
      "loss": 1.7806,
      "step": 487
    },
    {
      "epoch": 0.10844444444444444,
      "grad_norm": 0.9108963012695312,
      "learning_rate": 0.00017870824053452117,
      "loss": 1.4698,
      "step": 488
    },
    {
      "epoch": 0.10866666666666666,
      "grad_norm": 1.078587532043457,
      "learning_rate": 0.00017866369710467706,
      "loss": 1.5214,
      "step": 489
    },
    {
      "epoch": 0.10888888888888888,
      "grad_norm": 1.0179158449172974,
      "learning_rate": 0.00017861915367483298,
      "loss": 1.8058,
      "step": 490
    },
    {
      "epoch": 0.10911111111111112,
      "grad_norm": 1.0984735488891602,
      "learning_rate": 0.00017857461024498887,
      "loss": 1.5218,
      "step": 491
    },
    {
      "epoch": 0.10933333333333334,
      "grad_norm": 1.056720495223999,
      "learning_rate": 0.00017853006681514477,
      "loss": 1.5723,
      "step": 492
    },
    {
      "epoch": 0.10955555555555556,
      "grad_norm": 1.1733689308166504,
      "learning_rate": 0.00017848552338530066,
      "loss": 1.5902,
      "step": 493
    },
    {
      "epoch": 0.10977777777777778,
      "grad_norm": 1.0748651027679443,
      "learning_rate": 0.00017844097995545658,
      "loss": 0.7313,
      "step": 494
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1146743297576904,
      "learning_rate": 0.0001783964365256125,
      "loss": 1.5575,
      "step": 495
    },
    {
      "epoch": 0.11022222222222222,
      "grad_norm": 0.9686447978019714,
      "learning_rate": 0.0001783518930957684,
      "loss": 1.26,
      "step": 496
    },
    {
      "epoch": 0.11044444444444444,
      "grad_norm": 0.9595248103141785,
      "learning_rate": 0.00017830734966592428,
      "loss": 1.2048,
      "step": 497
    },
    {
      "epoch": 0.11066666666666666,
      "grad_norm": 0.655178964138031,
      "learning_rate": 0.00017826280623608017,
      "loss": 0.0465,
      "step": 498
    },
    {
      "epoch": 0.11088888888888888,
      "grad_norm": 0.8750471472740173,
      "learning_rate": 0.0001782182628062361,
      "loss": 0.8124,
      "step": 499
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 0.869178831577301,
      "learning_rate": 0.000178173719376392,
      "loss": 0.5776,
      "step": 500
    },
    {
      "epoch": 0.11133333333333334,
      "grad_norm": 0.641015887260437,
      "learning_rate": 0.0001781291759465479,
      "loss": 0.9489,
      "step": 501
    },
    {
      "epoch": 0.11155555555555556,
      "grad_norm": 0.6184130907058716,
      "learning_rate": 0.0001780846325167038,
      "loss": 1.1484,
      "step": 502
    },
    {
      "epoch": 0.11177777777777778,
      "grad_norm": 0.6213683485984802,
      "learning_rate": 0.00017804008908685968,
      "loss": 1.2721,
      "step": 503
    },
    {
      "epoch": 0.112,
      "grad_norm": 0.857179582118988,
      "learning_rate": 0.0001779955456570156,
      "loss": 2.2039,
      "step": 504
    },
    {
      "epoch": 0.11222222222222222,
      "grad_norm": 0.9155113101005554,
      "learning_rate": 0.0001779510022271715,
      "loss": 2.0145,
      "step": 505
    },
    {
      "epoch": 0.11244444444444444,
      "grad_norm": 0.8243066668510437,
      "learning_rate": 0.00017790645879732741,
      "loss": 0.0276,
      "step": 506
    },
    {
      "epoch": 0.11266666666666666,
      "grad_norm": 0.13185575604438782,
      "learning_rate": 0.0001778619153674833,
      "loss": 0.0172,
      "step": 507
    },
    {
      "epoch": 0.11288888888888889,
      "grad_norm": 0.5929608345031738,
      "learning_rate": 0.0001778173719376392,
      "loss": 1.1099,
      "step": 508
    },
    {
      "epoch": 0.1131111111111111,
      "grad_norm": 0.821940004825592,
      "learning_rate": 0.00017777282850779512,
      "loss": 2.2026,
      "step": 509
    },
    {
      "epoch": 0.11333333333333333,
      "grad_norm": 0.9007627964019775,
      "learning_rate": 0.000177728285077951,
      "loss": 2.0279,
      "step": 510
    },
    {
      "epoch": 0.11355555555555556,
      "grad_norm": 0.8976812958717346,
      "learning_rate": 0.0001776837416481069,
      "loss": 2.1622,
      "step": 511
    },
    {
      "epoch": 0.11377777777777778,
      "grad_norm": 0.8461301326751709,
      "learning_rate": 0.00017763919821826282,
      "loss": 2.0182,
      "step": 512
    },
    {
      "epoch": 0.114,
      "grad_norm": 0.9555535912513733,
      "learning_rate": 0.0001775946547884187,
      "loss": 2.3448,
      "step": 513
    },
    {
      "epoch": 0.11422222222222222,
      "grad_norm": 0.6102076768875122,
      "learning_rate": 0.00017755011135857463,
      "loss": 0.8332,
      "step": 514
    },
    {
      "epoch": 0.11444444444444445,
      "grad_norm": 0.9315692782402039,
      "learning_rate": 0.00017750556792873052,
      "loss": 1.9508,
      "step": 515
    },
    {
      "epoch": 0.11466666666666667,
      "grad_norm": 0.7640470266342163,
      "learning_rate": 0.0001774610244988864,
      "loss": 1.9453,
      "step": 516
    },
    {
      "epoch": 0.11488888888888889,
      "grad_norm": 0.9399512410163879,
      "learning_rate": 0.0001774164810690423,
      "loss": 1.8861,
      "step": 517
    },
    {
      "epoch": 0.11511111111111111,
      "grad_norm": 0.9297406673431396,
      "learning_rate": 0.00017737193763919822,
      "loss": 1.956,
      "step": 518
    },
    {
      "epoch": 0.11533333333333333,
      "grad_norm": 0.9970346689224243,
      "learning_rate": 0.00017732739420935414,
      "loss": 1.9636,
      "step": 519
    },
    {
      "epoch": 0.11555555555555555,
      "grad_norm": 0.1605161428451538,
      "learning_rate": 0.00017728285077951003,
      "loss": 0.0191,
      "step": 520
    },
    {
      "epoch": 0.11577777777777777,
      "grad_norm": 0.09369970113039017,
      "learning_rate": 0.00017723830734966593,
      "loss": 0.0181,
      "step": 521
    },
    {
      "epoch": 0.116,
      "grad_norm": 0.6291989088058472,
      "learning_rate": 0.00017719376391982182,
      "loss": 1.0345,
      "step": 522
    },
    {
      "epoch": 0.11622222222222223,
      "grad_norm": 1.45033597946167,
      "learning_rate": 0.00017714922048997774,
      "loss": 0.9683,
      "step": 523
    },
    {
      "epoch": 0.11644444444444445,
      "grad_norm": 0.7875054478645325,
      "learning_rate": 0.00017710467706013363,
      "loss": 0.087,
      "step": 524
    },
    {
      "epoch": 0.11666666666666667,
      "grad_norm": 0.9223285913467407,
      "learning_rate": 0.00017706013363028955,
      "loss": 1.9496,
      "step": 525
    },
    {
      "epoch": 0.11688888888888889,
      "grad_norm": 1.0288112163543701,
      "learning_rate": 0.00017701559020044544,
      "loss": 1.9752,
      "step": 526
    },
    {
      "epoch": 0.11711111111111111,
      "grad_norm": 0.9108681678771973,
      "learning_rate": 0.00017697104677060133,
      "loss": 1.8513,
      "step": 527
    },
    {
      "epoch": 0.11733333333333333,
      "grad_norm": 0.996670126914978,
      "learning_rate": 0.00017692650334075725,
      "loss": 1.9809,
      "step": 528
    },
    {
      "epoch": 0.11755555555555555,
      "grad_norm": 0.9788877964019775,
      "learning_rate": 0.00017688195991091314,
      "loss": 1.8143,
      "step": 529
    },
    {
      "epoch": 0.11777777777777777,
      "grad_norm": 0.751679539680481,
      "learning_rate": 0.00017683741648106903,
      "loss": 0.9896,
      "step": 530
    },
    {
      "epoch": 0.118,
      "grad_norm": 0.07957503199577332,
      "learning_rate": 0.00017679287305122495,
      "loss": 0.019,
      "step": 531
    },
    {
      "epoch": 0.11822222222222223,
      "grad_norm": 0.06963716447353363,
      "learning_rate": 0.00017674832962138087,
      "loss": 0.0187,
      "step": 532
    },
    {
      "epoch": 0.11844444444444445,
      "grad_norm": 0.6804819703102112,
      "learning_rate": 0.00017670378619153676,
      "loss": 0.8408,
      "step": 533
    },
    {
      "epoch": 0.11866666666666667,
      "grad_norm": 1.1230911016464233,
      "learning_rate": 0.00017665924276169266,
      "loss": 2.0286,
      "step": 534
    },
    {
      "epoch": 0.11888888888888889,
      "grad_norm": 0.2663741409778595,
      "learning_rate": 0.00017661469933184855,
      "loss": 0.0247,
      "step": 535
    },
    {
      "epoch": 0.11911111111111111,
      "grad_norm": 0.09472407400608063,
      "learning_rate": 0.00017657015590200444,
      "loss": 0.0229,
      "step": 536
    },
    {
      "epoch": 0.11933333333333333,
      "grad_norm": 0.08869557082653046,
      "learning_rate": 0.00017652561247216039,
      "loss": 0.0217,
      "step": 537
    },
    {
      "epoch": 0.11955555555555555,
      "grad_norm": 0.9782452583312988,
      "learning_rate": 0.00017648106904231628,
      "loss": 1.7486,
      "step": 538
    },
    {
      "epoch": 0.11977777777777777,
      "grad_norm": 1.1253771781921387,
      "learning_rate": 0.00017643652561247217,
      "loss": 1.7666,
      "step": 539
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0017434358596802,
      "learning_rate": 0.00017639198218262806,
      "loss": 1.6833,
      "step": 540
    },
    {
      "epoch": 0.12022222222222222,
      "grad_norm": 1.0089191198349,
      "learning_rate": 0.00017634743875278398,
      "loss": 1.7523,
      "step": 541
    },
    {
      "epoch": 0.12044444444444445,
      "grad_norm": 1.0553641319274902,
      "learning_rate": 0.00017630289532293987,
      "loss": 1.647,
      "step": 542
    },
    {
      "epoch": 0.12066666666666667,
      "grad_norm": 1.0351313352584839,
      "learning_rate": 0.0001762583518930958,
      "loss": 1.6262,
      "step": 543
    },
    {
      "epoch": 0.12088888888888889,
      "grad_norm": 1.1512898206710815,
      "learning_rate": 0.00017621380846325168,
      "loss": 1.7247,
      "step": 544
    },
    {
      "epoch": 0.12111111111111111,
      "grad_norm": 1.083144187927246,
      "learning_rate": 0.00017616926503340757,
      "loss": 1.491,
      "step": 545
    },
    {
      "epoch": 0.12133333333333333,
      "grad_norm": 1.0339199304580688,
      "learning_rate": 0.0001761247216035635,
      "loss": 1.31,
      "step": 546
    },
    {
      "epoch": 0.12155555555555556,
      "grad_norm": 1.1164246797561646,
      "learning_rate": 0.00017608017817371938,
      "loss": 1.3082,
      "step": 547
    },
    {
      "epoch": 0.12177777777777778,
      "grad_norm": 1.182440161705017,
      "learning_rate": 0.00017603563474387528,
      "loss": 0.9501,
      "step": 548
    },
    {
      "epoch": 0.122,
      "grad_norm": 2.1058719158172607,
      "learning_rate": 0.0001759910913140312,
      "loss": 0.073,
      "step": 549
    },
    {
      "epoch": 0.12222222222222222,
      "grad_norm": 1.0356662273406982,
      "learning_rate": 0.0001759465478841871,
      "loss": 0.8053,
      "step": 550
    },
    {
      "epoch": 0.12244444444444444,
      "grad_norm": 0.09837248176336288,
      "learning_rate": 0.000175902004454343,
      "loss": 0.0157,
      "step": 551
    },
    {
      "epoch": 0.12266666666666666,
      "grad_norm": 0.5652306079864502,
      "learning_rate": 0.0001758574610244989,
      "loss": 1.1591,
      "step": 552
    },
    {
      "epoch": 0.1228888888888889,
      "grad_norm": 0.07858684659004211,
      "learning_rate": 0.0001758129175946548,
      "loss": 0.0148,
      "step": 553
    },
    {
      "epoch": 0.12311111111111112,
      "grad_norm": 0.8684788942337036,
      "learning_rate": 0.00017576837416481068,
      "loss": 2.0654,
      "step": 554
    },
    {
      "epoch": 0.12333333333333334,
      "grad_norm": 0.067326620221138,
      "learning_rate": 0.0001757238307349666,
      "loss": 0.0133,
      "step": 555
    },
    {
      "epoch": 0.12355555555555556,
      "grad_norm": 0.9072126150131226,
      "learning_rate": 0.00017567928730512252,
      "loss": 2.1198,
      "step": 556
    },
    {
      "epoch": 0.12377777777777778,
      "grad_norm": 0.6821531653404236,
      "learning_rate": 0.0001756347438752784,
      "loss": 1.1766,
      "step": 557
    },
    {
      "epoch": 0.124,
      "grad_norm": 0.8932785987854004,
      "learning_rate": 0.0001755902004454343,
      "loss": 2.179,
      "step": 558
    },
    {
      "epoch": 0.12422222222222222,
      "grad_norm": 0.8057580590248108,
      "learning_rate": 0.0001755456570155902,
      "loss": 1.7459,
      "step": 559
    },
    {
      "epoch": 0.12444444444444444,
      "grad_norm": 0.9386541843414307,
      "learning_rate": 0.00017550111358574611,
      "loss": 2.0665,
      "step": 560
    },
    {
      "epoch": 0.12466666666666666,
      "grad_norm": 0.820952296257019,
      "learning_rate": 0.00017545657015590203,
      "loss": 2.0064,
      "step": 561
    },
    {
      "epoch": 0.12488888888888888,
      "grad_norm": 0.8475558757781982,
      "learning_rate": 0.00017541202672605792,
      "loss": 2.1712,
      "step": 562
    },
    {
      "epoch": 0.12511111111111112,
      "grad_norm": 0.9829273223876953,
      "learning_rate": 0.00017536748329621382,
      "loss": 2.2744,
      "step": 563
    },
    {
      "epoch": 0.12533333333333332,
      "grad_norm": 0.9070901870727539,
      "learning_rate": 0.0001753229398663697,
      "loss": 1.0953,
      "step": 564
    },
    {
      "epoch": 0.12555555555555556,
      "grad_norm": 0.6405161619186401,
      "learning_rate": 0.00017527839643652563,
      "loss": 0.967,
      "step": 565
    },
    {
      "epoch": 0.12577777777777777,
      "grad_norm": 1.083997368812561,
      "learning_rate": 0.00017523385300668152,
      "loss": 2.2172,
      "step": 566
    },
    {
      "epoch": 0.126,
      "grad_norm": 0.8409084677696228,
      "learning_rate": 0.00017518930957683744,
      "loss": 1.9096,
      "step": 567
    },
    {
      "epoch": 0.12622222222222224,
      "grad_norm": 0.9304640889167786,
      "learning_rate": 0.00017514476614699333,
      "loss": 1.9116,
      "step": 568
    },
    {
      "epoch": 0.12644444444444444,
      "grad_norm": 0.07137549668550491,
      "learning_rate": 0.00017510022271714922,
      "loss": 0.0168,
      "step": 569
    },
    {
      "epoch": 0.12666666666666668,
      "grad_norm": 0.06879518926143646,
      "learning_rate": 0.00017505567928730514,
      "loss": 0.0169,
      "step": 570
    },
    {
      "epoch": 0.12688888888888888,
      "grad_norm": 0.7226182818412781,
      "learning_rate": 0.00017501113585746103,
      "loss": 0.9012,
      "step": 571
    },
    {
      "epoch": 0.12711111111111112,
      "grad_norm": 0.9200636148452759,
      "learning_rate": 0.00017496659242761692,
      "loss": 1.8988,
      "step": 572
    },
    {
      "epoch": 0.12733333333333333,
      "grad_norm": 1.2475532293319702,
      "learning_rate": 0.00017492204899777284,
      "loss": 2.0776,
      "step": 573
    },
    {
      "epoch": 0.12755555555555556,
      "grad_norm": 0.07300052046775818,
      "learning_rate": 0.00017487750556792873,
      "loss": 0.019,
      "step": 574
    },
    {
      "epoch": 0.12777777777777777,
      "grad_norm": 0.06808430701494217,
      "learning_rate": 0.00017483296213808465,
      "loss": 0.0189,
      "step": 575
    },
    {
      "epoch": 0.128,
      "grad_norm": 1.7508424520492554,
      "learning_rate": 0.00017478841870824055,
      "loss": 2.1227,
      "step": 576
    },
    {
      "epoch": 0.1282222222222222,
      "grad_norm": 1.0882468223571777,
      "learning_rate": 0.00017474387527839644,
      "loss": 2.0598,
      "step": 577
    },
    {
      "epoch": 0.12844444444444444,
      "grad_norm": 0.14829707145690918,
      "learning_rate": 0.00017469933184855233,
      "loss": 0.0228,
      "step": 578
    },
    {
      "epoch": 0.12866666666666668,
      "grad_norm": 0.13807116448879242,
      "learning_rate": 0.00017465478841870825,
      "loss": 0.0218,
      "step": 579
    },
    {
      "epoch": 0.1288888888888889,
      "grad_norm": 0.7114683985710144,
      "learning_rate": 0.00017461024498886417,
      "loss": 1.0263,
      "step": 580
    },
    {
      "epoch": 0.12911111111111112,
      "grad_norm": 0.7122150659561157,
      "learning_rate": 0.00017456570155902006,
      "loss": 0.8802,
      "step": 581
    },
    {
      "epoch": 0.12933333333333333,
      "grad_norm": 0.08912209421396255,
      "learning_rate": 0.00017452115812917595,
      "loss": 0.0176,
      "step": 582
    },
    {
      "epoch": 0.12955555555555556,
      "grad_norm": 0.0869002416729927,
      "learning_rate": 0.00017447661469933184,
      "loss": 0.0162,
      "step": 583
    },
    {
      "epoch": 0.12977777777777777,
      "grad_norm": 0.8936485052108765,
      "learning_rate": 0.00017443207126948776,
      "loss": 1.4582,
      "step": 584
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9836113452911377,
      "learning_rate": 0.00017438752783964368,
      "loss": 1.0513,
      "step": 585
    },
    {
      "epoch": 0.1302222222222222,
      "grad_norm": 0.9093931317329407,
      "learning_rate": 0.00017434298440979957,
      "loss": 0.1017,
      "step": 586
    },
    {
      "epoch": 0.13044444444444445,
      "grad_norm": 0.7786886692047119,
      "learning_rate": 0.00017429844097995546,
      "loss": 0.7586,
      "step": 587
    },
    {
      "epoch": 0.13066666666666665,
      "grad_norm": 1.0884865522384644,
      "learning_rate": 0.00017425389755011136,
      "loss": 1.6373,
      "step": 588
    },
    {
      "epoch": 0.1308888888888889,
      "grad_norm": 1.0809407234191895,
      "learning_rate": 0.00017420935412026727,
      "loss": 1.7491,
      "step": 589
    },
    {
      "epoch": 0.13111111111111112,
      "grad_norm": 1.408219814300537,
      "learning_rate": 0.00017416481069042317,
      "loss": 1.6617,
      "step": 590
    },
    {
      "epoch": 0.13133333333333333,
      "grad_norm": 0.9930030107498169,
      "learning_rate": 0.00017412026726057906,
      "loss": 1.385,
      "step": 591
    },
    {
      "epoch": 0.13155555555555556,
      "grad_norm": 1.1311132907867432,
      "learning_rate": 0.00017407572383073498,
      "loss": 1.7926,
      "step": 592
    },
    {
      "epoch": 0.13177777777777777,
      "grad_norm": 1.1787432432174683,
      "learning_rate": 0.00017403118040089087,
      "loss": 1.7932,
      "step": 593
    },
    {
      "epoch": 0.132,
      "grad_norm": 0.9596449732780457,
      "learning_rate": 0.0001739866369710468,
      "loss": 1.4321,
      "step": 594
    },
    {
      "epoch": 0.1322222222222222,
      "grad_norm": 0.9271344542503357,
      "learning_rate": 0.00017394209354120268,
      "loss": 0.0491,
      "step": 595
    },
    {
      "epoch": 0.13244444444444445,
      "grad_norm": 0.2056574523448944,
      "learning_rate": 0.00017389755011135857,
      "loss": 0.0362,
      "step": 596
    },
    {
      "epoch": 0.13266666666666665,
      "grad_norm": 1.0598512887954712,
      "learning_rate": 0.00017385300668151446,
      "loss": 1.1693,
      "step": 597
    },
    {
      "epoch": 0.1328888888888889,
      "grad_norm": 1.6673024892807007,
      "learning_rate": 0.00017380846325167038,
      "loss": 0.5844,
      "step": 598
    },
    {
      "epoch": 0.13311111111111112,
      "grad_norm": 1.1234303712844849,
      "learning_rate": 0.0001737639198218263,
      "loss": 0.923,
      "step": 599
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 1.1665160655975342,
      "learning_rate": 0.0001737193763919822,
      "loss": 1.1461,
      "step": 600
    },
    {
      "epoch": 0.13355555555555557,
      "grad_norm": 0.761227548122406,
      "learning_rate": 0.00017367483296213808,
      "loss": 1.2228,
      "step": 601
    },
    {
      "epoch": 0.13377777777777777,
      "grad_norm": 0.7757149338722229,
      "learning_rate": 0.00017363028953229398,
      "loss": 2.3424,
      "step": 602
    },
    {
      "epoch": 0.134,
      "grad_norm": 0.6811862587928772,
      "learning_rate": 0.0001735857461024499,
      "loss": 1.3366,
      "step": 603
    },
    {
      "epoch": 0.13422222222222221,
      "grad_norm": 0.6670216917991638,
      "learning_rate": 0.00017354120267260581,
      "loss": 0.5969,
      "step": 604
    },
    {
      "epoch": 0.13444444444444445,
      "grad_norm": 0.7554097771644592,
      "learning_rate": 0.0001734966592427617,
      "loss": 2.1288,
      "step": 605
    },
    {
      "epoch": 0.13466666666666666,
      "grad_norm": 0.7412663698196411,
      "learning_rate": 0.0001734521158129176,
      "loss": 2.1018,
      "step": 606
    },
    {
      "epoch": 0.1348888888888889,
      "grad_norm": 0.14801454544067383,
      "learning_rate": 0.0001734075723830735,
      "loss": 0.0229,
      "step": 607
    },
    {
      "epoch": 0.1351111111111111,
      "grad_norm": 0.1401568502187729,
      "learning_rate": 0.0001733630289532294,
      "loss": 0.0219,
      "step": 608
    },
    {
      "epoch": 0.13533333333333333,
      "grad_norm": 0.1252715289592743,
      "learning_rate": 0.0001733184855233853,
      "loss": 0.0204,
      "step": 609
    },
    {
      "epoch": 0.13555555555555557,
      "grad_norm": 0.8645430207252502,
      "learning_rate": 0.00017327394209354122,
      "loss": 2.121,
      "step": 610
    },
    {
      "epoch": 0.13577777777777778,
      "grad_norm": 0.651623010635376,
      "learning_rate": 0.0001732293986636971,
      "loss": 2.0482,
      "step": 611
    },
    {
      "epoch": 0.136,
      "grad_norm": 0.8796990513801575,
      "learning_rate": 0.000173184855233853,
      "loss": 1.9341,
      "step": 612
    },
    {
      "epoch": 0.13622222222222222,
      "grad_norm": 0.7759367227554321,
      "learning_rate": 0.00017314031180400892,
      "loss": 2.0492,
      "step": 613
    },
    {
      "epoch": 0.13644444444444445,
      "grad_norm": 0.758148729801178,
      "learning_rate": 0.0001730957683741648,
      "loss": 1.8866,
      "step": 614
    },
    {
      "epoch": 0.13666666666666666,
      "grad_norm": 0.7975385785102844,
      "learning_rate": 0.0001730512249443207,
      "loss": 1.946,
      "step": 615
    },
    {
      "epoch": 0.1368888888888889,
      "grad_norm": 1.0211257934570312,
      "learning_rate": 0.00017300668151447662,
      "loss": 2.042,
      "step": 616
    },
    {
      "epoch": 0.1371111111111111,
      "grad_norm": 0.8569313883781433,
      "learning_rate": 0.00017296213808463254,
      "loss": 1.9781,
      "step": 617
    },
    {
      "epoch": 0.13733333333333334,
      "grad_norm": 0.9642595052719116,
      "learning_rate": 0.00017291759465478843,
      "loss": 1.9,
      "step": 618
    },
    {
      "epoch": 0.13755555555555554,
      "grad_norm": 0.24051399528980255,
      "learning_rate": 0.00017287305122494433,
      "loss": 0.0233,
      "step": 619
    },
    {
      "epoch": 0.13777777777777778,
      "grad_norm": 0.13666865229606628,
      "learning_rate": 0.00017282850779510022,
      "loss": 0.0208,
      "step": 620
    },
    {
      "epoch": 0.138,
      "grad_norm": 0.7538687586784363,
      "learning_rate": 0.0001727839643652561,
      "loss": 1.0854,
      "step": 621
    },
    {
      "epoch": 0.13822222222222222,
      "grad_norm": 1.1495360136032104,
      "learning_rate": 0.00017273942093541206,
      "loss": 1.1,
      "step": 622
    },
    {
      "epoch": 0.13844444444444445,
      "grad_norm": 0.6540763974189758,
      "learning_rate": 0.00017269487750556795,
      "loss": 0.8361,
      "step": 623
    },
    {
      "epoch": 0.13866666666666666,
      "grad_norm": 1.0281493663787842,
      "learning_rate": 0.00017265033407572384,
      "loss": 1.9939,
      "step": 624
    },
    {
      "epoch": 0.1388888888888889,
      "grad_norm": 1.0349078178405762,
      "learning_rate": 0.00017260579064587973,
      "loss": 1.778,
      "step": 625
    },
    {
      "epoch": 0.1391111111111111,
      "grad_norm": 0.9192053079605103,
      "learning_rate": 0.00017256124721603565,
      "loss": 1.8163,
      "step": 626
    },
    {
      "epoch": 0.13933333333333334,
      "grad_norm": 1.1019245386123657,
      "learning_rate": 0.00017251670378619154,
      "loss": 1.8964,
      "step": 627
    },
    {
      "epoch": 0.13955555555555554,
      "grad_norm": 0.9387298226356506,
      "learning_rate": 0.00017247216035634746,
      "loss": 1.8093,
      "step": 628
    },
    {
      "epoch": 0.13977777777777778,
      "grad_norm": 1.047958254814148,
      "learning_rate": 0.00017242761692650335,
      "loss": 1.7654,
      "step": 629
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.093125820159912,
      "learning_rate": 0.00017238307349665924,
      "loss": 1.1535,
      "step": 630
    },
    {
      "epoch": 0.14022222222222222,
      "grad_norm": 0.12394639104604721,
      "learning_rate": 0.00017233853006681516,
      "loss": 0.0218,
      "step": 631
    },
    {
      "epoch": 0.14044444444444446,
      "grad_norm": 0.09156838059425354,
      "learning_rate": 0.00017229398663697106,
      "loss": 0.0206,
      "step": 632
    },
    {
      "epoch": 0.14066666666666666,
      "grad_norm": 1.1327197551727295,
      "learning_rate": 0.00017224944320712695,
      "loss": 1.872,
      "step": 633
    },
    {
      "epoch": 0.1408888888888889,
      "grad_norm": 0.38535261154174805,
      "learning_rate": 0.00017220489977728287,
      "loss": 0.0376,
      "step": 634
    },
    {
      "epoch": 0.1411111111111111,
      "grad_norm": 0.2830149531364441,
      "learning_rate": 0.00017216035634743876,
      "loss": 0.0275,
      "step": 635
    },
    {
      "epoch": 0.14133333333333334,
      "grad_norm": 0.09745927155017853,
      "learning_rate": 0.00017211581291759468,
      "loss": 0.0205,
      "step": 636
    },
    {
      "epoch": 0.14155555555555555,
      "grad_norm": 0.998629093170166,
      "learning_rate": 0.00017207126948775057,
      "loss": 1.4353,
      "step": 637
    },
    {
      "epoch": 0.14177777777777778,
      "grad_norm": 1.019489049911499,
      "learning_rate": 0.00017202672605790646,
      "loss": 1.4431,
      "step": 638
    },
    {
      "epoch": 0.142,
      "grad_norm": 1.311251163482666,
      "learning_rate": 0.00017198218262806235,
      "loss": 1.9691,
      "step": 639
    },
    {
      "epoch": 0.14222222222222222,
      "grad_norm": 1.1369127035140991,
      "learning_rate": 0.00017193763919821827,
      "loss": 1.7863,
      "step": 640
    },
    {
      "epoch": 0.14244444444444446,
      "grad_norm": 0.9947224855422974,
      "learning_rate": 0.0001718930957683742,
      "loss": 1.429,
      "step": 641
    },
    {
      "epoch": 0.14266666666666666,
      "grad_norm": 0.7390214800834656,
      "learning_rate": 0.00017184855233853008,
      "loss": 0.7239,
      "step": 642
    },
    {
      "epoch": 0.1428888888888889,
      "grad_norm": 0.2698360085487366,
      "learning_rate": 0.00017180400890868597,
      "loss": 0.0362,
      "step": 643
    },
    {
      "epoch": 0.1431111111111111,
      "grad_norm": 0.7463746070861816,
      "learning_rate": 0.00017175946547884187,
      "loss": 0.898,
      "step": 644
    },
    {
      "epoch": 0.14333333333333334,
      "grad_norm": 1.0978554487228394,
      "learning_rate": 0.00017171492204899778,
      "loss": 1.2833,
      "step": 645
    },
    {
      "epoch": 0.14355555555555555,
      "grad_norm": 1.149170160293579,
      "learning_rate": 0.0001716703786191537,
      "loss": 1.2463,
      "step": 646
    },
    {
      "epoch": 0.14377777777777778,
      "grad_norm": 2.201732873916626,
      "learning_rate": 0.0001716258351893096,
      "loss": 0.869,
      "step": 647
    },
    {
      "epoch": 0.144,
      "grad_norm": 2.3375518321990967,
      "learning_rate": 0.0001715812917594655,
      "loss": 0.3554,
      "step": 648
    },
    {
      "epoch": 0.14422222222222222,
      "grad_norm": 1.1565347909927368,
      "learning_rate": 0.00017153674832962138,
      "loss": 0.5603,
      "step": 649
    },
    {
      "epoch": 0.14444444444444443,
      "grad_norm": 1.1238269805908203,
      "learning_rate": 0.0001714922048997773,
      "loss": 1.0216,
      "step": 650
    },
    {
      "epoch": 0.14466666666666667,
      "grad_norm": 0.861092209815979,
      "learning_rate": 0.0001714476614699332,
      "loss": 2.2131,
      "step": 651
    },
    {
      "epoch": 0.1448888888888889,
      "grad_norm": 0.0988718643784523,
      "learning_rate": 0.0001714031180400891,
      "loss": 0.0149,
      "step": 652
    },
    {
      "epoch": 0.1451111111111111,
      "grad_norm": 0.5644282698631287,
      "learning_rate": 0.000171358574610245,
      "loss": 0.9802,
      "step": 653
    },
    {
      "epoch": 0.14533333333333334,
      "grad_norm": 0.6775911450386047,
      "learning_rate": 0.0001713140311804009,
      "loss": 1.1163,
      "step": 654
    },
    {
      "epoch": 0.14555555555555555,
      "grad_norm": 0.6742568016052246,
      "learning_rate": 0.0001712694877505568,
      "loss": 1.1633,
      "step": 655
    },
    {
      "epoch": 0.14577777777777778,
      "grad_norm": 0.10246681421995163,
      "learning_rate": 0.0001712249443207127,
      "loss": 0.0136,
      "step": 656
    },
    {
      "epoch": 0.146,
      "grad_norm": 0.09264359623193741,
      "learning_rate": 0.0001711804008908686,
      "loss": 0.0126,
      "step": 657
    },
    {
      "epoch": 0.14622222222222223,
      "grad_norm": 0.07406888157129288,
      "learning_rate": 0.00017113585746102451,
      "loss": 0.0117,
      "step": 658
    },
    {
      "epoch": 0.14644444444444443,
      "grad_norm": 0.06493738293647766,
      "learning_rate": 0.0001710913140311804,
      "loss": 0.0107,
      "step": 659
    },
    {
      "epoch": 0.14666666666666667,
      "grad_norm": 0.9421645998954773,
      "learning_rate": 0.00017104677060133632,
      "loss": 2.1287,
      "step": 660
    },
    {
      "epoch": 0.1468888888888889,
      "grad_norm": 0.9334849119186401,
      "learning_rate": 0.00017100222717149222,
      "loss": 2.1281,
      "step": 661
    },
    {
      "epoch": 0.1471111111111111,
      "grad_norm": 1.126652717590332,
      "learning_rate": 0.0001709576837416481,
      "loss": 2.1985,
      "step": 662
    },
    {
      "epoch": 0.14733333333333334,
      "grad_norm": 0.8477734327316284,
      "learning_rate": 0.000170913140311804,
      "loss": 2.2331,
      "step": 663
    },
    {
      "epoch": 0.14755555555555555,
      "grad_norm": 0.816444456577301,
      "learning_rate": 0.00017086859688195992,
      "loss": 1.9682,
      "step": 664
    },
    {
      "epoch": 0.14777777777777779,
      "grad_norm": 1.1034094095230103,
      "learning_rate": 0.00017082405345211584,
      "loss": 1.0565,
      "step": 665
    },
    {
      "epoch": 0.148,
      "grad_norm": 0.9575863480567932,
      "learning_rate": 0.00017077951002227173,
      "loss": 1.9457,
      "step": 666
    },
    {
      "epoch": 0.14822222222222223,
      "grad_norm": 0.8643515706062317,
      "learning_rate": 0.00017073496659242762,
      "loss": 1.969,
      "step": 667
    },
    {
      "epoch": 0.14844444444444443,
      "grad_norm": 0.7901911735534668,
      "learning_rate": 0.0001706904231625835,
      "loss": 1.6202,
      "step": 668
    },
    {
      "epoch": 0.14866666666666667,
      "grad_norm": 0.8725628852844238,
      "learning_rate": 0.00017064587973273943,
      "loss": 2.0964,
      "step": 669
    },
    {
      "epoch": 0.14888888888888888,
      "grad_norm": 0.8935587406158447,
      "learning_rate": 0.00017060133630289532,
      "loss": 1.7493,
      "step": 670
    },
    {
      "epoch": 0.1491111111111111,
      "grad_norm": 0.7131580114364624,
      "learning_rate": 0.00017055679287305124,
      "loss": 0.9353,
      "step": 671
    },
    {
      "epoch": 0.14933333333333335,
      "grad_norm": 0.30260053277015686,
      "learning_rate": 0.00017051224944320713,
      "loss": 0.0378,
      "step": 672
    },
    {
      "epoch": 0.14955555555555555,
      "grad_norm": 0.7780633568763733,
      "learning_rate": 0.00017046770601336303,
      "loss": 1.0137,
      "step": 673
    },
    {
      "epoch": 0.1497777777777778,
      "grad_norm": 0.9263336062431335,
      "learning_rate": 0.00017042316258351895,
      "loss": 1.3275,
      "step": 674
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9188753366470337,
      "learning_rate": 0.00017037861915367484,
      "loss": 1.8992,
      "step": 675
    },
    {
      "epoch": 0.15022222222222223,
      "grad_norm": 0.9691118597984314,
      "learning_rate": 0.00017033407572383073,
      "loss": 2.0906,
      "step": 676
    },
    {
      "epoch": 0.15044444444444444,
      "grad_norm": 0.9938292503356934,
      "learning_rate": 0.00017028953229398665,
      "loss": 1.769,
      "step": 677
    },
    {
      "epoch": 0.15066666666666667,
      "grad_norm": 1.172528862953186,
      "learning_rate": 0.00017024498886414254,
      "loss": 1.808,
      "step": 678
    },
    {
      "epoch": 0.15088888888888888,
      "grad_norm": 0.8911821842193604,
      "learning_rate": 0.00017020044543429846,
      "loss": 1.8644,
      "step": 679
    },
    {
      "epoch": 0.1511111111111111,
      "grad_norm": 0.9470701217651367,
      "learning_rate": 0.00017015590200445435,
      "loss": 1.9587,
      "step": 680
    },
    {
      "epoch": 0.15133333333333332,
      "grad_norm": 1.2212241888046265,
      "learning_rate": 0.00017011135857461024,
      "loss": 0.0495,
      "step": 681
    },
    {
      "epoch": 0.15155555555555555,
      "grad_norm": 0.15241730213165283,
      "learning_rate": 0.00017006681514476613,
      "loss": 0.0224,
      "step": 682
    },
    {
      "epoch": 0.1517777777777778,
      "grad_norm": 1.2180373668670654,
      "learning_rate": 0.00017002227171492205,
      "loss": 1.7299,
      "step": 683
    },
    {
      "epoch": 0.152,
      "grad_norm": 0.9515765905380249,
      "learning_rate": 0.00016997772828507797,
      "loss": 1.1014,
      "step": 684
    },
    {
      "epoch": 0.15222222222222223,
      "grad_norm": 0.10555847734212875,
      "learning_rate": 0.00016993318485523386,
      "loss": 0.0218,
      "step": 685
    },
    {
      "epoch": 0.15244444444444444,
      "grad_norm": 0.0902755856513977,
      "learning_rate": 0.00016988864142538976,
      "loss": 0.0213,
      "step": 686
    },
    {
      "epoch": 0.15266666666666667,
      "grad_norm": 0.08572933077812195,
      "learning_rate": 0.00016984409799554565,
      "loss": 0.0203,
      "step": 687
    },
    {
      "epoch": 0.15288888888888888,
      "grad_norm": 0.6668169498443604,
      "learning_rate": 0.00016979955456570157,
      "loss": 0.8351,
      "step": 688
    },
    {
      "epoch": 0.15311111111111111,
      "grad_norm": 1.3807684183120728,
      "learning_rate": 0.00016975501113585748,
      "loss": 2.0387,
      "step": 689
    },
    {
      "epoch": 0.15333333333333332,
      "grad_norm": 1.0587692260742188,
      "learning_rate": 0.00016971046770601338,
      "loss": 1.6551,
      "step": 690
    },
    {
      "epoch": 0.15355555555555556,
      "grad_norm": 1.378057837486267,
      "learning_rate": 0.00016966592427616927,
      "loss": 0.8331,
      "step": 691
    },
    {
      "epoch": 0.1537777777777778,
      "grad_norm": 1.1262686252593994,
      "learning_rate": 0.00016962138084632516,
      "loss": 1.6237,
      "step": 692
    },
    {
      "epoch": 0.154,
      "grad_norm": 1.0472062826156616,
      "learning_rate": 0.00016957683741648108,
      "loss": 1.7357,
      "step": 693
    },
    {
      "epoch": 0.15422222222222223,
      "grad_norm": 0.9540035128593445,
      "learning_rate": 0.00016953229398663697,
      "loss": 1.4822,
      "step": 694
    },
    {
      "epoch": 0.15444444444444444,
      "grad_norm": 1.032220721244812,
      "learning_rate": 0.0001694877505567929,
      "loss": 1.5291,
      "step": 695
    },
    {
      "epoch": 0.15466666666666667,
      "grad_norm": 0.7844957709312439,
      "learning_rate": 0.00016944320712694878,
      "loss": 0.8609,
      "step": 696
    },
    {
      "epoch": 0.15488888888888888,
      "grad_norm": 1.222839117050171,
      "learning_rate": 0.00016939866369710467,
      "loss": 1.2355,
      "step": 697
    },
    {
      "epoch": 0.15511111111111112,
      "grad_norm": 1.9266964197158813,
      "learning_rate": 0.0001693541202672606,
      "loss": 0.6971,
      "step": 698
    },
    {
      "epoch": 0.15533333333333332,
      "grad_norm": 0.49530452489852905,
      "learning_rate": 0.00016930957683741648,
      "loss": 0.0478,
      "step": 699
    },
    {
      "epoch": 0.15555555555555556,
      "grad_norm": 0.9992147088050842,
      "learning_rate": 0.00016926503340757238,
      "loss": 0.8504,
      "step": 700
    },
    {
      "epoch": 0.15577777777777777,
      "grad_norm": 0.5562880635261536,
      "learning_rate": 0.0001692204899777283,
      "loss": 1.2278,
      "step": 701
    },
    {
      "epoch": 0.156,
      "grad_norm": 0.5927205681800842,
      "learning_rate": 0.00016917594654788421,
      "loss": 1.1753,
      "step": 702
    },
    {
      "epoch": 0.15622222222222223,
      "grad_norm": 0.7892579436302185,
      "learning_rate": 0.0001691314031180401,
      "loss": 2.1196,
      "step": 703
    },
    {
      "epoch": 0.15644444444444444,
      "grad_norm": 0.09951931238174438,
      "learning_rate": 0.000169086859688196,
      "loss": 0.0159,
      "step": 704
    },
    {
      "epoch": 0.15666666666666668,
      "grad_norm": 0.08787377178668976,
      "learning_rate": 0.0001690423162583519,
      "loss": 0.0151,
      "step": 705
    },
    {
      "epoch": 0.15688888888888888,
      "grad_norm": 0.50344318151474,
      "learning_rate": 0.00016899777282850778,
      "loss": 1.1949,
      "step": 706
    },
    {
      "epoch": 0.15711111111111112,
      "grad_norm": 0.8544764518737793,
      "learning_rate": 0.00016895322939866373,
      "loss": 2.3102,
      "step": 707
    },
    {
      "epoch": 0.15733333333333333,
      "grad_norm": 0.9083617925643921,
      "learning_rate": 0.00016890868596881962,
      "loss": 2.1908,
      "step": 708
    },
    {
      "epoch": 0.15755555555555556,
      "grad_norm": 0.7772009968757629,
      "learning_rate": 0.0001688641425389755,
      "loss": 2.3081,
      "step": 709
    },
    {
      "epoch": 0.15777777777777777,
      "grad_norm": 0.7839867472648621,
      "learning_rate": 0.0001688195991091314,
      "loss": 1.919,
      "step": 710
    },
    {
      "epoch": 0.158,
      "grad_norm": 0.8075196743011475,
      "learning_rate": 0.00016877505567928732,
      "loss": 1.9511,
      "step": 711
    },
    {
      "epoch": 0.1582222222222222,
      "grad_norm": 0.9218339920043945,
      "learning_rate": 0.0001687305122494432,
      "loss": 1.9041,
      "step": 712
    },
    {
      "epoch": 0.15844444444444444,
      "grad_norm": 0.7846603989601135,
      "learning_rate": 0.00016868596881959913,
      "loss": 1.8966,
      "step": 713
    },
    {
      "epoch": 0.15866666666666668,
      "grad_norm": 1.1635181903839111,
      "learning_rate": 0.00016864142538975502,
      "loss": 2.0407,
      "step": 714
    },
    {
      "epoch": 0.15888888888888889,
      "grad_norm": 0.7846897840499878,
      "learning_rate": 0.00016859688195991092,
      "loss": 0.9193,
      "step": 715
    },
    {
      "epoch": 0.15911111111111112,
      "grad_norm": 0.31325796246528625,
      "learning_rate": 0.00016855233853006683,
      "loss": 0.035,
      "step": 716
    },
    {
      "epoch": 0.15933333333333333,
      "grad_norm": 1.016501545906067,
      "learning_rate": 0.00016850779510022273,
      "loss": 2.0913,
      "step": 717
    },
    {
      "epoch": 0.15955555555555556,
      "grad_norm": 0.9303516149520874,
      "learning_rate": 0.00016846325167037862,
      "loss": 1.9976,
      "step": 718
    },
    {
      "epoch": 0.15977777777777777,
      "grad_norm": 1.1509366035461426,
      "learning_rate": 0.00016841870824053454,
      "loss": 1.6855,
      "step": 719
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0439561605453491,
      "learning_rate": 0.00016837416481069043,
      "loss": 1.7674,
      "step": 720
    },
    {
      "epoch": 0.1602222222222222,
      "grad_norm": 0.27185630798339844,
      "learning_rate": 0.00016832962138084635,
      "loss": 0.0249,
      "step": 721
    },
    {
      "epoch": 0.16044444444444445,
      "grad_norm": 0.8196635246276855,
      "learning_rate": 0.00016828507795100224,
      "loss": 1.0722,
      "step": 722
    },
    {
      "epoch": 0.16066666666666668,
      "grad_norm": 0.6817071437835693,
      "learning_rate": 0.00016824053452115813,
      "loss": 1.0584,
      "step": 723
    },
    {
      "epoch": 0.1608888888888889,
      "grad_norm": 0.8211132884025574,
      "learning_rate": 0.00016819599109131402,
      "loss": 1.0687,
      "step": 724
    },
    {
      "epoch": 0.16111111111111112,
      "grad_norm": 0.23781214654445648,
      "learning_rate": 0.00016815144766146994,
      "loss": 0.0279,
      "step": 725
    },
    {
      "epoch": 0.16133333333333333,
      "grad_norm": 0.9902861714363098,
      "learning_rate": 0.00016810690423162586,
      "loss": 1.8951,
      "step": 726
    },
    {
      "epoch": 0.16155555555555556,
      "grad_norm": 1.0102611780166626,
      "learning_rate": 0.00016806236080178175,
      "loss": 1.7121,
      "step": 727
    },
    {
      "epoch": 0.16177777777777777,
      "grad_norm": 1.0301183462142944,
      "learning_rate": 0.00016801781737193764,
      "loss": 1.636,
      "step": 728
    },
    {
      "epoch": 0.162,
      "grad_norm": 0.9955403804779053,
      "learning_rate": 0.00016797327394209354,
      "loss": 0.9496,
      "step": 729
    },
    {
      "epoch": 0.1622222222222222,
      "grad_norm": 0.2391827553510666,
      "learning_rate": 0.00016792873051224946,
      "loss": 0.035,
      "step": 730
    },
    {
      "epoch": 0.16244444444444445,
      "grad_norm": 0.1952289491891861,
      "learning_rate": 0.00016788418708240537,
      "loss": 0.0305,
      "step": 731
    },
    {
      "epoch": 0.16266666666666665,
      "grad_norm": 0.13709303736686707,
      "learning_rate": 0.00016783964365256127,
      "loss": 0.0259,
      "step": 732
    },
    {
      "epoch": 0.1628888888888889,
      "grad_norm": 0.969248354434967,
      "learning_rate": 0.00016779510022271716,
      "loss": 1.7231,
      "step": 733
    },
    {
      "epoch": 0.16311111111111112,
      "grad_norm": 1.2497774362564087,
      "learning_rate": 0.00016775055679287305,
      "loss": 1.639,
      "step": 734
    },
    {
      "epoch": 0.16333333333333333,
      "grad_norm": 0.10687464475631714,
      "learning_rate": 0.00016770601336302897,
      "loss": 0.021,
      "step": 735
    },
    {
      "epoch": 0.16355555555555557,
      "grad_norm": 0.10326875746250153,
      "learning_rate": 0.00016766146993318486,
      "loss": 0.021,
      "step": 736
    },
    {
      "epoch": 0.16377777777777777,
      "grad_norm": 0.08747762441635132,
      "learning_rate": 0.00016761692650334075,
      "loss": 0.0201,
      "step": 737
    },
    {
      "epoch": 0.164,
      "grad_norm": 0.09365817159414291,
      "learning_rate": 0.00016757238307349667,
      "loss": 0.0191,
      "step": 738
    },
    {
      "epoch": 0.16422222222222221,
      "grad_norm": 0.9810875058174133,
      "learning_rate": 0.00016752783964365256,
      "loss": 1.5407,
      "step": 739
    },
    {
      "epoch": 0.16444444444444445,
      "grad_norm": 1.0869085788726807,
      "learning_rate": 0.00016748329621380848,
      "loss": 1.8049,
      "step": 740
    },
    {
      "epoch": 0.16466666666666666,
      "grad_norm": 1.1577770709991455,
      "learning_rate": 0.00016743875278396437,
      "loss": 1.9633,
      "step": 741
    },
    {
      "epoch": 0.1648888888888889,
      "grad_norm": 1.1007702350616455,
      "learning_rate": 0.00016739420935412027,
      "loss": 1.6518,
      "step": 742
    },
    {
      "epoch": 0.1651111111111111,
      "grad_norm": 1.4112728834152222,
      "learning_rate": 0.00016734966592427616,
      "loss": 1.7209,
      "step": 743
    },
    {
      "epoch": 0.16533333333333333,
      "grad_norm": 1.1396406888961792,
      "learning_rate": 0.00016730512249443208,
      "loss": 1.666,
      "step": 744
    },
    {
      "epoch": 0.16555555555555557,
      "grad_norm": 1.080653190612793,
      "learning_rate": 0.000167260579064588,
      "loss": 1.5255,
      "step": 745
    },
    {
      "epoch": 0.16577777777777777,
      "grad_norm": 1.300510048866272,
      "learning_rate": 0.0001672160356347439,
      "loss": 1.8507,
      "step": 746
    },
    {
      "epoch": 0.166,
      "grad_norm": 0.9009153842926025,
      "learning_rate": 0.00016717149220489978,
      "loss": 0.5545,
      "step": 747
    },
    {
      "epoch": 0.16622222222222222,
      "grad_norm": 0.20857150852680206,
      "learning_rate": 0.00016712694877505567,
      "loss": 0.0369,
      "step": 748
    },
    {
      "epoch": 0.16644444444444445,
      "grad_norm": 1.3490118980407715,
      "learning_rate": 0.0001670824053452116,
      "loss": 1.6188,
      "step": 749
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 1.054196834564209,
      "learning_rate": 0.0001670378619153675,
      "loss": 1.438,
      "step": 750
    },
    {
      "epoch": 0.1668888888888889,
      "grad_norm": 0.49820706248283386,
      "learning_rate": 0.0001669933184855234,
      "loss": 1.0425,
      "step": 751
    },
    {
      "epoch": 0.1671111111111111,
      "grad_norm": 0.10511091351509094,
      "learning_rate": 0.0001669487750556793,
      "loss": 0.0161,
      "step": 752
    },
    {
      "epoch": 0.16733333333333333,
      "grad_norm": 0.5921303033828735,
      "learning_rate": 0.00016690423162583518,
      "loss": 1.0911,
      "step": 753
    },
    {
      "epoch": 0.16755555555555557,
      "grad_norm": 0.09364970773458481,
      "learning_rate": 0.0001668596881959911,
      "loss": 0.0152,
      "step": 754
    },
    {
      "epoch": 0.16777777777777778,
      "grad_norm": 0.6593067646026611,
      "learning_rate": 0.000166815144766147,
      "loss": 1.1768,
      "step": 755
    },
    {
      "epoch": 0.168,
      "grad_norm": 0.5098649859428406,
      "learning_rate": 0.00016677060133630291,
      "loss": 0.9257,
      "step": 756
    },
    {
      "epoch": 0.16822222222222222,
      "grad_norm": 0.7384516000747681,
      "learning_rate": 0.0001667260579064588,
      "loss": 0.0302,
      "step": 757
    },
    {
      "epoch": 0.16844444444444445,
      "grad_norm": 0.11098629981279373,
      "learning_rate": 0.0001666815144766147,
      "loss": 0.0209,
      "step": 758
    },
    {
      "epoch": 0.16866666666666666,
      "grad_norm": 0.5349358916282654,
      "learning_rate": 0.00016663697104677062,
      "loss": 0.9604,
      "step": 759
    },
    {
      "epoch": 0.1688888888888889,
      "grad_norm": 0.8677853941917419,
      "learning_rate": 0.0001665924276169265,
      "loss": 2.01,
      "step": 760
    },
    {
      "epoch": 0.1691111111111111,
      "grad_norm": 0.8876023292541504,
      "learning_rate": 0.0001665478841870824,
      "loss": 2.1739,
      "step": 761
    },
    {
      "epoch": 0.16933333333333334,
      "grad_norm": 0.7748745679855347,
      "learning_rate": 0.00016650334075723832,
      "loss": 2.0633,
      "step": 762
    },
    {
      "epoch": 0.16955555555555554,
      "grad_norm": 0.8775037527084351,
      "learning_rate": 0.0001664587973273942,
      "loss": 2.0544,
      "step": 763
    },
    {
      "epoch": 0.16977777777777778,
      "grad_norm": 0.8547599911689758,
      "learning_rate": 0.00016641425389755013,
      "loss": 1.6622,
      "step": 764
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0196506977081299,
      "learning_rate": 0.00016636971046770602,
      "loss": 2.2227,
      "step": 765
    },
    {
      "epoch": 0.17022222222222222,
      "grad_norm": 0.9069720506668091,
      "learning_rate": 0.0001663251670378619,
      "loss": 2.2042,
      "step": 766
    },
    {
      "epoch": 0.17044444444444445,
      "grad_norm": 0.8413894772529602,
      "learning_rate": 0.0001662806236080178,
      "loss": 2.0638,
      "step": 767
    },
    {
      "epoch": 0.17066666666666666,
      "grad_norm": 0.5857881903648376,
      "learning_rate": 0.00016623608017817372,
      "loss": 0.8278,
      "step": 768
    },
    {
      "epoch": 0.1708888888888889,
      "grad_norm": 0.084866464138031,
      "learning_rate": 0.00016619153674832964,
      "loss": 0.0179,
      "step": 769
    },
    {
      "epoch": 0.1711111111111111,
      "grad_norm": 0.08603781461715698,
      "learning_rate": 0.00016614699331848553,
      "loss": 0.0177,
      "step": 770
    },
    {
      "epoch": 0.17133333333333334,
      "grad_norm": 0.07927680015563965,
      "learning_rate": 0.00016610244988864143,
      "loss": 0.0174,
      "step": 771
    },
    {
      "epoch": 0.17155555555555554,
      "grad_norm": 0.09696035087108612,
      "learning_rate": 0.00016605790645879732,
      "loss": 0.0208,
      "step": 772
    },
    {
      "epoch": 0.17177777777777778,
      "grad_norm": 0.8104578852653503,
      "learning_rate": 0.00016601336302895324,
      "loss": 0.89,
      "step": 773
    },
    {
      "epoch": 0.172,
      "grad_norm": 1.0266926288604736,
      "learning_rate": 0.00016596881959910916,
      "loss": 1.8188,
      "step": 774
    },
    {
      "epoch": 0.17222222222222222,
      "grad_norm": 1.1254407167434692,
      "learning_rate": 0.00016592427616926505,
      "loss": 2.1854,
      "step": 775
    },
    {
      "epoch": 0.17244444444444446,
      "grad_norm": 1.0652568340301514,
      "learning_rate": 0.00016587973273942094,
      "loss": 1.8448,
      "step": 776
    },
    {
      "epoch": 0.17266666666666666,
      "grad_norm": 0.9953740239143372,
      "learning_rate": 0.00016583518930957683,
      "loss": 1.7428,
      "step": 777
    },
    {
      "epoch": 0.1728888888888889,
      "grad_norm": 0.9609451293945312,
      "learning_rate": 0.00016579064587973275,
      "loss": 1.7552,
      "step": 778
    },
    {
      "epoch": 0.1731111111111111,
      "grad_norm": 1.3759422302246094,
      "learning_rate": 0.00016574610244988864,
      "loss": 1.0064,
      "step": 779
    },
    {
      "epoch": 0.17333333333333334,
      "grad_norm": 0.103799007833004,
      "learning_rate": 0.00016570155902004456,
      "loss": 0.0194,
      "step": 780
    },
    {
      "epoch": 0.17355555555555555,
      "grad_norm": 0.6549257040023804,
      "learning_rate": 0.00016565701559020045,
      "loss": 0.7385,
      "step": 781
    },
    {
      "epoch": 0.17377777777777778,
      "grad_norm": 0.8397656679153442,
      "learning_rate": 0.00016561247216035634,
      "loss": 0.0268,
      "step": 782
    },
    {
      "epoch": 0.174,
      "grad_norm": 0.07175108045339584,
      "learning_rate": 0.00016556792873051226,
      "loss": 0.0176,
      "step": 783
    },
    {
      "epoch": 0.17422222222222222,
      "grad_norm": 0.07930734753608704,
      "learning_rate": 0.00016552338530066816,
      "loss": 0.0173,
      "step": 784
    },
    {
      "epoch": 0.17444444444444446,
      "grad_norm": 0.6065148115158081,
      "learning_rate": 0.00016547884187082405,
      "loss": 0.819,
      "step": 785
    },
    {
      "epoch": 0.17466666666666666,
      "grad_norm": 0.8312206864356995,
      "learning_rate": 0.00016543429844097997,
      "loss": 0.9267,
      "step": 786
    },
    {
      "epoch": 0.1748888888888889,
      "grad_norm": 0.2022327035665512,
      "learning_rate": 0.00016538975501113588,
      "loss": 0.0356,
      "step": 787
    },
    {
      "epoch": 0.1751111111111111,
      "grad_norm": 0.996240496635437,
      "learning_rate": 0.00016534521158129178,
      "loss": 1.6089,
      "step": 788
    },
    {
      "epoch": 0.17533333333333334,
      "grad_norm": 0.9978050589561462,
      "learning_rate": 0.00016530066815144767,
      "loss": 1.7166,
      "step": 789
    },
    {
      "epoch": 0.17555555555555555,
      "grad_norm": 1.1079049110412598,
      "learning_rate": 0.00016525612472160356,
      "loss": 1.5275,
      "step": 790
    },
    {
      "epoch": 0.17577777777777778,
      "grad_norm": 0.9986259937286377,
      "learning_rate": 0.00016521158129175945,
      "loss": 1.4903,
      "step": 791
    },
    {
      "epoch": 0.176,
      "grad_norm": 0.7572327852249146,
      "learning_rate": 0.0001651670378619154,
      "loss": 0.7548,
      "step": 792
    },
    {
      "epoch": 0.17622222222222222,
      "grad_norm": 0.16977067291736603,
      "learning_rate": 0.0001651224944320713,
      "loss": 0.0267,
      "step": 793
    },
    {
      "epoch": 0.17644444444444443,
      "grad_norm": 0.16834843158721924,
      "learning_rate": 0.00016507795100222718,
      "loss": 0.0259,
      "step": 794
    },
    {
      "epoch": 0.17666666666666667,
      "grad_norm": 0.8039578199386597,
      "learning_rate": 0.00016503340757238307,
      "loss": 0.7745,
      "step": 795
    },
    {
      "epoch": 0.1768888888888889,
      "grad_norm": 1.0637822151184082,
      "learning_rate": 0.000164988864142539,
      "loss": 1.2708,
      "step": 796
    },
    {
      "epoch": 0.1771111111111111,
      "grad_norm": 1.1139543056488037,
      "learning_rate": 0.00016494432071269488,
      "loss": 1.1626,
      "step": 797
    },
    {
      "epoch": 0.17733333333333334,
      "grad_norm": 0.8471236824989319,
      "learning_rate": 0.0001648997772828508,
      "loss": 0.5387,
      "step": 798
    },
    {
      "epoch": 0.17755555555555555,
      "grad_norm": 1.1162381172180176,
      "learning_rate": 0.0001648552338530067,
      "loss": 0.9235,
      "step": 799
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 1.160738229751587,
      "learning_rate": 0.0001648106904231626,
      "loss": 0.9536,
      "step": 800
    },
    {
      "epoch": 0.178,
      "grad_norm": 0.055902622640132904,
      "learning_rate": 0.0001647661469933185,
      "loss": 0.011,
      "step": 801
    },
    {
      "epoch": 0.17822222222222223,
      "grad_norm": 0.6805006861686707,
      "learning_rate": 0.0001647216035634744,
      "loss": 1.1912,
      "step": 802
    },
    {
      "epoch": 0.17844444444444443,
      "grad_norm": 0.5978904962539673,
      "learning_rate": 0.0001646770601336303,
      "loss": 1.242,
      "step": 803
    },
    {
      "epoch": 0.17866666666666667,
      "grad_norm": 1.012514352798462,
      "learning_rate": 0.0001646325167037862,
      "loss": 2.2625,
      "step": 804
    },
    {
      "epoch": 0.17888888888888888,
      "grad_norm": 0.06615343689918518,
      "learning_rate": 0.0001645879732739421,
      "loss": 0.0109,
      "step": 805
    },
    {
      "epoch": 0.1791111111111111,
      "grad_norm": 0.8926467299461365,
      "learning_rate": 0.00016454342984409802,
      "loss": 2.1026,
      "step": 806
    },
    {
      "epoch": 0.17933333333333334,
      "grad_norm": 0.7982742786407471,
      "learning_rate": 0.0001644988864142539,
      "loss": 1.7316,
      "step": 807
    },
    {
      "epoch": 0.17955555555555555,
      "grad_norm": 1.4942423105239868,
      "learning_rate": 0.0001644543429844098,
      "loss": 0.039,
      "step": 808
    },
    {
      "epoch": 0.1797777777777778,
      "grad_norm": 0.26765769720077515,
      "learning_rate": 0.0001644097995545657,
      "loss": 0.0198,
      "step": 809
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9778783321380615,
      "learning_rate": 0.0001643652561247216,
      "loss": 2.0191,
      "step": 810
    },
    {
      "epoch": 0.18022222222222223,
      "grad_norm": 1.1812803745269775,
      "learning_rate": 0.00016432071269487753,
      "loss": 2.7094,
      "step": 811
    },
    {
      "epoch": 0.18044444444444444,
      "grad_norm": 0.9629987478256226,
      "learning_rate": 0.00016427616926503342,
      "loss": 2.0157,
      "step": 812
    },
    {
      "epoch": 0.18066666666666667,
      "grad_norm": 0.895087718963623,
      "learning_rate": 0.00016423162583518932,
      "loss": 2.144,
      "step": 813
    },
    {
      "epoch": 0.18088888888888888,
      "grad_norm": 0.9080528616905212,
      "learning_rate": 0.0001641870824053452,
      "loss": 2.1558,
      "step": 814
    },
    {
      "epoch": 0.1811111111111111,
      "grad_norm": 1.078696846961975,
      "learning_rate": 0.00016414253897550113,
      "loss": 0.0447,
      "step": 815
    },
    {
      "epoch": 0.18133333333333335,
      "grad_norm": 0.9236059784889221,
      "learning_rate": 0.00016409799554565702,
      "loss": 2.0536,
      "step": 816
    },
    {
      "epoch": 0.18155555555555555,
      "grad_norm": 0.7201200127601624,
      "learning_rate": 0.00016405345211581294,
      "loss": 0.8334,
      "step": 817
    },
    {
      "epoch": 0.1817777777777778,
      "grad_norm": 0.08575476706027985,
      "learning_rate": 0.00016400890868596883,
      "loss": 0.0198,
      "step": 818
    },
    {
      "epoch": 0.182,
      "grad_norm": 1.1828711032867432,
      "learning_rate": 0.00016396436525612472,
      "loss": 0.9102,
      "step": 819
    },
    {
      "epoch": 0.18222222222222223,
      "grad_norm": 0.7130089998245239,
      "learning_rate": 0.00016391982182628064,
      "loss": 1.0971,
      "step": 820
    },
    {
      "epoch": 0.18244444444444444,
      "grad_norm": 0.9672996997833252,
      "learning_rate": 0.00016387527839643653,
      "loss": 1.9789,
      "step": 821
    },
    {
      "epoch": 0.18266666666666667,
      "grad_norm": 1.0078965425491333,
      "learning_rate": 0.00016383073496659242,
      "loss": 1.7371,
      "step": 822
    },
    {
      "epoch": 0.18288888888888888,
      "grad_norm": 1.0774242877960205,
      "learning_rate": 0.00016378619153674834,
      "loss": 1.7296,
      "step": 823
    },
    {
      "epoch": 0.1831111111111111,
      "grad_norm": 0.9907069802284241,
      "learning_rate": 0.00016374164810690423,
      "loss": 1.8363,
      "step": 824
    },
    {
      "epoch": 0.18333333333333332,
      "grad_norm": 0.9560150504112244,
      "learning_rate": 0.00016369710467706015,
      "loss": 1.6966,
      "step": 825
    },
    {
      "epoch": 0.18355555555555556,
      "grad_norm": 0.9652445912361145,
      "learning_rate": 0.00016365256124721604,
      "loss": 1.7625,
      "step": 826
    },
    {
      "epoch": 0.1837777777777778,
      "grad_norm": 0.9431360960006714,
      "learning_rate": 0.00016360801781737194,
      "loss": 1.6979,
      "step": 827
    },
    {
      "epoch": 0.184,
      "grad_norm": 0.9380690455436707,
      "learning_rate": 0.00016356347438752783,
      "loss": 1.7109,
      "step": 828
    },
    {
      "epoch": 0.18422222222222223,
      "grad_norm": 1.3345482349395752,
      "learning_rate": 0.00016351893095768375,
      "loss": 0.083,
      "step": 829
    },
    {
      "epoch": 0.18444444444444444,
      "grad_norm": 0.8567286729812622,
      "learning_rate": 0.00016347438752783967,
      "loss": 0.0372,
      "step": 830
    },
    {
      "epoch": 0.18466666666666667,
      "grad_norm": 0.14056961238384247,
      "learning_rate": 0.00016342984409799556,
      "loss": 0.0209,
      "step": 831
    },
    {
      "epoch": 0.18488888888888888,
      "grad_norm": 0.6902725696563721,
      "learning_rate": 0.00016338530066815145,
      "loss": 0.8186,
      "step": 832
    },
    {
      "epoch": 0.18511111111111112,
      "grad_norm": 0.8135867118835449,
      "learning_rate": 0.00016334075723830734,
      "loss": 0.8957,
      "step": 833
    },
    {
      "epoch": 0.18533333333333332,
      "grad_norm": 0.1166531890630722,
      "learning_rate": 0.00016329621380846326,
      "loss": 0.0205,
      "step": 834
    },
    {
      "epoch": 0.18555555555555556,
      "grad_norm": 0.09129589051008224,
      "learning_rate": 0.00016325167037861918,
      "loss": 0.0199,
      "step": 835
    },
    {
      "epoch": 0.18577777777777776,
      "grad_norm": 0.9971749782562256,
      "learning_rate": 0.00016320712694877507,
      "loss": 1.7846,
      "step": 836
    },
    {
      "epoch": 0.186,
      "grad_norm": 1.054129719734192,
      "learning_rate": 0.00016316258351893096,
      "loss": 1.4707,
      "step": 837
    },
    {
      "epoch": 0.18622222222222223,
      "grad_norm": 1.067037582397461,
      "learning_rate": 0.00016311804008908685,
      "loss": 1.8865,
      "step": 838
    },
    {
      "epoch": 0.18644444444444444,
      "grad_norm": 1.2100924253463745,
      "learning_rate": 0.00016307349665924277,
      "loss": 1.8347,
      "step": 839
    },
    {
      "epoch": 0.18666666666666668,
      "grad_norm": 1.0970559120178223,
      "learning_rate": 0.00016302895322939867,
      "loss": 1.7943,
      "step": 840
    },
    {
      "epoch": 0.18688888888888888,
      "grad_norm": 1.0739829540252686,
      "learning_rate": 0.00016298440979955458,
      "loss": 1.6663,
      "step": 841
    },
    {
      "epoch": 0.18711111111111112,
      "grad_norm": 0.9962330460548401,
      "learning_rate": 0.00016293986636971048,
      "loss": 1.6769,
      "step": 842
    },
    {
      "epoch": 0.18733333333333332,
      "grad_norm": 0.9708060026168823,
      "learning_rate": 0.00016289532293986637,
      "loss": 1.4129,
      "step": 843
    },
    {
      "epoch": 0.18755555555555556,
      "grad_norm": 0.9281109571456909,
      "learning_rate": 0.0001628507795100223,
      "loss": 1.276,
      "step": 844
    },
    {
      "epoch": 0.18777777777777777,
      "grad_norm": 0.7527890801429749,
      "learning_rate": 0.00016280623608017818,
      "loss": 0.8151,
      "step": 845
    },
    {
      "epoch": 0.188,
      "grad_norm": 1.0176119804382324,
      "learning_rate": 0.00016276169265033407,
      "loss": 1.1774,
      "step": 846
    },
    {
      "epoch": 0.18822222222222224,
      "grad_norm": 1.2225992679595947,
      "learning_rate": 0.00016271714922049,
      "loss": 1.2259,
      "step": 847
    },
    {
      "epoch": 0.18844444444444444,
      "grad_norm": 0.7326065897941589,
      "learning_rate": 0.00016267260579064588,
      "loss": 0.5637,
      "step": 848
    },
    {
      "epoch": 0.18866666666666668,
      "grad_norm": 0.3323253095149994,
      "learning_rate": 0.0001626280623608018,
      "loss": 0.05,
      "step": 849
    },
    {
      "epoch": 0.18888888888888888,
      "grad_norm": 1.0131728649139404,
      "learning_rate": 0.0001625835189309577,
      "loss": 0.8704,
      "step": 850
    },
    {
      "epoch": 0.18911111111111112,
      "grad_norm": 0.9873238205909729,
      "learning_rate": 0.00016253897550111358,
      "loss": 2.2498,
      "step": 851
    },
    {
      "epoch": 0.18933333333333333,
      "grad_norm": 0.8620486855506897,
      "learning_rate": 0.00016249443207126948,
      "loss": 2.1671,
      "step": 852
    },
    {
      "epoch": 0.18955555555555556,
      "grad_norm": 0.5899505019187927,
      "learning_rate": 0.0001624498886414254,
      "loss": 0.9804,
      "step": 853
    },
    {
      "epoch": 0.18977777777777777,
      "grad_norm": 0.5602113604545593,
      "learning_rate": 0.0001624053452115813,
      "loss": 1.1528,
      "step": 854
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9018243551254272,
      "learning_rate": 0.0001623608017817372,
      "loss": 2.1603,
      "step": 855
    },
    {
      "epoch": 0.1902222222222222,
      "grad_norm": 1.0384292602539062,
      "learning_rate": 0.0001623162583518931,
      "loss": 0.0507,
      "step": 856
    },
    {
      "epoch": 0.19044444444444444,
      "grad_norm": 0.16755282878875732,
      "learning_rate": 0.000162271714922049,
      "loss": 0.0188,
      "step": 857
    },
    {
      "epoch": 0.19066666666666668,
      "grad_norm": 0.5630192756652832,
      "learning_rate": 0.0001622271714922049,
      "loss": 1.1861,
      "step": 858
    },
    {
      "epoch": 0.19088888888888889,
      "grad_norm": 0.8935820460319519,
      "learning_rate": 0.00016218262806236083,
      "loss": 2.1745,
      "step": 859
    },
    {
      "epoch": 0.19111111111111112,
      "grad_norm": 0.8242104649543762,
      "learning_rate": 0.00016213808463251672,
      "loss": 1.8877,
      "step": 860
    },
    {
      "epoch": 0.19133333333333333,
      "grad_norm": 0.8368860483169556,
      "learning_rate": 0.0001620935412026726,
      "loss": 2.219,
      "step": 861
    },
    {
      "epoch": 0.19155555555555556,
      "grad_norm": 0.9018285870552063,
      "learning_rate": 0.0001620489977728285,
      "loss": 2.1182,
      "step": 862
    },
    {
      "epoch": 0.19177777777777777,
      "grad_norm": 0.8542325496673584,
      "learning_rate": 0.00016200445434298442,
      "loss": 1.9889,
      "step": 863
    },
    {
      "epoch": 0.192,
      "grad_norm": 1.7638332843780518,
      "learning_rate": 0.0001619599109131403,
      "loss": 0.1212,
      "step": 864
    },
    {
      "epoch": 0.1922222222222222,
      "grad_norm": 0.8848968148231506,
      "learning_rate": 0.00016191536748329623,
      "loss": 1.8857,
      "step": 865
    },
    {
      "epoch": 0.19244444444444445,
      "grad_norm": 0.8312684893608093,
      "learning_rate": 0.00016187082405345212,
      "loss": 2.09,
      "step": 866
    },
    {
      "epoch": 0.19266666666666668,
      "grad_norm": 0.8967249989509583,
      "learning_rate": 0.00016182628062360802,
      "loss": 1.8655,
      "step": 867
    },
    {
      "epoch": 0.1928888888888889,
      "grad_norm": 1.0011157989501953,
      "learning_rate": 0.00016178173719376393,
      "loss": 2.1108,
      "step": 868
    },
    {
      "epoch": 0.19311111111111112,
      "grad_norm": 0.8654418587684631,
      "learning_rate": 0.00016173719376391983,
      "loss": 1.8368,
      "step": 869
    },
    {
      "epoch": 0.19333333333333333,
      "grad_norm": 0.6689291596412659,
      "learning_rate": 0.00016169265033407572,
      "loss": 0.9421,
      "step": 870
    },
    {
      "epoch": 0.19355555555555556,
      "grad_norm": 0.19246675074100494,
      "learning_rate": 0.00016164810690423164,
      "loss": 0.0235,
      "step": 871
    },
    {
      "epoch": 0.19377777777777777,
      "grad_norm": 0.17772454023361206,
      "learning_rate": 0.00016160356347438756,
      "loss": 0.0224,
      "step": 872
    },
    {
      "epoch": 0.194,
      "grad_norm": 0.145878866314888,
      "learning_rate": 0.00016155902004454345,
      "loss": 0.0196,
      "step": 873
    },
    {
      "epoch": 0.1942222222222222,
      "grad_norm": 0.6575565338134766,
      "learning_rate": 0.00016151447661469934,
      "loss": 0.9732,
      "step": 874
    },
    {
      "epoch": 0.19444444444444445,
      "grad_norm": 0.949036180973053,
      "learning_rate": 0.00016146993318485523,
      "loss": 0.8626,
      "step": 875
    },
    {
      "epoch": 0.19466666666666665,
      "grad_norm": 0.9357439279556274,
      "learning_rate": 0.00016142538975501112,
      "loss": 1.8252,
      "step": 876
    },
    {
      "epoch": 0.1948888888888889,
      "grad_norm": 1.0359864234924316,
      "learning_rate": 0.00016138084632516707,
      "loss": 1.9521,
      "step": 877
    },
    {
      "epoch": 0.19511111111111112,
      "grad_norm": 0.9912081360816956,
      "learning_rate": 0.00016133630289532296,
      "loss": 1.7392,
      "step": 878
    },
    {
      "epoch": 0.19533333333333333,
      "grad_norm": 0.839015543460846,
      "learning_rate": 0.00016129175946547885,
      "loss": 0.9531,
      "step": 879
    },
    {
      "epoch": 0.19555555555555557,
      "grad_norm": 0.40147385001182556,
      "learning_rate": 0.00016124721603563474,
      "loss": 0.0272,
      "step": 880
    },
    {
      "epoch": 0.19577777777777777,
      "grad_norm": 0.09291915595531464,
      "learning_rate": 0.00016120267260579066,
      "loss": 0.0201,
      "step": 881
    },
    {
      "epoch": 0.196,
      "grad_norm": 0.7130599021911621,
      "learning_rate": 0.00016115812917594656,
      "loss": 0.9825,
      "step": 882
    },
    {
      "epoch": 0.19622222222222221,
      "grad_norm": 0.6923748254776001,
      "learning_rate": 0.00016111358574610245,
      "loss": 0.6706,
      "step": 883
    },
    {
      "epoch": 0.19644444444444445,
      "grad_norm": 0.09329139441251755,
      "learning_rate": 0.00016106904231625837,
      "loss": 0.0186,
      "step": 884
    },
    {
      "epoch": 0.19666666666666666,
      "grad_norm": 0.6442102789878845,
      "learning_rate": 0.00016102449888641426,
      "loss": 0.6065,
      "step": 885
    },
    {
      "epoch": 0.1968888888888889,
      "grad_norm": 1.2954368591308594,
      "learning_rate": 0.00016097995545657018,
      "loss": 1.7283,
      "step": 886
    },
    {
      "epoch": 0.1971111111111111,
      "grad_norm": 0.8830384612083435,
      "learning_rate": 0.00016093541202672607,
      "loss": 0.8123,
      "step": 887
    },
    {
      "epoch": 0.19733333333333333,
      "grad_norm": 1.0833762884140015,
      "learning_rate": 0.00016089086859688196,
      "loss": 1.6151,
      "step": 888
    },
    {
      "epoch": 0.19755555555555557,
      "grad_norm": 1.110963225364685,
      "learning_rate": 0.00016084632516703785,
      "loss": 1.5561,
      "step": 889
    },
    {
      "epoch": 0.19777777777777777,
      "grad_norm": 0.9899112582206726,
      "learning_rate": 0.00016080178173719377,
      "loss": 1.2833,
      "step": 890
    },
    {
      "epoch": 0.198,
      "grad_norm": 1.1277340650558472,
      "learning_rate": 0.0001607572383073497,
      "loss": 1.8641,
      "step": 891
    },
    {
      "epoch": 0.19822222222222222,
      "grad_norm": 1.1691646575927734,
      "learning_rate": 0.00016071269487750558,
      "loss": 1.3711,
      "step": 892
    },
    {
      "epoch": 0.19844444444444445,
      "grad_norm": 1.13434636592865,
      "learning_rate": 0.00016066815144766147,
      "loss": 1.6223,
      "step": 893
    },
    {
      "epoch": 0.19866666666666666,
      "grad_norm": 0.7454355955123901,
      "learning_rate": 0.00016062360801781737,
      "loss": 0.6587,
      "step": 894
    },
    {
      "epoch": 0.1988888888888889,
      "grad_norm": 1.034938931465149,
      "learning_rate": 0.00016057906458797328,
      "loss": 1.203,
      "step": 895
    },
    {
      "epoch": 0.1991111111111111,
      "grad_norm": 1.118506908416748,
      "learning_rate": 0.0001605345211581292,
      "loss": 1.1357,
      "step": 896
    },
    {
      "epoch": 0.19933333333333333,
      "grad_norm": 0.6530042290687561,
      "learning_rate": 0.0001604899777282851,
      "loss": 0.5032,
      "step": 897
    },
    {
      "epoch": 0.19955555555555557,
      "grad_norm": 0.6382874846458435,
      "learning_rate": 0.000160445434298441,
      "loss": 0.475,
      "step": 898
    },
    {
      "epoch": 0.19977777777777778,
      "grad_norm": 1.1674174070358276,
      "learning_rate": 0.00016040089086859688,
      "loss": 0.9756,
      "step": 899
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1725280284881592,
      "learning_rate": 0.0001603563474387528,
      "loss": 0.7818,
      "step": 900
    },
    {
      "epoch": 0.2,
      "eval_loss": 1.222800612449646,
      "eval_runtime": 242.0579,
      "eval_samples_per_second": 4.131,
      "eval_steps_per_second": 4.131,
      "step": 900
    },
    {
      "epoch": 0.20022222222222222,
      "grad_norm": 0.5205540060997009,
      "learning_rate": 0.0001603118040089087,
      "loss": 0.9559,
      "step": 901
    },
    {
      "epoch": 0.20044444444444445,
      "grad_norm": 0.08372654020786285,
      "learning_rate": 0.0001602672605790646,
      "loss": 0.0132,
      "step": 902
    },
    {
      "epoch": 0.20066666666666666,
      "grad_norm": 0.07322391122579575,
      "learning_rate": 0.0001602227171492205,
      "loss": 0.0129,
      "step": 903
    },
    {
      "epoch": 0.2008888888888889,
      "grad_norm": 0.07906320691108704,
      "learning_rate": 0.0001601781737193764,
      "loss": 0.0128,
      "step": 904
    },
    {
      "epoch": 0.2011111111111111,
      "grad_norm": 0.546818196773529,
      "learning_rate": 0.0001601336302895323,
      "loss": 1.0331,
      "step": 905
    },
    {
      "epoch": 0.20133333333333334,
      "grad_norm": 0.8553550243377686,
      "learning_rate": 0.0001600890868596882,
      "loss": 1.9287,
      "step": 906
    },
    {
      "epoch": 0.20155555555555554,
      "grad_norm": 0.1751287281513214,
      "learning_rate": 0.0001600445434298441,
      "loss": 0.017,
      "step": 907
    },
    {
      "epoch": 0.20177777777777778,
      "grad_norm": 0.6139826774597168,
      "learning_rate": 0.00016,
      "loss": 1.0172,
      "step": 908
    },
    {
      "epoch": 0.202,
      "grad_norm": 0.8161941766738892,
      "learning_rate": 0.0001599554565701559,
      "loss": 2.0837,
      "step": 909
    },
    {
      "epoch": 0.20222222222222222,
      "grad_norm": 0.9680670499801636,
      "learning_rate": 0.00015991091314031182,
      "loss": 2.0895,
      "step": 910
    },
    {
      "epoch": 0.20244444444444445,
      "grad_norm": 1.0817011594772339,
      "learning_rate": 0.00015986636971046772,
      "loss": 2.2301,
      "step": 911
    },
    {
      "epoch": 0.20266666666666666,
      "grad_norm": 0.9138790369033813,
      "learning_rate": 0.0001598218262806236,
      "loss": 1.9302,
      "step": 912
    },
    {
      "epoch": 0.2028888888888889,
      "grad_norm": 0.8514297604560852,
      "learning_rate": 0.0001597772828507795,
      "loss": 0.9703,
      "step": 913
    },
    {
      "epoch": 0.2031111111111111,
      "grad_norm": 0.9511028528213501,
      "learning_rate": 0.00015973273942093542,
      "loss": 2.1424,
      "step": 914
    },
    {
      "epoch": 0.20333333333333334,
      "grad_norm": 0.8551965355873108,
      "learning_rate": 0.00015968819599109134,
      "loss": 1.825,
      "step": 915
    },
    {
      "epoch": 0.20355555555555555,
      "grad_norm": 0.8839070200920105,
      "learning_rate": 0.00015964365256124723,
      "loss": 2.013,
      "step": 916
    },
    {
      "epoch": 0.20377777777777778,
      "grad_norm": 0.9161024689674377,
      "learning_rate": 0.00015959910913140312,
      "loss": 1.9285,
      "step": 917
    },
    {
      "epoch": 0.204,
      "grad_norm": 0.8351966738700867,
      "learning_rate": 0.000159554565701559,
      "loss": 1.7185,
      "step": 918
    },
    {
      "epoch": 0.20422222222222222,
      "grad_norm": 0.9648137092590332,
      "learning_rate": 0.00015951002227171493,
      "loss": 2.0408,
      "step": 919
    },
    {
      "epoch": 0.20444444444444446,
      "grad_norm": 1.0050605535507202,
      "learning_rate": 0.00015946547884187085,
      "loss": 1.9834,
      "step": 920
    },
    {
      "epoch": 0.20466666666666666,
      "grad_norm": 0.5988847613334656,
      "learning_rate": 0.00015942093541202674,
      "loss": 0.8184,
      "step": 921
    },
    {
      "epoch": 0.2048888888888889,
      "grad_norm": 0.39480581879615784,
      "learning_rate": 0.00015937639198218263,
      "loss": 0.0215,
      "step": 922
    },
    {
      "epoch": 0.2051111111111111,
      "grad_norm": 0.08290968835353851,
      "learning_rate": 0.00015933184855233853,
      "loss": 0.0178,
      "step": 923
    },
    {
      "epoch": 0.20533333333333334,
      "grad_norm": 0.0785195529460907,
      "learning_rate": 0.00015928730512249444,
      "loss": 0.0172,
      "step": 924
    },
    {
      "epoch": 0.20555555555555555,
      "grad_norm": 0.901236891746521,
      "learning_rate": 0.00015924276169265034,
      "loss": 1.7259,
      "step": 925
    },
    {
      "epoch": 0.20577777777777778,
      "grad_norm": 1.0050063133239746,
      "learning_rate": 0.00015919821826280626,
      "loss": 1.8985,
      "step": 926
    },
    {
      "epoch": 0.206,
      "grad_norm": 1.1211670637130737,
      "learning_rate": 0.00015915367483296215,
      "loss": 2.0145,
      "step": 927
    },
    {
      "epoch": 0.20622222222222222,
      "grad_norm": 0.9888872504234314,
      "learning_rate": 0.00015910913140311804,
      "loss": 1.8903,
      "step": 928
    },
    {
      "epoch": 0.20644444444444446,
      "grad_norm": 1.1626695394515991,
      "learning_rate": 0.00015906458797327396,
      "loss": 1.8271,
      "step": 929
    },
    {
      "epoch": 0.20666666666666667,
      "grad_norm": 1.0316535234451294,
      "learning_rate": 0.00015902004454342985,
      "loss": 1.6792,
      "step": 930
    },
    {
      "epoch": 0.2068888888888889,
      "grad_norm": 0.9496293663978577,
      "learning_rate": 0.00015897550111358574,
      "loss": 1.825,
      "step": 931
    },
    {
      "epoch": 0.2071111111111111,
      "grad_norm": 0.29808974266052246,
      "learning_rate": 0.00015893095768374166,
      "loss": 0.0233,
      "step": 932
    },
    {
      "epoch": 0.20733333333333334,
      "grad_norm": 0.11366426944732666,
      "learning_rate": 0.00015888641425389755,
      "loss": 0.0209,
      "step": 933
    },
    {
      "epoch": 0.20755555555555555,
      "grad_norm": 1.003063678741455,
      "learning_rate": 0.00015884187082405347,
      "loss": 1.7181,
      "step": 934
    },
    {
      "epoch": 0.20777777777777778,
      "grad_norm": 1.1148592233657837,
      "learning_rate": 0.00015879732739420936,
      "loss": 2.0231,
      "step": 935
    },
    {
      "epoch": 0.208,
      "grad_norm": 0.08369893580675125,
      "learning_rate": 0.00015875278396436525,
      "loss": 0.0197,
      "step": 936
    },
    {
      "epoch": 0.20822222222222223,
      "grad_norm": 0.6577330827713013,
      "learning_rate": 0.00015870824053452115,
      "loss": 0.7512,
      "step": 937
    },
    {
      "epoch": 0.20844444444444443,
      "grad_norm": 0.9851068258285522,
      "learning_rate": 0.00015866369710467707,
      "loss": 0.724,
      "step": 938
    },
    {
      "epoch": 0.20866666666666667,
      "grad_norm": 0.9157700538635254,
      "learning_rate": 0.00015861915367483298,
      "loss": 1.6358,
      "step": 939
    },
    {
      "epoch": 0.2088888888888889,
      "grad_norm": 1.2965058088302612,
      "learning_rate": 0.00015857461024498888,
      "loss": 1.4643,
      "step": 940
    },
    {
      "epoch": 0.2091111111111111,
      "grad_norm": 1.1065465211868286,
      "learning_rate": 0.00015853006681514477,
      "loss": 1.4903,
      "step": 941
    },
    {
      "epoch": 0.20933333333333334,
      "grad_norm": 1.0369428396224976,
      "learning_rate": 0.00015848552338530066,
      "loss": 1.4679,
      "step": 942
    },
    {
      "epoch": 0.20955555555555555,
      "grad_norm": 1.2130650281906128,
      "learning_rate": 0.00015844097995545658,
      "loss": 1.4029,
      "step": 943
    },
    {
      "epoch": 0.20977777777777779,
      "grad_norm": 0.7914889454841614,
      "learning_rate": 0.0001583964365256125,
      "loss": 0.7555,
      "step": 944
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.36863523721694946,
      "learning_rate": 0.0001583518930957684,
      "loss": 0.0382,
      "step": 945
    },
    {
      "epoch": 0.21022222222222223,
      "grad_norm": 0.6304244995117188,
      "learning_rate": 0.00015830734966592428,
      "loss": 0.6299,
      "step": 946
    },
    {
      "epoch": 0.21044444444444443,
      "grad_norm": 1.0273418426513672,
      "learning_rate": 0.00015826280623608017,
      "loss": 1.5365,
      "step": 947
    },
    {
      "epoch": 0.21066666666666667,
      "grad_norm": 0.9680097103118896,
      "learning_rate": 0.0001582182628062361,
      "loss": 1.1998,
      "step": 948
    },
    {
      "epoch": 0.21088888888888888,
      "grad_norm": 0.338010311126709,
      "learning_rate": 0.00015817371937639198,
      "loss": 0.0447,
      "step": 949
    },
    {
      "epoch": 0.2111111111111111,
      "grad_norm": 1.1934006214141846,
      "learning_rate": 0.0001581291759465479,
      "loss": 1.1876,
      "step": 950
    },
    {
      "epoch": 0.21133333333333335,
      "grad_norm": 0.6406352519989014,
      "learning_rate": 0.0001580846325167038,
      "loss": 1.2899,
      "step": 951
    },
    {
      "epoch": 0.21155555555555555,
      "grad_norm": 0.06388068944215775,
      "learning_rate": 0.00015804008908685969,
      "loss": 0.0118,
      "step": 952
    },
    {
      "epoch": 0.2117777777777778,
      "grad_norm": 0.5354200005531311,
      "learning_rate": 0.0001579955456570156,
      "loss": 1.1527,
      "step": 953
    },
    {
      "epoch": 0.212,
      "grad_norm": 0.7551606893539429,
      "learning_rate": 0.0001579510022271715,
      "loss": 2.0341,
      "step": 954
    },
    {
      "epoch": 0.21222222222222223,
      "grad_norm": 0.5737783908843994,
      "learning_rate": 0.0001579064587973274,
      "loss": 1.0804,
      "step": 955
    },
    {
      "epoch": 0.21244444444444444,
      "grad_norm": 0.8123324513435364,
      "learning_rate": 0.00015786191536748328,
      "loss": 2.1932,
      "step": 956
    },
    {
      "epoch": 0.21266666666666667,
      "grad_norm": 0.6195405721664429,
      "learning_rate": 0.00015781737193763923,
      "loss": 1.1485,
      "step": 957
    },
    {
      "epoch": 0.21288888888888888,
      "grad_norm": 0.08890359103679657,
      "learning_rate": 0.00015777282850779512,
      "loss": 0.0144,
      "step": 958
    },
    {
      "epoch": 0.2131111111111111,
      "grad_norm": 0.5360710024833679,
      "learning_rate": 0.000157728285077951,
      "loss": 1.2024,
      "step": 959
    },
    {
      "epoch": 0.21333333333333335,
      "grad_norm": 0.8111135959625244,
      "learning_rate": 0.0001576837416481069,
      "loss": 2.0705,
      "step": 960
    },
    {
      "epoch": 0.21355555555555555,
      "grad_norm": 0.8970634937286377,
      "learning_rate": 0.0001576391982182628,
      "loss": 1.9576,
      "step": 961
    },
    {
      "epoch": 0.2137777777777778,
      "grad_norm": 0.9263898730278015,
      "learning_rate": 0.0001575946547884187,
      "loss": 2.3869,
      "step": 962
    },
    {
      "epoch": 0.214,
      "grad_norm": 0.9336531758308411,
      "learning_rate": 0.00015755011135857463,
      "loss": 2.125,
      "step": 963
    },
    {
      "epoch": 0.21422222222222223,
      "grad_norm": 0.7852495908737183,
      "learning_rate": 0.00015750556792873052,
      "loss": 0.9513,
      "step": 964
    },
    {
      "epoch": 0.21444444444444444,
      "grad_norm": 0.17854809761047363,
      "learning_rate": 0.00015746102449888642,
      "loss": 0.0245,
      "step": 965
    },
    {
      "epoch": 0.21466666666666667,
      "grad_norm": 0.9751142263412476,
      "learning_rate": 0.00015741648106904233,
      "loss": 1.6707,
      "step": 966
    },
    {
      "epoch": 0.21488888888888888,
      "grad_norm": 0.8332890272140503,
      "learning_rate": 0.00015737193763919823,
      "loss": 1.9127,
      "step": 967
    },
    {
      "epoch": 0.21511111111111111,
      "grad_norm": 0.9040923118591309,
      "learning_rate": 0.00015732739420935412,
      "loss": 2.0763,
      "step": 968
    },
    {
      "epoch": 0.21533333333333332,
      "grad_norm": 1.0410791635513306,
      "learning_rate": 0.00015728285077951004,
      "loss": 2.3181,
      "step": 969
    },
    {
      "epoch": 0.21555555555555556,
      "grad_norm": 0.9526614546775818,
      "learning_rate": 0.00015723830734966593,
      "loss": 1.8509,
      "step": 970
    },
    {
      "epoch": 0.2157777777777778,
      "grad_norm": 0.07503191381692886,
      "learning_rate": 0.00015719376391982185,
      "loss": 0.0165,
      "step": 971
    },
    {
      "epoch": 0.216,
      "grad_norm": 0.07039511948823929,
      "learning_rate": 0.00015714922048997774,
      "loss": 0.0168,
      "step": 972
    },
    {
      "epoch": 0.21622222222222223,
      "grad_norm": 0.6502916216850281,
      "learning_rate": 0.00015710467706013363,
      "loss": 0.9638,
      "step": 973
    },
    {
      "epoch": 0.21644444444444444,
      "grad_norm": 0.8625080585479736,
      "learning_rate": 0.00015706013363028952,
      "loss": 1.9742,
      "step": 974
    },
    {
      "epoch": 0.21666666666666667,
      "grad_norm": 0.9066895842552185,
      "learning_rate": 0.00015701559020044544,
      "loss": 1.6882,
      "step": 975
    },
    {
      "epoch": 0.21688888888888888,
      "grad_norm": 0.8513925075531006,
      "learning_rate": 0.00015697104677060136,
      "loss": 1.7857,
      "step": 976
    },
    {
      "epoch": 0.21711111111111112,
      "grad_norm": 0.9731569886207581,
      "learning_rate": 0.00015692650334075725,
      "loss": 1.6924,
      "step": 977
    },
    {
      "epoch": 0.21733333333333332,
      "grad_norm": 1.0422254800796509,
      "learning_rate": 0.00015688195991091314,
      "loss": 2.0664,
      "step": 978
    },
    {
      "epoch": 0.21755555555555556,
      "grad_norm": 0.9497055411338806,
      "learning_rate": 0.00015683741648106904,
      "loss": 1.7705,
      "step": 979
    },
    {
      "epoch": 0.21777777777777776,
      "grad_norm": 1.1931400299072266,
      "learning_rate": 0.00015679287305122495,
      "loss": 1.861,
      "step": 980
    },
    {
      "epoch": 0.218,
      "grad_norm": 0.40133345127105713,
      "learning_rate": 0.00015674832962138087,
      "loss": 0.0212,
      "step": 981
    },
    {
      "epoch": 0.21822222222222223,
      "grad_norm": 0.0913856029510498,
      "learning_rate": 0.00015670378619153677,
      "loss": 0.0189,
      "step": 982
    },
    {
      "epoch": 0.21844444444444444,
      "grad_norm": 0.08609500527381897,
      "learning_rate": 0.00015665924276169266,
      "loss": 0.0203,
      "step": 983
    },
    {
      "epoch": 0.21866666666666668,
      "grad_norm": 0.11373770982027054,
      "learning_rate": 0.00015661469933184855,
      "loss": 0.0218,
      "step": 984
    },
    {
      "epoch": 0.21888888888888888,
      "grad_norm": 0.09455129504203796,
      "learning_rate": 0.00015657015590200447,
      "loss": 0.0197,
      "step": 985
    },
    {
      "epoch": 0.21911111111111112,
      "grad_norm": 0.11562435328960419,
      "learning_rate": 0.00015652561247216036,
      "loss": 0.0198,
      "step": 986
    },
    {
      "epoch": 0.21933333333333332,
      "grad_norm": 0.07547328621149063,
      "learning_rate": 0.00015648106904231628,
      "loss": 0.0189,
      "step": 987
    },
    {
      "epoch": 0.21955555555555556,
      "grad_norm": 1.1698229312896729,
      "learning_rate": 0.00015643652561247217,
      "loss": 1.5906,
      "step": 988
    },
    {
      "epoch": 0.21977777777777777,
      "grad_norm": 0.8595744371414185,
      "learning_rate": 0.00015639198218262806,
      "loss": 0.7687,
      "step": 989
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0414891242980957,
      "learning_rate": 0.00015634743875278398,
      "loss": 1.5007,
      "step": 990
    },
    {
      "epoch": 0.22022222222222224,
      "grad_norm": 1.2998589277267456,
      "learning_rate": 0.00015630289532293987,
      "loss": 1.5281,
      "step": 991
    },
    {
      "epoch": 0.22044444444444444,
      "grad_norm": 1.1212791204452515,
      "learning_rate": 0.00015625835189309577,
      "loss": 1.6504,
      "step": 992
    },
    {
      "epoch": 0.22066666666666668,
      "grad_norm": 1.2405270338058472,
      "learning_rate": 0.00015621380846325168,
      "loss": 1.4231,
      "step": 993
    },
    {
      "epoch": 0.22088888888888888,
      "grad_norm": 0.7674121856689453,
      "learning_rate": 0.00015616926503340758,
      "loss": 0.709,
      "step": 994
    },
    {
      "epoch": 0.22111111111111112,
      "grad_norm": 0.19164682924747467,
      "learning_rate": 0.0001561247216035635,
      "loss": 0.0275,
      "step": 995
    },
    {
      "epoch": 0.22133333333333333,
      "grad_norm": 1.1732245683670044,
      "learning_rate": 0.0001560801781737194,
      "loss": 1.419,
      "step": 996
    },
    {
      "epoch": 0.22155555555555556,
      "grad_norm": 1.1951122283935547,
      "learning_rate": 0.00015603563474387528,
      "loss": 1.0835,
      "step": 997
    },
    {
      "epoch": 0.22177777777777777,
      "grad_norm": 0.1803571879863739,
      "learning_rate": 0.00015599109131403117,
      "loss": 0.0346,
      "step": 998
    },
    {
      "epoch": 0.222,
      "grad_norm": 0.9627526998519897,
      "learning_rate": 0.0001559465478841871,
      "loss": 1.0206,
      "step": 999
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 1.1441150903701782,
      "learning_rate": 0.000155902004454343,
      "loss": 0.8732,
      "step": 1000
    },
    {
      "epoch": 0.22244444444444444,
      "grad_norm": 0.6593534350395203,
      "learning_rate": 0.0001558574610244989,
      "loss": 1.1676,
      "step": 1001
    },
    {
      "epoch": 0.22266666666666668,
      "grad_norm": 0.664578914642334,
      "learning_rate": 0.0001558129175946548,
      "loss": 1.1948,
      "step": 1002
    },
    {
      "epoch": 0.2228888888888889,
      "grad_norm": 0.0607873909175396,
      "learning_rate": 0.00015576837416481068,
      "loss": 0.0118,
      "step": 1003
    },
    {
      "epoch": 0.22311111111111112,
      "grad_norm": 1.1000303030014038,
      "learning_rate": 0.0001557238307349666,
      "loss": 2.4529,
      "step": 1004
    },
    {
      "epoch": 0.22333333333333333,
      "grad_norm": 0.6257951259613037,
      "learning_rate": 0.00015567928730512252,
      "loss": 1.1959,
      "step": 1005
    },
    {
      "epoch": 0.22355555555555556,
      "grad_norm": 0.540494978427887,
      "learning_rate": 0.0001556347438752784,
      "loss": 1.1178,
      "step": 1006
    },
    {
      "epoch": 0.22377777777777777,
      "grad_norm": 0.8437963128089905,
      "learning_rate": 0.0001555902004454343,
      "loss": 2.0957,
      "step": 1007
    },
    {
      "epoch": 0.224,
      "grad_norm": 0.8972681760787964,
      "learning_rate": 0.0001555456570155902,
      "loss": 2.3074,
      "step": 1008
    },
    {
      "epoch": 0.2242222222222222,
      "grad_norm": 0.837619960308075,
      "learning_rate": 0.00015550111358574612,
      "loss": 1.8041,
      "step": 1009
    },
    {
      "epoch": 0.22444444444444445,
      "grad_norm": 0.6094018816947937,
      "learning_rate": 0.000155456570155902,
      "loss": 1.1169,
      "step": 1010
    },
    {
      "epoch": 0.22466666666666665,
      "grad_norm": 1.000135064125061,
      "learning_rate": 0.00015541202672605793,
      "loss": 2.1065,
      "step": 1011
    },
    {
      "epoch": 0.2248888888888889,
      "grad_norm": 0.8690905570983887,
      "learning_rate": 0.00015536748329621382,
      "loss": 2.1307,
      "step": 1012
    },
    {
      "epoch": 0.22511111111111112,
      "grad_norm": 0.7830039858818054,
      "learning_rate": 0.0001553229398663697,
      "loss": 2.0187,
      "step": 1013
    },
    {
      "epoch": 0.22533333333333333,
      "grad_norm": 0.9192244410514832,
      "learning_rate": 0.00015527839643652563,
      "loss": 2.1958,
      "step": 1014
    },
    {
      "epoch": 0.22555555555555556,
      "grad_norm": 1.086327314376831,
      "learning_rate": 0.00015523385300668152,
      "loss": 2.2896,
      "step": 1015
    },
    {
      "epoch": 0.22577777777777777,
      "grad_norm": 0.6661816239356995,
      "learning_rate": 0.0001551893095768374,
      "loss": 1.0338,
      "step": 1016
    },
    {
      "epoch": 0.226,
      "grad_norm": 0.1229943260550499,
      "learning_rate": 0.00015514476614699333,
      "loss": 0.019,
      "step": 1017
    },
    {
      "epoch": 0.2262222222222222,
      "grad_norm": 0.6619732975959778,
      "learning_rate": 0.00015510022271714922,
      "loss": 0.8572,
      "step": 1018
    },
    {
      "epoch": 0.22644444444444445,
      "grad_norm": 0.9510409832000732,
      "learning_rate": 0.00015505567928730514,
      "loss": 1.8705,
      "step": 1019
    },
    {
      "epoch": 0.22666666666666666,
      "grad_norm": 0.9156755805015564,
      "learning_rate": 0.00015501113585746103,
      "loss": 1.7297,
      "step": 1020
    },
    {
      "epoch": 0.2268888888888889,
      "grad_norm": 0.8265693187713623,
      "learning_rate": 0.00015496659242761693,
      "loss": 1.8998,
      "step": 1021
    },
    {
      "epoch": 0.22711111111111112,
      "grad_norm": 0.9348383545875549,
      "learning_rate": 0.00015492204899777282,
      "loss": 1.7829,
      "step": 1022
    },
    {
      "epoch": 0.22733333333333333,
      "grad_norm": 0.07994972914457321,
      "learning_rate": 0.00015487750556792874,
      "loss": 0.0171,
      "step": 1023
    },
    {
      "epoch": 0.22755555555555557,
      "grad_norm": 0.9954615831375122,
      "learning_rate": 0.00015483296213808466,
      "loss": 2.0246,
      "step": 1024
    },
    {
      "epoch": 0.22777777777777777,
      "grad_norm": 0.811485230922699,
      "learning_rate": 0.00015478841870824055,
      "loss": 0.9906,
      "step": 1025
    },
    {
      "epoch": 0.228,
      "grad_norm": 0.9506434202194214,
      "learning_rate": 0.00015474387527839644,
      "loss": 1.8778,
      "step": 1026
    },
    {
      "epoch": 0.22822222222222222,
      "grad_norm": 1.0921201705932617,
      "learning_rate": 0.00015469933184855233,
      "loss": 1.7785,
      "step": 1027
    },
    {
      "epoch": 0.22844444444444445,
      "grad_norm": 1.0791798830032349,
      "learning_rate": 0.00015465478841870825,
      "loss": 1.7389,
      "step": 1028
    },
    {
      "epoch": 0.22866666666666666,
      "grad_norm": 0.8696101307868958,
      "learning_rate": 0.00015461024498886414,
      "loss": 1.6503,
      "step": 1029
    },
    {
      "epoch": 0.2288888888888889,
      "grad_norm": 1.0953142642974854,
      "learning_rate": 0.00015456570155902006,
      "loss": 1.5829,
      "step": 1030
    },
    {
      "epoch": 0.2291111111111111,
      "grad_norm": 1.0775700807571411,
      "learning_rate": 0.00015452115812917595,
      "loss": 1.7792,
      "step": 1031
    },
    {
      "epoch": 0.22933333333333333,
      "grad_norm": 1.0957459211349487,
      "learning_rate": 0.00015447661469933184,
      "loss": 1.6229,
      "step": 1032
    },
    {
      "epoch": 0.22955555555555557,
      "grad_norm": 0.9933871030807495,
      "learning_rate": 0.00015443207126948776,
      "loss": 1.9686,
      "step": 1033
    },
    {
      "epoch": 0.22977777777777778,
      "grad_norm": 0.6201068758964539,
      "learning_rate": 0.00015438752783964365,
      "loss": 0.9276,
      "step": 1034
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.08361168205738068,
      "learning_rate": 0.00015434298440979955,
      "loss": 0.0183,
      "step": 1035
    },
    {
      "epoch": 0.23022222222222222,
      "grad_norm": 0.0638362318277359,
      "learning_rate": 0.00015429844097995547,
      "loss": 0.0174,
      "step": 1036
    },
    {
      "epoch": 0.23044444444444445,
      "grad_norm": 0.6617216467857361,
      "learning_rate": 0.00015425389755011136,
      "loss": 0.9611,
      "step": 1037
    },
    {
      "epoch": 0.23066666666666666,
      "grad_norm": 0.6723819375038147,
      "learning_rate": 0.00015420935412026728,
      "loss": 0.7742,
      "step": 1038
    },
    {
      "epoch": 0.2308888888888889,
      "grad_norm": 1.1894159317016602,
      "learning_rate": 0.00015416481069042317,
      "loss": 1.9163,
      "step": 1039
    },
    {
      "epoch": 0.2311111111111111,
      "grad_norm": 0.8032247424125671,
      "learning_rate": 0.00015412026726057906,
      "loss": 0.9474,
      "step": 1040
    },
    {
      "epoch": 0.23133333333333334,
      "grad_norm": 0.6486397981643677,
      "learning_rate": 0.00015407572383073495,
      "loss": 0.8002,
      "step": 1041
    },
    {
      "epoch": 0.23155555555555554,
      "grad_norm": 0.9920786619186401,
      "learning_rate": 0.0001540311804008909,
      "loss": 1.2296,
      "step": 1042
    },
    {
      "epoch": 0.23177777777777778,
      "grad_norm": 1.0267969369888306,
      "learning_rate": 0.0001539866369710468,
      "loss": 1.479,
      "step": 1043
    },
    {
      "epoch": 0.232,
      "grad_norm": 1.1731008291244507,
      "learning_rate": 0.00015394209354120268,
      "loss": 1.6044,
      "step": 1044
    },
    {
      "epoch": 0.23222222222222222,
      "grad_norm": 0.9902610182762146,
      "learning_rate": 0.00015389755011135857,
      "loss": 1.4472,
      "step": 1045
    },
    {
      "epoch": 0.23244444444444445,
      "grad_norm": 1.1087284088134766,
      "learning_rate": 0.00015385300668151446,
      "loss": 1.2586,
      "step": 1046
    },
    {
      "epoch": 0.23266666666666666,
      "grad_norm": 0.7335534691810608,
      "learning_rate": 0.00015380846325167038,
      "loss": 0.7099,
      "step": 1047
    },
    {
      "epoch": 0.2328888888888889,
      "grad_norm": 1.1326829195022583,
      "learning_rate": 0.0001537639198218263,
      "loss": 1.0497,
      "step": 1048
    },
    {
      "epoch": 0.2331111111111111,
      "grad_norm": 0.7942183613777161,
      "learning_rate": 0.0001537193763919822,
      "loss": 0.6312,
      "step": 1049
    },
    {
      "epoch": 0.23333333333333334,
      "grad_norm": 1.005306601524353,
      "learning_rate": 0.00015367483296213809,
      "loss": 0.7768,
      "step": 1050
    },
    {
      "epoch": 0.23355555555555554,
      "grad_norm": 0.9126147031784058,
      "learning_rate": 0.000153630289532294,
      "loss": 2.3264,
      "step": 1051
    },
    {
      "epoch": 0.23377777777777778,
      "grad_norm": 0.6329047083854675,
      "learning_rate": 0.0001535857461024499,
      "loss": 1.0818,
      "step": 1052
    },
    {
      "epoch": 0.234,
      "grad_norm": 0.05343164876103401,
      "learning_rate": 0.0001535412026726058,
      "loss": 0.0112,
      "step": 1053
    },
    {
      "epoch": 0.23422222222222222,
      "grad_norm": 0.9046483039855957,
      "learning_rate": 0.0001534966592427617,
      "loss": 2.37,
      "step": 1054
    },
    {
      "epoch": 0.23444444444444446,
      "grad_norm": 0.8647105693817139,
      "learning_rate": 0.0001534521158129176,
      "loss": 2.4517,
      "step": 1055
    },
    {
      "epoch": 0.23466666666666666,
      "grad_norm": 0.648027241230011,
      "learning_rate": 0.00015340757238307352,
      "loss": 1.0719,
      "step": 1056
    },
    {
      "epoch": 0.2348888888888889,
      "grad_norm": 0.5614693760871887,
      "learning_rate": 0.0001533630289532294,
      "loss": 1.1226,
      "step": 1057
    },
    {
      "epoch": 0.2351111111111111,
      "grad_norm": 0.8491237163543701,
      "learning_rate": 0.0001533184855233853,
      "loss": 2.069,
      "step": 1058
    },
    {
      "epoch": 0.23533333333333334,
      "grad_norm": 0.6382585167884827,
      "learning_rate": 0.0001532739420935412,
      "loss": 1.3122,
      "step": 1059
    },
    {
      "epoch": 0.23555555555555555,
      "grad_norm": 0.12528251111507416,
      "learning_rate": 0.0001532293986636971,
      "loss": 0.0166,
      "step": 1060
    },
    {
      "epoch": 0.23577777777777778,
      "grad_norm": 0.09820661693811417,
      "learning_rate": 0.00015318485523385303,
      "loss": 0.0157,
      "step": 1061
    },
    {
      "epoch": 0.236,
      "grad_norm": 0.537966787815094,
      "learning_rate": 0.00015314031180400892,
      "loss": 0.887,
      "step": 1062
    },
    {
      "epoch": 0.23622222222222222,
      "grad_norm": 0.869125485420227,
      "learning_rate": 0.00015309576837416482,
      "loss": 2.243,
      "step": 1063
    },
    {
      "epoch": 0.23644444444444446,
      "grad_norm": 0.9121571779251099,
      "learning_rate": 0.0001530512249443207,
      "loss": 2.0393,
      "step": 1064
    },
    {
      "epoch": 0.23666666666666666,
      "grad_norm": 0.8999320864677429,
      "learning_rate": 0.00015300668151447663,
      "loss": 2.0973,
      "step": 1065
    },
    {
      "epoch": 0.2368888888888889,
      "grad_norm": 1.0168380737304688,
      "learning_rate": 0.00015296213808463254,
      "loss": 1.9858,
      "step": 1066
    },
    {
      "epoch": 0.2371111111111111,
      "grad_norm": 0.10989035665988922,
      "learning_rate": 0.00015291759465478844,
      "loss": 0.0164,
      "step": 1067
    },
    {
      "epoch": 0.23733333333333334,
      "grad_norm": 0.09613081812858582,
      "learning_rate": 0.00015287305122494433,
      "loss": 0.0164,
      "step": 1068
    },
    {
      "epoch": 0.23755555555555555,
      "grad_norm": 1.0540229082107544,
      "learning_rate": 0.00015282850779510022,
      "loss": 2.1282,
      "step": 1069
    },
    {
      "epoch": 0.23777777777777778,
      "grad_norm": 0.6315290927886963,
      "learning_rate": 0.00015278396436525614,
      "loss": 0.9098,
      "step": 1070
    },
    {
      "epoch": 0.238,
      "grad_norm": 0.08182475715875626,
      "learning_rate": 0.00015273942093541203,
      "loss": 0.018,
      "step": 1071
    },
    {
      "epoch": 0.23822222222222222,
      "grad_norm": 0.07903318852186203,
      "learning_rate": 0.00015269487750556795,
      "loss": 0.0176,
      "step": 1072
    },
    {
      "epoch": 0.23844444444444443,
      "grad_norm": 0.07375822216272354,
      "learning_rate": 0.00015265033407572384,
      "loss": 0.017,
      "step": 1073
    },
    {
      "epoch": 0.23866666666666667,
      "grad_norm": 1.087067723274231,
      "learning_rate": 0.00015260579064587973,
      "loss": 1.0629,
      "step": 1074
    },
    {
      "epoch": 0.2388888888888889,
      "grad_norm": 0.9791780114173889,
      "learning_rate": 0.00015256124721603565,
      "loss": 1.8542,
      "step": 1075
    },
    {
      "epoch": 0.2391111111111111,
      "grad_norm": 0.920712411403656,
      "learning_rate": 0.00015251670378619154,
      "loss": 1.8871,
      "step": 1076
    },
    {
      "epoch": 0.23933333333333334,
      "grad_norm": 0.9495246410369873,
      "learning_rate": 0.00015247216035634744,
      "loss": 1.798,
      "step": 1077
    },
    {
      "epoch": 0.23955555555555555,
      "grad_norm": 0.927691638469696,
      "learning_rate": 0.00015242761692650335,
      "loss": 1.5861,
      "step": 1078
    },
    {
      "epoch": 0.23977777777777778,
      "grad_norm": 0.9088736176490784,
      "learning_rate": 0.00015238307349665925,
      "loss": 1.4221,
      "step": 1079
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.7401149868965149,
      "learning_rate": 0.00015233853006681517,
      "loss": 0.9053,
      "step": 1080
    },
    {
      "epoch": 0.24022222222222223,
      "grad_norm": 0.5902130007743835,
      "learning_rate": 0.00015229398663697106,
      "loss": 0.5401,
      "step": 1081
    },
    {
      "epoch": 0.24044444444444443,
      "grad_norm": 0.06568460166454315,
      "learning_rate": 0.00015224944320712695,
      "loss": 0.0187,
      "step": 1082
    },
    {
      "epoch": 0.24066666666666667,
      "grad_norm": 0.08122013509273529,
      "learning_rate": 0.00015220489977728284,
      "loss": 0.0183,
      "step": 1083
    },
    {
      "epoch": 0.2408888888888889,
      "grad_norm": 1.0280604362487793,
      "learning_rate": 0.00015216035634743876,
      "loss": 1.7743,
      "step": 1084
    },
    {
      "epoch": 0.2411111111111111,
      "grad_norm": 1.0982590913772583,
      "learning_rate": 0.00015211581291759468,
      "loss": 1.8326,
      "step": 1085
    },
    {
      "epoch": 0.24133333333333334,
      "grad_norm": 0.109102763235569,
      "learning_rate": 0.00015207126948775057,
      "loss": 0.0195,
      "step": 1086
    },
    {
      "epoch": 0.24155555555555555,
      "grad_norm": 0.1620292216539383,
      "learning_rate": 0.00015202672605790646,
      "loss": 0.0194,
      "step": 1087
    },
    {
      "epoch": 0.24177777777777779,
      "grad_norm": 0.11929841339588165,
      "learning_rate": 0.00015198218262806235,
      "loss": 0.0187,
      "step": 1088
    },
    {
      "epoch": 0.242,
      "grad_norm": 1.131895661354065,
      "learning_rate": 0.00015193763919821827,
      "loss": 1.6243,
      "step": 1089
    },
    {
      "epoch": 0.24222222222222223,
      "grad_norm": 0.714919924736023,
      "learning_rate": 0.0001518930957683742,
      "loss": 0.8017,
      "step": 1090
    },
    {
      "epoch": 0.24244444444444443,
      "grad_norm": 0.9816338419914246,
      "learning_rate": 0.00015184855233853008,
      "loss": 1.386,
      "step": 1091
    },
    {
      "epoch": 0.24266666666666667,
      "grad_norm": 1.0071079730987549,
      "learning_rate": 0.00015180400890868598,
      "loss": 1.5263,
      "step": 1092
    },
    {
      "epoch": 0.24288888888888888,
      "grad_norm": 1.1087195873260498,
      "learning_rate": 0.00015175946547884187,
      "loss": 1.7932,
      "step": 1093
    },
    {
      "epoch": 0.2431111111111111,
      "grad_norm": 0.9575614333152771,
      "learning_rate": 0.00015171492204899779,
      "loss": 1.5521,
      "step": 1094
    },
    {
      "epoch": 0.24333333333333335,
      "grad_norm": 0.7373946309089661,
      "learning_rate": 0.00015167037861915368,
      "loss": 0.7751,
      "step": 1095
    },
    {
      "epoch": 0.24355555555555555,
      "grad_norm": 1.1326210498809814,
      "learning_rate": 0.0001516258351893096,
      "loss": 1.3552,
      "step": 1096
    },
    {
      "epoch": 0.2437777777777778,
      "grad_norm": 1.0046254396438599,
      "learning_rate": 0.0001515812917594655,
      "loss": 1.3028,
      "step": 1097
    },
    {
      "epoch": 0.244,
      "grad_norm": 1.095888376235962,
      "learning_rate": 0.00015153674832962138,
      "loss": 1.2255,
      "step": 1098
    },
    {
      "epoch": 0.24422222222222223,
      "grad_norm": 0.9542917609214783,
      "learning_rate": 0.0001514922048997773,
      "loss": 0.6265,
      "step": 1099
    },
    {
      "epoch": 0.24444444444444444,
      "grad_norm": 0.17893174290657043,
      "learning_rate": 0.0001514476614699332,
      "loss": 0.037,
      "step": 1100
    },
    {
      "epoch": 0.24466666666666667,
      "grad_norm": 0.8549418449401855,
      "learning_rate": 0.00015140311804008908,
      "loss": 2.1731,
      "step": 1101
    },
    {
      "epoch": 0.24488888888888888,
      "grad_norm": 0.7608986496925354,
      "learning_rate": 0.00015135857461024498,
      "loss": 1.9913,
      "step": 1102
    },
    {
      "epoch": 0.2451111111111111,
      "grad_norm": 0.9019181728363037,
      "learning_rate": 0.0001513140311804009,
      "loss": 2.2666,
      "step": 1103
    },
    {
      "epoch": 0.24533333333333332,
      "grad_norm": 0.07070691883563995,
      "learning_rate": 0.0001512694877505568,
      "loss": 0.0127,
      "step": 1104
    },
    {
      "epoch": 0.24555555555555555,
      "grad_norm": 0.07447178661823273,
      "learning_rate": 0.0001512249443207127,
      "loss": 0.0126,
      "step": 1105
    },
    {
      "epoch": 0.2457777777777778,
      "grad_norm": 0.07123079895973206,
      "learning_rate": 0.0001511804008908686,
      "loss": 0.0125,
      "step": 1106
    },
    {
      "epoch": 0.246,
      "grad_norm": 0.06680039316415787,
      "learning_rate": 0.0001511358574610245,
      "loss": 0.012,
      "step": 1107
    },
    {
      "epoch": 0.24622222222222223,
      "grad_norm": 0.8252882361412048,
      "learning_rate": 0.0001510913140311804,
      "loss": 1.9517,
      "step": 1108
    },
    {
      "epoch": 0.24644444444444444,
      "grad_norm": 0.8782687783241272,
      "learning_rate": 0.00015104677060133633,
      "loss": 2.119,
      "step": 1109
    },
    {
      "epoch": 0.24666666666666667,
      "grad_norm": 0.9111925363540649,
      "learning_rate": 0.00015100222717149222,
      "loss": 2.3125,
      "step": 1110
    },
    {
      "epoch": 0.24688888888888888,
      "grad_norm": 0.9598534107208252,
      "learning_rate": 0.0001509576837416481,
      "loss": 1.9542,
      "step": 1111
    },
    {
      "epoch": 0.24711111111111111,
      "grad_norm": 0.9928382635116577,
      "learning_rate": 0.000150913140311804,
      "loss": 2.1632,
      "step": 1112
    },
    {
      "epoch": 0.24733333333333332,
      "grad_norm": 0.8307510614395142,
      "learning_rate": 0.00015086859688195992,
      "loss": 2.0449,
      "step": 1113
    },
    {
      "epoch": 0.24755555555555556,
      "grad_norm": 0.8130167722702026,
      "learning_rate": 0.0001508240534521158,
      "loss": 1.6968,
      "step": 1114
    },
    {
      "epoch": 0.2477777777777778,
      "grad_norm": 0.9309992790222168,
      "learning_rate": 0.00015077951002227173,
      "loss": 2.1483,
      "step": 1115
    },
    {
      "epoch": 0.248,
      "grad_norm": 0.7260227203369141,
      "learning_rate": 0.00015073496659242762,
      "loss": 1.0392,
      "step": 1116
    },
    {
      "epoch": 0.24822222222222223,
      "grad_norm": 0.6399407386779785,
      "learning_rate": 0.00015069042316258351,
      "loss": 1.005,
      "step": 1117
    },
    {
      "epoch": 0.24844444444444444,
      "grad_norm": 0.9045966863632202,
      "learning_rate": 0.00015064587973273943,
      "loss": 1.9107,
      "step": 1118
    },
    {
      "epoch": 0.24866666666666667,
      "grad_norm": 0.9134828448295593,
      "learning_rate": 0.00015060133630289533,
      "loss": 1.8531,
      "step": 1119
    },
    {
      "epoch": 0.24888888888888888,
      "grad_norm": 0.8679183721542358,
      "learning_rate": 0.00015055679287305122,
      "loss": 1.8696,
      "step": 1120
    },
    {
      "epoch": 0.24911111111111112,
      "grad_norm": 0.9295245409011841,
      "learning_rate": 0.00015051224944320714,
      "loss": 1.8919,
      "step": 1121
    },
    {
      "epoch": 0.24933333333333332,
      "grad_norm": 0.7018031477928162,
      "learning_rate": 0.00015046770601336303,
      "loss": 0.8568,
      "step": 1122
    },
    {
      "epoch": 0.24955555555555556,
      "grad_norm": 0.9831565618515015,
      "learning_rate": 0.00015042316258351895,
      "loss": 1.9167,
      "step": 1123
    },
    {
      "epoch": 0.24977777777777777,
      "grad_norm": 0.788474977016449,
      "learning_rate": 0.00015037861915367484,
      "loss": 1.0294,
      "step": 1124
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9335272908210754,
      "learning_rate": 0.00015033407572383073,
      "loss": 1.7164,
      "step": 1125
    },
    {
      "epoch": 0.25022222222222223,
      "grad_norm": 1.0208749771118164,
      "learning_rate": 0.00015028953229398662,
      "loss": 1.7482,
      "step": 1126
    },
    {
      "epoch": 0.25044444444444447,
      "grad_norm": 1.1490461826324463,
      "learning_rate": 0.00015024498886414257,
      "loss": 2.2458,
      "step": 1127
    },
    {
      "epoch": 0.25066666666666665,
      "grad_norm": 0.07004405558109283,
      "learning_rate": 0.00015020044543429846,
      "loss": 0.018,
      "step": 1128
    },
    {
      "epoch": 0.2508888888888889,
      "grad_norm": 0.07559281587600708,
      "learning_rate": 0.00015015590200445435,
      "loss": 0.0176,
      "step": 1129
    },
    {
      "epoch": 0.2511111111111111,
      "grad_norm": 0.6543799638748169,
      "learning_rate": 0.00015011135857461024,
      "loss": 0.8183,
      "step": 1130
    },
    {
      "epoch": 0.25133333333333335,
      "grad_norm": 0.9669918417930603,
      "learning_rate": 0.00015006681514476614,
      "loss": 1.591,
      "step": 1131
    },
    {
      "epoch": 0.25155555555555553,
      "grad_norm": 1.1905044317245483,
      "learning_rate": 0.00015002227171492205,
      "loss": 2.0181,
      "step": 1132
    },
    {
      "epoch": 0.25177777777777777,
      "grad_norm": 0.6645126938819885,
      "learning_rate": 0.00014997772828507797,
      "loss": 0.9216,
      "step": 1133
    },
    {
      "epoch": 0.252,
      "grad_norm": 0.7185103893280029,
      "learning_rate": 0.00014993318485523387,
      "loss": 0.867,
      "step": 1134
    },
    {
      "epoch": 0.25222222222222224,
      "grad_norm": 1.1038732528686523,
      "learning_rate": 0.00014988864142538976,
      "loss": 1.8593,
      "step": 1135
    },
    {
      "epoch": 0.25244444444444447,
      "grad_norm": 1.0466231107711792,
      "learning_rate": 0.00014984409799554568,
      "loss": 1.6207,
      "step": 1136
    },
    {
      "epoch": 0.25266666666666665,
      "grad_norm": 1.0683057308197021,
      "learning_rate": 0.00014979955456570157,
      "loss": 1.6742,
      "step": 1137
    },
    {
      "epoch": 0.2528888888888889,
      "grad_norm": 0.14429320394992828,
      "learning_rate": 0.00014975501113585746,
      "loss": 0.0322,
      "step": 1138
    },
    {
      "epoch": 0.2531111111111111,
      "grad_norm": 1.1202466487884521,
      "learning_rate": 0.00014971046770601338,
      "loss": 1.7064,
      "step": 1139
    },
    {
      "epoch": 0.25333333333333335,
      "grad_norm": 1.1153877973556519,
      "learning_rate": 0.00014966592427616927,
      "loss": 1.5018,
      "step": 1140
    },
    {
      "epoch": 0.25355555555555553,
      "grad_norm": 1.383662223815918,
      "learning_rate": 0.0001496213808463252,
      "loss": 1.4785,
      "step": 1141
    },
    {
      "epoch": 0.25377777777777777,
      "grad_norm": 1.060515284538269,
      "learning_rate": 0.00014957683741648108,
      "loss": 1.5914,
      "step": 1142
    },
    {
      "epoch": 0.254,
      "grad_norm": 0.6889761090278625,
      "learning_rate": 0.00014953229398663697,
      "loss": 0.686,
      "step": 1143
    },
    {
      "epoch": 0.25422222222222224,
      "grad_norm": 0.8010234832763672,
      "learning_rate": 0.00014948775055679286,
      "loss": 0.6522,
      "step": 1144
    },
    {
      "epoch": 0.2544444444444444,
      "grad_norm": 0.9428964257240295,
      "learning_rate": 0.00014944320712694878,
      "loss": 1.1183,
      "step": 1145
    },
    {
      "epoch": 0.25466666666666665,
      "grad_norm": 1.2112162113189697,
      "learning_rate": 0.0001493986636971047,
      "loss": 1.4202,
      "step": 1146
    },
    {
      "epoch": 0.2548888888888889,
      "grad_norm": 0.8112894892692566,
      "learning_rate": 0.0001493541202672606,
      "loss": 0.7603,
      "step": 1147
    },
    {
      "epoch": 0.2551111111111111,
      "grad_norm": 0.18849223852157593,
      "learning_rate": 0.00014930957683741649,
      "loss": 0.0361,
      "step": 1148
    },
    {
      "epoch": 0.25533333333333336,
      "grad_norm": 1.0638991594314575,
      "learning_rate": 0.00014926503340757238,
      "loss": 1.2604,
      "step": 1149
    },
    {
      "epoch": 0.25555555555555554,
      "grad_norm": 1.0759894847869873,
      "learning_rate": 0.0001492204899777283,
      "loss": 0.8551,
      "step": 1150
    },
    {
      "epoch": 0.25577777777777777,
      "grad_norm": 0.8875473737716675,
      "learning_rate": 0.00014917594654788422,
      "loss": 2.2251,
      "step": 1151
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.06436305493116379,
      "learning_rate": 0.0001491314031180401,
      "loss": 0.0134,
      "step": 1152
    },
    {
      "epoch": 0.25622222222222224,
      "grad_norm": 0.06132598966360092,
      "learning_rate": 0.000149086859688196,
      "loss": 0.013,
      "step": 1153
    },
    {
      "epoch": 0.2564444444444444,
      "grad_norm": 0.6333918571472168,
      "learning_rate": 0.0001490423162583519,
      "loss": 1.2197,
      "step": 1154
    },
    {
      "epoch": 0.25666666666666665,
      "grad_norm": 0.6512543559074402,
      "learning_rate": 0.0001489977728285078,
      "loss": 1.0077,
      "step": 1155
    },
    {
      "epoch": 0.2568888888888889,
      "grad_norm": 0.8547171354293823,
      "learning_rate": 0.0001489532293986637,
      "loss": 1.9731,
      "step": 1156
    },
    {
      "epoch": 0.2571111111111111,
      "grad_norm": 0.9840943217277527,
      "learning_rate": 0.00014890868596881962,
      "loss": 2.437,
      "step": 1157
    },
    {
      "epoch": 0.25733333333333336,
      "grad_norm": 0.08678994327783585,
      "learning_rate": 0.0001488641425389755,
      "loss": 0.0136,
      "step": 1158
    },
    {
      "epoch": 0.25755555555555554,
      "grad_norm": 0.891431450843811,
      "learning_rate": 0.0001488195991091314,
      "loss": 1.6169,
      "step": 1159
    },
    {
      "epoch": 0.2577777777777778,
      "grad_norm": 1.0839104652404785,
      "learning_rate": 0.00014877505567928732,
      "loss": 1.8314,
      "step": 1160
    },
    {
      "epoch": 0.258,
      "grad_norm": 1.0359727144241333,
      "learning_rate": 0.00014873051224944322,
      "loss": 2.0611,
      "step": 1161
    },
    {
      "epoch": 0.25822222222222224,
      "grad_norm": 0.1595863550901413,
      "learning_rate": 0.0001486859688195991,
      "loss": 0.0189,
      "step": 1162
    },
    {
      "epoch": 0.2584444444444444,
      "grad_norm": 0.09386439621448517,
      "learning_rate": 0.00014864142538975503,
      "loss": 0.0174,
      "step": 1163
    },
    {
      "epoch": 0.25866666666666666,
      "grad_norm": 0.9504187107086182,
      "learning_rate": 0.00014859688195991092,
      "loss": 1.9366,
      "step": 1164
    },
    {
      "epoch": 0.2588888888888889,
      "grad_norm": 0.9891676902770996,
      "learning_rate": 0.00014855233853006684,
      "loss": 2.3224,
      "step": 1165
    },
    {
      "epoch": 0.2591111111111111,
      "grad_norm": 0.817260205745697,
      "learning_rate": 0.00014850779510022273,
      "loss": 1.9071,
      "step": 1166
    },
    {
      "epoch": 0.25933333333333336,
      "grad_norm": 0.9042514562606812,
      "learning_rate": 0.00014846325167037862,
      "loss": 1.9343,
      "step": 1167
    },
    {
      "epoch": 0.25955555555555554,
      "grad_norm": 0.9342247843742371,
      "learning_rate": 0.0001484187082405345,
      "loss": 2.2231,
      "step": 1168
    },
    {
      "epoch": 0.2597777777777778,
      "grad_norm": 0.5495959520339966,
      "learning_rate": 0.00014837416481069043,
      "loss": 0.6888,
      "step": 1169
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.081369549036026,
      "learning_rate": 0.00014832962138084635,
      "loss": 0.0182,
      "step": 1170
    },
    {
      "epoch": 0.26022222222222224,
      "grad_norm": 0.08859319984912872,
      "learning_rate": 0.00014828507795100224,
      "loss": 0.0182,
      "step": 1171
    },
    {
      "epoch": 0.2604444444444444,
      "grad_norm": 0.07650327682495117,
      "learning_rate": 0.00014824053452115813,
      "loss": 0.0176,
      "step": 1172
    },
    {
      "epoch": 0.26066666666666666,
      "grad_norm": 0.8651443123817444,
      "learning_rate": 0.00014819599109131403,
      "loss": 1.6714,
      "step": 1173
    },
    {
      "epoch": 0.2608888888888889,
      "grad_norm": 0.7669498920440674,
      "learning_rate": 0.00014815144766146994,
      "loss": 0.0361,
      "step": 1174
    },
    {
      "epoch": 0.2611111111111111,
      "grad_norm": 0.9396262764930725,
      "learning_rate": 0.00014810690423162584,
      "loss": 1.8296,
      "step": 1175
    },
    {
      "epoch": 0.2613333333333333,
      "grad_norm": 1.0305185317993164,
      "learning_rate": 0.00014806236080178175,
      "loss": 1.9626,
      "step": 1176
    },
    {
      "epoch": 0.26155555555555554,
      "grad_norm": 0.9989243745803833,
      "learning_rate": 0.00014801781737193765,
      "loss": 2.113,
      "step": 1177
    },
    {
      "epoch": 0.2617777777777778,
      "grad_norm": 0.8748054504394531,
      "learning_rate": 0.00014797327394209354,
      "loss": 1.5958,
      "step": 1178
    },
    {
      "epoch": 0.262,
      "grad_norm": 1.3041480779647827,
      "learning_rate": 0.00014792873051224946,
      "loss": 1.7549,
      "step": 1179
    },
    {
      "epoch": 0.26222222222222225,
      "grad_norm": 1.0158839225769043,
      "learning_rate": 0.00014788418708240535,
      "loss": 1.9756,
      "step": 1180
    },
    {
      "epoch": 0.2624444444444444,
      "grad_norm": 0.7090574502944946,
      "learning_rate": 0.00014783964365256124,
      "loss": 1.051,
      "step": 1181
    },
    {
      "epoch": 0.26266666666666666,
      "grad_norm": 0.13157020509243011,
      "learning_rate": 0.00014779510022271716,
      "loss": 0.0176,
      "step": 1182
    },
    {
      "epoch": 0.2628888888888889,
      "grad_norm": 0.738917350769043,
      "learning_rate": 0.00014775055679287305,
      "loss": 0.9179,
      "step": 1183
    },
    {
      "epoch": 0.26311111111111113,
      "grad_norm": 0.14277049899101257,
      "learning_rate": 0.00014770601336302897,
      "loss": 0.0199,
      "step": 1184
    },
    {
      "epoch": 0.2633333333333333,
      "grad_norm": 0.1282823085784912,
      "learning_rate": 0.00014766146993318486,
      "loss": 0.0193,
      "step": 1185
    },
    {
      "epoch": 0.26355555555555554,
      "grad_norm": 0.10668764263391495,
      "learning_rate": 0.00014761692650334075,
      "loss": 0.018,
      "step": 1186
    },
    {
      "epoch": 0.2637777777777778,
      "grad_norm": 0.0911671444773674,
      "learning_rate": 0.00014757238307349665,
      "loss": 0.0169,
      "step": 1187
    },
    {
      "epoch": 0.264,
      "grad_norm": 0.7024106979370117,
      "learning_rate": 0.00014752783964365256,
      "loss": 0.9606,
      "step": 1188
    },
    {
      "epoch": 0.26422222222222225,
      "grad_norm": 0.9707425236701965,
      "learning_rate": 0.00014748329621380848,
      "loss": 1.8245,
      "step": 1189
    },
    {
      "epoch": 0.2644444444444444,
      "grad_norm": 0.25269874930381775,
      "learning_rate": 0.00014743875278396438,
      "loss": 0.0316,
      "step": 1190
    },
    {
      "epoch": 0.26466666666666666,
      "grad_norm": 0.9807192087173462,
      "learning_rate": 0.00014739420935412027,
      "loss": 1.659,
      "step": 1191
    },
    {
      "epoch": 0.2648888888888889,
      "grad_norm": 1.0586344003677368,
      "learning_rate": 0.00014734966592427616,
      "loss": 1.6508,
      "step": 1192
    },
    {
      "epoch": 0.26511111111111113,
      "grad_norm": 1.1077786684036255,
      "learning_rate": 0.00014730512249443208,
      "loss": 1.7849,
      "step": 1193
    },
    {
      "epoch": 0.2653333333333333,
      "grad_norm": 0.9380425810813904,
      "learning_rate": 0.000147260579064588,
      "loss": 1.4617,
      "step": 1194
    },
    {
      "epoch": 0.26555555555555554,
      "grad_norm": 0.8665103912353516,
      "learning_rate": 0.0001472160356347439,
      "loss": 0.8163,
      "step": 1195
    },
    {
      "epoch": 0.2657777777777778,
      "grad_norm": 0.20836298167705536,
      "learning_rate": 0.00014717149220489978,
      "loss": 0.0295,
      "step": 1196
    },
    {
      "epoch": 0.266,
      "grad_norm": 0.7868085503578186,
      "learning_rate": 0.00014712694877505567,
      "loss": 0.6978,
      "step": 1197
    },
    {
      "epoch": 0.26622222222222225,
      "grad_norm": 0.9609499573707581,
      "learning_rate": 0.0001470824053452116,
      "loss": 1.2007,
      "step": 1198
    },
    {
      "epoch": 0.26644444444444443,
      "grad_norm": 0.6641119718551636,
      "learning_rate": 0.00014703786191536748,
      "loss": 0.5312,
      "step": 1199
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.5177371501922607,
      "learning_rate": 0.0001469933184855234,
      "loss": 0.2728,
      "step": 1200
    },
    {
      "epoch": 0.2668888888888889,
      "grad_norm": 0.6398945450782776,
      "learning_rate": 0.0001469487750556793,
      "loss": 1.1822,
      "step": 1201
    },
    {
      "epoch": 0.26711111111111113,
      "grad_norm": 0.5665084719657898,
      "learning_rate": 0.00014690423162583519,
      "loss": 1.0326,
      "step": 1202
    },
    {
      "epoch": 0.2673333333333333,
      "grad_norm": 0.806073009967804,
      "learning_rate": 0.0001468596881959911,
      "loss": 2.2595,
      "step": 1203
    },
    {
      "epoch": 0.26755555555555555,
      "grad_norm": 0.05791214853525162,
      "learning_rate": 0.000146815144766147,
      "loss": 0.0123,
      "step": 1204
    },
    {
      "epoch": 0.2677777777777778,
      "grad_norm": 0.5717757344245911,
      "learning_rate": 0.0001467706013363029,
      "loss": 1.0509,
      "step": 1205
    },
    {
      "epoch": 0.268,
      "grad_norm": 0.8858723044395447,
      "learning_rate": 0.0001467260579064588,
      "loss": 2.0359,
      "step": 1206
    },
    {
      "epoch": 0.2682222222222222,
      "grad_norm": 0.14826105535030365,
      "learning_rate": 0.0001466815144766147,
      "loss": 0.0167,
      "step": 1207
    },
    {
      "epoch": 0.26844444444444443,
      "grad_norm": 0.099408358335495,
      "learning_rate": 0.00014663697104677062,
      "loss": 0.0163,
      "step": 1208
    },
    {
      "epoch": 0.26866666666666666,
      "grad_norm": 0.09596351534128189,
      "learning_rate": 0.0001465924276169265,
      "loss": 0.0153,
      "step": 1209
    },
    {
      "epoch": 0.2688888888888889,
      "grad_norm": 0.8464707732200623,
      "learning_rate": 0.0001465478841870824,
      "loss": 2.4877,
      "step": 1210
    },
    {
      "epoch": 0.26911111111111113,
      "grad_norm": 0.8414135575294495,
      "learning_rate": 0.0001465033407572383,
      "loss": 2.4347,
      "step": 1211
    },
    {
      "epoch": 0.2693333333333333,
      "grad_norm": 0.8445218205451965,
      "learning_rate": 0.00014645879732739424,
      "loss": 2.0295,
      "step": 1212
    },
    {
      "epoch": 0.26955555555555555,
      "grad_norm": 0.8376966118812561,
      "learning_rate": 0.00014641425389755013,
      "loss": 1.9265,
      "step": 1213
    },
    {
      "epoch": 0.2697777777777778,
      "grad_norm": 1.121559977531433,
      "learning_rate": 0.00014636971046770602,
      "loss": 1.9569,
      "step": 1214
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9244825839996338,
      "learning_rate": 0.00014632516703786191,
      "loss": 1.9792,
      "step": 1215
    },
    {
      "epoch": 0.2702222222222222,
      "grad_norm": 0.9264352321624756,
      "learning_rate": 0.0001462806236080178,
      "loss": 1.965,
      "step": 1216
    },
    {
      "epoch": 0.27044444444444443,
      "grad_norm": 0.9068411588668823,
      "learning_rate": 0.00014623608017817373,
      "loss": 1.9279,
      "step": 1217
    },
    {
      "epoch": 0.27066666666666667,
      "grad_norm": 0.9238573312759399,
      "learning_rate": 0.00014619153674832964,
      "loss": 1.9785,
      "step": 1218
    },
    {
      "epoch": 0.2708888888888889,
      "grad_norm": 0.8850834965705872,
      "learning_rate": 0.00014614699331848554,
      "loss": 1.8371,
      "step": 1219
    },
    {
      "epoch": 0.27111111111111114,
      "grad_norm": 0.8664029240608215,
      "learning_rate": 0.00014610244988864143,
      "loss": 1.6893,
      "step": 1220
    },
    {
      "epoch": 0.2713333333333333,
      "grad_norm": 0.9488045573234558,
      "learning_rate": 0.00014605790645879735,
      "loss": 1.9064,
      "step": 1221
    },
    {
      "epoch": 0.27155555555555555,
      "grad_norm": 0.11144151538610458,
      "learning_rate": 0.00014601336302895324,
      "loss": 0.0165,
      "step": 1222
    },
    {
      "epoch": 0.2717777777777778,
      "grad_norm": 0.11891952157020569,
      "learning_rate": 0.00014596881959910913,
      "loss": 0.0163,
      "step": 1223
    },
    {
      "epoch": 0.272,
      "grad_norm": 0.07421109825372696,
      "learning_rate": 0.00014592427616926505,
      "loss": 0.016,
      "step": 1224
    },
    {
      "epoch": 0.2722222222222222,
      "grad_norm": 0.5813721418380737,
      "learning_rate": 0.00014587973273942094,
      "loss": 1.0181,
      "step": 1225
    },
    {
      "epoch": 0.27244444444444443,
      "grad_norm": 0.720731258392334,
      "learning_rate": 0.00014583518930957686,
      "loss": 0.8357,
      "step": 1226
    },
    {
      "epoch": 0.27266666666666667,
      "grad_norm": 0.8886854648590088,
      "learning_rate": 0.00014579064587973275,
      "loss": 1.6687,
      "step": 1227
    },
    {
      "epoch": 0.2728888888888889,
      "grad_norm": 1.0154438018798828,
      "learning_rate": 0.00014574610244988864,
      "loss": 2.0414,
      "step": 1228
    },
    {
      "epoch": 0.27311111111111114,
      "grad_norm": 1.0460702180862427,
      "learning_rate": 0.00014570155902004454,
      "loss": 1.8346,
      "step": 1229
    },
    {
      "epoch": 0.2733333333333333,
      "grad_norm": 1.0006906986236572,
      "learning_rate": 0.00014565701559020045,
      "loss": 0.9797,
      "step": 1230
    },
    {
      "epoch": 0.27355555555555555,
      "grad_norm": 0.08748811483383179,
      "learning_rate": 0.00014561247216035637,
      "loss": 0.0191,
      "step": 1231
    },
    {
      "epoch": 0.2737777777777778,
      "grad_norm": 0.08443690091371536,
      "learning_rate": 0.00014556792873051227,
      "loss": 0.0191,
      "step": 1232
    },
    {
      "epoch": 0.274,
      "grad_norm": 0.8554977774620056,
      "learning_rate": 0.00014552338530066816,
      "loss": 0.8648,
      "step": 1233
    },
    {
      "epoch": 0.2742222222222222,
      "grad_norm": 0.2030288428068161,
      "learning_rate": 0.00014547884187082405,
      "loss": 0.0251,
      "step": 1234
    },
    {
      "epoch": 0.27444444444444444,
      "grad_norm": 0.1689607948064804,
      "learning_rate": 0.00014543429844097997,
      "loss": 0.0226,
      "step": 1235
    },
    {
      "epoch": 0.27466666666666667,
      "grad_norm": 0.14935432374477386,
      "learning_rate": 0.0001453897550111359,
      "loss": 0.0201,
      "step": 1236
    },
    {
      "epoch": 0.2748888888888889,
      "grad_norm": 0.9350739121437073,
      "learning_rate": 0.00014534521158129178,
      "loss": 1.5576,
      "step": 1237
    },
    {
      "epoch": 0.2751111111111111,
      "grad_norm": 0.11490830034017563,
      "learning_rate": 0.00014530066815144767,
      "loss": 0.0268,
      "step": 1238
    },
    {
      "epoch": 0.2753333333333333,
      "grad_norm": 1.1253389120101929,
      "learning_rate": 0.00014525612472160356,
      "loss": 1.6596,
      "step": 1239
    },
    {
      "epoch": 0.27555555555555555,
      "grad_norm": 1.0991276502609253,
      "learning_rate": 0.00014521158129175948,
      "loss": 1.4434,
      "step": 1240
    },
    {
      "epoch": 0.2757777777777778,
      "grad_norm": 1.245217204093933,
      "learning_rate": 0.00014516703786191537,
      "loss": 1.6334,
      "step": 1241
    },
    {
      "epoch": 0.276,
      "grad_norm": 1.0247350931167603,
      "learning_rate": 0.00014512249443207126,
      "loss": 1.5174,
      "step": 1242
    },
    {
      "epoch": 0.2762222222222222,
      "grad_norm": 1.1460856199264526,
      "learning_rate": 0.00014507795100222718,
      "loss": 1.4504,
      "step": 1243
    },
    {
      "epoch": 0.27644444444444444,
      "grad_norm": 0.8328757286071777,
      "learning_rate": 0.00014503340757238308,
      "loss": 0.7809,
      "step": 1244
    },
    {
      "epoch": 0.27666666666666667,
      "grad_norm": 0.726498007774353,
      "learning_rate": 0.000144988864142539,
      "loss": 0.6805,
      "step": 1245
    },
    {
      "epoch": 0.2768888888888889,
      "grad_norm": 1.1462013721466064,
      "learning_rate": 0.00014494432071269489,
      "loss": 1.3072,
      "step": 1246
    },
    {
      "epoch": 0.2771111111111111,
      "grad_norm": 1.0997343063354492,
      "learning_rate": 0.00014489977728285078,
      "loss": 1.18,
      "step": 1247
    },
    {
      "epoch": 0.2773333333333333,
      "grad_norm": 0.16613906621932983,
      "learning_rate": 0.00014485523385300667,
      "loss": 0.0333,
      "step": 1248
    },
    {
      "epoch": 0.27755555555555556,
      "grad_norm": 1.0108569860458374,
      "learning_rate": 0.0001448106904231626,
      "loss": 1.0494,
      "step": 1249
    },
    {
      "epoch": 0.2777777777777778,
      "grad_norm": 1.0045305490493774,
      "learning_rate": 0.0001447661469933185,
      "loss": 0.7517,
      "step": 1250
    },
    {
      "epoch": 0.278,
      "grad_norm": 0.591677188873291,
      "learning_rate": 0.0001447216035634744,
      "loss": 1.1579,
      "step": 1251
    },
    {
      "epoch": 0.2782222222222222,
      "grad_norm": 0.6461545825004578,
      "learning_rate": 0.0001446770601336303,
      "loss": 1.2558,
      "step": 1252
    },
    {
      "epoch": 0.27844444444444444,
      "grad_norm": 0.5436459183692932,
      "learning_rate": 0.00014463251670378618,
      "loss": 1.0179,
      "step": 1253
    },
    {
      "epoch": 0.2786666666666667,
      "grad_norm": 0.5476921200752258,
      "learning_rate": 0.0001445879732739421,
      "loss": 1.1344,
      "step": 1254
    },
    {
      "epoch": 0.2788888888888889,
      "grad_norm": 0.8634714484214783,
      "learning_rate": 0.00014454342984409802,
      "loss": 2.2497,
      "step": 1255
    },
    {
      "epoch": 0.2791111111111111,
      "grad_norm": 0.8563636541366577,
      "learning_rate": 0.0001444988864142539,
      "loss": 2.2022,
      "step": 1256
    },
    {
      "epoch": 0.2793333333333333,
      "grad_norm": 0.6468019485473633,
      "learning_rate": 0.0001444543429844098,
      "loss": 1.102,
      "step": 1257
    },
    {
      "epoch": 0.27955555555555556,
      "grad_norm": 0.0794794037938118,
      "learning_rate": 0.0001444097995545657,
      "loss": 0.013,
      "step": 1258
    },
    {
      "epoch": 0.2797777777777778,
      "grad_norm": 0.07662985473871231,
      "learning_rate": 0.00014436525612472161,
      "loss": 0.0129,
      "step": 1259
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5852623581886292,
      "learning_rate": 0.0001443207126948775,
      "loss": 1.1717,
      "step": 1260
    },
    {
      "epoch": 0.2802222222222222,
      "grad_norm": 0.8723194003105164,
      "learning_rate": 0.00014427616926503343,
      "loss": 2.0411,
      "step": 1261
    },
    {
      "epoch": 0.28044444444444444,
      "grad_norm": 0.927684485912323,
      "learning_rate": 0.00014423162583518932,
      "loss": 2.4348,
      "step": 1262
    },
    {
      "epoch": 0.2806666666666667,
      "grad_norm": 0.9913063645362854,
      "learning_rate": 0.0001441870824053452,
      "loss": 2.1211,
      "step": 1263
    },
    {
      "epoch": 0.2808888888888889,
      "grad_norm": 0.8557515144348145,
      "learning_rate": 0.00014414253897550113,
      "loss": 2.1376,
      "step": 1264
    },
    {
      "epoch": 0.2811111111111111,
      "grad_norm": 0.9847201704978943,
      "learning_rate": 0.00014409799554565702,
      "loss": 1.9602,
      "step": 1265
    },
    {
      "epoch": 0.2813333333333333,
      "grad_norm": 0.9515376687049866,
      "learning_rate": 0.0001440534521158129,
      "loss": 1.8319,
      "step": 1266
    },
    {
      "epoch": 0.28155555555555556,
      "grad_norm": 0.12117951363325119,
      "learning_rate": 0.00014400890868596883,
      "loss": 0.0204,
      "step": 1267
    },
    {
      "epoch": 0.2817777777777778,
      "grad_norm": 0.8738093376159668,
      "learning_rate": 0.00014396436525612472,
      "loss": 2.0693,
      "step": 1268
    },
    {
      "epoch": 0.282,
      "grad_norm": 0.9916730523109436,
      "learning_rate": 0.00014391982182628064,
      "loss": 1.8292,
      "step": 1269
    },
    {
      "epoch": 0.2822222222222222,
      "grad_norm": 0.9203552007675171,
      "learning_rate": 0.00014387527839643653,
      "loss": 1.9077,
      "step": 1270
    },
    {
      "epoch": 0.28244444444444444,
      "grad_norm": 0.9378885626792908,
      "learning_rate": 0.00014383073496659243,
      "loss": 1.6729,
      "step": 1271
    },
    {
      "epoch": 0.2826666666666667,
      "grad_norm": 0.8718151450157166,
      "learning_rate": 0.00014378619153674832,
      "loss": 1.8453,
      "step": 1272
    },
    {
      "epoch": 0.2828888888888889,
      "grad_norm": 0.545022189617157,
      "learning_rate": 0.00014374164810690424,
      "loss": 0.9028,
      "step": 1273
    },
    {
      "epoch": 0.2831111111111111,
      "grad_norm": 0.10532009601593018,
      "learning_rate": 0.00014369710467706015,
      "loss": 0.0177,
      "step": 1274
    },
    {
      "epoch": 0.2833333333333333,
      "grad_norm": 0.08250175416469574,
      "learning_rate": 0.00014365256124721605,
      "loss": 0.0174,
      "step": 1275
    },
    {
      "epoch": 0.28355555555555556,
      "grad_norm": 0.08483847975730896,
      "learning_rate": 0.00014360801781737194,
      "loss": 0.017,
      "step": 1276
    },
    {
      "epoch": 0.2837777777777778,
      "grad_norm": 0.07746873795986176,
      "learning_rate": 0.00014356347438752783,
      "loss": 0.0165,
      "step": 1277
    },
    {
      "epoch": 0.284,
      "grad_norm": 0.0772603377699852,
      "learning_rate": 0.00014351893095768375,
      "loss": 0.0162,
      "step": 1278
    },
    {
      "epoch": 0.2842222222222222,
      "grad_norm": 0.9422643780708313,
      "learning_rate": 0.00014347438752783967,
      "loss": 1.9495,
      "step": 1279
    },
    {
      "epoch": 0.28444444444444444,
      "grad_norm": 0.563165009021759,
      "learning_rate": 0.00014342984409799556,
      "loss": 0.79,
      "step": 1280
    },
    {
      "epoch": 0.2846666666666667,
      "grad_norm": 0.9257560968399048,
      "learning_rate": 0.00014338530066815145,
      "loss": 1.8217,
      "step": 1281
    },
    {
      "epoch": 0.2848888888888889,
      "grad_norm": 1.079615831375122,
      "learning_rate": 0.00014334075723830734,
      "loss": 1.9557,
      "step": 1282
    },
    {
      "epoch": 0.2851111111111111,
      "grad_norm": 0.8131044507026672,
      "learning_rate": 0.00014329621380846326,
      "loss": 1.0458,
      "step": 1283
    },
    {
      "epoch": 0.2853333333333333,
      "grad_norm": 0.793594241142273,
      "learning_rate": 0.00014325167037861915,
      "loss": 0.9087,
      "step": 1284
    },
    {
      "epoch": 0.28555555555555556,
      "grad_norm": 0.6490100622177124,
      "learning_rate": 0.00014320712694877507,
      "loss": 0.7678,
      "step": 1285
    },
    {
      "epoch": 0.2857777777777778,
      "grad_norm": 0.07962695509195328,
      "learning_rate": 0.00014316258351893096,
      "loss": 0.0181,
      "step": 1286
    },
    {
      "epoch": 0.286,
      "grad_norm": 0.08379562944173813,
      "learning_rate": 0.00014311804008908686,
      "loss": 0.0177,
      "step": 1287
    },
    {
      "epoch": 0.2862222222222222,
      "grad_norm": 0.7064594030380249,
      "learning_rate": 0.00014307349665924278,
      "loss": 0.7159,
      "step": 1288
    },
    {
      "epoch": 0.28644444444444445,
      "grad_norm": 1.0957010984420776,
      "learning_rate": 0.00014302895322939867,
      "loss": 1.9004,
      "step": 1289
    },
    {
      "epoch": 0.2866666666666667,
      "grad_norm": 1.0294493436813354,
      "learning_rate": 0.00014298440979955456,
      "loss": 1.4984,
      "step": 1290
    },
    {
      "epoch": 0.2868888888888889,
      "grad_norm": 1.0541222095489502,
      "learning_rate": 0.00014293986636971048,
      "loss": 1.4094,
      "step": 1291
    },
    {
      "epoch": 0.2871111111111111,
      "grad_norm": 0.9578663110733032,
      "learning_rate": 0.00014289532293986637,
      "loss": 1.7305,
      "step": 1292
    },
    {
      "epoch": 0.28733333333333333,
      "grad_norm": 1.0165977478027344,
      "learning_rate": 0.0001428507795100223,
      "loss": 1.4897,
      "step": 1293
    },
    {
      "epoch": 0.28755555555555556,
      "grad_norm": 0.6953179240226746,
      "learning_rate": 0.00014280623608017818,
      "loss": 0.7403,
      "step": 1294
    },
    {
      "epoch": 0.2877777777777778,
      "grad_norm": 0.6080345511436462,
      "learning_rate": 0.00014276169265033407,
      "loss": 0.5275,
      "step": 1295
    },
    {
      "epoch": 0.288,
      "grad_norm": 1.2395416498184204,
      "learning_rate": 0.00014271714922048996,
      "loss": 1.2051,
      "step": 1296
    },
    {
      "epoch": 0.2882222222222222,
      "grad_norm": 0.724810779094696,
      "learning_rate": 0.0001426726057906459,
      "loss": 0.5437,
      "step": 1297
    },
    {
      "epoch": 0.28844444444444445,
      "grad_norm": 0.7165504693984985,
      "learning_rate": 0.0001426280623608018,
      "loss": 0.5968,
      "step": 1298
    },
    {
      "epoch": 0.2886666666666667,
      "grad_norm": 1.0052456855773926,
      "learning_rate": 0.0001425835189309577,
      "loss": 1.0547,
      "step": 1299
    },
    {
      "epoch": 0.28888888888888886,
      "grad_norm": 0.8707148432731628,
      "learning_rate": 0.00014253897550111359,
      "loss": 0.6631,
      "step": 1300
    },
    {
      "epoch": 0.2891111111111111,
      "grad_norm": 0.811651349067688,
      "learning_rate": 0.00014249443207126948,
      "loss": 2.4015,
      "step": 1301
    },
    {
      "epoch": 0.28933333333333333,
      "grad_norm": 0.10851640999317169,
      "learning_rate": 0.0001424498886414254,
      "loss": 0.0133,
      "step": 1302
    },
    {
      "epoch": 0.28955555555555557,
      "grad_norm": 0.628270149230957,
      "learning_rate": 0.00014240534521158132,
      "loss": 1.2922,
      "step": 1303
    },
    {
      "epoch": 0.2897777777777778,
      "grad_norm": 1.039566993713379,
      "learning_rate": 0.0001423608017817372,
      "loss": 2.5175,
      "step": 1304
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.3334747552871704,
      "learning_rate": 0.0001423162583518931,
      "loss": 0.0157,
      "step": 1305
    },
    {
      "epoch": 0.2902222222222222,
      "grad_norm": 0.14356687664985657,
      "learning_rate": 0.00014227171492204902,
      "loss": 0.0136,
      "step": 1306
    },
    {
      "epoch": 0.29044444444444445,
      "grad_norm": 0.07359285652637482,
      "learning_rate": 0.0001422271714922049,
      "loss": 0.012,
      "step": 1307
    },
    {
      "epoch": 0.2906666666666667,
      "grad_norm": 0.683387815952301,
      "learning_rate": 0.0001421826280623608,
      "loss": 0.9133,
      "step": 1308
    },
    {
      "epoch": 0.29088888888888886,
      "grad_norm": 0.8599250316619873,
      "learning_rate": 0.00014213808463251672,
      "loss": 1.9407,
      "step": 1309
    },
    {
      "epoch": 0.2911111111111111,
      "grad_norm": 0.9184352159500122,
      "learning_rate": 0.0001420935412026726,
      "loss": 2.1807,
      "step": 1310
    },
    {
      "epoch": 0.29133333333333333,
      "grad_norm": 1.0117696523666382,
      "learning_rate": 0.00014204899777282853,
      "loss": 2.1512,
      "step": 1311
    },
    {
      "epoch": 0.29155555555555557,
      "grad_norm": 0.7668831944465637,
      "learning_rate": 0.00014200445434298442,
      "loss": 1.7311,
      "step": 1312
    },
    {
      "epoch": 0.2917777777777778,
      "grad_norm": 0.7954047918319702,
      "learning_rate": 0.00014195991091314031,
      "loss": 1.8633,
      "step": 1313
    },
    {
      "epoch": 0.292,
      "grad_norm": 1.201979637145996,
      "learning_rate": 0.0001419153674832962,
      "loss": 1.9811,
      "step": 1314
    },
    {
      "epoch": 0.2922222222222222,
      "grad_norm": 0.8454548716545105,
      "learning_rate": 0.0001418708240534521,
      "loss": 1.9974,
      "step": 1315
    },
    {
      "epoch": 0.29244444444444445,
      "grad_norm": 1.0249965190887451,
      "learning_rate": 0.00014182628062360804,
      "loss": 2.3543,
      "step": 1316
    },
    {
      "epoch": 0.2926666666666667,
      "grad_norm": 1.0549813508987427,
      "learning_rate": 0.00014178173719376394,
      "loss": 2.3438,
      "step": 1317
    },
    {
      "epoch": 0.29288888888888887,
      "grad_norm": 0.9025498628616333,
      "learning_rate": 0.00014173719376391983,
      "loss": 1.8958,
      "step": 1318
    },
    {
      "epoch": 0.2931111111111111,
      "grad_norm": 0.9086618423461914,
      "learning_rate": 0.00014169265033407572,
      "loss": 2.1565,
      "step": 1319
    },
    {
      "epoch": 0.29333333333333333,
      "grad_norm": 0.9326392412185669,
      "learning_rate": 0.00014164810690423164,
      "loss": 1.8965,
      "step": 1320
    },
    {
      "epoch": 0.29355555555555557,
      "grad_norm": 0.8608342409133911,
      "learning_rate": 0.00014160356347438753,
      "loss": 1.8891,
      "step": 1321
    },
    {
      "epoch": 0.2937777777777778,
      "grad_norm": 0.8682828545570374,
      "learning_rate": 0.00014155902004454345,
      "loss": 1.9273,
      "step": 1322
    },
    {
      "epoch": 0.294,
      "grad_norm": 0.7296971082687378,
      "learning_rate": 0.00014151447661469934,
      "loss": 1.0999,
      "step": 1323
    },
    {
      "epoch": 0.2942222222222222,
      "grad_norm": 0.09005673974752426,
      "learning_rate": 0.00014146993318485523,
      "loss": 0.0146,
      "step": 1324
    },
    {
      "epoch": 0.29444444444444445,
      "grad_norm": 0.08192627131938934,
      "learning_rate": 0.00014142538975501115,
      "loss": 0.0152,
      "step": 1325
    },
    {
      "epoch": 0.2946666666666667,
      "grad_norm": 0.08258790522813797,
      "learning_rate": 0.00014138084632516704,
      "loss": 0.0148,
      "step": 1326
    },
    {
      "epoch": 0.29488888888888887,
      "grad_norm": 0.07657041400671005,
      "learning_rate": 0.00014133630289532294,
      "loss": 0.0146,
      "step": 1327
    },
    {
      "epoch": 0.2951111111111111,
      "grad_norm": 0.09436430037021637,
      "learning_rate": 0.00014129175946547885,
      "loss": 0.0206,
      "step": 1328
    },
    {
      "epoch": 0.29533333333333334,
      "grad_norm": 0.6281077265739441,
      "learning_rate": 0.00014124721603563475,
      "loss": 0.9227,
      "step": 1329
    },
    {
      "epoch": 0.29555555555555557,
      "grad_norm": 0.9210866093635559,
      "learning_rate": 0.00014120267260579067,
      "loss": 1.4825,
      "step": 1330
    },
    {
      "epoch": 0.29577777777777775,
      "grad_norm": 0.9637800455093384,
      "learning_rate": 0.00014115812917594656,
      "loss": 1.84,
      "step": 1331
    },
    {
      "epoch": 0.296,
      "grad_norm": 1.1256612539291382,
      "learning_rate": 0.00014111358574610245,
      "loss": 1.732,
      "step": 1332
    },
    {
      "epoch": 0.2962222222222222,
      "grad_norm": 1.3389207124710083,
      "learning_rate": 0.00014106904231625834,
      "loss": 2.1392,
      "step": 1333
    },
    {
      "epoch": 0.29644444444444445,
      "grad_norm": 0.770871639251709,
      "learning_rate": 0.00014102449888641426,
      "loss": 0.8635,
      "step": 1334
    },
    {
      "epoch": 0.2966666666666667,
      "grad_norm": 0.08359196037054062,
      "learning_rate": 0.00014097995545657018,
      "loss": 0.0214,
      "step": 1335
    },
    {
      "epoch": 0.29688888888888887,
      "grad_norm": 0.07316497713327408,
      "learning_rate": 0.00014093541202672607,
      "loss": 0.0209,
      "step": 1336
    },
    {
      "epoch": 0.2971111111111111,
      "grad_norm": 1.0184942483901978,
      "learning_rate": 0.00014089086859688196,
      "loss": 1.8638,
      "step": 1337
    },
    {
      "epoch": 0.29733333333333334,
      "grad_norm": 0.7653923630714417,
      "learning_rate": 0.00014084632516703785,
      "loss": 1.0134,
      "step": 1338
    },
    {
      "epoch": 0.2975555555555556,
      "grad_norm": 0.07775040715932846,
      "learning_rate": 0.00014080178173719377,
      "loss": 0.0195,
      "step": 1339
    },
    {
      "epoch": 0.29777777777777775,
      "grad_norm": 1.0716129541397095,
      "learning_rate": 0.0001407572383073497,
      "loss": 1.75,
      "step": 1340
    },
    {
      "epoch": 0.298,
      "grad_norm": 1.022926688194275,
      "learning_rate": 0.00014071269487750558,
      "loss": 1.6189,
      "step": 1341
    },
    {
      "epoch": 0.2982222222222222,
      "grad_norm": 1.1022212505340576,
      "learning_rate": 0.00014066815144766148,
      "loss": 1.5175,
      "step": 1342
    },
    {
      "epoch": 0.29844444444444446,
      "grad_norm": 0.8157255053520203,
      "learning_rate": 0.00014062360801781737,
      "loss": 0.0359,
      "step": 1343
    },
    {
      "epoch": 0.2986666666666667,
      "grad_norm": 1.034464955329895,
      "learning_rate": 0.00014057906458797329,
      "loss": 1.6205,
      "step": 1344
    },
    {
      "epoch": 0.29888888888888887,
      "grad_norm": 1.1047078371047974,
      "learning_rate": 0.00014053452115812918,
      "loss": 1.4756,
      "step": 1345
    },
    {
      "epoch": 0.2991111111111111,
      "grad_norm": 1.0624406337738037,
      "learning_rate": 0.0001404899777282851,
      "loss": 1.4263,
      "step": 1346
    },
    {
      "epoch": 0.29933333333333334,
      "grad_norm": 0.9821346402168274,
      "learning_rate": 0.000140445434298441,
      "loss": 1.4288,
      "step": 1347
    },
    {
      "epoch": 0.2995555555555556,
      "grad_norm": 1.1753016710281372,
      "learning_rate": 0.00014040089086859688,
      "loss": 1.6721,
      "step": 1348
    },
    {
      "epoch": 0.29977777777777775,
      "grad_norm": 1.0248849391937256,
      "learning_rate": 0.0001403563474387528,
      "loss": 1.3122,
      "step": 1349
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0691323280334473,
      "learning_rate": 0.0001403118040089087,
      "loss": 1.3309,
      "step": 1350
    },
    {
      "epoch": 0.3002222222222222,
      "grad_norm": 0.055053651332855225,
      "learning_rate": 0.00014026726057906458,
      "loss": 0.0123,
      "step": 1351
    },
    {
      "epoch": 0.30044444444444446,
      "grad_norm": 0.05664655193686485,
      "learning_rate": 0.0001402227171492205,
      "loss": 0.0123,
      "step": 1352
    },
    {
      "epoch": 0.3006666666666667,
      "grad_norm": 0.8870833516120911,
      "learning_rate": 0.0001401781737193764,
      "loss": 2.1987,
      "step": 1353
    },
    {
      "epoch": 0.3008888888888889,
      "grad_norm": 0.5931410193443298,
      "learning_rate": 0.0001401336302895323,
      "loss": 0.9983,
      "step": 1354
    },
    {
      "epoch": 0.3011111111111111,
      "grad_norm": 0.8538689017295837,
      "learning_rate": 0.0001400890868596882,
      "loss": 2.2606,
      "step": 1355
    },
    {
      "epoch": 0.30133333333333334,
      "grad_norm": 0.05451615899801254,
      "learning_rate": 0.0001400445434298441,
      "loss": 0.0118,
      "step": 1356
    },
    {
      "epoch": 0.3015555555555556,
      "grad_norm": 0.881386935710907,
      "learning_rate": 0.00014,
      "loss": 2.3174,
      "step": 1357
    },
    {
      "epoch": 0.30177777777777776,
      "grad_norm": 0.8224138617515564,
      "learning_rate": 0.0001399554565701559,
      "loss": 0.0327,
      "step": 1358
    },
    {
      "epoch": 0.302,
      "grad_norm": 0.08270707726478577,
      "learning_rate": 0.00013991091314031183,
      "loss": 0.0139,
      "step": 1359
    },
    {
      "epoch": 0.3022222222222222,
      "grad_norm": 0.0797787606716156,
      "learning_rate": 0.00013986636971046772,
      "loss": 0.014,
      "step": 1360
    },
    {
      "epoch": 0.30244444444444446,
      "grad_norm": 0.6289453506469727,
      "learning_rate": 0.0001398218262806236,
      "loss": 0.973,
      "step": 1361
    },
    {
      "epoch": 0.30266666666666664,
      "grad_norm": 0.9311420917510986,
      "learning_rate": 0.0001397772828507795,
      "loss": 2.1816,
      "step": 1362
    },
    {
      "epoch": 0.3028888888888889,
      "grad_norm": 0.8370685577392578,
      "learning_rate": 0.00013973273942093542,
      "loss": 2.128,
      "step": 1363
    },
    {
      "epoch": 0.3031111111111111,
      "grad_norm": 0.7551019787788391,
      "learning_rate": 0.00013968819599109134,
      "loss": 1.9265,
      "step": 1364
    },
    {
      "epoch": 0.30333333333333334,
      "grad_norm": 0.8512743711471558,
      "learning_rate": 0.00013964365256124723,
      "loss": 1.9408,
      "step": 1365
    },
    {
      "epoch": 0.3035555555555556,
      "grad_norm": 0.9641419649124146,
      "learning_rate": 0.00013959910913140312,
      "loss": 2.1117,
      "step": 1366
    },
    {
      "epoch": 0.30377777777777776,
      "grad_norm": 0.841379702091217,
      "learning_rate": 0.00013955456570155901,
      "loss": 1.7673,
      "step": 1367
    },
    {
      "epoch": 0.304,
      "grad_norm": 0.660455584526062,
      "learning_rate": 0.00013951002227171493,
      "loss": 0.9827,
      "step": 1368
    },
    {
      "epoch": 0.3042222222222222,
      "grad_norm": 0.11817409098148346,
      "learning_rate": 0.00013946547884187082,
      "loss": 0.0225,
      "step": 1369
    },
    {
      "epoch": 0.30444444444444446,
      "grad_norm": 0.7172605395317078,
      "learning_rate": 0.00013942093541202674,
      "loss": 1.3491,
      "step": 1370
    },
    {
      "epoch": 0.30466666666666664,
      "grad_norm": 0.9483502507209778,
      "learning_rate": 0.00013937639198218264,
      "loss": 2.0678,
      "step": 1371
    },
    {
      "epoch": 0.3048888888888889,
      "grad_norm": 1.0518776178359985,
      "learning_rate": 0.00013933184855233853,
      "loss": 2.0944,
      "step": 1372
    },
    {
      "epoch": 0.3051111111111111,
      "grad_norm": 0.9454103708267212,
      "learning_rate": 0.00013928730512249445,
      "loss": 1.8151,
      "step": 1373
    },
    {
      "epoch": 0.30533333333333335,
      "grad_norm": 0.6248193383216858,
      "learning_rate": 0.00013924276169265034,
      "loss": 0.9771,
      "step": 1374
    },
    {
      "epoch": 0.3055555555555556,
      "grad_norm": 0.08333655446767807,
      "learning_rate": 0.00013919821826280623,
      "loss": 0.0158,
      "step": 1375
    },
    {
      "epoch": 0.30577777777777776,
      "grad_norm": 0.08563945442438126,
      "learning_rate": 0.00013915367483296215,
      "loss": 0.0191,
      "step": 1376
    },
    {
      "epoch": 0.306,
      "grad_norm": 0.9253622889518738,
      "learning_rate": 0.00013910913140311804,
      "loss": 1.7993,
      "step": 1377
    },
    {
      "epoch": 0.30622222222222223,
      "grad_norm": 0.9118351936340332,
      "learning_rate": 0.00013906458797327396,
      "loss": 1.855,
      "step": 1378
    },
    {
      "epoch": 0.30644444444444446,
      "grad_norm": 1.037188172340393,
      "learning_rate": 0.00013902004454342985,
      "loss": 1.8305,
      "step": 1379
    },
    {
      "epoch": 0.30666666666666664,
      "grad_norm": 0.9743615984916687,
      "learning_rate": 0.00013897550111358574,
      "loss": 1.7063,
      "step": 1380
    },
    {
      "epoch": 0.3068888888888889,
      "grad_norm": 1.013454794883728,
      "learning_rate": 0.00013893095768374164,
      "loss": 1.9382,
      "step": 1381
    },
    {
      "epoch": 0.3071111111111111,
      "grad_norm": 0.9891089797019958,
      "learning_rate": 0.00013888641425389758,
      "loss": 2.0535,
      "step": 1382
    },
    {
      "epoch": 0.30733333333333335,
      "grad_norm": 0.10896483063697815,
      "learning_rate": 0.00013884187082405347,
      "loss": 0.02,
      "step": 1383
    },
    {
      "epoch": 0.3075555555555556,
      "grad_norm": 0.07615262269973755,
      "learning_rate": 0.00013879732739420936,
      "loss": 0.02,
      "step": 1384
    },
    {
      "epoch": 0.30777777777777776,
      "grad_norm": 0.6620470881462097,
      "learning_rate": 0.00013875278396436526,
      "loss": 0.8813,
      "step": 1385
    },
    {
      "epoch": 0.308,
      "grad_norm": 1.031408667564392,
      "learning_rate": 0.00013870824053452115,
      "loss": 1.9278,
      "step": 1386
    },
    {
      "epoch": 0.30822222222222223,
      "grad_norm": 0.08425939083099365,
      "learning_rate": 0.00013866369710467707,
      "loss": 0.0183,
      "step": 1387
    },
    {
      "epoch": 0.30844444444444447,
      "grad_norm": 0.07298826426267624,
      "learning_rate": 0.00013861915367483296,
      "loss": 0.0176,
      "step": 1388
    },
    {
      "epoch": 0.30866666666666664,
      "grad_norm": 0.6326761245727539,
      "learning_rate": 0.00013857461024498888,
      "loss": 0.7375,
      "step": 1389
    },
    {
      "epoch": 0.3088888888888889,
      "grad_norm": 0.7388807535171509,
      "learning_rate": 0.00013853006681514477,
      "loss": 0.8331,
      "step": 1390
    },
    {
      "epoch": 0.3091111111111111,
      "grad_norm": 1.3735177516937256,
      "learning_rate": 0.00013848552338530066,
      "loss": 1.5142,
      "step": 1391
    },
    {
      "epoch": 0.30933333333333335,
      "grad_norm": 1.0506902933120728,
      "learning_rate": 0.00013844097995545658,
      "loss": 1.5543,
      "step": 1392
    },
    {
      "epoch": 0.30955555555555553,
      "grad_norm": 0.9635607600212097,
      "learning_rate": 0.00013839643652561247,
      "loss": 1.3675,
      "step": 1393
    },
    {
      "epoch": 0.30977777777777776,
      "grad_norm": 1.2587249279022217,
      "learning_rate": 0.00013835189309576836,
      "loss": 1.7267,
      "step": 1394
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1692713499069214,
      "learning_rate": 0.00013830734966592428,
      "loss": 1.442,
      "step": 1395
    },
    {
      "epoch": 0.31022222222222223,
      "grad_norm": 1.0046931505203247,
      "learning_rate": 0.0001382628062360802,
      "loss": 1.2172,
      "step": 1396
    },
    {
      "epoch": 0.31044444444444447,
      "grad_norm": 0.7264860272407532,
      "learning_rate": 0.0001382182628062361,
      "loss": 0.5203,
      "step": 1397
    },
    {
      "epoch": 0.31066666666666665,
      "grad_norm": 1.1405805349349976,
      "learning_rate": 0.00013817371937639199,
      "loss": 1.0399,
      "step": 1398
    },
    {
      "epoch": 0.3108888888888889,
      "grad_norm": 1.1510692834854126,
      "learning_rate": 0.00013812917594654788,
      "loss": 1.1632,
      "step": 1399
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 1.4786934852600098,
      "learning_rate": 0.00013808463251670377,
      "loss": 1.1631,
      "step": 1400
    },
    {
      "epoch": 0.31133333333333335,
      "grad_norm": 0.7321925759315491,
      "learning_rate": 0.00013804008908685972,
      "loss": 1.2846,
      "step": 1401
    },
    {
      "epoch": 0.31155555555555553,
      "grad_norm": 0.8748204708099365,
      "learning_rate": 0.0001379955456570156,
      "loss": 1.9939,
      "step": 1402
    },
    {
      "epoch": 0.31177777777777776,
      "grad_norm": 0.6573612689971924,
      "learning_rate": 0.0001379510022271715,
      "loss": 1.1956,
      "step": 1403
    },
    {
      "epoch": 0.312,
      "grad_norm": 0.049669332802295685,
      "learning_rate": 0.0001379064587973274,
      "loss": 0.0108,
      "step": 1404
    },
    {
      "epoch": 0.31222222222222223,
      "grad_norm": 0.5930540561676025,
      "learning_rate": 0.0001378619153674833,
      "loss": 1.1255,
      "step": 1405
    },
    {
      "epoch": 0.31244444444444447,
      "grad_norm": 0.585092306137085,
      "learning_rate": 0.0001378173719376392,
      "loss": 0.9278,
      "step": 1406
    },
    {
      "epoch": 0.31266666666666665,
      "grad_norm": 0.7510188221931458,
      "learning_rate": 0.00013777282850779512,
      "loss": 1.3691,
      "step": 1407
    },
    {
      "epoch": 0.3128888888888889,
      "grad_norm": 0.675545334815979,
      "learning_rate": 0.000137728285077951,
      "loss": 0.0309,
      "step": 1408
    },
    {
      "epoch": 0.3131111111111111,
      "grad_norm": 0.13560031354427338,
      "learning_rate": 0.0001376837416481069,
      "loss": 0.0159,
      "step": 1409
    },
    {
      "epoch": 0.31333333333333335,
      "grad_norm": 0.8208870887756348,
      "learning_rate": 0.00013763919821826282,
      "loss": 1.9514,
      "step": 1410
    },
    {
      "epoch": 0.31355555555555553,
      "grad_norm": 0.8589663505554199,
      "learning_rate": 0.00013759465478841871,
      "loss": 2.0799,
      "step": 1411
    },
    {
      "epoch": 0.31377777777777777,
      "grad_norm": 0.9203690886497498,
      "learning_rate": 0.0001375501113585746,
      "loss": 2.1629,
      "step": 1412
    },
    {
      "epoch": 0.314,
      "grad_norm": 1.0017417669296265,
      "learning_rate": 0.00013750556792873053,
      "loss": 2.403,
      "step": 1413
    },
    {
      "epoch": 0.31422222222222224,
      "grad_norm": 0.9020522236824036,
      "learning_rate": 0.00013746102449888642,
      "loss": 1.8001,
      "step": 1414
    },
    {
      "epoch": 0.31444444444444447,
      "grad_norm": 0.8959416747093201,
      "learning_rate": 0.00013741648106904234,
      "loss": 1.9272,
      "step": 1415
    },
    {
      "epoch": 0.31466666666666665,
      "grad_norm": 0.8453971147537231,
      "learning_rate": 0.00013737193763919823,
      "loss": 2.0584,
      "step": 1416
    },
    {
      "epoch": 0.3148888888888889,
      "grad_norm": 1.0558977127075195,
      "learning_rate": 0.00013732739420935412,
      "loss": 2.1863,
      "step": 1417
    },
    {
      "epoch": 0.3151111111111111,
      "grad_norm": 0.9428475499153137,
      "learning_rate": 0.00013728285077951,
      "loss": 1.8684,
      "step": 1418
    },
    {
      "epoch": 0.31533333333333335,
      "grad_norm": 1.0137181282043457,
      "learning_rate": 0.00013723830734966593,
      "loss": 1.9836,
      "step": 1419
    },
    {
      "epoch": 0.31555555555555553,
      "grad_norm": 0.9282086491584778,
      "learning_rate": 0.00013719376391982185,
      "loss": 1.9903,
      "step": 1420
    },
    {
      "epoch": 0.31577777777777777,
      "grad_norm": 0.6798798441886902,
      "learning_rate": 0.00013714922048997774,
      "loss": 0.0244,
      "step": 1421
    },
    {
      "epoch": 0.316,
      "grad_norm": 0.07246652245521545,
      "learning_rate": 0.00013710467706013363,
      "loss": 0.0155,
      "step": 1422
    },
    {
      "epoch": 0.31622222222222224,
      "grad_norm": 0.7266292572021484,
      "learning_rate": 0.00013706013363028952,
      "loss": 0.74,
      "step": 1423
    },
    {
      "epoch": 0.3164444444444444,
      "grad_norm": 0.30071863532066345,
      "learning_rate": 0.00013701559020044544,
      "loss": 0.0299,
      "step": 1424
    },
    {
      "epoch": 0.31666666666666665,
      "grad_norm": 0.6319347620010376,
      "learning_rate": 0.00013697104677060136,
      "loss": 0.9216,
      "step": 1425
    },
    {
      "epoch": 0.3168888888888889,
      "grad_norm": 0.9551006555557251,
      "learning_rate": 0.00013692650334075725,
      "loss": 1.9096,
      "step": 1426
    },
    {
      "epoch": 0.3171111111111111,
      "grad_norm": 0.9880414009094238,
      "learning_rate": 0.00013688195991091315,
      "loss": 1.8509,
      "step": 1427
    },
    {
      "epoch": 0.31733333333333336,
      "grad_norm": 0.9100226163864136,
      "learning_rate": 0.00013683741648106904,
      "loss": 1.8315,
      "step": 1428
    },
    {
      "epoch": 0.31755555555555554,
      "grad_norm": 1.0499440431594849,
      "learning_rate": 0.00013679287305122496,
      "loss": 2.1878,
      "step": 1429
    },
    {
      "epoch": 0.31777777777777777,
      "grad_norm": 1.0114299058914185,
      "learning_rate": 0.00013674832962138085,
      "loss": 1.5405,
      "step": 1430
    },
    {
      "epoch": 0.318,
      "grad_norm": 0.0601423941552639,
      "learning_rate": 0.00013670378619153677,
      "loss": 0.018,
      "step": 1431
    },
    {
      "epoch": 0.31822222222222224,
      "grad_norm": 0.05997453257441521,
      "learning_rate": 0.00013665924276169266,
      "loss": 0.0182,
      "step": 1432
    },
    {
      "epoch": 0.3184444444444444,
      "grad_norm": 0.8638304471969604,
      "learning_rate": 0.00013661469933184855,
      "loss": 0.829,
      "step": 1433
    },
    {
      "epoch": 0.31866666666666665,
      "grad_norm": 0.9641528725624084,
      "learning_rate": 0.00013657015590200447,
      "loss": 1.7045,
      "step": 1434
    },
    {
      "epoch": 0.3188888888888889,
      "grad_norm": 1.0239591598510742,
      "learning_rate": 0.00013652561247216036,
      "loss": 1.6333,
      "step": 1435
    },
    {
      "epoch": 0.3191111111111111,
      "grad_norm": 0.07839752733707428,
      "learning_rate": 0.00013648106904231625,
      "loss": 0.0199,
      "step": 1436
    },
    {
      "epoch": 0.31933333333333336,
      "grad_norm": 0.07986389845609665,
      "learning_rate": 0.00013643652561247217,
      "loss": 0.0205,
      "step": 1437
    },
    {
      "epoch": 0.31955555555555554,
      "grad_norm": 0.0760846883058548,
      "learning_rate": 0.00013639198218262806,
      "loss": 0.0196,
      "step": 1438
    },
    {
      "epoch": 0.31977777777777777,
      "grad_norm": 1.220151424407959,
      "learning_rate": 0.00013634743875278398,
      "loss": 1.8283,
      "step": 1439
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1636601686477661,
      "learning_rate": 0.00013630289532293988,
      "loss": 1.661,
      "step": 1440
    },
    {
      "epoch": 0.32022222222222224,
      "grad_norm": 1.003257393836975,
      "learning_rate": 0.00013625835189309577,
      "loss": 1.5869,
      "step": 1441
    },
    {
      "epoch": 0.3204444444444444,
      "grad_norm": 0.850940465927124,
      "learning_rate": 0.00013621380846325166,
      "loss": 0.8508,
      "step": 1442
    },
    {
      "epoch": 0.32066666666666666,
      "grad_norm": 0.8343439102172852,
      "learning_rate": 0.00013616926503340758,
      "loss": 0.9602,
      "step": 1443
    },
    {
      "epoch": 0.3208888888888889,
      "grad_norm": 1.075269103050232,
      "learning_rate": 0.0001361247216035635,
      "loss": 1.7471,
      "step": 1444
    },
    {
      "epoch": 0.3211111111111111,
      "grad_norm": 1.0553182363510132,
      "learning_rate": 0.0001360801781737194,
      "loss": 1.465,
      "step": 1445
    },
    {
      "epoch": 0.32133333333333336,
      "grad_norm": 1.0383347272872925,
      "learning_rate": 0.00013603563474387528,
      "loss": 1.332,
      "step": 1446
    },
    {
      "epoch": 0.32155555555555554,
      "grad_norm": 1.3711236715316772,
      "learning_rate": 0.00013599109131403117,
      "loss": 1.7505,
      "step": 1447
    },
    {
      "epoch": 0.3217777777777778,
      "grad_norm": 1.1211059093475342,
      "learning_rate": 0.0001359465478841871,
      "loss": 1.3927,
      "step": 1448
    },
    {
      "epoch": 0.322,
      "grad_norm": 1.0040308237075806,
      "learning_rate": 0.000135902004454343,
      "loss": 1.2536,
      "step": 1449
    },
    {
      "epoch": 0.32222222222222224,
      "grad_norm": 0.7612625956535339,
      "learning_rate": 0.0001358574610244989,
      "loss": 0.4927,
      "step": 1450
    },
    {
      "epoch": 0.3224444444444444,
      "grad_norm": 0.8916938900947571,
      "learning_rate": 0.0001358129175946548,
      "loss": 2.2834,
      "step": 1451
    },
    {
      "epoch": 0.32266666666666666,
      "grad_norm": 0.6277791261672974,
      "learning_rate": 0.00013576837416481069,
      "loss": 1.0415,
      "step": 1452
    },
    {
      "epoch": 0.3228888888888889,
      "grad_norm": 0.8357473611831665,
      "learning_rate": 0.0001357238307349666,
      "loss": 2.2224,
      "step": 1453
    },
    {
      "epoch": 0.3231111111111111,
      "grad_norm": 0.051700394600629807,
      "learning_rate": 0.0001356792873051225,
      "loss": 0.0112,
      "step": 1454
    },
    {
      "epoch": 0.3233333333333333,
      "grad_norm": 0.6184151768684387,
      "learning_rate": 0.00013563474387527841,
      "loss": 1.1171,
      "step": 1455
    },
    {
      "epoch": 0.32355555555555554,
      "grad_norm": 0.8683602809906006,
      "learning_rate": 0.0001355902004454343,
      "loss": 2.2732,
      "step": 1456
    },
    {
      "epoch": 0.3237777777777778,
      "grad_norm": 0.810332179069519,
      "learning_rate": 0.0001355456570155902,
      "loss": 2.1909,
      "step": 1457
    },
    {
      "epoch": 0.324,
      "grad_norm": 0.9960488677024841,
      "learning_rate": 0.00013550111358574612,
      "loss": 2.3706,
      "step": 1458
    },
    {
      "epoch": 0.32422222222222224,
      "grad_norm": 0.8388908505439758,
      "learning_rate": 0.000135456570155902,
      "loss": 2.0544,
      "step": 1459
    },
    {
      "epoch": 0.3244444444444444,
      "grad_norm": 1.571382999420166,
      "learning_rate": 0.0001354120267260579,
      "loss": 0.0447,
      "step": 1460
    },
    {
      "epoch": 0.32466666666666666,
      "grad_norm": 0.8016864657402039,
      "learning_rate": 0.0001353674832962138,
      "loss": 1.9393,
      "step": 1461
    },
    {
      "epoch": 0.3248888888888889,
      "grad_norm": 0.973527729511261,
      "learning_rate": 0.0001353229398663697,
      "loss": 2.0011,
      "step": 1462
    },
    {
      "epoch": 0.32511111111111113,
      "grad_norm": 0.896851658821106,
      "learning_rate": 0.00013527839643652563,
      "loss": 2.0065,
      "step": 1463
    },
    {
      "epoch": 0.3253333333333333,
      "grad_norm": 1.0477668046951294,
      "learning_rate": 0.00013523385300668152,
      "loss": 1.781,
      "step": 1464
    },
    {
      "epoch": 0.32555555555555554,
      "grad_norm": 0.9915692806243896,
      "learning_rate": 0.00013518930957683741,
      "loss": 2.0669,
      "step": 1465
    },
    {
      "epoch": 0.3257777777777778,
      "grad_norm": 0.1883118599653244,
      "learning_rate": 0.0001351447661469933,
      "loss": 0.0247,
      "step": 1466
    },
    {
      "epoch": 0.326,
      "grad_norm": 0.69676274061203,
      "learning_rate": 0.00013510022271714922,
      "loss": 1.0799,
      "step": 1467
    },
    {
      "epoch": 0.32622222222222225,
      "grad_norm": 0.6046315431594849,
      "learning_rate": 0.00013505567928730514,
      "loss": 0.9688,
      "step": 1468
    },
    {
      "epoch": 0.3264444444444444,
      "grad_norm": 1.0804486274719238,
      "learning_rate": 0.00013501113585746104,
      "loss": 1.9587,
      "step": 1469
    },
    {
      "epoch": 0.32666666666666666,
      "grad_norm": 0.8974558115005493,
      "learning_rate": 0.00013496659242761693,
      "loss": 1.7077,
      "step": 1470
    },
    {
      "epoch": 0.3268888888888889,
      "grad_norm": 0.8613162040710449,
      "learning_rate": 0.00013492204899777282,
      "loss": 1.8079,
      "step": 1471
    },
    {
      "epoch": 0.32711111111111113,
      "grad_norm": 0.9599412083625793,
      "learning_rate": 0.00013487750556792874,
      "loss": 2.1691,
      "step": 1472
    },
    {
      "epoch": 0.3273333333333333,
      "grad_norm": 1.0441149473190308,
      "learning_rate": 0.00013483296213808463,
      "loss": 2.1019,
      "step": 1473
    },
    {
      "epoch": 0.32755555555555554,
      "grad_norm": 0.9434587359428406,
      "learning_rate": 0.00013478841870824055,
      "loss": 1.6825,
      "step": 1474
    },
    {
      "epoch": 0.3277777777777778,
      "grad_norm": 0.17473283410072327,
      "learning_rate": 0.00013474387527839644,
      "loss": 0.0173,
      "step": 1475
    },
    {
      "epoch": 0.328,
      "grad_norm": 0.10064394772052765,
      "learning_rate": 0.00013469933184855233,
      "loss": 0.0165,
      "step": 1476
    },
    {
      "epoch": 0.32822222222222225,
      "grad_norm": 0.6770084500312805,
      "learning_rate": 0.00013465478841870825,
      "loss": 0.9154,
      "step": 1477
    },
    {
      "epoch": 0.32844444444444443,
      "grad_norm": 1.1016879081726074,
      "learning_rate": 0.00013461024498886414,
      "loss": 1.7554,
      "step": 1478
    },
    {
      "epoch": 0.32866666666666666,
      "grad_norm": 1.096827745437622,
      "learning_rate": 0.00013456570155902003,
      "loss": 1.6961,
      "step": 1479
    },
    {
      "epoch": 0.3288888888888889,
      "grad_norm": 0.9961020350456238,
      "learning_rate": 0.00013452115812917595,
      "loss": 1.9033,
      "step": 1480
    },
    {
      "epoch": 0.32911111111111113,
      "grad_norm": 0.08242473751306534,
      "learning_rate": 0.00013447661469933187,
      "loss": 0.0181,
      "step": 1481
    },
    {
      "epoch": 0.3293333333333333,
      "grad_norm": 0.07623957842588425,
      "learning_rate": 0.00013443207126948776,
      "loss": 0.018,
      "step": 1482
    },
    {
      "epoch": 0.32955555555555555,
      "grad_norm": 0.06432683765888214,
      "learning_rate": 0.00013438752783964366,
      "loss": 0.0177,
      "step": 1483
    },
    {
      "epoch": 0.3297777777777778,
      "grad_norm": 0.06077761575579643,
      "learning_rate": 0.00013434298440979955,
      "loss": 0.0173,
      "step": 1484
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.0834859162569046,
      "learning_rate": 0.00013429844097995544,
      "loss": 0.0173,
      "step": 1485
    },
    {
      "epoch": 0.3302222222222222,
      "grad_norm": 1.0607445240020752,
      "learning_rate": 0.00013425389755011139,
      "loss": 1.8938,
      "step": 1486
    },
    {
      "epoch": 0.33044444444444443,
      "grad_norm": 0.7781770825386047,
      "learning_rate": 0.00013420935412026728,
      "loss": 0.8223,
      "step": 1487
    },
    {
      "epoch": 0.33066666666666666,
      "grad_norm": 0.1246686577796936,
      "learning_rate": 0.00013416481069042317,
      "loss": 0.0205,
      "step": 1488
    },
    {
      "epoch": 0.3308888888888889,
      "grad_norm": 0.10837747901678085,
      "learning_rate": 0.00013412026726057906,
      "loss": 0.0194,
      "step": 1489
    },
    {
      "epoch": 0.33111111111111113,
      "grad_norm": 0.09982477873563766,
      "learning_rate": 0.00013407572383073498,
      "loss": 0.0186,
      "step": 1490
    },
    {
      "epoch": 0.3313333333333333,
      "grad_norm": 0.9533151984214783,
      "learning_rate": 0.00013403118040089087,
      "loss": 1.656,
      "step": 1491
    },
    {
      "epoch": 0.33155555555555555,
      "grad_norm": 0.8519287109375,
      "learning_rate": 0.0001339866369710468,
      "loss": 0.7699,
      "step": 1492
    },
    {
      "epoch": 0.3317777777777778,
      "grad_norm": 1.026831865310669,
      "learning_rate": 0.00013394209354120268,
      "loss": 1.4599,
      "step": 1493
    },
    {
      "epoch": 0.332,
      "grad_norm": 1.4534752368927002,
      "learning_rate": 0.00013389755011135857,
      "loss": 2.2483,
      "step": 1494
    },
    {
      "epoch": 0.3322222222222222,
      "grad_norm": 1.1159143447875977,
      "learning_rate": 0.0001338530066815145,
      "loss": 1.3013,
      "step": 1495
    },
    {
      "epoch": 0.33244444444444443,
      "grad_norm": 0.7417425513267517,
      "learning_rate": 0.00013380846325167039,
      "loss": 0.6602,
      "step": 1496
    },
    {
      "epoch": 0.33266666666666667,
      "grad_norm": 0.7993981242179871,
      "learning_rate": 0.00013376391982182628,
      "loss": 0.5802,
      "step": 1497
    },
    {
      "epoch": 0.3328888888888889,
      "grad_norm": 0.962468147277832,
      "learning_rate": 0.0001337193763919822,
      "loss": 1.0806,
      "step": 1498
    },
    {
      "epoch": 0.33311111111111114,
      "grad_norm": 1.2233699560165405,
      "learning_rate": 0.0001336748329621381,
      "loss": 1.338,
      "step": 1499
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.9210373163223267,
      "learning_rate": 0.000133630289532294,
      "loss": 1.0784,
      "step": 1500
    },
    {
      "epoch": 0.33355555555555555,
      "grad_norm": 0.5745635032653809,
      "learning_rate": 0.0001335857461024499,
      "loss": 1.0502,
      "step": 1501
    },
    {
      "epoch": 0.3337777777777778,
      "grad_norm": 0.06606602668762207,
      "learning_rate": 0.0001335412026726058,
      "loss": 0.0111,
      "step": 1502
    },
    {
      "epoch": 0.334,
      "grad_norm": 0.5875163078308105,
      "learning_rate": 0.00013349665924276168,
      "loss": 1.1621,
      "step": 1503
    },
    {
      "epoch": 0.3342222222222222,
      "grad_norm": 0.0689874216914177,
      "learning_rate": 0.0001334521158129176,
      "loss": 0.0108,
      "step": 1504
    },
    {
      "epoch": 0.33444444444444443,
      "grad_norm": 0.4859806299209595,
      "learning_rate": 0.00013340757238307352,
      "loss": 0.9335,
      "step": 1505
    },
    {
      "epoch": 0.33466666666666667,
      "grad_norm": 0.7597318291664124,
      "learning_rate": 0.0001333630289532294,
      "loss": 0.9311,
      "step": 1506
    },
    {
      "epoch": 0.3348888888888889,
      "grad_norm": 0.9831368327140808,
      "learning_rate": 0.0001333184855233853,
      "loss": 2.4321,
      "step": 1507
    },
    {
      "epoch": 0.33511111111111114,
      "grad_norm": 0.9348986148834229,
      "learning_rate": 0.0001332739420935412,
      "loss": 1.38,
      "step": 1508
    },
    {
      "epoch": 0.3353333333333333,
      "grad_norm": 0.7181216478347778,
      "learning_rate": 0.00013322939866369711,
      "loss": 0.0187,
      "step": 1509
    },
    {
      "epoch": 0.33555555555555555,
      "grad_norm": 1.021496057510376,
      "learning_rate": 0.00013318485523385303,
      "loss": 2.2939,
      "step": 1510
    },
    {
      "epoch": 0.3357777777777778,
      "grad_norm": 0.8876065015792847,
      "learning_rate": 0.00013314031180400893,
      "loss": 1.9854,
      "step": 1511
    },
    {
      "epoch": 0.336,
      "grad_norm": 0.9143947958946228,
      "learning_rate": 0.00013309576837416482,
      "loss": 1.9859,
      "step": 1512
    },
    {
      "epoch": 0.3362222222222222,
      "grad_norm": 0.9279030561447144,
      "learning_rate": 0.0001330512249443207,
      "loss": 2.1078,
      "step": 1513
    },
    {
      "epoch": 0.33644444444444443,
      "grad_norm": 0.9055482149124146,
      "learning_rate": 0.00013300668151447663,
      "loss": 2.1865,
      "step": 1514
    },
    {
      "epoch": 0.33666666666666667,
      "grad_norm": 0.9191523790359497,
      "learning_rate": 0.00013296213808463252,
      "loss": 2.2237,
      "step": 1515
    },
    {
      "epoch": 0.3368888888888889,
      "grad_norm": 0.8818835020065308,
      "learning_rate": 0.00013291759465478844,
      "loss": 1.8723,
      "step": 1516
    },
    {
      "epoch": 0.3371111111111111,
      "grad_norm": 0.9102388024330139,
      "learning_rate": 0.00013287305122494433,
      "loss": 1.9411,
      "step": 1517
    },
    {
      "epoch": 0.3373333333333333,
      "grad_norm": 1.2459015846252441,
      "learning_rate": 0.00013282850779510022,
      "loss": 1.9948,
      "step": 1518
    },
    {
      "epoch": 0.33755555555555555,
      "grad_norm": 0.9848894476890564,
      "learning_rate": 0.00013278396436525614,
      "loss": 2.0083,
      "step": 1519
    },
    {
      "epoch": 0.3377777777777778,
      "grad_norm": 0.900453507900238,
      "learning_rate": 0.00013273942093541203,
      "loss": 1.9048,
      "step": 1520
    },
    {
      "epoch": 0.338,
      "grad_norm": 0.9033414721488953,
      "learning_rate": 0.00013269487750556792,
      "loss": 1.8965,
      "step": 1521
    },
    {
      "epoch": 0.3382222222222222,
      "grad_norm": 0.8703753352165222,
      "learning_rate": 0.00013265033407572384,
      "loss": 2.0901,
      "step": 1522
    },
    {
      "epoch": 0.33844444444444444,
      "grad_norm": 0.15832370519638062,
      "learning_rate": 0.00013260579064587974,
      "loss": 0.0187,
      "step": 1523
    },
    {
      "epoch": 0.33866666666666667,
      "grad_norm": 0.11121733486652374,
      "learning_rate": 0.00013256124721603565,
      "loss": 0.0181,
      "step": 1524
    },
    {
      "epoch": 0.3388888888888889,
      "grad_norm": 0.131880983710289,
      "learning_rate": 0.00013251670378619155,
      "loss": 0.0212,
      "step": 1525
    },
    {
      "epoch": 0.3391111111111111,
      "grad_norm": 1.021878957748413,
      "learning_rate": 0.00013247216035634744,
      "loss": 2.0158,
      "step": 1526
    },
    {
      "epoch": 0.3393333333333333,
      "grad_norm": 0.9855987429618835,
      "learning_rate": 0.00013242761692650333,
      "loss": 1.7378,
      "step": 1527
    },
    {
      "epoch": 0.33955555555555555,
      "grad_norm": 0.9633174538612366,
      "learning_rate": 0.00013238307349665925,
      "loss": 1.9319,
      "step": 1528
    },
    {
      "epoch": 0.3397777777777778,
      "grad_norm": 0.9162039160728455,
      "learning_rate": 0.00013233853006681517,
      "loss": 1.6992,
      "step": 1529
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.06041941046714783,
      "learning_rate": 0.00013229398663697106,
      "loss": 0.0164,
      "step": 1530
    },
    {
      "epoch": 0.3402222222222222,
      "grad_norm": 0.06718610972166061,
      "learning_rate": 0.00013224944320712695,
      "loss": 0.0163,
      "step": 1531
    },
    {
      "epoch": 0.34044444444444444,
      "grad_norm": 0.06589064002037048,
      "learning_rate": 0.00013220489977728284,
      "loss": 0.0165,
      "step": 1532
    },
    {
      "epoch": 0.3406666666666667,
      "grad_norm": 0.7173296809196472,
      "learning_rate": 0.00013216035634743876,
      "loss": 0.9545,
      "step": 1533
    },
    {
      "epoch": 0.3408888888888889,
      "grad_norm": 0.6503753662109375,
      "learning_rate": 0.00013211581291759465,
      "loss": 0.8755,
      "step": 1534
    },
    {
      "epoch": 0.3411111111111111,
      "grad_norm": 0.09492537379264832,
      "learning_rate": 0.00013207126948775057,
      "loss": 0.0178,
      "step": 1535
    },
    {
      "epoch": 0.3413333333333333,
      "grad_norm": 0.07086139172315598,
      "learning_rate": 0.00013202672605790646,
      "loss": 0.0174,
      "step": 1536
    },
    {
      "epoch": 0.34155555555555556,
      "grad_norm": 0.7318459749221802,
      "learning_rate": 0.00013198218262806236,
      "loss": 0.7707,
      "step": 1537
    },
    {
      "epoch": 0.3417777777777778,
      "grad_norm": 0.9774269461631775,
      "learning_rate": 0.00013193763919821827,
      "loss": 1.4131,
      "step": 1538
    },
    {
      "epoch": 0.342,
      "grad_norm": 0.6436899900436401,
      "learning_rate": 0.00013189309576837417,
      "loss": 0.7927,
      "step": 1539
    },
    {
      "epoch": 0.3422222222222222,
      "grad_norm": 0.12925714254379272,
      "learning_rate": 0.00013184855233853006,
      "loss": 0.0295,
      "step": 1540
    },
    {
      "epoch": 0.34244444444444444,
      "grad_norm": 0.6985756158828735,
      "learning_rate": 0.00013180400890868598,
      "loss": 0.6512,
      "step": 1541
    },
    {
      "epoch": 0.3426666666666667,
      "grad_norm": 1.2356915473937988,
      "learning_rate": 0.00013175946547884187,
      "loss": 1.9498,
      "step": 1542
    },
    {
      "epoch": 0.3428888888888889,
      "grad_norm": 1.0016330480575562,
      "learning_rate": 0.0001317149220489978,
      "loss": 0.8292,
      "step": 1543
    },
    {
      "epoch": 0.3431111111111111,
      "grad_norm": 1.0059810876846313,
      "learning_rate": 0.00013167037861915368,
      "loss": 1.1816,
      "step": 1544
    },
    {
      "epoch": 0.3433333333333333,
      "grad_norm": 1.080196738243103,
      "learning_rate": 0.00013162583518930957,
      "loss": 1.1594,
      "step": 1545
    },
    {
      "epoch": 0.34355555555555556,
      "grad_norm": 0.25775691866874695,
      "learning_rate": 0.00013158129175946546,
      "loss": 0.0452,
      "step": 1546
    },
    {
      "epoch": 0.3437777777777778,
      "grad_norm": 0.2299448549747467,
      "learning_rate": 0.00013153674832962138,
      "loss": 0.0418,
      "step": 1547
    },
    {
      "epoch": 0.344,
      "grad_norm": 1.0654411315917969,
      "learning_rate": 0.0001314922048997773,
      "loss": 0.8916,
      "step": 1548
    },
    {
      "epoch": 0.3442222222222222,
      "grad_norm": 1.1717489957809448,
      "learning_rate": 0.0001314476614699332,
      "loss": 1.1441,
      "step": 1549
    },
    {
      "epoch": 0.34444444444444444,
      "grad_norm": 0.9188210964202881,
      "learning_rate": 0.00013140311804008909,
      "loss": 0.7552,
      "step": 1550
    },
    {
      "epoch": 0.3446666666666667,
      "grad_norm": 0.04411787912249565,
      "learning_rate": 0.00013135857461024498,
      "loss": 0.0097,
      "step": 1551
    },
    {
      "epoch": 0.3448888888888889,
      "grad_norm": 0.923112690448761,
      "learning_rate": 0.0001313140311804009,
      "loss": 2.3056,
      "step": 1552
    },
    {
      "epoch": 0.3451111111111111,
      "grad_norm": 0.5338932275772095,
      "learning_rate": 0.00013126948775055681,
      "loss": 1.0079,
      "step": 1553
    },
    {
      "epoch": 0.3453333333333333,
      "grad_norm": 0.6365249156951904,
      "learning_rate": 0.0001312249443207127,
      "loss": 1.1061,
      "step": 1554
    },
    {
      "epoch": 0.34555555555555556,
      "grad_norm": 0.6134782433509827,
      "learning_rate": 0.0001311804008908686,
      "loss": 1.1132,
      "step": 1555
    },
    {
      "epoch": 0.3457777777777778,
      "grad_norm": 0.9963126182556152,
      "learning_rate": 0.0001311358574610245,
      "loss": 2.2424,
      "step": 1556
    },
    {
      "epoch": 0.346,
      "grad_norm": 0.6900335550308228,
      "learning_rate": 0.0001310913140311804,
      "loss": 0.0327,
      "step": 1557
    },
    {
      "epoch": 0.3462222222222222,
      "grad_norm": 0.5578808188438416,
      "learning_rate": 0.0001310467706013363,
      "loss": 1.0124,
      "step": 1558
    },
    {
      "epoch": 0.34644444444444444,
      "grad_norm": 0.9253682494163513,
      "learning_rate": 0.00013100222717149222,
      "loss": 2.1314,
      "step": 1559
    },
    {
      "epoch": 0.3466666666666667,
      "grad_norm": 0.9170551896095276,
      "learning_rate": 0.0001309576837416481,
      "loss": 2.1579,
      "step": 1560
    },
    {
      "epoch": 0.3468888888888889,
      "grad_norm": 0.9059464931488037,
      "learning_rate": 0.000130913140311804,
      "loss": 2.0186,
      "step": 1561
    },
    {
      "epoch": 0.3471111111111111,
      "grad_norm": 0.9621554017066956,
      "learning_rate": 0.00013086859688195992,
      "loss": 2.1287,
      "step": 1562
    },
    {
      "epoch": 0.3473333333333333,
      "grad_norm": 1.1893537044525146,
      "learning_rate": 0.00013082405345211581,
      "loss": 2.3136,
      "step": 1563
    },
    {
      "epoch": 0.34755555555555556,
      "grad_norm": 0.9095190167427063,
      "learning_rate": 0.0001307795100222717,
      "loss": 1.9219,
      "step": 1564
    },
    {
      "epoch": 0.3477777777777778,
      "grad_norm": 1.1559150218963623,
      "learning_rate": 0.00013073496659242762,
      "loss": 2.1498,
      "step": 1565
    },
    {
      "epoch": 0.348,
      "grad_norm": 1.0603296756744385,
      "learning_rate": 0.00013069042316258354,
      "loss": 1.8157,
      "step": 1566
    },
    {
      "epoch": 0.3482222222222222,
      "grad_norm": 0.8160759806632996,
      "learning_rate": 0.00013064587973273944,
      "loss": 1.6901,
      "step": 1567
    },
    {
      "epoch": 0.34844444444444445,
      "grad_norm": 0.8839480876922607,
      "learning_rate": 0.00013060133630289533,
      "loss": 2.0199,
      "step": 1568
    },
    {
      "epoch": 0.3486666666666667,
      "grad_norm": 0.9084996581077576,
      "learning_rate": 0.00013055679287305122,
      "loss": 1.6581,
      "step": 1569
    },
    {
      "epoch": 0.3488888888888889,
      "grad_norm": 1.0160479545593262,
      "learning_rate": 0.0001305122494432071,
      "loss": 2.0388,
      "step": 1570
    },
    {
      "epoch": 0.3491111111111111,
      "grad_norm": 0.949428141117096,
      "learning_rate": 0.00013046770601336306,
      "loss": 1.9788,
      "step": 1571
    },
    {
      "epoch": 0.34933333333333333,
      "grad_norm": 0.08356664329767227,
      "learning_rate": 0.00013042316258351895,
      "loss": 0.0148,
      "step": 1572
    },
    {
      "epoch": 0.34955555555555556,
      "grad_norm": 0.6757658123970032,
      "learning_rate": 0.00013037861915367484,
      "loss": 0.7336,
      "step": 1573
    },
    {
      "epoch": 0.3497777777777778,
      "grad_norm": 1.2868684530258179,
      "learning_rate": 0.00013033407572383073,
      "loss": 2.1339,
      "step": 1574
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.1288681924343109,
      "learning_rate": 0.00013028953229398665,
      "loss": 0.0233,
      "step": 1575
    },
    {
      "epoch": 0.3502222222222222,
      "grad_norm": 0.9786088466644287,
      "learning_rate": 0.00013024498886414254,
      "loss": 1.8232,
      "step": 1576
    },
    {
      "epoch": 0.35044444444444445,
      "grad_norm": 1.028579831123352,
      "learning_rate": 0.00013020044543429846,
      "loss": 1.8186,
      "step": 1577
    },
    {
      "epoch": 0.3506666666666667,
      "grad_norm": 1.0287123918533325,
      "learning_rate": 0.00013015590200445435,
      "loss": 1.7949,
      "step": 1578
    },
    {
      "epoch": 0.35088888888888886,
      "grad_norm": 1.1234492063522339,
      "learning_rate": 0.00013011135857461025,
      "loss": 1.7413,
      "step": 1579
    },
    {
      "epoch": 0.3511111111111111,
      "grad_norm": 0.061823874711990356,
      "learning_rate": 0.00013006681514476616,
      "loss": 0.0176,
      "step": 1580
    },
    {
      "epoch": 0.35133333333333333,
      "grad_norm": 0.6424452662467957,
      "learning_rate": 0.00013002227171492206,
      "loss": 0.707,
      "step": 1581
    },
    {
      "epoch": 0.35155555555555557,
      "grad_norm": 0.9415295124053955,
      "learning_rate": 0.00012997772828507795,
      "loss": 1.6596,
      "step": 1582
    },
    {
      "epoch": 0.3517777777777778,
      "grad_norm": 0.9959827065467834,
      "learning_rate": 0.00012993318485523387,
      "loss": 1.7779,
      "step": 1583
    },
    {
      "epoch": 0.352,
      "grad_norm": 0.7211580872535706,
      "learning_rate": 0.00012988864142538976,
      "loss": 1.0332,
      "step": 1584
    },
    {
      "epoch": 0.3522222222222222,
      "grad_norm": 0.08983895927667618,
      "learning_rate": 0.00012984409799554568,
      "loss": 0.0193,
      "step": 1585
    },
    {
      "epoch": 0.35244444444444445,
      "grad_norm": 0.07530716061592102,
      "learning_rate": 0.00012979955456570157,
      "loss": 0.0195,
      "step": 1586
    },
    {
      "epoch": 0.3526666666666667,
      "grad_norm": 1.0737003087997437,
      "learning_rate": 0.00012975501113585746,
      "loss": 1.4139,
      "step": 1587
    },
    {
      "epoch": 0.35288888888888886,
      "grad_norm": 1.1176050901412964,
      "learning_rate": 0.00012971046770601335,
      "loss": 1.6433,
      "step": 1588
    },
    {
      "epoch": 0.3531111111111111,
      "grad_norm": 1.0836116075515747,
      "learning_rate": 0.00012966592427616927,
      "loss": 1.602,
      "step": 1589
    },
    {
      "epoch": 0.35333333333333333,
      "grad_norm": 0.3639664351940155,
      "learning_rate": 0.0001296213808463252,
      "loss": 0.0277,
      "step": 1590
    },
    {
      "epoch": 0.35355555555555557,
      "grad_norm": 0.9747996926307678,
      "learning_rate": 0.00012957683741648108,
      "loss": 1.4553,
      "step": 1591
    },
    {
      "epoch": 0.3537777777777778,
      "grad_norm": 1.146167278289795,
      "learning_rate": 0.00012953229398663697,
      "loss": 1.7357,
      "step": 1592
    },
    {
      "epoch": 0.354,
      "grad_norm": 1.1450601816177368,
      "learning_rate": 0.00012948775055679287,
      "loss": 1.6419,
      "step": 1593
    },
    {
      "epoch": 0.3542222222222222,
      "grad_norm": 1.1255170106887817,
      "learning_rate": 0.00012944320712694879,
      "loss": 1.5562,
      "step": 1594
    },
    {
      "epoch": 0.35444444444444445,
      "grad_norm": 0.783320963382721,
      "learning_rate": 0.0001293986636971047,
      "loss": 0.6857,
      "step": 1595
    },
    {
      "epoch": 0.3546666666666667,
      "grad_norm": 0.5967231392860413,
      "learning_rate": 0.0001293541202672606,
      "loss": 0.5624,
      "step": 1596
    },
    {
      "epoch": 0.35488888888888886,
      "grad_norm": 1.0421006679534912,
      "learning_rate": 0.0001293095768374165,
      "loss": 1.3535,
      "step": 1597
    },
    {
      "epoch": 0.3551111111111111,
      "grad_norm": 0.8290188908576965,
      "learning_rate": 0.00012926503340757238,
      "loss": 0.7833,
      "step": 1598
    },
    {
      "epoch": 0.35533333333333333,
      "grad_norm": 0.8200139403343201,
      "learning_rate": 0.0001292204899777283,
      "loss": 0.6196,
      "step": 1599
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.9781906604766846,
      "learning_rate": 0.0001291759465478842,
      "loss": 0.8058,
      "step": 1600
    },
    {
      "epoch": 0.3557777777777778,
      "grad_norm": 0.0529782809317112,
      "learning_rate": 0.0001291314031180401,
      "loss": 0.0104,
      "step": 1601
    },
    {
      "epoch": 0.356,
      "grad_norm": 0.5451091527938843,
      "learning_rate": 0.000129086859688196,
      "loss": 0.9391,
      "step": 1602
    },
    {
      "epoch": 0.3562222222222222,
      "grad_norm": 0.8948948979377747,
      "learning_rate": 0.0001290423162583519,
      "loss": 2.1385,
      "step": 1603
    },
    {
      "epoch": 0.35644444444444445,
      "grad_norm": 0.0486118420958519,
      "learning_rate": 0.0001289977728285078,
      "loss": 0.0104,
      "step": 1604
    },
    {
      "epoch": 0.3566666666666667,
      "grad_norm": 0.5522439479827881,
      "learning_rate": 0.0001289532293986637,
      "loss": 1.1311,
      "step": 1605
    },
    {
      "epoch": 0.35688888888888887,
      "grad_norm": 0.847810685634613,
      "learning_rate": 0.0001289086859688196,
      "loss": 2.2308,
      "step": 1606
    },
    {
      "epoch": 0.3571111111111111,
      "grad_norm": 0.8593490719795227,
      "learning_rate": 0.0001288641425389755,
      "loss": 2.0835,
      "step": 1607
    },
    {
      "epoch": 0.35733333333333334,
      "grad_norm": 0.8339969515800476,
      "learning_rate": 0.0001288195991091314,
      "loss": 2.0113,
      "step": 1608
    },
    {
      "epoch": 0.35755555555555557,
      "grad_norm": 0.38288450241088867,
      "learning_rate": 0.00012877505567928732,
      "loss": 0.0169,
      "step": 1609
    },
    {
      "epoch": 0.35777777777777775,
      "grad_norm": 0.11315584927797318,
      "learning_rate": 0.00012873051224944322,
      "loss": 0.0161,
      "step": 1610
    },
    {
      "epoch": 0.358,
      "grad_norm": 0.1084010973572731,
      "learning_rate": 0.0001286859688195991,
      "loss": 0.015,
      "step": 1611
    },
    {
      "epoch": 0.3582222222222222,
      "grad_norm": 0.10393639653921127,
      "learning_rate": 0.000128641425389755,
      "loss": 0.0146,
      "step": 1612
    },
    {
      "epoch": 0.35844444444444445,
      "grad_norm": 0.8769986629486084,
      "learning_rate": 0.00012859688195991092,
      "loss": 2.0321,
      "step": 1613
    },
    {
      "epoch": 0.3586666666666667,
      "grad_norm": 0.8985224366188049,
      "learning_rate": 0.00012855233853006684,
      "loss": 1.9825,
      "step": 1614
    },
    {
      "epoch": 0.35888888888888887,
      "grad_norm": 0.8849453926086426,
      "learning_rate": 0.00012850779510022273,
      "loss": 1.8286,
      "step": 1615
    },
    {
      "epoch": 0.3591111111111111,
      "grad_norm": 0.876132607460022,
      "learning_rate": 0.00012846325167037862,
      "loss": 2.1886,
      "step": 1616
    },
    {
      "epoch": 0.35933333333333334,
      "grad_norm": 0.7373232245445251,
      "learning_rate": 0.00012841870824053451,
      "loss": 1.0218,
      "step": 1617
    },
    {
      "epoch": 0.3595555555555556,
      "grad_norm": 0.9138587117195129,
      "learning_rate": 0.00012837416481069043,
      "loss": 2.1635,
      "step": 1618
    },
    {
      "epoch": 0.35977777777777775,
      "grad_norm": 0.8863195180892944,
      "learning_rate": 0.00012832962138084632,
      "loss": 1.9095,
      "step": 1619
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9143100380897522,
      "learning_rate": 0.00012828507795100224,
      "loss": 1.8664,
      "step": 1620
    },
    {
      "epoch": 0.3602222222222222,
      "grad_norm": 0.9049918055534363,
      "learning_rate": 0.00012824053452115814,
      "loss": 1.9491,
      "step": 1621
    },
    {
      "epoch": 0.36044444444444446,
      "grad_norm": 0.8652317523956299,
      "learning_rate": 0.00012819599109131403,
      "loss": 1.7523,
      "step": 1622
    },
    {
      "epoch": 0.3606666666666667,
      "grad_norm": 0.07398027926683426,
      "learning_rate": 0.00012815144766146995,
      "loss": 0.0159,
      "step": 1623
    },
    {
      "epoch": 0.36088888888888887,
      "grad_norm": 0.07411263883113861,
      "learning_rate": 0.00012810690423162584,
      "loss": 0.0185,
      "step": 1624
    },
    {
      "epoch": 0.3611111111111111,
      "grad_norm": 0.11097452044487,
      "learning_rate": 0.00012806236080178173,
      "loss": 0.0206,
      "step": 1625
    },
    {
      "epoch": 0.36133333333333334,
      "grad_norm": 1.0089179277420044,
      "learning_rate": 0.00012801781737193765,
      "loss": 1.8515,
      "step": 1626
    },
    {
      "epoch": 0.3615555555555556,
      "grad_norm": 0.9654055237770081,
      "learning_rate": 0.00012797327394209354,
      "loss": 1.7223,
      "step": 1627
    },
    {
      "epoch": 0.36177777777777775,
      "grad_norm": 0.9917669892311096,
      "learning_rate": 0.00012792873051224946,
      "loss": 1.8853,
      "step": 1628
    },
    {
      "epoch": 0.362,
      "grad_norm": 1.2126929759979248,
      "learning_rate": 0.00012788418708240535,
      "loss": 1.8215,
      "step": 1629
    },
    {
      "epoch": 0.3622222222222222,
      "grad_norm": 1.3606003522872925,
      "learning_rate": 0.00012783964365256124,
      "loss": 1.9842,
      "step": 1630
    },
    {
      "epoch": 0.36244444444444446,
      "grad_norm": 0.7445697784423828,
      "learning_rate": 0.00012779510022271713,
      "loss": 0.9715,
      "step": 1631
    },
    {
      "epoch": 0.3626666666666667,
      "grad_norm": 0.0977458506822586,
      "learning_rate": 0.00012775055679287305,
      "loss": 0.0185,
      "step": 1632
    },
    {
      "epoch": 0.36288888888888887,
      "grad_norm": 0.07269278168678284,
      "learning_rate": 0.00012770601336302897,
      "loss": 0.0188,
      "step": 1633
    },
    {
      "epoch": 0.3631111111111111,
      "grad_norm": 0.7048798203468323,
      "learning_rate": 0.00012766146993318486,
      "loss": 0.8731,
      "step": 1634
    },
    {
      "epoch": 0.36333333333333334,
      "grad_norm": 0.9043886661529541,
      "learning_rate": 0.00012761692650334076,
      "loss": 1.8761,
      "step": 1635
    },
    {
      "epoch": 0.3635555555555556,
      "grad_norm": 1.0288832187652588,
      "learning_rate": 0.00012757238307349665,
      "loss": 1.7554,
      "step": 1636
    },
    {
      "epoch": 0.36377777777777776,
      "grad_norm": 1.0029970407485962,
      "learning_rate": 0.00012752783964365257,
      "loss": 1.8181,
      "step": 1637
    },
    {
      "epoch": 0.364,
      "grad_norm": 0.0940476730465889,
      "learning_rate": 0.00012748329621380849,
      "loss": 0.0175,
      "step": 1638
    },
    {
      "epoch": 0.3642222222222222,
      "grad_norm": 0.07410628348588943,
      "learning_rate": 0.00012743875278396438,
      "loss": 0.0175,
      "step": 1639
    },
    {
      "epoch": 0.36444444444444446,
      "grad_norm": 1.1884973049163818,
      "learning_rate": 0.00012739420935412027,
      "loss": 1.936,
      "step": 1640
    },
    {
      "epoch": 0.36466666666666664,
      "grad_norm": 0.9561774730682373,
      "learning_rate": 0.00012734966592427616,
      "loss": 1.3877,
      "step": 1641
    },
    {
      "epoch": 0.3648888888888889,
      "grad_norm": 1.1464483737945557,
      "learning_rate": 0.00012730512249443208,
      "loss": 1.9865,
      "step": 1642
    },
    {
      "epoch": 0.3651111111111111,
      "grad_norm": 0.7155196666717529,
      "learning_rate": 0.00012726057906458797,
      "loss": 0.6446,
      "step": 1643
    },
    {
      "epoch": 0.36533333333333334,
      "grad_norm": 1.1351078748703003,
      "learning_rate": 0.0001272160356347439,
      "loss": 1.8303,
      "step": 1644
    },
    {
      "epoch": 0.3655555555555556,
      "grad_norm": 1.0295593738555908,
      "learning_rate": 0.00012717149220489978,
      "loss": 1.2797,
      "step": 1645
    },
    {
      "epoch": 0.36577777777777776,
      "grad_norm": 0.20271006226539612,
      "learning_rate": 0.00012712694877505567,
      "loss": 0.0336,
      "step": 1646
    },
    {
      "epoch": 0.366,
      "grad_norm": 0.2018907517194748,
      "learning_rate": 0.0001270824053452116,
      "loss": 0.0321,
      "step": 1647
    },
    {
      "epoch": 0.3662222222222222,
      "grad_norm": 1.1571309566497803,
      "learning_rate": 0.00012703786191536748,
      "loss": 1.2524,
      "step": 1648
    },
    {
      "epoch": 0.36644444444444446,
      "grad_norm": 0.6432564854621887,
      "learning_rate": 0.00012699331848552338,
      "loss": 0.503,
      "step": 1649
    },
    {
      "epoch": 0.36666666666666664,
      "grad_norm": 0.9266985058784485,
      "learning_rate": 0.0001269487750556793,
      "loss": 0.8534,
      "step": 1650
    },
    {
      "epoch": 0.3668888888888889,
      "grad_norm": 0.581489086151123,
      "learning_rate": 0.00012690423162583521,
      "loss": 1.1384,
      "step": 1651
    },
    {
      "epoch": 0.3671111111111111,
      "grad_norm": 0.5554734468460083,
      "learning_rate": 0.0001268596881959911,
      "loss": 1.0044,
      "step": 1652
    },
    {
      "epoch": 0.36733333333333335,
      "grad_norm": 0.6623711585998535,
      "learning_rate": 0.000126815144766147,
      "loss": 1.161,
      "step": 1653
    },
    {
      "epoch": 0.3675555555555556,
      "grad_norm": 0.060292765498161316,
      "learning_rate": 0.0001267706013363029,
      "loss": 0.0109,
      "step": 1654
    },
    {
      "epoch": 0.36777777777777776,
      "grad_norm": 0.582197368144989,
      "learning_rate": 0.00012672605790645878,
      "loss": 1.1099,
      "step": 1655
    },
    {
      "epoch": 0.368,
      "grad_norm": 0.8612513542175293,
      "learning_rate": 0.00012668151447661473,
      "loss": 1.9373,
      "step": 1656
    },
    {
      "epoch": 0.36822222222222223,
      "grad_norm": 0.252760112285614,
      "learning_rate": 0.00012663697104677062,
      "loss": 0.0131,
      "step": 1657
    },
    {
      "epoch": 0.36844444444444446,
      "grad_norm": 0.07458077371120453,
      "learning_rate": 0.0001265924276169265,
      "loss": 0.0122,
      "step": 1658
    },
    {
      "epoch": 0.36866666666666664,
      "grad_norm": 0.926961362361908,
      "learning_rate": 0.0001265478841870824,
      "loss": 2.0833,
      "step": 1659
    },
    {
      "epoch": 0.3688888888888889,
      "grad_norm": 0.8995304107666016,
      "learning_rate": 0.00012650334075723832,
      "loss": 2.253,
      "step": 1660
    },
    {
      "epoch": 0.3691111111111111,
      "grad_norm": 0.9721949100494385,
      "learning_rate": 0.00012645879732739421,
      "loss": 2.0844,
      "step": 1661
    },
    {
      "epoch": 0.36933333333333335,
      "grad_norm": 0.8844018578529358,
      "learning_rate": 0.00012641425389755013,
      "loss": 2.176,
      "step": 1662
    },
    {
      "epoch": 0.3695555555555556,
      "grad_norm": 1.2494245767593384,
      "learning_rate": 0.00012636971046770602,
      "loss": 2.3682,
      "step": 1663
    },
    {
      "epoch": 0.36977777777777776,
      "grad_norm": 0.8971030712127686,
      "learning_rate": 0.00012632516703786192,
      "loss": 1.9487,
      "step": 1664
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6521131992340088,
      "learning_rate": 0.00012628062360801784,
      "loss": 1.0122,
      "step": 1665
    },
    {
      "epoch": 0.37022222222222223,
      "grad_norm": 0.9701309204101562,
      "learning_rate": 0.00012623608017817373,
      "loss": 2.0993,
      "step": 1666
    },
    {
      "epoch": 0.37044444444444447,
      "grad_norm": 0.8997014760971069,
      "learning_rate": 0.00012619153674832962,
      "loss": 1.8794,
      "step": 1667
    },
    {
      "epoch": 0.37066666666666664,
      "grad_norm": 1.065514087677002,
      "learning_rate": 0.00012614699331848554,
      "loss": 2.1362,
      "step": 1668
    },
    {
      "epoch": 0.3708888888888889,
      "grad_norm": 0.995553731918335,
      "learning_rate": 0.00012610244988864143,
      "loss": 1.7254,
      "step": 1669
    },
    {
      "epoch": 0.3711111111111111,
      "grad_norm": 0.8794861435890198,
      "learning_rate": 0.00012605790645879735,
      "loss": 1.0144,
      "step": 1670
    },
    {
      "epoch": 0.37133333333333335,
      "grad_norm": 0.0776643380522728,
      "learning_rate": 0.00012601336302895324,
      "loss": 0.0167,
      "step": 1671
    },
    {
      "epoch": 0.37155555555555553,
      "grad_norm": 0.6524280905723572,
      "learning_rate": 0.00012596881959910913,
      "loss": 1.0171,
      "step": 1672
    },
    {
      "epoch": 0.37177777777777776,
      "grad_norm": 0.9473826289176941,
      "learning_rate": 0.00012592427616926502,
      "loss": 1.8923,
      "step": 1673
    },
    {
      "epoch": 0.372,
      "grad_norm": 0.9059301614761353,
      "learning_rate": 0.00012587973273942094,
      "loss": 1.8065,
      "step": 1674
    },
    {
      "epoch": 0.37222222222222223,
      "grad_norm": 0.9567731022834778,
      "learning_rate": 0.00012583518930957686,
      "loss": 1.8692,
      "step": 1675
    },
    {
      "epoch": 0.37244444444444447,
      "grad_norm": 1.0518147945404053,
      "learning_rate": 0.00012579064587973275,
      "loss": 1.9397,
      "step": 1676
    },
    {
      "epoch": 0.37266666666666665,
      "grad_norm": 1.0581625699996948,
      "learning_rate": 0.00012574610244988865,
      "loss": 1.9263,
      "step": 1677
    },
    {
      "epoch": 0.3728888888888889,
      "grad_norm": 1.148897647857666,
      "learning_rate": 0.00012570155902004454,
      "loss": 1.8185,
      "step": 1678
    },
    {
      "epoch": 0.3731111111111111,
      "grad_norm": 0.6279930472373962,
      "learning_rate": 0.00012565701559020046,
      "loss": 0.6442,
      "step": 1679
    },
    {
      "epoch": 0.37333333333333335,
      "grad_norm": 0.06652513891458511,
      "learning_rate": 0.00012561247216035635,
      "loss": 0.0181,
      "step": 1680
    },
    {
      "epoch": 0.37355555555555553,
      "grad_norm": 0.5910684466362,
      "learning_rate": 0.00012556792873051227,
      "loss": 0.857,
      "step": 1681
    },
    {
      "epoch": 0.37377777777777776,
      "grad_norm": 0.09267974644899368,
      "learning_rate": 0.00012552338530066816,
      "loss": 0.0185,
      "step": 1682
    },
    {
      "epoch": 0.374,
      "grad_norm": 0.09794706106185913,
      "learning_rate": 0.00012547884187082405,
      "loss": 0.0179,
      "step": 1683
    },
    {
      "epoch": 0.37422222222222223,
      "grad_norm": 1.141258955001831,
      "learning_rate": 0.00012543429844097997,
      "loss": 1.9392,
      "step": 1684
    },
    {
      "epoch": 0.37444444444444447,
      "grad_norm": 0.9182208776473999,
      "learning_rate": 0.00012538975501113586,
      "loss": 1.4379,
      "step": 1685
    },
    {
      "epoch": 0.37466666666666665,
      "grad_norm": 0.9447404742240906,
      "learning_rate": 0.00012534521158129175,
      "loss": 1.3792,
      "step": 1686
    },
    {
      "epoch": 0.3748888888888889,
      "grad_norm": 0.6784771680831909,
      "learning_rate": 0.00012530066815144767,
      "loss": 0.8427,
      "step": 1687
    },
    {
      "epoch": 0.3751111111111111,
      "grad_norm": 0.10267607867717743,
      "learning_rate": 0.00012525612472160356,
      "loss": 0.0257,
      "step": 1688
    },
    {
      "epoch": 0.37533333333333335,
      "grad_norm": 0.9849367141723633,
      "learning_rate": 0.00012521158129175948,
      "loss": 1.5144,
      "step": 1689
    },
    {
      "epoch": 0.37555555555555553,
      "grad_norm": 1.1103235483169556,
      "learning_rate": 0.00012516703786191537,
      "loss": 1.7006,
      "step": 1690
    },
    {
      "epoch": 0.37577777777777777,
      "grad_norm": 1.041797399520874,
      "learning_rate": 0.00012512249443207127,
      "loss": 1.4283,
      "step": 1691
    },
    {
      "epoch": 0.376,
      "grad_norm": 0.8971735835075378,
      "learning_rate": 0.00012507795100222716,
      "loss": 1.3154,
      "step": 1692
    },
    {
      "epoch": 0.37622222222222224,
      "grad_norm": 1.0990266799926758,
      "learning_rate": 0.00012503340757238308,
      "loss": 1.3619,
      "step": 1693
    },
    {
      "epoch": 0.37644444444444447,
      "grad_norm": 1.2396061420440674,
      "learning_rate": 0.000124988864142539,
      "loss": 1.5702,
      "step": 1694
    },
    {
      "epoch": 0.37666666666666665,
      "grad_norm": 0.661430835723877,
      "learning_rate": 0.0001249443207126949,
      "loss": 0.5549,
      "step": 1695
    },
    {
      "epoch": 0.3768888888888889,
      "grad_norm": 1.2013130187988281,
      "learning_rate": 0.00012489977728285078,
      "loss": 1.2375,
      "step": 1696
    },
    {
      "epoch": 0.3771111111111111,
      "grad_norm": 0.6722013354301453,
      "learning_rate": 0.00012485523385300667,
      "loss": 0.6046,
      "step": 1697
    },
    {
      "epoch": 0.37733333333333335,
      "grad_norm": 0.19260694086551666,
      "learning_rate": 0.0001248106904231626,
      "loss": 0.0361,
      "step": 1698
    },
    {
      "epoch": 0.37755555555555553,
      "grad_norm": 1.0123697519302368,
      "learning_rate": 0.0001247661469933185,
      "loss": 1.0244,
      "step": 1699
    },
    {
      "epoch": 0.37777777777777777,
      "grad_norm": 0.949657678604126,
      "learning_rate": 0.0001247216035634744,
      "loss": 0.8659,
      "step": 1700
    },
    {
      "epoch": 0.378,
      "grad_norm": 0.7017142176628113,
      "learning_rate": 0.0001246770601336303,
      "loss": 1.1181,
      "step": 1701
    },
    {
      "epoch": 0.37822222222222224,
      "grad_norm": 0.9786233305931091,
      "learning_rate": 0.00012463251670378618,
      "loss": 2.1521,
      "step": 1702
    },
    {
      "epoch": 0.37844444444444447,
      "grad_norm": 0.6545943021774292,
      "learning_rate": 0.0001245879732739421,
      "loss": 1.2272,
      "step": 1703
    },
    {
      "epoch": 0.37866666666666665,
      "grad_norm": 1.0188517570495605,
      "learning_rate": 0.000124543429844098,
      "loss": 2.0431,
      "step": 1704
    },
    {
      "epoch": 0.3788888888888889,
      "grad_norm": 0.7633973360061646,
      "learning_rate": 0.00012449888641425391,
      "loss": 2.082,
      "step": 1705
    },
    {
      "epoch": 0.3791111111111111,
      "grad_norm": 0.9081476926803589,
      "learning_rate": 0.0001244543429844098,
      "loss": 2.2362,
      "step": 1706
    },
    {
      "epoch": 0.37933333333333336,
      "grad_norm": 0.08680958300828934,
      "learning_rate": 0.0001244097995545657,
      "loss": 0.0136,
      "step": 1707
    },
    {
      "epoch": 0.37955555555555553,
      "grad_norm": 0.0849529430270195,
      "learning_rate": 0.00012436525612472162,
      "loss": 0.0133,
      "step": 1708
    },
    {
      "epoch": 0.37977777777777777,
      "grad_norm": 0.5420004725456238,
      "learning_rate": 0.0001243207126948775,
      "loss": 1.0004,
      "step": 1709
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9287367463111877,
      "learning_rate": 0.0001242761692650334,
      "loss": 2.0996,
      "step": 1710
    },
    {
      "epoch": 0.38022222222222224,
      "grad_norm": 1.0013773441314697,
      "learning_rate": 0.00012423162583518932,
      "loss": 2.0191,
      "step": 1711
    },
    {
      "epoch": 0.3804444444444444,
      "grad_norm": 0.9471575021743774,
      "learning_rate": 0.0001241870824053452,
      "loss": 1.9956,
      "step": 1712
    },
    {
      "epoch": 0.38066666666666665,
      "grad_norm": 0.981694221496582,
      "learning_rate": 0.00012414253897550113,
      "loss": 2.1098,
      "step": 1713
    },
    {
      "epoch": 0.3808888888888889,
      "grad_norm": 0.970139741897583,
      "learning_rate": 0.00012409799554565702,
      "loss": 2.0178,
      "step": 1714
    },
    {
      "epoch": 0.3811111111111111,
      "grad_norm": 0.598267138004303,
      "learning_rate": 0.0001240534521158129,
      "loss": 1.0541,
      "step": 1715
    },
    {
      "epoch": 0.38133333333333336,
      "grad_norm": 1.0712449550628662,
      "learning_rate": 0.0001240089086859688,
      "loss": 1.9999,
      "step": 1716
    },
    {
      "epoch": 0.38155555555555554,
      "grad_norm": 0.9940736293792725,
      "learning_rate": 0.00012396436525612472,
      "loss": 2.1087,
      "step": 1717
    },
    {
      "epoch": 0.38177777777777777,
      "grad_norm": 0.0704389214515686,
      "learning_rate": 0.00012391982182628064,
      "loss": 0.0179,
      "step": 1718
    },
    {
      "epoch": 0.382,
      "grad_norm": 0.07912923395633698,
      "learning_rate": 0.00012387527839643653,
      "loss": 0.018,
      "step": 1719
    },
    {
      "epoch": 0.38222222222222224,
      "grad_norm": 0.6652305126190186,
      "learning_rate": 0.00012383073496659243,
      "loss": 0.9506,
      "step": 1720
    },
    {
      "epoch": 0.3824444444444444,
      "grad_norm": 0.2545645833015442,
      "learning_rate": 0.00012378619153674832,
      "loss": 0.0274,
      "step": 1721
    },
    {
      "epoch": 0.38266666666666665,
      "grad_norm": 1.0166536569595337,
      "learning_rate": 0.00012374164810690424,
      "loss": 1.8203,
      "step": 1722
    },
    {
      "epoch": 0.3828888888888889,
      "grad_norm": 0.967710018157959,
      "learning_rate": 0.00012369710467706016,
      "loss": 1.8026,
      "step": 1723
    },
    {
      "epoch": 0.3831111111111111,
      "grad_norm": 0.9521524310112,
      "learning_rate": 0.00012365256124721605,
      "loss": 1.8211,
      "step": 1724
    },
    {
      "epoch": 0.38333333333333336,
      "grad_norm": 0.9643096923828125,
      "learning_rate": 0.00012360801781737194,
      "loss": 1.8337,
      "step": 1725
    },
    {
      "epoch": 0.38355555555555554,
      "grad_norm": 1.082641839981079,
      "learning_rate": 0.00012356347438752783,
      "loss": 1.9756,
      "step": 1726
    },
    {
      "epoch": 0.3837777777777778,
      "grad_norm": 0.2606064975261688,
      "learning_rate": 0.00012351893095768375,
      "loss": 0.0189,
      "step": 1727
    },
    {
      "epoch": 0.384,
      "grad_norm": 0.06352642178535461,
      "learning_rate": 0.00012347438752783964,
      "loss": 0.0185,
      "step": 1728
    },
    {
      "epoch": 0.38422222222222224,
      "grad_norm": 0.9276388883590698,
      "learning_rate": 0.00012342984409799556,
      "loss": 1.7712,
      "step": 1729
    },
    {
      "epoch": 0.3844444444444444,
      "grad_norm": 0.9787095785140991,
      "learning_rate": 0.00012338530066815145,
      "loss": 1.6116,
      "step": 1730
    },
    {
      "epoch": 0.38466666666666666,
      "grad_norm": 1.4584063291549683,
      "learning_rate": 0.00012334075723830735,
      "loss": 1.6803,
      "step": 1731
    },
    {
      "epoch": 0.3848888888888889,
      "grad_norm": 0.10419341921806335,
      "learning_rate": 0.00012329621380846326,
      "loss": 0.0198,
      "step": 1732
    },
    {
      "epoch": 0.3851111111111111,
      "grad_norm": 0.9885858297348022,
      "learning_rate": 0.00012325167037861916,
      "loss": 1.5324,
      "step": 1733
    },
    {
      "epoch": 0.38533333333333336,
      "grad_norm": 0.9763101935386658,
      "learning_rate": 0.00012320712694877505,
      "loss": 1.7128,
      "step": 1734
    },
    {
      "epoch": 0.38555555555555554,
      "grad_norm": 1.02174973487854,
      "learning_rate": 0.00012316258351893097,
      "loss": 1.6472,
      "step": 1735
    },
    {
      "epoch": 0.3857777777777778,
      "grad_norm": 1.0153290033340454,
      "learning_rate": 0.00012311804008908689,
      "loss": 1.7305,
      "step": 1736
    },
    {
      "epoch": 0.386,
      "grad_norm": 0.9473196864128113,
      "learning_rate": 0.00012307349665924278,
      "loss": 0.0297,
      "step": 1737
    },
    {
      "epoch": 0.38622222222222224,
      "grad_norm": 0.9792290329933167,
      "learning_rate": 0.00012302895322939867,
      "loss": 1.6719,
      "step": 1738
    },
    {
      "epoch": 0.3864444444444444,
      "grad_norm": 1.081703782081604,
      "learning_rate": 0.00012298440979955456,
      "loss": 1.6265,
      "step": 1739
    },
    {
      "epoch": 0.38666666666666666,
      "grad_norm": 1.0634948015213013,
      "learning_rate": 0.00012293986636971045,
      "loss": 1.4929,
      "step": 1740
    },
    {
      "epoch": 0.3868888888888889,
      "grad_norm": 1.219645380973816,
      "learning_rate": 0.0001228953229398664,
      "loss": 1.3222,
      "step": 1741
    },
    {
      "epoch": 0.38711111111111113,
      "grad_norm": 1.1311880350112915,
      "learning_rate": 0.0001228507795100223,
      "loss": 1.4129,
      "step": 1742
    },
    {
      "epoch": 0.3873333333333333,
      "grad_norm": 1.1938977241516113,
      "learning_rate": 0.00012280623608017818,
      "loss": 1.6393,
      "step": 1743
    },
    {
      "epoch": 0.38755555555555554,
      "grad_norm": 0.7046709656715393,
      "learning_rate": 0.00012276169265033407,
      "loss": 0.6052,
      "step": 1744
    },
    {
      "epoch": 0.3877777777777778,
      "grad_norm": 1.2281843423843384,
      "learning_rate": 0.00012271714922049,
      "loss": 1.361,
      "step": 1745
    },
    {
      "epoch": 0.388,
      "grad_norm": 1.0567635297775269,
      "learning_rate": 0.00012267260579064588,
      "loss": 1.1866,
      "step": 1746
    },
    {
      "epoch": 0.38822222222222225,
      "grad_norm": 0.9681671261787415,
      "learning_rate": 0.0001226280623608018,
      "loss": 1.2505,
      "step": 1747
    },
    {
      "epoch": 0.3884444444444444,
      "grad_norm": 0.7260466814041138,
      "learning_rate": 0.0001225835189309577,
      "loss": 0.5553,
      "step": 1748
    },
    {
      "epoch": 0.38866666666666666,
      "grad_norm": 0.8488286137580872,
      "learning_rate": 0.0001225389755011136,
      "loss": 0.7687,
      "step": 1749
    },
    {
      "epoch": 0.3888888888888889,
      "grad_norm": 1.0907729864120483,
      "learning_rate": 0.0001224944320712695,
      "loss": 0.9859,
      "step": 1750
    },
    {
      "epoch": 0.38911111111111113,
      "grad_norm": 0.5968582630157471,
      "learning_rate": 0.0001224498886414254,
      "loss": 1.0487,
      "step": 1751
    },
    {
      "epoch": 0.3893333333333333,
      "grad_norm": 0.4760688245296478,
      "learning_rate": 0.0001224053452115813,
      "loss": 1.1006,
      "step": 1752
    },
    {
      "epoch": 0.38955555555555554,
      "grad_norm": 0.880111813545227,
      "learning_rate": 0.00012236080178173718,
      "loss": 2.0063,
      "step": 1753
    },
    {
      "epoch": 0.3897777777777778,
      "grad_norm": 0.6034107804298401,
      "learning_rate": 0.0001223162583518931,
      "loss": 1.2052,
      "step": 1754
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.055661823600530624,
      "learning_rate": 0.00012227171492204902,
      "loss": 0.0117,
      "step": 1755
    },
    {
      "epoch": 0.39022222222222225,
      "grad_norm": 0.05036118999123573,
      "learning_rate": 0.0001222271714922049,
      "loss": 0.0128,
      "step": 1756
    },
    {
      "epoch": 0.3904444444444444,
      "grad_norm": 0.09149646013975143,
      "learning_rate": 0.0001221826280623608,
      "loss": 0.014,
      "step": 1757
    },
    {
      "epoch": 0.39066666666666666,
      "grad_norm": 0.09102200716733932,
      "learning_rate": 0.0001221380846325167,
      "loss": 0.0142,
      "step": 1758
    },
    {
      "epoch": 0.3908888888888889,
      "grad_norm": 0.9077672958374023,
      "learning_rate": 0.00012209354120267261,
      "loss": 2.2172,
      "step": 1759
    },
    {
      "epoch": 0.39111111111111113,
      "grad_norm": 0.9596045613288879,
      "learning_rate": 0.00012204899777282852,
      "loss": 2.005,
      "step": 1760
    },
    {
      "epoch": 0.3913333333333333,
      "grad_norm": 0.9369930624961853,
      "learning_rate": 0.00012200445434298442,
      "loss": 1.9653,
      "step": 1761
    },
    {
      "epoch": 0.39155555555555555,
      "grad_norm": 0.7747904062271118,
      "learning_rate": 0.00012195991091314032,
      "loss": 1.9532,
      "step": 1762
    },
    {
      "epoch": 0.3917777777777778,
      "grad_norm": 0.9672890305519104,
      "learning_rate": 0.00012191536748329622,
      "loss": 2.137,
      "step": 1763
    },
    {
      "epoch": 0.392,
      "grad_norm": 0.1316177099943161,
      "learning_rate": 0.00012187082405345211,
      "loss": 0.0221,
      "step": 1764
    },
    {
      "epoch": 0.39222222222222225,
      "grad_norm": 0.9752106070518494,
      "learning_rate": 0.00012182628062360802,
      "loss": 1.8069,
      "step": 1765
    },
    {
      "epoch": 0.39244444444444443,
      "grad_norm": 0.9790940284729004,
      "learning_rate": 0.00012178173719376394,
      "loss": 1.9365,
      "step": 1766
    },
    {
      "epoch": 0.39266666666666666,
      "grad_norm": 0.9355417490005493,
      "learning_rate": 0.00012173719376391983,
      "loss": 2.0998,
      "step": 1767
    },
    {
      "epoch": 0.3928888888888889,
      "grad_norm": 0.8696015477180481,
      "learning_rate": 0.00012169265033407573,
      "loss": 1.8553,
      "step": 1768
    },
    {
      "epoch": 0.39311111111111113,
      "grad_norm": 1.026228666305542,
      "learning_rate": 0.00012164810690423163,
      "loss": 1.7821,
      "step": 1769
    },
    {
      "epoch": 0.3933333333333333,
      "grad_norm": 1.0769325494766235,
      "learning_rate": 0.00012160356347438753,
      "loss": 2.1182,
      "step": 1770
    },
    {
      "epoch": 0.39355555555555555,
      "grad_norm": 0.9091227650642395,
      "learning_rate": 0.00012155902004454342,
      "loss": 1.7079,
      "step": 1771
    },
    {
      "epoch": 0.3937777777777778,
      "grad_norm": 0.0760640799999237,
      "learning_rate": 0.00012151447661469934,
      "loss": 0.0169,
      "step": 1772
    },
    {
      "epoch": 0.394,
      "grad_norm": 0.07410979270935059,
      "learning_rate": 0.00012146993318485525,
      "loss": 0.0165,
      "step": 1773
    },
    {
      "epoch": 0.3942222222222222,
      "grad_norm": 0.5938198566436768,
      "learning_rate": 0.00012142538975501114,
      "loss": 0.9765,
      "step": 1774
    },
    {
      "epoch": 0.39444444444444443,
      "grad_norm": 0.15130677819252014,
      "learning_rate": 0.00012138084632516705,
      "loss": 0.0236,
      "step": 1775
    },
    {
      "epoch": 0.39466666666666667,
      "grad_norm": 0.6686200499534607,
      "learning_rate": 0.00012133630289532294,
      "loss": 0.9716,
      "step": 1776
    },
    {
      "epoch": 0.3948888888888889,
      "grad_norm": 1.011210322380066,
      "learning_rate": 0.00012129175946547884,
      "loss": 1.7901,
      "step": 1777
    },
    {
      "epoch": 0.39511111111111114,
      "grad_norm": 1.0566589832305908,
      "learning_rate": 0.00012124721603563476,
      "loss": 1.6834,
      "step": 1778
    },
    {
      "epoch": 0.3953333333333333,
      "grad_norm": 1.1285459995269775,
      "learning_rate": 0.00012120267260579065,
      "loss": 1.7103,
      "step": 1779
    },
    {
      "epoch": 0.39555555555555555,
      "grad_norm": 0.9587770104408264,
      "learning_rate": 0.00012115812917594656,
      "loss": 2.0388,
      "step": 1780
    },
    {
      "epoch": 0.3957777777777778,
      "grad_norm": 0.9700530171394348,
      "learning_rate": 0.00012111358574610245,
      "loss": 1.7206,
      "step": 1781
    },
    {
      "epoch": 0.396,
      "grad_norm": 0.6485929489135742,
      "learning_rate": 0.00012106904231625836,
      "loss": 0.7268,
      "step": 1782
    },
    {
      "epoch": 0.3962222222222222,
      "grad_norm": 0.07007116824388504,
      "learning_rate": 0.00012102449888641425,
      "loss": 0.0187,
      "step": 1783
    },
    {
      "epoch": 0.39644444444444443,
      "grad_norm": 0.06163879111409187,
      "learning_rate": 0.00012097995545657017,
      "loss": 0.0191,
      "step": 1784
    },
    {
      "epoch": 0.39666666666666667,
      "grad_norm": 0.9925112128257751,
      "learning_rate": 0.00012093541202672607,
      "loss": 1.7063,
      "step": 1785
    },
    {
      "epoch": 0.3968888888888889,
      "grad_norm": 0.1028611809015274,
      "learning_rate": 0.00012089086859688196,
      "loss": 0.0217,
      "step": 1786
    },
    {
      "epoch": 0.39711111111111114,
      "grad_norm": 0.10675106197595596,
      "learning_rate": 0.00012084632516703787,
      "loss": 0.0214,
      "step": 1787
    },
    {
      "epoch": 0.3973333333333333,
      "grad_norm": 0.09875133633613586,
      "learning_rate": 0.00012080178173719376,
      "loss": 0.0206,
      "step": 1788
    },
    {
      "epoch": 0.39755555555555555,
      "grad_norm": 1.1721303462982178,
      "learning_rate": 0.00012075723830734967,
      "loss": 1.4865,
      "step": 1789
    },
    {
      "epoch": 0.3977777777777778,
      "grad_norm": 1.0789026021957397,
      "learning_rate": 0.00012071269487750559,
      "loss": 1.7231,
      "step": 1790
    },
    {
      "epoch": 0.398,
      "grad_norm": 1.559047818183899,
      "learning_rate": 0.00012066815144766148,
      "loss": 0.0602,
      "step": 1791
    },
    {
      "epoch": 0.3982222222222222,
      "grad_norm": 1.1085612773895264,
      "learning_rate": 0.00012062360801781738,
      "loss": 1.6387,
      "step": 1792
    },
    {
      "epoch": 0.39844444444444443,
      "grad_norm": 1.3449455499649048,
      "learning_rate": 0.00012057906458797327,
      "loss": 1.7407,
      "step": 1793
    },
    {
      "epoch": 0.39866666666666667,
      "grad_norm": 1.04912531375885,
      "learning_rate": 0.00012053452115812918,
      "loss": 1.5535,
      "step": 1794
    },
    {
      "epoch": 0.3988888888888889,
      "grad_norm": 1.0405304431915283,
      "learning_rate": 0.00012048997772828507,
      "loss": 1.4078,
      "step": 1795
    },
    {
      "epoch": 0.39911111111111114,
      "grad_norm": 1.0141276121139526,
      "learning_rate": 0.00012044543429844099,
      "loss": 1.2146,
      "step": 1796
    },
    {
      "epoch": 0.3993333333333333,
      "grad_norm": 0.9934976100921631,
      "learning_rate": 0.0001204008908685969,
      "loss": 1.0574,
      "step": 1797
    },
    {
      "epoch": 0.39955555555555555,
      "grad_norm": 1.1789382696151733,
      "learning_rate": 0.00012035634743875279,
      "loss": 1.3544,
      "step": 1798
    },
    {
      "epoch": 0.3997777777777778,
      "grad_norm": 0.6419237852096558,
      "learning_rate": 0.00012031180400890869,
      "loss": 0.4579,
      "step": 1799
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6202405095100403,
      "learning_rate": 0.00012026726057906458,
      "loss": 0.4062,
      "step": 1800
    },
    {
      "epoch": 0.4,
      "eval_loss": 1.200887680053711,
      "eval_runtime": 240.6565,
      "eval_samples_per_second": 4.155,
      "eval_steps_per_second": 4.155,
      "step": 1800
    },
    {
      "epoch": 0.4002222222222222,
      "grad_norm": 0.974485456943512,
      "learning_rate": 0.00012022271714922049,
      "loss": 2.0298,
      "step": 1801
    },
    {
      "epoch": 0.40044444444444444,
      "grad_norm": 0.8032060265541077,
      "learning_rate": 0.00012017817371937641,
      "loss": 2.1083,
      "step": 1802
    },
    {
      "epoch": 0.40066666666666667,
      "grad_norm": 0.054990362375974655,
      "learning_rate": 0.0001201336302895323,
      "loss": 0.011,
      "step": 1803
    },
    {
      "epoch": 0.4008888888888889,
      "grad_norm": 0.8215484619140625,
      "learning_rate": 0.0001200890868596882,
      "loss": 2.1251,
      "step": 1804
    },
    {
      "epoch": 0.4011111111111111,
      "grad_norm": 0.7075866460800171,
      "learning_rate": 0.0001200445434298441,
      "loss": 0.9495,
      "step": 1805
    },
    {
      "epoch": 0.4013333333333333,
      "grad_norm": 0.08694480359554291,
      "learning_rate": 0.00012,
      "loss": 0.0124,
      "step": 1806
    },
    {
      "epoch": 0.40155555555555555,
      "grad_norm": 0.5654726028442383,
      "learning_rate": 0.0001199554565701559,
      "loss": 1.032,
      "step": 1807
    },
    {
      "epoch": 0.4017777777777778,
      "grad_norm": 0.9461843371391296,
      "learning_rate": 0.00011991091314031181,
      "loss": 2.3011,
      "step": 1808
    },
    {
      "epoch": 0.402,
      "grad_norm": 0.9733036160469055,
      "learning_rate": 0.00011986636971046772,
      "loss": 2.212,
      "step": 1809
    },
    {
      "epoch": 0.4022222222222222,
      "grad_norm": 0.9258533716201782,
      "learning_rate": 0.00011982182628062361,
      "loss": 2.2234,
      "step": 1810
    },
    {
      "epoch": 0.40244444444444444,
      "grad_norm": 0.900391697883606,
      "learning_rate": 0.00011977728285077952,
      "loss": 2.1527,
      "step": 1811
    },
    {
      "epoch": 0.4026666666666667,
      "grad_norm": 1.021876573562622,
      "learning_rate": 0.00011973273942093541,
      "loss": 2.1505,
      "step": 1812
    },
    {
      "epoch": 0.4028888888888889,
      "grad_norm": 0.8823310136795044,
      "learning_rate": 0.00011968819599109131,
      "loss": 1.9947,
      "step": 1813
    },
    {
      "epoch": 0.4031111111111111,
      "grad_norm": 0.603898286819458,
      "learning_rate": 0.00011964365256124723,
      "loss": 1.0451,
      "step": 1814
    },
    {
      "epoch": 0.4033333333333333,
      "grad_norm": 0.5974671840667725,
      "learning_rate": 0.00011959910913140312,
      "loss": 0.9056,
      "step": 1815
    },
    {
      "epoch": 0.40355555555555556,
      "grad_norm": 0.8365625143051147,
      "learning_rate": 0.00011955456570155903,
      "loss": 1.7608,
      "step": 1816
    },
    {
      "epoch": 0.4037777777777778,
      "grad_norm": 0.9293599128723145,
      "learning_rate": 0.00011951002227171492,
      "loss": 1.8854,
      "step": 1817
    },
    {
      "epoch": 0.404,
      "grad_norm": 1.0051164627075195,
      "learning_rate": 0.00011946547884187083,
      "loss": 1.9833,
      "step": 1818
    },
    {
      "epoch": 0.4042222222222222,
      "grad_norm": 0.9288824200630188,
      "learning_rate": 0.00011942093541202672,
      "loss": 1.8936,
      "step": 1819
    },
    {
      "epoch": 0.40444444444444444,
      "grad_norm": 0.9323967099189758,
      "learning_rate": 0.00011937639198218265,
      "loss": 1.6103,
      "step": 1820
    },
    {
      "epoch": 0.4046666666666667,
      "grad_norm": 0.9585559964179993,
      "learning_rate": 0.00011933184855233854,
      "loss": 1.8573,
      "step": 1821
    },
    {
      "epoch": 0.4048888888888889,
      "grad_norm": 0.9867689609527588,
      "learning_rate": 0.00011928730512249445,
      "loss": 1.8621,
      "step": 1822
    },
    {
      "epoch": 0.4051111111111111,
      "grad_norm": 0.08060096949338913,
      "learning_rate": 0.00011924276169265034,
      "loss": 0.0168,
      "step": 1823
    },
    {
      "epoch": 0.4053333333333333,
      "grad_norm": 0.08404132723808289,
      "learning_rate": 0.00011919821826280623,
      "loss": 0.0171,
      "step": 1824
    },
    {
      "epoch": 0.40555555555555556,
      "grad_norm": 0.12815998494625092,
      "learning_rate": 0.00011915367483296214,
      "loss": 0.0181,
      "step": 1825
    },
    {
      "epoch": 0.4057777777777778,
      "grad_norm": 0.8509750366210938,
      "learning_rate": 0.00011910913140311803,
      "loss": 1.1935,
      "step": 1826
    },
    {
      "epoch": 0.406,
      "grad_norm": 0.929954469203949,
      "learning_rate": 0.00011906458797327396,
      "loss": 1.7651,
      "step": 1827
    },
    {
      "epoch": 0.4062222222222222,
      "grad_norm": 0.8800256252288818,
      "learning_rate": 0.00011902004454342985,
      "loss": 1.4242,
      "step": 1828
    },
    {
      "epoch": 0.40644444444444444,
      "grad_norm": 0.7143679857254028,
      "learning_rate": 0.00011897550111358576,
      "loss": 0.8556,
      "step": 1829
    },
    {
      "epoch": 0.4066666666666667,
      "grad_norm": 0.06789492070674896,
      "learning_rate": 0.00011893095768374165,
      "loss": 0.0177,
      "step": 1830
    },
    {
      "epoch": 0.4068888888888889,
      "grad_norm": 0.06996285915374756,
      "learning_rate": 0.00011888641425389756,
      "loss": 0.0176,
      "step": 1831
    },
    {
      "epoch": 0.4071111111111111,
      "grad_norm": 0.06404642760753632,
      "learning_rate": 0.00011884187082405345,
      "loss": 0.0172,
      "step": 1832
    },
    {
      "epoch": 0.4073333333333333,
      "grad_norm": 0.6085385680198669,
      "learning_rate": 0.00011879732739420937,
      "loss": 0.8157,
      "step": 1833
    },
    {
      "epoch": 0.40755555555555556,
      "grad_norm": 0.6177558302879333,
      "learning_rate": 0.00011875278396436527,
      "loss": 0.6847,
      "step": 1834
    },
    {
      "epoch": 0.4077777777777778,
      "grad_norm": 1.0499635934829712,
      "learning_rate": 0.00011870824053452116,
      "loss": 1.6652,
      "step": 1835
    },
    {
      "epoch": 0.408,
      "grad_norm": 1.0262913703918457,
      "learning_rate": 0.00011866369710467707,
      "loss": 1.6644,
      "step": 1836
    },
    {
      "epoch": 0.4082222222222222,
      "grad_norm": 0.6902967095375061,
      "learning_rate": 0.00011861915367483296,
      "loss": 0.8171,
      "step": 1837
    },
    {
      "epoch": 0.40844444444444444,
      "grad_norm": 0.07804334908723831,
      "learning_rate": 0.00011857461024498887,
      "loss": 0.0196,
      "step": 1838
    },
    {
      "epoch": 0.4086666666666667,
      "grad_norm": 0.9165345430374146,
      "learning_rate": 0.00011853006681514479,
      "loss": 0.8781,
      "step": 1839
    },
    {
      "epoch": 0.4088888888888889,
      "grad_norm": 0.8940587639808655,
      "learning_rate": 0.00011848552338530068,
      "loss": 1.5392,
      "step": 1840
    },
    {
      "epoch": 0.4091111111111111,
      "grad_norm": 1.045750379562378,
      "learning_rate": 0.00011844097995545658,
      "loss": 1.6757,
      "step": 1841
    },
    {
      "epoch": 0.4093333333333333,
      "grad_norm": 1.0602422952651978,
      "learning_rate": 0.00011839643652561247,
      "loss": 1.5899,
      "step": 1842
    },
    {
      "epoch": 0.40955555555555556,
      "grad_norm": 0.7604432702064514,
      "learning_rate": 0.00011835189309576838,
      "loss": 0.9366,
      "step": 1843
    },
    {
      "epoch": 0.4097777777777778,
      "grad_norm": 1.1251657009124756,
      "learning_rate": 0.00011830734966592427,
      "loss": 1.7115,
      "step": 1844
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1490317583084106,
      "learning_rate": 0.00011826280623608019,
      "loss": 1.644,
      "step": 1845
    },
    {
      "epoch": 0.4102222222222222,
      "grad_norm": 1.2152633666992188,
      "learning_rate": 0.0001182182628062361,
      "loss": 1.8793,
      "step": 1846
    },
    {
      "epoch": 0.41044444444444445,
      "grad_norm": 1.234496831893921,
      "learning_rate": 0.00011817371937639199,
      "loss": 1.2693,
      "step": 1847
    },
    {
      "epoch": 0.4106666666666667,
      "grad_norm": 1.104956030845642,
      "learning_rate": 0.00011812917594654789,
      "loss": 1.3936,
      "step": 1848
    },
    {
      "epoch": 0.4108888888888889,
      "grad_norm": 0.8974543213844299,
      "learning_rate": 0.00011808463251670378,
      "loss": 0.5888,
      "step": 1849
    },
    {
      "epoch": 0.4111111111111111,
      "grad_norm": 0.5945538282394409,
      "learning_rate": 0.00011804008908685969,
      "loss": 0.4158,
      "step": 1850
    },
    {
      "epoch": 0.41133333333333333,
      "grad_norm": 0.713422417640686,
      "learning_rate": 0.00011799554565701561,
      "loss": 1.1928,
      "step": 1851
    },
    {
      "epoch": 0.41155555555555556,
      "grad_norm": 0.8967742919921875,
      "learning_rate": 0.0001179510022271715,
      "loss": 2.1678,
      "step": 1852
    },
    {
      "epoch": 0.4117777777777778,
      "grad_norm": 0.8929163813591003,
      "learning_rate": 0.0001179064587973274,
      "loss": 2.1692,
      "step": 1853
    },
    {
      "epoch": 0.412,
      "grad_norm": 0.8850563168525696,
      "learning_rate": 0.0001178619153674833,
      "loss": 2.1808,
      "step": 1854
    },
    {
      "epoch": 0.4122222222222222,
      "grad_norm": 0.8336376547813416,
      "learning_rate": 0.0001178173719376392,
      "loss": 2.5249,
      "step": 1855
    },
    {
      "epoch": 0.41244444444444445,
      "grad_norm": 0.5886844396591187,
      "learning_rate": 0.0001177728285077951,
      "loss": 1.2025,
      "step": 1856
    },
    {
      "epoch": 0.4126666666666667,
      "grad_norm": 0.9081274271011353,
      "learning_rate": 0.00011772828507795101,
      "loss": 2.1547,
      "step": 1857
    },
    {
      "epoch": 0.4128888888888889,
      "grad_norm": 0.07262948900461197,
      "learning_rate": 0.00011768374164810692,
      "loss": 0.0127,
      "step": 1858
    },
    {
      "epoch": 0.4131111111111111,
      "grad_norm": 0.5580977201461792,
      "learning_rate": 0.00011763919821826281,
      "loss": 1.0184,
      "step": 1859
    },
    {
      "epoch": 0.41333333333333333,
      "grad_norm": 0.9046309590339661,
      "learning_rate": 0.00011759465478841872,
      "loss": 2.1869,
      "step": 1860
    },
    {
      "epoch": 0.41355555555555557,
      "grad_norm": 1.056998372077942,
      "learning_rate": 0.00011755011135857461,
      "loss": 1.9969,
      "step": 1861
    },
    {
      "epoch": 0.4137777777777778,
      "grad_norm": 1.0445380210876465,
      "learning_rate": 0.00011750556792873051,
      "loss": 2.2281,
      "step": 1862
    },
    {
      "epoch": 0.414,
      "grad_norm": 0.9709343910217285,
      "learning_rate": 0.00011746102449888643,
      "loss": 1.7402,
      "step": 1863
    },
    {
      "epoch": 0.4142222222222222,
      "grad_norm": 0.9131556153297424,
      "learning_rate": 0.00011741648106904232,
      "loss": 1.7472,
      "step": 1864
    },
    {
      "epoch": 0.41444444444444445,
      "grad_norm": 0.8268289566040039,
      "learning_rate": 0.00011737193763919823,
      "loss": 2.1226,
      "step": 1865
    },
    {
      "epoch": 0.4146666666666667,
      "grad_norm": 0.8866710066795349,
      "learning_rate": 0.00011732739420935412,
      "loss": 1.819,
      "step": 1866
    },
    {
      "epoch": 0.41488888888888886,
      "grad_norm": 0.9805562496185303,
      "learning_rate": 0.00011728285077951003,
      "loss": 2.2956,
      "step": 1867
    },
    {
      "epoch": 0.4151111111111111,
      "grad_norm": 1.0550199747085571,
      "learning_rate": 0.00011723830734966592,
      "loss": 1.9666,
      "step": 1868
    },
    {
      "epoch": 0.41533333333333333,
      "grad_norm": 1.027684211730957,
      "learning_rate": 0.00011719376391982184,
      "loss": 1.7633,
      "step": 1869
    },
    {
      "epoch": 0.41555555555555557,
      "grad_norm": 0.07350092381238937,
      "learning_rate": 0.00011714922048997774,
      "loss": 0.0169,
      "step": 1870
    },
    {
      "epoch": 0.4157777777777778,
      "grad_norm": 0.7602355480194092,
      "learning_rate": 0.00011710467706013363,
      "loss": 0.8183,
      "step": 1871
    },
    {
      "epoch": 0.416,
      "grad_norm": 0.21299070119857788,
      "learning_rate": 0.00011706013363028954,
      "loss": 0.0191,
      "step": 1872
    },
    {
      "epoch": 0.4162222222222222,
      "grad_norm": 1.0202507972717285,
      "learning_rate": 0.00011701559020044543,
      "loss": 1.8291,
      "step": 1873
    },
    {
      "epoch": 0.41644444444444445,
      "grad_norm": 0.9267475008964539,
      "learning_rate": 0.00011697104677060134,
      "loss": 1.546,
      "step": 1874
    },
    {
      "epoch": 0.4166666666666667,
      "grad_norm": 1.0164928436279297,
      "learning_rate": 0.00011692650334075726,
      "loss": 1.6923,
      "step": 1875
    },
    {
      "epoch": 0.41688888888888886,
      "grad_norm": 1.030597448348999,
      "learning_rate": 0.00011688195991091315,
      "loss": 1.858,
      "step": 1876
    },
    {
      "epoch": 0.4171111111111111,
      "grad_norm": 0.9687047600746155,
      "learning_rate": 0.00011683741648106905,
      "loss": 1.6026,
      "step": 1877
    },
    {
      "epoch": 0.41733333333333333,
      "grad_norm": 1.1525236368179321,
      "learning_rate": 0.00011679287305122494,
      "loss": 1.6997,
      "step": 1878
    },
    {
      "epoch": 0.41755555555555557,
      "grad_norm": 0.07384829223155975,
      "learning_rate": 0.00011674832962138085,
      "loss": 0.0186,
      "step": 1879
    },
    {
      "epoch": 0.4177777777777778,
      "grad_norm": 0.07113431394100189,
      "learning_rate": 0.00011670378619153674,
      "loss": 0.0181,
      "step": 1880
    },
    {
      "epoch": 0.418,
      "grad_norm": 1.0431753396987915,
      "learning_rate": 0.00011665924276169266,
      "loss": 1.8063,
      "step": 1881
    },
    {
      "epoch": 0.4182222222222222,
      "grad_norm": 1.003762125968933,
      "learning_rate": 0.00011661469933184857,
      "loss": 1.8644,
      "step": 1882
    },
    {
      "epoch": 0.41844444444444445,
      "grad_norm": 0.08568891882896423,
      "learning_rate": 0.00011657015590200446,
      "loss": 0.0177,
      "step": 1883
    },
    {
      "epoch": 0.4186666666666667,
      "grad_norm": 0.08027679473161697,
      "learning_rate": 0.00011652561247216036,
      "loss": 0.0176,
      "step": 1884
    },
    {
      "epoch": 0.41888888888888887,
      "grad_norm": 1.0662174224853516,
      "learning_rate": 0.00011648106904231626,
      "loss": 1.7168,
      "step": 1885
    },
    {
      "epoch": 0.4191111111111111,
      "grad_norm": 1.0285723209381104,
      "learning_rate": 0.00011643652561247216,
      "loss": 1.6087,
      "step": 1886
    },
    {
      "epoch": 0.41933333333333334,
      "grad_norm": 0.7417445182800293,
      "learning_rate": 0.00011639198218262808,
      "loss": 0.7676,
      "step": 1887
    },
    {
      "epoch": 0.41955555555555557,
      "grad_norm": 0.9185432195663452,
      "learning_rate": 0.00011634743875278397,
      "loss": 1.336,
      "step": 1888
    },
    {
      "epoch": 0.4197777777777778,
      "grad_norm": 1.0804965496063232,
      "learning_rate": 0.00011630289532293988,
      "loss": 1.5121,
      "step": 1889
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3549460172653198,
      "learning_rate": 0.00011625835189309577,
      "loss": 1.5925,
      "step": 1890
    },
    {
      "epoch": 0.4202222222222222,
      "grad_norm": 0.9999265670776367,
      "learning_rate": 0.00011621380846325167,
      "loss": 1.3402,
      "step": 1891
    },
    {
      "epoch": 0.42044444444444445,
      "grad_norm": 0.7168278694152832,
      "learning_rate": 0.00011616926503340757,
      "loss": 0.698,
      "step": 1892
    },
    {
      "epoch": 0.4206666666666667,
      "grad_norm": 0.2091141939163208,
      "learning_rate": 0.00011612472160356347,
      "loss": 0.0375,
      "step": 1893
    },
    {
      "epoch": 0.42088888888888887,
      "grad_norm": 1.0474592447280884,
      "learning_rate": 0.00011608017817371939,
      "loss": 1.0801,
      "step": 1894
    },
    {
      "epoch": 0.4211111111111111,
      "grad_norm": 1.3502936363220215,
      "learning_rate": 0.00011603563474387528,
      "loss": 1.3399,
      "step": 1895
    },
    {
      "epoch": 0.42133333333333334,
      "grad_norm": 1.0498988628387451,
      "learning_rate": 0.00011599109131403119,
      "loss": 1.3411,
      "step": 1896
    },
    {
      "epoch": 0.42155555555555557,
      "grad_norm": 0.9921227693557739,
      "learning_rate": 0.00011594654788418708,
      "loss": 1.1275,
      "step": 1897
    },
    {
      "epoch": 0.42177777777777775,
      "grad_norm": 0.766704261302948,
      "learning_rate": 0.00011590200445434298,
      "loss": 0.5538,
      "step": 1898
    },
    {
      "epoch": 0.422,
      "grad_norm": 1.1626564264297485,
      "learning_rate": 0.00011585746102449888,
      "loss": 1.1005,
      "step": 1899
    },
    {
      "epoch": 0.4222222222222222,
      "grad_norm": 1.2583063840866089,
      "learning_rate": 0.0001158129175946548,
      "loss": 1.0355,
      "step": 1900
    },
    {
      "epoch": 0.42244444444444446,
      "grad_norm": 1.069173812866211,
      "learning_rate": 0.0001157683741648107,
      "loss": 1.0298,
      "step": 1901
    },
    {
      "epoch": 0.4226666666666667,
      "grad_norm": 0.06103937700390816,
      "learning_rate": 0.00011572383073496659,
      "loss": 0.012,
      "step": 1902
    },
    {
      "epoch": 0.42288888888888887,
      "grad_norm": 0.07574369013309479,
      "learning_rate": 0.0001156792873051225,
      "loss": 0.0123,
      "step": 1903
    },
    {
      "epoch": 0.4231111111111111,
      "grad_norm": 0.5607222318649292,
      "learning_rate": 0.00011563474387527839,
      "loss": 1.0307,
      "step": 1904
    },
    {
      "epoch": 0.42333333333333334,
      "grad_norm": 0.619121253490448,
      "learning_rate": 0.0001155902004454343,
      "loss": 1.1993,
      "step": 1905
    },
    {
      "epoch": 0.4235555555555556,
      "grad_norm": 0.07177204638719559,
      "learning_rate": 0.00011554565701559021,
      "loss": 0.0122,
      "step": 1906
    },
    {
      "epoch": 0.42377777777777775,
      "grad_norm": 0.06914813816547394,
      "learning_rate": 0.00011550111358574612,
      "loss": 0.0125,
      "step": 1907
    },
    {
      "epoch": 0.424,
      "grad_norm": 0.06676662713289261,
      "learning_rate": 0.00011545657015590201,
      "loss": 0.0123,
      "step": 1908
    },
    {
      "epoch": 0.4242222222222222,
      "grad_norm": 0.8784866333007812,
      "learning_rate": 0.0001154120267260579,
      "loss": 2.1322,
      "step": 1909
    },
    {
      "epoch": 0.42444444444444446,
      "grad_norm": 0.9178574085235596,
      "learning_rate": 0.00011536748329621381,
      "loss": 2.1509,
      "step": 1910
    },
    {
      "epoch": 0.4246666666666667,
      "grad_norm": 0.8715436458587646,
      "learning_rate": 0.0001153229398663697,
      "loss": 1.9367,
      "step": 1911
    },
    {
      "epoch": 0.42488888888888887,
      "grad_norm": 0.8655092716217041,
      "learning_rate": 0.00011527839643652563,
      "loss": 1.8575,
      "step": 1912
    },
    {
      "epoch": 0.4251111111111111,
      "grad_norm": 1.0920130014419556,
      "learning_rate": 0.00011523385300668152,
      "loss": 2.1347,
      "step": 1913
    },
    {
      "epoch": 0.42533333333333334,
      "grad_norm": 0.8793624639511108,
      "learning_rate": 0.00011518930957683743,
      "loss": 1.7823,
      "step": 1914
    },
    {
      "epoch": 0.4255555555555556,
      "grad_norm": 0.9117141962051392,
      "learning_rate": 0.00011514476614699332,
      "loss": 1.7285,
      "step": 1915
    },
    {
      "epoch": 0.42577777777777776,
      "grad_norm": 0.866205096244812,
      "learning_rate": 0.00011510022271714921,
      "loss": 1.7268,
      "step": 1916
    },
    {
      "epoch": 0.426,
      "grad_norm": 1.1967967748641968,
      "learning_rate": 0.00011505567928730512,
      "loss": 2.0665,
      "step": 1917
    },
    {
      "epoch": 0.4262222222222222,
      "grad_norm": 0.9093246459960938,
      "learning_rate": 0.00011501113585746104,
      "loss": 1.8155,
      "step": 1918
    },
    {
      "epoch": 0.42644444444444446,
      "grad_norm": 1.162400484085083,
      "learning_rate": 0.00011496659242761694,
      "loss": 2.068,
      "step": 1919
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": 0.978716254234314,
      "learning_rate": 0.00011492204899777283,
      "loss": 2.0323,
      "step": 1920
    },
    {
      "epoch": 0.4268888888888889,
      "grad_norm": 0.9247249960899353,
      "learning_rate": 0.00011487750556792874,
      "loss": 1.707,
      "step": 1921
    },
    {
      "epoch": 0.4271111111111111,
      "grad_norm": 0.08401922136545181,
      "learning_rate": 0.00011483296213808463,
      "loss": 0.0179,
      "step": 1922
    },
    {
      "epoch": 0.42733333333333334,
      "grad_norm": 0.081658273935318,
      "learning_rate": 0.00011478841870824054,
      "loss": 0.0181,
      "step": 1923
    },
    {
      "epoch": 0.4275555555555556,
      "grad_norm": 1.0231783390045166,
      "learning_rate": 0.00011474387527839646,
      "loss": 1.6638,
      "step": 1924
    },
    {
      "epoch": 0.42777777777777776,
      "grad_norm": 1.0630674362182617,
      "learning_rate": 0.00011469933184855235,
      "loss": 1.9074,
      "step": 1925
    },
    {
      "epoch": 0.428,
      "grad_norm": 1.016446590423584,
      "learning_rate": 0.00011465478841870825,
      "loss": 1.5796,
      "step": 1926
    },
    {
      "epoch": 0.4282222222222222,
      "grad_norm": 1.0378187894821167,
      "learning_rate": 0.00011461024498886414,
      "loss": 1.8205,
      "step": 1927
    },
    {
      "epoch": 0.42844444444444446,
      "grad_norm": 0.06329286843538284,
      "learning_rate": 0.00011456570155902005,
      "loss": 0.0175,
      "step": 1928
    },
    {
      "epoch": 0.42866666666666664,
      "grad_norm": 0.06730126589536667,
      "learning_rate": 0.00011452115812917594,
      "loss": 0.0173,
      "step": 1929
    },
    {
      "epoch": 0.4288888888888889,
      "grad_norm": 0.8092349767684937,
      "learning_rate": 0.00011447661469933186,
      "loss": 0.9686,
      "step": 1930
    },
    {
      "epoch": 0.4291111111111111,
      "grad_norm": 0.6625300049781799,
      "learning_rate": 0.00011443207126948777,
      "loss": 0.7906,
      "step": 1931
    },
    {
      "epoch": 0.42933333333333334,
      "grad_norm": 0.09340567141771317,
      "learning_rate": 0.00011438752783964366,
      "loss": 0.0185,
      "step": 1932
    },
    {
      "epoch": 0.4295555555555556,
      "grad_norm": 0.08747432380914688,
      "learning_rate": 0.00011434298440979956,
      "loss": 0.0186,
      "step": 1933
    },
    {
      "epoch": 0.42977777777777776,
      "grad_norm": 0.08998148888349533,
      "learning_rate": 0.00011429844097995546,
      "loss": 0.0179,
      "step": 1934
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9794445633888245,
      "learning_rate": 0.00011425389755011136,
      "loss": 1.8862,
      "step": 1935
    },
    {
      "epoch": 0.43022222222222223,
      "grad_norm": 1.0736924409866333,
      "learning_rate": 0.00011420935412026728,
      "loss": 1.8727,
      "step": 1936
    },
    {
      "epoch": 0.43044444444444446,
      "grad_norm": 1.05514395236969,
      "learning_rate": 0.00011416481069042317,
      "loss": 1.6888,
      "step": 1937
    },
    {
      "epoch": 0.43066666666666664,
      "grad_norm": 1.1766986846923828,
      "learning_rate": 0.00011412026726057908,
      "loss": 1.7106,
      "step": 1938
    },
    {
      "epoch": 0.4308888888888889,
      "grad_norm": 0.9431614875793457,
      "learning_rate": 0.00011407572383073497,
      "loss": 1.4491,
      "step": 1939
    },
    {
      "epoch": 0.4311111111111111,
      "grad_norm": 1.1172568798065186,
      "learning_rate": 0.00011403118040089087,
      "loss": 1.7868,
      "step": 1940
    },
    {
      "epoch": 0.43133333333333335,
      "grad_norm": 1.3255879878997803,
      "learning_rate": 0.00011398663697104677,
      "loss": 1.7341,
      "step": 1941
    },
    {
      "epoch": 0.4315555555555556,
      "grad_norm": 0.9136682152748108,
      "learning_rate": 0.00011394209354120268,
      "loss": 1.4765,
      "step": 1942
    },
    {
      "epoch": 0.43177777777777776,
      "grad_norm": 1.1250746250152588,
      "learning_rate": 0.00011389755011135859,
      "loss": 1.212,
      "step": 1943
    },
    {
      "epoch": 0.432,
      "grad_norm": 0.7082473039627075,
      "learning_rate": 0.00011385300668151448,
      "loss": 0.6973,
      "step": 1944
    },
    {
      "epoch": 0.43222222222222223,
      "grad_norm": 1.1732277870178223,
      "learning_rate": 0.00011380846325167039,
      "loss": 1.3756,
      "step": 1945
    },
    {
      "epoch": 0.43244444444444446,
      "grad_norm": 1.1802074909210205,
      "learning_rate": 0.00011376391982182628,
      "loss": 1.2276,
      "step": 1946
    },
    {
      "epoch": 0.43266666666666664,
      "grad_norm": 0.745093584060669,
      "learning_rate": 0.00011371937639198218,
      "loss": 0.6378,
      "step": 1947
    },
    {
      "epoch": 0.4328888888888889,
      "grad_norm": 1.0691252946853638,
      "learning_rate": 0.0001136748329621381,
      "loss": 1.076,
      "step": 1948
    },
    {
      "epoch": 0.4331111111111111,
      "grad_norm": 0.9302070140838623,
      "learning_rate": 0.000113630289532294,
      "loss": 0.9216,
      "step": 1949
    },
    {
      "epoch": 0.43333333333333335,
      "grad_norm": 0.9691843390464783,
      "learning_rate": 0.0001135857461024499,
      "loss": 0.6993,
      "step": 1950
    },
    {
      "epoch": 0.4335555555555556,
      "grad_norm": 0.5687994956970215,
      "learning_rate": 0.00011354120267260579,
      "loss": 0.8047,
      "step": 1951
    },
    {
      "epoch": 0.43377777777777776,
      "grad_norm": 0.5520983338356018,
      "learning_rate": 0.0001134966592427617,
      "loss": 1.0069,
      "step": 1952
    },
    {
      "epoch": 0.434,
      "grad_norm": 0.8625077605247498,
      "learning_rate": 0.00011345211581291759,
      "loss": 2.2169,
      "step": 1953
    },
    {
      "epoch": 0.43422222222222223,
      "grad_norm": 0.6881237030029297,
      "learning_rate": 0.00011340757238307351,
      "loss": 1.4639,
      "step": 1954
    },
    {
      "epoch": 0.43444444444444447,
      "grad_norm": 0.9478729367256165,
      "learning_rate": 0.00011336302895322941,
      "loss": 2.1246,
      "step": 1955
    },
    {
      "epoch": 0.43466666666666665,
      "grad_norm": 0.5995079874992371,
      "learning_rate": 0.0001133184855233853,
      "loss": 0.8358,
      "step": 1956
    },
    {
      "epoch": 0.4348888888888889,
      "grad_norm": 0.8051624298095703,
      "learning_rate": 0.00011327394209354121,
      "loss": 2.3354,
      "step": 1957
    },
    {
      "epoch": 0.4351111111111111,
      "grad_norm": 0.9365907907485962,
      "learning_rate": 0.0001132293986636971,
      "loss": 2.3088,
      "step": 1958
    },
    {
      "epoch": 0.43533333333333335,
      "grad_norm": 0.06474913656711578,
      "learning_rate": 0.00011318485523385301,
      "loss": 0.0119,
      "step": 1959
    },
    {
      "epoch": 0.43555555555555553,
      "grad_norm": 0.06161544471979141,
      "learning_rate": 0.00011314031180400893,
      "loss": 0.0116,
      "step": 1960
    },
    {
      "epoch": 0.43577777777777776,
      "grad_norm": 0.05895036458969116,
      "learning_rate": 0.00011309576837416482,
      "loss": 0.0117,
      "step": 1961
    },
    {
      "epoch": 0.436,
      "grad_norm": 0.058882202953100204,
      "learning_rate": 0.00011305122494432072,
      "loss": 0.0114,
      "step": 1962
    },
    {
      "epoch": 0.43622222222222223,
      "grad_norm": 0.845483124256134,
      "learning_rate": 0.00011300668151447662,
      "loss": 1.9667,
      "step": 1963
    },
    {
      "epoch": 0.43644444444444447,
      "grad_norm": 0.841730535030365,
      "learning_rate": 0.00011296213808463252,
      "loss": 2.0258,
      "step": 1964
    },
    {
      "epoch": 0.43666666666666665,
      "grad_norm": 0.85284024477005,
      "learning_rate": 0.00011291759465478841,
      "loss": 1.9691,
      "step": 1965
    },
    {
      "epoch": 0.4368888888888889,
      "grad_norm": 0.5937424898147583,
      "learning_rate": 0.00011287305122494432,
      "loss": 0.8792,
      "step": 1966
    },
    {
      "epoch": 0.4371111111111111,
      "grad_norm": 0.9278184175491333,
      "learning_rate": 0.00011282850779510024,
      "loss": 1.8017,
      "step": 1967
    },
    {
      "epoch": 0.43733333333333335,
      "grad_norm": 0.9445812106132507,
      "learning_rate": 0.00011278396436525613,
      "loss": 1.9442,
      "step": 1968
    },
    {
      "epoch": 0.43755555555555553,
      "grad_norm": 1.0501065254211426,
      "learning_rate": 0.00011273942093541203,
      "loss": 2.2303,
      "step": 1969
    },
    {
      "epoch": 0.43777777777777777,
      "grad_norm": 1.0860295295715332,
      "learning_rate": 0.00011269487750556793,
      "loss": 2.1409,
      "step": 1970
    },
    {
      "epoch": 0.438,
      "grad_norm": 1.156929850578308,
      "learning_rate": 0.00011265033407572383,
      "loss": 1.8208,
      "step": 1971
    },
    {
      "epoch": 0.43822222222222224,
      "grad_norm": 0.10455144941806793,
      "learning_rate": 0.00011260579064587972,
      "loss": 0.0198,
      "step": 1972
    },
    {
      "epoch": 0.43844444444444447,
      "grad_norm": 0.0996774211525917,
      "learning_rate": 0.00011256124721603564,
      "loss": 0.0196,
      "step": 1973
    },
    {
      "epoch": 0.43866666666666665,
      "grad_norm": 0.0888049378991127,
      "learning_rate": 0.00011251670378619155,
      "loss": 0.0188,
      "step": 1974
    },
    {
      "epoch": 0.4388888888888889,
      "grad_norm": 0.6885740756988525,
      "learning_rate": 0.00011247216035634744,
      "loss": 0.9559,
      "step": 1975
    },
    {
      "epoch": 0.4391111111111111,
      "grad_norm": 0.9182388186454773,
      "learning_rate": 0.00011242761692650334,
      "loss": 1.5226,
      "step": 1976
    },
    {
      "epoch": 0.43933333333333335,
      "grad_norm": 1.1160727739334106,
      "learning_rate": 0.00011238307349665924,
      "loss": 1.8567,
      "step": 1977
    },
    {
      "epoch": 0.43955555555555553,
      "grad_norm": 0.0670580118894577,
      "learning_rate": 0.00011233853006681514,
      "loss": 0.0177,
      "step": 1978
    },
    {
      "epoch": 0.43977777777777777,
      "grad_norm": 0.06605665385723114,
      "learning_rate": 0.00011229398663697106,
      "loss": 0.0174,
      "step": 1979
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.07107221335172653,
      "learning_rate": 0.00011224944320712695,
      "loss": 0.0175,
      "step": 1980
    },
    {
      "epoch": 0.44022222222222224,
      "grad_norm": 0.06550829857587814,
      "learning_rate": 0.00011220489977728286,
      "loss": 0.0171,
      "step": 1981
    },
    {
      "epoch": 0.44044444444444447,
      "grad_norm": 0.06652254611253738,
      "learning_rate": 0.00011216035634743875,
      "loss": 0.0169,
      "step": 1982
    },
    {
      "epoch": 0.44066666666666665,
      "grad_norm": 1.0364837646484375,
      "learning_rate": 0.00011211581291759466,
      "loss": 1.8154,
      "step": 1983
    },
    {
      "epoch": 0.4408888888888889,
      "grad_norm": 1.0154790878295898,
      "learning_rate": 0.00011207126948775055,
      "loss": 1.5184,
      "step": 1984
    },
    {
      "epoch": 0.4411111111111111,
      "grad_norm": 0.985426664352417,
      "learning_rate": 0.00011202672605790647,
      "loss": 1.5944,
      "step": 1985
    },
    {
      "epoch": 0.44133333333333336,
      "grad_norm": 1.0827574729919434,
      "learning_rate": 0.00011198218262806237,
      "loss": 1.7389,
      "step": 1986
    },
    {
      "epoch": 0.44155555555555553,
      "grad_norm": 0.1062050461769104,
      "learning_rate": 0.00011193763919821826,
      "loss": 0.0204,
      "step": 1987
    },
    {
      "epoch": 0.44177777777777777,
      "grad_norm": 0.7836151719093323,
      "learning_rate": 0.00011189309576837417,
      "loss": 1.0073,
      "step": 1988
    },
    {
      "epoch": 0.442,
      "grad_norm": 0.09760961681604385,
      "learning_rate": 0.00011184855233853006,
      "loss": 0.0197,
      "step": 1989
    },
    {
      "epoch": 0.44222222222222224,
      "grad_norm": 1.1412192583084106,
      "learning_rate": 0.00011180400890868597,
      "loss": 1.7364,
      "step": 1990
    },
    {
      "epoch": 0.4424444444444444,
      "grad_norm": 0.9756342172622681,
      "learning_rate": 0.00011175946547884188,
      "loss": 1.3964,
      "step": 1991
    },
    {
      "epoch": 0.44266666666666665,
      "grad_norm": 0.13070262968540192,
      "learning_rate": 0.00011171492204899779,
      "loss": 0.0298,
      "step": 1992
    },
    {
      "epoch": 0.4428888888888889,
      "grad_norm": 0.7475621104240417,
      "learning_rate": 0.00011167037861915368,
      "loss": 0.5803,
      "step": 1993
    },
    {
      "epoch": 0.4431111111111111,
      "grad_norm": 1.2354745864868164,
      "learning_rate": 0.00011162583518930957,
      "loss": 1.7266,
      "step": 1994
    },
    {
      "epoch": 0.44333333333333336,
      "grad_norm": 1.1055632829666138,
      "learning_rate": 0.00011158129175946548,
      "loss": 1.3517,
      "step": 1995
    },
    {
      "epoch": 0.44355555555555554,
      "grad_norm": 1.1422277688980103,
      "learning_rate": 0.00011153674832962137,
      "loss": 1.7153,
      "step": 1996
    },
    {
      "epoch": 0.44377777777777777,
      "grad_norm": 1.0632517337799072,
      "learning_rate": 0.0001114922048997773,
      "loss": 1.3935,
      "step": 1997
    },
    {
      "epoch": 0.444,
      "grad_norm": 1.2863705158233643,
      "learning_rate": 0.0001114476614699332,
      "loss": 1.3315,
      "step": 1998
    },
    {
      "epoch": 0.44422222222222224,
      "grad_norm": 0.7513629198074341,
      "learning_rate": 0.0001114031180400891,
      "loss": 0.6531,
      "step": 1999
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.9309746026992798,
      "learning_rate": 0.00011135857461024499,
      "loss": 0.6415,
      "step": 2000
    },
    {
      "epoch": 0.44466666666666665,
      "grad_norm": 0.9425560235977173,
      "learning_rate": 0.00011131403118040088,
      "loss": 2.1203,
      "step": 2001
    },
    {
      "epoch": 0.4448888888888889,
      "grad_norm": 0.8981547951698303,
      "learning_rate": 0.00011126948775055679,
      "loss": 2.4304,
      "step": 2002
    },
    {
      "epoch": 0.4451111111111111,
      "grad_norm": 0.8358199000358582,
      "learning_rate": 0.00011122494432071271,
      "loss": 2.1793,
      "step": 2003
    },
    {
      "epoch": 0.44533333333333336,
      "grad_norm": 0.8026860952377319,
      "learning_rate": 0.00011118040089086861,
      "loss": 2.198,
      "step": 2004
    },
    {
      "epoch": 0.44555555555555554,
      "grad_norm": 0.8084419369697571,
      "learning_rate": 0.0001111358574610245,
      "loss": 2.1637,
      "step": 2005
    },
    {
      "epoch": 0.4457777777777778,
      "grad_norm": 0.0967579036951065,
      "learning_rate": 0.00011109131403118041,
      "loss": 0.0117,
      "step": 2006
    },
    {
      "epoch": 0.446,
      "grad_norm": 0.5831789374351501,
      "learning_rate": 0.0001110467706013363,
      "loss": 0.9959,
      "step": 2007
    },
    {
      "epoch": 0.44622222222222224,
      "grad_norm": 0.8523693084716797,
      "learning_rate": 0.00011100222717149221,
      "loss": 1.8045,
      "step": 2008
    },
    {
      "epoch": 0.4464444444444444,
      "grad_norm": 0.9027776122093201,
      "learning_rate": 0.00011095768374164813,
      "loss": 1.922,
      "step": 2009
    },
    {
      "epoch": 0.44666666666666666,
      "grad_norm": 0.9854663014411926,
      "learning_rate": 0.00011091314031180402,
      "loss": 1.8949,
      "step": 2010
    },
    {
      "epoch": 0.4468888888888889,
      "grad_norm": 1.163071632385254,
      "learning_rate": 0.00011086859688195992,
      "loss": 2.5929,
      "step": 2011
    },
    {
      "epoch": 0.4471111111111111,
      "grad_norm": 0.8055479526519775,
      "learning_rate": 0.00011082405345211582,
      "loss": 1.2043,
      "step": 2012
    },
    {
      "epoch": 0.44733333333333336,
      "grad_norm": 0.8408487439155579,
      "learning_rate": 0.00011077951002227172,
      "loss": 1.966,
      "step": 2013
    },
    {
      "epoch": 0.44755555555555554,
      "grad_norm": 0.8684518337249756,
      "learning_rate": 0.00011073496659242761,
      "loss": 1.9108,
      "step": 2014
    },
    {
      "epoch": 0.4477777777777778,
      "grad_norm": 1.0258240699768066,
      "learning_rate": 0.00011069042316258353,
      "loss": 2.0882,
      "step": 2015
    },
    {
      "epoch": 0.448,
      "grad_norm": 0.9539505839347839,
      "learning_rate": 0.00011064587973273944,
      "loss": 2.0714,
      "step": 2016
    },
    {
      "epoch": 0.44822222222222224,
      "grad_norm": 0.8587532639503479,
      "learning_rate": 0.00011060133630289533,
      "loss": 1.7786,
      "step": 2017
    },
    {
      "epoch": 0.4484444444444444,
      "grad_norm": 0.9189285039901733,
      "learning_rate": 0.00011055679287305123,
      "loss": 2.0443,
      "step": 2018
    },
    {
      "epoch": 0.44866666666666666,
      "grad_norm": 0.1925644874572754,
      "learning_rate": 0.00011051224944320713,
      "loss": 0.0213,
      "step": 2019
    },
    {
      "epoch": 0.4488888888888889,
      "grad_norm": 0.17474225163459778,
      "learning_rate": 0.00011046770601336303,
      "loss": 0.0198,
      "step": 2020
    },
    {
      "epoch": 0.4491111111111111,
      "grad_norm": 0.12866677343845367,
      "learning_rate": 0.00011042316258351895,
      "loss": 0.0168,
      "step": 2021
    },
    {
      "epoch": 0.4493333333333333,
      "grad_norm": 0.9263811111450195,
      "learning_rate": 0.00011037861915367484,
      "loss": 2.0245,
      "step": 2022
    },
    {
      "epoch": 0.44955555555555554,
      "grad_norm": 1.2182332277297974,
      "learning_rate": 0.00011033407572383075,
      "loss": 1.944,
      "step": 2023
    },
    {
      "epoch": 0.4497777777777778,
      "grad_norm": 0.9207272529602051,
      "learning_rate": 0.00011028953229398664,
      "loss": 1.7777,
      "step": 2024
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9211624264717102,
      "learning_rate": 0.00011024498886414254,
      "loss": 1.9517,
      "step": 2025
    },
    {
      "epoch": 0.45022222222222225,
      "grad_norm": 0.9342603087425232,
      "learning_rate": 0.00011020044543429844,
      "loss": 1.7293,
      "step": 2026
    },
    {
      "epoch": 0.4504444444444444,
      "grad_norm": 0.7406251430511475,
      "learning_rate": 0.00011015590200445436,
      "loss": 1.0289,
      "step": 2027
    },
    {
      "epoch": 0.45066666666666666,
      "grad_norm": 0.09853217005729675,
      "learning_rate": 0.00011011135857461026,
      "loss": 0.0172,
      "step": 2028
    },
    {
      "epoch": 0.4508888888888889,
      "grad_norm": 0.0647294893860817,
      "learning_rate": 0.00011006681514476615,
      "loss": 0.0167,
      "step": 2029
    },
    {
      "epoch": 0.45111111111111113,
      "grad_norm": 0.6249412894248962,
      "learning_rate": 0.00011002227171492206,
      "loss": 0.7176,
      "step": 2030
    },
    {
      "epoch": 0.4513333333333333,
      "grad_norm": 1.0367200374603271,
      "learning_rate": 0.00010997772828507795,
      "loss": 1.6925,
      "step": 2031
    },
    {
      "epoch": 0.45155555555555554,
      "grad_norm": 0.08039866387844086,
      "learning_rate": 0.00010993318485523386,
      "loss": 0.0182,
      "step": 2032
    },
    {
      "epoch": 0.4517777777777778,
      "grad_norm": 0.08283301442861557,
      "learning_rate": 0.00010988864142538977,
      "loss": 0.0183,
      "step": 2033
    },
    {
      "epoch": 0.452,
      "grad_norm": 1.053772211074829,
      "learning_rate": 0.00010984409799554567,
      "loss": 1.9159,
      "step": 2034
    },
    {
      "epoch": 0.45222222222222225,
      "grad_norm": 0.8648183941841125,
      "learning_rate": 0.00010979955456570157,
      "loss": 1.4673,
      "step": 2035
    },
    {
      "epoch": 0.4524444444444444,
      "grad_norm": 1.0042818784713745,
      "learning_rate": 0.00010975501113585746,
      "loss": 1.397,
      "step": 2036
    },
    {
      "epoch": 0.45266666666666666,
      "grad_norm": 0.6784095764160156,
      "learning_rate": 0.00010971046770601337,
      "loss": 0.7173,
      "step": 2037
    },
    {
      "epoch": 0.4528888888888889,
      "grad_norm": 0.09913664311170578,
      "learning_rate": 0.00010966592427616926,
      "loss": 0.0251,
      "step": 2038
    },
    {
      "epoch": 0.45311111111111113,
      "grad_norm": 0.0990590900182724,
      "learning_rate": 0.00010962138084632517,
      "loss": 0.0241,
      "step": 2039
    },
    {
      "epoch": 0.4533333333333333,
      "grad_norm": 0.09208090603351593,
      "learning_rate": 0.00010957683741648108,
      "loss": 0.0232,
      "step": 2040
    },
    {
      "epoch": 0.45355555555555555,
      "grad_norm": 1.100632667541504,
      "learning_rate": 0.00010953229398663698,
      "loss": 1.5947,
      "step": 2041
    },
    {
      "epoch": 0.4537777777777778,
      "grad_norm": 1.0162431001663208,
      "learning_rate": 0.00010948775055679288,
      "loss": 1.4251,
      "step": 2042
    },
    {
      "epoch": 0.454,
      "grad_norm": 1.1525739431381226,
      "learning_rate": 0.00010944320712694877,
      "loss": 1.2817,
      "step": 2043
    },
    {
      "epoch": 0.45422222222222225,
      "grad_norm": 0.8342036008834839,
      "learning_rate": 0.00010939866369710468,
      "loss": 0.6935,
      "step": 2044
    },
    {
      "epoch": 0.45444444444444443,
      "grad_norm": 0.8119909167289734,
      "learning_rate": 0.00010935412026726057,
      "loss": 0.6888,
      "step": 2045
    },
    {
      "epoch": 0.45466666666666666,
      "grad_norm": 1.0022333860397339,
      "learning_rate": 0.00010930957683741649,
      "loss": 1.4122,
      "step": 2046
    },
    {
      "epoch": 0.4548888888888889,
      "grad_norm": 1.043531060218811,
      "learning_rate": 0.0001092650334075724,
      "loss": 0.8783,
      "step": 2047
    },
    {
      "epoch": 0.45511111111111113,
      "grad_norm": 1.1184370517730713,
      "learning_rate": 0.00010922048997772829,
      "loss": 1.028,
      "step": 2048
    },
    {
      "epoch": 0.4553333333333333,
      "grad_norm": 0.9111670851707458,
      "learning_rate": 0.00010917594654788419,
      "loss": 0.7038,
      "step": 2049
    },
    {
      "epoch": 0.45555555555555555,
      "grad_norm": 0.9852802753448486,
      "learning_rate": 0.00010913140311804008,
      "loss": 0.68,
      "step": 2050
    },
    {
      "epoch": 0.4557777777777778,
      "grad_norm": 0.5692037343978882,
      "learning_rate": 0.00010908685968819599,
      "loss": 1.1471,
      "step": 2051
    },
    {
      "epoch": 0.456,
      "grad_norm": 0.060164306312799454,
      "learning_rate": 0.00010904231625835191,
      "loss": 0.0125,
      "step": 2052
    },
    {
      "epoch": 0.4562222222222222,
      "grad_norm": 0.05939817428588867,
      "learning_rate": 0.0001089977728285078,
      "loss": 0.0124,
      "step": 2053
    },
    {
      "epoch": 0.45644444444444443,
      "grad_norm": 0.9658234119415283,
      "learning_rate": 0.0001089532293986637,
      "loss": 2.4221,
      "step": 2054
    },
    {
      "epoch": 0.45666666666666667,
      "grad_norm": 0.608363926410675,
      "learning_rate": 0.0001089086859688196,
      "loss": 1.0087,
      "step": 2055
    },
    {
      "epoch": 0.4568888888888889,
      "grad_norm": 0.6269051432609558,
      "learning_rate": 0.0001088641425389755,
      "loss": 1.1338,
      "step": 2056
    },
    {
      "epoch": 0.45711111111111113,
      "grad_norm": 0.06373189389705658,
      "learning_rate": 0.0001088195991091314,
      "loss": 0.0117,
      "step": 2057
    },
    {
      "epoch": 0.4573333333333333,
      "grad_norm": 0.656608521938324,
      "learning_rate": 0.00010877505567928731,
      "loss": 1.0346,
      "step": 2058
    },
    {
      "epoch": 0.45755555555555555,
      "grad_norm": 0.7977051138877869,
      "learning_rate": 0.00010873051224944322,
      "loss": 1.8994,
      "step": 2059
    },
    {
      "epoch": 0.4577777777777778,
      "grad_norm": 0.8953185677528381,
      "learning_rate": 0.00010868596881959911,
      "loss": 1.9597,
      "step": 2060
    },
    {
      "epoch": 0.458,
      "grad_norm": 0.9071193933486938,
      "learning_rate": 0.00010864142538975502,
      "loss": 2.0342,
      "step": 2061
    },
    {
      "epoch": 0.4582222222222222,
      "grad_norm": 0.9120450019836426,
      "learning_rate": 0.00010859688195991091,
      "loss": 2.1583,
      "step": 2062
    },
    {
      "epoch": 0.45844444444444443,
      "grad_norm": 0.93471759557724,
      "learning_rate": 0.00010855233853006681,
      "loss": 1.7983,
      "step": 2063
    },
    {
      "epoch": 0.45866666666666667,
      "grad_norm": 1.089474081993103,
      "learning_rate": 0.00010850779510022273,
      "loss": 2.1283,
      "step": 2064
    },
    {
      "epoch": 0.4588888888888889,
      "grad_norm": 1.0539686679840088,
      "learning_rate": 0.00010846325167037862,
      "loss": 1.8288,
      "step": 2065
    },
    {
      "epoch": 0.45911111111111114,
      "grad_norm": 0.41104814410209656,
      "learning_rate": 0.00010841870824053453,
      "loss": 0.0214,
      "step": 2066
    },
    {
      "epoch": 0.4593333333333333,
      "grad_norm": 0.8927615284919739,
      "learning_rate": 0.00010837416481069042,
      "loss": 1.9371,
      "step": 2067
    },
    {
      "epoch": 0.45955555555555555,
      "grad_norm": 0.9341305494308472,
      "learning_rate": 0.00010832962138084633,
      "loss": 1.921,
      "step": 2068
    },
    {
      "epoch": 0.4597777777777778,
      "grad_norm": 1.0359078645706177,
      "learning_rate": 0.00010828507795100222,
      "loss": 2.3344,
      "step": 2069
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0806338787078857,
      "learning_rate": 0.00010824053452115814,
      "loss": 1.9512,
      "step": 2070
    },
    {
      "epoch": 0.4602222222222222,
      "grad_norm": 0.9538819193840027,
      "learning_rate": 0.00010819599109131404,
      "loss": 2.1274,
      "step": 2071
    },
    {
      "epoch": 0.46044444444444443,
      "grad_norm": 0.6135286688804626,
      "learning_rate": 0.00010815144766146993,
      "loss": 1.0131,
      "step": 2072
    },
    {
      "epoch": 0.46066666666666667,
      "grad_norm": 0.6279967427253723,
      "learning_rate": 0.00010810690423162584,
      "loss": 0.8183,
      "step": 2073
    },
    {
      "epoch": 0.4608888888888889,
      "grad_norm": 0.687468409538269,
      "learning_rate": 0.00010806236080178173,
      "loss": 0.8753,
      "step": 2074
    },
    {
      "epoch": 0.46111111111111114,
      "grad_norm": 1.2607934474945068,
      "learning_rate": 0.00010801781737193764,
      "loss": 2.009,
      "step": 2075
    },
    {
      "epoch": 0.4613333333333333,
      "grad_norm": 1.0350881814956665,
      "learning_rate": 0.00010797327394209356,
      "loss": 1.9684,
      "step": 2076
    },
    {
      "epoch": 0.46155555555555555,
      "grad_norm": 0.897770881652832,
      "learning_rate": 0.00010792873051224946,
      "loss": 1.678,
      "step": 2077
    },
    {
      "epoch": 0.4617777777777778,
      "grad_norm": 1.0721005201339722,
      "learning_rate": 0.00010788418708240535,
      "loss": 1.6479,
      "step": 2078
    },
    {
      "epoch": 0.462,
      "grad_norm": 0.6707905530929565,
      "learning_rate": 0.00010783964365256124,
      "loss": 0.7607,
      "step": 2079
    },
    {
      "epoch": 0.4622222222222222,
      "grad_norm": 1.0232561826705933,
      "learning_rate": 0.00010779510022271715,
      "loss": 0.9529,
      "step": 2080
    },
    {
      "epoch": 0.46244444444444444,
      "grad_norm": 1.1841635704040527,
      "learning_rate": 0.00010775055679287304,
      "loss": 2.0433,
      "step": 2081
    },
    {
      "epoch": 0.46266666666666667,
      "grad_norm": 1.104246973991394,
      "learning_rate": 0.00010770601336302897,
      "loss": 1.8199,
      "step": 2082
    },
    {
      "epoch": 0.4628888888888889,
      "grad_norm": 0.7725056409835815,
      "learning_rate": 0.00010766146993318487,
      "loss": 0.7594,
      "step": 2083
    },
    {
      "epoch": 0.4631111111111111,
      "grad_norm": 0.9705109596252441,
      "learning_rate": 0.00010761692650334077,
      "loss": 1.6149,
      "step": 2084
    },
    {
      "epoch": 0.4633333333333333,
      "grad_norm": 1.2132149934768677,
      "learning_rate": 0.00010757238307349666,
      "loss": 1.7415,
      "step": 2085
    },
    {
      "epoch": 0.46355555555555555,
      "grad_norm": 0.6384971737861633,
      "learning_rate": 0.00010752783964365255,
      "loss": 0.7416,
      "step": 2086
    },
    {
      "epoch": 0.4637777777777778,
      "grad_norm": 0.08440492302179337,
      "learning_rate": 0.00010748329621380846,
      "loss": 0.0227,
      "step": 2087
    },
    {
      "epoch": 0.464,
      "grad_norm": 1.0965360403060913,
      "learning_rate": 0.00010743875278396438,
      "loss": 1.8355,
      "step": 2088
    },
    {
      "epoch": 0.4642222222222222,
      "grad_norm": 0.9710419178009033,
      "learning_rate": 0.00010739420935412028,
      "loss": 1.4107,
      "step": 2089
    },
    {
      "epoch": 0.46444444444444444,
      "grad_norm": 1.1657572984695435,
      "learning_rate": 0.00010734966592427618,
      "loss": 1.529,
      "step": 2090
    },
    {
      "epoch": 0.4646666666666667,
      "grad_norm": 1.0341477394104004,
      "learning_rate": 0.00010730512249443208,
      "loss": 1.3314,
      "step": 2091
    },
    {
      "epoch": 0.4648888888888889,
      "grad_norm": 1.185089111328125,
      "learning_rate": 0.00010726057906458797,
      "loss": 1.3492,
      "step": 2092
    },
    {
      "epoch": 0.4651111111111111,
      "grad_norm": 1.172006607055664,
      "learning_rate": 0.00010721603563474388,
      "loss": 1.6585,
      "step": 2093
    },
    {
      "epoch": 0.4653333333333333,
      "grad_norm": 0.19929863512516022,
      "learning_rate": 0.0001071714922048998,
      "loss": 0.0307,
      "step": 2094
    },
    {
      "epoch": 0.46555555555555556,
      "grad_norm": 0.7375540137290955,
      "learning_rate": 0.00010712694877505569,
      "loss": 0.6284,
      "step": 2095
    },
    {
      "epoch": 0.4657777777777778,
      "grad_norm": 1.1733025312423706,
      "learning_rate": 0.0001070824053452116,
      "loss": 1.3572,
      "step": 2096
    },
    {
      "epoch": 0.466,
      "grad_norm": 1.2688745260238647,
      "learning_rate": 0.00010703786191536749,
      "loss": 1.1305,
      "step": 2097
    },
    {
      "epoch": 0.4662222222222222,
      "grad_norm": 0.3591971695423126,
      "learning_rate": 0.00010699331848552339,
      "loss": 0.0473,
      "step": 2098
    },
    {
      "epoch": 0.46644444444444444,
      "grad_norm": 0.7150940299034119,
      "learning_rate": 0.00010694877505567928,
      "loss": 0.4544,
      "step": 2099
    },
    {
      "epoch": 0.4666666666666667,
      "grad_norm": 0.8277695775032043,
      "learning_rate": 0.0001069042316258352,
      "loss": 0.8539,
      "step": 2100
    },
    {
      "epoch": 0.4668888888888889,
      "grad_norm": 0.6670131087303162,
      "learning_rate": 0.00010685968819599111,
      "loss": 1.1052,
      "step": 2101
    },
    {
      "epoch": 0.4671111111111111,
      "grad_norm": 0.06290578842163086,
      "learning_rate": 0.000106815144766147,
      "loss": 0.0125,
      "step": 2102
    },
    {
      "epoch": 0.4673333333333333,
      "grad_norm": 0.058846112340688705,
      "learning_rate": 0.0001067706013363029,
      "loss": 0.012,
      "step": 2103
    },
    {
      "epoch": 0.46755555555555556,
      "grad_norm": 0.537786602973938,
      "learning_rate": 0.0001067260579064588,
      "loss": 1.0569,
      "step": 2104
    },
    {
      "epoch": 0.4677777777777778,
      "grad_norm": 0.9007193446159363,
      "learning_rate": 0.0001066815144766147,
      "loss": 2.1002,
      "step": 2105
    },
    {
      "epoch": 0.468,
      "grad_norm": 0.527800440788269,
      "learning_rate": 0.00010663697104677062,
      "loss": 0.9616,
      "step": 2106
    },
    {
      "epoch": 0.4682222222222222,
      "grad_norm": 0.9083489775657654,
      "learning_rate": 0.00010659242761692651,
      "loss": 2.4556,
      "step": 2107
    },
    {
      "epoch": 0.46844444444444444,
      "grad_norm": 7.447436332702637,
      "learning_rate": 0.00010654788418708242,
      "loss": 1.1922,
      "step": 2108
    },
    {
      "epoch": 0.4686666666666667,
      "grad_norm": 0.6265543699264526,
      "learning_rate": 0.00010650334075723831,
      "loss": 1.0863,
      "step": 2109
    },
    {
      "epoch": 0.4688888888888889,
      "grad_norm": 0.0921320989727974,
      "learning_rate": 0.00010645879732739422,
      "loss": 0.0133,
      "step": 2110
    },
    {
      "epoch": 0.4691111111111111,
      "grad_norm": 0.12606237828731537,
      "learning_rate": 0.00010641425389755011,
      "loss": 0.014,
      "step": 2111
    },
    {
      "epoch": 0.4693333333333333,
      "grad_norm": 0.5374711155891418,
      "learning_rate": 0.00010636971046770601,
      "loss": 0.9771,
      "step": 2112
    },
    {
      "epoch": 0.46955555555555556,
      "grad_norm": 1.0356422662734985,
      "learning_rate": 0.00010632516703786193,
      "loss": 2.4527,
      "step": 2113
    },
    {
      "epoch": 0.4697777777777778,
      "grad_norm": 0.9254876375198364,
      "learning_rate": 0.00010628062360801782,
      "loss": 2.0676,
      "step": 2114
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7463611960411072,
      "learning_rate": 0.00010623608017817373,
      "loss": 0.9246,
      "step": 2115
    },
    {
      "epoch": 0.4702222222222222,
      "grad_norm": 1.0094149112701416,
      "learning_rate": 0.00010619153674832962,
      "loss": 1.8244,
      "step": 2116
    },
    {
      "epoch": 0.47044444444444444,
      "grad_norm": 0.995177149772644,
      "learning_rate": 0.00010614699331848553,
      "loss": 2.0105,
      "step": 2117
    },
    {
      "epoch": 0.4706666666666667,
      "grad_norm": 0.9415448904037476,
      "learning_rate": 0.00010610244988864142,
      "loss": 2.0042,
      "step": 2118
    },
    {
      "epoch": 0.4708888888888889,
      "grad_norm": 0.9262849688529968,
      "learning_rate": 0.00010605790645879734,
      "loss": 1.8853,
      "step": 2119
    },
    {
      "epoch": 0.4711111111111111,
      "grad_norm": 0.13545557856559753,
      "learning_rate": 0.00010601336302895324,
      "loss": 0.0187,
      "step": 2120
    },
    {
      "epoch": 0.4713333333333333,
      "grad_norm": 0.11557869613170624,
      "learning_rate": 0.00010596881959910913,
      "loss": 0.0182,
      "step": 2121
    },
    {
      "epoch": 0.47155555555555556,
      "grad_norm": 0.10856274515390396,
      "learning_rate": 0.00010592427616926504,
      "loss": 0.0175,
      "step": 2122
    },
    {
      "epoch": 0.4717777777777778,
      "grad_norm": 0.09006939828395844,
      "learning_rate": 0.00010587973273942093,
      "loss": 0.0166,
      "step": 2123
    },
    {
      "epoch": 0.472,
      "grad_norm": 0.08400023728609085,
      "learning_rate": 0.00010583518930957684,
      "loss": 0.0155,
      "step": 2124
    },
    {
      "epoch": 0.4722222222222222,
      "grad_norm": 0.6308079361915588,
      "learning_rate": 0.00010579064587973276,
      "loss": 0.9436,
      "step": 2125
    },
    {
      "epoch": 0.47244444444444444,
      "grad_norm": 0.6802711486816406,
      "learning_rate": 0.00010574610244988865,
      "loss": 0.8038,
      "step": 2126
    },
    {
      "epoch": 0.4726666666666667,
      "grad_norm": 1.0816277265548706,
      "learning_rate": 0.00010570155902004455,
      "loss": 1.6768,
      "step": 2127
    },
    {
      "epoch": 0.4728888888888889,
      "grad_norm": 0.15280470252037048,
      "learning_rate": 0.00010565701559020044,
      "loss": 0.0236,
      "step": 2128
    },
    {
      "epoch": 0.4731111111111111,
      "grad_norm": 0.13545870780944824,
      "learning_rate": 0.00010561247216035635,
      "loss": 0.0222,
      "step": 2129
    },
    {
      "epoch": 0.47333333333333333,
      "grad_norm": 0.9755976796150208,
      "learning_rate": 0.00010556792873051224,
      "loss": 2.1984,
      "step": 2130
    },
    {
      "epoch": 0.47355555555555556,
      "grad_norm": 1.1221860647201538,
      "learning_rate": 0.00010552338530066816,
      "loss": 1.9101,
      "step": 2131
    },
    {
      "epoch": 0.4737777777777778,
      "grad_norm": 1.06197190284729,
      "learning_rate": 0.00010547884187082407,
      "loss": 1.9334,
      "step": 2132
    },
    {
      "epoch": 0.474,
      "grad_norm": 0.6913040280342102,
      "learning_rate": 0.00010543429844097996,
      "loss": 0.8087,
      "step": 2133
    },
    {
      "epoch": 0.4742222222222222,
      "grad_norm": 0.10353131592273712,
      "learning_rate": 0.00010538975501113586,
      "loss": 0.0182,
      "step": 2134
    },
    {
      "epoch": 0.47444444444444445,
      "grad_norm": 0.8617785573005676,
      "learning_rate": 0.00010534521158129175,
      "loss": 1.0227,
      "step": 2135
    },
    {
      "epoch": 0.4746666666666667,
      "grad_norm": 0.9836186170578003,
      "learning_rate": 0.00010530066815144766,
      "loss": 1.6456,
      "step": 2136
    },
    {
      "epoch": 0.4748888888888889,
      "grad_norm": 0.07714372128248215,
      "learning_rate": 0.00010525612472160358,
      "loss": 0.0202,
      "step": 2137
    },
    {
      "epoch": 0.4751111111111111,
      "grad_norm": 0.7352029085159302,
      "learning_rate": 0.00010521158129175947,
      "loss": 0.8581,
      "step": 2138
    },
    {
      "epoch": 0.47533333333333333,
      "grad_norm": 0.8806314468383789,
      "learning_rate": 0.00010516703786191538,
      "loss": 1.4621,
      "step": 2139
    },
    {
      "epoch": 0.47555555555555556,
      "grad_norm": 1.0340739488601685,
      "learning_rate": 0.00010512249443207127,
      "loss": 1.6558,
      "step": 2140
    },
    {
      "epoch": 0.4757777777777778,
      "grad_norm": 0.9357542395591736,
      "learning_rate": 0.00010507795100222717,
      "loss": 0.0534,
      "step": 2141
    },
    {
      "epoch": 0.476,
      "grad_norm": 1.0452251434326172,
      "learning_rate": 0.00010503340757238307,
      "loss": 1.7621,
      "step": 2142
    },
    {
      "epoch": 0.4762222222222222,
      "grad_norm": 1.091395378112793,
      "learning_rate": 0.00010498886414253898,
      "loss": 1.5887,
      "step": 2143
    },
    {
      "epoch": 0.47644444444444445,
      "grad_norm": 1.1353317499160767,
      "learning_rate": 0.00010494432071269489,
      "loss": 1.7071,
      "step": 2144
    },
    {
      "epoch": 0.4766666666666667,
      "grad_norm": 0.9791475534439087,
      "learning_rate": 0.00010489977728285078,
      "loss": 1.3087,
      "step": 2145
    },
    {
      "epoch": 0.47688888888888886,
      "grad_norm": 1.2445948123931885,
      "learning_rate": 0.00010485523385300669,
      "loss": 1.6218,
      "step": 2146
    },
    {
      "epoch": 0.4771111111111111,
      "grad_norm": 0.684476912021637,
      "learning_rate": 0.00010481069042316258,
      "loss": 0.5532,
      "step": 2147
    },
    {
      "epoch": 0.47733333333333333,
      "grad_norm": 1.0223796367645264,
      "learning_rate": 0.00010476614699331848,
      "loss": 1.4684,
      "step": 2148
    },
    {
      "epoch": 0.47755555555555557,
      "grad_norm": 0.9877771735191345,
      "learning_rate": 0.0001047216035634744,
      "loss": 0.7886,
      "step": 2149
    },
    {
      "epoch": 0.4777777777777778,
      "grad_norm": 1.0899747610092163,
      "learning_rate": 0.0001046770601336303,
      "loss": 0.9223,
      "step": 2150
    },
    {
      "epoch": 0.478,
      "grad_norm": 0.7108315825462341,
      "learning_rate": 0.0001046325167037862,
      "loss": 1.2016,
      "step": 2151
    },
    {
      "epoch": 0.4782222222222222,
      "grad_norm": 0.06013140454888344,
      "learning_rate": 0.00010458797327394209,
      "loss": 0.0102,
      "step": 2152
    },
    {
      "epoch": 0.47844444444444445,
      "grad_norm": 0.8427261114120483,
      "learning_rate": 0.000104543429844098,
      "loss": 2.311,
      "step": 2153
    },
    {
      "epoch": 0.4786666666666667,
      "grad_norm": 1.2041535377502441,
      "learning_rate": 0.00010449888641425389,
      "loss": 2.3181,
      "step": 2154
    },
    {
      "epoch": 0.47888888888888886,
      "grad_norm": 0.8807494640350342,
      "learning_rate": 0.00010445434298440981,
      "loss": 1.9016,
      "step": 2155
    },
    {
      "epoch": 0.4791111111111111,
      "grad_norm": 0.5941738486289978,
      "learning_rate": 0.00010440979955456571,
      "loss": 1.1593,
      "step": 2156
    },
    {
      "epoch": 0.47933333333333333,
      "grad_norm": 0.05396668612957001,
      "learning_rate": 0.0001043652561247216,
      "loss": 0.0105,
      "step": 2157
    },
    {
      "epoch": 0.47955555555555557,
      "grad_norm": 0.056162334978580475,
      "learning_rate": 0.00010432071269487751,
      "loss": 0.01,
      "step": 2158
    },
    {
      "epoch": 0.4797777777777778,
      "grad_norm": 0.543596625328064,
      "learning_rate": 0.0001042761692650334,
      "loss": 0.9017,
      "step": 2159
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.8763737082481384,
      "learning_rate": 0.00010423162583518931,
      "loss": 2.3425,
      "step": 2160
    },
    {
      "epoch": 0.4802222222222222,
      "grad_norm": 0.9465508460998535,
      "learning_rate": 0.00010418708240534523,
      "loss": 1.934,
      "step": 2161
    },
    {
      "epoch": 0.48044444444444445,
      "grad_norm": 0.9368391633033752,
      "learning_rate": 0.00010414253897550113,
      "loss": 2.1625,
      "step": 2162
    },
    {
      "epoch": 0.4806666666666667,
      "grad_norm": 0.8468746542930603,
      "learning_rate": 0.00010409799554565702,
      "loss": 1.8851,
      "step": 2163
    },
    {
      "epoch": 0.48088888888888887,
      "grad_norm": 0.9411273002624512,
      "learning_rate": 0.00010405345211581292,
      "loss": 1.9171,
      "step": 2164
    },
    {
      "epoch": 0.4811111111111111,
      "grad_norm": 0.9668144583702087,
      "learning_rate": 0.00010400890868596882,
      "loss": 1.8995,
      "step": 2165
    },
    {
      "epoch": 0.48133333333333334,
      "grad_norm": 1.0552144050598145,
      "learning_rate": 0.00010396436525612471,
      "loss": 1.92,
      "step": 2166
    },
    {
      "epoch": 0.48155555555555557,
      "grad_norm": 0.8945801854133606,
      "learning_rate": 0.00010391982182628064,
      "loss": 2.0224,
      "step": 2167
    },
    {
      "epoch": 0.4817777777777778,
      "grad_norm": 0.8795874714851379,
      "learning_rate": 0.00010387527839643654,
      "loss": 1.8932,
      "step": 2168
    },
    {
      "epoch": 0.482,
      "grad_norm": 0.7880940437316895,
      "learning_rate": 0.00010383073496659244,
      "loss": 0.981,
      "step": 2169
    },
    {
      "epoch": 0.4822222222222222,
      "grad_norm": 0.06655468791723251,
      "learning_rate": 0.00010378619153674833,
      "loss": 0.0142,
      "step": 2170
    },
    {
      "epoch": 0.48244444444444445,
      "grad_norm": 0.06633251905441284,
      "learning_rate": 0.00010374164810690423,
      "loss": 0.0146,
      "step": 2171
    },
    {
      "epoch": 0.4826666666666667,
      "grad_norm": 0.0680522546172142,
      "learning_rate": 0.00010369710467706013,
      "loss": 0.0143,
      "step": 2172
    },
    {
      "epoch": 0.48288888888888887,
      "grad_norm": 0.6770047545433044,
      "learning_rate": 0.00010365256124721605,
      "loss": 1.0753,
      "step": 2173
    },
    {
      "epoch": 0.4831111111111111,
      "grad_norm": 1.0793815851211548,
      "learning_rate": 0.00010360801781737196,
      "loss": 1.7203,
      "step": 2174
    },
    {
      "epoch": 0.48333333333333334,
      "grad_norm": 0.8015415668487549,
      "learning_rate": 0.00010356347438752785,
      "loss": 1.7094,
      "step": 2175
    },
    {
      "epoch": 0.48355555555555557,
      "grad_norm": 1.0239602327346802,
      "learning_rate": 0.00010351893095768375,
      "loss": 1.9498,
      "step": 2176
    },
    {
      "epoch": 0.48377777777777775,
      "grad_norm": 1.029447078704834,
      "learning_rate": 0.00010347438752783964,
      "loss": 1.9248,
      "step": 2177
    },
    {
      "epoch": 0.484,
      "grad_norm": 0.9458478689193726,
      "learning_rate": 0.00010342984409799555,
      "loss": 1.7154,
      "step": 2178
    },
    {
      "epoch": 0.4842222222222222,
      "grad_norm": 0.06287504732608795,
      "learning_rate": 0.00010338530066815147,
      "loss": 0.0172,
      "step": 2179
    },
    {
      "epoch": 0.48444444444444446,
      "grad_norm": 0.07798685878515244,
      "learning_rate": 0.00010334075723830736,
      "loss": 0.0179,
      "step": 2180
    },
    {
      "epoch": 0.4846666666666667,
      "grad_norm": 0.06901486217975616,
      "learning_rate": 0.00010329621380846327,
      "loss": 0.0182,
      "step": 2181
    },
    {
      "epoch": 0.48488888888888887,
      "grad_norm": 1.101205587387085,
      "learning_rate": 0.00010325167037861916,
      "loss": 1.6031,
      "step": 2182
    },
    {
      "epoch": 0.4851111111111111,
      "grad_norm": 1.084505319595337,
      "learning_rate": 0.00010320712694877506,
      "loss": 1.6042,
      "step": 2183
    },
    {
      "epoch": 0.48533333333333334,
      "grad_norm": 0.10013191401958466,
      "learning_rate": 0.00010316258351893095,
      "loss": 0.0203,
      "step": 2184
    },
    {
      "epoch": 0.4855555555555556,
      "grad_norm": 0.08007735759019852,
      "learning_rate": 0.00010311804008908686,
      "loss": 0.0207,
      "step": 2185
    },
    {
      "epoch": 0.48577777777777775,
      "grad_norm": 1.1262269020080566,
      "learning_rate": 0.00010307349665924278,
      "loss": 1.7542,
      "step": 2186
    },
    {
      "epoch": 0.486,
      "grad_norm": 1.2522791624069214,
      "learning_rate": 0.00010302895322939867,
      "loss": 1.7026,
      "step": 2187
    },
    {
      "epoch": 0.4862222222222222,
      "grad_norm": 1.145750880241394,
      "learning_rate": 0.00010298440979955458,
      "loss": 1.6401,
      "step": 2188
    },
    {
      "epoch": 0.48644444444444446,
      "grad_norm": 0.6675021052360535,
      "learning_rate": 0.00010293986636971047,
      "loss": 0.7744,
      "step": 2189
    },
    {
      "epoch": 0.4866666666666667,
      "grad_norm": 0.12333891540765762,
      "learning_rate": 0.00010289532293986637,
      "loss": 0.0266,
      "step": 2190
    },
    {
      "epoch": 0.48688888888888887,
      "grad_norm": 0.678531289100647,
      "learning_rate": 0.00010285077951002227,
      "loss": 0.66,
      "step": 2191
    },
    {
      "epoch": 0.4871111111111111,
      "grad_norm": 1.046586036682129,
      "learning_rate": 0.00010280623608017818,
      "loss": 1.3619,
      "step": 2192
    },
    {
      "epoch": 0.48733333333333334,
      "grad_norm": 1.0906909704208374,
      "learning_rate": 0.00010276169265033409,
      "loss": 1.6156,
      "step": 2193
    },
    {
      "epoch": 0.4875555555555556,
      "grad_norm": 1.0561549663543701,
      "learning_rate": 0.00010271714922048998,
      "loss": 1.455,
      "step": 2194
    },
    {
      "epoch": 0.48777777777777775,
      "grad_norm": 0.9658767580986023,
      "learning_rate": 0.00010267260579064589,
      "loss": 1.372,
      "step": 2195
    },
    {
      "epoch": 0.488,
      "grad_norm": 0.9055988192558289,
      "learning_rate": 0.00010262806236080178,
      "loss": 0.8257,
      "step": 2196
    },
    {
      "epoch": 0.4882222222222222,
      "grad_norm": 0.7421156764030457,
      "learning_rate": 0.00010258351893095768,
      "loss": 0.6387,
      "step": 2197
    },
    {
      "epoch": 0.48844444444444446,
      "grad_norm": 1.0203956365585327,
      "learning_rate": 0.0001025389755011136,
      "loss": 1.2457,
      "step": 2198
    },
    {
      "epoch": 0.4886666666666667,
      "grad_norm": 1.1960588693618774,
      "learning_rate": 0.0001024944320712695,
      "loss": 1.2971,
      "step": 2199
    },
    {
      "epoch": 0.4888888888888889,
      "grad_norm": 0.9425092935562134,
      "learning_rate": 0.0001024498886414254,
      "loss": 0.5784,
      "step": 2200
    },
    {
      "epoch": 0.4891111111111111,
      "grad_norm": 0.8036244511604309,
      "learning_rate": 0.00010240534521158129,
      "loss": 2.0153,
      "step": 2201
    },
    {
      "epoch": 0.48933333333333334,
      "grad_norm": 0.9643092155456543,
      "learning_rate": 0.0001023608017817372,
      "loss": 2.0731,
      "step": 2202
    },
    {
      "epoch": 0.4895555555555556,
      "grad_norm": 0.5496547222137451,
      "learning_rate": 0.00010231625835189309,
      "loss": 1.0441,
      "step": 2203
    },
    {
      "epoch": 0.48977777777777776,
      "grad_norm": 0.052178915590047836,
      "learning_rate": 0.00010227171492204901,
      "loss": 0.0109,
      "step": 2204
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.054271552711725235,
      "learning_rate": 0.00010222717149220491,
      "loss": 0.0106,
      "step": 2205
    },
    {
      "epoch": 0.4902222222222222,
      "grad_norm": 0.6506833434104919,
      "learning_rate": 0.0001021826280623608,
      "loss": 1.151,
      "step": 2206
    },
    {
      "epoch": 0.49044444444444446,
      "grad_norm": 0.9006673097610474,
      "learning_rate": 0.00010213808463251671,
      "loss": 2.2136,
      "step": 2207
    },
    {
      "epoch": 0.49066666666666664,
      "grad_norm": 0.8316347002983093,
      "learning_rate": 0.0001020935412026726,
      "loss": 2.2916,
      "step": 2208
    },
    {
      "epoch": 0.4908888888888889,
      "grad_norm": 0.07608042657375336,
      "learning_rate": 0.00010204899777282851,
      "loss": 0.0124,
      "step": 2209
    },
    {
      "epoch": 0.4911111111111111,
      "grad_norm": 0.07590346783399582,
      "learning_rate": 0.00010200445434298443,
      "loss": 0.0125,
      "step": 2210
    },
    {
      "epoch": 0.49133333333333334,
      "grad_norm": 0.07187937945127487,
      "learning_rate": 0.00010195991091314032,
      "loss": 0.012,
      "step": 2211
    },
    {
      "epoch": 0.4915555555555556,
      "grad_norm": 0.6782304644584656,
      "learning_rate": 0.00010191536748329622,
      "loss": 1.0925,
      "step": 2212
    },
    {
      "epoch": 0.49177777777777776,
      "grad_norm": 0.8945388197898865,
      "learning_rate": 0.00010187082405345212,
      "loss": 2.0201,
      "step": 2213
    },
    {
      "epoch": 0.492,
      "grad_norm": 0.8869574666023254,
      "learning_rate": 0.00010182628062360802,
      "loss": 1.7918,
      "step": 2214
    },
    {
      "epoch": 0.4922222222222222,
      "grad_norm": 0.9882270097732544,
      "learning_rate": 0.00010178173719376391,
      "loss": 2.2219,
      "step": 2215
    },
    {
      "epoch": 0.49244444444444446,
      "grad_norm": 1.0089894533157349,
      "learning_rate": 0.00010173719376391983,
      "loss": 2.1878,
      "step": 2216
    },
    {
      "epoch": 0.49266666666666664,
      "grad_norm": 0.9218000173568726,
      "learning_rate": 0.00010169265033407574,
      "loss": 1.8475,
      "step": 2217
    },
    {
      "epoch": 0.4928888888888889,
      "grad_norm": 0.8486325740814209,
      "learning_rate": 0.00010164810690423163,
      "loss": 1.7585,
      "step": 2218
    },
    {
      "epoch": 0.4931111111111111,
      "grad_norm": 0.9325646162033081,
      "learning_rate": 0.00010160356347438753,
      "loss": 1.9068,
      "step": 2219
    },
    {
      "epoch": 0.49333333333333335,
      "grad_norm": 1.0260847806930542,
      "learning_rate": 0.00010155902004454343,
      "loss": 1.8463,
      "step": 2220
    },
    {
      "epoch": 0.4935555555555556,
      "grad_norm": 0.8245062828063965,
      "learning_rate": 0.00010151447661469933,
      "loss": 1.8072,
      "step": 2221
    },
    {
      "epoch": 0.49377777777777776,
      "grad_norm": 1.05905020236969,
      "learning_rate": 0.00010146993318485525,
      "loss": 1.7947,
      "step": 2222
    },
    {
      "epoch": 0.494,
      "grad_norm": 0.06895928084850311,
      "learning_rate": 0.00010142538975501114,
      "loss": 0.0156,
      "step": 2223
    },
    {
      "epoch": 0.49422222222222223,
      "grad_norm": 0.9304447174072266,
      "learning_rate": 0.00010138084632516705,
      "loss": 1.8109,
      "step": 2224
    },
    {
      "epoch": 0.49444444444444446,
      "grad_norm": 0.6271647810935974,
      "learning_rate": 0.00010133630289532294,
      "loss": 0.9784,
      "step": 2225
    },
    {
      "epoch": 0.49466666666666664,
      "grad_norm": 0.10684725642204285,
      "learning_rate": 0.00010129175946547884,
      "loss": 0.0192,
      "step": 2226
    },
    {
      "epoch": 0.4948888888888889,
      "grad_norm": 0.09946753084659576,
      "learning_rate": 0.00010124721603563474,
      "loss": 0.0185,
      "step": 2227
    },
    {
      "epoch": 0.4951111111111111,
      "grad_norm": 1.025982141494751,
      "learning_rate": 0.00010120267260579065,
      "loss": 1.6198,
      "step": 2228
    },
    {
      "epoch": 0.49533333333333335,
      "grad_norm": 1.0194659233093262,
      "learning_rate": 0.00010115812917594656,
      "loss": 2.1222,
      "step": 2229
    },
    {
      "epoch": 0.4955555555555556,
      "grad_norm": 0.9168062806129456,
      "learning_rate": 0.00010111358574610245,
      "loss": 1.8122,
      "step": 2230
    },
    {
      "epoch": 0.49577777777777776,
      "grad_norm": 0.8633151054382324,
      "learning_rate": 0.00010106904231625836,
      "loss": 1.4762,
      "step": 2231
    },
    {
      "epoch": 0.496,
      "grad_norm": 0.9805095791816711,
      "learning_rate": 0.00010102449888641425,
      "loss": 1.8171,
      "step": 2232
    },
    {
      "epoch": 0.49622222222222223,
      "grad_norm": 0.9416176676750183,
      "learning_rate": 0.00010097995545657015,
      "loss": 1.5266,
      "step": 2233
    },
    {
      "epoch": 0.49644444444444447,
      "grad_norm": 0.6914428472518921,
      "learning_rate": 0.00010093541202672607,
      "loss": 0.8921,
      "step": 2234
    },
    {
      "epoch": 0.49666666666666665,
      "grad_norm": 0.06182475388050079,
      "learning_rate": 0.00010089086859688197,
      "loss": 0.018,
      "step": 2235
    },
    {
      "epoch": 0.4968888888888889,
      "grad_norm": 0.7663688063621521,
      "learning_rate": 0.00010084632516703787,
      "loss": 0.9712,
      "step": 2236
    },
    {
      "epoch": 0.4971111111111111,
      "grad_norm": 0.9623875617980957,
      "learning_rate": 0.00010080178173719376,
      "loss": 1.7805,
      "step": 2237
    },
    {
      "epoch": 0.49733333333333335,
      "grad_norm": 0.812954306602478,
      "learning_rate": 0.00010075723830734967,
      "loss": 0.8818,
      "step": 2238
    },
    {
      "epoch": 0.49755555555555553,
      "grad_norm": 0.8574005961418152,
      "learning_rate": 0.00010071269487750556,
      "loss": 0.9017,
      "step": 2239
    },
    {
      "epoch": 0.49777777777777776,
      "grad_norm": 1.052270770072937,
      "learning_rate": 0.00010066815144766148,
      "loss": 1.6329,
      "step": 2240
    },
    {
      "epoch": 0.498,
      "grad_norm": 1.0629431009292603,
      "learning_rate": 0.00010062360801781738,
      "loss": 1.5515,
      "step": 2241
    },
    {
      "epoch": 0.49822222222222223,
      "grad_norm": 1.1712193489074707,
      "learning_rate": 0.00010057906458797328,
      "loss": 1.6491,
      "step": 2242
    },
    {
      "epoch": 0.49844444444444447,
      "grad_norm": 1.318710207939148,
      "learning_rate": 0.00010053452115812918,
      "loss": 1.5213,
      "step": 2243
    },
    {
      "epoch": 0.49866666666666665,
      "grad_norm": 0.9559906721115112,
      "learning_rate": 0.00010048997772828507,
      "loss": 1.2708,
      "step": 2244
    },
    {
      "epoch": 0.4988888888888889,
      "grad_norm": 0.9218617081642151,
      "learning_rate": 0.00010044543429844098,
      "loss": 0.7616,
      "step": 2245
    },
    {
      "epoch": 0.4991111111111111,
      "grad_norm": 1.2992888689041138,
      "learning_rate": 0.0001004008908685969,
      "loss": 1.2365,
      "step": 2246
    },
    {
      "epoch": 0.49933333333333335,
      "grad_norm": 0.9246402382850647,
      "learning_rate": 0.00010035634743875279,
      "loss": 1.2666,
      "step": 2247
    },
    {
      "epoch": 0.49955555555555553,
      "grad_norm": 1.1523358821868896,
      "learning_rate": 0.0001003118040089087,
      "loss": 1.3827,
      "step": 2248
    },
    {
      "epoch": 0.49977777777777777,
      "grad_norm": 0.19934004545211792,
      "learning_rate": 0.00010026726057906459,
      "loss": 0.0396,
      "step": 2249
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7492770552635193,
      "learning_rate": 0.00010022271714922049,
      "loss": 0.4563,
      "step": 2250
    },
    {
      "epoch": 0.5002222222222222,
      "grad_norm": 0.051572561264038086,
      "learning_rate": 0.00010017817371937638,
      "loss": 0.0103,
      "step": 2251
    },
    {
      "epoch": 0.5004444444444445,
      "grad_norm": 0.05010450258851051,
      "learning_rate": 0.00010013363028953232,
      "loss": 0.0102,
      "step": 2252
    },
    {
      "epoch": 0.5006666666666667,
      "grad_norm": 0.7738996148109436,
      "learning_rate": 0.00010008908685968821,
      "loss": 2.2673,
      "step": 2253
    },
    {
      "epoch": 0.5008888888888889,
      "grad_norm": 0.5947201251983643,
      "learning_rate": 0.00010004454342984411,
      "loss": 1.0109,
      "step": 2254
    },
    {
      "epoch": 0.5011111111111111,
      "grad_norm": 0.07506557554006577,
      "learning_rate": 0.0001,
      "loss": 0.0122,
      "step": 2255
    },
    {
      "epoch": 0.5013333333333333,
      "grad_norm": 0.0709841400384903,
      "learning_rate": 9.99554565701559e-05,
      "loss": 0.0119,
      "step": 2256
    },
    {
      "epoch": 0.5015555555555555,
      "grad_norm": 0.793897271156311,
      "learning_rate": 9.991091314031182e-05,
      "loss": 1.9008,
      "step": 2257
    },
    {
      "epoch": 0.5017777777777778,
      "grad_norm": 1.026395320892334,
      "learning_rate": 9.986636971046771e-05,
      "loss": 2.1726,
      "step": 2258
    },
    {
      "epoch": 0.502,
      "grad_norm": 0.9329989552497864,
      "learning_rate": 9.982182628062361e-05,
      "loss": 2.034,
      "step": 2259
    },
    {
      "epoch": 0.5022222222222222,
      "grad_norm": 0.5686019062995911,
      "learning_rate": 9.977728285077952e-05,
      "loss": 0.9681,
      "step": 2260
    },
    {
      "epoch": 0.5024444444444445,
      "grad_norm": 0.17110589146614075,
      "learning_rate": 9.973273942093542e-05,
      "loss": 0.0237,
      "step": 2261
    },
    {
      "epoch": 0.5026666666666667,
      "grad_norm": 0.9597615599632263,
      "learning_rate": 9.968819599109132e-05,
      "loss": 1.9351,
      "step": 2262
    },
    {
      "epoch": 0.5028888888888889,
      "grad_norm": 0.8988699913024902,
      "learning_rate": 9.964365256124722e-05,
      "loss": 2.0789,
      "step": 2263
    },
    {
      "epoch": 0.5031111111111111,
      "grad_norm": 1.0947890281677246,
      "learning_rate": 9.959910913140313e-05,
      "loss": 2.2087,
      "step": 2264
    },
    {
      "epoch": 0.5033333333333333,
      "grad_norm": 0.9448829889297485,
      "learning_rate": 9.955456570155902e-05,
      "loss": 1.6314,
      "step": 2265
    },
    {
      "epoch": 0.5035555555555555,
      "grad_norm": 0.6050695776939392,
      "learning_rate": 9.951002227171494e-05,
      "loss": 0.7855,
      "step": 2266
    },
    {
      "epoch": 0.5037777777777778,
      "grad_norm": 0.0719807967543602,
      "learning_rate": 9.946547884187083e-05,
      "loss": 0.016,
      "step": 2267
    },
    {
      "epoch": 0.504,
      "grad_norm": 0.07161426544189453,
      "learning_rate": 9.942093541202673e-05,
      "loss": 0.0151,
      "step": 2268
    },
    {
      "epoch": 0.5042222222222222,
      "grad_norm": 0.06885481625795364,
      "learning_rate": 9.937639198218264e-05,
      "loss": 0.0157,
      "step": 2269
    },
    {
      "epoch": 0.5044444444444445,
      "grad_norm": 0.06610265374183655,
      "learning_rate": 9.933184855233853e-05,
      "loss": 0.0163,
      "step": 2270
    },
    {
      "epoch": 0.5046666666666667,
      "grad_norm": 1.0415818691253662,
      "learning_rate": 9.928730512249444e-05,
      "loss": 1.9357,
      "step": 2271
    },
    {
      "epoch": 0.5048888888888889,
      "grad_norm": 1.081796646118164,
      "learning_rate": 9.924276169265034e-05,
      "loss": 1.6581,
      "step": 2272
    },
    {
      "epoch": 0.5051111111111111,
      "grad_norm": 0.9375271201133728,
      "learning_rate": 9.919821826280625e-05,
      "loss": 1.8193,
      "step": 2273
    },
    {
      "epoch": 0.5053333333333333,
      "grad_norm": 0.9642285108566284,
      "learning_rate": 9.915367483296214e-05,
      "loss": 1.6289,
      "step": 2274
    },
    {
      "epoch": 0.5055555555555555,
      "grad_norm": 1.1919479370117188,
      "learning_rate": 9.910913140311804e-05,
      "loss": 2.1311,
      "step": 2275
    },
    {
      "epoch": 0.5057777777777778,
      "grad_norm": 1.0379412174224854,
      "learning_rate": 9.906458797327395e-05,
      "loss": 1.7015,
      "step": 2276
    },
    {
      "epoch": 0.506,
      "grad_norm": 0.7220401763916016,
      "learning_rate": 9.902004454342984e-05,
      "loss": 0.9768,
      "step": 2277
    },
    {
      "epoch": 0.5062222222222222,
      "grad_norm": 0.0648246705532074,
      "learning_rate": 9.897550111358576e-05,
      "loss": 0.0176,
      "step": 2278
    },
    {
      "epoch": 0.5064444444444445,
      "grad_norm": 0.06818456947803497,
      "learning_rate": 9.893095768374165e-05,
      "loss": 0.0179,
      "step": 2279
    },
    {
      "epoch": 0.5066666666666667,
      "grad_norm": 0.07543423771858215,
      "learning_rate": 9.888641425389756e-05,
      "loss": 0.018,
      "step": 2280
    },
    {
      "epoch": 0.5068888888888889,
      "grad_norm": 1.0633699893951416,
      "learning_rate": 9.884187082405346e-05,
      "loss": 1.578,
      "step": 2281
    },
    {
      "epoch": 0.5071111111111111,
      "grad_norm": 0.11469082534313202,
      "learning_rate": 9.879732739420935e-05,
      "loss": 0.0209,
      "step": 2282
    },
    {
      "epoch": 0.5073333333333333,
      "grad_norm": 0.10379460453987122,
      "learning_rate": 9.875278396436526e-05,
      "loss": 0.02,
      "step": 2283
    },
    {
      "epoch": 0.5075555555555555,
      "grad_norm": 0.09670916199684143,
      "learning_rate": 9.870824053452117e-05,
      "loss": 0.019,
      "step": 2284
    },
    {
      "epoch": 0.5077777777777778,
      "grad_norm": 1.0629053115844727,
      "learning_rate": 9.866369710467707e-05,
      "loss": 1.845,
      "step": 2285
    },
    {
      "epoch": 0.508,
      "grad_norm": 1.166548252105713,
      "learning_rate": 9.861915367483296e-05,
      "loss": 1.8971,
      "step": 2286
    },
    {
      "epoch": 0.5082222222222222,
      "grad_norm": 1.0978573560714722,
      "learning_rate": 9.857461024498887e-05,
      "loss": 1.6318,
      "step": 2287
    },
    {
      "epoch": 0.5084444444444445,
      "grad_norm": 0.13032492995262146,
      "learning_rate": 9.853006681514477e-05,
      "loss": 0.0259,
      "step": 2288
    },
    {
      "epoch": 0.5086666666666667,
      "grad_norm": 1.131226897239685,
      "learning_rate": 9.848552338530067e-05,
      "loss": 1.5446,
      "step": 2289
    },
    {
      "epoch": 0.5088888888888888,
      "grad_norm": 1.1936326026916504,
      "learning_rate": 9.844097995545658e-05,
      "loss": 1.6931,
      "step": 2290
    },
    {
      "epoch": 0.5091111111111111,
      "grad_norm": 1.0433292388916016,
      "learning_rate": 9.839643652561248e-05,
      "loss": 1.6301,
      "step": 2291
    },
    {
      "epoch": 0.5093333333333333,
      "grad_norm": 0.991683840751648,
      "learning_rate": 9.835189309576838e-05,
      "loss": 1.4689,
      "step": 2292
    },
    {
      "epoch": 0.5095555555555555,
      "grad_norm": 1.026808500289917,
      "learning_rate": 9.830734966592427e-05,
      "loss": 1.3458,
      "step": 2293
    },
    {
      "epoch": 0.5097777777777778,
      "grad_norm": 0.7094257473945618,
      "learning_rate": 9.826280623608018e-05,
      "loss": 0.5261,
      "step": 2294
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.722606360912323,
      "learning_rate": 9.821826280623608e-05,
      "loss": 0.5935,
      "step": 2295
    },
    {
      "epoch": 0.5102222222222222,
      "grad_norm": 1.1530025005340576,
      "learning_rate": 9.817371937639198e-05,
      "loss": 1.3226,
      "step": 2296
    },
    {
      "epoch": 0.5104444444444445,
      "grad_norm": 0.14330358803272247,
      "learning_rate": 9.81291759465479e-05,
      "loss": 0.0357,
      "step": 2297
    },
    {
      "epoch": 0.5106666666666667,
      "grad_norm": 1.1050106287002563,
      "learning_rate": 9.808463251670379e-05,
      "loss": 1.0314,
      "step": 2298
    },
    {
      "epoch": 0.5108888888888888,
      "grad_norm": 1.1559122800827026,
      "learning_rate": 9.804008908685969e-05,
      "loss": 1.084,
      "step": 2299
    },
    {
      "epoch": 0.5111111111111111,
      "grad_norm": 0.9388430118560791,
      "learning_rate": 9.79955456570156e-05,
      "loss": 0.6384,
      "step": 2300
    },
    {
      "epoch": 0.5113333333333333,
      "grad_norm": 0.5832968950271606,
      "learning_rate": 9.79510022271715e-05,
      "loss": 1.085,
      "step": 2301
    },
    {
      "epoch": 0.5115555555555555,
      "grad_norm": 0.7767675518989563,
      "learning_rate": 9.79064587973274e-05,
      "loss": 2.0396,
      "step": 2302
    },
    {
      "epoch": 0.5117777777777778,
      "grad_norm": 0.5899970531463623,
      "learning_rate": 9.78619153674833e-05,
      "loss": 0.9667,
      "step": 2303
    },
    {
      "epoch": 0.512,
      "grad_norm": 0.8278191089630127,
      "learning_rate": 9.78173719376392e-05,
      "loss": 2.134,
      "step": 2304
    },
    {
      "epoch": 0.5122222222222222,
      "grad_norm": 0.5901010632514954,
      "learning_rate": 9.77728285077951e-05,
      "loss": 1.0668,
      "step": 2305
    },
    {
      "epoch": 0.5124444444444445,
      "grad_norm": 0.0631280392408371,
      "learning_rate": 9.772828507795102e-05,
      "loss": 0.0116,
      "step": 2306
    },
    {
      "epoch": 0.5126666666666667,
      "grad_norm": 0.06386229395866394,
      "learning_rate": 9.768374164810691e-05,
      "loss": 0.0114,
      "step": 2307
    },
    {
      "epoch": 0.5128888888888888,
      "grad_norm": 0.6113215684890747,
      "learning_rate": 9.763919821826281e-05,
      "loss": 0.9675,
      "step": 2308
    },
    {
      "epoch": 0.5131111111111111,
      "grad_norm": 0.8982253670692444,
      "learning_rate": 9.759465478841872e-05,
      "loss": 2.0637,
      "step": 2309
    },
    {
      "epoch": 0.5133333333333333,
      "grad_norm": 0.8227818608283997,
      "learning_rate": 9.755011135857461e-05,
      "loss": 2.1668,
      "step": 2310
    },
    {
      "epoch": 0.5135555555555555,
      "grad_norm": 0.9095910787582397,
      "learning_rate": 9.750556792873052e-05,
      "loss": 2.2258,
      "step": 2311
    },
    {
      "epoch": 0.5137777777777778,
      "grad_norm": 1.043130874633789,
      "learning_rate": 9.746102449888642e-05,
      "loss": 2.5546,
      "step": 2312
    },
    {
      "epoch": 0.514,
      "grad_norm": 0.9570296406745911,
      "learning_rate": 9.741648106904233e-05,
      "loss": 2.1633,
      "step": 2313
    },
    {
      "epoch": 0.5142222222222222,
      "grad_norm": 0.5847756862640381,
      "learning_rate": 9.737193763919822e-05,
      "loss": 0.9857,
      "step": 2314
    },
    {
      "epoch": 0.5144444444444445,
      "grad_norm": 1.1674765348434448,
      "learning_rate": 9.732739420935412e-05,
      "loss": 1.876,
      "step": 2315
    },
    {
      "epoch": 0.5146666666666667,
      "grad_norm": 1.0518763065338135,
      "learning_rate": 9.728285077951003e-05,
      "loss": 2.0836,
      "step": 2316
    },
    {
      "epoch": 0.5148888888888888,
      "grad_norm": 0.8954287767410278,
      "learning_rate": 9.723830734966592e-05,
      "loss": 1.9936,
      "step": 2317
    },
    {
      "epoch": 0.5151111111111111,
      "grad_norm": 0.9926402568817139,
      "learning_rate": 9.719376391982184e-05,
      "loss": 1.9524,
      "step": 2318
    },
    {
      "epoch": 0.5153333333333333,
      "grad_norm": 1.602445125579834,
      "learning_rate": 9.714922048997773e-05,
      "loss": 1.5352,
      "step": 2319
    },
    {
      "epoch": 0.5155555555555555,
      "grad_norm": 0.09388995170593262,
      "learning_rate": 9.710467706013364e-05,
      "loss": 0.0163,
      "step": 2320
    },
    {
      "epoch": 0.5157777777777778,
      "grad_norm": 0.0820835530757904,
      "learning_rate": 9.706013363028954e-05,
      "loss": 0.0164,
      "step": 2321
    },
    {
      "epoch": 0.516,
      "grad_norm": 0.07170173525810242,
      "learning_rate": 9.701559020044543e-05,
      "loss": 0.016,
      "step": 2322
    },
    {
      "epoch": 0.5162222222222222,
      "grad_norm": 0.6576219201087952,
      "learning_rate": 9.697104677060134e-05,
      "loss": 0.8959,
      "step": 2323
    },
    {
      "epoch": 0.5164444444444445,
      "grad_norm": 1.153394103050232,
      "learning_rate": 9.692650334075724e-05,
      "loss": 2.3199,
      "step": 2324
    },
    {
      "epoch": 0.5166666666666667,
      "grad_norm": 0.977983832359314,
      "learning_rate": 9.688195991091315e-05,
      "loss": 1.7632,
      "step": 2325
    },
    {
      "epoch": 0.5168888888888888,
      "grad_norm": 0.9710626006126404,
      "learning_rate": 9.683741648106904e-05,
      "loss": 1.6654,
      "step": 2326
    },
    {
      "epoch": 0.5171111111111111,
      "grad_norm": 0.9058797955513,
      "learning_rate": 9.679287305122495e-05,
      "loss": 1.6685,
      "step": 2327
    },
    {
      "epoch": 0.5173333333333333,
      "grad_norm": 0.8887612223625183,
      "learning_rate": 9.674832962138085e-05,
      "loss": 1.7818,
      "step": 2328
    },
    {
      "epoch": 0.5175555555555555,
      "grad_norm": 0.9914031028747559,
      "learning_rate": 9.670378619153674e-05,
      "loss": 1.7536,
      "step": 2329
    },
    {
      "epoch": 0.5177777777777778,
      "grad_norm": 0.9197384119033813,
      "learning_rate": 9.665924276169266e-05,
      "loss": 1.74,
      "step": 2330
    },
    {
      "epoch": 0.518,
      "grad_norm": 1.1414260864257812,
      "learning_rate": 9.661469933184855e-05,
      "loss": 1.8903,
      "step": 2331
    },
    {
      "epoch": 0.5182222222222223,
      "grad_norm": 3.304169178009033,
      "learning_rate": 9.657015590200446e-05,
      "loss": 0.9065,
      "step": 2332
    },
    {
      "epoch": 0.5184444444444445,
      "grad_norm": 4.9997334480285645,
      "learning_rate": 9.652561247216037e-05,
      "loss": 0.0771,
      "step": 2333
    },
    {
      "epoch": 0.5186666666666667,
      "grad_norm": 0.09176503121852875,
      "learning_rate": 9.648106904231626e-05,
      "loss": 0.0186,
      "step": 2334
    },
    {
      "epoch": 0.5188888888888888,
      "grad_norm": 1.1866215467453003,
      "learning_rate": 9.643652561247216e-05,
      "loss": 1.6262,
      "step": 2335
    },
    {
      "epoch": 0.5191111111111111,
      "grad_norm": 0.07120410352945328,
      "learning_rate": 9.639198218262807e-05,
      "loss": 0.019,
      "step": 2336
    },
    {
      "epoch": 0.5193333333333333,
      "grad_norm": 0.07103080302476883,
      "learning_rate": 9.634743875278397e-05,
      "loss": 0.0195,
      "step": 2337
    },
    {
      "epoch": 0.5195555555555555,
      "grad_norm": 0.06759145110845566,
      "learning_rate": 9.630289532293986e-05,
      "loss": 0.0186,
      "step": 2338
    },
    {
      "epoch": 0.5197777777777778,
      "grad_norm": 1.2702637910842896,
      "learning_rate": 9.625835189309578e-05,
      "loss": 2.0136,
      "step": 2339
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1234042644500732,
      "learning_rate": 9.621380846325168e-05,
      "loss": 1.5986,
      "step": 2340
    },
    {
      "epoch": 0.5202222222222223,
      "grad_norm": 1.0025362968444824,
      "learning_rate": 9.616926503340757e-05,
      "loss": 1.3816,
      "step": 2341
    },
    {
      "epoch": 0.5204444444444445,
      "grad_norm": 0.9683412909507751,
      "learning_rate": 9.612472160356349e-05,
      "loss": 1.7117,
      "step": 2342
    },
    {
      "epoch": 0.5206666666666667,
      "grad_norm": 0.6754570603370667,
      "learning_rate": 9.608017817371938e-05,
      "loss": 0.7684,
      "step": 2343
    },
    {
      "epoch": 0.5208888888888888,
      "grad_norm": 1.094579815864563,
      "learning_rate": 9.603563474387528e-05,
      "loss": 1.6406,
      "step": 2344
    },
    {
      "epoch": 0.5211111111111111,
      "grad_norm": 1.0832680463790894,
      "learning_rate": 9.599109131403119e-05,
      "loss": 1.2147,
      "step": 2345
    },
    {
      "epoch": 0.5213333333333333,
      "grad_norm": 0.7980796098709106,
      "learning_rate": 9.59465478841871e-05,
      "loss": 0.8512,
      "step": 2346
    },
    {
      "epoch": 0.5215555555555556,
      "grad_norm": 0.7709640264511108,
      "learning_rate": 9.590200445434299e-05,
      "loss": 0.6093,
      "step": 2347
    },
    {
      "epoch": 0.5217777777777778,
      "grad_norm": 1.0753897428512573,
      "learning_rate": 9.585746102449889e-05,
      "loss": 1.26,
      "step": 2348
    },
    {
      "epoch": 0.522,
      "grad_norm": 1.1210321187973022,
      "learning_rate": 9.58129175946548e-05,
      "loss": 1.3757,
      "step": 2349
    },
    {
      "epoch": 0.5222222222222223,
      "grad_norm": 1.2580091953277588,
      "learning_rate": 9.576837416481069e-05,
      "loss": 1.1165,
      "step": 2350
    },
    {
      "epoch": 0.5224444444444445,
      "grad_norm": 0.12348097562789917,
      "learning_rate": 9.572383073496661e-05,
      "loss": 0.0121,
      "step": 2351
    },
    {
      "epoch": 0.5226666666666666,
      "grad_norm": 0.05580771714448929,
      "learning_rate": 9.56792873051225e-05,
      "loss": 0.0115,
      "step": 2352
    },
    {
      "epoch": 0.5228888888888888,
      "grad_norm": 0.8565195798873901,
      "learning_rate": 9.56347438752784e-05,
      "loss": 1.1554,
      "step": 2353
    },
    {
      "epoch": 0.5231111111111111,
      "grad_norm": 0.0494968518614769,
      "learning_rate": 9.559020044543431e-05,
      "loss": 0.0113,
      "step": 2354
    },
    {
      "epoch": 0.5233333333333333,
      "grad_norm": 1.011706829071045,
      "learning_rate": 9.55456570155902e-05,
      "loss": 2.2168,
      "step": 2355
    },
    {
      "epoch": 0.5235555555555556,
      "grad_norm": 0.7530580759048462,
      "learning_rate": 9.550111358574611e-05,
      "loss": 1.9735,
      "step": 2356
    },
    {
      "epoch": 0.5237777777777778,
      "grad_norm": 0.5390753149986267,
      "learning_rate": 9.545657015590201e-05,
      "loss": 0.9808,
      "step": 2357
    },
    {
      "epoch": 0.524,
      "grad_norm": 0.06390012800693512,
      "learning_rate": 9.541202672605792e-05,
      "loss": 0.0113,
      "step": 2358
    },
    {
      "epoch": 0.5242222222222223,
      "grad_norm": 0.06459398567676544,
      "learning_rate": 9.536748329621381e-05,
      "loss": 0.0115,
      "step": 2359
    },
    {
      "epoch": 0.5244444444444445,
      "grad_norm": 0.779170036315918,
      "learning_rate": 9.532293986636972e-05,
      "loss": 1.8657,
      "step": 2360
    },
    {
      "epoch": 0.5246666666666666,
      "grad_norm": 0.969144344329834,
      "learning_rate": 9.527839643652562e-05,
      "loss": 1.8891,
      "step": 2361
    },
    {
      "epoch": 0.5248888888888888,
      "grad_norm": 0.9192339777946472,
      "learning_rate": 9.523385300668151e-05,
      "loss": 1.1894,
      "step": 2362
    },
    {
      "epoch": 0.5251111111111111,
      "grad_norm": 0.9349969625473022,
      "learning_rate": 9.518930957683743e-05,
      "loss": 1.8199,
      "step": 2363
    },
    {
      "epoch": 0.5253333333333333,
      "grad_norm": 1.0784986019134521,
      "learning_rate": 9.514476614699332e-05,
      "loss": 1.6971,
      "step": 2364
    },
    {
      "epoch": 0.5255555555555556,
      "grad_norm": 0.8992215394973755,
      "learning_rate": 9.510022271714923e-05,
      "loss": 1.8382,
      "step": 2365
    },
    {
      "epoch": 0.5257777777777778,
      "grad_norm": 1.3966472148895264,
      "learning_rate": 9.505567928730512e-05,
      "loss": 1.9623,
      "step": 2366
    },
    {
      "epoch": 0.526,
      "grad_norm": 0.8984045386314392,
      "learning_rate": 9.501113585746103e-05,
      "loss": 1.5869,
      "step": 2367
    },
    {
      "epoch": 0.5262222222222223,
      "grad_norm": 0.9143683314323425,
      "learning_rate": 9.496659242761693e-05,
      "loss": 1.9555,
      "step": 2368
    },
    {
      "epoch": 0.5264444444444445,
      "grad_norm": 0.9720048308372498,
      "learning_rate": 9.492204899777282e-05,
      "loss": 2.0711,
      "step": 2369
    },
    {
      "epoch": 0.5266666666666666,
      "grad_norm": 0.07726185023784637,
      "learning_rate": 9.487750556792874e-05,
      "loss": 0.0151,
      "step": 2370
    },
    {
      "epoch": 0.5268888888888889,
      "grad_norm": 0.06836960464715958,
      "learning_rate": 9.483296213808463e-05,
      "loss": 0.0151,
      "step": 2371
    },
    {
      "epoch": 0.5271111111111111,
      "grad_norm": 0.06594307720661163,
      "learning_rate": 9.478841870824054e-05,
      "loss": 0.0152,
      "step": 2372
    },
    {
      "epoch": 0.5273333333333333,
      "grad_norm": 0.6360456943511963,
      "learning_rate": 9.474387527839644e-05,
      "loss": 0.7369,
      "step": 2373
    },
    {
      "epoch": 0.5275555555555556,
      "grad_norm": 0.18018539249897003,
      "learning_rate": 9.469933184855234e-05,
      "loss": 0.023,
      "step": 2374
    },
    {
      "epoch": 0.5277777777777778,
      "grad_norm": 0.663093090057373,
      "learning_rate": 9.465478841870824e-05,
      "loss": 1.0362,
      "step": 2375
    },
    {
      "epoch": 0.528,
      "grad_norm": 0.9098090529441833,
      "learning_rate": 9.461024498886415e-05,
      "loss": 1.6369,
      "step": 2376
    },
    {
      "epoch": 0.5282222222222223,
      "grad_norm": 1.1311285495758057,
      "learning_rate": 9.456570155902005e-05,
      "loss": 1.8766,
      "step": 2377
    },
    {
      "epoch": 0.5284444444444445,
      "grad_norm": 0.9926170706748962,
      "learning_rate": 9.452115812917594e-05,
      "loss": 1.6138,
      "step": 2378
    },
    {
      "epoch": 0.5286666666666666,
      "grad_norm": 1.0175913572311401,
      "learning_rate": 9.447661469933185e-05,
      "loss": 1.6726,
      "step": 2379
    },
    {
      "epoch": 0.5288888888888889,
      "grad_norm": 1.056575894355774,
      "learning_rate": 9.443207126948775e-05,
      "loss": 1.7264,
      "step": 2380
    },
    {
      "epoch": 0.5291111111111111,
      "grad_norm": 1.1231061220169067,
      "learning_rate": 9.438752783964365e-05,
      "loss": 1.0777,
      "step": 2381
    },
    {
      "epoch": 0.5293333333333333,
      "grad_norm": 0.7852345705032349,
      "learning_rate": 9.434298440979957e-05,
      "loss": 0.8974,
      "step": 2382
    },
    {
      "epoch": 0.5295555555555556,
      "grad_norm": 0.7592169046401978,
      "learning_rate": 9.429844097995546e-05,
      "loss": 0.9902,
      "step": 2383
    },
    {
      "epoch": 0.5297777777777778,
      "grad_norm": 0.07275737076997757,
      "learning_rate": 9.425389755011136e-05,
      "loss": 0.0192,
      "step": 2384
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9310175180435181,
      "learning_rate": 9.420935412026727e-05,
      "loss": 1.7347,
      "step": 2385
    },
    {
      "epoch": 0.5302222222222223,
      "grad_norm": 1.182883620262146,
      "learning_rate": 9.416481069042317e-05,
      "loss": 1.5165,
      "step": 2386
    },
    {
      "epoch": 0.5304444444444445,
      "grad_norm": 1.1949394941329956,
      "learning_rate": 9.412026726057906e-05,
      "loss": 1.8244,
      "step": 2387
    },
    {
      "epoch": 0.5306666666666666,
      "grad_norm": 1.121900200843811,
      "learning_rate": 9.407572383073497e-05,
      "loss": 1.9119,
      "step": 2388
    },
    {
      "epoch": 0.5308888888888889,
      "grad_norm": 0.10859935730695724,
      "learning_rate": 9.403118040089088e-05,
      "loss": 0.0262,
      "step": 2389
    },
    {
      "epoch": 0.5311111111111111,
      "grad_norm": 0.7552645802497864,
      "learning_rate": 9.398663697104677e-05,
      "loss": 0.797,
      "step": 2390
    },
    {
      "epoch": 0.5313333333333333,
      "grad_norm": 1.0325798988342285,
      "learning_rate": 9.394209354120269e-05,
      "loss": 1.3011,
      "step": 2391
    },
    {
      "epoch": 0.5315555555555556,
      "grad_norm": 1.0349661111831665,
      "learning_rate": 9.389755011135858e-05,
      "loss": 1.6267,
      "step": 2392
    },
    {
      "epoch": 0.5317777777777778,
      "grad_norm": 1.0448760986328125,
      "learning_rate": 9.385300668151448e-05,
      "loss": 1.3726,
      "step": 2393
    },
    {
      "epoch": 0.532,
      "grad_norm": 1.186620831489563,
      "learning_rate": 9.380846325167039e-05,
      "loss": 1.5912,
      "step": 2394
    },
    {
      "epoch": 0.5322222222222223,
      "grad_norm": 0.7071300148963928,
      "learning_rate": 9.376391982182628e-05,
      "loss": 0.7175,
      "step": 2395
    },
    {
      "epoch": 0.5324444444444445,
      "grad_norm": 0.6794847249984741,
      "learning_rate": 9.371937639198219e-05,
      "loss": 0.6406,
      "step": 2396
    },
    {
      "epoch": 0.5326666666666666,
      "grad_norm": 0.978138267993927,
      "learning_rate": 9.367483296213809e-05,
      "loss": 1.0755,
      "step": 2397
    },
    {
      "epoch": 0.5328888888888889,
      "grad_norm": 1.1468720436096191,
      "learning_rate": 9.3630289532294e-05,
      "loss": 1.3126,
      "step": 2398
    },
    {
      "epoch": 0.5331111111111111,
      "grad_norm": 1.0386849641799927,
      "learning_rate": 9.358574610244989e-05,
      "loss": 1.1505,
      "step": 2399
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 1.2117102146148682,
      "learning_rate": 9.35412026726058e-05,
      "loss": 1.1134,
      "step": 2400
    },
    {
      "epoch": 0.5335555555555556,
      "grad_norm": 0.6301453113555908,
      "learning_rate": 9.34966592427617e-05,
      "loss": 1.1273,
      "step": 2401
    },
    {
      "epoch": 0.5337777777777778,
      "grad_norm": 0.5409572124481201,
      "learning_rate": 9.345211581291759e-05,
      "loss": 1.0298,
      "step": 2402
    },
    {
      "epoch": 0.534,
      "grad_norm": 0.05003529414534569,
      "learning_rate": 9.340757238307351e-05,
      "loss": 0.0111,
      "step": 2403
    },
    {
      "epoch": 0.5342222222222223,
      "grad_norm": 0.547648549079895,
      "learning_rate": 9.33630289532294e-05,
      "loss": 0.9771,
      "step": 2404
    },
    {
      "epoch": 0.5344444444444445,
      "grad_norm": 0.8171392679214478,
      "learning_rate": 9.331848552338531e-05,
      "loss": 2.2949,
      "step": 2405
    },
    {
      "epoch": 0.5346666666666666,
      "grad_norm": 0.5529095530509949,
      "learning_rate": 9.327394209354121e-05,
      "loss": 1.2102,
      "step": 2406
    },
    {
      "epoch": 0.5348888888888889,
      "grad_norm": 0.622061550617218,
      "learning_rate": 9.32293986636971e-05,
      "loss": 1.2203,
      "step": 2407
    },
    {
      "epoch": 0.5351111111111111,
      "grad_norm": 0.4969142973423004,
      "learning_rate": 9.318485523385301e-05,
      "loss": 1.1624,
      "step": 2408
    },
    {
      "epoch": 0.5353333333333333,
      "grad_norm": 0.9374632239341736,
      "learning_rate": 9.314031180400892e-05,
      "loss": 2.6747,
      "step": 2409
    },
    {
      "epoch": 0.5355555555555556,
      "grad_norm": 0.8105266690254211,
      "learning_rate": 9.309576837416482e-05,
      "loss": 2.2292,
      "step": 2410
    },
    {
      "epoch": 0.5357777777777778,
      "grad_norm": 0.8644961714744568,
      "learning_rate": 9.305122494432071e-05,
      "loss": 1.1513,
      "step": 2411
    },
    {
      "epoch": 0.536,
      "grad_norm": 0.05260290950536728,
      "learning_rate": 9.300668151447662e-05,
      "loss": 0.0104,
      "step": 2412
    },
    {
      "epoch": 0.5362222222222223,
      "grad_norm": 0.053732600063085556,
      "learning_rate": 9.296213808463252e-05,
      "loss": 0.0108,
      "step": 2413
    },
    {
      "epoch": 0.5364444444444444,
      "grad_norm": 0.05255819112062454,
      "learning_rate": 9.291759465478841e-05,
      "loss": 0.0103,
      "step": 2414
    },
    {
      "epoch": 0.5366666666666666,
      "grad_norm": 0.5198526978492737,
      "learning_rate": 9.287305122494433e-05,
      "loss": 0.9938,
      "step": 2415
    },
    {
      "epoch": 0.5368888888888889,
      "grad_norm": 0.9449132680892944,
      "learning_rate": 9.282850779510023e-05,
      "loss": 2.1894,
      "step": 2416
    },
    {
      "epoch": 0.5371111111111111,
      "grad_norm": 0.8379865288734436,
      "learning_rate": 9.278396436525613e-05,
      "loss": 1.9937,
      "step": 2417
    },
    {
      "epoch": 0.5373333333333333,
      "grad_norm": 0.9000791311264038,
      "learning_rate": 9.273942093541204e-05,
      "loss": 1.9498,
      "step": 2418
    },
    {
      "epoch": 0.5375555555555556,
      "grad_norm": 1.0631499290466309,
      "learning_rate": 9.269487750556793e-05,
      "loss": 2.1338,
      "step": 2419
    },
    {
      "epoch": 0.5377777777777778,
      "grad_norm": 0.9265825748443604,
      "learning_rate": 9.265033407572383e-05,
      "loss": 2.1304,
      "step": 2420
    },
    {
      "epoch": 0.538,
      "grad_norm": 0.8675844669342041,
      "learning_rate": 9.260579064587974e-05,
      "loss": 2.0546,
      "step": 2421
    },
    {
      "epoch": 0.5382222222222223,
      "grad_norm": 0.14311416447162628,
      "learning_rate": 9.256124721603564e-05,
      "loss": 0.0225,
      "step": 2422
    },
    {
      "epoch": 0.5384444444444444,
      "grad_norm": 0.6641564965248108,
      "learning_rate": 9.251670378619154e-05,
      "loss": 1.1475,
      "step": 2423
    },
    {
      "epoch": 0.5386666666666666,
      "grad_norm": 0.9647601246833801,
      "learning_rate": 9.247216035634745e-05,
      "loss": 1.9414,
      "step": 2424
    },
    {
      "epoch": 0.5388888888888889,
      "grad_norm": 0.9718881845474243,
      "learning_rate": 9.242761692650335e-05,
      "loss": 2.031,
      "step": 2425
    },
    {
      "epoch": 0.5391111111111111,
      "grad_norm": 0.9469605088233948,
      "learning_rate": 9.238307349665924e-05,
      "loss": 1.9717,
      "step": 2426
    },
    {
      "epoch": 0.5393333333333333,
      "grad_norm": 0.9883223176002502,
      "learning_rate": 9.233853006681516e-05,
      "loss": 1.919,
      "step": 2427
    },
    {
      "epoch": 0.5395555555555556,
      "grad_norm": 1.31260085105896,
      "learning_rate": 9.229398663697105e-05,
      "loss": 2.0415,
      "step": 2428
    },
    {
      "epoch": 0.5397777777777778,
      "grad_norm": 0.9640005230903625,
      "learning_rate": 9.224944320712695e-05,
      "loss": 1.8964,
      "step": 2429
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7503467202186584,
      "learning_rate": 9.220489977728286e-05,
      "loss": 0.8938,
      "step": 2430
    },
    {
      "epoch": 0.5402222222222223,
      "grad_norm": 0.8691534996032715,
      "learning_rate": 9.216035634743877e-05,
      "loss": 1.9263,
      "step": 2431
    },
    {
      "epoch": 0.5404444444444444,
      "grad_norm": 1.175347089767456,
      "learning_rate": 9.211581291759466e-05,
      "loss": 1.5685,
      "step": 2432
    },
    {
      "epoch": 0.5406666666666666,
      "grad_norm": 1.2837430238723755,
      "learning_rate": 9.207126948775056e-05,
      "loss": 0.0653,
      "step": 2433
    },
    {
      "epoch": 0.5408888888888889,
      "grad_norm": 0.23923173546791077,
      "learning_rate": 9.202672605790647e-05,
      "loss": 0.0208,
      "step": 2434
    },
    {
      "epoch": 0.5411111111111111,
      "grad_norm": 0.075802281498909,
      "learning_rate": 9.198218262806236e-05,
      "loss": 0.0198,
      "step": 2435
    },
    {
      "epoch": 0.5413333333333333,
      "grad_norm": 0.6716746091842651,
      "learning_rate": 9.193763919821828e-05,
      "loss": 0.7959,
      "step": 2436
    },
    {
      "epoch": 0.5415555555555556,
      "grad_norm": 1.0868823528289795,
      "learning_rate": 9.189309576837417e-05,
      "loss": 1.7425,
      "step": 2437
    },
    {
      "epoch": 0.5417777777777778,
      "grad_norm": 0.9050690531730652,
      "learning_rate": 9.184855233853008e-05,
      "loss": 1.725,
      "step": 2438
    },
    {
      "epoch": 0.542,
      "grad_norm": 1.1277137994766235,
      "learning_rate": 9.180400890868597e-05,
      "loss": 1.7227,
      "step": 2439
    },
    {
      "epoch": 0.5422222222222223,
      "grad_norm": 0.11746444553136826,
      "learning_rate": 9.175946547884187e-05,
      "loss": 0.0203,
      "step": 2440
    },
    {
      "epoch": 0.5424444444444444,
      "grad_norm": 0.8444859981536865,
      "learning_rate": 9.171492204899778e-05,
      "loss": 1.0308,
      "step": 2441
    },
    {
      "epoch": 0.5426666666666666,
      "grad_norm": 0.7121472954750061,
      "learning_rate": 9.167037861915367e-05,
      "loss": 1.0558,
      "step": 2442
    },
    {
      "epoch": 0.5428888888888889,
      "grad_norm": 1.0242067575454712,
      "learning_rate": 9.162583518930959e-05,
      "loss": 1.3226,
      "step": 2443
    },
    {
      "epoch": 0.5431111111111111,
      "grad_norm": 1.067833423614502,
      "learning_rate": 9.158129175946548e-05,
      "loss": 1.3312,
      "step": 2444
    },
    {
      "epoch": 0.5433333333333333,
      "grad_norm": 1.1601375341415405,
      "learning_rate": 9.153674832962139e-05,
      "loss": 1.7604,
      "step": 2445
    },
    {
      "epoch": 0.5435555555555556,
      "grad_norm": 0.9809809327125549,
      "learning_rate": 9.149220489977729e-05,
      "loss": 1.3923,
      "step": 2446
    },
    {
      "epoch": 0.5437777777777778,
      "grad_norm": 0.74070143699646,
      "learning_rate": 9.144766146993318e-05,
      "loss": 0.7608,
      "step": 2447
    },
    {
      "epoch": 0.544,
      "grad_norm": 0.19705651700496674,
      "learning_rate": 9.140311804008909e-05,
      "loss": 0.0306,
      "step": 2448
    },
    {
      "epoch": 0.5442222222222223,
      "grad_norm": 0.8063182234764099,
      "learning_rate": 9.1358574610245e-05,
      "loss": 0.7462,
      "step": 2449
    },
    {
      "epoch": 0.5444444444444444,
      "grad_norm": 0.5909017324447632,
      "learning_rate": 9.13140311804009e-05,
      "loss": 0.4208,
      "step": 2450
    },
    {
      "epoch": 0.5446666666666666,
      "grad_norm": 0.7135075926780701,
      "learning_rate": 9.126948775055679e-05,
      "loss": 1.023,
      "step": 2451
    },
    {
      "epoch": 0.5448888888888889,
      "grad_norm": 0.5500476956367493,
      "learning_rate": 9.12249443207127e-05,
      "loss": 0.8607,
      "step": 2452
    },
    {
      "epoch": 0.5451111111111111,
      "grad_norm": 0.5393461585044861,
      "learning_rate": 9.11804008908686e-05,
      "loss": 1.1374,
      "step": 2453
    },
    {
      "epoch": 0.5453333333333333,
      "grad_norm": 0.8940771222114563,
      "learning_rate": 9.11358574610245e-05,
      "loss": 2.0951,
      "step": 2454
    },
    {
      "epoch": 0.5455555555555556,
      "grad_norm": 0.8712387084960938,
      "learning_rate": 9.109131403118041e-05,
      "loss": 2.3463,
      "step": 2455
    },
    {
      "epoch": 0.5457777777777778,
      "grad_norm": 0.8256047368049622,
      "learning_rate": 9.10467706013363e-05,
      "loss": 2.0528,
      "step": 2456
    },
    {
      "epoch": 0.546,
      "grad_norm": 0.06170797720551491,
      "learning_rate": 9.100222717149221e-05,
      "loss": 0.0108,
      "step": 2457
    },
    {
      "epoch": 0.5462222222222223,
      "grad_norm": 0.06142743304371834,
      "learning_rate": 9.095768374164811e-05,
      "loss": 0.0108,
      "step": 2458
    },
    {
      "epoch": 0.5464444444444444,
      "grad_norm": 0.5574305057525635,
      "learning_rate": 9.091314031180401e-05,
      "loss": 0.9544,
      "step": 2459
    },
    {
      "epoch": 0.5466666666666666,
      "grad_norm": 0.7911089062690735,
      "learning_rate": 9.086859688195991e-05,
      "loss": 1.9205,
      "step": 2460
    },
    {
      "epoch": 0.5468888888888889,
      "grad_norm": 0.9855570197105408,
      "learning_rate": 9.082405345211582e-05,
      "loss": 2.1234,
      "step": 2461
    },
    {
      "epoch": 0.5471111111111111,
      "grad_norm": 0.9141358137130737,
      "learning_rate": 9.077951002227172e-05,
      "loss": 2.0343,
      "step": 2462
    },
    {
      "epoch": 0.5473333333333333,
      "grad_norm": 0.8803722262382507,
      "learning_rate": 9.073496659242761e-05,
      "loss": 1.6688,
      "step": 2463
    },
    {
      "epoch": 0.5475555555555556,
      "grad_norm": 0.8939493894577026,
      "learning_rate": 9.069042316258352e-05,
      "loss": 2.0294,
      "step": 2464
    },
    {
      "epoch": 0.5477777777777778,
      "grad_norm": 1.09419584274292,
      "learning_rate": 9.064587973273943e-05,
      "loss": 1.9657,
      "step": 2465
    },
    {
      "epoch": 0.548,
      "grad_norm": 0.6533779501914978,
      "learning_rate": 9.060133630289532e-05,
      "loss": 0.9263,
      "step": 2466
    },
    {
      "epoch": 0.5482222222222223,
      "grad_norm": 0.8392800688743591,
      "learning_rate": 9.055679287305124e-05,
      "loss": 1.602,
      "step": 2467
    },
    {
      "epoch": 0.5484444444444444,
      "grad_norm": 0.9414380788803101,
      "learning_rate": 9.051224944320713e-05,
      "loss": 1.823,
      "step": 2468
    },
    {
      "epoch": 0.5486666666666666,
      "grad_norm": 0.9360827207565308,
      "learning_rate": 9.046770601336303e-05,
      "loss": 1.8465,
      "step": 2469
    },
    {
      "epoch": 0.5488888888888889,
      "grad_norm": 0.9744712114334106,
      "learning_rate": 9.042316258351894e-05,
      "loss": 2.1427,
      "step": 2470
    },
    {
      "epoch": 0.5491111111111111,
      "grad_norm": 0.7434724569320679,
      "learning_rate": 9.037861915367484e-05,
      "loss": 0.9291,
      "step": 2471
    },
    {
      "epoch": 0.5493333333333333,
      "grad_norm": 0.0730072483420372,
      "learning_rate": 9.033407572383074e-05,
      "loss": 0.0156,
      "step": 2472
    },
    {
      "epoch": 0.5495555555555556,
      "grad_norm": 0.07402710616588593,
      "learning_rate": 9.028953229398664e-05,
      "loss": 0.0155,
      "step": 2473
    },
    {
      "epoch": 0.5497777777777778,
      "grad_norm": 0.146684929728508,
      "learning_rate": 9.024498886414255e-05,
      "loss": 0.0244,
      "step": 2474
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6161303520202637,
      "learning_rate": 9.020044543429844e-05,
      "loss": 0.7681,
      "step": 2475
    },
    {
      "epoch": 0.5502222222222222,
      "grad_norm": 1.186063289642334,
      "learning_rate": 9.015590200445436e-05,
      "loss": 1.8129,
      "step": 2476
    },
    {
      "epoch": 0.5504444444444444,
      "grad_norm": 0.9506951570510864,
      "learning_rate": 9.011135857461025e-05,
      "loss": 1.884,
      "step": 2477
    },
    {
      "epoch": 0.5506666666666666,
      "grad_norm": 1.1868743896484375,
      "learning_rate": 9.006681514476615e-05,
      "loss": 1.974,
      "step": 2478
    },
    {
      "epoch": 0.5508888888888889,
      "grad_norm": 1.0391061305999756,
      "learning_rate": 9.002227171492206e-05,
      "loss": 1.4596,
      "step": 2479
    },
    {
      "epoch": 0.5511111111111111,
      "grad_norm": 1.0040979385375977,
      "learning_rate": 8.997772828507795e-05,
      "loss": 1.7033,
      "step": 2480
    },
    {
      "epoch": 0.5513333333333333,
      "grad_norm": 1.0166115760803223,
      "learning_rate": 8.993318485523386e-05,
      "loss": 1.7985,
      "step": 2481
    },
    {
      "epoch": 0.5515555555555556,
      "grad_norm": 1.0222774744033813,
      "learning_rate": 8.988864142538976e-05,
      "loss": 1.9303,
      "step": 2482
    },
    {
      "epoch": 0.5517777777777778,
      "grad_norm": 1.0761734247207642,
      "learning_rate": 8.984409799554567e-05,
      "loss": 1.6934,
      "step": 2483
    },
    {
      "epoch": 0.552,
      "grad_norm": 0.5333191156387329,
      "learning_rate": 8.979955456570156e-05,
      "loss": 0.0299,
      "step": 2484
    },
    {
      "epoch": 0.5522222222222222,
      "grad_norm": 0.22201071679592133,
      "learning_rate": 8.975501113585746e-05,
      "loss": 0.0227,
      "step": 2485
    },
    {
      "epoch": 0.5524444444444444,
      "grad_norm": 0.7878185510635376,
      "learning_rate": 8.971046770601337e-05,
      "loss": 0.9822,
      "step": 2486
    },
    {
      "epoch": 0.5526666666666666,
      "grad_norm": 0.6330142617225647,
      "learning_rate": 8.966592427616926e-05,
      "loss": 0.7849,
      "step": 2487
    },
    {
      "epoch": 0.5528888888888889,
      "grad_norm": 0.6124374866485596,
      "learning_rate": 8.962138084632518e-05,
      "loss": 0.7545,
      "step": 2488
    },
    {
      "epoch": 0.5531111111111111,
      "grad_norm": 0.9551767706871033,
      "learning_rate": 8.957683741648107e-05,
      "loss": 1.6892,
      "step": 2489
    },
    {
      "epoch": 0.5533333333333333,
      "grad_norm": 0.9491903781890869,
      "learning_rate": 8.953229398663698e-05,
      "loss": 1.5815,
      "step": 2490
    },
    {
      "epoch": 0.5535555555555556,
      "grad_norm": 0.6652069091796875,
      "learning_rate": 8.948775055679288e-05,
      "loss": 0.7771,
      "step": 2491
    },
    {
      "epoch": 0.5537777777777778,
      "grad_norm": 1.0102852582931519,
      "learning_rate": 8.944320712694878e-05,
      "loss": 1.5977,
      "step": 2492
    },
    {
      "epoch": 0.554,
      "grad_norm": 1.2895917892456055,
      "learning_rate": 8.939866369710468e-05,
      "loss": 1.5941,
      "step": 2493
    },
    {
      "epoch": 0.5542222222222222,
      "grad_norm": 0.7654258012771606,
      "learning_rate": 8.935412026726059e-05,
      "loss": 0.7395,
      "step": 2494
    },
    {
      "epoch": 0.5544444444444444,
      "grad_norm": 1.508339524269104,
      "learning_rate": 8.930957683741649e-05,
      "loss": 1.4961,
      "step": 2495
    },
    {
      "epoch": 0.5546666666666666,
      "grad_norm": 1.0076874494552612,
      "learning_rate": 8.926503340757238e-05,
      "loss": 1.1791,
      "step": 2496
    },
    {
      "epoch": 0.5548888888888889,
      "grad_norm": 1.1195073127746582,
      "learning_rate": 8.922048997772829e-05,
      "loss": 1.0248,
      "step": 2497
    },
    {
      "epoch": 0.5551111111111111,
      "grad_norm": 1.0789536237716675,
      "learning_rate": 8.91759465478842e-05,
      "loss": 1.0849,
      "step": 2498
    },
    {
      "epoch": 0.5553333333333333,
      "grad_norm": 0.8178943991661072,
      "learning_rate": 8.913140311804009e-05,
      "loss": 0.4074,
      "step": 2499
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 1.1042306423187256,
      "learning_rate": 8.9086859688196e-05,
      "loss": 0.909,
      "step": 2500
    },
    {
      "epoch": 0.5557777777777778,
      "grad_norm": 0.6491156816482544,
      "learning_rate": 8.90423162583519e-05,
      "loss": 1.2739,
      "step": 2501
    },
    {
      "epoch": 0.556,
      "grad_norm": 0.04861566051840782,
      "learning_rate": 8.89977728285078e-05,
      "loss": 0.0115,
      "step": 2502
    },
    {
      "epoch": 0.5562222222222222,
      "grad_norm": 0.8540087938308716,
      "learning_rate": 8.895322939866371e-05,
      "loss": 2.3845,
      "step": 2503
    },
    {
      "epoch": 0.5564444444444444,
      "grad_norm": 1.0630886554718018,
      "learning_rate": 8.89086859688196e-05,
      "loss": 2.3142,
      "step": 2504
    },
    {
      "epoch": 0.5566666666666666,
      "grad_norm": 0.7888458371162415,
      "learning_rate": 8.88641425389755e-05,
      "loss": 2.1994,
      "step": 2505
    },
    {
      "epoch": 0.5568888888888889,
      "grad_norm": 0.0522671639919281,
      "learning_rate": 8.881959910913141e-05,
      "loss": 0.0114,
      "step": 2506
    },
    {
      "epoch": 0.5571111111111111,
      "grad_norm": 0.8858940601348877,
      "learning_rate": 8.877505567928731e-05,
      "loss": 2.1228,
      "step": 2507
    },
    {
      "epoch": 0.5573333333333333,
      "grad_norm": 0.9195045232772827,
      "learning_rate": 8.87305122494432e-05,
      "loss": 2.1459,
      "step": 2508
    },
    {
      "epoch": 0.5575555555555556,
      "grad_norm": 0.08028994500637054,
      "learning_rate": 8.868596881959911e-05,
      "loss": 0.0116,
      "step": 2509
    },
    {
      "epoch": 0.5577777777777778,
      "grad_norm": 0.5606245398521423,
      "learning_rate": 8.864142538975502e-05,
      "loss": 1.1338,
      "step": 2510
    },
    {
      "epoch": 0.558,
      "grad_norm": 0.8265031576156616,
      "learning_rate": 8.859688195991091e-05,
      "loss": 2.0906,
      "step": 2511
    },
    {
      "epoch": 0.5582222222222222,
      "grad_norm": 0.8628373742103577,
      "learning_rate": 8.855233853006681e-05,
      "loss": 2.1225,
      "step": 2512
    },
    {
      "epoch": 0.5584444444444444,
      "grad_norm": 0.8644111752510071,
      "learning_rate": 8.850779510022272e-05,
      "loss": 1.9861,
      "step": 2513
    },
    {
      "epoch": 0.5586666666666666,
      "grad_norm": 1.0040253400802612,
      "learning_rate": 8.846325167037863e-05,
      "loss": 2.3637,
      "step": 2514
    },
    {
      "epoch": 0.5588888888888889,
      "grad_norm": 1.0627641677856445,
      "learning_rate": 8.841870824053452e-05,
      "loss": 1.7167,
      "step": 2515
    },
    {
      "epoch": 0.5591111111111111,
      "grad_norm": 0.9366372227668762,
      "learning_rate": 8.837416481069044e-05,
      "loss": 1.843,
      "step": 2516
    },
    {
      "epoch": 0.5593333333333333,
      "grad_norm": 0.9823426604270935,
      "learning_rate": 8.832962138084633e-05,
      "loss": 1.7333,
      "step": 2517
    },
    {
      "epoch": 0.5595555555555556,
      "grad_norm": 0.9016628861427307,
      "learning_rate": 8.828507795100222e-05,
      "loss": 1.6935,
      "step": 2518
    },
    {
      "epoch": 0.5597777777777778,
      "grad_norm": 1.0293992757797241,
      "learning_rate": 8.824053452115814e-05,
      "loss": 2.4355,
      "step": 2519
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.8653985261917114,
      "learning_rate": 8.819599109131403e-05,
      "loss": 1.9367,
      "step": 2520
    },
    {
      "epoch": 0.5602222222222222,
      "grad_norm": 0.9979233145713806,
      "learning_rate": 8.815144766146994e-05,
      "loss": 1.9982,
      "step": 2521
    },
    {
      "epoch": 0.5604444444444444,
      "grad_norm": 0.9345369338989258,
      "learning_rate": 8.810690423162584e-05,
      "loss": 1.6632,
      "step": 2522
    },
    {
      "epoch": 0.5606666666666666,
      "grad_norm": 0.9781597256660461,
      "learning_rate": 8.806236080178175e-05,
      "loss": 1.945,
      "step": 2523
    },
    {
      "epoch": 0.5608888888888889,
      "grad_norm": 0.6337805390357971,
      "learning_rate": 8.801781737193764e-05,
      "loss": 1.0621,
      "step": 2524
    },
    {
      "epoch": 0.5611111111111111,
      "grad_norm": 0.07470656931400299,
      "learning_rate": 8.797327394209354e-05,
      "loss": 0.0167,
      "step": 2525
    },
    {
      "epoch": 0.5613333333333334,
      "grad_norm": 0.8214187622070312,
      "learning_rate": 8.792873051224945e-05,
      "loss": 1.1169,
      "step": 2526
    },
    {
      "epoch": 0.5615555555555556,
      "grad_norm": 0.10050620138645172,
      "learning_rate": 8.788418708240534e-05,
      "loss": 0.0203,
      "step": 2527
    },
    {
      "epoch": 0.5617777777777778,
      "grad_norm": 0.09838547557592392,
      "learning_rate": 8.783964365256126e-05,
      "loss": 0.0198,
      "step": 2528
    },
    {
      "epoch": 0.562,
      "grad_norm": 0.9661954641342163,
      "learning_rate": 8.779510022271715e-05,
      "loss": 1.6663,
      "step": 2529
    },
    {
      "epoch": 0.5622222222222222,
      "grad_norm": 0.9664443731307983,
      "learning_rate": 8.775055679287306e-05,
      "loss": 1.6305,
      "step": 2530
    },
    {
      "epoch": 0.5624444444444444,
      "grad_norm": 1.0631190538406372,
      "learning_rate": 8.770601336302896e-05,
      "loss": 2.0862,
      "step": 2531
    },
    {
      "epoch": 0.5626666666666666,
      "grad_norm": 0.9939605593681335,
      "learning_rate": 8.766146993318485e-05,
      "loss": 1.8518,
      "step": 2532
    },
    {
      "epoch": 0.5628888888888889,
      "grad_norm": 0.06220734864473343,
      "learning_rate": 8.761692650334076e-05,
      "loss": 0.0191,
      "step": 2533
    },
    {
      "epoch": 0.5631111111111111,
      "grad_norm": 0.07106296718120575,
      "learning_rate": 8.757238307349666e-05,
      "loss": 0.0181,
      "step": 2534
    },
    {
      "epoch": 0.5633333333333334,
      "grad_norm": 1.0672193765640259,
      "learning_rate": 8.752783964365257e-05,
      "loss": 1.9556,
      "step": 2535
    },
    {
      "epoch": 0.5635555555555556,
      "grad_norm": 1.0547032356262207,
      "learning_rate": 8.748329621380846e-05,
      "loss": 1.848,
      "step": 2536
    },
    {
      "epoch": 0.5637777777777778,
      "grad_norm": 0.08132950961589813,
      "learning_rate": 8.743875278396437e-05,
      "loss": 0.02,
      "step": 2537
    },
    {
      "epoch": 0.564,
      "grad_norm": 0.08906566351652145,
      "learning_rate": 8.739420935412027e-05,
      "loss": 0.0208,
      "step": 2538
    },
    {
      "epoch": 0.5642222222222222,
      "grad_norm": 0.0810704380273819,
      "learning_rate": 8.734966592427616e-05,
      "loss": 0.0199,
      "step": 2539
    },
    {
      "epoch": 0.5644444444444444,
      "grad_norm": 0.6286865472793579,
      "learning_rate": 8.730512249443208e-05,
      "loss": 0.789,
      "step": 2540
    },
    {
      "epoch": 0.5646666666666667,
      "grad_norm": 1.0283888578414917,
      "learning_rate": 8.726057906458798e-05,
      "loss": 1.6464,
      "step": 2541
    },
    {
      "epoch": 0.5648888888888889,
      "grad_norm": 0.7151986956596375,
      "learning_rate": 8.721603563474388e-05,
      "loss": 0.6798,
      "step": 2542
    },
    {
      "epoch": 0.5651111111111111,
      "grad_norm": 1.166221261024475,
      "learning_rate": 8.717149220489979e-05,
      "loss": 1.8092,
      "step": 2543
    },
    {
      "epoch": 0.5653333333333334,
      "grad_norm": 0.9559175968170166,
      "learning_rate": 8.712694877505568e-05,
      "loss": 1.5216,
      "step": 2544
    },
    {
      "epoch": 0.5655555555555556,
      "grad_norm": 1.0090394020080566,
      "learning_rate": 8.708240534521158e-05,
      "loss": 0.7943,
      "step": 2545
    },
    {
      "epoch": 0.5657777777777778,
      "grad_norm": 0.18629422783851624,
      "learning_rate": 8.703786191536749e-05,
      "loss": 0.0287,
      "step": 2546
    },
    {
      "epoch": 0.566,
      "grad_norm": 0.7050996422767639,
      "learning_rate": 8.69933184855234e-05,
      "loss": 0.6541,
      "step": 2547
    },
    {
      "epoch": 0.5662222222222222,
      "grad_norm": 1.126369833946228,
      "learning_rate": 8.694877505567929e-05,
      "loss": 1.3952,
      "step": 2548
    },
    {
      "epoch": 0.5664444444444444,
      "grad_norm": 0.15956442058086395,
      "learning_rate": 8.690423162583519e-05,
      "loss": 0.0382,
      "step": 2549
    },
    {
      "epoch": 0.5666666666666667,
      "grad_norm": 1.2113162279129028,
      "learning_rate": 8.68596881959911e-05,
      "loss": 1.149,
      "step": 2550
    },
    {
      "epoch": 0.5668888888888889,
      "grad_norm": 0.5767436027526855,
      "learning_rate": 8.681514476614699e-05,
      "loss": 0.8876,
      "step": 2551
    },
    {
      "epoch": 0.5671111111111111,
      "grad_norm": 0.06607482582330704,
      "learning_rate": 8.677060133630291e-05,
      "loss": 0.0109,
      "step": 2552
    },
    {
      "epoch": 0.5673333333333334,
      "grad_norm": 0.5429921746253967,
      "learning_rate": 8.67260579064588e-05,
      "loss": 1.1846,
      "step": 2553
    },
    {
      "epoch": 0.5675555555555556,
      "grad_norm": 0.04655059799551964,
      "learning_rate": 8.66815144766147e-05,
      "loss": 0.0108,
      "step": 2554
    },
    {
      "epoch": 0.5677777777777778,
      "grad_norm": 0.8319535255432129,
      "learning_rate": 8.663697104677061e-05,
      "loss": 2.2138,
      "step": 2555
    },
    {
      "epoch": 0.568,
      "grad_norm": 0.5527912378311157,
      "learning_rate": 8.65924276169265e-05,
      "loss": 1.0141,
      "step": 2556
    },
    {
      "epoch": 0.5682222222222222,
      "grad_norm": 0.07282774150371552,
      "learning_rate": 8.65478841870824e-05,
      "loss": 0.0119,
      "step": 2557
    },
    {
      "epoch": 0.5684444444444444,
      "grad_norm": 0.08080356568098068,
      "learning_rate": 8.650334075723831e-05,
      "loss": 0.0117,
      "step": 2558
    },
    {
      "epoch": 0.5686666666666667,
      "grad_norm": 0.07515871524810791,
      "learning_rate": 8.645879732739422e-05,
      "loss": 0.0117,
      "step": 2559
    },
    {
      "epoch": 0.5688888888888889,
      "grad_norm": 0.07429923862218857,
      "learning_rate": 8.641425389755011e-05,
      "loss": 0.011,
      "step": 2560
    },
    {
      "epoch": 0.5691111111111111,
      "grad_norm": 0.985916793346405,
      "learning_rate": 8.636971046770603e-05,
      "loss": 2.1608,
      "step": 2561
    },
    {
      "epoch": 0.5693333333333334,
      "grad_norm": 1.0047152042388916,
      "learning_rate": 8.632516703786192e-05,
      "loss": 2.3047,
      "step": 2562
    },
    {
      "epoch": 0.5695555555555556,
      "grad_norm": 0.8193495273590088,
      "learning_rate": 8.628062360801783e-05,
      "loss": 1.8905,
      "step": 2563
    },
    {
      "epoch": 0.5697777777777778,
      "grad_norm": 0.8767317533493042,
      "learning_rate": 8.623608017817373e-05,
      "loss": 1.795,
      "step": 2564
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9993298053741455,
      "learning_rate": 8.619153674832962e-05,
      "loss": 2.022,
      "step": 2565
    },
    {
      "epoch": 0.5702222222222222,
      "grad_norm": 0.9578080773353577,
      "learning_rate": 8.614699331848553e-05,
      "loss": 1.7333,
      "step": 2566
    },
    {
      "epoch": 0.5704444444444444,
      "grad_norm": 0.8383786082267761,
      "learning_rate": 8.610244988864143e-05,
      "loss": 1.9282,
      "step": 2567
    },
    {
      "epoch": 0.5706666666666667,
      "grad_norm": 0.900726854801178,
      "learning_rate": 8.605790645879734e-05,
      "loss": 1.835,
      "step": 2568
    },
    {
      "epoch": 0.5708888888888889,
      "grad_norm": 0.8840144872665405,
      "learning_rate": 8.601336302895323e-05,
      "loss": 1.9359,
      "step": 2569
    },
    {
      "epoch": 0.5711111111111111,
      "grad_norm": 1.0611618757247925,
      "learning_rate": 8.596881959910914e-05,
      "loss": 1.7402,
      "step": 2570
    },
    {
      "epoch": 0.5713333333333334,
      "grad_norm": 0.960759162902832,
      "learning_rate": 8.592427616926504e-05,
      "loss": 1.8016,
      "step": 2571
    },
    {
      "epoch": 0.5715555555555556,
      "grad_norm": 0.962713897228241,
      "learning_rate": 8.587973273942093e-05,
      "loss": 1.8722,
      "step": 2572
    },
    {
      "epoch": 0.5717777777777778,
      "grad_norm": 0.6798233985900879,
      "learning_rate": 8.583518930957685e-05,
      "loss": 0.8461,
      "step": 2573
    },
    {
      "epoch": 0.572,
      "grad_norm": 0.7111622095108032,
      "learning_rate": 8.579064587973274e-05,
      "loss": 1.0258,
      "step": 2574
    },
    {
      "epoch": 0.5722222222222222,
      "grad_norm": 0.7947016358375549,
      "learning_rate": 8.574610244988865e-05,
      "loss": 1.0723,
      "step": 2575
    },
    {
      "epoch": 0.5724444444444444,
      "grad_norm": 0.645455002784729,
      "learning_rate": 8.570155902004455e-05,
      "loss": 0.9378,
      "step": 2576
    },
    {
      "epoch": 0.5726666666666667,
      "grad_norm": 0.9577328562736511,
      "learning_rate": 8.565701559020045e-05,
      "loss": 1.7592,
      "step": 2577
    },
    {
      "epoch": 0.5728888888888889,
      "grad_norm": 0.9869621396064758,
      "learning_rate": 8.561247216035635e-05,
      "loss": 1.8991,
      "step": 2578
    },
    {
      "epoch": 0.5731111111111111,
      "grad_norm": 0.9232078194618225,
      "learning_rate": 8.556792873051226e-05,
      "loss": 1.5691,
      "step": 2579
    },
    {
      "epoch": 0.5733333333333334,
      "grad_norm": 1.0303270816802979,
      "learning_rate": 8.552338530066816e-05,
      "loss": 1.6218,
      "step": 2580
    },
    {
      "epoch": 0.5735555555555556,
      "grad_norm": 0.9738333225250244,
      "learning_rate": 8.547884187082405e-05,
      "loss": 1.9149,
      "step": 2581
    },
    {
      "epoch": 0.5737777777777778,
      "grad_norm": 1.0307059288024902,
      "learning_rate": 8.543429844097996e-05,
      "loss": 1.8569,
      "step": 2582
    },
    {
      "epoch": 0.574,
      "grad_norm": 1.0071452856063843,
      "learning_rate": 8.538975501113586e-05,
      "loss": 1.5601,
      "step": 2583
    },
    {
      "epoch": 0.5742222222222222,
      "grad_norm": 0.06155632063746452,
      "learning_rate": 8.534521158129176e-05,
      "loss": 0.0178,
      "step": 2584
    },
    {
      "epoch": 0.5744444444444444,
      "grad_norm": 0.061177369207143784,
      "learning_rate": 8.530066815144766e-05,
      "loss": 0.0178,
      "step": 2585
    },
    {
      "epoch": 0.5746666666666667,
      "grad_norm": 0.07538451999425888,
      "learning_rate": 8.525612472160357e-05,
      "loss": 0.0177,
      "step": 2586
    },
    {
      "epoch": 0.5748888888888889,
      "grad_norm": 0.8191643357276917,
      "learning_rate": 8.521158129175947e-05,
      "loss": 0.99,
      "step": 2587
    },
    {
      "epoch": 0.5751111111111111,
      "grad_norm": 0.8007138967514038,
      "learning_rate": 8.516703786191536e-05,
      "loss": 0.9497,
      "step": 2588
    },
    {
      "epoch": 0.5753333333333334,
      "grad_norm": 0.0795382633805275,
      "learning_rate": 8.512249443207127e-05,
      "loss": 0.0185,
      "step": 2589
    },
    {
      "epoch": 0.5755555555555556,
      "grad_norm": 0.8413631319999695,
      "learning_rate": 8.507795100222718e-05,
      "loss": 0.901,
      "step": 2590
    },
    {
      "epoch": 0.5757777777777778,
      "grad_norm": 1.0749456882476807,
      "learning_rate": 8.503340757238307e-05,
      "loss": 1.6439,
      "step": 2591
    },
    {
      "epoch": 0.576,
      "grad_norm": 1.0282601118087769,
      "learning_rate": 8.498886414253899e-05,
      "loss": 1.7618,
      "step": 2592
    },
    {
      "epoch": 0.5762222222222222,
      "grad_norm": 1.302951455116272,
      "learning_rate": 8.494432071269488e-05,
      "loss": 1.6646,
      "step": 2593
    },
    {
      "epoch": 0.5764444444444444,
      "grad_norm": 1.0702754259109497,
      "learning_rate": 8.489977728285078e-05,
      "loss": 1.5267,
      "step": 2594
    },
    {
      "epoch": 0.5766666666666667,
      "grad_norm": 1.0590934753417969,
      "learning_rate": 8.485523385300669e-05,
      "loss": 1.7966,
      "step": 2595
    },
    {
      "epoch": 0.5768888888888889,
      "grad_norm": 1.1090075969696045,
      "learning_rate": 8.481069042316258e-05,
      "loss": 1.6027,
      "step": 2596
    },
    {
      "epoch": 0.5771111111111111,
      "grad_norm": 1.0323162078857422,
      "learning_rate": 8.476614699331849e-05,
      "loss": 1.8052,
      "step": 2597
    },
    {
      "epoch": 0.5773333333333334,
      "grad_norm": 1.0176801681518555,
      "learning_rate": 8.472160356347439e-05,
      "loss": 1.3763,
      "step": 2598
    },
    {
      "epoch": 0.5775555555555556,
      "grad_norm": 0.6453489065170288,
      "learning_rate": 8.46770601336303e-05,
      "loss": 0.673,
      "step": 2599
    },
    {
      "epoch": 0.5777777777777777,
      "grad_norm": 0.6275585293769836,
      "learning_rate": 8.463251670378619e-05,
      "loss": 0.3759,
      "step": 2600
    },
    {
      "epoch": 0.578,
      "grad_norm": 0.5658282041549683,
      "learning_rate": 8.458797327394211e-05,
      "loss": 1.0322,
      "step": 2601
    },
    {
      "epoch": 0.5782222222222222,
      "grad_norm": 0.0510311983525753,
      "learning_rate": 8.4543429844098e-05,
      "loss": 0.0103,
      "step": 2602
    },
    {
      "epoch": 0.5784444444444444,
      "grad_norm": 0.7874890565872192,
      "learning_rate": 8.449888641425389e-05,
      "loss": 0.8342,
      "step": 2603
    },
    {
      "epoch": 0.5786666666666667,
      "grad_norm": 0.5974457859992981,
      "learning_rate": 8.445434298440981e-05,
      "loss": 1.0163,
      "step": 2604
    },
    {
      "epoch": 0.5788888888888889,
      "grad_norm": 0.901496171951294,
      "learning_rate": 8.44097995545657e-05,
      "loss": 2.1625,
      "step": 2605
    },
    {
      "epoch": 0.5791111111111111,
      "grad_norm": 0.6653454899787903,
      "learning_rate": 8.43652561247216e-05,
      "loss": 1.1271,
      "step": 2606
    },
    {
      "epoch": 0.5793333333333334,
      "grad_norm": 0.0663490742444992,
      "learning_rate": 8.432071269487751e-05,
      "loss": 0.0111,
      "step": 2607
    },
    {
      "epoch": 0.5795555555555556,
      "grad_norm": 0.062182825058698654,
      "learning_rate": 8.427616926503342e-05,
      "loss": 0.011,
      "step": 2608
    },
    {
      "epoch": 0.5797777777777777,
      "grad_norm": 0.6534640789031982,
      "learning_rate": 8.423162583518931e-05,
      "loss": 1.1674,
      "step": 2609
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.872759222984314,
      "learning_rate": 8.418708240534521e-05,
      "loss": 1.9497,
      "step": 2610
    },
    {
      "epoch": 0.5802222222222222,
      "grad_norm": 0.9230912923812866,
      "learning_rate": 8.414253897550112e-05,
      "loss": 1.8053,
      "step": 2611
    },
    {
      "epoch": 0.5804444444444444,
      "grad_norm": 0.9444859623908997,
      "learning_rate": 8.409799554565701e-05,
      "loss": 2.0272,
      "step": 2612
    },
    {
      "epoch": 0.5806666666666667,
      "grad_norm": 0.8272079825401306,
      "learning_rate": 8.405345211581293e-05,
      "loss": 1.9847,
      "step": 2613
    },
    {
      "epoch": 0.5808888888888889,
      "grad_norm": 0.9048417806625366,
      "learning_rate": 8.400890868596882e-05,
      "loss": 1.1981,
      "step": 2614
    },
    {
      "epoch": 0.5811111111111111,
      "grad_norm": 0.7303177118301392,
      "learning_rate": 8.396436525612473e-05,
      "loss": 0.8572,
      "step": 2615
    },
    {
      "epoch": 0.5813333333333334,
      "grad_norm": 0.6562807559967041,
      "learning_rate": 8.391982182628063e-05,
      "loss": 1.179,
      "step": 2616
    },
    {
      "epoch": 0.5815555555555556,
      "grad_norm": 1.0120536088943481,
      "learning_rate": 8.387527839643652e-05,
      "loss": 1.8926,
      "step": 2617
    },
    {
      "epoch": 0.5817777777777777,
      "grad_norm": 0.9938877820968628,
      "learning_rate": 8.383073496659243e-05,
      "loss": 2.0306,
      "step": 2618
    },
    {
      "epoch": 0.582,
      "grad_norm": 0.9313610196113586,
      "learning_rate": 8.378619153674834e-05,
      "loss": 2.047,
      "step": 2619
    },
    {
      "epoch": 0.5822222222222222,
      "grad_norm": 0.06936561316251755,
      "learning_rate": 8.374164810690424e-05,
      "loss": 0.0158,
      "step": 2620
    },
    {
      "epoch": 0.5824444444444444,
      "grad_norm": 0.06766082346439362,
      "learning_rate": 8.369710467706013e-05,
      "loss": 0.016,
      "step": 2621
    },
    {
      "epoch": 0.5826666666666667,
      "grad_norm": 0.07017278671264648,
      "learning_rate": 8.365256124721604e-05,
      "loss": 0.0156,
      "step": 2622
    },
    {
      "epoch": 0.5828888888888889,
      "grad_norm": 0.7275362014770508,
      "learning_rate": 8.360801781737194e-05,
      "loss": 0.8656,
      "step": 2623
    },
    {
      "epoch": 0.5831111111111111,
      "grad_norm": 1.1176071166992188,
      "learning_rate": 8.356347438752784e-05,
      "loss": 2.0985,
      "step": 2624
    },
    {
      "epoch": 0.5833333333333334,
      "grad_norm": 0.9742321968078613,
      "learning_rate": 8.351893095768375e-05,
      "loss": 1.8387,
      "step": 2625
    },
    {
      "epoch": 0.5835555555555556,
      "grad_norm": 0.940450131893158,
      "learning_rate": 8.347438752783965e-05,
      "loss": 1.6521,
      "step": 2626
    },
    {
      "epoch": 0.5837777777777777,
      "grad_norm": 1.1521259546279907,
      "learning_rate": 8.342984409799555e-05,
      "loss": 1.8395,
      "step": 2627
    },
    {
      "epoch": 0.584,
      "grad_norm": 0.9441390037536621,
      "learning_rate": 8.338530066815146e-05,
      "loss": 1.6441,
      "step": 2628
    },
    {
      "epoch": 0.5842222222222222,
      "grad_norm": 1.1160365343093872,
      "learning_rate": 8.334075723830735e-05,
      "loss": 1.5527,
      "step": 2629
    },
    {
      "epoch": 0.5844444444444444,
      "grad_norm": 0.6974783539772034,
      "learning_rate": 8.329621380846325e-05,
      "loss": 0.9513,
      "step": 2630
    },
    {
      "epoch": 0.5846666666666667,
      "grad_norm": 0.06145935505628586,
      "learning_rate": 8.325167037861916e-05,
      "loss": 0.0166,
      "step": 2631
    },
    {
      "epoch": 0.5848888888888889,
      "grad_norm": 0.06906305998563766,
      "learning_rate": 8.320712694877506e-05,
      "loss": 0.0169,
      "step": 2632
    },
    {
      "epoch": 0.5851111111111111,
      "grad_norm": 0.06415744870901108,
      "learning_rate": 8.316258351893096e-05,
      "loss": 0.0169,
      "step": 2633
    },
    {
      "epoch": 0.5853333333333334,
      "grad_norm": 1.028851866722107,
      "learning_rate": 8.311804008908686e-05,
      "loss": 1.9996,
      "step": 2634
    },
    {
      "epoch": 0.5855555555555556,
      "grad_norm": 0.6387873291969299,
      "learning_rate": 8.307349665924277e-05,
      "loss": 1.0506,
      "step": 2635
    },
    {
      "epoch": 0.5857777777777777,
      "grad_norm": 0.07523876428604126,
      "learning_rate": 8.302895322939866e-05,
      "loss": 0.0182,
      "step": 2636
    },
    {
      "epoch": 0.586,
      "grad_norm": 0.07396306097507477,
      "learning_rate": 8.298440979955458e-05,
      "loss": 0.0178,
      "step": 2637
    },
    {
      "epoch": 0.5862222222222222,
      "grad_norm": 0.6903396248817444,
      "learning_rate": 8.293986636971047e-05,
      "loss": 0.7665,
      "step": 2638
    },
    {
      "epoch": 0.5864444444444444,
      "grad_norm": 1.0969858169555664,
      "learning_rate": 8.289532293986638e-05,
      "loss": 1.7489,
      "step": 2639
    },
    {
      "epoch": 0.5866666666666667,
      "grad_norm": 1.394546389579773,
      "learning_rate": 8.285077951002228e-05,
      "loss": 1.6268,
      "step": 2640
    },
    {
      "epoch": 0.5868888888888889,
      "grad_norm": 1.185672640800476,
      "learning_rate": 8.280623608017817e-05,
      "loss": 1.7259,
      "step": 2641
    },
    {
      "epoch": 0.5871111111111111,
      "grad_norm": 1.1864769458770752,
      "learning_rate": 8.276169265033408e-05,
      "loss": 1.4244,
      "step": 2642
    },
    {
      "epoch": 0.5873333333333334,
      "grad_norm": 1.172638177871704,
      "learning_rate": 8.271714922048998e-05,
      "loss": 1.4388,
      "step": 2643
    },
    {
      "epoch": 0.5875555555555556,
      "grad_norm": 1.2217496633529663,
      "learning_rate": 8.267260579064589e-05,
      "loss": 1.4179,
      "step": 2644
    },
    {
      "epoch": 0.5877777777777777,
      "grad_norm": 1.3695220947265625,
      "learning_rate": 8.262806236080178e-05,
      "loss": 1.7871,
      "step": 2645
    },
    {
      "epoch": 0.588,
      "grad_norm": 1.0224231481552124,
      "learning_rate": 8.25835189309577e-05,
      "loss": 1.24,
      "step": 2646
    },
    {
      "epoch": 0.5882222222222222,
      "grad_norm": 0.9784666299819946,
      "learning_rate": 8.253897550111359e-05,
      "loss": 1.1633,
      "step": 2647
    },
    {
      "epoch": 0.5884444444444444,
      "grad_norm": 0.9947894215583801,
      "learning_rate": 8.24944320712695e-05,
      "loss": 1.1847,
      "step": 2648
    },
    {
      "epoch": 0.5886666666666667,
      "grad_norm": 0.17486204206943512,
      "learning_rate": 8.24498886414254e-05,
      "loss": 0.0404,
      "step": 2649
    },
    {
      "epoch": 0.5888888888888889,
      "grad_norm": 1.0700467824935913,
      "learning_rate": 8.24053452115813e-05,
      "loss": 1.1755,
      "step": 2650
    },
    {
      "epoch": 0.5891111111111111,
      "grad_norm": 0.050411712378263474,
      "learning_rate": 8.23608017817372e-05,
      "loss": 0.0105,
      "step": 2651
    },
    {
      "epoch": 0.5893333333333334,
      "grad_norm": 0.9025883078575134,
      "learning_rate": 8.23162583518931e-05,
      "loss": 2.3288,
      "step": 2652
    },
    {
      "epoch": 0.5895555555555556,
      "grad_norm": 0.6692728996276855,
      "learning_rate": 8.227171492204901e-05,
      "loss": 1.1624,
      "step": 2653
    },
    {
      "epoch": 0.5897777777777777,
      "grad_norm": 0.053395964205265045,
      "learning_rate": 8.22271714922049e-05,
      "loss": 0.0103,
      "step": 2654
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9292510747909546,
      "learning_rate": 8.21826280623608e-05,
      "loss": 2.1717,
      "step": 2655
    },
    {
      "epoch": 0.5902222222222222,
      "grad_norm": 0.08087780326604843,
      "learning_rate": 8.213808463251671e-05,
      "loss": 0.0115,
      "step": 2656
    },
    {
      "epoch": 0.5904444444444444,
      "grad_norm": 0.635247528553009,
      "learning_rate": 8.20935412026726e-05,
      "loss": 1.0443,
      "step": 2657
    },
    {
      "epoch": 0.5906666666666667,
      "grad_norm": 0.9392407536506653,
      "learning_rate": 8.204899777282851e-05,
      "loss": 2.1964,
      "step": 2658
    },
    {
      "epoch": 0.5908888888888889,
      "grad_norm": 0.9710378646850586,
      "learning_rate": 8.200445434298441e-05,
      "loss": 2.2006,
      "step": 2659
    },
    {
      "epoch": 0.5911111111111111,
      "grad_norm": 0.9601635336875916,
      "learning_rate": 8.195991091314032e-05,
      "loss": 2.1372,
      "step": 2660
    },
    {
      "epoch": 0.5913333333333334,
      "grad_norm": 0.9137882590293884,
      "learning_rate": 8.191536748329621e-05,
      "loss": 2.1513,
      "step": 2661
    },
    {
      "epoch": 0.5915555555555555,
      "grad_norm": 1.1451430320739746,
      "learning_rate": 8.187082405345212e-05,
      "loss": 1.9865,
      "step": 2662
    },
    {
      "epoch": 0.5917777777777777,
      "grad_norm": 0.91966313123703,
      "learning_rate": 8.182628062360802e-05,
      "loss": 1.8152,
      "step": 2663
    },
    {
      "epoch": 0.592,
      "grad_norm": 0.9007231593132019,
      "learning_rate": 8.178173719376391e-05,
      "loss": 2.0409,
      "step": 2664
    },
    {
      "epoch": 0.5922222222222222,
      "grad_norm": 0.6561061143875122,
      "learning_rate": 8.173719376391983e-05,
      "loss": 1.1022,
      "step": 2665
    },
    {
      "epoch": 0.5924444444444444,
      "grad_norm": 0.6513422727584839,
      "learning_rate": 8.169265033407572e-05,
      "loss": 0.8374,
      "step": 2666
    },
    {
      "epoch": 0.5926666666666667,
      "grad_norm": 0.8476526737213135,
      "learning_rate": 8.164810690423163e-05,
      "loss": 1.6632,
      "step": 2667
    },
    {
      "epoch": 0.5928888888888889,
      "grad_norm": 1.1757445335388184,
      "learning_rate": 8.160356347438754e-05,
      "loss": 2.2507,
      "step": 2668
    },
    {
      "epoch": 0.5931111111111111,
      "grad_norm": 0.9867371916770935,
      "learning_rate": 8.155902004454343e-05,
      "loss": 1.8475,
      "step": 2669
    },
    {
      "epoch": 0.5933333333333334,
      "grad_norm": 1.1854937076568604,
      "learning_rate": 8.151447661469933e-05,
      "loss": 1.7041,
      "step": 2670
    },
    {
      "epoch": 0.5935555555555555,
      "grad_norm": 0.721083402633667,
      "learning_rate": 8.146993318485524e-05,
      "loss": 0.8732,
      "step": 2671
    },
    {
      "epoch": 0.5937777777777777,
      "grad_norm": 0.6721528768539429,
      "learning_rate": 8.142538975501114e-05,
      "loss": 0.8163,
      "step": 2672
    },
    {
      "epoch": 0.594,
      "grad_norm": 1.2233518362045288,
      "learning_rate": 8.138084632516704e-05,
      "loss": 1.7324,
      "step": 2673
    },
    {
      "epoch": 0.5942222222222222,
      "grad_norm": 1.009818196296692,
      "learning_rate": 8.133630289532294e-05,
      "loss": 1.9385,
      "step": 2674
    },
    {
      "epoch": 0.5944444444444444,
      "grad_norm": 0.9225603342056274,
      "learning_rate": 8.129175946547885e-05,
      "loss": 1.6991,
      "step": 2675
    },
    {
      "epoch": 0.5946666666666667,
      "grad_norm": 0.07418637722730637,
      "learning_rate": 8.124721603563474e-05,
      "loss": 0.0175,
      "step": 2676
    },
    {
      "epoch": 0.5948888888888889,
      "grad_norm": 0.07073847204446793,
      "learning_rate": 8.120267260579066e-05,
      "loss": 0.0174,
      "step": 2677
    },
    {
      "epoch": 0.5951111111111111,
      "grad_norm": 0.06910637021064758,
      "learning_rate": 8.115812917594655e-05,
      "loss": 0.0172,
      "step": 2678
    },
    {
      "epoch": 0.5953333333333334,
      "grad_norm": 1.236820936203003,
      "learning_rate": 8.111358574610245e-05,
      "loss": 1.7179,
      "step": 2679
    },
    {
      "epoch": 0.5955555555555555,
      "grad_norm": 1.0825942754745483,
      "learning_rate": 8.106904231625836e-05,
      "loss": 1.547,
      "step": 2680
    },
    {
      "epoch": 0.5957777777777777,
      "grad_norm": 0.08058004081249237,
      "learning_rate": 8.102449888641425e-05,
      "loss": 0.0191,
      "step": 2681
    },
    {
      "epoch": 0.596,
      "grad_norm": 0.08336427807807922,
      "learning_rate": 8.097995545657016e-05,
      "loss": 0.0191,
      "step": 2682
    },
    {
      "epoch": 0.5962222222222222,
      "grad_norm": 0.08025740832090378,
      "learning_rate": 8.093541202672606e-05,
      "loss": 0.019,
      "step": 2683
    },
    {
      "epoch": 0.5964444444444444,
      "grad_norm": 0.07246199250221252,
      "learning_rate": 8.089086859688197e-05,
      "loss": 0.0186,
      "step": 2684
    },
    {
      "epoch": 0.5966666666666667,
      "grad_norm": 0.07082468271255493,
      "learning_rate": 8.084632516703786e-05,
      "loss": 0.018,
      "step": 2685
    },
    {
      "epoch": 0.5968888888888889,
      "grad_norm": 0.08357842266559601,
      "learning_rate": 8.080178173719378e-05,
      "loss": 0.0182,
      "step": 2686
    },
    {
      "epoch": 0.5971111111111111,
      "grad_norm": 0.07589254528284073,
      "learning_rate": 8.075723830734967e-05,
      "loss": 0.0171,
      "step": 2687
    },
    {
      "epoch": 0.5973333333333334,
      "grad_norm": 1.0739028453826904,
      "learning_rate": 8.071269487750556e-05,
      "loss": 1.6881,
      "step": 2688
    },
    {
      "epoch": 0.5975555555555555,
      "grad_norm": 1.0699751377105713,
      "learning_rate": 8.066815144766148e-05,
      "loss": 1.4065,
      "step": 2689
    },
    {
      "epoch": 0.5977777777777777,
      "grad_norm": 0.8686650395393372,
      "learning_rate": 8.062360801781737e-05,
      "loss": 0.8745,
      "step": 2690
    },
    {
      "epoch": 0.598,
      "grad_norm": 0.19912366569042206,
      "learning_rate": 8.057906458797328e-05,
      "loss": 0.029,
      "step": 2691
    },
    {
      "epoch": 0.5982222222222222,
      "grad_norm": 1.0016716718673706,
      "learning_rate": 8.053452115812918e-05,
      "loss": 1.6162,
      "step": 2692
    },
    {
      "epoch": 0.5984444444444444,
      "grad_norm": 1.1502323150634766,
      "learning_rate": 8.048997772828509e-05,
      "loss": 1.5778,
      "step": 2693
    },
    {
      "epoch": 0.5986666666666667,
      "grad_norm": 1.196444034576416,
      "learning_rate": 8.044543429844098e-05,
      "loss": 1.2702,
      "step": 2694
    },
    {
      "epoch": 0.5988888888888889,
      "grad_norm": 1.1185574531555176,
      "learning_rate": 8.040089086859689e-05,
      "loss": 1.4555,
      "step": 2695
    },
    {
      "epoch": 0.5991111111111111,
      "grad_norm": 0.9055349230766296,
      "learning_rate": 8.035634743875279e-05,
      "loss": 1.1932,
      "step": 2696
    },
    {
      "epoch": 0.5993333333333334,
      "grad_norm": 1.1257692575454712,
      "learning_rate": 8.031180400890868e-05,
      "loss": 1.1586,
      "step": 2697
    },
    {
      "epoch": 0.5995555555555555,
      "grad_norm": 1.1968945264816284,
      "learning_rate": 8.02672605790646e-05,
      "loss": 1.194,
      "step": 2698
    },
    {
      "epoch": 0.5997777777777777,
      "grad_norm": 0.8461349606513977,
      "learning_rate": 8.02227171492205e-05,
      "loss": 0.6877,
      "step": 2699
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3101285696029663,
      "learning_rate": 8.01781737193764e-05,
      "loss": 1.1574,
      "step": 2700
    },
    {
      "epoch": 0.6,
      "eval_loss": 1.185476541519165,
      "eval_runtime": 240.0565,
      "eval_samples_per_second": 4.166,
      "eval_steps_per_second": 4.166,
      "step": 2700
    },
    {
      "epoch": 0.6002222222222222,
      "grad_norm": 0.6387638449668884,
      "learning_rate": 8.01336302895323e-05,
      "loss": 1.1591,
      "step": 2701
    },
    {
      "epoch": 0.6004444444444444,
      "grad_norm": 0.05776821821928024,
      "learning_rate": 8.00890868596882e-05,
      "loss": 0.0107,
      "step": 2702
    },
    {
      "epoch": 0.6006666666666667,
      "grad_norm": 0.05349646508693695,
      "learning_rate": 8.00445434298441e-05,
      "loss": 0.0104,
      "step": 2703
    },
    {
      "epoch": 0.6008888888888889,
      "grad_norm": 0.0507376454770565,
      "learning_rate": 8e-05,
      "loss": 0.0103,
      "step": 2704
    },
    {
      "epoch": 0.6011111111111112,
      "grad_norm": 0.7964654564857483,
      "learning_rate": 7.995545657015591e-05,
      "loss": 2.1793,
      "step": 2705
    },
    {
      "epoch": 0.6013333333333334,
      "grad_norm": 0.9528084397315979,
      "learning_rate": 7.99109131403118e-05,
      "loss": 2.3925,
      "step": 2706
    },
    {
      "epoch": 0.6015555555555555,
      "grad_norm": 0.8756290078163147,
      "learning_rate": 7.986636971046771e-05,
      "loss": 2.1808,
      "step": 2707
    },
    {
      "epoch": 0.6017777777777777,
      "grad_norm": 0.0803305059671402,
      "learning_rate": 7.982182628062361e-05,
      "loss": 0.0128,
      "step": 2708
    },
    {
      "epoch": 0.602,
      "grad_norm": 0.08680860698223114,
      "learning_rate": 7.97772828507795e-05,
      "loss": 0.0128,
      "step": 2709
    },
    {
      "epoch": 0.6022222222222222,
      "grad_norm": 0.08112699538469315,
      "learning_rate": 7.973273942093543e-05,
      "loss": 0.0125,
      "step": 2710
    },
    {
      "epoch": 0.6024444444444444,
      "grad_norm": 0.5621529221534729,
      "learning_rate": 7.968819599109132e-05,
      "loss": 0.8918,
      "step": 2711
    },
    {
      "epoch": 0.6026666666666667,
      "grad_norm": 0.8549271821975708,
      "learning_rate": 7.964365256124722e-05,
      "loss": 1.9602,
      "step": 2712
    },
    {
      "epoch": 0.6028888888888889,
      "grad_norm": 0.8815329670906067,
      "learning_rate": 7.959910913140313e-05,
      "loss": 2.0818,
      "step": 2713
    },
    {
      "epoch": 0.6031111111111112,
      "grad_norm": 0.9298145174980164,
      "learning_rate": 7.955456570155902e-05,
      "loss": 2.07,
      "step": 2714
    },
    {
      "epoch": 0.6033333333333334,
      "grad_norm": 0.9375580549240112,
      "learning_rate": 7.951002227171492e-05,
      "loss": 2.0471,
      "step": 2715
    },
    {
      "epoch": 0.6035555555555555,
      "grad_norm": 0.8987560868263245,
      "learning_rate": 7.946547884187083e-05,
      "loss": 2.002,
      "step": 2716
    },
    {
      "epoch": 0.6037777777777777,
      "grad_norm": 1.3601025342941284,
      "learning_rate": 7.942093541202674e-05,
      "loss": 0.0422,
      "step": 2717
    },
    {
      "epoch": 0.604,
      "grad_norm": 0.9038719534873962,
      "learning_rate": 7.937639198218263e-05,
      "loss": 1.99,
      "step": 2718
    },
    {
      "epoch": 0.6042222222222222,
      "grad_norm": 0.9744821786880493,
      "learning_rate": 7.933184855233853e-05,
      "loss": 1.9441,
      "step": 2719
    },
    {
      "epoch": 0.6044444444444445,
      "grad_norm": 1.00676691532135,
      "learning_rate": 7.928730512249444e-05,
      "loss": 1.658,
      "step": 2720
    },
    {
      "epoch": 0.6046666666666667,
      "grad_norm": 1.040614128112793,
      "learning_rate": 7.924276169265033e-05,
      "loss": 1.8342,
      "step": 2721
    },
    {
      "epoch": 0.6048888888888889,
      "grad_norm": 1.0080763101577759,
      "learning_rate": 7.919821826280625e-05,
      "loss": 1.9057,
      "step": 2722
    },
    {
      "epoch": 0.6051111111111112,
      "grad_norm": 0.6371939182281494,
      "learning_rate": 7.915367483296214e-05,
      "loss": 0.7754,
      "step": 2723
    },
    {
      "epoch": 0.6053333333333333,
      "grad_norm": 0.08159324526786804,
      "learning_rate": 7.910913140311805e-05,
      "loss": 0.0167,
      "step": 2724
    },
    {
      "epoch": 0.6055555555555555,
      "grad_norm": 0.0773693099617958,
      "learning_rate": 7.906458797327395e-05,
      "loss": 0.0165,
      "step": 2725
    },
    {
      "epoch": 0.6057777777777777,
      "grad_norm": 0.6714785695075989,
      "learning_rate": 7.902004454342984e-05,
      "loss": 0.824,
      "step": 2726
    },
    {
      "epoch": 0.606,
      "grad_norm": 0.6955849528312683,
      "learning_rate": 7.897550111358575e-05,
      "loss": 0.9747,
      "step": 2727
    },
    {
      "epoch": 0.6062222222222222,
      "grad_norm": 0.12702590227127075,
      "learning_rate": 7.893095768374164e-05,
      "loss": 0.02,
      "step": 2728
    },
    {
      "epoch": 0.6064444444444445,
      "grad_norm": 0.9727218151092529,
      "learning_rate": 7.888641425389756e-05,
      "loss": 1.8577,
      "step": 2729
    },
    {
      "epoch": 0.6066666666666667,
      "grad_norm": 1.186213493347168,
      "learning_rate": 7.884187082405345e-05,
      "loss": 2.0189,
      "step": 2730
    },
    {
      "epoch": 0.6068888888888889,
      "grad_norm": 0.9478958249092102,
      "learning_rate": 7.879732739420936e-05,
      "loss": 1.7233,
      "step": 2731
    },
    {
      "epoch": 0.6071111111111112,
      "grad_norm": 0.9013267159461975,
      "learning_rate": 7.875278396436526e-05,
      "loss": 1.6254,
      "step": 2732
    },
    {
      "epoch": 0.6073333333333333,
      "grad_norm": 1.2980573177337646,
      "learning_rate": 7.870824053452117e-05,
      "loss": 2.0018,
      "step": 2733
    },
    {
      "epoch": 0.6075555555555555,
      "grad_norm": 0.6931120753288269,
      "learning_rate": 7.866369710467706e-05,
      "loss": 0.7662,
      "step": 2734
    },
    {
      "epoch": 0.6077777777777778,
      "grad_norm": 0.0702509731054306,
      "learning_rate": 7.861915367483296e-05,
      "loss": 0.0174,
      "step": 2735
    },
    {
      "epoch": 0.608,
      "grad_norm": 0.9084820747375488,
      "learning_rate": 7.857461024498887e-05,
      "loss": 0.7707,
      "step": 2736
    },
    {
      "epoch": 0.6082222222222222,
      "grad_norm": 0.06436257809400558,
      "learning_rate": 7.853006681514476e-05,
      "loss": 0.0169,
      "step": 2737
    },
    {
      "epoch": 0.6084444444444445,
      "grad_norm": 1.086901068687439,
      "learning_rate": 7.848552338530068e-05,
      "loss": 1.8021,
      "step": 2738
    },
    {
      "epoch": 0.6086666666666667,
      "grad_norm": 0.799363911151886,
      "learning_rate": 7.844097995545657e-05,
      "loss": 0.9215,
      "step": 2739
    },
    {
      "epoch": 0.6088888888888889,
      "grad_norm": 0.101883664727211,
      "learning_rate": 7.839643652561248e-05,
      "loss": 0.0245,
      "step": 2740
    },
    {
      "epoch": 0.6091111111111112,
      "grad_norm": 1.1138994693756104,
      "learning_rate": 7.835189309576838e-05,
      "loss": 1.3771,
      "step": 2741
    },
    {
      "epoch": 0.6093333333333333,
      "grad_norm": 1.1364567279815674,
      "learning_rate": 7.830734966592427e-05,
      "loss": 1.3817,
      "step": 2742
    },
    {
      "epoch": 0.6095555555555555,
      "grad_norm": 1.1259334087371826,
      "learning_rate": 7.826280623608018e-05,
      "loss": 1.5021,
      "step": 2743
    },
    {
      "epoch": 0.6097777777777778,
      "grad_norm": 1.2044109106063843,
      "learning_rate": 7.821826280623609e-05,
      "loss": 1.5266,
      "step": 2744
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1969807147979736,
      "learning_rate": 7.817371937639199e-05,
      "loss": 0.864,
      "step": 2745
    },
    {
      "epoch": 0.6102222222222222,
      "grad_norm": 1.0737295150756836,
      "learning_rate": 7.812917594654788e-05,
      "loss": 1.3506,
      "step": 2746
    },
    {
      "epoch": 0.6104444444444445,
      "grad_norm": 1.2049528360366821,
      "learning_rate": 7.808463251670379e-05,
      "loss": 1.2664,
      "step": 2747
    },
    {
      "epoch": 0.6106666666666667,
      "grad_norm": 1.0982084274291992,
      "learning_rate": 7.80400890868597e-05,
      "loss": 1.2182,
      "step": 2748
    },
    {
      "epoch": 0.6108888888888889,
      "grad_norm": 1.0116227865219116,
      "learning_rate": 7.799554565701559e-05,
      "loss": 0.919,
      "step": 2749
    },
    {
      "epoch": 0.6111111111111112,
      "grad_norm": 1.211203932762146,
      "learning_rate": 7.79510022271715e-05,
      "loss": 1.043,
      "step": 2750
    },
    {
      "epoch": 0.6113333333333333,
      "grad_norm": 0.04495595768094063,
      "learning_rate": 7.79064587973274e-05,
      "loss": 0.0098,
      "step": 2751
    },
    {
      "epoch": 0.6115555555555555,
      "grad_norm": 0.885490357875824,
      "learning_rate": 7.78619153674833e-05,
      "loss": 2.1379,
      "step": 2752
    },
    {
      "epoch": 0.6117777777777778,
      "grad_norm": 0.5612982511520386,
      "learning_rate": 7.78173719376392e-05,
      "loss": 1.091,
      "step": 2753
    },
    {
      "epoch": 0.612,
      "grad_norm": 0.8652318120002747,
      "learning_rate": 7.77728285077951e-05,
      "loss": 2.1604,
      "step": 2754
    },
    {
      "epoch": 0.6122222222222222,
      "grad_norm": 0.5881816148757935,
      "learning_rate": 7.7728285077951e-05,
      "loss": 0.8575,
      "step": 2755
    },
    {
      "epoch": 0.6124444444444445,
      "grad_norm": 0.06693144142627716,
      "learning_rate": 7.768374164810691e-05,
      "loss": 0.0108,
      "step": 2756
    },
    {
      "epoch": 0.6126666666666667,
      "grad_norm": 0.0678841769695282,
      "learning_rate": 7.763919821826281e-05,
      "loss": 0.0108,
      "step": 2757
    },
    {
      "epoch": 0.6128888888888889,
      "grad_norm": 0.06649115681648254,
      "learning_rate": 7.75946547884187e-05,
      "loss": 0.0105,
      "step": 2758
    },
    {
      "epoch": 0.6131111111111112,
      "grad_norm": 0.7412600517272949,
      "learning_rate": 7.755011135857461e-05,
      "loss": 1.1085,
      "step": 2759
    },
    {
      "epoch": 0.6133333333333333,
      "grad_norm": 0.8898929357528687,
      "learning_rate": 7.750556792873052e-05,
      "loss": 2.0215,
      "step": 2760
    },
    {
      "epoch": 0.6135555555555555,
      "grad_norm": 0.8269761204719543,
      "learning_rate": 7.746102449888641e-05,
      "loss": 1.7194,
      "step": 2761
    },
    {
      "epoch": 0.6137777777777778,
      "grad_norm": 0.9091681241989136,
      "learning_rate": 7.741648106904233e-05,
      "loss": 2.13,
      "step": 2762
    },
    {
      "epoch": 0.614,
      "grad_norm": 0.9192904233932495,
      "learning_rate": 7.737193763919822e-05,
      "loss": 2.057,
      "step": 2763
    },
    {
      "epoch": 0.6142222222222222,
      "grad_norm": 1.021559476852417,
      "learning_rate": 7.732739420935412e-05,
      "loss": 2.085,
      "step": 2764
    },
    {
      "epoch": 0.6144444444444445,
      "grad_norm": 0.8879882097244263,
      "learning_rate": 7.728285077951003e-05,
      "loss": 1.7405,
      "step": 2765
    },
    {
      "epoch": 0.6146666666666667,
      "grad_norm": 0.9193564057350159,
      "learning_rate": 7.723830734966592e-05,
      "loss": 1.7514,
      "step": 2766
    },
    {
      "epoch": 0.6148888888888889,
      "grad_norm": 0.8495803475379944,
      "learning_rate": 7.719376391982183e-05,
      "loss": 1.1789,
      "step": 2767
    },
    {
      "epoch": 0.6151111111111112,
      "grad_norm": 0.9236475229263306,
      "learning_rate": 7.714922048997773e-05,
      "loss": 1.9599,
      "step": 2768
    },
    {
      "epoch": 0.6153333333333333,
      "grad_norm": 1.0752800703048706,
      "learning_rate": 7.710467706013364e-05,
      "loss": 1.8044,
      "step": 2769
    },
    {
      "epoch": 0.6155555555555555,
      "grad_norm": 0.9333148002624512,
      "learning_rate": 7.706013363028953e-05,
      "loss": 2.0566,
      "step": 2770
    },
    {
      "epoch": 0.6157777777777778,
      "grad_norm": 0.07040537893772125,
      "learning_rate": 7.701559020044545e-05,
      "loss": 0.016,
      "step": 2771
    },
    {
      "epoch": 0.616,
      "grad_norm": 0.07483159750699997,
      "learning_rate": 7.697104677060134e-05,
      "loss": 0.0166,
      "step": 2772
    },
    {
      "epoch": 0.6162222222222222,
      "grad_norm": 0.07214007526636124,
      "learning_rate": 7.692650334075723e-05,
      "loss": 0.016,
      "step": 2773
    },
    {
      "epoch": 0.6164444444444445,
      "grad_norm": 0.07688527554273605,
      "learning_rate": 7.688195991091315e-05,
      "loss": 0.0162,
      "step": 2774
    },
    {
      "epoch": 0.6166666666666667,
      "grad_norm": 0.6190181374549866,
      "learning_rate": 7.683741648106904e-05,
      "loss": 0.8162,
      "step": 2775
    },
    {
      "epoch": 0.6168888888888889,
      "grad_norm": 0.9131618142127991,
      "learning_rate": 7.679287305122495e-05,
      "loss": 0.9931,
      "step": 2776
    },
    {
      "epoch": 0.6171111111111112,
      "grad_norm": 0.9522696137428284,
      "learning_rate": 7.674832962138085e-05,
      "loss": 2.0182,
      "step": 2777
    },
    {
      "epoch": 0.6173333333333333,
      "grad_norm": 0.9142736196517944,
      "learning_rate": 7.670378619153676e-05,
      "loss": 1.4818,
      "step": 2778
    },
    {
      "epoch": 0.6175555555555555,
      "grad_norm": 0.6549271941184998,
      "learning_rate": 7.665924276169265e-05,
      "loss": 0.8369,
      "step": 2779
    },
    {
      "epoch": 0.6177777777777778,
      "grad_norm": 0.5640666484832764,
      "learning_rate": 7.661469933184856e-05,
      "loss": 0.6976,
      "step": 2780
    },
    {
      "epoch": 0.618,
      "grad_norm": 0.9751452803611755,
      "learning_rate": 7.657015590200446e-05,
      "loss": 1.9721,
      "step": 2781
    },
    {
      "epoch": 0.6182222222222222,
      "grad_norm": 1.0083777904510498,
      "learning_rate": 7.652561247216035e-05,
      "loss": 1.8079,
      "step": 2782
    },
    {
      "epoch": 0.6184444444444445,
      "grad_norm": 0.1356029212474823,
      "learning_rate": 7.648106904231627e-05,
      "loss": 0.0189,
      "step": 2783
    },
    {
      "epoch": 0.6186666666666667,
      "grad_norm": 0.09193126112222672,
      "learning_rate": 7.643652561247216e-05,
      "loss": 0.0188,
      "step": 2784
    },
    {
      "epoch": 0.6188888888888889,
      "grad_norm": 0.07181335985660553,
      "learning_rate": 7.639198218262807e-05,
      "loss": 0.0176,
      "step": 2785
    },
    {
      "epoch": 0.6191111111111111,
      "grad_norm": 0.07424760609865189,
      "learning_rate": 7.634743875278397e-05,
      "loss": 0.0175,
      "step": 2786
    },
    {
      "epoch": 0.6193333333333333,
      "grad_norm": 0.07973092794418335,
      "learning_rate": 7.630289532293987e-05,
      "loss": 0.0176,
      "step": 2787
    },
    {
      "epoch": 0.6195555555555555,
      "grad_norm": 0.665320634841919,
      "learning_rate": 7.625835189309577e-05,
      "loss": 0.8284,
      "step": 2788
    },
    {
      "epoch": 0.6197777777777778,
      "grad_norm": 1.122811198234558,
      "learning_rate": 7.621380846325168e-05,
      "loss": 1.87,
      "step": 2789
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7436560988426208,
      "learning_rate": 7.616926503340758e-05,
      "loss": 0.9044,
      "step": 2790
    },
    {
      "epoch": 0.6202222222222222,
      "grad_norm": 1.0274792909622192,
      "learning_rate": 7.612472160356347e-05,
      "loss": 1.7865,
      "step": 2791
    },
    {
      "epoch": 0.6204444444444445,
      "grad_norm": 1.0092792510986328,
      "learning_rate": 7.608017817371938e-05,
      "loss": 1.4015,
      "step": 2792
    },
    {
      "epoch": 0.6206666666666667,
      "grad_norm": 0.9540352821350098,
      "learning_rate": 7.603563474387529e-05,
      "loss": 1.3971,
      "step": 2793
    },
    {
      "epoch": 0.6208888888888889,
      "grad_norm": 1.1279139518737793,
      "learning_rate": 7.599109131403118e-05,
      "loss": 1.6411,
      "step": 2794
    },
    {
      "epoch": 0.6211111111111111,
      "grad_norm": 0.9098860025405884,
      "learning_rate": 7.59465478841871e-05,
      "loss": 0.7837,
      "step": 2795
    },
    {
      "epoch": 0.6213333333333333,
      "grad_norm": 1.027552843093872,
      "learning_rate": 7.590200445434299e-05,
      "loss": 1.2958,
      "step": 2796
    },
    {
      "epoch": 0.6215555555555555,
      "grad_norm": 0.9595284461975098,
      "learning_rate": 7.585746102449889e-05,
      "loss": 0.9841,
      "step": 2797
    },
    {
      "epoch": 0.6217777777777778,
      "grad_norm": 0.8761973977088928,
      "learning_rate": 7.58129175946548e-05,
      "loss": 0.5813,
      "step": 2798
    },
    {
      "epoch": 0.622,
      "grad_norm": 1.033424973487854,
      "learning_rate": 7.576837416481069e-05,
      "loss": 1.3788,
      "step": 2799
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 0.8536433577537537,
      "learning_rate": 7.57238307349666e-05,
      "loss": 0.5428,
      "step": 2800
    },
    {
      "epoch": 0.6224444444444445,
      "grad_norm": 0.5578122138977051,
      "learning_rate": 7.567928730512249e-05,
      "loss": 1.0879,
      "step": 2801
    },
    {
      "epoch": 0.6226666666666667,
      "grad_norm": 0.09419236332178116,
      "learning_rate": 7.56347438752784e-05,
      "loss": 0.0109,
      "step": 2802
    },
    {
      "epoch": 0.6228888888888889,
      "grad_norm": 0.618521511554718,
      "learning_rate": 7.55902004454343e-05,
      "loss": 1.218,
      "step": 2803
    },
    {
      "epoch": 0.6231111111111111,
      "grad_norm": 0.05352301150560379,
      "learning_rate": 7.55456570155902e-05,
      "loss": 0.0103,
      "step": 2804
    },
    {
      "epoch": 0.6233333333333333,
      "grad_norm": 0.6948210000991821,
      "learning_rate": 7.550111358574611e-05,
      "loss": 1.0658,
      "step": 2805
    },
    {
      "epoch": 0.6235555555555555,
      "grad_norm": 0.6293577551841736,
      "learning_rate": 7.5456570155902e-05,
      "loss": 0.9913,
      "step": 2806
    },
    {
      "epoch": 0.6237777777777778,
      "grad_norm": 0.8518357276916504,
      "learning_rate": 7.54120267260579e-05,
      "loss": 2.4278,
      "step": 2807
    },
    {
      "epoch": 0.624,
      "grad_norm": 0.9139655828475952,
      "learning_rate": 7.536748329621381e-05,
      "loss": 2.1537,
      "step": 2808
    },
    {
      "epoch": 0.6242222222222222,
      "grad_norm": 0.08207522332668304,
      "learning_rate": 7.532293986636972e-05,
      "loss": 0.012,
      "step": 2809
    },
    {
      "epoch": 0.6244444444444445,
      "grad_norm": 0.0847187265753746,
      "learning_rate": 7.527839643652561e-05,
      "loss": 0.012,
      "step": 2810
    },
    {
      "epoch": 0.6246666666666667,
      "grad_norm": 0.658014178276062,
      "learning_rate": 7.523385300668151e-05,
      "loss": 0.9869,
      "step": 2811
    },
    {
      "epoch": 0.6248888888888889,
      "grad_norm": 0.8833963871002197,
      "learning_rate": 7.518930957683742e-05,
      "loss": 1.9448,
      "step": 2812
    },
    {
      "epoch": 0.6251111111111111,
      "grad_norm": 0.8396661877632141,
      "learning_rate": 7.514476614699331e-05,
      "loss": 2.1203,
      "step": 2813
    },
    {
      "epoch": 0.6253333333333333,
      "grad_norm": 0.8181087970733643,
      "learning_rate": 7.510022271714923e-05,
      "loss": 1.8757,
      "step": 2814
    },
    {
      "epoch": 0.6255555555555555,
      "grad_norm": 0.8380526900291443,
      "learning_rate": 7.505567928730512e-05,
      "loss": 1.7779,
      "step": 2815
    },
    {
      "epoch": 0.6257777777777778,
      "grad_norm": 1.1210718154907227,
      "learning_rate": 7.501113585746103e-05,
      "loss": 2.1145,
      "step": 2816
    },
    {
      "epoch": 0.626,
      "grad_norm": 1.0055640935897827,
      "learning_rate": 7.496659242761693e-05,
      "loss": 1.8104,
      "step": 2817
    },
    {
      "epoch": 0.6262222222222222,
      "grad_norm": 0.8134049773216248,
      "learning_rate": 7.492204899777284e-05,
      "loss": 0.9472,
      "step": 2818
    },
    {
      "epoch": 0.6264444444444445,
      "grad_norm": 0.9640253782272339,
      "learning_rate": 7.487750556792873e-05,
      "loss": 2.0194,
      "step": 2819
    },
    {
      "epoch": 0.6266666666666667,
      "grad_norm": 0.901558518409729,
      "learning_rate": 7.483296213808464e-05,
      "loss": 1.7276,
      "step": 2820
    },
    {
      "epoch": 0.6268888888888889,
      "grad_norm": 0.9449894428253174,
      "learning_rate": 7.478841870824054e-05,
      "loss": 2.0367,
      "step": 2821
    },
    {
      "epoch": 0.6271111111111111,
      "grad_norm": 1.145552396774292,
      "learning_rate": 7.474387527839643e-05,
      "loss": 2.5524,
      "step": 2822
    },
    {
      "epoch": 0.6273333333333333,
      "grad_norm": 1.0274474620819092,
      "learning_rate": 7.469933184855235e-05,
      "loss": 1.9253,
      "step": 2823
    },
    {
      "epoch": 0.6275555555555555,
      "grad_norm": 0.9850492477416992,
      "learning_rate": 7.465478841870824e-05,
      "loss": 1.848,
      "step": 2824
    },
    {
      "epoch": 0.6277777777777778,
      "grad_norm": 0.0741516649723053,
      "learning_rate": 7.461024498886415e-05,
      "loss": 0.0151,
      "step": 2825
    },
    {
      "epoch": 0.628,
      "grad_norm": 0.07517000287771225,
      "learning_rate": 7.456570155902005e-05,
      "loss": 0.0145,
      "step": 2826
    },
    {
      "epoch": 0.6282222222222222,
      "grad_norm": 0.07452172785997391,
      "learning_rate": 7.452115812917595e-05,
      "loss": 0.0153,
      "step": 2827
    },
    {
      "epoch": 0.6284444444444445,
      "grad_norm": 1.0018540620803833,
      "learning_rate": 7.447661469933185e-05,
      "loss": 1.8204,
      "step": 2828
    },
    {
      "epoch": 0.6286666666666667,
      "grad_norm": 0.941403865814209,
      "learning_rate": 7.443207126948776e-05,
      "loss": 1.9339,
      "step": 2829
    },
    {
      "epoch": 0.6288888888888889,
      "grad_norm": 1.0924185514450073,
      "learning_rate": 7.438752783964366e-05,
      "loss": 1.6405,
      "step": 2830
    },
    {
      "epoch": 0.6291111111111111,
      "grad_norm": 0.6763534545898438,
      "learning_rate": 7.434298440979955e-05,
      "loss": 0.661,
      "step": 2831
    },
    {
      "epoch": 0.6293333333333333,
      "grad_norm": 0.7361119389533997,
      "learning_rate": 7.429844097995546e-05,
      "loss": 0.8457,
      "step": 2832
    },
    {
      "epoch": 0.6295555555555555,
      "grad_norm": 0.0745445117354393,
      "learning_rate": 7.425389755011136e-05,
      "loss": 0.0192,
      "step": 2833
    },
    {
      "epoch": 0.6297777777777778,
      "grad_norm": 1.0449111461639404,
      "learning_rate": 7.420935412026726e-05,
      "loss": 1.599,
      "step": 2834
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0760903358459473,
      "learning_rate": 7.416481069042317e-05,
      "loss": 1.6259,
      "step": 2835
    },
    {
      "epoch": 0.6302222222222222,
      "grad_norm": 0.726026713848114,
      "learning_rate": 7.412026726057907e-05,
      "loss": 0.7053,
      "step": 2836
    },
    {
      "epoch": 0.6304444444444445,
      "grad_norm": 0.07237496972084045,
      "learning_rate": 7.407572383073497e-05,
      "loss": 0.0162,
      "step": 2837
    },
    {
      "epoch": 0.6306666666666667,
      "grad_norm": 0.06699542701244354,
      "learning_rate": 7.403118040089088e-05,
      "loss": 0.0162,
      "step": 2838
    },
    {
      "epoch": 0.6308888888888889,
      "grad_norm": 0.7108114957809448,
      "learning_rate": 7.398663697104677e-05,
      "loss": 0.727,
      "step": 2839
    },
    {
      "epoch": 0.6311111111111111,
      "grad_norm": 0.9890654683113098,
      "learning_rate": 7.394209354120267e-05,
      "loss": 1.4659,
      "step": 2840
    },
    {
      "epoch": 0.6313333333333333,
      "grad_norm": 1.0247973203659058,
      "learning_rate": 7.389755011135858e-05,
      "loss": 1.8139,
      "step": 2841
    },
    {
      "epoch": 0.6315555555555555,
      "grad_norm": 1.0868635177612305,
      "learning_rate": 7.385300668151449e-05,
      "loss": 1.6254,
      "step": 2842
    },
    {
      "epoch": 0.6317777777777778,
      "grad_norm": 1.1002624034881592,
      "learning_rate": 7.380846325167038e-05,
      "loss": 1.7458,
      "step": 2843
    },
    {
      "epoch": 0.632,
      "grad_norm": 1.13886296749115,
      "learning_rate": 7.376391982182628e-05,
      "loss": 1.5266,
      "step": 2844
    },
    {
      "epoch": 0.6322222222222222,
      "grad_norm": 1.1197274923324585,
      "learning_rate": 7.371937639198219e-05,
      "loss": 1.3135,
      "step": 2845
    },
    {
      "epoch": 0.6324444444444445,
      "grad_norm": 1.2070372104644775,
      "learning_rate": 7.367483296213808e-05,
      "loss": 1.1524,
      "step": 2846
    },
    {
      "epoch": 0.6326666666666667,
      "grad_norm": 0.9080721139907837,
      "learning_rate": 7.3630289532294e-05,
      "loss": 0.7859,
      "step": 2847
    },
    {
      "epoch": 0.6328888888888888,
      "grad_norm": 1.1644912958145142,
      "learning_rate": 7.358574610244989e-05,
      "loss": 1.3371,
      "step": 2848
    },
    {
      "epoch": 0.6331111111111111,
      "grad_norm": 0.7726056575775146,
      "learning_rate": 7.35412026726058e-05,
      "loss": 0.6174,
      "step": 2849
    },
    {
      "epoch": 0.6333333333333333,
      "grad_norm": 1.1530473232269287,
      "learning_rate": 7.34966592427617e-05,
      "loss": 0.6304,
      "step": 2850
    },
    {
      "epoch": 0.6335555555555555,
      "grad_norm": 0.044335298240184784,
      "learning_rate": 7.345211581291759e-05,
      "loss": 0.0101,
      "step": 2851
    },
    {
      "epoch": 0.6337777777777778,
      "grad_norm": 0.615163266658783,
      "learning_rate": 7.34075723830735e-05,
      "loss": 1.1783,
      "step": 2852
    },
    {
      "epoch": 0.634,
      "grad_norm": 0.7737541794776917,
      "learning_rate": 7.33630289532294e-05,
      "loss": 2.0947,
      "step": 2853
    },
    {
      "epoch": 0.6342222222222222,
      "grad_norm": 0.4981004297733307,
      "learning_rate": 7.331848552338531e-05,
      "loss": 0.9559,
      "step": 2854
    },
    {
      "epoch": 0.6344444444444445,
      "grad_norm": 0.7779539823532104,
      "learning_rate": 7.32739420935412e-05,
      "loss": 0.9773,
      "step": 2855
    },
    {
      "epoch": 0.6346666666666667,
      "grad_norm": 0.9028410315513611,
      "learning_rate": 7.322939866369712e-05,
      "loss": 2.1857,
      "step": 2856
    },
    {
      "epoch": 0.6348888888888888,
      "grad_norm": 0.757631778717041,
      "learning_rate": 7.318485523385301e-05,
      "loss": 1.4806,
      "step": 2857
    },
    {
      "epoch": 0.6351111111111111,
      "grad_norm": 0.8450924754142761,
      "learning_rate": 7.31403118040089e-05,
      "loss": 2.0115,
      "step": 2858
    },
    {
      "epoch": 0.6353333333333333,
      "grad_norm": 0.8707918524742126,
      "learning_rate": 7.309576837416482e-05,
      "loss": 1.8007,
      "step": 2859
    },
    {
      "epoch": 0.6355555555555555,
      "grad_norm": 0.8273522257804871,
      "learning_rate": 7.305122494432071e-05,
      "loss": 1.8579,
      "step": 2860
    },
    {
      "epoch": 0.6357777777777778,
      "grad_norm": 0.9253904223442078,
      "learning_rate": 7.300668151447662e-05,
      "loss": 2.0011,
      "step": 2861
    },
    {
      "epoch": 0.636,
      "grad_norm": 0.8965882658958435,
      "learning_rate": 7.296213808463252e-05,
      "loss": 2.0516,
      "step": 2862
    },
    {
      "epoch": 0.6362222222222222,
      "grad_norm": 0.9190070629119873,
      "learning_rate": 7.291759465478843e-05,
      "loss": 1.7634,
      "step": 2863
    },
    {
      "epoch": 0.6364444444444445,
      "grad_norm": 0.919740617275238,
      "learning_rate": 7.287305122494432e-05,
      "loss": 1.7735,
      "step": 2864
    },
    {
      "epoch": 0.6366666666666667,
      "grad_norm": 1.0333331823349,
      "learning_rate": 7.282850779510023e-05,
      "loss": 2.2958,
      "step": 2865
    },
    {
      "epoch": 0.6368888888888888,
      "grad_norm": 0.9126089811325073,
      "learning_rate": 7.278396436525613e-05,
      "loss": 1.9232,
      "step": 2866
    },
    {
      "epoch": 0.6371111111111111,
      "grad_norm": 0.8983725309371948,
      "learning_rate": 7.273942093541202e-05,
      "loss": 1.6496,
      "step": 2867
    },
    {
      "epoch": 0.6373333333333333,
      "grad_norm": 0.07827930152416229,
      "learning_rate": 7.269487750556794e-05,
      "loss": 0.0164,
      "step": 2868
    },
    {
      "epoch": 0.6375555555555555,
      "grad_norm": 0.07462301850318909,
      "learning_rate": 7.265033407572384e-05,
      "loss": 0.0165,
      "step": 2869
    },
    {
      "epoch": 0.6377777777777778,
      "grad_norm": 0.6954313516616821,
      "learning_rate": 7.260579064587974e-05,
      "loss": 0.9052,
      "step": 2870
    },
    {
      "epoch": 0.638,
      "grad_norm": 0.888656497001648,
      "learning_rate": 7.256124721603563e-05,
      "loss": 1.6937,
      "step": 2871
    },
    {
      "epoch": 0.6382222222222222,
      "grad_norm": 0.1639782041311264,
      "learning_rate": 7.251670378619154e-05,
      "loss": 0.0263,
      "step": 2872
    },
    {
      "epoch": 0.6384444444444445,
      "grad_norm": 0.6530879735946655,
      "learning_rate": 7.247216035634744e-05,
      "loss": 0.8118,
      "step": 2873
    },
    {
      "epoch": 0.6386666666666667,
      "grad_norm": 0.9214199185371399,
      "learning_rate": 7.242761692650333e-05,
      "loss": 1.5913,
      "step": 2874
    },
    {
      "epoch": 0.6388888888888888,
      "grad_norm": 0.9698308706283569,
      "learning_rate": 7.238307349665925e-05,
      "loss": 1.9825,
      "step": 2875
    },
    {
      "epoch": 0.6391111111111111,
      "grad_norm": 1.0415962934494019,
      "learning_rate": 7.233853006681515e-05,
      "loss": 1.738,
      "step": 2876
    },
    {
      "epoch": 0.6393333333333333,
      "grad_norm": 1.2774953842163086,
      "learning_rate": 7.229398663697105e-05,
      "loss": 1.7611,
      "step": 2877
    },
    {
      "epoch": 0.6395555555555555,
      "grad_norm": 0.06831032782793045,
      "learning_rate": 7.224944320712696e-05,
      "loss": 0.018,
      "step": 2878
    },
    {
      "epoch": 0.6397777777777778,
      "grad_norm": 0.07592401653528214,
      "learning_rate": 7.220489977728285e-05,
      "loss": 0.0179,
      "step": 2879
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.06443265825510025,
      "learning_rate": 7.216035634743875e-05,
      "loss": 0.0179,
      "step": 2880
    },
    {
      "epoch": 0.6402222222222222,
      "grad_norm": 0.7548007369041443,
      "learning_rate": 7.211581291759466e-05,
      "loss": 0.7391,
      "step": 2881
    },
    {
      "epoch": 0.6404444444444445,
      "grad_norm": 1.4142894744873047,
      "learning_rate": 7.207126948775056e-05,
      "loss": 0.877,
      "step": 2882
    },
    {
      "epoch": 0.6406666666666667,
      "grad_norm": 0.07341942191123962,
      "learning_rate": 7.202672605790646e-05,
      "loss": 0.0166,
      "step": 2883
    },
    {
      "epoch": 0.6408888888888888,
      "grad_norm": 0.06469117850065231,
      "learning_rate": 7.198218262806236e-05,
      "loss": 0.0172,
      "step": 2884
    },
    {
      "epoch": 0.6411111111111111,
      "grad_norm": 0.94828861951828,
      "learning_rate": 7.193763919821827e-05,
      "loss": 1.5546,
      "step": 2885
    },
    {
      "epoch": 0.6413333333333333,
      "grad_norm": 1.0259699821472168,
      "learning_rate": 7.189309576837416e-05,
      "loss": 1.6598,
      "step": 2886
    },
    {
      "epoch": 0.6415555555555555,
      "grad_norm": 1.1069990396499634,
      "learning_rate": 7.184855233853008e-05,
      "loss": 1.8913,
      "step": 2887
    },
    {
      "epoch": 0.6417777777777778,
      "grad_norm": 1.0191295146942139,
      "learning_rate": 7.180400890868597e-05,
      "loss": 1.5918,
      "step": 2888
    },
    {
      "epoch": 0.642,
      "grad_norm": 1.0177274942398071,
      "learning_rate": 7.175946547884187e-05,
      "loss": 1.6859,
      "step": 2889
    },
    {
      "epoch": 0.6422222222222222,
      "grad_norm": 1.0870459079742432,
      "learning_rate": 7.171492204899778e-05,
      "loss": 2.0939,
      "step": 2890
    },
    {
      "epoch": 0.6424444444444445,
      "grad_norm": 0.7583724856376648,
      "learning_rate": 7.167037861915367e-05,
      "loss": 0.8794,
      "step": 2891
    },
    {
      "epoch": 0.6426666666666667,
      "grad_norm": 1.1080893278121948,
      "learning_rate": 7.162583518930958e-05,
      "loss": 1.653,
      "step": 2892
    },
    {
      "epoch": 0.6428888888888888,
      "grad_norm": 0.999817967414856,
      "learning_rate": 7.158129175946548e-05,
      "loss": 1.3451,
      "step": 2893
    },
    {
      "epoch": 0.6431111111111111,
      "grad_norm": 1.2734150886535645,
      "learning_rate": 7.153674832962139e-05,
      "loss": 1.4211,
      "step": 2894
    },
    {
      "epoch": 0.6433333333333333,
      "grad_norm": 1.1219244003295898,
      "learning_rate": 7.149220489977728e-05,
      "loss": 1.2831,
      "step": 2895
    },
    {
      "epoch": 0.6435555555555555,
      "grad_norm": 1.1147305965423584,
      "learning_rate": 7.144766146993318e-05,
      "loss": 1.2474,
      "step": 2896
    },
    {
      "epoch": 0.6437777777777778,
      "grad_norm": 1.0683484077453613,
      "learning_rate": 7.140311804008909e-05,
      "loss": 1.2979,
      "step": 2897
    },
    {
      "epoch": 0.644,
      "grad_norm": 0.7551613450050354,
      "learning_rate": 7.135857461024498e-05,
      "loss": 0.7246,
      "step": 2898
    },
    {
      "epoch": 0.6442222222222223,
      "grad_norm": 1.0384818315505981,
      "learning_rate": 7.13140311804009e-05,
      "loss": 1.0033,
      "step": 2899
    },
    {
      "epoch": 0.6444444444444445,
      "grad_norm": 1.0102633237838745,
      "learning_rate": 7.126948775055679e-05,
      "loss": 0.8248,
      "step": 2900
    },
    {
      "epoch": 0.6446666666666667,
      "grad_norm": 0.05665779858827591,
      "learning_rate": 7.12249443207127e-05,
      "loss": 0.0108,
      "step": 2901
    },
    {
      "epoch": 0.6448888888888888,
      "grad_norm": 0.054136212915182114,
      "learning_rate": 7.11804008908686e-05,
      "loss": 0.0107,
      "step": 2902
    },
    {
      "epoch": 0.6451111111111111,
      "grad_norm": 0.8140088319778442,
      "learning_rate": 7.113585746102451e-05,
      "loss": 2.1089,
      "step": 2903
    },
    {
      "epoch": 0.6453333333333333,
      "grad_norm": 0.8073779344558716,
      "learning_rate": 7.10913140311804e-05,
      "loss": 2.156,
      "step": 2904
    },
    {
      "epoch": 0.6455555555555555,
      "grad_norm": 0.8879762887954712,
      "learning_rate": 7.10467706013363e-05,
      "loss": 2.5526,
      "step": 2905
    },
    {
      "epoch": 0.6457777777777778,
      "grad_norm": 0.4919484853744507,
      "learning_rate": 7.100222717149221e-05,
      "loss": 1.0987,
      "step": 2906
    },
    {
      "epoch": 0.646,
      "grad_norm": 0.7251628637313843,
      "learning_rate": 7.09576837416481e-05,
      "loss": 1.2484,
      "step": 2907
    },
    {
      "epoch": 0.6462222222222223,
      "grad_norm": 0.10525999963283539,
      "learning_rate": 7.091314031180402e-05,
      "loss": 0.0137,
      "step": 2908
    },
    {
      "epoch": 0.6464444444444445,
      "grad_norm": 0.5602700114250183,
      "learning_rate": 7.086859688195991e-05,
      "loss": 1.1304,
      "step": 2909
    },
    {
      "epoch": 0.6466666666666666,
      "grad_norm": 0.8258494138717651,
      "learning_rate": 7.082405345211582e-05,
      "loss": 2.225,
      "step": 2910
    },
    {
      "epoch": 0.6468888888888888,
      "grad_norm": 0.841549277305603,
      "learning_rate": 7.077951002227172e-05,
      "loss": 2.0783,
      "step": 2911
    },
    {
      "epoch": 0.6471111111111111,
      "grad_norm": 0.9405723214149475,
      "learning_rate": 7.073496659242762e-05,
      "loss": 2.3224,
      "step": 2912
    },
    {
      "epoch": 0.6473333333333333,
      "grad_norm": 0.8422486186027527,
      "learning_rate": 7.069042316258352e-05,
      "loss": 1.8426,
      "step": 2913
    },
    {
      "epoch": 0.6475555555555556,
      "grad_norm": 0.9155295491218567,
      "learning_rate": 7.064587973273943e-05,
      "loss": 1.9838,
      "step": 2914
    },
    {
      "epoch": 0.6477777777777778,
      "grad_norm": 1.0015355348587036,
      "learning_rate": 7.060133630289533e-05,
      "loss": 2.0283,
      "step": 2915
    },
    {
      "epoch": 0.648,
      "grad_norm": 1.0666885375976562,
      "learning_rate": 7.055679287305122e-05,
      "loss": 2.1292,
      "step": 2916
    },
    {
      "epoch": 0.6482222222222223,
      "grad_norm": 0.863190233707428,
      "learning_rate": 7.051224944320713e-05,
      "loss": 1.6575,
      "step": 2917
    },
    {
      "epoch": 0.6484444444444445,
      "grad_norm": 1.0107028484344482,
      "learning_rate": 7.046770601336304e-05,
      "loss": 1.6859,
      "step": 2918
    },
    {
      "epoch": 0.6486666666666666,
      "grad_norm": 0.7367758750915527,
      "learning_rate": 7.042316258351893e-05,
      "loss": 1.109,
      "step": 2919
    },
    {
      "epoch": 0.6488888888888888,
      "grad_norm": 0.17549914121627808,
      "learning_rate": 7.037861915367485e-05,
      "loss": 0.0181,
      "step": 2920
    },
    {
      "epoch": 0.6491111111111111,
      "grad_norm": 0.1470581293106079,
      "learning_rate": 7.033407572383074e-05,
      "loss": 0.0229,
      "step": 2921
    },
    {
      "epoch": 0.6493333333333333,
      "grad_norm": 0.6036903858184814,
      "learning_rate": 7.028953229398664e-05,
      "loss": 0.9571,
      "step": 2922
    },
    {
      "epoch": 0.6495555555555556,
      "grad_norm": 0.9932591319084167,
      "learning_rate": 7.024498886414255e-05,
      "loss": 1.8507,
      "step": 2923
    },
    {
      "epoch": 0.6497777777777778,
      "grad_norm": 1.0042146444320679,
      "learning_rate": 7.020044543429844e-05,
      "loss": 1.6924,
      "step": 2924
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1565930843353271,
      "learning_rate": 7.015590200445435e-05,
      "loss": 1.7762,
      "step": 2925
    },
    {
      "epoch": 0.6502222222222223,
      "grad_norm": 1.0624140501022339,
      "learning_rate": 7.011135857461025e-05,
      "loss": 1.9526,
      "step": 2926
    },
    {
      "epoch": 0.6504444444444445,
      "grad_norm": 0.9543834328651428,
      "learning_rate": 7.006681514476616e-05,
      "loss": 1.832,
      "step": 2927
    },
    {
      "epoch": 0.6506666666666666,
      "grad_norm": 0.23686853051185608,
      "learning_rate": 7.002227171492205e-05,
      "loss": 0.0233,
      "step": 2928
    },
    {
      "epoch": 0.6508888888888889,
      "grad_norm": 0.21207448840141296,
      "learning_rate": 6.997772828507795e-05,
      "loss": 0.0227,
      "step": 2929
    },
    {
      "epoch": 0.6511111111111111,
      "grad_norm": 0.1674102395772934,
      "learning_rate": 6.993318485523386e-05,
      "loss": 0.0203,
      "step": 2930
    },
    {
      "epoch": 0.6513333333333333,
      "grad_norm": 0.6476364135742188,
      "learning_rate": 6.988864142538975e-05,
      "loss": 0.8066,
      "step": 2931
    },
    {
      "epoch": 0.6515555555555556,
      "grad_norm": 0.0691596120595932,
      "learning_rate": 6.984409799554567e-05,
      "loss": 0.0172,
      "step": 2932
    },
    {
      "epoch": 0.6517777777777778,
      "grad_norm": 0.06929878145456314,
      "learning_rate": 6.979955456570156e-05,
      "loss": 0.017,
      "step": 2933
    },
    {
      "epoch": 0.652,
      "grad_norm": 0.6000028252601624,
      "learning_rate": 6.975501113585747e-05,
      "loss": 0.7008,
      "step": 2934
    },
    {
      "epoch": 0.6522222222222223,
      "grad_norm": 0.9273680448532104,
      "learning_rate": 6.971046770601337e-05,
      "loss": 1.6629,
      "step": 2935
    },
    {
      "epoch": 0.6524444444444445,
      "grad_norm": 1.1437641382217407,
      "learning_rate": 6.966592427616926e-05,
      "loss": 1.4685,
      "step": 2936
    },
    {
      "epoch": 0.6526666666666666,
      "grad_norm": 0.954337477684021,
      "learning_rate": 6.962138084632517e-05,
      "loss": 1.4641,
      "step": 2937
    },
    {
      "epoch": 0.6528888888888889,
      "grad_norm": 1.209396243095398,
      "learning_rate": 6.957683741648107e-05,
      "loss": 1.6734,
      "step": 2938
    },
    {
      "epoch": 0.6531111111111111,
      "grad_norm": 1.0835387706756592,
      "learning_rate": 6.953229398663698e-05,
      "loss": 1.4416,
      "step": 2939
    },
    {
      "epoch": 0.6533333333333333,
      "grad_norm": 1.2384669780731201,
      "learning_rate": 6.948775055679287e-05,
      "loss": 1.5449,
      "step": 2940
    },
    {
      "epoch": 0.6535555555555556,
      "grad_norm": 1.0444763898849487,
      "learning_rate": 6.944320712694879e-05,
      "loss": 1.4081,
      "step": 2941
    },
    {
      "epoch": 0.6537777777777778,
      "grad_norm": 0.19488677382469177,
      "learning_rate": 6.939866369710468e-05,
      "loss": 0.0357,
      "step": 2942
    },
    {
      "epoch": 0.654,
      "grad_norm": 0.7100367546081543,
      "learning_rate": 6.935412026726057e-05,
      "loss": 0.6988,
      "step": 2943
    },
    {
      "epoch": 0.6542222222222223,
      "grad_norm": 1.0661877393722534,
      "learning_rate": 6.930957683741648e-05,
      "loss": 1.402,
      "step": 2944
    },
    {
      "epoch": 0.6544444444444445,
      "grad_norm": 1.0283530950546265,
      "learning_rate": 6.926503340757238e-05,
      "loss": 1.2476,
      "step": 2945
    },
    {
      "epoch": 0.6546666666666666,
      "grad_norm": 1.0251097679138184,
      "learning_rate": 6.922048997772829e-05,
      "loss": 1.2837,
      "step": 2946
    },
    {
      "epoch": 0.6548888888888889,
      "grad_norm": 1.202881932258606,
      "learning_rate": 6.917594654788418e-05,
      "loss": 1.5042,
      "step": 2947
    },
    {
      "epoch": 0.6551111111111111,
      "grad_norm": 0.8322992324829102,
      "learning_rate": 6.91314031180401e-05,
      "loss": 0.5849,
      "step": 2948
    },
    {
      "epoch": 0.6553333333333333,
      "grad_norm": 1.1371495723724365,
      "learning_rate": 6.908685968819599e-05,
      "loss": 0.8999,
      "step": 2949
    },
    {
      "epoch": 0.6555555555555556,
      "grad_norm": 1.1280728578567505,
      "learning_rate": 6.904231625835188e-05,
      "loss": 0.8439,
      "step": 2950
    },
    {
      "epoch": 0.6557777777777778,
      "grad_norm": 0.750573456287384,
      "learning_rate": 6.89977728285078e-05,
      "loss": 1.2617,
      "step": 2951
    },
    {
      "epoch": 0.656,
      "grad_norm": 0.6734370589256287,
      "learning_rate": 6.89532293986637e-05,
      "loss": 1.4601,
      "step": 2952
    },
    {
      "epoch": 0.6562222222222223,
      "grad_norm": 0.8959650993347168,
      "learning_rate": 6.89086859688196e-05,
      "loss": 2.0815,
      "step": 2953
    },
    {
      "epoch": 0.6564444444444445,
      "grad_norm": 0.7922069430351257,
      "learning_rate": 6.88641425389755e-05,
      "loss": 1.825,
      "step": 2954
    },
    {
      "epoch": 0.6566666666666666,
      "grad_norm": 0.8229972720146179,
      "learning_rate": 6.881959910913141e-05,
      "loss": 2.1376,
      "step": 2955
    },
    {
      "epoch": 0.6568888888888889,
      "grad_norm": 0.8326950669288635,
      "learning_rate": 6.87750556792873e-05,
      "loss": 1.7849,
      "step": 2956
    },
    {
      "epoch": 0.6571111111111111,
      "grad_norm": 0.07217428833246231,
      "learning_rate": 6.873051224944321e-05,
      "loss": 0.0109,
      "step": 2957
    },
    {
      "epoch": 0.6573333333333333,
      "grad_norm": 0.07548868656158447,
      "learning_rate": 6.868596881959911e-05,
      "loss": 0.0109,
      "step": 2958
    },
    {
      "epoch": 0.6575555555555556,
      "grad_norm": 1.0550813674926758,
      "learning_rate": 6.8641425389755e-05,
      "loss": 2.1406,
      "step": 2959
    },
    {
      "epoch": 0.6577777777777778,
      "grad_norm": 1.059350848197937,
      "learning_rate": 6.859688195991092e-05,
      "loss": 2.3544,
      "step": 2960
    },
    {
      "epoch": 0.658,
      "grad_norm": 1.044738531112671,
      "learning_rate": 6.855233853006682e-05,
      "loss": 1.9748,
      "step": 2961
    },
    {
      "epoch": 0.6582222222222223,
      "grad_norm": 0.7435956597328186,
      "learning_rate": 6.850779510022272e-05,
      "loss": 1.0326,
      "step": 2962
    },
    {
      "epoch": 0.6584444444444445,
      "grad_norm": 1.0756325721740723,
      "learning_rate": 6.846325167037863e-05,
      "loss": 1.902,
      "step": 2963
    },
    {
      "epoch": 0.6586666666666666,
      "grad_norm": 0.9899616837501526,
      "learning_rate": 6.841870824053452e-05,
      "loss": 2.0294,
      "step": 2964
    },
    {
      "epoch": 0.6588888888888889,
      "grad_norm": 0.9705696105957031,
      "learning_rate": 6.837416481069042e-05,
      "loss": 1.7723,
      "step": 2965
    },
    {
      "epoch": 0.6591111111111111,
      "grad_norm": 0.7591641545295715,
      "learning_rate": 6.832962138084633e-05,
      "loss": 1.054,
      "step": 2966
    },
    {
      "epoch": 0.6593333333333333,
      "grad_norm": 0.6711844801902771,
      "learning_rate": 6.828507795100223e-05,
      "loss": 1.002,
      "step": 2967
    },
    {
      "epoch": 0.6595555555555556,
      "grad_norm": 0.10974638164043427,
      "learning_rate": 6.824053452115813e-05,
      "loss": 0.0174,
      "step": 2968
    },
    {
      "epoch": 0.6597777777777778,
      "grad_norm": 0.6256340742111206,
      "learning_rate": 6.819599109131403e-05,
      "loss": 0.7909,
      "step": 2969
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9503763914108276,
      "learning_rate": 6.815144766146994e-05,
      "loss": 1.6599,
      "step": 2970
    },
    {
      "epoch": 0.6602222222222223,
      "grad_norm": 0.10511241108179092,
      "learning_rate": 6.810690423162583e-05,
      "loss": 0.0191,
      "step": 2971
    },
    {
      "epoch": 0.6604444444444444,
      "grad_norm": 0.10218800604343414,
      "learning_rate": 6.806236080178175e-05,
      "loss": 0.0192,
      "step": 2972
    },
    {
      "epoch": 0.6606666666666666,
      "grad_norm": 1.087993860244751,
      "learning_rate": 6.801781737193764e-05,
      "loss": 2.0295,
      "step": 2973
    },
    {
      "epoch": 0.6608888888888889,
      "grad_norm": 0.9236508011817932,
      "learning_rate": 6.797327394209355e-05,
      "loss": 1.8498,
      "step": 2974
    },
    {
      "epoch": 0.6611111111111111,
      "grad_norm": 0.9590336680412292,
      "learning_rate": 6.792873051224945e-05,
      "loss": 1.8817,
      "step": 2975
    },
    {
      "epoch": 0.6613333333333333,
      "grad_norm": 1.0234291553497314,
      "learning_rate": 6.788418708240534e-05,
      "loss": 1.7545,
      "step": 2976
    },
    {
      "epoch": 0.6615555555555556,
      "grad_norm": 1.0496752262115479,
      "learning_rate": 6.783964365256125e-05,
      "loss": 1.838,
      "step": 2977
    },
    {
      "epoch": 0.6617777777777778,
      "grad_norm": 1.0736680030822754,
      "learning_rate": 6.779510022271715e-05,
      "loss": 1.8839,
      "step": 2978
    },
    {
      "epoch": 0.662,
      "grad_norm": 0.06254412978887558,
      "learning_rate": 6.775055679287306e-05,
      "loss": 0.0165,
      "step": 2979
    },
    {
      "epoch": 0.6622222222222223,
      "grad_norm": 0.06592579185962677,
      "learning_rate": 6.770601336302895e-05,
      "loss": 0.0167,
      "step": 2980
    },
    {
      "epoch": 0.6624444444444444,
      "grad_norm": 0.0603296123445034,
      "learning_rate": 6.766146993318486e-05,
      "loss": 0.0164,
      "step": 2981
    },
    {
      "epoch": 0.6626666666666666,
      "grad_norm": 0.0696684792637825,
      "learning_rate": 6.761692650334076e-05,
      "loss": 0.0163,
      "step": 2982
    },
    {
      "epoch": 0.6628888888888889,
      "grad_norm": 0.711073637008667,
      "learning_rate": 6.757238307349665e-05,
      "loss": 0.7294,
      "step": 2983
    },
    {
      "epoch": 0.6631111111111111,
      "grad_norm": 1.0906970500946045,
      "learning_rate": 6.752783964365257e-05,
      "loss": 1.8489,
      "step": 2984
    },
    {
      "epoch": 0.6633333333333333,
      "grad_norm": 0.6575995683670044,
      "learning_rate": 6.748329621380846e-05,
      "loss": 0.7741,
      "step": 2985
    },
    {
      "epoch": 0.6635555555555556,
      "grad_norm": 0.9926353096961975,
      "learning_rate": 6.743875278396437e-05,
      "loss": 1.7744,
      "step": 2986
    },
    {
      "epoch": 0.6637777777777778,
      "grad_norm": 1.089295506477356,
      "learning_rate": 6.739420935412027e-05,
      "loss": 1.7575,
      "step": 2987
    },
    {
      "epoch": 0.664,
      "grad_norm": 0.10425405949354172,
      "learning_rate": 6.734966592427617e-05,
      "loss": 0.027,
      "step": 2988
    },
    {
      "epoch": 0.6642222222222223,
      "grad_norm": 0.682433009147644,
      "learning_rate": 6.730512249443207e-05,
      "loss": 0.7208,
      "step": 2989
    },
    {
      "epoch": 0.6644444444444444,
      "grad_norm": 1.045576572418213,
      "learning_rate": 6.726057906458798e-05,
      "loss": 1.1317,
      "step": 2990
    },
    {
      "epoch": 0.6646666666666666,
      "grad_norm": 1.2633000612258911,
      "learning_rate": 6.721603563474388e-05,
      "loss": 1.6152,
      "step": 2991
    },
    {
      "epoch": 0.6648888888888889,
      "grad_norm": 1.0451045036315918,
      "learning_rate": 6.717149220489977e-05,
      "loss": 1.412,
      "step": 2992
    },
    {
      "epoch": 0.6651111111111111,
      "grad_norm": 0.9378172159194946,
      "learning_rate": 6.712694877505569e-05,
      "loss": 1.4484,
      "step": 2993
    },
    {
      "epoch": 0.6653333333333333,
      "grad_norm": 0.9717287421226501,
      "learning_rate": 6.708240534521158e-05,
      "loss": 1.2648,
      "step": 2994
    },
    {
      "epoch": 0.6655555555555556,
      "grad_norm": 0.7894330620765686,
      "learning_rate": 6.703786191536749e-05,
      "loss": 0.7415,
      "step": 2995
    },
    {
      "epoch": 0.6657777777777778,
      "grad_norm": 1.0013213157653809,
      "learning_rate": 6.69933184855234e-05,
      "loss": 1.1406,
      "step": 2996
    },
    {
      "epoch": 0.666,
      "grad_norm": 0.16363666951656342,
      "learning_rate": 6.694877505567929e-05,
      "loss": 0.038,
      "step": 2997
    },
    {
      "epoch": 0.6662222222222223,
      "grad_norm": 0.7024639844894409,
      "learning_rate": 6.690423162583519e-05,
      "loss": 0.5375,
      "step": 2998
    },
    {
      "epoch": 0.6664444444444444,
      "grad_norm": 0.9310855865478516,
      "learning_rate": 6.68596881959911e-05,
      "loss": 1.042,
      "step": 2999
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 1.0746338367462158,
      "learning_rate": 6.6815144766147e-05,
      "loss": 0.8101,
      "step": 3000
    },
    {
      "epoch": 0.6668888888888889,
      "grad_norm": 0.8144944906234741,
      "learning_rate": 6.67706013363029e-05,
      "loss": 1.9887,
      "step": 3001
    },
    {
      "epoch": 0.6671111111111111,
      "grad_norm": 0.8721863627433777,
      "learning_rate": 6.67260579064588e-05,
      "loss": 2.3995,
      "step": 3002
    },
    {
      "epoch": 0.6673333333333333,
      "grad_norm": 0.801108717918396,
      "learning_rate": 6.66815144766147e-05,
      "loss": 2.1919,
      "step": 3003
    },
    {
      "epoch": 0.6675555555555556,
      "grad_norm": 0.6709057688713074,
      "learning_rate": 6.66369710467706e-05,
      "loss": 1.0861,
      "step": 3004
    },
    {
      "epoch": 0.6677777777777778,
      "grad_norm": 0.05186731740832329,
      "learning_rate": 6.659242761692652e-05,
      "loss": 0.0109,
      "step": 3005
    },
    {
      "epoch": 0.668,
      "grad_norm": 0.6289195418357849,
      "learning_rate": 6.654788418708241e-05,
      "loss": 1.1656,
      "step": 3006
    },
    {
      "epoch": 0.6682222222222223,
      "grad_norm": 0.5143423080444336,
      "learning_rate": 6.650334075723831e-05,
      "loss": 1.1429,
      "step": 3007
    },
    {
      "epoch": 0.6684444444444444,
      "grad_norm": 0.7924249768257141,
      "learning_rate": 6.645879732739422e-05,
      "loss": 1.8575,
      "step": 3008
    },
    {
      "epoch": 0.6686666666666666,
      "grad_norm": 0.09778264164924622,
      "learning_rate": 6.641425389755011e-05,
      "loss": 0.0128,
      "step": 3009
    },
    {
      "epoch": 0.6688888888888889,
      "grad_norm": 0.10178276896476746,
      "learning_rate": 6.636971046770602e-05,
      "loss": 0.013,
      "step": 3010
    },
    {
      "epoch": 0.6691111111111111,
      "grad_norm": 0.09697845578193665,
      "learning_rate": 6.632516703786192e-05,
      "loss": 0.0126,
      "step": 3011
    },
    {
      "epoch": 0.6693333333333333,
      "grad_norm": 1.0395288467407227,
      "learning_rate": 6.628062360801783e-05,
      "loss": 2.0803,
      "step": 3012
    },
    {
      "epoch": 0.6695555555555556,
      "grad_norm": 0.8418979048728943,
      "learning_rate": 6.623608017817372e-05,
      "loss": 2.1971,
      "step": 3013
    },
    {
      "epoch": 0.6697777777777778,
      "grad_norm": 0.9855999946594238,
      "learning_rate": 6.619153674832962e-05,
      "loss": 1.9663,
      "step": 3014
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9182018637657166,
      "learning_rate": 6.614699331848553e-05,
      "loss": 2.1056,
      "step": 3015
    },
    {
      "epoch": 0.6702222222222223,
      "grad_norm": 0.9927064180374146,
      "learning_rate": 6.610244988864142e-05,
      "loss": 1.8567,
      "step": 3016
    },
    {
      "epoch": 0.6704444444444444,
      "grad_norm": 0.8513967394828796,
      "learning_rate": 6.605790645879733e-05,
      "loss": 1.8663,
      "step": 3017
    },
    {
      "epoch": 0.6706666666666666,
      "grad_norm": 0.6829978227615356,
      "learning_rate": 6.601336302895323e-05,
      "loss": 0.9329,
      "step": 3018
    },
    {
      "epoch": 0.6708888888888889,
      "grad_norm": 1.0144321918487549,
      "learning_rate": 6.596881959910914e-05,
      "loss": 2.3309,
      "step": 3019
    },
    {
      "epoch": 0.6711111111111111,
      "grad_norm": 0.9434064030647278,
      "learning_rate": 6.592427616926503e-05,
      "loss": 1.841,
      "step": 3020
    },
    {
      "epoch": 0.6713333333333333,
      "grad_norm": 0.9861494302749634,
      "learning_rate": 6.587973273942093e-05,
      "loss": 2.0507,
      "step": 3021
    },
    {
      "epoch": 0.6715555555555556,
      "grad_norm": 1.0820823907852173,
      "learning_rate": 6.583518930957684e-05,
      "loss": 1.5992,
      "step": 3022
    },
    {
      "epoch": 0.6717777777777778,
      "grad_norm": 0.6171742677688599,
      "learning_rate": 6.579064587973273e-05,
      "loss": 0.876,
      "step": 3023
    },
    {
      "epoch": 0.672,
      "grad_norm": 0.07197222858667374,
      "learning_rate": 6.574610244988865e-05,
      "loss": 0.0164,
      "step": 3024
    },
    {
      "epoch": 0.6722222222222223,
      "grad_norm": 0.7509397268295288,
      "learning_rate": 6.570155902004454e-05,
      "loss": 0.9635,
      "step": 3025
    },
    {
      "epoch": 0.6724444444444444,
      "grad_norm": 0.9577994346618652,
      "learning_rate": 6.565701559020045e-05,
      "loss": 1.7846,
      "step": 3026
    },
    {
      "epoch": 0.6726666666666666,
      "grad_norm": 0.9909307360649109,
      "learning_rate": 6.561247216035635e-05,
      "loss": 1.7811,
      "step": 3027
    },
    {
      "epoch": 0.6728888888888889,
      "grad_norm": 1.076392412185669,
      "learning_rate": 6.556792873051225e-05,
      "loss": 1.6848,
      "step": 3028
    },
    {
      "epoch": 0.6731111111111111,
      "grad_norm": 0.9113189578056335,
      "learning_rate": 6.552338530066815e-05,
      "loss": 1.7005,
      "step": 3029
    },
    {
      "epoch": 0.6733333333333333,
      "grad_norm": 1.2456274032592773,
      "learning_rate": 6.547884187082406e-05,
      "loss": 1.7526,
      "step": 3030
    },
    {
      "epoch": 0.6735555555555556,
      "grad_norm": 1.0734461545944214,
      "learning_rate": 6.543429844097996e-05,
      "loss": 1.9902,
      "step": 3031
    },
    {
      "epoch": 0.6737777777777778,
      "grad_norm": 0.7110247015953064,
      "learning_rate": 6.538975501113585e-05,
      "loss": 0.8453,
      "step": 3032
    },
    {
      "epoch": 0.674,
      "grad_norm": 0.059201423078775406,
      "learning_rate": 6.534521158129177e-05,
      "loss": 0.0163,
      "step": 3033
    },
    {
      "epoch": 0.6742222222222222,
      "grad_norm": 0.7983320355415344,
      "learning_rate": 6.530066815144766e-05,
      "loss": 0.9296,
      "step": 3034
    },
    {
      "epoch": 0.6744444444444444,
      "grad_norm": 0.07402991503477097,
      "learning_rate": 6.525612472160356e-05,
      "loss": 0.0193,
      "step": 3035
    },
    {
      "epoch": 0.6746666666666666,
      "grad_norm": 0.07244686037302017,
      "learning_rate": 6.521158129175947e-05,
      "loss": 0.0193,
      "step": 3036
    },
    {
      "epoch": 0.6748888888888889,
      "grad_norm": 1.0383340120315552,
      "learning_rate": 6.516703786191537e-05,
      "loss": 1.4567,
      "step": 3037
    },
    {
      "epoch": 0.6751111111111111,
      "grad_norm": 1.0180835723876953,
      "learning_rate": 6.512249443207127e-05,
      "loss": 1.8275,
      "step": 3038
    },
    {
      "epoch": 0.6753333333333333,
      "grad_norm": 1.225290298461914,
      "learning_rate": 6.507795100222718e-05,
      "loss": 1.7056,
      "step": 3039
    },
    {
      "epoch": 0.6755555555555556,
      "grad_norm": 0.8802182674407959,
      "learning_rate": 6.503340757238308e-05,
      "loss": 1.0935,
      "step": 3040
    },
    {
      "epoch": 0.6757777777777778,
      "grad_norm": 1.0758693218231201,
      "learning_rate": 6.498886414253897e-05,
      "loss": 1.7778,
      "step": 3041
    },
    {
      "epoch": 0.676,
      "grad_norm": 1.1325352191925049,
      "learning_rate": 6.494432071269488e-05,
      "loss": 1.5706,
      "step": 3042
    },
    {
      "epoch": 0.6762222222222222,
      "grad_norm": 1.0380780696868896,
      "learning_rate": 6.489977728285078e-05,
      "loss": 1.535,
      "step": 3043
    },
    {
      "epoch": 0.6764444444444444,
      "grad_norm": 0.9906545281410217,
      "learning_rate": 6.485523385300668e-05,
      "loss": 1.4007,
      "step": 3044
    },
    {
      "epoch": 0.6766666666666666,
      "grad_norm": 0.17783640325069427,
      "learning_rate": 6.48106904231626e-05,
      "loss": 0.0312,
      "step": 3045
    },
    {
      "epoch": 0.6768888888888889,
      "grad_norm": 0.9812122583389282,
      "learning_rate": 6.476614699331849e-05,
      "loss": 1.2594,
      "step": 3046
    },
    {
      "epoch": 0.6771111111111111,
      "grad_norm": 1.060013771057129,
      "learning_rate": 6.472160356347439e-05,
      "loss": 1.1064,
      "step": 3047
    },
    {
      "epoch": 0.6773333333333333,
      "grad_norm": 0.6272473335266113,
      "learning_rate": 6.46770601336303e-05,
      "loss": 0.4809,
      "step": 3048
    },
    {
      "epoch": 0.6775555555555556,
      "grad_norm": 0.9412599802017212,
      "learning_rate": 6.463251670378619e-05,
      "loss": 0.9192,
      "step": 3049
    },
    {
      "epoch": 0.6777777777777778,
      "grad_norm": 1.0236815214157104,
      "learning_rate": 6.45879732739421e-05,
      "loss": 0.8751,
      "step": 3050
    },
    {
      "epoch": 0.678,
      "grad_norm": 0.05509922653436661,
      "learning_rate": 6.4543429844098e-05,
      "loss": 0.0112,
      "step": 3051
    },
    {
      "epoch": 0.6782222222222222,
      "grad_norm": 0.8858595490455627,
      "learning_rate": 6.44988864142539e-05,
      "loss": 2.2657,
      "step": 3052
    },
    {
      "epoch": 0.6784444444444444,
      "grad_norm": 0.5961353182792664,
      "learning_rate": 6.44543429844098e-05,
      "loss": 1.2772,
      "step": 3053
    },
    {
      "epoch": 0.6786666666666666,
      "grad_norm": 0.05628953129053116,
      "learning_rate": 6.44097995545657e-05,
      "loss": 0.011,
      "step": 3054
    },
    {
      "epoch": 0.6788888888888889,
      "grad_norm": 0.8051088452339172,
      "learning_rate": 6.436525612472161e-05,
      "loss": 2.2465,
      "step": 3055
    },
    {
      "epoch": 0.6791111111111111,
      "grad_norm": 0.9172492027282715,
      "learning_rate": 6.43207126948775e-05,
      "loss": 2.1671,
      "step": 3056
    },
    {
      "epoch": 0.6793333333333333,
      "grad_norm": 0.622685968875885,
      "learning_rate": 6.427616926503342e-05,
      "loss": 1.1392,
      "step": 3057
    },
    {
      "epoch": 0.6795555555555556,
      "grad_norm": 1.047365665435791,
      "learning_rate": 6.423162583518931e-05,
      "loss": 2.1024,
      "step": 3058
    },
    {
      "epoch": 0.6797777777777778,
      "grad_norm": 0.6596314311027527,
      "learning_rate": 6.418708240534522e-05,
      "loss": 1.2222,
      "step": 3059
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.058744728565216064,
      "learning_rate": 6.414253897550112e-05,
      "loss": 0.0111,
      "step": 3060
    },
    {
      "epoch": 0.6802222222222222,
      "grad_norm": 0.9326651692390442,
      "learning_rate": 6.409799554565701e-05,
      "loss": 2.1789,
      "step": 3061
    },
    {
      "epoch": 0.6804444444444444,
      "grad_norm": 0.9691800475120544,
      "learning_rate": 6.405345211581292e-05,
      "loss": 2.2824,
      "step": 3062
    },
    {
      "epoch": 0.6806666666666666,
      "grad_norm": 0.925193190574646,
      "learning_rate": 6.400890868596882e-05,
      "loss": 2.431,
      "step": 3063
    },
    {
      "epoch": 0.6808888888888889,
      "grad_norm": 0.9088225364685059,
      "learning_rate": 6.396436525612473e-05,
      "loss": 2.0492,
      "step": 3064
    },
    {
      "epoch": 0.6811111111111111,
      "grad_norm": 0.8372054696083069,
      "learning_rate": 6.391982182628062e-05,
      "loss": 2.2126,
      "step": 3065
    },
    {
      "epoch": 0.6813333333333333,
      "grad_norm": 0.8477223515510559,
      "learning_rate": 6.387527839643653e-05,
      "loss": 1.933,
      "step": 3066
    },
    {
      "epoch": 0.6815555555555556,
      "grad_norm": 0.6637649536132812,
      "learning_rate": 6.383073496659243e-05,
      "loss": 1.0526,
      "step": 3067
    },
    {
      "epoch": 0.6817777777777778,
      "grad_norm": 0.9227988719940186,
      "learning_rate": 6.378619153674832e-05,
      "loss": 1.9949,
      "step": 3068
    },
    {
      "epoch": 0.682,
      "grad_norm": 0.9380735754966736,
      "learning_rate": 6.374164810690424e-05,
      "loss": 1.8282,
      "step": 3069
    },
    {
      "epoch": 0.6822222222222222,
      "grad_norm": 0.992690920829773,
      "learning_rate": 6.369710467706013e-05,
      "loss": 1.7263,
      "step": 3070
    },
    {
      "epoch": 0.6824444444444444,
      "grad_norm": 0.9732444286346436,
      "learning_rate": 6.365256124721604e-05,
      "loss": 2.0818,
      "step": 3071
    },
    {
      "epoch": 0.6826666666666666,
      "grad_norm": 0.07128032296895981,
      "learning_rate": 6.360801781737195e-05,
      "loss": 0.0167,
      "step": 3072
    },
    {
      "epoch": 0.6828888888888889,
      "grad_norm": 0.9442581534385681,
      "learning_rate": 6.356347438752784e-05,
      "loss": 1.9286,
      "step": 3073
    },
    {
      "epoch": 0.6831111111111111,
      "grad_norm": 0.13606970012187958,
      "learning_rate": 6.351893095768374e-05,
      "loss": 0.0205,
      "step": 3074
    },
    {
      "epoch": 0.6833333333333333,
      "grad_norm": 0.648127555847168,
      "learning_rate": 6.347438752783965e-05,
      "loss": 0.8501,
      "step": 3075
    },
    {
      "epoch": 0.6835555555555556,
      "grad_norm": 0.9328134655952454,
      "learning_rate": 6.342984409799555e-05,
      "loss": 1.6076,
      "step": 3076
    },
    {
      "epoch": 0.6837777777777778,
      "grad_norm": 1.0561175346374512,
      "learning_rate": 6.338530066815144e-05,
      "loss": 1.765,
      "step": 3077
    },
    {
      "epoch": 0.684,
      "grad_norm": 1.1816853284835815,
      "learning_rate": 6.334075723830736e-05,
      "loss": 2.1397,
      "step": 3078
    },
    {
      "epoch": 0.6842222222222222,
      "grad_norm": 1.151865839958191,
      "learning_rate": 6.329621380846326e-05,
      "loss": 1.8881,
      "step": 3079
    },
    {
      "epoch": 0.6844444444444444,
      "grad_norm": 0.07445438951253891,
      "learning_rate": 6.325167037861916e-05,
      "loss": 0.0177,
      "step": 3080
    },
    {
      "epoch": 0.6846666666666666,
      "grad_norm": 0.07042374461889267,
      "learning_rate": 6.320712694877507e-05,
      "loss": 0.0171,
      "step": 3081
    },
    {
      "epoch": 0.6848888888888889,
      "grad_norm": 0.6800836324691772,
      "learning_rate": 6.316258351893096e-05,
      "loss": 0.7686,
      "step": 3082
    },
    {
      "epoch": 0.6851111111111111,
      "grad_norm": 0.7797111868858337,
      "learning_rate": 6.311804008908686e-05,
      "loss": 0.9435,
      "step": 3083
    },
    {
      "epoch": 0.6853333333333333,
      "grad_norm": 0.07849026471376419,
      "learning_rate": 6.307349665924277e-05,
      "loss": 0.0194,
      "step": 3084
    },
    {
      "epoch": 0.6855555555555556,
      "grad_norm": 0.07662785053253174,
      "learning_rate": 6.302895322939867e-05,
      "loss": 0.0191,
      "step": 3085
    },
    {
      "epoch": 0.6857777777777778,
      "grad_norm": 0.0744476169347763,
      "learning_rate": 6.298440979955457e-05,
      "loss": 0.0183,
      "step": 3086
    },
    {
      "epoch": 0.686,
      "grad_norm": 0.0717550590634346,
      "learning_rate": 6.293986636971047e-05,
      "loss": 0.0184,
      "step": 3087
    },
    {
      "epoch": 0.6862222222222222,
      "grad_norm": 1.115823745727539,
      "learning_rate": 6.289532293986638e-05,
      "loss": 1.871,
      "step": 3088
    },
    {
      "epoch": 0.6864444444444444,
      "grad_norm": 0.9394058585166931,
      "learning_rate": 6.285077951002227e-05,
      "loss": 1.2818,
      "step": 3089
    },
    {
      "epoch": 0.6866666666666666,
      "grad_norm": 0.7753637433052063,
      "learning_rate": 6.280623608017817e-05,
      "loss": 0.9434,
      "step": 3090
    },
    {
      "epoch": 0.6868888888888889,
      "grad_norm": 0.7117932438850403,
      "learning_rate": 6.276169265033408e-05,
      "loss": 0.7796,
      "step": 3091
    },
    {
      "epoch": 0.6871111111111111,
      "grad_norm": 1.1098551750183105,
      "learning_rate": 6.271714922048998e-05,
      "loss": 1.3902,
      "step": 3092
    },
    {
      "epoch": 0.6873333333333334,
      "grad_norm": 1.1206355094909668,
      "learning_rate": 6.267260579064588e-05,
      "loss": 1.4423,
      "step": 3093
    },
    {
      "epoch": 0.6875555555555556,
      "grad_norm": 1.0141700506210327,
      "learning_rate": 6.262806236080178e-05,
      "loss": 1.587,
      "step": 3094
    },
    {
      "epoch": 0.6877777777777778,
      "grad_norm": 1.2595239877700806,
      "learning_rate": 6.258351893095769e-05,
      "loss": 1.5666,
      "step": 3095
    },
    {
      "epoch": 0.688,
      "grad_norm": 0.9674675464630127,
      "learning_rate": 6.253897550111358e-05,
      "loss": 0.7225,
      "step": 3096
    },
    {
      "epoch": 0.6882222222222222,
      "grad_norm": 0.1801719069480896,
      "learning_rate": 6.24944320712695e-05,
      "loss": 0.0303,
      "step": 3097
    },
    {
      "epoch": 0.6884444444444444,
      "grad_norm": 1.2141374349594116,
      "learning_rate": 6.244988864142539e-05,
      "loss": 1.0987,
      "step": 3098
    },
    {
      "epoch": 0.6886666666666666,
      "grad_norm": 1.031459093093872,
      "learning_rate": 6.24053452115813e-05,
      "loss": 1.0589,
      "step": 3099
    },
    {
      "epoch": 0.6888888888888889,
      "grad_norm": 0.8709812760353088,
      "learning_rate": 6.23608017817372e-05,
      "loss": 0.5863,
      "step": 3100
    },
    {
      "epoch": 0.6891111111111111,
      "grad_norm": 0.607231616973877,
      "learning_rate": 6.231625835189309e-05,
      "loss": 1.0338,
      "step": 3101
    },
    {
      "epoch": 0.6893333333333334,
      "grad_norm": 0.5942530035972595,
      "learning_rate": 6.2271714922049e-05,
      "loss": 1.0758,
      "step": 3102
    },
    {
      "epoch": 0.6895555555555556,
      "grad_norm": 0.8875899910926819,
      "learning_rate": 6.22271714922049e-05,
      "loss": 2.1266,
      "step": 3103
    },
    {
      "epoch": 0.6897777777777778,
      "grad_norm": 0.8262476325035095,
      "learning_rate": 6.218262806236081e-05,
      "loss": 2.2635,
      "step": 3104
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.0524749718606472,
      "learning_rate": 6.21380846325167e-05,
      "loss": 0.0106,
      "step": 3105
    },
    {
      "epoch": 0.6902222222222222,
      "grad_norm": 0.5803321003913879,
      "learning_rate": 6.20935412026726e-05,
      "loss": 1.0884,
      "step": 3106
    },
    {
      "epoch": 0.6904444444444444,
      "grad_norm": 1.0942132472991943,
      "learning_rate": 6.204899777282851e-05,
      "loss": 2.2134,
      "step": 3107
    },
    {
      "epoch": 0.6906666666666667,
      "grad_norm": 0.07692880928516388,
      "learning_rate": 6.20044543429844e-05,
      "loss": 0.0116,
      "step": 3108
    },
    {
      "epoch": 0.6908888888888889,
      "grad_norm": 0.07232845574617386,
      "learning_rate": 6.195991091314032e-05,
      "loss": 0.0117,
      "step": 3109
    },
    {
      "epoch": 0.6911111111111111,
      "grad_norm": 0.8545564413070679,
      "learning_rate": 6.191536748329621e-05,
      "loss": 2.0441,
      "step": 3110
    },
    {
      "epoch": 0.6913333333333334,
      "grad_norm": 0.8483017086982727,
      "learning_rate": 6.187082405345212e-05,
      "loss": 1.9926,
      "step": 3111
    },
    {
      "epoch": 0.6915555555555556,
      "grad_norm": 0.8519989848136902,
      "learning_rate": 6.182628062360802e-05,
      "loss": 2.1871,
      "step": 3112
    },
    {
      "epoch": 0.6917777777777778,
      "grad_norm": 0.8962295055389404,
      "learning_rate": 6.178173719376392e-05,
      "loss": 1.9079,
      "step": 3113
    },
    {
      "epoch": 0.692,
      "grad_norm": 0.908099353313446,
      "learning_rate": 6.173719376391982e-05,
      "loss": 2.2056,
      "step": 3114
    },
    {
      "epoch": 0.6922222222222222,
      "grad_norm": 0.9471180438995361,
      "learning_rate": 6.169265033407573e-05,
      "loss": 1.6695,
      "step": 3115
    },
    {
      "epoch": 0.6924444444444444,
      "grad_norm": 0.9277594685554504,
      "learning_rate": 6.164810690423163e-05,
      "loss": 2.0879,
      "step": 3116
    },
    {
      "epoch": 0.6926666666666667,
      "grad_norm": 0.6673265695571899,
      "learning_rate": 6.160356347438752e-05,
      "loss": 0.9213,
      "step": 3117
    },
    {
      "epoch": 0.6928888888888889,
      "grad_norm": 0.12814414501190186,
      "learning_rate": 6.155902004454344e-05,
      "loss": 0.0211,
      "step": 3118
    },
    {
      "epoch": 0.6931111111111111,
      "grad_norm": 0.9539985656738281,
      "learning_rate": 6.151447661469933e-05,
      "loss": 1.9212,
      "step": 3119
    },
    {
      "epoch": 0.6933333333333334,
      "grad_norm": 0.927853524684906,
      "learning_rate": 6.146993318485523e-05,
      "loss": 1.6054,
      "step": 3120
    },
    {
      "epoch": 0.6935555555555556,
      "grad_norm": 0.6636569499969482,
      "learning_rate": 6.142538975501115e-05,
      "loss": 0.9331,
      "step": 3121
    },
    {
      "epoch": 0.6937777777777778,
      "grad_norm": 0.07317844778299332,
      "learning_rate": 6.138084632516704e-05,
      "loss": 0.0166,
      "step": 3122
    },
    {
      "epoch": 0.694,
      "grad_norm": 0.07253949344158173,
      "learning_rate": 6.133630289532294e-05,
      "loss": 0.0165,
      "step": 3123
    },
    {
      "epoch": 0.6942222222222222,
      "grad_norm": 0.07455820590257645,
      "learning_rate": 6.129175946547885e-05,
      "loss": 0.0168,
      "step": 3124
    },
    {
      "epoch": 0.6944444444444444,
      "grad_norm": 0.7180811762809753,
      "learning_rate": 6.124721603563475e-05,
      "loss": 1.3197,
      "step": 3125
    },
    {
      "epoch": 0.6946666666666667,
      "grad_norm": 1.0325121879577637,
      "learning_rate": 6.120267260579064e-05,
      "loss": 1.8146,
      "step": 3126
    },
    {
      "epoch": 0.6948888888888889,
      "grad_norm": 1.0472650527954102,
      "learning_rate": 6.115812917594655e-05,
      "loss": 1.8477,
      "step": 3127
    },
    {
      "epoch": 0.6951111111111111,
      "grad_norm": 1.3057109117507935,
      "learning_rate": 6.111358574610246e-05,
      "loss": 1.6522,
      "step": 3128
    },
    {
      "epoch": 0.6953333333333334,
      "grad_norm": 0.9642925262451172,
      "learning_rate": 6.106904231625835e-05,
      "loss": 1.9227,
      "step": 3129
    },
    {
      "epoch": 0.6955555555555556,
      "grad_norm": 0.9852336049079895,
      "learning_rate": 6.102449888641426e-05,
      "loss": 1.8925,
      "step": 3130
    },
    {
      "epoch": 0.6957777777777778,
      "grad_norm": 0.0911262258887291,
      "learning_rate": 6.097995545657016e-05,
      "loss": 0.0171,
      "step": 3131
    },
    {
      "epoch": 0.696,
      "grad_norm": 0.6741465330123901,
      "learning_rate": 6.093541202672606e-05,
      "loss": 0.8653,
      "step": 3132
    },
    {
      "epoch": 0.6962222222222222,
      "grad_norm": 0.9752011895179749,
      "learning_rate": 6.089086859688197e-05,
      "loss": 1.4045,
      "step": 3133
    },
    {
      "epoch": 0.6964444444444444,
      "grad_norm": 0.07129085063934326,
      "learning_rate": 6.084632516703787e-05,
      "loss": 0.0192,
      "step": 3134
    },
    {
      "epoch": 0.6966666666666667,
      "grad_norm": 0.0695280209183693,
      "learning_rate": 6.0801781737193766e-05,
      "loss": 0.0186,
      "step": 3135
    },
    {
      "epoch": 0.6968888888888889,
      "grad_norm": 0.07262587547302246,
      "learning_rate": 6.075723830734967e-05,
      "loss": 0.0183,
      "step": 3136
    },
    {
      "epoch": 0.6971111111111111,
      "grad_norm": 0.9831186532974243,
      "learning_rate": 6.071269487750557e-05,
      "loss": 1.4122,
      "step": 3137
    },
    {
      "epoch": 0.6973333333333334,
      "grad_norm": 0.9442914724349976,
      "learning_rate": 6.066815144766147e-05,
      "loss": 1.452,
      "step": 3138
    },
    {
      "epoch": 0.6975555555555556,
      "grad_norm": 1.1144623756408691,
      "learning_rate": 6.062360801781738e-05,
      "loss": 1.6665,
      "step": 3139
    },
    {
      "epoch": 0.6977777777777778,
      "grad_norm": 1.0614639520645142,
      "learning_rate": 6.057906458797328e-05,
      "loss": 1.5621,
      "step": 3140
    },
    {
      "epoch": 0.698,
      "grad_norm": 1.2415484189987183,
      "learning_rate": 6.053452115812918e-05,
      "loss": 1.5338,
      "step": 3141
    },
    {
      "epoch": 0.6982222222222222,
      "grad_norm": 1.146238923072815,
      "learning_rate": 6.048997772828508e-05,
      "loss": 1.8936,
      "step": 3142
    },
    {
      "epoch": 0.6984444444444444,
      "grad_norm": 1.1693158149719238,
      "learning_rate": 6.044543429844098e-05,
      "loss": 1.7386,
      "step": 3143
    },
    {
      "epoch": 0.6986666666666667,
      "grad_norm": 1.2340409755706787,
      "learning_rate": 6.040089086859688e-05,
      "loss": 1.4781,
      "step": 3144
    },
    {
      "epoch": 0.6988888888888889,
      "grad_norm": 1.0042845010757446,
      "learning_rate": 6.035634743875279e-05,
      "loss": 1.3662,
      "step": 3145
    },
    {
      "epoch": 0.6991111111111111,
      "grad_norm": 0.18454298377037048,
      "learning_rate": 6.031180400890869e-05,
      "loss": 0.0276,
      "step": 3146
    },
    {
      "epoch": 0.6993333333333334,
      "grad_norm": 1.1719262599945068,
      "learning_rate": 6.026726057906459e-05,
      "loss": 1.0601,
      "step": 3147
    },
    {
      "epoch": 0.6995555555555556,
      "grad_norm": 0.9232467412948608,
      "learning_rate": 6.0222717149220495e-05,
      "loss": 0.8415,
      "step": 3148
    },
    {
      "epoch": 0.6997777777777778,
      "grad_norm": 0.194104865193367,
      "learning_rate": 6.0178173719376394e-05,
      "loss": 0.0401,
      "step": 3149
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7421103119850159,
      "learning_rate": 6.013363028953229e-05,
      "loss": 0.6284,
      "step": 3150
    },
    {
      "epoch": 0.7002222222222222,
      "grad_norm": 0.7694705724716187,
      "learning_rate": 6.0089086859688204e-05,
      "loss": 1.1152,
      "step": 3151
    },
    {
      "epoch": 0.7004444444444444,
      "grad_norm": 0.703349232673645,
      "learning_rate": 6.00445434298441e-05,
      "loss": 1.1683,
      "step": 3152
    },
    {
      "epoch": 0.7006666666666667,
      "grad_norm": 0.05406121537089348,
      "learning_rate": 6e-05,
      "loss": 0.0105,
      "step": 3153
    },
    {
      "epoch": 0.7008888888888889,
      "grad_norm": 0.5842484831809998,
      "learning_rate": 5.995545657015591e-05,
      "loss": 1.15,
      "step": 3154
    },
    {
      "epoch": 0.7011111111111111,
      "grad_norm": 0.05234431475400925,
      "learning_rate": 5.9910913140311805e-05,
      "loss": 0.0105,
      "step": 3155
    },
    {
      "epoch": 0.7013333333333334,
      "grad_norm": 0.5893082618713379,
      "learning_rate": 5.9866369710467704e-05,
      "loss": 1.1913,
      "step": 3156
    },
    {
      "epoch": 0.7015555555555556,
      "grad_norm": 0.5218148231506348,
      "learning_rate": 5.9821826280623616e-05,
      "loss": 0.9835,
      "step": 3157
    },
    {
      "epoch": 0.7017777777777777,
      "grad_norm": 0.5484596490859985,
      "learning_rate": 5.9777282850779515e-05,
      "loss": 0.9247,
      "step": 3158
    },
    {
      "epoch": 0.702,
      "grad_norm": 0.6557696461677551,
      "learning_rate": 5.973273942093541e-05,
      "loss": 1.1391,
      "step": 3159
    },
    {
      "epoch": 0.7022222222222222,
      "grad_norm": 0.5898274779319763,
      "learning_rate": 5.9688195991091325e-05,
      "loss": 1.2284,
      "step": 3160
    },
    {
      "epoch": 0.7024444444444444,
      "grad_norm": 0.09231838583946228,
      "learning_rate": 5.9643652561247224e-05,
      "loss": 0.0125,
      "step": 3161
    },
    {
      "epoch": 0.7026666666666667,
      "grad_norm": 1.012488842010498,
      "learning_rate": 5.9599109131403116e-05,
      "loss": 2.0515,
      "step": 3162
    },
    {
      "epoch": 0.7028888888888889,
      "grad_norm": 0.9501926302909851,
      "learning_rate": 5.9554565701559014e-05,
      "loss": 2.3767,
      "step": 3163
    },
    {
      "epoch": 0.7031111111111111,
      "grad_norm": 0.9576533436775208,
      "learning_rate": 5.9510022271714927e-05,
      "loss": 2.3394,
      "step": 3164
    },
    {
      "epoch": 0.7033333333333334,
      "grad_norm": 0.944797694683075,
      "learning_rate": 5.9465478841870825e-05,
      "loss": 1.9659,
      "step": 3165
    },
    {
      "epoch": 0.7035555555555556,
      "grad_norm": 0.8810012340545654,
      "learning_rate": 5.9420935412026724e-05,
      "loss": 1.8656,
      "step": 3166
    },
    {
      "epoch": 0.7037777777777777,
      "grad_norm": 0.6439220309257507,
      "learning_rate": 5.9376391982182636e-05,
      "loss": 0.8335,
      "step": 3167
    },
    {
      "epoch": 0.704,
      "grad_norm": 0.9962994456291199,
      "learning_rate": 5.9331848552338534e-05,
      "loss": 2.0233,
      "step": 3168
    },
    {
      "epoch": 0.7042222222222222,
      "grad_norm": 0.9703332185745239,
      "learning_rate": 5.928730512249443e-05,
      "loss": 1.9519,
      "step": 3169
    },
    {
      "epoch": 0.7044444444444444,
      "grad_norm": 1.0500884056091309,
      "learning_rate": 5.924276169265034e-05,
      "loss": 1.578,
      "step": 3170
    },
    {
      "epoch": 0.7046666666666667,
      "grad_norm": 0.9718672037124634,
      "learning_rate": 5.919821826280624e-05,
      "loss": 2.0021,
      "step": 3171
    },
    {
      "epoch": 0.7048888888888889,
      "grad_norm": 0.07014777511358261,
      "learning_rate": 5.9153674832962136e-05,
      "loss": 0.0162,
      "step": 3172
    },
    {
      "epoch": 0.7051111111111111,
      "grad_norm": 0.07737057656049728,
      "learning_rate": 5.910913140311805e-05,
      "loss": 0.0156,
      "step": 3173
    },
    {
      "epoch": 0.7053333333333334,
      "grad_norm": 0.12630076706409454,
      "learning_rate": 5.9064587973273946e-05,
      "loss": 0.0213,
      "step": 3174
    },
    {
      "epoch": 0.7055555555555556,
      "grad_norm": 0.7619150876998901,
      "learning_rate": 5.9020044543429845e-05,
      "loss": 0.9373,
      "step": 3175
    },
    {
      "epoch": 0.7057777777777777,
      "grad_norm": 1.0807890892028809,
      "learning_rate": 5.897550111358575e-05,
      "loss": 2.0725,
      "step": 3176
    },
    {
      "epoch": 0.706,
      "grad_norm": 0.9409441351890564,
      "learning_rate": 5.893095768374165e-05,
      "loss": 1.6597,
      "step": 3177
    },
    {
      "epoch": 0.7062222222222222,
      "grad_norm": 1.096917986869812,
      "learning_rate": 5.888641425389755e-05,
      "loss": 1.9767,
      "step": 3178
    },
    {
      "epoch": 0.7064444444444444,
      "grad_norm": 1.091698408126831,
      "learning_rate": 5.884187082405346e-05,
      "loss": 1.7166,
      "step": 3179
    },
    {
      "epoch": 0.7066666666666667,
      "grad_norm": 1.0211970806121826,
      "learning_rate": 5.879732739420936e-05,
      "loss": 1.6798,
      "step": 3180
    },
    {
      "epoch": 0.7068888888888889,
      "grad_norm": 0.6886789202690125,
      "learning_rate": 5.875278396436526e-05,
      "loss": 1.0461,
      "step": 3181
    },
    {
      "epoch": 0.7071111111111111,
      "grad_norm": 0.05880124494433403,
      "learning_rate": 5.870824053452116e-05,
      "loss": 0.0178,
      "step": 3182
    },
    {
      "epoch": 0.7073333333333334,
      "grad_norm": 0.060819823294878006,
      "learning_rate": 5.866369710467706e-05,
      "loss": 0.0178,
      "step": 3183
    },
    {
      "epoch": 0.7075555555555556,
      "grad_norm": 0.724615752696991,
      "learning_rate": 5.861915367483296e-05,
      "loss": 1.0519,
      "step": 3184
    },
    {
      "epoch": 0.7077777777777777,
      "grad_norm": 0.8110787868499756,
      "learning_rate": 5.857461024498887e-05,
      "loss": 0.9285,
      "step": 3185
    },
    {
      "epoch": 0.708,
      "grad_norm": 0.06422421336174011,
      "learning_rate": 5.853006681514477e-05,
      "loss": 0.0176,
      "step": 3186
    },
    {
      "epoch": 0.7082222222222222,
      "grad_norm": 0.067098468542099,
      "learning_rate": 5.848552338530067e-05,
      "loss": 0.0175,
      "step": 3187
    },
    {
      "epoch": 0.7084444444444444,
      "grad_norm": 0.06487097591161728,
      "learning_rate": 5.8440979955456574e-05,
      "loss": 0.0172,
      "step": 3188
    },
    {
      "epoch": 0.7086666666666667,
      "grad_norm": 0.06535470485687256,
      "learning_rate": 5.839643652561247e-05,
      "loss": 0.0174,
      "step": 3189
    },
    {
      "epoch": 0.7088888888888889,
      "grad_norm": 1.172293782234192,
      "learning_rate": 5.835189309576837e-05,
      "loss": 1.5616,
      "step": 3190
    },
    {
      "epoch": 0.7091111111111111,
      "grad_norm": 1.1036264896392822,
      "learning_rate": 5.830734966592428e-05,
      "loss": 1.6201,
      "step": 3191
    },
    {
      "epoch": 0.7093333333333334,
      "grad_norm": 0.7746077179908752,
      "learning_rate": 5.826280623608018e-05,
      "loss": 0.8634,
      "step": 3192
    },
    {
      "epoch": 0.7095555555555556,
      "grad_norm": 0.9545249342918396,
      "learning_rate": 5.821826280623608e-05,
      "loss": 1.4337,
      "step": 3193
    },
    {
      "epoch": 0.7097777777777777,
      "grad_norm": 1.0250579118728638,
      "learning_rate": 5.8173719376391986e-05,
      "loss": 1.6208,
      "step": 3194
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0089478492736816,
      "learning_rate": 5.8129175946547884e-05,
      "loss": 1.2085,
      "step": 3195
    },
    {
      "epoch": 0.7102222222222222,
      "grad_norm": 1.1248306035995483,
      "learning_rate": 5.808463251670378e-05,
      "loss": 1.5717,
      "step": 3196
    },
    {
      "epoch": 0.7104444444444444,
      "grad_norm": 0.6627147793769836,
      "learning_rate": 5.8040089086859695e-05,
      "loss": 0.6573,
      "step": 3197
    },
    {
      "epoch": 0.7106666666666667,
      "grad_norm": 1.230597972869873,
      "learning_rate": 5.7995545657015594e-05,
      "loss": 1.2766,
      "step": 3198
    },
    {
      "epoch": 0.7108888888888889,
      "grad_norm": 0.1396600902080536,
      "learning_rate": 5.795100222717149e-05,
      "loss": 0.0322,
      "step": 3199
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.9573265314102173,
      "learning_rate": 5.79064587973274e-05,
      "loss": 0.507,
      "step": 3200
    },
    {
      "epoch": 0.7113333333333334,
      "grad_norm": 0.8519662618637085,
      "learning_rate": 5.7861915367483296e-05,
      "loss": 1.1341,
      "step": 3201
    },
    {
      "epoch": 0.7115555555555556,
      "grad_norm": 0.04184136167168617,
      "learning_rate": 5.7817371937639195e-05,
      "loss": 0.0093,
      "step": 3202
    },
    {
      "epoch": 0.7117777777777777,
      "grad_norm": 0.6974391341209412,
      "learning_rate": 5.777282850779511e-05,
      "loss": 1.3839,
      "step": 3203
    },
    {
      "epoch": 0.712,
      "grad_norm": 0.8318896293640137,
      "learning_rate": 5.7728285077951005e-05,
      "loss": 2.0999,
      "step": 3204
    },
    {
      "epoch": 0.7122222222222222,
      "grad_norm": 0.5589978694915771,
      "learning_rate": 5.7683741648106904e-05,
      "loss": 1.1487,
      "step": 3205
    },
    {
      "epoch": 0.7124444444444444,
      "grad_norm": 0.07977552711963654,
      "learning_rate": 5.7639198218262816e-05,
      "loss": 0.0123,
      "step": 3206
    },
    {
      "epoch": 0.7126666666666667,
      "grad_norm": 0.07479345053434372,
      "learning_rate": 5.7594654788418715e-05,
      "loss": 0.0123,
      "step": 3207
    },
    {
      "epoch": 0.7128888888888889,
      "grad_norm": 0.7520397305488586,
      "learning_rate": 5.7550111358574607e-05,
      "loss": 1.8669,
      "step": 3208
    },
    {
      "epoch": 0.7131111111111111,
      "grad_norm": 0.891527533531189,
      "learning_rate": 5.750556792873052e-05,
      "loss": 1.8476,
      "step": 3209
    },
    {
      "epoch": 0.7133333333333334,
      "grad_norm": 0.870412290096283,
      "learning_rate": 5.746102449888642e-05,
      "loss": 1.9461,
      "step": 3210
    },
    {
      "epoch": 0.7135555555555556,
      "grad_norm": 0.9231261610984802,
      "learning_rate": 5.7416481069042316e-05,
      "loss": 2.1436,
      "step": 3211
    },
    {
      "epoch": 0.7137777777777777,
      "grad_norm": 0.804538369178772,
      "learning_rate": 5.737193763919823e-05,
      "loss": 1.6058,
      "step": 3212
    },
    {
      "epoch": 0.714,
      "grad_norm": 0.9710292220115662,
      "learning_rate": 5.7327394209354127e-05,
      "loss": 1.0738,
      "step": 3213
    },
    {
      "epoch": 0.7142222222222222,
      "grad_norm": 0.9411685466766357,
      "learning_rate": 5.7282850779510025e-05,
      "loss": 2.0708,
      "step": 3214
    },
    {
      "epoch": 0.7144444444444444,
      "grad_norm": 0.9712237119674683,
      "learning_rate": 5.723830734966593e-05,
      "loss": 2.1416,
      "step": 3215
    },
    {
      "epoch": 0.7146666666666667,
      "grad_norm": 0.6982542872428894,
      "learning_rate": 5.719376391982183e-05,
      "loss": 0.7926,
      "step": 3216
    },
    {
      "epoch": 0.7148888888888889,
      "grad_norm": 0.7483058571815491,
      "learning_rate": 5.714922048997773e-05,
      "loss": 0.8696,
      "step": 3217
    },
    {
      "epoch": 0.7151111111111111,
      "grad_norm": 0.6382774114608765,
      "learning_rate": 5.710467706013364e-05,
      "loss": 0.8758,
      "step": 3218
    },
    {
      "epoch": 0.7153333333333334,
      "grad_norm": 0.09534616768360138,
      "learning_rate": 5.706013363028954e-05,
      "loss": 0.0179,
      "step": 3219
    },
    {
      "epoch": 0.7155555555555555,
      "grad_norm": 0.9931474328041077,
      "learning_rate": 5.701559020044544e-05,
      "loss": 1.7448,
      "step": 3220
    },
    {
      "epoch": 0.7157777777777777,
      "grad_norm": 1.051207184791565,
      "learning_rate": 5.697104677060134e-05,
      "loss": 1.8485,
      "step": 3221
    },
    {
      "epoch": 0.716,
      "grad_norm": 0.9426413178443909,
      "learning_rate": 5.692650334075724e-05,
      "loss": 1.6347,
      "step": 3222
    },
    {
      "epoch": 0.7162222222222222,
      "grad_norm": 0.919272243976593,
      "learning_rate": 5.688195991091314e-05,
      "loss": 1.7151,
      "step": 3223
    },
    {
      "epoch": 0.7164444444444444,
      "grad_norm": 0.9655510783195496,
      "learning_rate": 5.683741648106905e-05,
      "loss": 1.6615,
      "step": 3224
    },
    {
      "epoch": 0.7166666666666667,
      "grad_norm": 1.2728337049484253,
      "learning_rate": 5.679287305122495e-05,
      "loss": 1.8277,
      "step": 3225
    },
    {
      "epoch": 0.7168888888888889,
      "grad_norm": 0.7086578011512756,
      "learning_rate": 5.674832962138085e-05,
      "loss": 0.8322,
      "step": 3226
    },
    {
      "epoch": 0.7171111111111111,
      "grad_norm": 0.06795133650302887,
      "learning_rate": 5.6703786191536754e-05,
      "loss": 0.017,
      "step": 3227
    },
    {
      "epoch": 0.7173333333333334,
      "grad_norm": 0.06331969052553177,
      "learning_rate": 5.665924276169265e-05,
      "loss": 0.0171,
      "step": 3228
    },
    {
      "epoch": 0.7175555555555555,
      "grad_norm": 0.0663456916809082,
      "learning_rate": 5.661469933184855e-05,
      "loss": 0.0173,
      "step": 3229
    },
    {
      "epoch": 0.7177777777777777,
      "grad_norm": 0.8989565968513489,
      "learning_rate": 5.6570155902004463e-05,
      "loss": 1.6765,
      "step": 3230
    },
    {
      "epoch": 0.718,
      "grad_norm": 0.7637456059455872,
      "learning_rate": 5.652561247216036e-05,
      "loss": 0.7514,
      "step": 3231
    },
    {
      "epoch": 0.7182222222222222,
      "grad_norm": 0.08078856021165848,
      "learning_rate": 5.648106904231626e-05,
      "loss": 0.0163,
      "step": 3232
    },
    {
      "epoch": 0.7184444444444444,
      "grad_norm": 0.8078843951225281,
      "learning_rate": 5.643652561247216e-05,
      "loss": 0.8599,
      "step": 3233
    },
    {
      "epoch": 0.7186666666666667,
      "grad_norm": 1.0271605253219604,
      "learning_rate": 5.6391982182628065e-05,
      "loss": 1.415,
      "step": 3234
    },
    {
      "epoch": 0.7188888888888889,
      "grad_norm": 1.2213661670684814,
      "learning_rate": 5.634743875278396e-05,
      "loss": 1.8322,
      "step": 3235
    },
    {
      "epoch": 0.7191111111111111,
      "grad_norm": 1.0940077304840088,
      "learning_rate": 5.630289532293986e-05,
      "loss": 1.5287,
      "step": 3236
    },
    {
      "epoch": 0.7193333333333334,
      "grad_norm": 1.0005013942718506,
      "learning_rate": 5.6258351893095774e-05,
      "loss": 1.6254,
      "step": 3237
    },
    {
      "epoch": 0.7195555555555555,
      "grad_norm": 0.8303656578063965,
      "learning_rate": 5.621380846325167e-05,
      "loss": 1.027,
      "step": 3238
    },
    {
      "epoch": 0.7197777777777777,
      "grad_norm": 0.704897403717041,
      "learning_rate": 5.616926503340757e-05,
      "loss": 0.7988,
      "step": 3239
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0700993537902832,
      "learning_rate": 5.6124721603563476e-05,
      "loss": 1.7471,
      "step": 3240
    },
    {
      "epoch": 0.7202222222222222,
      "grad_norm": 1.1328794956207275,
      "learning_rate": 5.6080178173719375e-05,
      "loss": 1.2742,
      "step": 3241
    },
    {
      "epoch": 0.7204444444444444,
      "grad_norm": 0.9732044339179993,
      "learning_rate": 5.6035634743875274e-05,
      "loss": 1.3644,
      "step": 3242
    },
    {
      "epoch": 0.7206666666666667,
      "grad_norm": 1.174729347229004,
      "learning_rate": 5.5991091314031186e-05,
      "loss": 1.5483,
      "step": 3243
    },
    {
      "epoch": 0.7208888888888889,
      "grad_norm": 0.78294837474823,
      "learning_rate": 5.5946547884187084e-05,
      "loss": 0.6034,
      "step": 3244
    },
    {
      "epoch": 0.7211111111111111,
      "grad_norm": 0.8941324949264526,
      "learning_rate": 5.590200445434298e-05,
      "loss": 1.0701,
      "step": 3245
    },
    {
      "epoch": 0.7213333333333334,
      "grad_norm": 1.1886690855026245,
      "learning_rate": 5.5857461024498895e-05,
      "loss": 1.3666,
      "step": 3246
    },
    {
      "epoch": 0.7215555555555555,
      "grad_norm": 1.0057522058486938,
      "learning_rate": 5.581291759465479e-05,
      "loss": 1.1996,
      "step": 3247
    },
    {
      "epoch": 0.7217777777777777,
      "grad_norm": 0.839670717716217,
      "learning_rate": 5.5768374164810685e-05,
      "loss": 0.6609,
      "step": 3248
    },
    {
      "epoch": 0.722,
      "grad_norm": 1.1767035722732544,
      "learning_rate": 5.57238307349666e-05,
      "loss": 1.0941,
      "step": 3249
    },
    {
      "epoch": 0.7222222222222222,
      "grad_norm": 1.2154204845428467,
      "learning_rate": 5.5679287305122496e-05,
      "loss": 0.7415,
      "step": 3250
    },
    {
      "epoch": 0.7224444444444444,
      "grad_norm": 0.5861397385597229,
      "learning_rate": 5.5634743875278395e-05,
      "loss": 1.1437,
      "step": 3251
    },
    {
      "epoch": 0.7226666666666667,
      "grad_norm": 0.041759125888347626,
      "learning_rate": 5.559020044543431e-05,
      "loss": 0.0097,
      "step": 3252
    },
    {
      "epoch": 0.7228888888888889,
      "grad_norm": 0.7977886199951172,
      "learning_rate": 5.5545657015590205e-05,
      "loss": 2.1774,
      "step": 3253
    },
    {
      "epoch": 0.7231111111111111,
      "grad_norm": 0.571662425994873,
      "learning_rate": 5.5501113585746104e-05,
      "loss": 1.1858,
      "step": 3254
    },
    {
      "epoch": 0.7233333333333334,
      "grad_norm": 0.7104848027229309,
      "learning_rate": 5.545657015590201e-05,
      "loss": 1.0467,
      "step": 3255
    },
    {
      "epoch": 0.7235555555555555,
      "grad_norm": 0.8153942823410034,
      "learning_rate": 5.541202672605791e-05,
      "loss": 1.976,
      "step": 3256
    },
    {
      "epoch": 0.7237777777777777,
      "grad_norm": 0.08071549981832504,
      "learning_rate": 5.5367483296213806e-05,
      "loss": 0.0125,
      "step": 3257
    },
    {
      "epoch": 0.724,
      "grad_norm": 0.12843948602676392,
      "learning_rate": 5.532293986636972e-05,
      "loss": 0.0124,
      "step": 3258
    },
    {
      "epoch": 0.7242222222222222,
      "grad_norm": 0.07995433360338211,
      "learning_rate": 5.527839643652562e-05,
      "loss": 0.0122,
      "step": 3259
    },
    {
      "epoch": 0.7244444444444444,
      "grad_norm": 0.0713566243648529,
      "learning_rate": 5.5233853006681516e-05,
      "loss": 0.0119,
      "step": 3260
    },
    {
      "epoch": 0.7246666666666667,
      "grad_norm": 0.07306591421365738,
      "learning_rate": 5.518930957683742e-05,
      "loss": 0.0118,
      "step": 3261
    },
    {
      "epoch": 0.7248888888888889,
      "grad_norm": 0.8607704043388367,
      "learning_rate": 5.514476614699332e-05,
      "loss": 2.1437,
      "step": 3262
    },
    {
      "epoch": 0.7251111111111112,
      "grad_norm": 0.8772170543670654,
      "learning_rate": 5.510022271714922e-05,
      "loss": 1.9092,
      "step": 3263
    },
    {
      "epoch": 0.7253333333333334,
      "grad_norm": 0.9902425408363342,
      "learning_rate": 5.505567928730513e-05,
      "loss": 2.1999,
      "step": 3264
    },
    {
      "epoch": 0.7255555555555555,
      "grad_norm": 0.926304817199707,
      "learning_rate": 5.501113585746103e-05,
      "loss": 2.0622,
      "step": 3265
    },
    {
      "epoch": 0.7257777777777777,
      "grad_norm": 0.8717379570007324,
      "learning_rate": 5.496659242761693e-05,
      "loss": 1.6769,
      "step": 3266
    },
    {
      "epoch": 0.726,
      "grad_norm": 1.0354970693588257,
      "learning_rate": 5.492204899777283e-05,
      "loss": 1.9093,
      "step": 3267
    },
    {
      "epoch": 0.7262222222222222,
      "grad_norm": 0.9445512890815735,
      "learning_rate": 5.487750556792873e-05,
      "loss": 1.9806,
      "step": 3268
    },
    {
      "epoch": 0.7264444444444444,
      "grad_norm": 0.9720260500907898,
      "learning_rate": 5.483296213808463e-05,
      "loss": 1.793,
      "step": 3269
    },
    {
      "epoch": 0.7266666666666667,
      "grad_norm": 0.932304859161377,
      "learning_rate": 5.478841870824054e-05,
      "loss": 1.8463,
      "step": 3270
    },
    {
      "epoch": 0.7268888888888889,
      "grad_norm": 0.9925035238265991,
      "learning_rate": 5.474387527839644e-05,
      "loss": 1.9726,
      "step": 3271
    },
    {
      "epoch": 0.7271111111111112,
      "grad_norm": 0.5608296990394592,
      "learning_rate": 5.469933184855234e-05,
      "loss": 0.7764,
      "step": 3272
    },
    {
      "epoch": 0.7273333333333334,
      "grad_norm": 0.6601234674453735,
      "learning_rate": 5.4654788418708245e-05,
      "loss": 0.8271,
      "step": 3273
    },
    {
      "epoch": 0.7275555555555555,
      "grad_norm": 0.6779617071151733,
      "learning_rate": 5.461024498886414e-05,
      "loss": 0.9032,
      "step": 3274
    },
    {
      "epoch": 0.7277777777777777,
      "grad_norm": 0.9753432869911194,
      "learning_rate": 5.456570155902004e-05,
      "loss": 1.7793,
      "step": 3275
    },
    {
      "epoch": 0.728,
      "grad_norm": 0.9676978588104248,
      "learning_rate": 5.4521158129175954e-05,
      "loss": 1.6972,
      "step": 3276
    },
    {
      "epoch": 0.7282222222222222,
      "grad_norm": 1.093235969543457,
      "learning_rate": 5.447661469933185e-05,
      "loss": 2.0882,
      "step": 3277
    },
    {
      "epoch": 0.7284444444444444,
      "grad_norm": 1.0347819328308105,
      "learning_rate": 5.443207126948775e-05,
      "loss": 2.039,
      "step": 3278
    },
    {
      "epoch": 0.7286666666666667,
      "grad_norm": 0.071097731590271,
      "learning_rate": 5.4387527839643657e-05,
      "loss": 0.0174,
      "step": 3279
    },
    {
      "epoch": 0.7288888888888889,
      "grad_norm": 0.9010851383209229,
      "learning_rate": 5.4342984409799555e-05,
      "loss": 1.0428,
      "step": 3280
    },
    {
      "epoch": 0.7291111111111112,
      "grad_norm": 0.07293925434350967,
      "learning_rate": 5.4298440979955454e-05,
      "loss": 0.0173,
      "step": 3281
    },
    {
      "epoch": 0.7293333333333333,
      "grad_norm": 1.1432619094848633,
      "learning_rate": 5.4253897550111366e-05,
      "loss": 1.8629,
      "step": 3282
    },
    {
      "epoch": 0.7295555555555555,
      "grad_norm": 1.1886756420135498,
      "learning_rate": 5.4209354120267264e-05,
      "loss": 1.6837,
      "step": 3283
    },
    {
      "epoch": 0.7297777777777777,
      "grad_norm": 1.0832699537277222,
      "learning_rate": 5.416481069042316e-05,
      "loss": 1.536,
      "step": 3284
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6643537878990173,
      "learning_rate": 5.412026726057907e-05,
      "loss": 0.7855,
      "step": 3285
    },
    {
      "epoch": 0.7302222222222222,
      "grad_norm": 1.0094225406646729,
      "learning_rate": 5.407572383073497e-05,
      "loss": 1.4139,
      "step": 3286
    },
    {
      "epoch": 0.7304444444444445,
      "grad_norm": 1.14029860496521,
      "learning_rate": 5.4031180400890866e-05,
      "loss": 1.5854,
      "step": 3287
    },
    {
      "epoch": 0.7306666666666667,
      "grad_norm": 0.9698799848556519,
      "learning_rate": 5.398663697104678e-05,
      "loss": 1.4863,
      "step": 3288
    },
    {
      "epoch": 0.7308888888888889,
      "grad_norm": 1.1054226160049438,
      "learning_rate": 5.3942093541202676e-05,
      "loss": 1.3324,
      "step": 3289
    },
    {
      "epoch": 0.7311111111111112,
      "grad_norm": 1.1010569334030151,
      "learning_rate": 5.3897550111358575e-05,
      "loss": 1.4656,
      "step": 3290
    },
    {
      "epoch": 0.7313333333333333,
      "grad_norm": 1.315499186515808,
      "learning_rate": 5.385300668151449e-05,
      "loss": 1.4048,
      "step": 3291
    },
    {
      "epoch": 0.7315555555555555,
      "grad_norm": 1.108127474784851,
      "learning_rate": 5.3808463251670386e-05,
      "loss": 1.327,
      "step": 3292
    },
    {
      "epoch": 0.7317777777777777,
      "grad_norm": 0.6661926507949829,
      "learning_rate": 5.376391982182628e-05,
      "loss": 0.6108,
      "step": 3293
    },
    {
      "epoch": 0.732,
      "grad_norm": 0.9805776476860046,
      "learning_rate": 5.371937639198219e-05,
      "loss": 1.1752,
      "step": 3294
    },
    {
      "epoch": 0.7322222222222222,
      "grad_norm": 1.0693986415863037,
      "learning_rate": 5.367483296213809e-05,
      "loss": 1.3078,
      "step": 3295
    },
    {
      "epoch": 0.7324444444444445,
      "grad_norm": 1.078148603439331,
      "learning_rate": 5.363028953229399e-05,
      "loss": 1.2446,
      "step": 3296
    },
    {
      "epoch": 0.7326666666666667,
      "grad_norm": 1.1625440120697021,
      "learning_rate": 5.35857461024499e-05,
      "loss": 1.2387,
      "step": 3297
    },
    {
      "epoch": 0.7328888888888889,
      "grad_norm": 1.1278488636016846,
      "learning_rate": 5.35412026726058e-05,
      "loss": 1.1962,
      "step": 3298
    },
    {
      "epoch": 0.7331111111111112,
      "grad_norm": 1.182511806488037,
      "learning_rate": 5.3496659242761696e-05,
      "loss": 1.1573,
      "step": 3299
    },
    {
      "epoch": 0.7333333333333333,
      "grad_norm": 1.1381057500839233,
      "learning_rate": 5.34521158129176e-05,
      "loss": 0.7817,
      "step": 3300
    },
    {
      "epoch": 0.7335555555555555,
      "grad_norm": 0.5531929135322571,
      "learning_rate": 5.34075723830735e-05,
      "loss": 0.8331,
      "step": 3301
    },
    {
      "epoch": 0.7337777777777778,
      "grad_norm": 0.8333101868629456,
      "learning_rate": 5.33630289532294e-05,
      "loss": 1.9768,
      "step": 3302
    },
    {
      "epoch": 0.734,
      "grad_norm": 0.6918635964393616,
      "learning_rate": 5.331848552338531e-05,
      "loss": 1.0828,
      "step": 3303
    },
    {
      "epoch": 0.7342222222222222,
      "grad_norm": 0.9859722256660461,
      "learning_rate": 5.327394209354121e-05,
      "loss": 2.2754,
      "step": 3304
    },
    {
      "epoch": 0.7344444444444445,
      "grad_norm": 0.6960622072219849,
      "learning_rate": 5.322939866369711e-05,
      "loss": 1.0663,
      "step": 3305
    },
    {
      "epoch": 0.7346666666666667,
      "grad_norm": 1.1575109958648682,
      "learning_rate": 5.3184855233853006e-05,
      "loss": 2.2622,
      "step": 3306
    },
    {
      "epoch": 0.7348888888888889,
      "grad_norm": 0.5985379219055176,
      "learning_rate": 5.314031180400891e-05,
      "loss": 1.0319,
      "step": 3307
    },
    {
      "epoch": 0.7351111111111112,
      "grad_norm": 0.06290951371192932,
      "learning_rate": 5.309576837416481e-05,
      "loss": 0.0109,
      "step": 3308
    },
    {
      "epoch": 0.7353333333333333,
      "grad_norm": 0.06811843812465668,
      "learning_rate": 5.305122494432071e-05,
      "loss": 0.0109,
      "step": 3309
    },
    {
      "epoch": 0.7355555555555555,
      "grad_norm": 0.06429023295640945,
      "learning_rate": 5.300668151447662e-05,
      "loss": 0.0107,
      "step": 3310
    },
    {
      "epoch": 0.7357777777777778,
      "grad_norm": 0.06323552876710892,
      "learning_rate": 5.296213808463252e-05,
      "loss": 0.0107,
      "step": 3311
    },
    {
      "epoch": 0.736,
      "grad_norm": 0.6487092971801758,
      "learning_rate": 5.291759465478842e-05,
      "loss": 0.9286,
      "step": 3312
    },
    {
      "epoch": 0.7362222222222222,
      "grad_norm": 0.8638578653335571,
      "learning_rate": 5.2873051224944324e-05,
      "loss": 1.8427,
      "step": 3313
    },
    {
      "epoch": 0.7364444444444445,
      "grad_norm": 0.9095218181610107,
      "learning_rate": 5.282850779510022e-05,
      "loss": 2.0546,
      "step": 3314
    },
    {
      "epoch": 0.7366666666666667,
      "grad_norm": 0.87845379114151,
      "learning_rate": 5.278396436525612e-05,
      "loss": 1.9648,
      "step": 3315
    },
    {
      "epoch": 0.7368888888888889,
      "grad_norm": 0.8854038119316101,
      "learning_rate": 5.273942093541203e-05,
      "loss": 1.8114,
      "step": 3316
    },
    {
      "epoch": 0.7371111111111112,
      "grad_norm": 0.5725350379943848,
      "learning_rate": 5.269487750556793e-05,
      "loss": 1.0721,
      "step": 3317
    },
    {
      "epoch": 0.7373333333333333,
      "grad_norm": 0.6683716177940369,
      "learning_rate": 5.265033407572383e-05,
      "loss": 0.9192,
      "step": 3318
    },
    {
      "epoch": 0.7375555555555555,
      "grad_norm": 0.9927780628204346,
      "learning_rate": 5.2605790645879735e-05,
      "loss": 1.8748,
      "step": 3319
    },
    {
      "epoch": 0.7377777777777778,
      "grad_norm": 0.8612250685691833,
      "learning_rate": 5.2561247216035634e-05,
      "loss": 1.8307,
      "step": 3320
    },
    {
      "epoch": 0.738,
      "grad_norm": 0.9024035930633545,
      "learning_rate": 5.251670378619153e-05,
      "loss": 1.8448,
      "step": 3321
    },
    {
      "epoch": 0.7382222222222222,
      "grad_norm": 0.969914436340332,
      "learning_rate": 5.2472160356347445e-05,
      "loss": 1.866,
      "step": 3322
    },
    {
      "epoch": 0.7384444444444445,
      "grad_norm": 0.6315984129905701,
      "learning_rate": 5.242761692650334e-05,
      "loss": 0.9124,
      "step": 3323
    },
    {
      "epoch": 0.7386666666666667,
      "grad_norm": 0.07167524099349976,
      "learning_rate": 5.238307349665924e-05,
      "loss": 0.0158,
      "step": 3324
    },
    {
      "epoch": 0.7388888888888889,
      "grad_norm": 0.07736406475305557,
      "learning_rate": 5.233853006681515e-05,
      "loss": 0.0161,
      "step": 3325
    },
    {
      "epoch": 0.7391111111111112,
      "grad_norm": 0.07857107371091843,
      "learning_rate": 5.2293986636971046e-05,
      "loss": 0.0164,
      "step": 3326
    },
    {
      "epoch": 0.7393333333333333,
      "grad_norm": 0.0633215382695198,
      "learning_rate": 5.2249443207126944e-05,
      "loss": 0.0173,
      "step": 3327
    },
    {
      "epoch": 0.7395555555555555,
      "grad_norm": 0.7630808353424072,
      "learning_rate": 5.2204899777282857e-05,
      "loss": 0.9757,
      "step": 3328
    },
    {
      "epoch": 0.7397777777777778,
      "grad_norm": 0.8969722986221313,
      "learning_rate": 5.2160356347438755e-05,
      "loss": 1.6171,
      "step": 3329
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9955383539199829,
      "learning_rate": 5.2115812917594654e-05,
      "loss": 1.6627,
      "step": 3330
    },
    {
      "epoch": 0.7402222222222222,
      "grad_norm": 1.0531073808670044,
      "learning_rate": 5.2071269487750566e-05,
      "loss": 1.7925,
      "step": 3331
    },
    {
      "epoch": 0.7404444444444445,
      "grad_norm": 1.1096101999282837,
      "learning_rate": 5.202672605790646e-05,
      "loss": 1.4716,
      "step": 3332
    },
    {
      "epoch": 0.7406666666666667,
      "grad_norm": 0.06471211463212967,
      "learning_rate": 5.1982182628062356e-05,
      "loss": 0.0184,
      "step": 3333
    },
    {
      "epoch": 0.7408888888888889,
      "grad_norm": 0.07156452536582947,
      "learning_rate": 5.193763919821827e-05,
      "loss": 0.0183,
      "step": 3334
    },
    {
      "epoch": 0.7411111111111112,
      "grad_norm": 0.7111669182777405,
      "learning_rate": 5.189309576837417e-05,
      "loss": 0.8435,
      "step": 3335
    },
    {
      "epoch": 0.7413333333333333,
      "grad_norm": 0.784017026424408,
      "learning_rate": 5.1848552338530066e-05,
      "loss": 0.9037,
      "step": 3336
    },
    {
      "epoch": 0.7415555555555555,
      "grad_norm": 0.07464414834976196,
      "learning_rate": 5.180400890868598e-05,
      "loss": 0.0173,
      "step": 3337
    },
    {
      "epoch": 0.7417777777777778,
      "grad_norm": 0.7238468527793884,
      "learning_rate": 5.1759465478841876e-05,
      "loss": 0.8807,
      "step": 3338
    },
    {
      "epoch": 0.742,
      "grad_norm": 0.07420375943183899,
      "learning_rate": 5.1714922048997775e-05,
      "loss": 0.0192,
      "step": 3339
    },
    {
      "epoch": 0.7422222222222222,
      "grad_norm": 0.07133994251489639,
      "learning_rate": 5.167037861915368e-05,
      "loss": 0.0189,
      "step": 3340
    },
    {
      "epoch": 0.7424444444444445,
      "grad_norm": 0.0961189940571785,
      "learning_rate": 5.162583518930958e-05,
      "loss": 0.0194,
      "step": 3341
    },
    {
      "epoch": 0.7426666666666667,
      "grad_norm": 1.0209311246871948,
      "learning_rate": 5.158129175946548e-05,
      "loss": 1.7523,
      "step": 3342
    },
    {
      "epoch": 0.7428888888888889,
      "grad_norm": 1.067814588546753,
      "learning_rate": 5.153674832962139e-05,
      "loss": 1.7394,
      "step": 3343
    },
    {
      "epoch": 0.7431111111111111,
      "grad_norm": 0.10426237434148788,
      "learning_rate": 5.149220489977729e-05,
      "loss": 0.0266,
      "step": 3344
    },
    {
      "epoch": 0.7433333333333333,
      "grad_norm": 1.1256235837936401,
      "learning_rate": 5.144766146993319e-05,
      "loss": 1.5493,
      "step": 3345
    },
    {
      "epoch": 0.7435555555555555,
      "grad_norm": 1.0838463306427002,
      "learning_rate": 5.140311804008909e-05,
      "loss": 1.508,
      "step": 3346
    },
    {
      "epoch": 0.7437777777777778,
      "grad_norm": 1.0034325122833252,
      "learning_rate": 5.135857461024499e-05,
      "loss": 1.3716,
      "step": 3347
    },
    {
      "epoch": 0.744,
      "grad_norm": 1.1057904958724976,
      "learning_rate": 5.131403118040089e-05,
      "loss": 0.9587,
      "step": 3348
    },
    {
      "epoch": 0.7442222222222222,
      "grad_norm": 0.19667142629623413,
      "learning_rate": 5.12694877505568e-05,
      "loss": 0.0377,
      "step": 3349
    },
    {
      "epoch": 0.7444444444444445,
      "grad_norm": 1.0404895544052124,
      "learning_rate": 5.12249443207127e-05,
      "loss": 1.0799,
      "step": 3350
    },
    {
      "epoch": 0.7446666666666667,
      "grad_norm": 0.8521629571914673,
      "learning_rate": 5.11804008908686e-05,
      "loss": 2.0826,
      "step": 3351
    },
    {
      "epoch": 0.7448888888888889,
      "grad_norm": 0.046493686735630035,
      "learning_rate": 5.1135857461024504e-05,
      "loss": 0.0101,
      "step": 3352
    },
    {
      "epoch": 0.7451111111111111,
      "grad_norm": 0.04533799737691879,
      "learning_rate": 5.10913140311804e-05,
      "loss": 0.0102,
      "step": 3353
    },
    {
      "epoch": 0.7453333333333333,
      "grad_norm": 0.6256393194198608,
      "learning_rate": 5.10467706013363e-05,
      "loss": 1.2161,
      "step": 3354
    },
    {
      "epoch": 0.7455555555555555,
      "grad_norm": 0.5878841280937195,
      "learning_rate": 5.100222717149221e-05,
      "loss": 1.1603,
      "step": 3355
    },
    {
      "epoch": 0.7457777777777778,
      "grad_norm": 0.04651748016476631,
      "learning_rate": 5.095768374164811e-05,
      "loss": 0.01,
      "step": 3356
    },
    {
      "epoch": 0.746,
      "grad_norm": 0.03794243186712265,
      "learning_rate": 5.091314031180401e-05,
      "loss": 0.01,
      "step": 3357
    },
    {
      "epoch": 0.7462222222222222,
      "grad_norm": 0.04922659322619438,
      "learning_rate": 5.0868596881959916e-05,
      "loss": 0.0097,
      "step": 3358
    },
    {
      "epoch": 0.7464444444444445,
      "grad_norm": 0.8625622391700745,
      "learning_rate": 5.0824053452115814e-05,
      "loss": 1.8859,
      "step": 3359
    },
    {
      "epoch": 0.7466666666666667,
      "grad_norm": 0.8704177141189575,
      "learning_rate": 5.077951002227171e-05,
      "loss": 1.9087,
      "step": 3360
    },
    {
      "epoch": 0.7468888888888889,
      "grad_norm": 0.9514003992080688,
      "learning_rate": 5.0734966592427625e-05,
      "loss": 2.1152,
      "step": 3361
    },
    {
      "epoch": 0.7471111111111111,
      "grad_norm": 0.9952490925788879,
      "learning_rate": 5.0690423162583524e-05,
      "loss": 2.237,
      "step": 3362
    },
    {
      "epoch": 0.7473333333333333,
      "grad_norm": 1.0425519943237305,
      "learning_rate": 5.064587973273942e-05,
      "loss": 2.1412,
      "step": 3363
    },
    {
      "epoch": 0.7475555555555555,
      "grad_norm": 0.7753322124481201,
      "learning_rate": 5.060133630289533e-05,
      "loss": 1.7639,
      "step": 3364
    },
    {
      "epoch": 0.7477777777777778,
      "grad_norm": 0.9439111351966858,
      "learning_rate": 5.0556792873051226e-05,
      "loss": 1.7622,
      "step": 3365
    },
    {
      "epoch": 0.748,
      "grad_norm": 0.9274625778198242,
      "learning_rate": 5.0512249443207125e-05,
      "loss": 2.1017,
      "step": 3366
    },
    {
      "epoch": 0.7482222222222222,
      "grad_norm": 0.9550508856773376,
      "learning_rate": 5.046770601336304e-05,
      "loss": 1.8416,
      "step": 3367
    },
    {
      "epoch": 0.7484444444444445,
      "grad_norm": 0.8628423810005188,
      "learning_rate": 5.0423162583518935e-05,
      "loss": 1.9227,
      "step": 3368
    },
    {
      "epoch": 0.7486666666666667,
      "grad_norm": 1.0649088621139526,
      "learning_rate": 5.0378619153674834e-05,
      "loss": 2.1865,
      "step": 3369
    },
    {
      "epoch": 0.7488888888888889,
      "grad_norm": 0.9452845454216003,
      "learning_rate": 5.033407572383074e-05,
      "loss": 1.9341,
      "step": 3370
    },
    {
      "epoch": 0.7491111111111111,
      "grad_norm": 0.9852356910705566,
      "learning_rate": 5.028953229398664e-05,
      "loss": 1.6767,
      "step": 3371
    },
    {
      "epoch": 0.7493333333333333,
      "grad_norm": 0.9458546042442322,
      "learning_rate": 5.0244988864142536e-05,
      "loss": 1.786,
      "step": 3372
    },
    {
      "epoch": 0.7495555555555555,
      "grad_norm": 0.07178652286529541,
      "learning_rate": 5.020044543429845e-05,
      "loss": 0.015,
      "step": 3373
    },
    {
      "epoch": 0.7497777777777778,
      "grad_norm": 0.07055787742137909,
      "learning_rate": 5.015590200445435e-05,
      "loss": 0.0153,
      "step": 3374
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6104269027709961,
      "learning_rate": 5.0111358574610246e-05,
      "loss": 0.8618,
      "step": 3375
    },
    {
      "epoch": 0.7502222222222222,
      "grad_norm": 0.6599386930465698,
      "learning_rate": 5.006681514476616e-05,
      "loss": 0.8642,
      "step": 3376
    },
    {
      "epoch": 0.7504444444444445,
      "grad_norm": 0.6750035881996155,
      "learning_rate": 5.0022271714922056e-05,
      "loss": 0.8647,
      "step": 3377
    },
    {
      "epoch": 0.7506666666666667,
      "grad_norm": 0.9692963361740112,
      "learning_rate": 4.997772828507795e-05,
      "loss": 1.8036,
      "step": 3378
    },
    {
      "epoch": 0.7508888888888889,
      "grad_norm": 1.0836691856384277,
      "learning_rate": 4.9933184855233854e-05,
      "loss": 2.039,
      "step": 3379
    },
    {
      "epoch": 0.7511111111111111,
      "grad_norm": 0.06479348987340927,
      "learning_rate": 4.988864142538976e-05,
      "loss": 0.0173,
      "step": 3380
    },
    {
      "epoch": 0.7513333333333333,
      "grad_norm": 0.06957981735467911,
      "learning_rate": 4.984409799554566e-05,
      "loss": 0.0166,
      "step": 3381
    },
    {
      "epoch": 0.7515555555555555,
      "grad_norm": 0.666901707649231,
      "learning_rate": 4.979955456570156e-05,
      "loss": 0.7578,
      "step": 3382
    },
    {
      "epoch": 0.7517777777777778,
      "grad_norm": 1.0305155515670776,
      "learning_rate": 4.975501113585747e-05,
      "loss": 1.6703,
      "step": 3383
    },
    {
      "epoch": 0.752,
      "grad_norm": 0.9969210624694824,
      "learning_rate": 4.971046770601337e-05,
      "loss": 1.7831,
      "step": 3384
    },
    {
      "epoch": 0.7522222222222222,
      "grad_norm": 0.068308025598526,
      "learning_rate": 4.9665924276169265e-05,
      "loss": 0.018,
      "step": 3385
    },
    {
      "epoch": 0.7524444444444445,
      "grad_norm": 0.06835668534040451,
      "learning_rate": 4.962138084632517e-05,
      "loss": 0.0171,
      "step": 3386
    },
    {
      "epoch": 0.7526666666666667,
      "grad_norm": 0.562114417552948,
      "learning_rate": 4.957683741648107e-05,
      "loss": 0.8015,
      "step": 3387
    },
    {
      "epoch": 0.7528888888888889,
      "grad_norm": 0.9326373338699341,
      "learning_rate": 4.9532293986636975e-05,
      "loss": 1.7364,
      "step": 3388
    },
    {
      "epoch": 0.7531111111111111,
      "grad_norm": 1.0560567378997803,
      "learning_rate": 4.948775055679288e-05,
      "loss": 1.3854,
      "step": 3389
    },
    {
      "epoch": 0.7533333333333333,
      "grad_norm": 1.0617526769638062,
      "learning_rate": 4.944320712694878e-05,
      "loss": 1.3826,
      "step": 3390
    },
    {
      "epoch": 0.7535555555555555,
      "grad_norm": 0.6773163080215454,
      "learning_rate": 4.939866369710468e-05,
      "loss": 0.9724,
      "step": 3391
    },
    {
      "epoch": 0.7537777777777778,
      "grad_norm": 0.8919631838798523,
      "learning_rate": 4.935412026726058e-05,
      "loss": 1.4029,
      "step": 3392
    },
    {
      "epoch": 0.754,
      "grad_norm": 1.0007896423339844,
      "learning_rate": 4.930957683741648e-05,
      "loss": 1.3675,
      "step": 3393
    },
    {
      "epoch": 0.7542222222222222,
      "grad_norm": 1.1181669235229492,
      "learning_rate": 4.9265033407572387e-05,
      "loss": 1.5695,
      "step": 3394
    },
    {
      "epoch": 0.7544444444444445,
      "grad_norm": 1.058223843574524,
      "learning_rate": 4.922048997772829e-05,
      "loss": 1.444,
      "step": 3395
    },
    {
      "epoch": 0.7546666666666667,
      "grad_norm": 1.0917662382125854,
      "learning_rate": 4.917594654788419e-05,
      "loss": 1.5776,
      "step": 3396
    },
    {
      "epoch": 0.7548888888888889,
      "grad_norm": 1.2129132747650146,
      "learning_rate": 4.913140311804009e-05,
      "loss": 1.5378,
      "step": 3397
    },
    {
      "epoch": 0.7551111111111111,
      "grad_norm": 0.7757513523101807,
      "learning_rate": 4.908685968819599e-05,
      "loss": 0.7143,
      "step": 3398
    },
    {
      "epoch": 0.7553333333333333,
      "grad_norm": 1.0675660371780396,
      "learning_rate": 4.904231625835189e-05,
      "loss": 1.2625,
      "step": 3399
    },
    {
      "epoch": 0.7555555555555555,
      "grad_norm": 0.7911191582679749,
      "learning_rate": 4.89977728285078e-05,
      "loss": 0.6726,
      "step": 3400
    },
    {
      "epoch": 0.7557777777777778,
      "grad_norm": 0.936028003692627,
      "learning_rate": 4.89532293986637e-05,
      "loss": 2.5741,
      "step": 3401
    },
    {
      "epoch": 0.756,
      "grad_norm": 0.04625101387500763,
      "learning_rate": 4.89086859688196e-05,
      "loss": 0.0098,
      "step": 3402
    },
    {
      "epoch": 0.7562222222222222,
      "grad_norm": 0.5739651918411255,
      "learning_rate": 4.886414253897551e-05,
      "loss": 1.0021,
      "step": 3403
    },
    {
      "epoch": 0.7564444444444445,
      "grad_norm": 0.874405562877655,
      "learning_rate": 4.8819599109131406e-05,
      "loss": 2.1036,
      "step": 3404
    },
    {
      "epoch": 0.7566666666666667,
      "grad_norm": 0.5654922723770142,
      "learning_rate": 4.8775055679287305e-05,
      "loss": 0.9892,
      "step": 3405
    },
    {
      "epoch": 0.7568888888888889,
      "grad_norm": 0.6591737866401672,
      "learning_rate": 4.873051224944321e-05,
      "loss": 0.9575,
      "step": 3406
    },
    {
      "epoch": 0.7571111111111111,
      "grad_norm": 0.05461383983492851,
      "learning_rate": 4.868596881959911e-05,
      "loss": 0.011,
      "step": 3407
    },
    {
      "epoch": 0.7573333333333333,
      "grad_norm": 0.0622735358774662,
      "learning_rate": 4.8641425389755014e-05,
      "loss": 0.0112,
      "step": 3408
    },
    {
      "epoch": 0.7575555555555555,
      "grad_norm": 0.059408292174339294,
      "learning_rate": 4.859688195991092e-05,
      "loss": 0.011,
      "step": 3409
    },
    {
      "epoch": 0.7577777777777778,
      "grad_norm": 0.6495372653007507,
      "learning_rate": 4.855233853006682e-05,
      "loss": 0.8378,
      "step": 3410
    },
    {
      "epoch": 0.758,
      "grad_norm": 0.9061746001243591,
      "learning_rate": 4.850779510022272e-05,
      "loss": 2.2088,
      "step": 3411
    },
    {
      "epoch": 0.7582222222222222,
      "grad_norm": 0.8633875846862793,
      "learning_rate": 4.846325167037862e-05,
      "loss": 1.9511,
      "step": 3412
    },
    {
      "epoch": 0.7584444444444445,
      "grad_norm": 1.055767297744751,
      "learning_rate": 4.841870824053452e-05,
      "loss": 1.9755,
      "step": 3413
    },
    {
      "epoch": 0.7586666666666667,
      "grad_norm": 0.8679887056350708,
      "learning_rate": 4.8374164810690426e-05,
      "loss": 1.8676,
      "step": 3414
    },
    {
      "epoch": 0.7588888888888888,
      "grad_norm": 0.9158828258514404,
      "learning_rate": 4.832962138084633e-05,
      "loss": 2.0772,
      "step": 3415
    },
    {
      "epoch": 0.7591111111111111,
      "grad_norm": 0.6672974228858948,
      "learning_rate": 4.828507795100223e-05,
      "loss": 1.1813,
      "step": 3416
    },
    {
      "epoch": 0.7593333333333333,
      "grad_norm": 0.9546223282814026,
      "learning_rate": 4.824053452115813e-05,
      "loss": 1.9467,
      "step": 3417
    },
    {
      "epoch": 0.7595555555555555,
      "grad_norm": 1.0391935110092163,
      "learning_rate": 4.8195991091314034e-05,
      "loss": 2.041,
      "step": 3418
    },
    {
      "epoch": 0.7597777777777778,
      "grad_norm": 1.0147621631622314,
      "learning_rate": 4.815144766146993e-05,
      "loss": 2.0473,
      "step": 3419
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6334058046340942,
      "learning_rate": 4.810690423162584e-05,
      "loss": 0.8882,
      "step": 3420
    },
    {
      "epoch": 0.7602222222222222,
      "grad_norm": 0.06809257715940475,
      "learning_rate": 4.806236080178174e-05,
      "loss": 0.0153,
      "step": 3421
    },
    {
      "epoch": 0.7604444444444445,
      "grad_norm": 0.06833475828170776,
      "learning_rate": 4.801781737193764e-05,
      "loss": 0.0156,
      "step": 3422
    },
    {
      "epoch": 0.7606666666666667,
      "grad_norm": 0.09722508490085602,
      "learning_rate": 4.797327394209355e-05,
      "loss": 0.0179,
      "step": 3423
    },
    {
      "epoch": 0.7608888888888888,
      "grad_norm": 0.92330402135849,
      "learning_rate": 4.7928730512249446e-05,
      "loss": 1.7349,
      "step": 3424
    },
    {
      "epoch": 0.7611111111111111,
      "grad_norm": 1.0066584348678589,
      "learning_rate": 4.7884187082405344e-05,
      "loss": 1.6615,
      "step": 3425
    },
    {
      "epoch": 0.7613333333333333,
      "grad_norm": 0.9122890830039978,
      "learning_rate": 4.783964365256125e-05,
      "loss": 1.9283,
      "step": 3426
    },
    {
      "epoch": 0.7615555555555555,
      "grad_norm": 1.0834369659423828,
      "learning_rate": 4.7795100222717155e-05,
      "loss": 1.8457,
      "step": 3427
    },
    {
      "epoch": 0.7617777777777778,
      "grad_norm": 0.9122326970100403,
      "learning_rate": 4.7750556792873054e-05,
      "loss": 1.5779,
      "step": 3428
    },
    {
      "epoch": 0.762,
      "grad_norm": 0.6459372639656067,
      "learning_rate": 4.770601336302896e-05,
      "loss": 0.8526,
      "step": 3429
    },
    {
      "epoch": 0.7622222222222222,
      "grad_norm": 0.06661590933799744,
      "learning_rate": 4.766146993318486e-05,
      "loss": 0.018,
      "step": 3430
    },
    {
      "epoch": 0.7624444444444445,
      "grad_norm": 0.06595264375209808,
      "learning_rate": 4.7616926503340756e-05,
      "loss": 0.0176,
      "step": 3431
    },
    {
      "epoch": 0.7626666666666667,
      "grad_norm": 0.06258884072303772,
      "learning_rate": 4.757238307349666e-05,
      "loss": 0.0175,
      "step": 3432
    },
    {
      "epoch": 0.7628888888888888,
      "grad_norm": 0.9908372163772583,
      "learning_rate": 4.752783964365256e-05,
      "loss": 1.5601,
      "step": 3433
    },
    {
      "epoch": 0.7631111111111111,
      "grad_norm": 1.1008018255233765,
      "learning_rate": 4.7483296213808465e-05,
      "loss": 1.9175,
      "step": 3434
    },
    {
      "epoch": 0.7633333333333333,
      "grad_norm": 0.06766713410615921,
      "learning_rate": 4.743875278396437e-05,
      "loss": 0.0185,
      "step": 3435
    },
    {
      "epoch": 0.7635555555555555,
      "grad_norm": 0.06862013787031174,
      "learning_rate": 4.739420935412027e-05,
      "loss": 0.018,
      "step": 3436
    },
    {
      "epoch": 0.7637777777777778,
      "grad_norm": 0.995215654373169,
      "learning_rate": 4.734966592427617e-05,
      "loss": 1.6609,
      "step": 3437
    },
    {
      "epoch": 0.764,
      "grad_norm": 1.1150976419448853,
      "learning_rate": 4.730512249443207e-05,
      "loss": 1.5773,
      "step": 3438
    },
    {
      "epoch": 0.7642222222222222,
      "grad_norm": 0.10110121965408325,
      "learning_rate": 4.726057906458797e-05,
      "loss": 0.0254,
      "step": 3439
    },
    {
      "epoch": 0.7644444444444445,
      "grad_norm": 0.8509777188301086,
      "learning_rate": 4.721603563474388e-05,
      "loss": 0.8449,
      "step": 3440
    },
    {
      "epoch": 0.7646666666666667,
      "grad_norm": 1.163260579109192,
      "learning_rate": 4.717149220489978e-05,
      "loss": 1.6988,
      "step": 3441
    },
    {
      "epoch": 0.7648888888888888,
      "grad_norm": 1.1963449716567993,
      "learning_rate": 4.712694877505568e-05,
      "loss": 1.6756,
      "step": 3442
    },
    {
      "epoch": 0.7651111111111111,
      "grad_norm": 1.1867884397506714,
      "learning_rate": 4.7082405345211587e-05,
      "loss": 1.6131,
      "step": 3443
    },
    {
      "epoch": 0.7653333333333333,
      "grad_norm": 1.0478819608688354,
      "learning_rate": 4.7037861915367485e-05,
      "loss": 1.4666,
      "step": 3444
    },
    {
      "epoch": 0.7655555555555555,
      "grad_norm": 1.076615571975708,
      "learning_rate": 4.6993318485523384e-05,
      "loss": 1.3148,
      "step": 3445
    },
    {
      "epoch": 0.7657777777777778,
      "grad_norm": 0.7551054954528809,
      "learning_rate": 4.694877505567929e-05,
      "loss": 0.7423,
      "step": 3446
    },
    {
      "epoch": 0.766,
      "grad_norm": 0.7709291577339172,
      "learning_rate": 4.6904231625835194e-05,
      "loss": 0.749,
      "step": 3447
    },
    {
      "epoch": 0.7662222222222222,
      "grad_norm": 0.9779494404792786,
      "learning_rate": 4.685968819599109e-05,
      "loss": 1.1534,
      "step": 3448
    },
    {
      "epoch": 0.7664444444444445,
      "grad_norm": 0.7176189422607422,
      "learning_rate": 4.6815144766147e-05,
      "loss": 0.5791,
      "step": 3449
    },
    {
      "epoch": 0.7666666666666667,
      "grad_norm": 2.1151397228240967,
      "learning_rate": 4.67706013363029e-05,
      "loss": 1.2452,
      "step": 3450
    },
    {
      "epoch": 0.7668888888888888,
      "grad_norm": 0.04698283597826958,
      "learning_rate": 4.6726057906458796e-05,
      "loss": 0.0105,
      "step": 3451
    },
    {
      "epoch": 0.7671111111111111,
      "grad_norm": 0.806088387966156,
      "learning_rate": 4.66815144766147e-05,
      "loss": 2.1148,
      "step": 3452
    },
    {
      "epoch": 0.7673333333333333,
      "grad_norm": 0.9896338582038879,
      "learning_rate": 4.6636971046770606e-05,
      "loss": 2.4891,
      "step": 3453
    },
    {
      "epoch": 0.7675555555555555,
      "grad_norm": 0.6359859704971313,
      "learning_rate": 4.6592427616926505e-05,
      "loss": 0.8817,
      "step": 3454
    },
    {
      "epoch": 0.7677777777777778,
      "grad_norm": 0.6366167068481445,
      "learning_rate": 4.654788418708241e-05,
      "loss": 1.1016,
      "step": 3455
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.6625463366508484,
      "learning_rate": 4.650334075723831e-05,
      "loss": 1.2164,
      "step": 3456
    },
    {
      "epoch": 0.7682222222222223,
      "grad_norm": 0.6121510863304138,
      "learning_rate": 4.645879732739421e-05,
      "loss": 0.8625,
      "step": 3457
    },
    {
      "epoch": 0.7684444444444445,
      "grad_norm": 1.0105525255203247,
      "learning_rate": 4.641425389755011e-05,
      "loss": 2.7758,
      "step": 3458
    },
    {
      "epoch": 0.7686666666666667,
      "grad_norm": 0.8283724188804626,
      "learning_rate": 4.636971046770602e-05,
      "loss": 2.0447,
      "step": 3459
    },
    {
      "epoch": 0.7688888888888888,
      "grad_norm": 0.06813201308250427,
      "learning_rate": 4.632516703786192e-05,
      "loss": 0.0111,
      "step": 3460
    },
    {
      "epoch": 0.7691111111111111,
      "grad_norm": 0.0665576308965683,
      "learning_rate": 4.628062360801782e-05,
      "loss": 0.011,
      "step": 3461
    },
    {
      "epoch": 0.7693333333333333,
      "grad_norm": 0.06463496387004852,
      "learning_rate": 4.623608017817373e-05,
      "loss": 0.0111,
      "step": 3462
    },
    {
      "epoch": 0.7695555555555555,
      "grad_norm": 0.8166987895965576,
      "learning_rate": 4.619153674832962e-05,
      "loss": 2.0366,
      "step": 3463
    },
    {
      "epoch": 0.7697777777777778,
      "grad_norm": 0.9549795985221863,
      "learning_rate": 4.6146993318485525e-05,
      "loss": 1.8478,
      "step": 3464
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8335583209991455,
      "learning_rate": 4.610244988864143e-05,
      "loss": 1.8226,
      "step": 3465
    },
    {
      "epoch": 0.7702222222222223,
      "grad_norm": 0.9823237657546997,
      "learning_rate": 4.605790645879733e-05,
      "loss": 2.114,
      "step": 3466
    },
    {
      "epoch": 0.7704444444444445,
      "grad_norm": 0.9316264986991882,
      "learning_rate": 4.6013363028953234e-05,
      "loss": 2.0765,
      "step": 3467
    },
    {
      "epoch": 0.7706666666666667,
      "grad_norm": 0.8862332701683044,
      "learning_rate": 4.596881959910914e-05,
      "loss": 1.8899,
      "step": 3468
    },
    {
      "epoch": 0.7708888888888888,
      "grad_norm": 0.9615729451179504,
      "learning_rate": 4.592427616926504e-05,
      "loss": 1.9076,
      "step": 3469
    },
    {
      "epoch": 0.7711111111111111,
      "grad_norm": 0.107745461165905,
      "learning_rate": 4.5879732739420936e-05,
      "loss": 0.0184,
      "step": 3470
    },
    {
      "epoch": 0.7713333333333333,
      "grad_norm": 0.9588910341262817,
      "learning_rate": 4.5835189309576835e-05,
      "loss": 1.7689,
      "step": 3471
    },
    {
      "epoch": 0.7715555555555556,
      "grad_norm": 0.8808805346488953,
      "learning_rate": 4.579064587973274e-05,
      "loss": 2.204,
      "step": 3472
    },
    {
      "epoch": 0.7717777777777778,
      "grad_norm": 1.1614326238632202,
      "learning_rate": 4.5746102449888646e-05,
      "loss": 1.9956,
      "step": 3473
    },
    {
      "epoch": 0.772,
      "grad_norm": 0.8884471654891968,
      "learning_rate": 4.5701559020044544e-05,
      "loss": 1.875,
      "step": 3474
    },
    {
      "epoch": 0.7722222222222223,
      "grad_norm": 0.9541723728179932,
      "learning_rate": 4.565701559020045e-05,
      "loss": 1.8806,
      "step": 3475
    },
    {
      "epoch": 0.7724444444444445,
      "grad_norm": 0.07305742055177689,
      "learning_rate": 4.561247216035635e-05,
      "loss": 0.0169,
      "step": 3476
    },
    {
      "epoch": 0.7726666666666666,
      "grad_norm": 0.6821660399436951,
      "learning_rate": 4.556792873051225e-05,
      "loss": 1.079,
      "step": 3477
    },
    {
      "epoch": 0.7728888888888888,
      "grad_norm": 0.9830121994018555,
      "learning_rate": 4.552338530066815e-05,
      "loss": 1.7285,
      "step": 3478
    },
    {
      "epoch": 0.7731111111111111,
      "grad_norm": 0.6831437945365906,
      "learning_rate": 4.547884187082406e-05,
      "loss": 0.8884,
      "step": 3479
    },
    {
      "epoch": 0.7733333333333333,
      "grad_norm": 1.0051524639129639,
      "learning_rate": 4.5434298440979956e-05,
      "loss": 1.8973,
      "step": 3480
    },
    {
      "epoch": 0.7735555555555556,
      "grad_norm": 0.9727129340171814,
      "learning_rate": 4.538975501113586e-05,
      "loss": 2.042,
      "step": 3481
    },
    {
      "epoch": 0.7737777777777778,
      "grad_norm": 1.0296839475631714,
      "learning_rate": 4.534521158129176e-05,
      "loss": 1.7367,
      "step": 3482
    },
    {
      "epoch": 0.774,
      "grad_norm": 0.9972522258758545,
      "learning_rate": 4.530066815144766e-05,
      "loss": 1.8867,
      "step": 3483
    },
    {
      "epoch": 0.7742222222222223,
      "grad_norm": 1.0227113962173462,
      "learning_rate": 4.5256124721603564e-05,
      "loss": 1.8279,
      "step": 3484
    },
    {
      "epoch": 0.7744444444444445,
      "grad_norm": 1.061448335647583,
      "learning_rate": 4.521158129175947e-05,
      "loss": 1.9245,
      "step": 3485
    },
    {
      "epoch": 0.7746666666666666,
      "grad_norm": 0.6845740675926208,
      "learning_rate": 4.516703786191537e-05,
      "loss": 0.9532,
      "step": 3486
    },
    {
      "epoch": 0.7748888888888888,
      "grad_norm": 1.010504961013794,
      "learning_rate": 4.512249443207127e-05,
      "loss": 1.6469,
      "step": 3487
    },
    {
      "epoch": 0.7751111111111111,
      "grad_norm": 1.15483820438385,
      "learning_rate": 4.507795100222718e-05,
      "loss": 1.735,
      "step": 3488
    },
    {
      "epoch": 0.7753333333333333,
      "grad_norm": 0.06943599879741669,
      "learning_rate": 4.503340757238308e-05,
      "loss": 0.0192,
      "step": 3489
    },
    {
      "epoch": 0.7755555555555556,
      "grad_norm": 0.06758453696966171,
      "learning_rate": 4.4988864142538976e-05,
      "loss": 0.0185,
      "step": 3490
    },
    {
      "epoch": 0.7757777777777778,
      "grad_norm": 0.06915237754583359,
      "learning_rate": 4.494432071269488e-05,
      "loss": 0.0189,
      "step": 3491
    },
    {
      "epoch": 0.776,
      "grad_norm": 0.7292212843894958,
      "learning_rate": 4.489977728285078e-05,
      "loss": 0.8598,
      "step": 3492
    },
    {
      "epoch": 0.7762222222222223,
      "grad_norm": 0.9773833751678467,
      "learning_rate": 4.4855233853006685e-05,
      "loss": 1.5498,
      "step": 3493
    },
    {
      "epoch": 0.7764444444444445,
      "grad_norm": 1.0763559341430664,
      "learning_rate": 4.481069042316259e-05,
      "loss": 1.6527,
      "step": 3494
    },
    {
      "epoch": 0.7766666666666666,
      "grad_norm": 1.0425339937210083,
      "learning_rate": 4.476614699331849e-05,
      "loss": 1.272,
      "step": 3495
    },
    {
      "epoch": 0.7768888888888889,
      "grad_norm": 1.1225720643997192,
      "learning_rate": 4.472160356347439e-05,
      "loss": 1.3202,
      "step": 3496
    },
    {
      "epoch": 0.7771111111111111,
      "grad_norm": 1.2557756900787354,
      "learning_rate": 4.467706013363029e-05,
      "loss": 1.378,
      "step": 3497
    },
    {
      "epoch": 0.7773333333333333,
      "grad_norm": 0.17433112859725952,
      "learning_rate": 4.463251670378619e-05,
      "loss": 0.038,
      "step": 3498
    },
    {
      "epoch": 0.7775555555555556,
      "grad_norm": 1.008841633796692,
      "learning_rate": 4.45879732739421e-05,
      "loss": 0.8367,
      "step": 3499
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 0.9712222814559937,
      "learning_rate": 4.4543429844098e-05,
      "loss": 0.6577,
      "step": 3500
    },
    {
      "epoch": 0.778,
      "grad_norm": 0.6440428495407104,
      "learning_rate": 4.44988864142539e-05,
      "loss": 0.9956,
      "step": 3501
    },
    {
      "epoch": 0.7782222222222223,
      "grad_norm": 0.5025835633277893,
      "learning_rate": 4.44543429844098e-05,
      "loss": 0.9893,
      "step": 3502
    },
    {
      "epoch": 0.7784444444444445,
      "grad_norm": 0.04510605335235596,
      "learning_rate": 4.4409799554565705e-05,
      "loss": 0.0106,
      "step": 3503
    },
    {
      "epoch": 0.7786666666666666,
      "grad_norm": 0.9423682689666748,
      "learning_rate": 4.43652561247216e-05,
      "loss": 2.3701,
      "step": 3504
    },
    {
      "epoch": 0.7788888888888889,
      "grad_norm": 0.9391410946846008,
      "learning_rate": 4.432071269487751e-05,
      "loss": 2.172,
      "step": 3505
    },
    {
      "epoch": 0.7791111111111111,
      "grad_norm": 0.9086732864379883,
      "learning_rate": 4.427616926503341e-05,
      "loss": 2.217,
      "step": 3506
    },
    {
      "epoch": 0.7793333333333333,
      "grad_norm": 0.9702697396278381,
      "learning_rate": 4.423162583518931e-05,
      "loss": 2.1024,
      "step": 3507
    },
    {
      "epoch": 0.7795555555555556,
      "grad_norm": 0.9364957809448242,
      "learning_rate": 4.418708240534522e-05,
      "loss": 1.9179,
      "step": 3508
    },
    {
      "epoch": 0.7797777777777778,
      "grad_norm": 0.9006823301315308,
      "learning_rate": 4.414253897550111e-05,
      "loss": 1.9431,
      "step": 3509
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8712829947471619,
      "learning_rate": 4.4097995545657015e-05,
      "loss": 1.7075,
      "step": 3510
    },
    {
      "epoch": 0.7802222222222223,
      "grad_norm": 0.8921668529510498,
      "learning_rate": 4.405345211581292e-05,
      "loss": 2.0187,
      "step": 3511
    },
    {
      "epoch": 0.7804444444444445,
      "grad_norm": 1.2319942712783813,
      "learning_rate": 4.400890868596882e-05,
      "loss": 2.2116,
      "step": 3512
    },
    {
      "epoch": 0.7806666666666666,
      "grad_norm": 1.0687848329544067,
      "learning_rate": 4.3964365256124724e-05,
      "loss": 1.827,
      "step": 3513
    },
    {
      "epoch": 0.7808888888888889,
      "grad_norm": 0.06872207671403885,
      "learning_rate": 4.391982182628063e-05,
      "loss": 0.016,
      "step": 3514
    },
    {
      "epoch": 0.7811111111111111,
      "grad_norm": 0.06946699321269989,
      "learning_rate": 4.387527839643653e-05,
      "loss": 0.016,
      "step": 3515
    },
    {
      "epoch": 0.7813333333333333,
      "grad_norm": 0.06873323768377304,
      "learning_rate": 4.383073496659243e-05,
      "loss": 0.0157,
      "step": 3516
    },
    {
      "epoch": 0.7815555555555556,
      "grad_norm": 0.7765884399414062,
      "learning_rate": 4.378619153674833e-05,
      "loss": 0.974,
      "step": 3517
    },
    {
      "epoch": 0.7817777777777778,
      "grad_norm": 0.7765089869499207,
      "learning_rate": 4.374164810690423e-05,
      "loss": 0.9251,
      "step": 3518
    },
    {
      "epoch": 0.782,
      "grad_norm": 0.6271977424621582,
      "learning_rate": 4.3697104677060136e-05,
      "loss": 0.8018,
      "step": 3519
    },
    {
      "epoch": 0.7822222222222223,
      "grad_norm": 0.726948618888855,
      "learning_rate": 4.365256124721604e-05,
      "loss": 0.8888,
      "step": 3520
    },
    {
      "epoch": 0.7824444444444445,
      "grad_norm": 0.9243329167366028,
      "learning_rate": 4.360801781737194e-05,
      "loss": 1.6066,
      "step": 3521
    },
    {
      "epoch": 0.7826666666666666,
      "grad_norm": 1.2513469457626343,
      "learning_rate": 4.356347438752784e-05,
      "loss": 1.9953,
      "step": 3522
    },
    {
      "epoch": 0.7828888888888889,
      "grad_norm": 0.9244915246963501,
      "learning_rate": 4.3518930957683744e-05,
      "loss": 1.8274,
      "step": 3523
    },
    {
      "epoch": 0.7831111111111111,
      "grad_norm": 1.0428435802459717,
      "learning_rate": 4.347438752783964e-05,
      "loss": 1.6283,
      "step": 3524
    },
    {
      "epoch": 0.7833333333333333,
      "grad_norm": 0.8906724452972412,
      "learning_rate": 4.342984409799555e-05,
      "loss": 1.6417,
      "step": 3525
    },
    {
      "epoch": 0.7835555555555556,
      "grad_norm": 1.084955096244812,
      "learning_rate": 4.3385300668151454e-05,
      "loss": 1.6931,
      "step": 3526
    },
    {
      "epoch": 0.7837777777777778,
      "grad_norm": 0.945055365562439,
      "learning_rate": 4.334075723830735e-05,
      "loss": 1.0096,
      "step": 3527
    },
    {
      "epoch": 0.784,
      "grad_norm": 0.0929998904466629,
      "learning_rate": 4.329621380846325e-05,
      "loss": 0.0173,
      "step": 3528
    },
    {
      "epoch": 0.7842222222222223,
      "grad_norm": 1.0231083631515503,
      "learning_rate": 4.3251670378619156e-05,
      "loss": 1.729,
      "step": 3529
    },
    {
      "epoch": 0.7844444444444445,
      "grad_norm": 1.0118030309677124,
      "learning_rate": 4.3207126948775055e-05,
      "loss": 1.6239,
      "step": 3530
    },
    {
      "epoch": 0.7846666666666666,
      "grad_norm": 0.712536096572876,
      "learning_rate": 4.316258351893096e-05,
      "loss": 0.7916,
      "step": 3531
    },
    {
      "epoch": 0.7848888888888889,
      "grad_norm": 0.0708150640130043,
      "learning_rate": 4.3118040089086865e-05,
      "loss": 0.0182,
      "step": 3532
    },
    {
      "epoch": 0.7851111111111111,
      "grad_norm": 0.08458317071199417,
      "learning_rate": 4.3073496659242764e-05,
      "loss": 0.0185,
      "step": 3533
    },
    {
      "epoch": 0.7853333333333333,
      "grad_norm": 1.0665457248687744,
      "learning_rate": 4.302895322939867e-05,
      "loss": 1.6411,
      "step": 3534
    },
    {
      "epoch": 0.7855555555555556,
      "grad_norm": 1.1385186910629272,
      "learning_rate": 4.298440979955457e-05,
      "loss": 1.3018,
      "step": 3535
    },
    {
      "epoch": 0.7857777777777778,
      "grad_norm": 1.0706120729446411,
      "learning_rate": 4.2939866369710466e-05,
      "loss": 1.5641,
      "step": 3536
    },
    {
      "epoch": 0.786,
      "grad_norm": 1.1300500631332397,
      "learning_rate": 4.289532293986637e-05,
      "loss": 1.8248,
      "step": 3537
    },
    {
      "epoch": 0.7862222222222223,
      "grad_norm": 1.3996295928955078,
      "learning_rate": 4.285077951002228e-05,
      "loss": 1.5244,
      "step": 3538
    },
    {
      "epoch": 0.7864444444444444,
      "grad_norm": 1.079788088798523,
      "learning_rate": 4.2806236080178176e-05,
      "loss": 1.6515,
      "step": 3539
    },
    {
      "epoch": 0.7866666666666666,
      "grad_norm": 1.3016208410263062,
      "learning_rate": 4.276169265033408e-05,
      "loss": 1.5737,
      "step": 3540
    },
    {
      "epoch": 0.7868888888888889,
      "grad_norm": 1.1378430128097534,
      "learning_rate": 4.271714922048998e-05,
      "loss": 1.4334,
      "step": 3541
    },
    {
      "epoch": 0.7871111111111111,
      "grad_norm": 1.0300368070602417,
      "learning_rate": 4.267260579064588e-05,
      "loss": 1.4333,
      "step": 3542
    },
    {
      "epoch": 0.7873333333333333,
      "grad_norm": 1.0914349555969238,
      "learning_rate": 4.2628062360801784e-05,
      "loss": 1.3632,
      "step": 3543
    },
    {
      "epoch": 0.7875555555555556,
      "grad_norm": 1.0840858221054077,
      "learning_rate": 4.258351893095768e-05,
      "loss": 1.471,
      "step": 3544
    },
    {
      "epoch": 0.7877777777777778,
      "grad_norm": 0.19576247036457062,
      "learning_rate": 4.253897550111359e-05,
      "loss": 0.0362,
      "step": 3545
    },
    {
      "epoch": 0.788,
      "grad_norm": 0.19378961622714996,
      "learning_rate": 4.249443207126949e-05,
      "loss": 0.035,
      "step": 3546
    },
    {
      "epoch": 0.7882222222222223,
      "grad_norm": 0.13323235511779785,
      "learning_rate": 4.244988864142539e-05,
      "loss": 0.0349,
      "step": 3547
    },
    {
      "epoch": 0.7884444444444444,
      "grad_norm": 0.1556854248046875,
      "learning_rate": 4.240534521158129e-05,
      "loss": 0.0353,
      "step": 3548
    },
    {
      "epoch": 0.7886666666666666,
      "grad_norm": 1.26119065284729,
      "learning_rate": 4.2360801781737195e-05,
      "loss": 1.1486,
      "step": 3549
    },
    {
      "epoch": 0.7888888888888889,
      "grad_norm": 0.8363838791847229,
      "learning_rate": 4.2316258351893094e-05,
      "loss": 0.5973,
      "step": 3550
    },
    {
      "epoch": 0.7891111111111111,
      "grad_norm": 0.6089571118354797,
      "learning_rate": 4.2271714922049e-05,
      "loss": 1.1064,
      "step": 3551
    },
    {
      "epoch": 0.7893333333333333,
      "grad_norm": 0.04548042267560959,
      "learning_rate": 4.2227171492204905e-05,
      "loss": 0.0104,
      "step": 3552
    },
    {
      "epoch": 0.7895555555555556,
      "grad_norm": 0.6471196413040161,
      "learning_rate": 4.21826280623608e-05,
      "loss": 1.1893,
      "step": 3553
    },
    {
      "epoch": 0.7897777777777778,
      "grad_norm": 0.046915166079998016,
      "learning_rate": 4.213808463251671e-05,
      "loss": 0.0107,
      "step": 3554
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.5992786884307861,
      "learning_rate": 4.209354120267261e-05,
      "loss": 0.9432,
      "step": 3555
    },
    {
      "epoch": 0.7902222222222223,
      "grad_norm": 0.08186815679073334,
      "learning_rate": 4.2048997772828506e-05,
      "loss": 0.0118,
      "step": 3556
    },
    {
      "epoch": 0.7904444444444444,
      "grad_norm": 0.9210121035575867,
      "learning_rate": 4.200445434298441e-05,
      "loss": 2.1219,
      "step": 3557
    },
    {
      "epoch": 0.7906666666666666,
      "grad_norm": 1.006956696510315,
      "learning_rate": 4.1959910913140317e-05,
      "loss": 1.8555,
      "step": 3558
    },
    {
      "epoch": 0.7908888888888889,
      "grad_norm": 0.9316953420639038,
      "learning_rate": 4.1915367483296215e-05,
      "loss": 2.0922,
      "step": 3559
    },
    {
      "epoch": 0.7911111111111111,
      "grad_norm": 0.9348610639572144,
      "learning_rate": 4.187082405345212e-05,
      "loss": 2.2086,
      "step": 3560
    },
    {
      "epoch": 0.7913333333333333,
      "grad_norm": 0.884831964969635,
      "learning_rate": 4.182628062360802e-05,
      "loss": 1.712,
      "step": 3561
    },
    {
      "epoch": 0.7915555555555556,
      "grad_norm": 0.9388924837112427,
      "learning_rate": 4.178173719376392e-05,
      "loss": 1.9738,
      "step": 3562
    },
    {
      "epoch": 0.7917777777777778,
      "grad_norm": 0.8886390924453735,
      "learning_rate": 4.173719376391982e-05,
      "loss": 2.0557,
      "step": 3563
    },
    {
      "epoch": 0.792,
      "grad_norm": 0.9279087781906128,
      "learning_rate": 4.169265033407573e-05,
      "loss": 1.8811,
      "step": 3564
    },
    {
      "epoch": 0.7922222222222223,
      "grad_norm": 0.8624377250671387,
      "learning_rate": 4.164810690423163e-05,
      "loss": 1.7352,
      "step": 3565
    },
    {
      "epoch": 0.7924444444444444,
      "grad_norm": 0.9816845059394836,
      "learning_rate": 4.160356347438753e-05,
      "loss": 2.084,
      "step": 3566
    },
    {
      "epoch": 0.7926666666666666,
      "grad_norm": 1.113109827041626,
      "learning_rate": 4.155902004454343e-05,
      "loss": 1.8477,
      "step": 3567
    },
    {
      "epoch": 0.7928888888888889,
      "grad_norm": 0.9390388131141663,
      "learning_rate": 4.151447661469933e-05,
      "loss": 2.0081,
      "step": 3568
    },
    {
      "epoch": 0.7931111111111111,
      "grad_norm": 0.06967326253652573,
      "learning_rate": 4.1469933184855235e-05,
      "loss": 0.0158,
      "step": 3569
    },
    {
      "epoch": 0.7933333333333333,
      "grad_norm": 0.7333217859268188,
      "learning_rate": 4.142538975501114e-05,
      "loss": 0.959,
      "step": 3570
    },
    {
      "epoch": 0.7935555555555556,
      "grad_norm": 0.9319103360176086,
      "learning_rate": 4.138084632516704e-05,
      "loss": 1.7261,
      "step": 3571
    },
    {
      "epoch": 0.7937777777777778,
      "grad_norm": 1.187261700630188,
      "learning_rate": 4.1336302895322944e-05,
      "loss": 2.1856,
      "step": 3572
    },
    {
      "epoch": 0.794,
      "grad_norm": 0.9051704406738281,
      "learning_rate": 4.129175946547885e-05,
      "loss": 1.5193,
      "step": 3573
    },
    {
      "epoch": 0.7942222222222223,
      "grad_norm": 1.0516455173492432,
      "learning_rate": 4.124721603563475e-05,
      "loss": 1.8372,
      "step": 3574
    },
    {
      "epoch": 0.7944444444444444,
      "grad_norm": 0.9026862978935242,
      "learning_rate": 4.120267260579065e-05,
      "loss": 1.7081,
      "step": 3575
    },
    {
      "epoch": 0.7946666666666666,
      "grad_norm": 1.0022499561309814,
      "learning_rate": 4.115812917594655e-05,
      "loss": 1.8029,
      "step": 3576
    },
    {
      "epoch": 0.7948888888888889,
      "grad_norm": 1.0183892250061035,
      "learning_rate": 4.111358574610245e-05,
      "loss": 1.8359,
      "step": 3577
    },
    {
      "epoch": 0.7951111111111111,
      "grad_norm": 0.06487128883600235,
      "learning_rate": 4.1069042316258356e-05,
      "loss": 0.018,
      "step": 3578
    },
    {
      "epoch": 0.7953333333333333,
      "grad_norm": 0.07973368465900421,
      "learning_rate": 4.1024498886414255e-05,
      "loss": 0.0177,
      "step": 3579
    },
    {
      "epoch": 0.7955555555555556,
      "grad_norm": 0.6818245053291321,
      "learning_rate": 4.097995545657016e-05,
      "loss": 0.7042,
      "step": 3580
    },
    {
      "epoch": 0.7957777777777778,
      "grad_norm": 0.7157871127128601,
      "learning_rate": 4.093541202672606e-05,
      "loss": 0.9386,
      "step": 3581
    },
    {
      "epoch": 0.796,
      "grad_norm": 0.0813339352607727,
      "learning_rate": 4.089086859688196e-05,
      "loss": 0.0178,
      "step": 3582
    },
    {
      "epoch": 0.7962222222222223,
      "grad_norm": 0.07369329035282135,
      "learning_rate": 4.084632516703786e-05,
      "loss": 0.0175,
      "step": 3583
    },
    {
      "epoch": 0.7964444444444444,
      "grad_norm": 0.07479511946439743,
      "learning_rate": 4.080178173719377e-05,
      "loss": 0.0181,
      "step": 3584
    },
    {
      "epoch": 0.7966666666666666,
      "grad_norm": 0.07840964198112488,
      "learning_rate": 4.0757238307349666e-05,
      "loss": 0.0169,
      "step": 3585
    },
    {
      "epoch": 0.7968888888888889,
      "grad_norm": 1.3525023460388184,
      "learning_rate": 4.071269487750557e-05,
      "loss": 1.9031,
      "step": 3586
    },
    {
      "epoch": 0.7971111111111111,
      "grad_norm": 1.6226398944854736,
      "learning_rate": 4.066815144766147e-05,
      "loss": 1.763,
      "step": 3587
    },
    {
      "epoch": 0.7973333333333333,
      "grad_norm": 0.9485257267951965,
      "learning_rate": 4.062360801781737e-05,
      "loss": 1.4717,
      "step": 3588
    },
    {
      "epoch": 0.7975555555555556,
      "grad_norm": 0.9942083358764648,
      "learning_rate": 4.0579064587973274e-05,
      "loss": 1.61,
      "step": 3589
    },
    {
      "epoch": 0.7977777777777778,
      "grad_norm": 0.10811223834753036,
      "learning_rate": 4.053452115812918e-05,
      "loss": 0.0255,
      "step": 3590
    },
    {
      "epoch": 0.798,
      "grad_norm": 0.9328152537345886,
      "learning_rate": 4.048997772828508e-05,
      "loss": 1.6944,
      "step": 3591
    },
    {
      "epoch": 0.7982222222222223,
      "grad_norm": 1.1752344369888306,
      "learning_rate": 4.0445434298440984e-05,
      "loss": 1.5285,
      "step": 3592
    },
    {
      "epoch": 0.7984444444444444,
      "grad_norm": 1.2680740356445312,
      "learning_rate": 4.040089086859689e-05,
      "loss": 1.4009,
      "step": 3593
    },
    {
      "epoch": 0.7986666666666666,
      "grad_norm": 1.1118732690811157,
      "learning_rate": 4.035634743875278e-05,
      "loss": 1.3982,
      "step": 3594
    },
    {
      "epoch": 0.7988888888888889,
      "grad_norm": 1.389929175376892,
      "learning_rate": 4.0311804008908686e-05,
      "loss": 1.3998,
      "step": 3595
    },
    {
      "epoch": 0.7991111111111111,
      "grad_norm": 0.8431739807128906,
      "learning_rate": 4.026726057906459e-05,
      "loss": 0.8518,
      "step": 3596
    },
    {
      "epoch": 0.7993333333333333,
      "grad_norm": 1.3290144205093384,
      "learning_rate": 4.022271714922049e-05,
      "loss": 1.4607,
      "step": 3597
    },
    {
      "epoch": 0.7995555555555556,
      "grad_norm": 0.15491686761379242,
      "learning_rate": 4.0178173719376395e-05,
      "loss": 0.0335,
      "step": 3598
    },
    {
      "epoch": 0.7997777777777778,
      "grad_norm": 0.7583034038543701,
      "learning_rate": 4.01336302895323e-05,
      "loss": 0.4833,
      "step": 3599
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.900272786617279,
      "learning_rate": 4.00890868596882e-05,
      "loss": 0.8159,
      "step": 3600
    },
    {
      "epoch": 0.8,
      "eval_loss": 1.1739096641540527,
      "eval_runtime": 239.8403,
      "eval_samples_per_second": 4.169,
      "eval_steps_per_second": 4.169,
      "step": 3600
    },
    {
      "epoch": 0.8002222222222222,
      "grad_norm": 0.5315160155296326,
      "learning_rate": 4.00445434298441e-05,
      "loss": 1.0474,
      "step": 3601
    },
    {
      "epoch": 0.8004444444444444,
      "grad_norm": 0.5535997152328491,
      "learning_rate": 4e-05,
      "loss": 0.9027,
      "step": 3602
    },
    {
      "epoch": 0.8006666666666666,
      "grad_norm": 0.7079357504844666,
      "learning_rate": 3.99554565701559e-05,
      "loss": 1.1432,
      "step": 3603
    },
    {
      "epoch": 0.8008888888888889,
      "grad_norm": 0.6734809875488281,
      "learning_rate": 3.991091314031181e-05,
      "loss": 1.0644,
      "step": 3604
    },
    {
      "epoch": 0.8011111111111111,
      "grad_norm": 0.6071887016296387,
      "learning_rate": 3.986636971046771e-05,
      "loss": 1.101,
      "step": 3605
    },
    {
      "epoch": 0.8013333333333333,
      "grad_norm": 0.5823980569839478,
      "learning_rate": 3.982182628062361e-05,
      "loss": 0.8761,
      "step": 3606
    },
    {
      "epoch": 0.8015555555555556,
      "grad_norm": 0.09482403099536896,
      "learning_rate": 3.977728285077951e-05,
      "loss": 0.0129,
      "step": 3607
    },
    {
      "epoch": 0.8017777777777778,
      "grad_norm": 0.588431179523468,
      "learning_rate": 3.9732739420935415e-05,
      "loss": 1.0989,
      "step": 3608
    },
    {
      "epoch": 0.802,
      "grad_norm": 0.9978165626525879,
      "learning_rate": 3.9688195991091314e-05,
      "loss": 1.6576,
      "step": 3609
    },
    {
      "epoch": 0.8022222222222222,
      "grad_norm": 0.9243869185447693,
      "learning_rate": 3.964365256124722e-05,
      "loss": 2.1462,
      "step": 3610
    },
    {
      "epoch": 0.8024444444444444,
      "grad_norm": 0.8666023015975952,
      "learning_rate": 3.9599109131403124e-05,
      "loss": 2.0262,
      "step": 3611
    },
    {
      "epoch": 0.8026666666666666,
      "grad_norm": 0.9008248448371887,
      "learning_rate": 3.955456570155902e-05,
      "loss": 2.0854,
      "step": 3612
    },
    {
      "epoch": 0.8028888888888889,
      "grad_norm": 0.6247386336326599,
      "learning_rate": 3.951002227171492e-05,
      "loss": 0.8506,
      "step": 3613
    },
    {
      "epoch": 0.8031111111111111,
      "grad_norm": 0.13101428747177124,
      "learning_rate": 3.946547884187082e-05,
      "loss": 0.0223,
      "step": 3614
    },
    {
      "epoch": 0.8033333333333333,
      "grad_norm": 0.12358218431472778,
      "learning_rate": 3.9420935412026726e-05,
      "loss": 0.0211,
      "step": 3615
    },
    {
      "epoch": 0.8035555555555556,
      "grad_norm": 0.9996263980865479,
      "learning_rate": 3.937639198218263e-05,
      "loss": 2.0538,
      "step": 3616
    },
    {
      "epoch": 0.8037777777777778,
      "grad_norm": 1.048120379447937,
      "learning_rate": 3.933184855233853e-05,
      "loss": 1.6907,
      "step": 3617
    },
    {
      "epoch": 0.804,
      "grad_norm": 0.9704152941703796,
      "learning_rate": 3.9287305122494435e-05,
      "loss": 2.025,
      "step": 3618
    },
    {
      "epoch": 0.8042222222222222,
      "grad_norm": 0.9544731378555298,
      "learning_rate": 3.924276169265034e-05,
      "loss": 1.8855,
      "step": 3619
    },
    {
      "epoch": 0.8044444444444444,
      "grad_norm": 0.869174599647522,
      "learning_rate": 3.919821826280624e-05,
      "loss": 2.1548,
      "step": 3620
    },
    {
      "epoch": 0.8046666666666666,
      "grad_norm": 0.7145273685455322,
      "learning_rate": 3.915367483296214e-05,
      "loss": 1.0897,
      "step": 3621
    },
    {
      "epoch": 0.8048888888888889,
      "grad_norm": 0.06968183070421219,
      "learning_rate": 3.910913140311804e-05,
      "loss": 0.0157,
      "step": 3622
    },
    {
      "epoch": 0.8051111111111111,
      "grad_norm": 0.636101484298706,
      "learning_rate": 3.906458797327394e-05,
      "loss": 0.8512,
      "step": 3623
    },
    {
      "epoch": 0.8053333333333333,
      "grad_norm": 1.1181496381759644,
      "learning_rate": 3.902004454342985e-05,
      "loss": 1.5164,
      "step": 3624
    },
    {
      "epoch": 0.8055555555555556,
      "grad_norm": 0.6359825730323792,
      "learning_rate": 3.897550111358575e-05,
      "loss": 1.0128,
      "step": 3625
    },
    {
      "epoch": 0.8057777777777778,
      "grad_norm": 0.943658173084259,
      "learning_rate": 3.893095768374165e-05,
      "loss": 1.582,
      "step": 3626
    },
    {
      "epoch": 0.806,
      "grad_norm": 1.047963261604309,
      "learning_rate": 3.888641425389755e-05,
      "loss": 1.704,
      "step": 3627
    },
    {
      "epoch": 0.8062222222222222,
      "grad_norm": 0.9852431416511536,
      "learning_rate": 3.8841870824053455e-05,
      "loss": 1.6806,
      "step": 3628
    },
    {
      "epoch": 0.8064444444444444,
      "grad_norm": 0.8574654459953308,
      "learning_rate": 3.879732739420935e-05,
      "loss": 0.916,
      "step": 3629
    },
    {
      "epoch": 0.8066666666666666,
      "grad_norm": 0.9380385279655457,
      "learning_rate": 3.875278396436526e-05,
      "loss": 0.9947,
      "step": 3630
    },
    {
      "epoch": 0.8068888888888889,
      "grad_norm": 0.06696880608797073,
      "learning_rate": 3.8708240534521164e-05,
      "loss": 0.017,
      "step": 3631
    },
    {
      "epoch": 0.8071111111111111,
      "grad_norm": 0.0664907768368721,
      "learning_rate": 3.866369710467706e-05,
      "loss": 0.0173,
      "step": 3632
    },
    {
      "epoch": 0.8073333333333333,
      "grad_norm": 0.06398806720972061,
      "learning_rate": 3.861915367483296e-05,
      "loss": 0.0171,
      "step": 3633
    },
    {
      "epoch": 0.8075555555555556,
      "grad_norm": 1.21707022190094,
      "learning_rate": 3.8574610244988866e-05,
      "loss": 2.0207,
      "step": 3634
    },
    {
      "epoch": 0.8077777777777778,
      "grad_norm": 1.04575777053833,
      "learning_rate": 3.8530066815144765e-05,
      "loss": 1.8143,
      "step": 3635
    },
    {
      "epoch": 0.808,
      "grad_norm": 0.0705951601266861,
      "learning_rate": 3.848552338530067e-05,
      "loss": 0.0166,
      "step": 3636
    },
    {
      "epoch": 0.8082222222222222,
      "grad_norm": 0.076121024787426,
      "learning_rate": 3.8440979955456576e-05,
      "loss": 0.0167,
      "step": 3637
    },
    {
      "epoch": 0.8084444444444444,
      "grad_norm": 1.0178598165512085,
      "learning_rate": 3.8396436525612474e-05,
      "loss": 1.6507,
      "step": 3638
    },
    {
      "epoch": 0.8086666666666666,
      "grad_norm": 1.102067232131958,
      "learning_rate": 3.835189309576838e-05,
      "loss": 1.7494,
      "step": 3639
    },
    {
      "epoch": 0.8088888888888889,
      "grad_norm": 0.10027281194925308,
      "learning_rate": 3.830734966592428e-05,
      "loss": 0.0251,
      "step": 3640
    },
    {
      "epoch": 0.8091111111111111,
      "grad_norm": 0.6546877026557922,
      "learning_rate": 3.826280623608018e-05,
      "loss": 0.6345,
      "step": 3641
    },
    {
      "epoch": 0.8093333333333333,
      "grad_norm": 1.25735342502594,
      "learning_rate": 3.821826280623608e-05,
      "loss": 1.7112,
      "step": 3642
    },
    {
      "epoch": 0.8095555555555556,
      "grad_norm": 1.1318788528442383,
      "learning_rate": 3.817371937639199e-05,
      "loss": 1.5739,
      "step": 3643
    },
    {
      "epoch": 0.8097777777777778,
      "grad_norm": 1.0459527969360352,
      "learning_rate": 3.8129175946547886e-05,
      "loss": 1.3376,
      "step": 3644
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0099126100540161,
      "learning_rate": 3.808463251670379e-05,
      "loss": 1.2814,
      "step": 3645
    },
    {
      "epoch": 0.8102222222222222,
      "grad_norm": 0.82170170545578,
      "learning_rate": 3.804008908685969e-05,
      "loss": 0.9743,
      "step": 3646
    },
    {
      "epoch": 0.8104444444444444,
      "grad_norm": 1.0343072414398193,
      "learning_rate": 3.799554565701559e-05,
      "loss": 1.4283,
      "step": 3647
    },
    {
      "epoch": 0.8106666666666666,
      "grad_norm": 0.944911777973175,
      "learning_rate": 3.7951002227171494e-05,
      "loss": 1.05,
      "step": 3648
    },
    {
      "epoch": 0.8108888888888889,
      "grad_norm": 0.7252945303916931,
      "learning_rate": 3.79064587973274e-05,
      "loss": 0.6662,
      "step": 3649
    },
    {
      "epoch": 0.8111111111111111,
      "grad_norm": 1.005825400352478,
      "learning_rate": 3.78619153674833e-05,
      "loss": 0.7958,
      "step": 3650
    },
    {
      "epoch": 0.8113333333333334,
      "grad_norm": 0.82235187292099,
      "learning_rate": 3.78173719376392e-05,
      "loss": 2.2468,
      "step": 3651
    },
    {
      "epoch": 0.8115555555555556,
      "grad_norm": 0.6531208157539368,
      "learning_rate": 3.77728285077951e-05,
      "loss": 0.9914,
      "step": 3652
    },
    {
      "epoch": 0.8117777777777778,
      "grad_norm": 0.8134424686431885,
      "learning_rate": 3.7728285077951e-05,
      "loss": 2.2763,
      "step": 3653
    },
    {
      "epoch": 0.812,
      "grad_norm": 0.043821610510349274,
      "learning_rate": 3.7683741648106906e-05,
      "loss": 0.0101,
      "step": 3654
    },
    {
      "epoch": 0.8122222222222222,
      "grad_norm": 0.6952782273292542,
      "learning_rate": 3.7639198218262804e-05,
      "loss": 1.234,
      "step": 3655
    },
    {
      "epoch": 0.8124444444444444,
      "grad_norm": 0.6162470579147339,
      "learning_rate": 3.759465478841871e-05,
      "loss": 1.0527,
      "step": 3656
    },
    {
      "epoch": 0.8126666666666666,
      "grad_norm": 1.0366730690002441,
      "learning_rate": 3.7550111358574615e-05,
      "loss": 2.3824,
      "step": 3657
    },
    {
      "epoch": 0.8128888888888889,
      "grad_norm": 0.08013699948787689,
      "learning_rate": 3.7505567928730514e-05,
      "loss": 0.0114,
      "step": 3658
    },
    {
      "epoch": 0.8131111111111111,
      "grad_norm": 0.07211296260356903,
      "learning_rate": 3.746102449888642e-05,
      "loss": 0.0114,
      "step": 3659
    },
    {
      "epoch": 0.8133333333333334,
      "grad_norm": 0.07523675262928009,
      "learning_rate": 3.741648106904232e-05,
      "loss": 0.0114,
      "step": 3660
    },
    {
      "epoch": 0.8135555555555556,
      "grad_norm": 0.4986688494682312,
      "learning_rate": 3.7371937639198216e-05,
      "loss": 0.946,
      "step": 3661
    },
    {
      "epoch": 0.8137777777777778,
      "grad_norm": 0.9343963265419006,
      "learning_rate": 3.732739420935412e-05,
      "loss": 2.0871,
      "step": 3662
    },
    {
      "epoch": 0.814,
      "grad_norm": 0.881712794303894,
      "learning_rate": 3.728285077951003e-05,
      "loss": 2.0004,
      "step": 3663
    },
    {
      "epoch": 0.8142222222222222,
      "grad_norm": 1.054946780204773,
      "learning_rate": 3.7238307349665925e-05,
      "loss": 2.1722,
      "step": 3664
    },
    {
      "epoch": 0.8144444444444444,
      "grad_norm": 0.9730517268180847,
      "learning_rate": 3.719376391982183e-05,
      "loss": 1.7299,
      "step": 3665
    },
    {
      "epoch": 0.8146666666666667,
      "grad_norm": 0.8659468293190002,
      "learning_rate": 3.714922048997773e-05,
      "loss": 2.1389,
      "step": 3666
    },
    {
      "epoch": 0.8148888888888889,
      "grad_norm": 0.6216636896133423,
      "learning_rate": 3.710467706013363e-05,
      "loss": 1.0111,
      "step": 3667
    },
    {
      "epoch": 0.8151111111111111,
      "grad_norm": 0.1096658706665039,
      "learning_rate": 3.706013363028953e-05,
      "loss": 0.0178,
      "step": 3668
    },
    {
      "epoch": 0.8153333333333334,
      "grad_norm": 0.6512637138366699,
      "learning_rate": 3.701559020044544e-05,
      "loss": 1.0441,
      "step": 3669
    },
    {
      "epoch": 0.8155555555555556,
      "grad_norm": 1.0497286319732666,
      "learning_rate": 3.697104677060134e-05,
      "loss": 1.7955,
      "step": 3670
    },
    {
      "epoch": 0.8157777777777778,
      "grad_norm": 0.9653757214546204,
      "learning_rate": 3.692650334075724e-05,
      "loss": 1.8275,
      "step": 3671
    },
    {
      "epoch": 0.816,
      "grad_norm": 0.6577117443084717,
      "learning_rate": 3.688195991091314e-05,
      "loss": 0.9564,
      "step": 3672
    },
    {
      "epoch": 0.8162222222222222,
      "grad_norm": 0.07206307351589203,
      "learning_rate": 3.683741648106904e-05,
      "loss": 0.0158,
      "step": 3673
    },
    {
      "epoch": 0.8164444444444444,
      "grad_norm": 0.06910723447799683,
      "learning_rate": 3.6792873051224945e-05,
      "loss": 0.0159,
      "step": 3674
    },
    {
      "epoch": 0.8166666666666667,
      "grad_norm": 0.9517031908035278,
      "learning_rate": 3.674832962138085e-05,
      "loss": 1.9874,
      "step": 3675
    },
    {
      "epoch": 0.8168888888888889,
      "grad_norm": 0.1267796903848648,
      "learning_rate": 3.670378619153675e-05,
      "loss": 0.0203,
      "step": 3676
    },
    {
      "epoch": 0.8171111111111111,
      "grad_norm": 0.6326009631156921,
      "learning_rate": 3.6659242761692654e-05,
      "loss": 0.8404,
      "step": 3677
    },
    {
      "epoch": 0.8173333333333334,
      "grad_norm": 0.950645387172699,
      "learning_rate": 3.661469933184856e-05,
      "loss": 1.7112,
      "step": 3678
    },
    {
      "epoch": 0.8175555555555556,
      "grad_norm": 1.134836196899414,
      "learning_rate": 3.657015590200445e-05,
      "loss": 1.8671,
      "step": 3679
    },
    {
      "epoch": 0.8177777777777778,
      "grad_norm": 0.9950535893440247,
      "learning_rate": 3.652561247216036e-05,
      "loss": 1.9121,
      "step": 3680
    },
    {
      "epoch": 0.818,
      "grad_norm": 0.06296125799417496,
      "learning_rate": 3.648106904231626e-05,
      "loss": 0.0175,
      "step": 3681
    },
    {
      "epoch": 0.8182222222222222,
      "grad_norm": 0.9528807401657104,
      "learning_rate": 3.643652561247216e-05,
      "loss": 1.7288,
      "step": 3682
    },
    {
      "epoch": 0.8184444444444444,
      "grad_norm": 1.0748895406723022,
      "learning_rate": 3.6391982182628066e-05,
      "loss": 1.3809,
      "step": 3683
    },
    {
      "epoch": 0.8186666666666667,
      "grad_norm": 0.07369447499513626,
      "learning_rate": 3.634743875278397e-05,
      "loss": 0.0173,
      "step": 3684
    },
    {
      "epoch": 0.8188888888888889,
      "grad_norm": 0.07020772248506546,
      "learning_rate": 3.630289532293987e-05,
      "loss": 0.0172,
      "step": 3685
    },
    {
      "epoch": 0.8191111111111111,
      "grad_norm": 0.7225638628005981,
      "learning_rate": 3.625835189309577e-05,
      "loss": 0.9078,
      "step": 3686
    },
    {
      "epoch": 0.8193333333333334,
      "grad_norm": 1.205776333808899,
      "learning_rate": 3.621380846325167e-05,
      "loss": 2.0067,
      "step": 3687
    },
    {
      "epoch": 0.8195555555555556,
      "grad_norm": 1.0239263772964478,
      "learning_rate": 3.616926503340757e-05,
      "loss": 1.3173,
      "step": 3688
    },
    {
      "epoch": 0.8197777777777778,
      "grad_norm": 0.09923997521400452,
      "learning_rate": 3.612472160356348e-05,
      "loss": 0.0243,
      "step": 3689
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.150796890258789,
      "learning_rate": 3.608017817371938e-05,
      "loss": 1.5561,
      "step": 3690
    },
    {
      "epoch": 0.8202222222222222,
      "grad_norm": 1.0021523237228394,
      "learning_rate": 3.603563474387528e-05,
      "loss": 1.6326,
      "step": 3691
    },
    {
      "epoch": 0.8204444444444444,
      "grad_norm": 0.9547725319862366,
      "learning_rate": 3.599109131403118e-05,
      "loss": 1.2198,
      "step": 3692
    },
    {
      "epoch": 0.8206666666666667,
      "grad_norm": 0.9803183674812317,
      "learning_rate": 3.594654788418708e-05,
      "loss": 1.4813,
      "step": 3693
    },
    {
      "epoch": 0.8208888888888889,
      "grad_norm": 1.140236258506775,
      "learning_rate": 3.5902004454342985e-05,
      "loss": 1.5703,
      "step": 3694
    },
    {
      "epoch": 0.8211111111111111,
      "grad_norm": 0.1800074428319931,
      "learning_rate": 3.585746102449889e-05,
      "loss": 0.0296,
      "step": 3695
    },
    {
      "epoch": 0.8213333333333334,
      "grad_norm": 0.744806170463562,
      "learning_rate": 3.581291759465479e-05,
      "loss": 0.7234,
      "step": 3696
    },
    {
      "epoch": 0.8215555555555556,
      "grad_norm": 1.0382987260818481,
      "learning_rate": 3.5768374164810694e-05,
      "loss": 1.0165,
      "step": 3697
    },
    {
      "epoch": 0.8217777777777778,
      "grad_norm": 1.01142156124115,
      "learning_rate": 3.572383073496659e-05,
      "loss": 1.2093,
      "step": 3698
    },
    {
      "epoch": 0.822,
      "grad_norm": 0.575733482837677,
      "learning_rate": 3.567928730512249e-05,
      "loss": 0.4409,
      "step": 3699
    },
    {
      "epoch": 0.8222222222222222,
      "grad_norm": 0.9521045684814453,
      "learning_rate": 3.5634743875278396e-05,
      "loss": 0.6805,
      "step": 3700
    },
    {
      "epoch": 0.8224444444444444,
      "grad_norm": 0.9057009220123291,
      "learning_rate": 3.55902004454343e-05,
      "loss": 2.3256,
      "step": 3701
    },
    {
      "epoch": 0.8226666666666667,
      "grad_norm": 0.8539500832557678,
      "learning_rate": 3.55456570155902e-05,
      "loss": 2.1458,
      "step": 3702
    },
    {
      "epoch": 0.8228888888888889,
      "grad_norm": 0.598690390586853,
      "learning_rate": 3.5501113585746106e-05,
      "loss": 1.164,
      "step": 3703
    },
    {
      "epoch": 0.8231111111111111,
      "grad_norm": 1.0174680948257446,
      "learning_rate": 3.545657015590201e-05,
      "loss": 2.2128,
      "step": 3704
    },
    {
      "epoch": 0.8233333333333334,
      "grad_norm": 0.5973488092422485,
      "learning_rate": 3.541202672605791e-05,
      "loss": 1.1449,
      "step": 3705
    },
    {
      "epoch": 0.8235555555555556,
      "grad_norm": 0.07344137132167816,
      "learning_rate": 3.536748329621381e-05,
      "loss": 0.0108,
      "step": 3706
    },
    {
      "epoch": 0.8237777777777778,
      "grad_norm": 0.08576471358537674,
      "learning_rate": 3.5322939866369714e-05,
      "loss": 0.0111,
      "step": 3707
    },
    {
      "epoch": 0.824,
      "grad_norm": 0.0801275447010994,
      "learning_rate": 3.527839643652561e-05,
      "loss": 0.0111,
      "step": 3708
    },
    {
      "epoch": 0.8242222222222222,
      "grad_norm": 0.08194973319768906,
      "learning_rate": 3.523385300668152e-05,
      "loss": 0.0109,
      "step": 3709
    },
    {
      "epoch": 0.8244444444444444,
      "grad_norm": 1.0708433389663696,
      "learning_rate": 3.518930957683742e-05,
      "loss": 2.033,
      "step": 3710
    },
    {
      "epoch": 0.8246666666666667,
      "grad_norm": 1.263612985610962,
      "learning_rate": 3.514476614699332e-05,
      "loss": 2.6208,
      "step": 3711
    },
    {
      "epoch": 0.8248888888888889,
      "grad_norm": 0.9804373383522034,
      "learning_rate": 3.510022271714922e-05,
      "loss": 1.8539,
      "step": 3712
    },
    {
      "epoch": 0.8251111111111111,
      "grad_norm": 0.8642978668212891,
      "learning_rate": 3.5055679287305125e-05,
      "loss": 1.7259,
      "step": 3713
    },
    {
      "epoch": 0.8253333333333334,
      "grad_norm": 0.9090456962585449,
      "learning_rate": 3.5011135857461024e-05,
      "loss": 1.8745,
      "step": 3714
    },
    {
      "epoch": 0.8255555555555556,
      "grad_norm": 0.9151667356491089,
      "learning_rate": 3.496659242761693e-05,
      "loss": 1.9903,
      "step": 3715
    },
    {
      "epoch": 0.8257777777777778,
      "grad_norm": 0.6366732120513916,
      "learning_rate": 3.4922048997772835e-05,
      "loss": 0.9141,
      "step": 3716
    },
    {
      "epoch": 0.826,
      "grad_norm": 0.7700564861297607,
      "learning_rate": 3.487750556792873e-05,
      "loss": 0.9737,
      "step": 3717
    },
    {
      "epoch": 0.8262222222222222,
      "grad_norm": 0.9940738081932068,
      "learning_rate": 3.483296213808463e-05,
      "loss": 2.0155,
      "step": 3718
    },
    {
      "epoch": 0.8264444444444444,
      "grad_norm": 0.8816027641296387,
      "learning_rate": 3.478841870824054e-05,
      "loss": 1.7297,
      "step": 3719
    },
    {
      "epoch": 0.8266666666666667,
      "grad_norm": 1.3799381256103516,
      "learning_rate": 3.4743875278396436e-05,
      "loss": 2.159,
      "step": 3720
    },
    {
      "epoch": 0.8268888888888889,
      "grad_norm": 1.0440785884857178,
      "learning_rate": 3.469933184855234e-05,
      "loss": 1.7563,
      "step": 3721
    },
    {
      "epoch": 0.8271111111111111,
      "grad_norm": 0.6796101331710815,
      "learning_rate": 3.465478841870824e-05,
      "loss": 0.9447,
      "step": 3722
    },
    {
      "epoch": 0.8273333333333334,
      "grad_norm": 0.06959293782711029,
      "learning_rate": 3.4610244988864145e-05,
      "loss": 0.0163,
      "step": 3723
    },
    {
      "epoch": 0.8275555555555556,
      "grad_norm": 0.06835558265447617,
      "learning_rate": 3.456570155902005e-05,
      "loss": 0.0164,
      "step": 3724
    },
    {
      "epoch": 0.8277777777777777,
      "grad_norm": 1.080851435661316,
      "learning_rate": 3.452115812917594e-05,
      "loss": 1.9956,
      "step": 3725
    },
    {
      "epoch": 0.828,
      "grad_norm": 0.6416263580322266,
      "learning_rate": 3.447661469933185e-05,
      "loss": 0.6763,
      "step": 3726
    },
    {
      "epoch": 0.8282222222222222,
      "grad_norm": 1.0977882146835327,
      "learning_rate": 3.443207126948775e-05,
      "loss": 1.7526,
      "step": 3727
    },
    {
      "epoch": 0.8284444444444444,
      "grad_norm": 0.8835853338241577,
      "learning_rate": 3.438752783964365e-05,
      "loss": 1.7257,
      "step": 3728
    },
    {
      "epoch": 0.8286666666666667,
      "grad_norm": 1.0617055892944336,
      "learning_rate": 3.434298440979956e-05,
      "loss": 1.9876,
      "step": 3729
    },
    {
      "epoch": 0.8288888888888889,
      "grad_norm": 1.1054614782333374,
      "learning_rate": 3.429844097995546e-05,
      "loss": 1.4692,
      "step": 3730
    },
    {
      "epoch": 0.8291111111111111,
      "grad_norm": 0.06595687568187714,
      "learning_rate": 3.425389755011136e-05,
      "loss": 0.0171,
      "step": 3731
    },
    {
      "epoch": 0.8293333333333334,
      "grad_norm": 0.06783387809991837,
      "learning_rate": 3.420935412026726e-05,
      "loss": 0.0172,
      "step": 3732
    },
    {
      "epoch": 0.8295555555555556,
      "grad_norm": 0.7828741073608398,
      "learning_rate": 3.4164810690423165e-05,
      "loss": 0.8107,
      "step": 3733
    },
    {
      "epoch": 0.8297777777777777,
      "grad_norm": 0.07702479511499405,
      "learning_rate": 3.4120267260579063e-05,
      "loss": 0.0177,
      "step": 3734
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6449767351150513,
      "learning_rate": 3.407572383073497e-05,
      "loss": 0.7656,
      "step": 3735
    },
    {
      "epoch": 0.8302222222222222,
      "grad_norm": 1.3564252853393555,
      "learning_rate": 3.4031180400890874e-05,
      "loss": 1.9644,
      "step": 3736
    },
    {
      "epoch": 0.8304444444444444,
      "grad_norm": 0.7747462391853333,
      "learning_rate": 3.398663697104677e-05,
      "loss": 1.0032,
      "step": 3737
    },
    {
      "epoch": 0.8306666666666667,
      "grad_norm": 1.13922119140625,
      "learning_rate": 3.394209354120267e-05,
      "loss": 1.545,
      "step": 3738
    },
    {
      "epoch": 0.8308888888888889,
      "grad_norm": 0.9685484766960144,
      "learning_rate": 3.389755011135858e-05,
      "loss": 1.3771,
      "step": 3739
    },
    {
      "epoch": 0.8311111111111111,
      "grad_norm": 0.7709338665008545,
      "learning_rate": 3.3853006681514475e-05,
      "loss": 0.696,
      "step": 3740
    },
    {
      "epoch": 0.8313333333333334,
      "grad_norm": 1.0451520681381226,
      "learning_rate": 3.380846325167038e-05,
      "loss": 1.564,
      "step": 3741
    },
    {
      "epoch": 0.8315555555555556,
      "grad_norm": 1.1514785289764404,
      "learning_rate": 3.3763919821826286e-05,
      "loss": 1.4297,
      "step": 3742
    },
    {
      "epoch": 0.8317777777777777,
      "grad_norm": 1.3433165550231934,
      "learning_rate": 3.3719376391982185e-05,
      "loss": 1.7462,
      "step": 3743
    },
    {
      "epoch": 0.832,
      "grad_norm": 0.9320109486579895,
      "learning_rate": 3.367483296213808e-05,
      "loss": 0.95,
      "step": 3744
    },
    {
      "epoch": 0.8322222222222222,
      "grad_norm": 0.9835542440414429,
      "learning_rate": 3.363028953229399e-05,
      "loss": 1.1297,
      "step": 3745
    },
    {
      "epoch": 0.8324444444444444,
      "grad_norm": 0.18634167313575745,
      "learning_rate": 3.358574610244989e-05,
      "loss": 0.0346,
      "step": 3746
    },
    {
      "epoch": 0.8326666666666667,
      "grad_norm": 0.1696268618106842,
      "learning_rate": 3.354120267260579e-05,
      "loss": 0.034,
      "step": 3747
    },
    {
      "epoch": 0.8328888888888889,
      "grad_norm": 1.043257236480713,
      "learning_rate": 3.34966592427617e-05,
      "loss": 0.9229,
      "step": 3748
    },
    {
      "epoch": 0.8331111111111111,
      "grad_norm": 1.21977698802948,
      "learning_rate": 3.3452115812917596e-05,
      "loss": 1.0685,
      "step": 3749
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 1.1700232028961182,
      "learning_rate": 3.34075723830735e-05,
      "loss": 0.7485,
      "step": 3750
    },
    {
      "epoch": 0.8335555555555556,
      "grad_norm": 0.4857214093208313,
      "learning_rate": 3.33630289532294e-05,
      "loss": 0.9854,
      "step": 3751
    },
    {
      "epoch": 0.8337777777777777,
      "grad_norm": 0.8265995383262634,
      "learning_rate": 3.33184855233853e-05,
      "loss": 1.9879,
      "step": 3752
    },
    {
      "epoch": 0.834,
      "grad_norm": 0.48844748735427856,
      "learning_rate": 3.3273942093541204e-05,
      "loss": 1.0029,
      "step": 3753
    },
    {
      "epoch": 0.8342222222222222,
      "grad_norm": 0.6757328510284424,
      "learning_rate": 3.322939866369711e-05,
      "loss": 1.0597,
      "step": 3754
    },
    {
      "epoch": 0.8344444444444444,
      "grad_norm": 1.0443055629730225,
      "learning_rate": 3.318485523385301e-05,
      "loss": 2.3737,
      "step": 3755
    },
    {
      "epoch": 0.8346666666666667,
      "grad_norm": 0.8254187703132629,
      "learning_rate": 3.3140311804008914e-05,
      "loss": 1.9924,
      "step": 3756
    },
    {
      "epoch": 0.8348888888888889,
      "grad_norm": 0.09497911483049393,
      "learning_rate": 3.309576837416481e-05,
      "loss": 0.0113,
      "step": 3757
    },
    {
      "epoch": 0.8351111111111111,
      "grad_norm": 0.09401866048574448,
      "learning_rate": 3.305122494432071e-05,
      "loss": 0.0116,
      "step": 3758
    },
    {
      "epoch": 0.8353333333333334,
      "grad_norm": 0.09458266943693161,
      "learning_rate": 3.3006681514476616e-05,
      "loss": 0.0116,
      "step": 3759
    },
    {
      "epoch": 0.8355555555555556,
      "grad_norm": 0.09231861680746078,
      "learning_rate": 3.2962138084632515e-05,
      "loss": 0.0114,
      "step": 3760
    },
    {
      "epoch": 0.8357777777777777,
      "grad_norm": 1.012135624885559,
      "learning_rate": 3.291759465478842e-05,
      "loss": 2.2142,
      "step": 3761
    },
    {
      "epoch": 0.836,
      "grad_norm": 0.8345160484313965,
      "learning_rate": 3.2873051224944325e-05,
      "loss": 2.0887,
      "step": 3762
    },
    {
      "epoch": 0.8362222222222222,
      "grad_norm": 0.777621865272522,
      "learning_rate": 3.2828507795100224e-05,
      "loss": 1.7456,
      "step": 3763
    },
    {
      "epoch": 0.8364444444444444,
      "grad_norm": 0.9471651911735535,
      "learning_rate": 3.278396436525612e-05,
      "loss": 1.9976,
      "step": 3764
    },
    {
      "epoch": 0.8366666666666667,
      "grad_norm": 0.9481960535049438,
      "learning_rate": 3.273942093541203e-05,
      "loss": 2.1314,
      "step": 3765
    },
    {
      "epoch": 0.8368888888888889,
      "grad_norm": 1.0837010145187378,
      "learning_rate": 3.2694877505567926e-05,
      "loss": 2.0858,
      "step": 3766
    },
    {
      "epoch": 0.8371111111111111,
      "grad_norm": 1.0105607509613037,
      "learning_rate": 3.265033407572383e-05,
      "loss": 2.1732,
      "step": 3767
    },
    {
      "epoch": 0.8373333333333334,
      "grad_norm": 0.5977281928062439,
      "learning_rate": 3.260579064587974e-05,
      "loss": 0.8871,
      "step": 3768
    },
    {
      "epoch": 0.8375555555555556,
      "grad_norm": 1.2047114372253418,
      "learning_rate": 3.2561247216035636e-05,
      "loss": 2.0168,
      "step": 3769
    },
    {
      "epoch": 0.8377777777777777,
      "grad_norm": 0.8766410946846008,
      "learning_rate": 3.251670378619154e-05,
      "loss": 1.6475,
      "step": 3770
    },
    {
      "epoch": 0.838,
      "grad_norm": 0.9371228218078613,
      "learning_rate": 3.247216035634744e-05,
      "loss": 1.8688,
      "step": 3771
    },
    {
      "epoch": 0.8382222222222222,
      "grad_norm": 1.0611170530319214,
      "learning_rate": 3.242761692650334e-05,
      "loss": 1.8825,
      "step": 3772
    },
    {
      "epoch": 0.8384444444444444,
      "grad_norm": 1.0075304508209229,
      "learning_rate": 3.2383073496659244e-05,
      "loss": 2.1026,
      "step": 3773
    },
    {
      "epoch": 0.8386666666666667,
      "grad_norm": 0.06949839740991592,
      "learning_rate": 3.233853006681515e-05,
      "loss": 0.0162,
      "step": 3774
    },
    {
      "epoch": 0.8388888888888889,
      "grad_norm": 0.06908978521823883,
      "learning_rate": 3.229398663697105e-05,
      "loss": 0.0159,
      "step": 3775
    },
    {
      "epoch": 0.8391111111111111,
      "grad_norm": 0.905208945274353,
      "learning_rate": 3.224944320712695e-05,
      "loss": 1.7153,
      "step": 3776
    },
    {
      "epoch": 0.8393333333333334,
      "grad_norm": 0.8136224150657654,
      "learning_rate": 3.220489977728285e-05,
      "loss": 0.903,
      "step": 3777
    },
    {
      "epoch": 0.8395555555555556,
      "grad_norm": 0.9069592356681824,
      "learning_rate": 3.216035634743875e-05,
      "loss": 1.6758,
      "step": 3778
    },
    {
      "epoch": 0.8397777777777777,
      "grad_norm": 1.0851026773452759,
      "learning_rate": 3.2115812917594655e-05,
      "loss": 1.8553,
      "step": 3779
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.998877763748169,
      "learning_rate": 3.207126948775056e-05,
      "loss": 1.8056,
      "step": 3780
    },
    {
      "epoch": 0.8402222222222222,
      "grad_norm": 0.0660950317978859,
      "learning_rate": 3.202672605790646e-05,
      "loss": 0.0175,
      "step": 3781
    },
    {
      "epoch": 0.8404444444444444,
      "grad_norm": 0.0653744786977768,
      "learning_rate": 3.1982182628062365e-05,
      "loss": 0.0173,
      "step": 3782
    },
    {
      "epoch": 0.8406666666666667,
      "grad_norm": 0.06411214917898178,
      "learning_rate": 3.193763919821826e-05,
      "loss": 0.0175,
      "step": 3783
    },
    {
      "epoch": 0.8408888888888889,
      "grad_norm": 0.06954985857009888,
      "learning_rate": 3.189309576837416e-05,
      "loss": 0.0174,
      "step": 3784
    },
    {
      "epoch": 0.8411111111111111,
      "grad_norm": 0.7279578447341919,
      "learning_rate": 3.184855233853007e-05,
      "loss": 1.0075,
      "step": 3785
    },
    {
      "epoch": 0.8413333333333334,
      "grad_norm": 1.0854923725128174,
      "learning_rate": 3.180400890868597e-05,
      "loss": 1.8502,
      "step": 3786
    },
    {
      "epoch": 0.8415555555555555,
      "grad_norm": 1.1248599290847778,
      "learning_rate": 3.175946547884187e-05,
      "loss": 1.769,
      "step": 3787
    },
    {
      "epoch": 0.8417777777777777,
      "grad_norm": 0.6535754203796387,
      "learning_rate": 3.1714922048997777e-05,
      "loss": 0.922,
      "step": 3788
    },
    {
      "epoch": 0.842,
      "grad_norm": 1.0051473379135132,
      "learning_rate": 3.167037861915368e-05,
      "loss": 1.6551,
      "step": 3789
    },
    {
      "epoch": 0.8422222222222222,
      "grad_norm": 1.0957441329956055,
      "learning_rate": 3.162583518930958e-05,
      "loss": 1.7058,
      "step": 3790
    },
    {
      "epoch": 0.8424444444444444,
      "grad_norm": 1.0055428743362427,
      "learning_rate": 3.158129175946548e-05,
      "loss": 1.5342,
      "step": 3791
    },
    {
      "epoch": 0.8426666666666667,
      "grad_norm": 0.8064576387405396,
      "learning_rate": 3.1536748329621384e-05,
      "loss": 0.8161,
      "step": 3792
    },
    {
      "epoch": 0.8428888888888889,
      "grad_norm": 1.1807235479354858,
      "learning_rate": 3.149220489977728e-05,
      "loss": 1.8853,
      "step": 3793
    },
    {
      "epoch": 0.8431111111111111,
      "grad_norm": 1.0103986263275146,
      "learning_rate": 3.144766146993319e-05,
      "loss": 1.3899,
      "step": 3794
    },
    {
      "epoch": 0.8433333333333334,
      "grad_norm": 0.2762221693992615,
      "learning_rate": 3.140311804008909e-05,
      "loss": 0.0367,
      "step": 3795
    },
    {
      "epoch": 0.8435555555555555,
      "grad_norm": 0.7262986302375793,
      "learning_rate": 3.135857461024499e-05,
      "loss": 0.7566,
      "step": 3796
    },
    {
      "epoch": 0.8437777777777777,
      "grad_norm": 1.0480473041534424,
      "learning_rate": 3.131403118040089e-05,
      "loss": 1.6141,
      "step": 3797
    },
    {
      "epoch": 0.844,
      "grad_norm": 1.0954132080078125,
      "learning_rate": 3.126948775055679e-05,
      "loss": 1.269,
      "step": 3798
    },
    {
      "epoch": 0.8442222222222222,
      "grad_norm": 0.7563920021057129,
      "learning_rate": 3.1224944320712695e-05,
      "loss": 0.6253,
      "step": 3799
    },
    {
      "epoch": 0.8444444444444444,
      "grad_norm": 0.7246300578117371,
      "learning_rate": 3.11804008908686e-05,
      "loss": 0.3778,
      "step": 3800
    },
    {
      "epoch": 0.8446666666666667,
      "grad_norm": 0.9608231782913208,
      "learning_rate": 3.11358574610245e-05,
      "loss": 2.1959,
      "step": 3801
    },
    {
      "epoch": 0.8448888888888889,
      "grad_norm": 0.04354199394583702,
      "learning_rate": 3.1091314031180404e-05,
      "loss": 0.0106,
      "step": 3802
    },
    {
      "epoch": 0.8451111111111111,
      "grad_norm": 0.8725544810295105,
      "learning_rate": 3.10467706013363e-05,
      "loss": 2.4019,
      "step": 3803
    },
    {
      "epoch": 0.8453333333333334,
      "grad_norm": 0.04339034482836723,
      "learning_rate": 3.10022271714922e-05,
      "loss": 0.0107,
      "step": 3804
    },
    {
      "epoch": 0.8455555555555555,
      "grad_norm": 0.8996299505233765,
      "learning_rate": 3.095768374164811e-05,
      "loss": 1.7978,
      "step": 3805
    },
    {
      "epoch": 0.8457777777777777,
      "grad_norm": 0.08517049998044968,
      "learning_rate": 3.091314031180401e-05,
      "loss": 0.0108,
      "step": 3806
    },
    {
      "epoch": 0.846,
      "grad_norm": 0.08175533264875412,
      "learning_rate": 3.086859688195991e-05,
      "loss": 0.0109,
      "step": 3807
    },
    {
      "epoch": 0.8462222222222222,
      "grad_norm": 0.06705193221569061,
      "learning_rate": 3.0824053452115816e-05,
      "loss": 0.0103,
      "step": 3808
    },
    {
      "epoch": 0.8464444444444444,
      "grad_norm": 0.08038879185914993,
      "learning_rate": 3.077951002227172e-05,
      "loss": 0.0106,
      "step": 3809
    },
    {
      "epoch": 0.8466666666666667,
      "grad_norm": 0.617675244808197,
      "learning_rate": 3.073496659242761e-05,
      "loss": 1.0167,
      "step": 3810
    },
    {
      "epoch": 0.8468888888888889,
      "grad_norm": 0.8487913012504578,
      "learning_rate": 3.069042316258352e-05,
      "loss": 1.8088,
      "step": 3811
    },
    {
      "epoch": 0.8471111111111111,
      "grad_norm": 0.8923436403274536,
      "learning_rate": 3.0645879732739424e-05,
      "loss": 1.8555,
      "step": 3812
    },
    {
      "epoch": 0.8473333333333334,
      "grad_norm": 0.9946725368499756,
      "learning_rate": 3.060133630289532e-05,
      "loss": 2.1355,
      "step": 3813
    },
    {
      "epoch": 0.8475555555555555,
      "grad_norm": 0.8608193397521973,
      "learning_rate": 3.055679287305123e-05,
      "loss": 1.8501,
      "step": 3814
    },
    {
      "epoch": 0.8477777777777777,
      "grad_norm": 1.0533936023712158,
      "learning_rate": 3.051224944320713e-05,
      "loss": 2.3759,
      "step": 3815
    },
    {
      "epoch": 0.848,
      "grad_norm": 0.9395473003387451,
      "learning_rate": 3.046770601336303e-05,
      "loss": 1.8055,
      "step": 3816
    },
    {
      "epoch": 0.8482222222222222,
      "grad_norm": 0.868739902973175,
      "learning_rate": 3.0423162583518934e-05,
      "loss": 1.9014,
      "step": 3817
    },
    {
      "epoch": 0.8484444444444444,
      "grad_norm": 0.9286447167396545,
      "learning_rate": 3.0378619153674836e-05,
      "loss": 2.1256,
      "step": 3818
    },
    {
      "epoch": 0.8486666666666667,
      "grad_norm": 0.9453836679458618,
      "learning_rate": 3.0334075723830734e-05,
      "loss": 1.9572,
      "step": 3819
    },
    {
      "epoch": 0.8488888888888889,
      "grad_norm": 1.1995909214019775,
      "learning_rate": 3.028953229398664e-05,
      "loss": 1.7376,
      "step": 3820
    },
    {
      "epoch": 0.8491111111111111,
      "grad_norm": 1.2044036388397217,
      "learning_rate": 3.024498886414254e-05,
      "loss": 2.1343,
      "step": 3821
    },
    {
      "epoch": 0.8493333333333334,
      "grad_norm": 0.8160643577575684,
      "learning_rate": 3.020044543429844e-05,
      "loss": 0.9794,
      "step": 3822
    },
    {
      "epoch": 0.8495555555555555,
      "grad_norm": 0.06719803065061569,
      "learning_rate": 3.0155902004454346e-05,
      "loss": 0.0154,
      "step": 3823
    },
    {
      "epoch": 0.8497777777777777,
      "grad_norm": 0.06740820407867432,
      "learning_rate": 3.0111358574610248e-05,
      "loss": 0.0154,
      "step": 3824
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0504337549209595,
      "learning_rate": 3.0066815144766146e-05,
      "loss": 1.8658,
      "step": 3825
    },
    {
      "epoch": 0.8502222222222222,
      "grad_norm": 0.7439045906066895,
      "learning_rate": 3.002227171492205e-05,
      "loss": 1.1039,
      "step": 3826
    },
    {
      "epoch": 0.8504444444444444,
      "grad_norm": 0.1253952980041504,
      "learning_rate": 2.9977728285077953e-05,
      "loss": 0.0206,
      "step": 3827
    },
    {
      "epoch": 0.8506666666666667,
      "grad_norm": 1.0089833736419678,
      "learning_rate": 2.9933184855233852e-05,
      "loss": 2.0178,
      "step": 3828
    },
    {
      "epoch": 0.8508888888888889,
      "grad_norm": 1.0395070314407349,
      "learning_rate": 2.9888641425389757e-05,
      "loss": 1.9692,
      "step": 3829
    },
    {
      "epoch": 0.8511111111111112,
      "grad_norm": 1.0526185035705566,
      "learning_rate": 2.9844097995545663e-05,
      "loss": 1.8065,
      "step": 3830
    },
    {
      "epoch": 0.8513333333333334,
      "grad_norm": 1.0034129619598389,
      "learning_rate": 2.9799554565701558e-05,
      "loss": 1.679,
      "step": 3831
    },
    {
      "epoch": 0.8515555555555555,
      "grad_norm": 0.06555074453353882,
      "learning_rate": 2.9755011135857463e-05,
      "loss": 0.017,
      "step": 3832
    },
    {
      "epoch": 0.8517777777777777,
      "grad_norm": 0.8089559078216553,
      "learning_rate": 2.9710467706013362e-05,
      "loss": 0.9377,
      "step": 3833
    },
    {
      "epoch": 0.852,
      "grad_norm": 0.7607543468475342,
      "learning_rate": 2.9665924276169267e-05,
      "loss": 0.873,
      "step": 3834
    },
    {
      "epoch": 0.8522222222222222,
      "grad_norm": 0.08201993256807327,
      "learning_rate": 2.962138084632517e-05,
      "loss": 0.0183,
      "step": 3835
    },
    {
      "epoch": 0.8524444444444444,
      "grad_norm": 0.6691009402275085,
      "learning_rate": 2.9576837416481068e-05,
      "loss": 0.6488,
      "step": 3836
    },
    {
      "epoch": 0.8526666666666667,
      "grad_norm": 1.0818275213241577,
      "learning_rate": 2.9532293986636973e-05,
      "loss": 1.6231,
      "step": 3837
    },
    {
      "epoch": 0.8528888888888889,
      "grad_norm": 0.10234081745147705,
      "learning_rate": 2.9487750556792875e-05,
      "loss": 0.0249,
      "step": 3838
    },
    {
      "epoch": 0.8531111111111112,
      "grad_norm": 0.7301368117332458,
      "learning_rate": 2.9443207126948774e-05,
      "loss": 0.7285,
      "step": 3839
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 1.020973563194275,
      "learning_rate": 2.939866369710468e-05,
      "loss": 1.5453,
      "step": 3840
    },
    {
      "epoch": 0.8535555555555555,
      "grad_norm": 1.161118745803833,
      "learning_rate": 2.935412026726058e-05,
      "loss": 1.5789,
      "step": 3841
    },
    {
      "epoch": 0.8537777777777777,
      "grad_norm": 1.1855006217956543,
      "learning_rate": 2.930957683741648e-05,
      "loss": 1.443,
      "step": 3842
    },
    {
      "epoch": 0.854,
      "grad_norm": 1.0485907793045044,
      "learning_rate": 2.9265033407572385e-05,
      "loss": 1.2626,
      "step": 3843
    },
    {
      "epoch": 0.8542222222222222,
      "grad_norm": 1.0825096368789673,
      "learning_rate": 2.9220489977728287e-05,
      "loss": 1.4062,
      "step": 3844
    },
    {
      "epoch": 0.8544444444444445,
      "grad_norm": 0.16687047481536865,
      "learning_rate": 2.9175946547884186e-05,
      "loss": 0.0262,
      "step": 3845
    },
    {
      "epoch": 0.8546666666666667,
      "grad_norm": 1.0403611660003662,
      "learning_rate": 2.913140311804009e-05,
      "loss": 1.4555,
      "step": 3846
    },
    {
      "epoch": 0.8548888888888889,
      "grad_norm": 1.069176197052002,
      "learning_rate": 2.9086859688195993e-05,
      "loss": 1.2518,
      "step": 3847
    },
    {
      "epoch": 0.8551111111111112,
      "grad_norm": 1.2168667316436768,
      "learning_rate": 2.904231625835189e-05,
      "loss": 1.289,
      "step": 3848
    },
    {
      "epoch": 0.8553333333333333,
      "grad_norm": 0.5977094769477844,
      "learning_rate": 2.8997772828507797e-05,
      "loss": 0.4731,
      "step": 3849
    },
    {
      "epoch": 0.8555555555555555,
      "grad_norm": 0.543451189994812,
      "learning_rate": 2.89532293986637e-05,
      "loss": 0.3367,
      "step": 3850
    },
    {
      "epoch": 0.8557777777777777,
      "grad_norm": 0.044504791498184204,
      "learning_rate": 2.8908685968819597e-05,
      "loss": 0.0103,
      "step": 3851
    },
    {
      "epoch": 0.856,
      "grad_norm": 0.8173375725746155,
      "learning_rate": 2.8864142538975503e-05,
      "loss": 2.0658,
      "step": 3852
    },
    {
      "epoch": 0.8562222222222222,
      "grad_norm": 0.6008175015449524,
      "learning_rate": 2.8819599109131408e-05,
      "loss": 1.0048,
      "step": 3853
    },
    {
      "epoch": 0.8564444444444445,
      "grad_norm": 0.6246810555458069,
      "learning_rate": 2.8775055679287303e-05,
      "loss": 1.0661,
      "step": 3854
    },
    {
      "epoch": 0.8566666666666667,
      "grad_norm": 0.9632955193519592,
      "learning_rate": 2.873051224944321e-05,
      "loss": 2.273,
      "step": 3855
    },
    {
      "epoch": 0.8568888888888889,
      "grad_norm": 0.8222072720527649,
      "learning_rate": 2.8685968819599114e-05,
      "loss": 2.0065,
      "step": 3856
    },
    {
      "epoch": 0.8571111111111112,
      "grad_norm": 0.6057097911834717,
      "learning_rate": 2.8641425389755013e-05,
      "loss": 1.1629,
      "step": 3857
    },
    {
      "epoch": 0.8573333333333333,
      "grad_norm": 0.06852009892463684,
      "learning_rate": 2.8596881959910915e-05,
      "loss": 0.0104,
      "step": 3858
    },
    {
      "epoch": 0.8575555555555555,
      "grad_norm": 0.06707873195409775,
      "learning_rate": 2.855233853006682e-05,
      "loss": 0.0103,
      "step": 3859
    },
    {
      "epoch": 0.8577777777777778,
      "grad_norm": 0.0674692690372467,
      "learning_rate": 2.850779510022272e-05,
      "loss": 0.0099,
      "step": 3860
    },
    {
      "epoch": 0.858,
      "grad_norm": 0.5297547578811646,
      "learning_rate": 2.846325167037862e-05,
      "loss": 1.1045,
      "step": 3861
    },
    {
      "epoch": 0.8582222222222222,
      "grad_norm": 0.9173485040664673,
      "learning_rate": 2.8418708240534526e-05,
      "loss": 1.9225,
      "step": 3862
    },
    {
      "epoch": 0.8584444444444445,
      "grad_norm": 0.9960424900054932,
      "learning_rate": 2.8374164810690424e-05,
      "loss": 2.0529,
      "step": 3863
    },
    {
      "epoch": 0.8586666666666667,
      "grad_norm": 0.6144242286682129,
      "learning_rate": 2.8329621380846326e-05,
      "loss": 1.0756,
      "step": 3864
    },
    {
      "epoch": 0.8588888888888889,
      "grad_norm": 0.9492395520210266,
      "learning_rate": 2.8285077951002232e-05,
      "loss": 1.5811,
      "step": 3865
    },
    {
      "epoch": 0.8591111111111112,
      "grad_norm": 1.0924068689346313,
      "learning_rate": 2.824053452115813e-05,
      "loss": 2.1395,
      "step": 3866
    },
    {
      "epoch": 0.8593333333333333,
      "grad_norm": 1.214991569519043,
      "learning_rate": 2.8195991091314032e-05,
      "loss": 2.3004,
      "step": 3867
    },
    {
      "epoch": 0.8595555555555555,
      "grad_norm": 0.9233739972114563,
      "learning_rate": 2.815144766146993e-05,
      "loss": 1.9741,
      "step": 3868
    },
    {
      "epoch": 0.8597777777777778,
      "grad_norm": 0.9544225931167603,
      "learning_rate": 2.8106904231625836e-05,
      "loss": 2.0073,
      "step": 3869
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0409972667694092,
      "learning_rate": 2.8062360801781738e-05,
      "loss": 2.0343,
      "step": 3870
    },
    {
      "epoch": 0.8602222222222222,
      "grad_norm": 1.0500308275222778,
      "learning_rate": 2.8017817371937637e-05,
      "loss": 1.8736,
      "step": 3871
    },
    {
      "epoch": 0.8604444444444445,
      "grad_norm": 1.0792829990386963,
      "learning_rate": 2.7973273942093542e-05,
      "loss": 1.9845,
      "step": 3872
    },
    {
      "epoch": 0.8606666666666667,
      "grad_norm": 1.1363780498504639,
      "learning_rate": 2.7928730512249447e-05,
      "loss": 1.8806,
      "step": 3873
    },
    {
      "epoch": 0.8608888888888889,
      "grad_norm": 0.6764510869979858,
      "learning_rate": 2.7884187082405343e-05,
      "loss": 0.9852,
      "step": 3874
    },
    {
      "epoch": 0.8611111111111112,
      "grad_norm": 0.06674336642026901,
      "learning_rate": 2.7839643652561248e-05,
      "loss": 0.0153,
      "step": 3875
    },
    {
      "epoch": 0.8613333333333333,
      "grad_norm": 0.06754778325557709,
      "learning_rate": 2.7795100222717153e-05,
      "loss": 0.0156,
      "step": 3876
    },
    {
      "epoch": 0.8615555555555555,
      "grad_norm": 0.6871387362480164,
      "learning_rate": 2.7750556792873052e-05,
      "loss": 0.8689,
      "step": 3877
    },
    {
      "epoch": 0.8617777777777778,
      "grad_norm": 1.1654753684997559,
      "learning_rate": 2.7706013363028954e-05,
      "loss": 1.9632,
      "step": 3878
    },
    {
      "epoch": 0.862,
      "grad_norm": 0.13107286393642426,
      "learning_rate": 2.766146993318486e-05,
      "loss": 0.0207,
      "step": 3879
    },
    {
      "epoch": 0.8622222222222222,
      "grad_norm": 1.0098730325698853,
      "learning_rate": 2.7616926503340758e-05,
      "loss": 1.7388,
      "step": 3880
    },
    {
      "epoch": 0.8624444444444445,
      "grad_norm": 0.9629087448120117,
      "learning_rate": 2.757238307349666e-05,
      "loss": 1.7969,
      "step": 3881
    },
    {
      "epoch": 0.8626666666666667,
      "grad_norm": 0.9217532873153687,
      "learning_rate": 2.7527839643652565e-05,
      "loss": 1.9322,
      "step": 3882
    },
    {
      "epoch": 0.8628888888888889,
      "grad_norm": 1.0283830165863037,
      "learning_rate": 2.7483296213808464e-05,
      "loss": 1.7681,
      "step": 3883
    },
    {
      "epoch": 0.8631111111111112,
      "grad_norm": 0.701818585395813,
      "learning_rate": 2.7438752783964366e-05,
      "loss": 0.8642,
      "step": 3884
    },
    {
      "epoch": 0.8633333333333333,
      "grad_norm": 0.7634962201118469,
      "learning_rate": 2.739420935412027e-05,
      "loss": 0.7195,
      "step": 3885
    },
    {
      "epoch": 0.8635555555555555,
      "grad_norm": 0.9613010287284851,
      "learning_rate": 2.734966592427617e-05,
      "loss": 1.7299,
      "step": 3886
    },
    {
      "epoch": 0.8637777777777778,
      "grad_norm": 0.8127443790435791,
      "learning_rate": 2.730512249443207e-05,
      "loss": 0.9909,
      "step": 3887
    },
    {
      "epoch": 0.864,
      "grad_norm": 0.7633342146873474,
      "learning_rate": 2.7260579064587977e-05,
      "loss": 0.7784,
      "step": 3888
    },
    {
      "epoch": 0.8642222222222222,
      "grad_norm": 0.8209825754165649,
      "learning_rate": 2.7216035634743876e-05,
      "loss": 0.8605,
      "step": 3889
    },
    {
      "epoch": 0.8644444444444445,
      "grad_norm": 1.1006879806518555,
      "learning_rate": 2.7171492204899778e-05,
      "loss": 1.4274,
      "step": 3890
    },
    {
      "epoch": 0.8646666666666667,
      "grad_norm": 0.9458972811698914,
      "learning_rate": 2.7126948775055683e-05,
      "loss": 1.4813,
      "step": 3891
    },
    {
      "epoch": 0.8648888888888889,
      "grad_norm": 1.3641767501831055,
      "learning_rate": 2.708240534521158e-05,
      "loss": 1.5427,
      "step": 3892
    },
    {
      "epoch": 0.8651111111111112,
      "grad_norm": 1.0138379335403442,
      "learning_rate": 2.7037861915367484e-05,
      "loss": 1.4602,
      "step": 3893
    },
    {
      "epoch": 0.8653333333333333,
      "grad_norm": 1.0641552209854126,
      "learning_rate": 2.699331848552339e-05,
      "loss": 1.2653,
      "step": 3894
    },
    {
      "epoch": 0.8655555555555555,
      "grad_norm": 1.2748581171035767,
      "learning_rate": 2.6948775055679287e-05,
      "loss": 1.3019,
      "step": 3895
    },
    {
      "epoch": 0.8657777777777778,
      "grad_norm": 1.1393803358078003,
      "learning_rate": 2.6904231625835193e-05,
      "loss": 1.331,
      "step": 3896
    },
    {
      "epoch": 0.866,
      "grad_norm": 1.0819334983825684,
      "learning_rate": 2.6859688195991095e-05,
      "loss": 1.1292,
      "step": 3897
    },
    {
      "epoch": 0.8662222222222222,
      "grad_norm": 0.14373019337654114,
      "learning_rate": 2.6815144766146993e-05,
      "loss": 0.0321,
      "step": 3898
    },
    {
      "epoch": 0.8664444444444445,
      "grad_norm": 0.14658400416374207,
      "learning_rate": 2.67706013363029e-05,
      "loss": 0.032,
      "step": 3899
    },
    {
      "epoch": 0.8666666666666667,
      "grad_norm": 1.1421598196029663,
      "learning_rate": 2.67260579064588e-05,
      "loss": 1.0447,
      "step": 3900
    },
    {
      "epoch": 0.8668888888888889,
      "grad_norm": 0.6876357793807983,
      "learning_rate": 2.66815144766147e-05,
      "loss": 1.2227,
      "step": 3901
    },
    {
      "epoch": 0.8671111111111112,
      "grad_norm": 0.0448576956987381,
      "learning_rate": 2.6636971046770605e-05,
      "loss": 0.0104,
      "step": 3902
    },
    {
      "epoch": 0.8673333333333333,
      "grad_norm": 0.6660778522491455,
      "learning_rate": 2.6592427616926503e-05,
      "loss": 1.121,
      "step": 3903
    },
    {
      "epoch": 0.8675555555555555,
      "grad_norm": 0.043087027966976166,
      "learning_rate": 2.6547884187082405e-05,
      "loss": 0.0103,
      "step": 3904
    },
    {
      "epoch": 0.8677777777777778,
      "grad_norm": 0.5372818112373352,
      "learning_rate": 2.650334075723831e-05,
      "loss": 1.0993,
      "step": 3905
    },
    {
      "epoch": 0.868,
      "grad_norm": 0.9083240628242493,
      "learning_rate": 2.645879732739421e-05,
      "loss": 2.459,
      "step": 3906
    },
    {
      "epoch": 0.8682222222222222,
      "grad_norm": 0.8523256182670593,
      "learning_rate": 2.641425389755011e-05,
      "loss": 2.1683,
      "step": 3907
    },
    {
      "epoch": 0.8684444444444445,
      "grad_norm": 0.6197808384895325,
      "learning_rate": 2.6369710467706016e-05,
      "loss": 1.0535,
      "step": 3908
    },
    {
      "epoch": 0.8686666666666667,
      "grad_norm": 0.8953803181648254,
      "learning_rate": 2.6325167037861915e-05,
      "loss": 1.9434,
      "step": 3909
    },
    {
      "epoch": 0.8688888888888889,
      "grad_norm": 0.9139788150787354,
      "learning_rate": 2.6280623608017817e-05,
      "loss": 1.8545,
      "step": 3910
    },
    {
      "epoch": 0.8691111111111111,
      "grad_norm": 0.8638214468955994,
      "learning_rate": 2.6236080178173722e-05,
      "loss": 1.8329,
      "step": 3911
    },
    {
      "epoch": 0.8693333333333333,
      "grad_norm": 0.8344167470932007,
      "learning_rate": 2.619153674832962e-05,
      "loss": 1.7991,
      "step": 3912
    },
    {
      "epoch": 0.8695555555555555,
      "grad_norm": 0.96803879737854,
      "learning_rate": 2.6146993318485523e-05,
      "loss": 1.9849,
      "step": 3913
    },
    {
      "epoch": 0.8697777777777778,
      "grad_norm": 1.0239784717559814,
      "learning_rate": 2.6102449888641428e-05,
      "loss": 1.9256,
      "step": 3914
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8905801177024841,
      "learning_rate": 2.6057906458797327e-05,
      "loss": 1.6746,
      "step": 3915
    },
    {
      "epoch": 0.8702222222222222,
      "grad_norm": 1.0133596658706665,
      "learning_rate": 2.601336302895323e-05,
      "loss": 2.1594,
      "step": 3916
    },
    {
      "epoch": 0.8704444444444445,
      "grad_norm": 0.06942284107208252,
      "learning_rate": 2.5968819599109134e-05,
      "loss": 0.015,
      "step": 3917
    },
    {
      "epoch": 0.8706666666666667,
      "grad_norm": 0.07573316246271133,
      "learning_rate": 2.5924276169265033e-05,
      "loss": 0.0148,
      "step": 3918
    },
    {
      "epoch": 0.8708888888888889,
      "grad_norm": 0.07226064801216125,
      "learning_rate": 2.5879732739420938e-05,
      "loss": 0.015,
      "step": 3919
    },
    {
      "epoch": 0.8711111111111111,
      "grad_norm": 1.0551682710647583,
      "learning_rate": 2.583518930957684e-05,
      "loss": 1.9451,
      "step": 3920
    },
    {
      "epoch": 0.8713333333333333,
      "grad_norm": 1.0881084203720093,
      "learning_rate": 2.579064587973274e-05,
      "loss": 1.9361,
      "step": 3921
    },
    {
      "epoch": 0.8715555555555555,
      "grad_norm": 1.029228925704956,
      "learning_rate": 2.5746102449888644e-05,
      "loss": 1.97,
      "step": 3922
    },
    {
      "epoch": 0.8717777777777778,
      "grad_norm": 0.9416628479957581,
      "learning_rate": 2.5701559020044546e-05,
      "loss": 1.4815,
      "step": 3923
    },
    {
      "epoch": 0.872,
      "grad_norm": 1.8949933052062988,
      "learning_rate": 2.5657015590200445e-05,
      "loss": 2.0094,
      "step": 3924
    },
    {
      "epoch": 0.8722222222222222,
      "grad_norm": 0.9487776756286621,
      "learning_rate": 2.561247216035635e-05,
      "loss": 1.8348,
      "step": 3925
    },
    {
      "epoch": 0.8724444444444445,
      "grad_norm": 0.865877091884613,
      "learning_rate": 2.5567928730512252e-05,
      "loss": 1.5909,
      "step": 3926
    },
    {
      "epoch": 0.8726666666666667,
      "grad_norm": 0.9927725195884705,
      "learning_rate": 2.552338530066815e-05,
      "loss": 1.9465,
      "step": 3927
    },
    {
      "epoch": 0.8728888888888889,
      "grad_norm": 0.9912342429161072,
      "learning_rate": 2.5478841870824056e-05,
      "loss": 1.3772,
      "step": 3928
    },
    {
      "epoch": 0.8731111111111111,
      "grad_norm": 0.9611807465553284,
      "learning_rate": 2.5434298440979958e-05,
      "loss": 1.5165,
      "step": 3929
    },
    {
      "epoch": 0.8733333333333333,
      "grad_norm": 0.9328694343566895,
      "learning_rate": 2.5389755011135856e-05,
      "loss": 1.3826,
      "step": 3930
    },
    {
      "epoch": 0.8735555555555555,
      "grad_norm": 0.9587991237640381,
      "learning_rate": 2.5345211581291762e-05,
      "loss": 1.6781,
      "step": 3931
    },
    {
      "epoch": 0.8737777777777778,
      "grad_norm": 0.06626418977975845,
      "learning_rate": 2.5300668151447664e-05,
      "loss": 0.0176,
      "step": 3932
    },
    {
      "epoch": 0.874,
      "grad_norm": 0.06570940464735031,
      "learning_rate": 2.5256124721603562e-05,
      "loss": 0.0179,
      "step": 3933
    },
    {
      "epoch": 0.8742222222222222,
      "grad_norm": 0.06534791737794876,
      "learning_rate": 2.5211581291759468e-05,
      "loss": 0.0175,
      "step": 3934
    },
    {
      "epoch": 0.8744444444444445,
      "grad_norm": 0.07688681036233902,
      "learning_rate": 2.516703786191537e-05,
      "loss": 0.0177,
      "step": 3935
    },
    {
      "epoch": 0.8746666666666667,
      "grad_norm": 0.060970455408096313,
      "learning_rate": 2.5122494432071268e-05,
      "loss": 0.0177,
      "step": 3936
    },
    {
      "epoch": 0.8748888888888889,
      "grad_norm": 1.0032833814620972,
      "learning_rate": 2.5077951002227174e-05,
      "loss": 1.7855,
      "step": 3937
    },
    {
      "epoch": 0.8751111111111111,
      "grad_norm": 0.9916431903839111,
      "learning_rate": 2.503340757238308e-05,
      "loss": 1.7102,
      "step": 3938
    },
    {
      "epoch": 0.8753333333333333,
      "grad_norm": 0.6338675618171692,
      "learning_rate": 2.4988864142538974e-05,
      "loss": 0.744,
      "step": 3939
    },
    {
      "epoch": 0.8755555555555555,
      "grad_norm": 0.732306957244873,
      "learning_rate": 2.494432071269488e-05,
      "loss": 0.7834,
      "step": 3940
    },
    {
      "epoch": 0.8757777777777778,
      "grad_norm": 0.9343276619911194,
      "learning_rate": 2.489977728285078e-05,
      "loss": 1.6322,
      "step": 3941
    },
    {
      "epoch": 0.876,
      "grad_norm": 1.0164755582809448,
      "learning_rate": 2.4855233853006683e-05,
      "loss": 1.561,
      "step": 3942
    },
    {
      "epoch": 0.8762222222222222,
      "grad_norm": 0.968427300453186,
      "learning_rate": 2.4810690423162585e-05,
      "loss": 1.2585,
      "step": 3943
    },
    {
      "epoch": 0.8764444444444445,
      "grad_norm": 0.10157324373722076,
      "learning_rate": 2.4766146993318487e-05,
      "loss": 0.025,
      "step": 3944
    },
    {
      "epoch": 0.8766666666666667,
      "grad_norm": 1.4210426807403564,
      "learning_rate": 2.472160356347439e-05,
      "loss": 1.3668,
      "step": 3945
    },
    {
      "epoch": 0.8768888888888889,
      "grad_norm": 1.2264833450317383,
      "learning_rate": 2.467706013363029e-05,
      "loss": 1.2792,
      "step": 3946
    },
    {
      "epoch": 0.8771111111111111,
      "grad_norm": 1.10524582862854,
      "learning_rate": 2.4632516703786193e-05,
      "loss": 1.3331,
      "step": 3947
    },
    {
      "epoch": 0.8773333333333333,
      "grad_norm": 0.8390571475028992,
      "learning_rate": 2.4587973273942095e-05,
      "loss": 0.6529,
      "step": 3948
    },
    {
      "epoch": 0.8775555555555555,
      "grad_norm": 1.1716080904006958,
      "learning_rate": 2.4543429844097994e-05,
      "loss": 1.2058,
      "step": 3949
    },
    {
      "epoch": 0.8777777777777778,
      "grad_norm": 1.0907095670700073,
      "learning_rate": 2.44988864142539e-05,
      "loss": 0.9754,
      "step": 3950
    },
    {
      "epoch": 0.878,
      "grad_norm": 0.5868102312088013,
      "learning_rate": 2.44543429844098e-05,
      "loss": 1.1827,
      "step": 3951
    },
    {
      "epoch": 0.8782222222222222,
      "grad_norm": 0.8409274220466614,
      "learning_rate": 2.4409799554565703e-05,
      "loss": 2.1622,
      "step": 3952
    },
    {
      "epoch": 0.8784444444444445,
      "grad_norm": 0.6952332258224487,
      "learning_rate": 2.4365256124721605e-05,
      "loss": 1.1901,
      "step": 3953
    },
    {
      "epoch": 0.8786666666666667,
      "grad_norm": 0.519538164138794,
      "learning_rate": 2.4320712694877507e-05,
      "loss": 1.2036,
      "step": 3954
    },
    {
      "epoch": 0.8788888888888889,
      "grad_norm": 0.6376737356185913,
      "learning_rate": 2.427616926503341e-05,
      "loss": 1.1078,
      "step": 3955
    },
    {
      "epoch": 0.8791111111111111,
      "grad_norm": 0.044897519052028656,
      "learning_rate": 2.423162583518931e-05,
      "loss": 0.0104,
      "step": 3956
    },
    {
      "epoch": 0.8793333333333333,
      "grad_norm": 0.9802849292755127,
      "learning_rate": 2.4187082405345213e-05,
      "loss": 1.8767,
      "step": 3957
    },
    {
      "epoch": 0.8795555555555555,
      "grad_norm": 0.08882291615009308,
      "learning_rate": 2.4142538975501115e-05,
      "loss": 0.0114,
      "step": 3958
    },
    {
      "epoch": 0.8797777777777778,
      "grad_norm": 0.9282602667808533,
      "learning_rate": 2.4097995545657017e-05,
      "loss": 1.8114,
      "step": 3959
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9374412894248962,
      "learning_rate": 2.405345211581292e-05,
      "loss": 1.9425,
      "step": 3960
    },
    {
      "epoch": 0.8802222222222222,
      "grad_norm": 1.0642507076263428,
      "learning_rate": 2.400890868596882e-05,
      "loss": 2.585,
      "step": 3961
    },
    {
      "epoch": 0.8804444444444445,
      "grad_norm": 0.8070052862167358,
      "learning_rate": 2.3964365256124723e-05,
      "loss": 1.908,
      "step": 3962
    },
    {
      "epoch": 0.8806666666666667,
      "grad_norm": 0.8729952573776245,
      "learning_rate": 2.3919821826280625e-05,
      "loss": 1.9254,
      "step": 3963
    },
    {
      "epoch": 0.8808888888888889,
      "grad_norm": 0.12168601900339127,
      "learning_rate": 2.3875278396436527e-05,
      "loss": 0.0192,
      "step": 3964
    },
    {
      "epoch": 0.8811111111111111,
      "grad_norm": 0.6381791830062866,
      "learning_rate": 2.383073496659243e-05,
      "loss": 0.9943,
      "step": 3965
    },
    {
      "epoch": 0.8813333333333333,
      "grad_norm": 1.2023353576660156,
      "learning_rate": 2.378619153674833e-05,
      "loss": 1.9608,
      "step": 3966
    },
    {
      "epoch": 0.8815555555555555,
      "grad_norm": 0.9587229490280151,
      "learning_rate": 2.3741648106904233e-05,
      "loss": 1.8919,
      "step": 3967
    },
    {
      "epoch": 0.8817777777777778,
      "grad_norm": 1.0025968551635742,
      "learning_rate": 2.3697104677060135e-05,
      "loss": 1.953,
      "step": 3968
    },
    {
      "epoch": 0.882,
      "grad_norm": 0.9075009226799011,
      "learning_rate": 2.3652561247216037e-05,
      "loss": 1.9855,
      "step": 3969
    },
    {
      "epoch": 0.8822222222222222,
      "grad_norm": 0.06732242554426193,
      "learning_rate": 2.360801781737194e-05,
      "loss": 0.0149,
      "step": 3970
    },
    {
      "epoch": 0.8824444444444445,
      "grad_norm": 0.06586241722106934,
      "learning_rate": 2.356347438752784e-05,
      "loss": 0.015,
      "step": 3971
    },
    {
      "epoch": 0.8826666666666667,
      "grad_norm": 0.06589429080486298,
      "learning_rate": 2.3518930957683743e-05,
      "loss": 0.015,
      "step": 3972
    },
    {
      "epoch": 0.8828888888888888,
      "grad_norm": 0.7274507284164429,
      "learning_rate": 2.3474387527839645e-05,
      "loss": 0.9097,
      "step": 3973
    },
    {
      "epoch": 0.8831111111111111,
      "grad_norm": 0.9447082877159119,
      "learning_rate": 2.3429844097995547e-05,
      "loss": 1.8058,
      "step": 3974
    },
    {
      "epoch": 0.8833333333333333,
      "grad_norm": 0.13066767156124115,
      "learning_rate": 2.338530066815145e-05,
      "loss": 0.0211,
      "step": 3975
    },
    {
      "epoch": 0.8835555555555555,
      "grad_norm": 0.7804778218269348,
      "learning_rate": 2.334075723830735e-05,
      "loss": 1.0144,
      "step": 3976
    },
    {
      "epoch": 0.8837777777777778,
      "grad_norm": 1.0640380382537842,
      "learning_rate": 2.3296213808463252e-05,
      "loss": 1.7064,
      "step": 3977
    },
    {
      "epoch": 0.884,
      "grad_norm": 1.0175601243972778,
      "learning_rate": 2.3251670378619154e-05,
      "loss": 1.9517,
      "step": 3978
    },
    {
      "epoch": 0.8842222222222222,
      "grad_norm": 1.1040079593658447,
      "learning_rate": 2.3207126948775056e-05,
      "loss": 1.8058,
      "step": 3979
    },
    {
      "epoch": 0.8844444444444445,
      "grad_norm": 0.728284478187561,
      "learning_rate": 2.316258351893096e-05,
      "loss": 0.7271,
      "step": 3980
    },
    {
      "epoch": 0.8846666666666667,
      "grad_norm": 0.9347479939460754,
      "learning_rate": 2.3118040089086864e-05,
      "loss": 1.4855,
      "step": 3981
    },
    {
      "epoch": 0.8848888888888888,
      "grad_norm": 0.07804767787456512,
      "learning_rate": 2.3073496659242762e-05,
      "loss": 0.0192,
      "step": 3982
    },
    {
      "epoch": 0.8851111111111111,
      "grad_norm": 0.08066914230585098,
      "learning_rate": 2.3028953229398664e-05,
      "loss": 0.0188,
      "step": 3983
    },
    {
      "epoch": 0.8853333333333333,
      "grad_norm": 0.7905464768409729,
      "learning_rate": 2.298440979955457e-05,
      "loss": 0.8492,
      "step": 3984
    },
    {
      "epoch": 0.8855555555555555,
      "grad_norm": 1.2028931379318237,
      "learning_rate": 2.2939866369710468e-05,
      "loss": 0.9801,
      "step": 3985
    },
    {
      "epoch": 0.8857777777777778,
      "grad_norm": 0.10054640471935272,
      "learning_rate": 2.289532293986637e-05,
      "loss": 0.0247,
      "step": 3986
    },
    {
      "epoch": 0.886,
      "grad_norm": 1.1364169120788574,
      "learning_rate": 2.2850779510022272e-05,
      "loss": 1.6362,
      "step": 3987
    },
    {
      "epoch": 0.8862222222222222,
      "grad_norm": 1.2560831308364868,
      "learning_rate": 2.2806236080178174e-05,
      "loss": 1.8384,
      "step": 3988
    },
    {
      "epoch": 0.8864444444444445,
      "grad_norm": 1.1757941246032715,
      "learning_rate": 2.2761692650334076e-05,
      "loss": 1.4828,
      "step": 3989
    },
    {
      "epoch": 0.8866666666666667,
      "grad_norm": 1.120353102684021,
      "learning_rate": 2.2717149220489978e-05,
      "loss": 1.3649,
      "step": 3990
    },
    {
      "epoch": 0.8868888888888888,
      "grad_norm": 1.0847200155258179,
      "learning_rate": 2.267260579064588e-05,
      "loss": 1.7217,
      "step": 3991
    },
    {
      "epoch": 0.8871111111111111,
      "grad_norm": 1.1451468467712402,
      "learning_rate": 2.2628062360801782e-05,
      "loss": 1.6961,
      "step": 3992
    },
    {
      "epoch": 0.8873333333333333,
      "grad_norm": 1.0738978385925293,
      "learning_rate": 2.2583518930957684e-05,
      "loss": 1.4236,
      "step": 3993
    },
    {
      "epoch": 0.8875555555555555,
      "grad_norm": 1.3635321855545044,
      "learning_rate": 2.253897550111359e-05,
      "loss": 1.4417,
      "step": 3994
    },
    {
      "epoch": 0.8877777777777778,
      "grad_norm": 0.19308915734291077,
      "learning_rate": 2.2494432071269488e-05,
      "loss": 0.0304,
      "step": 3995
    },
    {
      "epoch": 0.888,
      "grad_norm": 1.2087732553482056,
      "learning_rate": 2.244988864142539e-05,
      "loss": 1.4158,
      "step": 3996
    },
    {
      "epoch": 0.8882222222222222,
      "grad_norm": 1.0328425168991089,
      "learning_rate": 2.2405345211581295e-05,
      "loss": 1.014,
      "step": 3997
    },
    {
      "epoch": 0.8884444444444445,
      "grad_norm": 0.8065721988677979,
      "learning_rate": 2.2360801781737194e-05,
      "loss": 0.6411,
      "step": 3998
    },
    {
      "epoch": 0.8886666666666667,
      "grad_norm": 0.6682571172714233,
      "learning_rate": 2.2316258351893096e-05,
      "loss": 0.5168,
      "step": 3999
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.9756750464439392,
      "learning_rate": 2.2271714922049e-05,
      "loss": 0.4815,
      "step": 4000
    },
    {
      "epoch": 0.8891111111111111,
      "grad_norm": 0.5801368355751038,
      "learning_rate": 2.22271714922049e-05,
      "loss": 0.8694,
      "step": 4001
    },
    {
      "epoch": 0.8893333333333333,
      "grad_norm": 0.6287752985954285,
      "learning_rate": 2.21826280623608e-05,
      "loss": 1.1768,
      "step": 4002
    },
    {
      "epoch": 0.8895555555555555,
      "grad_norm": 0.8464820981025696,
      "learning_rate": 2.2138084632516704e-05,
      "loss": 2.4031,
      "step": 4003
    },
    {
      "epoch": 0.8897777777777778,
      "grad_norm": 0.878257691860199,
      "learning_rate": 2.209354120267261e-05,
      "loss": 2.1387,
      "step": 4004
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6297408938407898,
      "learning_rate": 2.2048997772828508e-05,
      "loss": 1.0515,
      "step": 4005
    },
    {
      "epoch": 0.8902222222222222,
      "grad_norm": 0.8279980421066284,
      "learning_rate": 2.200445434298441e-05,
      "loss": 1.992,
      "step": 4006
    },
    {
      "epoch": 0.8904444444444445,
      "grad_norm": 0.07900725305080414,
      "learning_rate": 2.1959910913140315e-05,
      "loss": 0.0116,
      "step": 4007
    },
    {
      "epoch": 0.8906666666666667,
      "grad_norm": 0.07973389327526093,
      "learning_rate": 2.1915367483296214e-05,
      "loss": 0.0113,
      "step": 4008
    },
    {
      "epoch": 0.8908888888888888,
      "grad_norm": 0.5878556370735168,
      "learning_rate": 2.1870824053452115e-05,
      "loss": 1.1704,
      "step": 4009
    },
    {
      "epoch": 0.8911111111111111,
      "grad_norm": 0.9955252408981323,
      "learning_rate": 2.182628062360802e-05,
      "loss": 1.8866,
      "step": 4010
    },
    {
      "epoch": 0.8913333333333333,
      "grad_norm": 0.876213788986206,
      "learning_rate": 2.178173719376392e-05,
      "loss": 1.9363,
      "step": 4011
    },
    {
      "epoch": 0.8915555555555555,
      "grad_norm": 0.8237855434417725,
      "learning_rate": 2.173719376391982e-05,
      "loss": 2.1091,
      "step": 4012
    },
    {
      "epoch": 0.8917777777777778,
      "grad_norm": 0.6529291868209839,
      "learning_rate": 2.1692650334075727e-05,
      "loss": 0.9921,
      "step": 4013
    },
    {
      "epoch": 0.892,
      "grad_norm": 0.9489926099777222,
      "learning_rate": 2.1648106904231625e-05,
      "loss": 1.8424,
      "step": 4014
    },
    {
      "epoch": 0.8922222222222222,
      "grad_norm": 0.973099946975708,
      "learning_rate": 2.1603563474387527e-05,
      "loss": 1.9839,
      "step": 4015
    },
    {
      "epoch": 0.8924444444444445,
      "grad_norm": 0.8978729248046875,
      "learning_rate": 2.1559020044543433e-05,
      "loss": 1.7646,
      "step": 4016
    },
    {
      "epoch": 0.8926666666666667,
      "grad_norm": 0.9603530764579773,
      "learning_rate": 2.1514476614699335e-05,
      "loss": 1.8374,
      "step": 4017
    },
    {
      "epoch": 0.8928888888888888,
      "grad_norm": 0.07031574100255966,
      "learning_rate": 2.1469933184855233e-05,
      "loss": 0.0148,
      "step": 4018
    },
    {
      "epoch": 0.8931111111111111,
      "grad_norm": 0.06550273299217224,
      "learning_rate": 2.142538975501114e-05,
      "loss": 0.0147,
      "step": 4019
    },
    {
      "epoch": 0.8933333333333333,
      "grad_norm": 0.06782650202512741,
      "learning_rate": 2.138084632516704e-05,
      "loss": 0.0149,
      "step": 4020
    },
    {
      "epoch": 0.8935555555555555,
      "grad_norm": 0.7483673095703125,
      "learning_rate": 2.133630289532294e-05,
      "loss": 0.9084,
      "step": 4021
    },
    {
      "epoch": 0.8937777777777778,
      "grad_norm": 0.9090237617492676,
      "learning_rate": 2.129175946547884e-05,
      "loss": 1.8329,
      "step": 4022
    },
    {
      "epoch": 0.894,
      "grad_norm": 0.7626523971557617,
      "learning_rate": 2.1247216035634746e-05,
      "loss": 0.9174,
      "step": 4023
    },
    {
      "epoch": 0.8942222222222223,
      "grad_norm": 0.6706441640853882,
      "learning_rate": 2.1202672605790645e-05,
      "loss": 0.8133,
      "step": 4024
    },
    {
      "epoch": 0.8944444444444445,
      "grad_norm": 0.9489988684654236,
      "learning_rate": 2.1158129175946547e-05,
      "loss": 2.003,
      "step": 4025
    },
    {
      "epoch": 0.8946666666666667,
      "grad_norm": 0.9574695825576782,
      "learning_rate": 2.1113585746102452e-05,
      "loss": 1.5621,
      "step": 4026
    },
    {
      "epoch": 0.8948888888888888,
      "grad_norm": 1.1038743257522583,
      "learning_rate": 2.1069042316258354e-05,
      "loss": 1.8653,
      "step": 4027
    },
    {
      "epoch": 0.8951111111111111,
      "grad_norm": 1.0262362957000732,
      "learning_rate": 2.1024498886414253e-05,
      "loss": 1.6652,
      "step": 4028
    },
    {
      "epoch": 0.8953333333333333,
      "grad_norm": 0.8741075396537781,
      "learning_rate": 2.0979955456570158e-05,
      "loss": 1.6058,
      "step": 4029
    },
    {
      "epoch": 0.8955555555555555,
      "grad_norm": 0.7687373161315918,
      "learning_rate": 2.093541202672606e-05,
      "loss": 0.8818,
      "step": 4030
    },
    {
      "epoch": 0.8957777777777778,
      "grad_norm": 0.06525861471891403,
      "learning_rate": 2.089086859688196e-05,
      "loss": 0.0177,
      "step": 4031
    },
    {
      "epoch": 0.896,
      "grad_norm": 0.6307370066642761,
      "learning_rate": 2.0846325167037864e-05,
      "loss": 0.7724,
      "step": 4032
    },
    {
      "epoch": 0.8962222222222223,
      "grad_norm": 1.1199438571929932,
      "learning_rate": 2.0801781737193766e-05,
      "loss": 1.7534,
      "step": 4033
    },
    {
      "epoch": 0.8964444444444445,
      "grad_norm": 0.9748408794403076,
      "learning_rate": 2.0757238307349665e-05,
      "loss": 1.6166,
      "step": 4034
    },
    {
      "epoch": 0.8966666666666666,
      "grad_norm": 0.0824805200099945,
      "learning_rate": 2.071269487750557e-05,
      "loss": 0.0188,
      "step": 4035
    },
    {
      "epoch": 0.8968888888888888,
      "grad_norm": 0.09000510722398758,
      "learning_rate": 2.0668151447661472e-05,
      "loss": 0.0188,
      "step": 4036
    },
    {
      "epoch": 0.8971111111111111,
      "grad_norm": 0.08561154454946518,
      "learning_rate": 2.0623608017817374e-05,
      "loss": 0.0185,
      "step": 4037
    },
    {
      "epoch": 0.8973333333333333,
      "grad_norm": 0.7661683559417725,
      "learning_rate": 2.0579064587973276e-05,
      "loss": 0.907,
      "step": 4038
    },
    {
      "epoch": 0.8975555555555556,
      "grad_norm": 0.9890311360359192,
      "learning_rate": 2.0534521158129178e-05,
      "loss": 1.3849,
      "step": 4039
    },
    {
      "epoch": 0.8977777777777778,
      "grad_norm": 0.7973209619522095,
      "learning_rate": 2.048997772828508e-05,
      "loss": 0.749,
      "step": 4040
    },
    {
      "epoch": 0.898,
      "grad_norm": 1.1026244163513184,
      "learning_rate": 2.044543429844098e-05,
      "loss": 1.7591,
      "step": 4041
    },
    {
      "epoch": 0.8982222222222223,
      "grad_norm": 1.1480908393859863,
      "learning_rate": 2.0400890868596884e-05,
      "loss": 1.9024,
      "step": 4042
    },
    {
      "epoch": 0.8984444444444445,
      "grad_norm": 1.0242488384246826,
      "learning_rate": 2.0356347438752786e-05,
      "loss": 1.4006,
      "step": 4043
    },
    {
      "epoch": 0.8986666666666666,
      "grad_norm": 1.117613434791565,
      "learning_rate": 2.0311804008908684e-05,
      "loss": 1.3118,
      "step": 4044
    },
    {
      "epoch": 0.8988888888888888,
      "grad_norm": 1.1382890939712524,
      "learning_rate": 2.026726057906459e-05,
      "loss": 1.3193,
      "step": 4045
    },
    {
      "epoch": 0.8991111111111111,
      "grad_norm": 1.2156895399093628,
      "learning_rate": 2.0222717149220492e-05,
      "loss": 1.1794,
      "step": 4046
    },
    {
      "epoch": 0.8993333333333333,
      "grad_norm": 0.6855819225311279,
      "learning_rate": 2.017817371937639e-05,
      "loss": 0.4808,
      "step": 4047
    },
    {
      "epoch": 0.8995555555555556,
      "grad_norm": 0.1426740288734436,
      "learning_rate": 2.0133630289532296e-05,
      "loss": 0.033,
      "step": 4048
    },
    {
      "epoch": 0.8997777777777778,
      "grad_norm": 1.0770491361618042,
      "learning_rate": 2.0089086859688198e-05,
      "loss": 0.8844,
      "step": 4049
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9471620917320251,
      "learning_rate": 2.00445434298441e-05,
      "loss": 0.9019,
      "step": 4050
    },
    {
      "epoch": 0.9002222222222223,
      "grad_norm": 0.8870931267738342,
      "learning_rate": 2e-05,
      "loss": 1.9518,
      "step": 4051
    },
    {
      "epoch": 0.9004444444444445,
      "grad_norm": 0.681877851486206,
      "learning_rate": 1.9955456570155904e-05,
      "loss": 1.0902,
      "step": 4052
    },
    {
      "epoch": 0.9006666666666666,
      "grad_norm": 0.8550397157669067,
      "learning_rate": 1.9910913140311806e-05,
      "loss": 2.1819,
      "step": 4053
    },
    {
      "epoch": 0.9008888888888889,
      "grad_norm": 0.5659412741661072,
      "learning_rate": 1.9866369710467708e-05,
      "loss": 1.309,
      "step": 4054
    },
    {
      "epoch": 0.9011111111111111,
      "grad_norm": 0.618087887763977,
      "learning_rate": 1.982182628062361e-05,
      "loss": 1.0288,
      "step": 4055
    },
    {
      "epoch": 0.9013333333333333,
      "grad_norm": 0.5965234637260437,
      "learning_rate": 1.977728285077951e-05,
      "loss": 1.0864,
      "step": 4056
    },
    {
      "epoch": 0.9015555555555556,
      "grad_norm": 0.8751803636550903,
      "learning_rate": 1.973273942093541e-05,
      "loss": 2.2079,
      "step": 4057
    },
    {
      "epoch": 0.9017777777777778,
      "grad_norm": 0.907996416091919,
      "learning_rate": 1.9688195991091315e-05,
      "loss": 2.0763,
      "step": 4058
    },
    {
      "epoch": 0.902,
      "grad_norm": 0.08640366792678833,
      "learning_rate": 1.9643652561247217e-05,
      "loss": 0.0116,
      "step": 4059
    },
    {
      "epoch": 0.9022222222222223,
      "grad_norm": 0.08815193176269531,
      "learning_rate": 1.959910913140312e-05,
      "loss": 0.0114,
      "step": 4060
    },
    {
      "epoch": 0.9024444444444445,
      "grad_norm": 0.08544806391000748,
      "learning_rate": 1.955456570155902e-05,
      "loss": 0.0115,
      "step": 4061
    },
    {
      "epoch": 0.9026666666666666,
      "grad_norm": 0.6461583375930786,
      "learning_rate": 1.9510022271714923e-05,
      "loss": 1.1449,
      "step": 4062
    },
    {
      "epoch": 0.9028888888888889,
      "grad_norm": 0.8666505813598633,
      "learning_rate": 1.9465478841870825e-05,
      "loss": 2.0281,
      "step": 4063
    },
    {
      "epoch": 0.9031111111111111,
      "grad_norm": 1.0003634691238403,
      "learning_rate": 1.9420935412026727e-05,
      "loss": 1.929,
      "step": 4064
    },
    {
      "epoch": 0.9033333333333333,
      "grad_norm": 1.1285505294799805,
      "learning_rate": 1.937639198218263e-05,
      "loss": 2.3214,
      "step": 4065
    },
    {
      "epoch": 0.9035555555555556,
      "grad_norm": 0.6442127823829651,
      "learning_rate": 1.933184855233853e-05,
      "loss": 0.79,
      "step": 4066
    },
    {
      "epoch": 0.9037777777777778,
      "grad_norm": 0.6966649889945984,
      "learning_rate": 1.9287305122494433e-05,
      "loss": 0.9614,
      "step": 4067
    },
    {
      "epoch": 0.904,
      "grad_norm": 1.0646114349365234,
      "learning_rate": 1.9242761692650335e-05,
      "loss": 2.0066,
      "step": 4068
    },
    {
      "epoch": 0.9042222222222223,
      "grad_norm": 1.0722988843917847,
      "learning_rate": 1.9198218262806237e-05,
      "loss": 1.8977,
      "step": 4069
    },
    {
      "epoch": 0.9044444444444445,
      "grad_norm": 0.9870444536209106,
      "learning_rate": 1.915367483296214e-05,
      "loss": 1.6182,
      "step": 4070
    },
    {
      "epoch": 0.9046666666666666,
      "grad_norm": 0.6224427819252014,
      "learning_rate": 1.910913140311804e-05,
      "loss": 0.7754,
      "step": 4071
    },
    {
      "epoch": 0.9048888888888889,
      "grad_norm": 0.06635406613349915,
      "learning_rate": 1.9064587973273943e-05,
      "loss": 0.0147,
      "step": 4072
    },
    {
      "epoch": 0.9051111111111111,
      "grad_norm": 0.07058946043252945,
      "learning_rate": 1.9020044543429845e-05,
      "loss": 0.0173,
      "step": 4073
    },
    {
      "epoch": 0.9053333333333333,
      "grad_norm": 0.6457788348197937,
      "learning_rate": 1.8975501113585747e-05,
      "loss": 0.8879,
      "step": 4074
    },
    {
      "epoch": 0.9055555555555556,
      "grad_norm": 1.0111377239227295,
      "learning_rate": 1.893095768374165e-05,
      "loss": 1.617,
      "step": 4075
    },
    {
      "epoch": 0.9057777777777778,
      "grad_norm": 0.9277496933937073,
      "learning_rate": 1.888641425389755e-05,
      "loss": 1.8035,
      "step": 4076
    },
    {
      "epoch": 0.906,
      "grad_norm": 1.0849852561950684,
      "learning_rate": 1.8841870824053453e-05,
      "loss": 1.6758,
      "step": 4077
    },
    {
      "epoch": 0.9062222222222223,
      "grad_norm": 1.02144455909729,
      "learning_rate": 1.8797327394209355e-05,
      "loss": 1.812,
      "step": 4078
    },
    {
      "epoch": 0.9064444444444445,
      "grad_norm": 1.1183116436004639,
      "learning_rate": 1.8752783964365257e-05,
      "loss": 1.8675,
      "step": 4079
    },
    {
      "epoch": 0.9066666666666666,
      "grad_norm": 1.3064316511154175,
      "learning_rate": 1.870824053452116e-05,
      "loss": 1.7242,
      "step": 4080
    },
    {
      "epoch": 0.9068888888888889,
      "grad_norm": 0.06701880693435669,
      "learning_rate": 1.866369710467706e-05,
      "loss": 0.0177,
      "step": 4081
    },
    {
      "epoch": 0.9071111111111111,
      "grad_norm": 0.06481373310089111,
      "learning_rate": 1.8619153674832963e-05,
      "loss": 0.0178,
      "step": 4082
    },
    {
      "epoch": 0.9073333333333333,
      "grad_norm": 0.7761397361755371,
      "learning_rate": 1.8574610244988865e-05,
      "loss": 0.7757,
      "step": 4083
    },
    {
      "epoch": 0.9075555555555556,
      "grad_norm": 1.0291235446929932,
      "learning_rate": 1.8530066815144767e-05,
      "loss": 1.6664,
      "step": 4084
    },
    {
      "epoch": 0.9077777777777778,
      "grad_norm": 0.7274791598320007,
      "learning_rate": 1.848552338530067e-05,
      "loss": 0.979,
      "step": 4085
    },
    {
      "epoch": 0.908,
      "grad_norm": 0.6331042647361755,
      "learning_rate": 1.844097995545657e-05,
      "loss": 0.6799,
      "step": 4086
    },
    {
      "epoch": 0.9082222222222223,
      "grad_norm": 0.08008535206317902,
      "learning_rate": 1.8396436525612473e-05,
      "loss": 0.018,
      "step": 4087
    },
    {
      "epoch": 0.9084444444444445,
      "grad_norm": 0.08232392370700836,
      "learning_rate": 1.8351893095768375e-05,
      "loss": 0.018,
      "step": 4088
    },
    {
      "epoch": 0.9086666666666666,
      "grad_norm": 0.6026217341423035,
      "learning_rate": 1.830734966592428e-05,
      "loss": 0.8012,
      "step": 4089
    },
    {
      "epoch": 0.9088888888888889,
      "grad_norm": 1.0574473142623901,
      "learning_rate": 1.826280623608018e-05,
      "loss": 1.7651,
      "step": 4090
    },
    {
      "epoch": 0.9091111111111111,
      "grad_norm": 1.1249449253082275,
      "learning_rate": 1.821826280623608e-05,
      "loss": 1.6539,
      "step": 4091
    },
    {
      "epoch": 0.9093333333333333,
      "grad_norm": 0.7003470659255981,
      "learning_rate": 1.8173719376391986e-05,
      "loss": 0.764,
      "step": 4092
    },
    {
      "epoch": 0.9095555555555556,
      "grad_norm": 1.0299309492111206,
      "learning_rate": 1.8129175946547884e-05,
      "loss": 1.3027,
      "step": 4093
    },
    {
      "epoch": 0.9097777777777778,
      "grad_norm": 0.21282123029232025,
      "learning_rate": 1.8084632516703786e-05,
      "loss": 0.0309,
      "step": 4094
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1035081148147583,
      "learning_rate": 1.804008908685969e-05,
      "loss": 1.2723,
      "step": 4095
    },
    {
      "epoch": 0.9102222222222223,
      "grad_norm": 0.9910405874252319,
      "learning_rate": 1.799554565701559e-05,
      "loss": 1.0153,
      "step": 4096
    },
    {
      "epoch": 0.9104444444444444,
      "grad_norm": 1.0081919431686401,
      "learning_rate": 1.7951002227171492e-05,
      "loss": 1.0348,
      "step": 4097
    },
    {
      "epoch": 0.9106666666666666,
      "grad_norm": 0.1447010040283203,
      "learning_rate": 1.7906458797327394e-05,
      "loss": 0.0327,
      "step": 4098
    },
    {
      "epoch": 0.9108888888888889,
      "grad_norm": 1.1237828731536865,
      "learning_rate": 1.7861915367483296e-05,
      "loss": 1.332,
      "step": 4099
    },
    {
      "epoch": 0.9111111111111111,
      "grad_norm": 1.0061198472976685,
      "learning_rate": 1.7817371937639198e-05,
      "loss": 0.9762,
      "step": 4100
    },
    {
      "epoch": 0.9113333333333333,
      "grad_norm": 0.045394111424684525,
      "learning_rate": 1.77728285077951e-05,
      "loss": 0.0105,
      "step": 4101
    },
    {
      "epoch": 0.9115555555555556,
      "grad_norm": 0.6273143291473389,
      "learning_rate": 1.7728285077951006e-05,
      "loss": 0.8231,
      "step": 4102
    },
    {
      "epoch": 0.9117777777777778,
      "grad_norm": 0.5369709730148315,
      "learning_rate": 1.7683741648106904e-05,
      "loss": 1.0971,
      "step": 4103
    },
    {
      "epoch": 0.912,
      "grad_norm": 0.841785728931427,
      "learning_rate": 1.7639198218262806e-05,
      "loss": 2.1345,
      "step": 4104
    },
    {
      "epoch": 0.9122222222222223,
      "grad_norm": 0.5022440552711487,
      "learning_rate": 1.759465478841871e-05,
      "loss": 1.0847,
      "step": 4105
    },
    {
      "epoch": 0.9124444444444444,
      "grad_norm": 0.5736976265907288,
      "learning_rate": 1.755011135857461e-05,
      "loss": 1.0705,
      "step": 4106
    },
    {
      "epoch": 0.9126666666666666,
      "grad_norm": 0.7846779227256775,
      "learning_rate": 1.7505567928730512e-05,
      "loss": 2.2198,
      "step": 4107
    },
    {
      "epoch": 0.9128888888888889,
      "grad_norm": 0.04545416682958603,
      "learning_rate": 1.7461024498886417e-05,
      "loss": 0.0105,
      "step": 4108
    },
    {
      "epoch": 0.9131111111111111,
      "grad_norm": 0.8995314240455627,
      "learning_rate": 1.7416481069042316e-05,
      "loss": 2.0983,
      "step": 4109
    },
    {
      "epoch": 0.9133333333333333,
      "grad_norm": 0.08467597514390945,
      "learning_rate": 1.7371937639198218e-05,
      "loss": 0.0111,
      "step": 4110
    },
    {
      "epoch": 0.9135555555555556,
      "grad_norm": 0.06848177313804626,
      "learning_rate": 1.732739420935412e-05,
      "loss": 0.0109,
      "step": 4111
    },
    {
      "epoch": 0.9137777777777778,
      "grad_norm": 0.6615252494812012,
      "learning_rate": 1.7282850779510025e-05,
      "loss": 1.0423,
      "step": 4112
    },
    {
      "epoch": 0.914,
      "grad_norm": 0.8934789896011353,
      "learning_rate": 1.7238307349665924e-05,
      "loss": 1.8382,
      "step": 4113
    },
    {
      "epoch": 0.9142222222222223,
      "grad_norm": 0.8137645125389099,
      "learning_rate": 1.7193763919821826e-05,
      "loss": 1.9163,
      "step": 4114
    },
    {
      "epoch": 0.9144444444444444,
      "grad_norm": 0.8993197083473206,
      "learning_rate": 1.714922048997773e-05,
      "loss": 2.1383,
      "step": 4115
    },
    {
      "epoch": 0.9146666666666666,
      "grad_norm": 0.908676028251648,
      "learning_rate": 1.710467706013363e-05,
      "loss": 2.0524,
      "step": 4116
    },
    {
      "epoch": 0.9148888888888889,
      "grad_norm": 0.6348316669464111,
      "learning_rate": 1.7060133630289532e-05,
      "loss": 0.9211,
      "step": 4117
    },
    {
      "epoch": 0.9151111111111111,
      "grad_norm": 0.10803266614675522,
      "learning_rate": 1.7015590200445437e-05,
      "loss": 0.0177,
      "step": 4118
    },
    {
      "epoch": 0.9153333333333333,
      "grad_norm": 0.5778976678848267,
      "learning_rate": 1.6971046770601336e-05,
      "loss": 0.7947,
      "step": 4119
    },
    {
      "epoch": 0.9155555555555556,
      "grad_norm": 0.9023910164833069,
      "learning_rate": 1.6926503340757238e-05,
      "loss": 1.6568,
      "step": 4120
    },
    {
      "epoch": 0.9157777777777778,
      "grad_norm": 0.7427157759666443,
      "learning_rate": 1.6881959910913143e-05,
      "loss": 0.6653,
      "step": 4121
    },
    {
      "epoch": 0.916,
      "grad_norm": 0.8236956000328064,
      "learning_rate": 1.683741648106904e-05,
      "loss": 1.0835,
      "step": 4122
    },
    {
      "epoch": 0.9162222222222223,
      "grad_norm": 0.788445234298706,
      "learning_rate": 1.6792873051224944e-05,
      "loss": 1.1082,
      "step": 4123
    },
    {
      "epoch": 0.9164444444444444,
      "grad_norm": 0.7391776442527771,
      "learning_rate": 1.674832962138085e-05,
      "loss": 1.0042,
      "step": 4124
    },
    {
      "epoch": 0.9166666666666666,
      "grad_norm": 0.993009626865387,
      "learning_rate": 1.670378619153675e-05,
      "loss": 1.7091,
      "step": 4125
    },
    {
      "epoch": 0.9168888888888889,
      "grad_norm": 1.1671327352523804,
      "learning_rate": 1.665924276169265e-05,
      "loss": 1.8704,
      "step": 4126
    },
    {
      "epoch": 0.9171111111111111,
      "grad_norm": 0.9321463704109192,
      "learning_rate": 1.6614699331848555e-05,
      "loss": 1.3912,
      "step": 4127
    },
    {
      "epoch": 0.9173333333333333,
      "grad_norm": 0.7201982140541077,
      "learning_rate": 1.6570155902004457e-05,
      "loss": 1.0362,
      "step": 4128
    },
    {
      "epoch": 0.9175555555555556,
      "grad_norm": 0.06468725949525833,
      "learning_rate": 1.6525612472160355e-05,
      "loss": 0.0177,
      "step": 4129
    },
    {
      "epoch": 0.9177777777777778,
      "grad_norm": 0.06377862393856049,
      "learning_rate": 1.6481069042316257e-05,
      "loss": 0.0175,
      "step": 4130
    },
    {
      "epoch": 0.918,
      "grad_norm": 1.2018589973449707,
      "learning_rate": 1.6436525612472163e-05,
      "loss": 2.088,
      "step": 4131
    },
    {
      "epoch": 0.9182222222222223,
      "grad_norm": 0.08640787750482559,
      "learning_rate": 1.639198218262806e-05,
      "loss": 0.0178,
      "step": 4132
    },
    {
      "epoch": 0.9184444444444444,
      "grad_norm": 0.07626676559448242,
      "learning_rate": 1.6347438752783963e-05,
      "loss": 0.0179,
      "step": 4133
    },
    {
      "epoch": 0.9186666666666666,
      "grad_norm": 0.684622585773468,
      "learning_rate": 1.630289532293987e-05,
      "loss": 0.9346,
      "step": 4134
    },
    {
      "epoch": 0.9188888888888889,
      "grad_norm": 1.07980215549469,
      "learning_rate": 1.625835189309577e-05,
      "loss": 1.8093,
      "step": 4135
    },
    {
      "epoch": 0.9191111111111111,
      "grad_norm": 1.0103257894515991,
      "learning_rate": 1.621380846325167e-05,
      "loss": 1.3954,
      "step": 4136
    },
    {
      "epoch": 0.9193333333333333,
      "grad_norm": 1.0367659330368042,
      "learning_rate": 1.6169265033407574e-05,
      "loss": 1.665,
      "step": 4137
    },
    {
      "epoch": 0.9195555555555556,
      "grad_norm": 1.13039231300354,
      "learning_rate": 1.6124721603563476e-05,
      "loss": 1.3939,
      "step": 4138
    },
    {
      "epoch": 0.9197777777777778,
      "grad_norm": 1.2525602579116821,
      "learning_rate": 1.6080178173719375e-05,
      "loss": 1.7406,
      "step": 4139
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0886310338974,
      "learning_rate": 1.603563474387528e-05,
      "loss": 1.447,
      "step": 4140
    },
    {
      "epoch": 0.9202222222222223,
      "grad_norm": 1.0128674507141113,
      "learning_rate": 1.5991091314031182e-05,
      "loss": 1.3665,
      "step": 4141
    },
    {
      "epoch": 0.9204444444444444,
      "grad_norm": 1.087297797203064,
      "learning_rate": 1.594654788418708e-05,
      "loss": 1.2043,
      "step": 4142
    },
    {
      "epoch": 0.9206666666666666,
      "grad_norm": 1.0681723356246948,
      "learning_rate": 1.5902004454342986e-05,
      "loss": 1.3818,
      "step": 4143
    },
    {
      "epoch": 0.9208888888888889,
      "grad_norm": 0.8169934153556824,
      "learning_rate": 1.5857461024498888e-05,
      "loss": 0.7076,
      "step": 4144
    },
    {
      "epoch": 0.9211111111111111,
      "grad_norm": 1.1659146547317505,
      "learning_rate": 1.581291759465479e-05,
      "loss": 1.3053,
      "step": 4145
    },
    {
      "epoch": 0.9213333333333333,
      "grad_norm": 1.0384572744369507,
      "learning_rate": 1.5768374164810692e-05,
      "loss": 1.1864,
      "step": 4146
    },
    {
      "epoch": 0.9215555555555556,
      "grad_norm": 1.0471240282058716,
      "learning_rate": 1.5723830734966594e-05,
      "loss": 1.066,
      "step": 4147
    },
    {
      "epoch": 0.9217777777777778,
      "grad_norm": 0.7413065433502197,
      "learning_rate": 1.5679287305122496e-05,
      "loss": 0.5414,
      "step": 4148
    },
    {
      "epoch": 0.922,
      "grad_norm": 0.9853238463401794,
      "learning_rate": 1.5634743875278395e-05,
      "loss": 0.9216,
      "step": 4149
    },
    {
      "epoch": 0.9222222222222223,
      "grad_norm": 0.9708325266838074,
      "learning_rate": 1.55902004454343e-05,
      "loss": 0.8306,
      "step": 4150
    },
    {
      "epoch": 0.9224444444444444,
      "grad_norm": 0.565495491027832,
      "learning_rate": 1.5545657015590202e-05,
      "loss": 1.0385,
      "step": 4151
    },
    {
      "epoch": 0.9226666666666666,
      "grad_norm": 0.046501629054546356,
      "learning_rate": 1.55011135857461e-05,
      "loss": 0.0103,
      "step": 4152
    },
    {
      "epoch": 0.9228888888888889,
      "grad_norm": 0.5920565128326416,
      "learning_rate": 1.5456570155902006e-05,
      "loss": 1.1167,
      "step": 4153
    },
    {
      "epoch": 0.9231111111111111,
      "grad_norm": 0.6111573576927185,
      "learning_rate": 1.5412026726057908e-05,
      "loss": 0.9939,
      "step": 4154
    },
    {
      "epoch": 0.9233333333333333,
      "grad_norm": 0.5594994425773621,
      "learning_rate": 1.5367483296213807e-05,
      "loss": 1.1777,
      "step": 4155
    },
    {
      "epoch": 0.9235555555555556,
      "grad_norm": 0.05240090563893318,
      "learning_rate": 1.5322939866369712e-05,
      "loss": 0.0107,
      "step": 4156
    },
    {
      "epoch": 0.9237777777777778,
      "grad_norm": 0.8332452178001404,
      "learning_rate": 1.5278396436525614e-05,
      "loss": 2.1969,
      "step": 4157
    },
    {
      "epoch": 0.924,
      "grad_norm": 0.60703045129776,
      "learning_rate": 1.5233853006681514e-05,
      "loss": 0.8806,
      "step": 4158
    },
    {
      "epoch": 0.9242222222222222,
      "grad_norm": 0.0834372490644455,
      "learning_rate": 1.5189309576837418e-05,
      "loss": 0.0109,
      "step": 4159
    },
    {
      "epoch": 0.9244444444444444,
      "grad_norm": 0.06800226867198944,
      "learning_rate": 1.514476614699332e-05,
      "loss": 0.0111,
      "step": 4160
    },
    {
      "epoch": 0.9246666666666666,
      "grad_norm": 0.0833854079246521,
      "learning_rate": 1.510022271714922e-05,
      "loss": 0.0112,
      "step": 4161
    },
    {
      "epoch": 0.9248888888888889,
      "grad_norm": 0.6188771724700928,
      "learning_rate": 1.5055679287305124e-05,
      "loss": 0.9193,
      "step": 4162
    },
    {
      "epoch": 0.9251111111111111,
      "grad_norm": 0.9110550284385681,
      "learning_rate": 1.5011135857461026e-05,
      "loss": 1.8064,
      "step": 4163
    },
    {
      "epoch": 0.9253333333333333,
      "grad_norm": 0.9889512658119202,
      "learning_rate": 1.4966592427616926e-05,
      "loss": 2.1231,
      "step": 4164
    },
    {
      "epoch": 0.9255555555555556,
      "grad_norm": 0.700734555721283,
      "learning_rate": 1.4922048997772831e-05,
      "loss": 0.9772,
      "step": 4165
    },
    {
      "epoch": 0.9257777777777778,
      "grad_norm": 0.10837709903717041,
      "learning_rate": 1.4877505567928732e-05,
      "loss": 0.0171,
      "step": 4166
    },
    {
      "epoch": 0.926,
      "grad_norm": 0.11039458960294724,
      "learning_rate": 1.4832962138084634e-05,
      "loss": 0.017,
      "step": 4167
    },
    {
      "epoch": 0.9262222222222222,
      "grad_norm": 1.0548503398895264,
      "learning_rate": 1.4788418708240534e-05,
      "loss": 2.02,
      "step": 4168
    },
    {
      "epoch": 0.9264444444444444,
      "grad_norm": 1.027597427368164,
      "learning_rate": 1.4743875278396438e-05,
      "loss": 2.0137,
      "step": 4169
    },
    {
      "epoch": 0.9266666666666666,
      "grad_norm": 0.9742117524147034,
      "learning_rate": 1.469933184855234e-05,
      "loss": 1.6216,
      "step": 4170
    },
    {
      "epoch": 0.9268888888888889,
      "grad_norm": 1.1034184694290161,
      "learning_rate": 1.465478841870824e-05,
      "loss": 1.9359,
      "step": 4171
    },
    {
      "epoch": 0.9271111111111111,
      "grad_norm": 0.9604677557945251,
      "learning_rate": 1.4610244988864143e-05,
      "loss": 1.9562,
      "step": 4172
    },
    {
      "epoch": 0.9273333333333333,
      "grad_norm": 0.07002148032188416,
      "learning_rate": 1.4565701559020045e-05,
      "loss": 0.0148,
      "step": 4173
    },
    {
      "epoch": 0.9275555555555556,
      "grad_norm": 0.07596537470817566,
      "learning_rate": 1.4521158129175946e-05,
      "loss": 0.015,
      "step": 4174
    },
    {
      "epoch": 0.9277777777777778,
      "grad_norm": 0.07176879793405533,
      "learning_rate": 1.447661469933185e-05,
      "loss": 0.0153,
      "step": 4175
    },
    {
      "epoch": 0.928,
      "grad_norm": 0.6465597152709961,
      "learning_rate": 1.4432071269487751e-05,
      "loss": 0.8603,
      "step": 4176
    },
    {
      "epoch": 0.9282222222222222,
      "grad_norm": 0.7567986249923706,
      "learning_rate": 1.4387527839643652e-05,
      "loss": 0.974,
      "step": 4177
    },
    {
      "epoch": 0.9284444444444444,
      "grad_norm": 0.9745081067085266,
      "learning_rate": 1.4342984409799557e-05,
      "loss": 1.7431,
      "step": 4178
    },
    {
      "epoch": 0.9286666666666666,
      "grad_norm": 1.2606123685836792,
      "learning_rate": 1.4298440979955457e-05,
      "loss": 1.8718,
      "step": 4179
    },
    {
      "epoch": 0.9288888888888889,
      "grad_norm": 1.018596887588501,
      "learning_rate": 1.425389755011136e-05,
      "loss": 1.7782,
      "step": 4180
    },
    {
      "epoch": 0.9291111111111111,
      "grad_norm": 0.961146891117096,
      "learning_rate": 1.4209354120267263e-05,
      "loss": 1.6335,
      "step": 4181
    },
    {
      "epoch": 0.9293333333333333,
      "grad_norm": 0.9671225547790527,
      "learning_rate": 1.4164810690423163e-05,
      "loss": 1.507,
      "step": 4182
    },
    {
      "epoch": 0.9295555555555556,
      "grad_norm": 0.06404636800289154,
      "learning_rate": 1.4120267260579065e-05,
      "loss": 0.0178,
      "step": 4183
    },
    {
      "epoch": 0.9297777777777778,
      "grad_norm": 0.06602248549461365,
      "learning_rate": 1.4075723830734965e-05,
      "loss": 0.0179,
      "step": 4184
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.06565021723508835,
      "learning_rate": 1.4031180400890869e-05,
      "loss": 0.0178,
      "step": 4185
    },
    {
      "epoch": 0.9302222222222222,
      "grad_norm": 1.20155930519104,
      "learning_rate": 1.3986636971046771e-05,
      "loss": 1.839,
      "step": 4186
    },
    {
      "epoch": 0.9304444444444444,
      "grad_norm": 0.08105297386646271,
      "learning_rate": 1.3942093541202671e-05,
      "loss": 0.0171,
      "step": 4187
    },
    {
      "epoch": 0.9306666666666666,
      "grad_norm": 0.07562917470932007,
      "learning_rate": 1.3897550111358577e-05,
      "loss": 0.0173,
      "step": 4188
    },
    {
      "epoch": 0.9308888888888889,
      "grad_norm": 0.9316315054893494,
      "learning_rate": 1.3853006681514477e-05,
      "loss": 0.9604,
      "step": 4189
    },
    {
      "epoch": 0.9311111111111111,
      "grad_norm": 1.0534974336624146,
      "learning_rate": 1.3808463251670379e-05,
      "loss": 1.5912,
      "step": 4190
    },
    {
      "epoch": 0.9313333333333333,
      "grad_norm": 1.1811798810958862,
      "learning_rate": 1.3763919821826283e-05,
      "loss": 1.76,
      "step": 4191
    },
    {
      "epoch": 0.9315555555555556,
      "grad_norm": 1.158895492553711,
      "learning_rate": 1.3719376391982183e-05,
      "loss": 1.6761,
      "step": 4192
    },
    {
      "epoch": 0.9317777777777778,
      "grad_norm": 1.1721011400222778,
      "learning_rate": 1.3674832962138085e-05,
      "loss": 1.3907,
      "step": 4193
    },
    {
      "epoch": 0.932,
      "grad_norm": 0.180902361869812,
      "learning_rate": 1.3630289532293989e-05,
      "loss": 0.0291,
      "step": 4194
    },
    {
      "epoch": 0.9322222222222222,
      "grad_norm": 0.17803487181663513,
      "learning_rate": 1.3585746102449889e-05,
      "loss": 0.029,
      "step": 4195
    },
    {
      "epoch": 0.9324444444444444,
      "grad_norm": 1.2288343906402588,
      "learning_rate": 1.354120267260579e-05,
      "loss": 1.5034,
      "step": 4196
    },
    {
      "epoch": 0.9326666666666666,
      "grad_norm": 1.1452394723892212,
      "learning_rate": 1.3496659242761694e-05,
      "loss": 1.3718,
      "step": 4197
    },
    {
      "epoch": 0.9328888888888889,
      "grad_norm": 1.1846433877944946,
      "learning_rate": 1.3452115812917596e-05,
      "loss": 1.1493,
      "step": 4198
    },
    {
      "epoch": 0.9331111111111111,
      "grad_norm": 1.074758529663086,
      "learning_rate": 1.3407572383073497e-05,
      "loss": 1.3701,
      "step": 4199
    },
    {
      "epoch": 0.9333333333333333,
      "grad_norm": 1.0819156169891357,
      "learning_rate": 1.33630289532294e-05,
      "loss": 0.4173,
      "step": 4200
    },
    {
      "epoch": 0.9335555555555556,
      "grad_norm": 0.04741929471492767,
      "learning_rate": 1.3318485523385302e-05,
      "loss": 0.0103,
      "step": 4201
    },
    {
      "epoch": 0.9337777777777778,
      "grad_norm": 0.5368507504463196,
      "learning_rate": 1.3273942093541203e-05,
      "loss": 0.9904,
      "step": 4202
    },
    {
      "epoch": 0.934,
      "grad_norm": 0.9232761859893799,
      "learning_rate": 1.3229398663697105e-05,
      "loss": 2.0784,
      "step": 4203
    },
    {
      "epoch": 0.9342222222222222,
      "grad_norm": 0.8134653568267822,
      "learning_rate": 1.3184855233853008e-05,
      "loss": 1.8798,
      "step": 4204
    },
    {
      "epoch": 0.9344444444444444,
      "grad_norm": 0.6048870086669922,
      "learning_rate": 1.3140311804008909e-05,
      "loss": 1.0963,
      "step": 4205
    },
    {
      "epoch": 0.9346666666666666,
      "grad_norm": 0.6360740065574646,
      "learning_rate": 1.309576837416481e-05,
      "loss": 1.0909,
      "step": 4206
    },
    {
      "epoch": 0.9348888888888889,
      "grad_norm": 0.6409539580345154,
      "learning_rate": 1.3051224944320714e-05,
      "loss": 0.9653,
      "step": 4207
    },
    {
      "epoch": 0.9351111111111111,
      "grad_norm": 0.8436147570610046,
      "learning_rate": 1.3006681514476614e-05,
      "loss": 2.0953,
      "step": 4208
    },
    {
      "epoch": 0.9353333333333333,
      "grad_norm": 0.06498900055885315,
      "learning_rate": 1.2962138084632516e-05,
      "loss": 0.0108,
      "step": 4209
    },
    {
      "epoch": 0.9355555555555556,
      "grad_norm": 0.06997820734977722,
      "learning_rate": 1.291759465478842e-05,
      "loss": 0.0104,
      "step": 4210
    },
    {
      "epoch": 0.9357777777777778,
      "grad_norm": 0.07132133841514587,
      "learning_rate": 1.2873051224944322e-05,
      "loss": 0.0105,
      "step": 4211
    },
    {
      "epoch": 0.936,
      "grad_norm": 0.880042552947998,
      "learning_rate": 1.2828507795100222e-05,
      "loss": 2.0466,
      "step": 4212
    },
    {
      "epoch": 0.9362222222222222,
      "grad_norm": 0.9125528931617737,
      "learning_rate": 1.2783964365256126e-05,
      "loss": 1.4838,
      "step": 4213
    },
    {
      "epoch": 0.9364444444444444,
      "grad_norm": 0.8474193811416626,
      "learning_rate": 1.2739420935412028e-05,
      "loss": 1.9134,
      "step": 4214
    },
    {
      "epoch": 0.9366666666666666,
      "grad_norm": 0.9230953454971313,
      "learning_rate": 1.2694877505567928e-05,
      "loss": 1.9544,
      "step": 4215
    },
    {
      "epoch": 0.9368888888888889,
      "grad_norm": 0.9458275437355042,
      "learning_rate": 1.2650334075723832e-05,
      "loss": 1.9736,
      "step": 4216
    },
    {
      "epoch": 0.9371111111111111,
      "grad_norm": 1.3373515605926514,
      "learning_rate": 1.2605790645879734e-05,
      "loss": 2.2127,
      "step": 4217
    },
    {
      "epoch": 0.9373333333333334,
      "grad_norm": 0.09605936706066132,
      "learning_rate": 1.2561247216035634e-05,
      "loss": 0.0156,
      "step": 4218
    },
    {
      "epoch": 0.9375555555555556,
      "grad_norm": 0.9786912798881531,
      "learning_rate": 1.251670378619154e-05,
      "loss": 1.8977,
      "step": 4219
    },
    {
      "epoch": 0.9377777777777778,
      "grad_norm": 0.9807332754135132,
      "learning_rate": 1.247216035634744e-05,
      "loss": 1.8869,
      "step": 4220
    },
    {
      "epoch": 0.938,
      "grad_norm": 0.8643897175788879,
      "learning_rate": 1.2427616926503342e-05,
      "loss": 1.6852,
      "step": 4221
    },
    {
      "epoch": 0.9382222222222222,
      "grad_norm": 1.1636970043182373,
      "learning_rate": 1.2383073496659244e-05,
      "loss": 1.5879,
      "step": 4222
    },
    {
      "epoch": 0.9384444444444444,
      "grad_norm": 0.0700439065694809,
      "learning_rate": 1.2338530066815146e-05,
      "loss": 0.015,
      "step": 4223
    },
    {
      "epoch": 0.9386666666666666,
      "grad_norm": 0.07256254553794861,
      "learning_rate": 1.2293986636971048e-05,
      "loss": 0.015,
      "step": 4224
    },
    {
      "epoch": 0.9388888888888889,
      "grad_norm": 0.6184701919555664,
      "learning_rate": 1.224944320712695e-05,
      "loss": 0.8797,
      "step": 4225
    },
    {
      "epoch": 0.9391111111111111,
      "grad_norm": 0.9730594158172607,
      "learning_rate": 1.2204899777282852e-05,
      "loss": 1.6952,
      "step": 4226
    },
    {
      "epoch": 0.9393333333333334,
      "grad_norm": 0.8093335628509521,
      "learning_rate": 1.2160356347438754e-05,
      "loss": 1.7324,
      "step": 4227
    },
    {
      "epoch": 0.9395555555555556,
      "grad_norm": 1.0205293893814087,
      "learning_rate": 1.2115812917594656e-05,
      "loss": 1.6117,
      "step": 4228
    },
    {
      "epoch": 0.9397777777777778,
      "grad_norm": 0.9411073327064514,
      "learning_rate": 1.2071269487750557e-05,
      "loss": 1.6911,
      "step": 4229
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1085401773452759,
      "learning_rate": 1.202672605790646e-05,
      "loss": 1.822,
      "step": 4230
    },
    {
      "epoch": 0.9402222222222222,
      "grad_norm": 1.0736027956008911,
      "learning_rate": 1.1982182628062361e-05,
      "loss": 1.7295,
      "step": 4231
    },
    {
      "epoch": 0.9404444444444444,
      "grad_norm": 1.0651301145553589,
      "learning_rate": 1.1937639198218263e-05,
      "loss": 1.6569,
      "step": 4232
    },
    {
      "epoch": 0.9406666666666667,
      "grad_norm": 0.9908804297447205,
      "learning_rate": 1.1893095768374165e-05,
      "loss": 1.7012,
      "step": 4233
    },
    {
      "epoch": 0.9408888888888889,
      "grad_norm": 0.9733399748802185,
      "learning_rate": 1.1848552338530067e-05,
      "loss": 1.7654,
      "step": 4234
    },
    {
      "epoch": 0.9411111111111111,
      "grad_norm": 0.8500748872756958,
      "learning_rate": 1.180400890868597e-05,
      "loss": 0.8944,
      "step": 4235
    },
    {
      "epoch": 0.9413333333333334,
      "grad_norm": 0.7698972821235657,
      "learning_rate": 1.1759465478841871e-05,
      "loss": 0.6535,
      "step": 4236
    },
    {
      "epoch": 0.9415555555555556,
      "grad_norm": 0.976285994052887,
      "learning_rate": 1.1714922048997773e-05,
      "loss": 1.5464,
      "step": 4237
    },
    {
      "epoch": 0.9417777777777778,
      "grad_norm": 0.07256097346544266,
      "learning_rate": 1.1670378619153675e-05,
      "loss": 0.0177,
      "step": 4238
    },
    {
      "epoch": 0.942,
      "grad_norm": 0.8034363389015198,
      "learning_rate": 1.1625835189309577e-05,
      "loss": 0.7093,
      "step": 4239
    },
    {
      "epoch": 0.9422222222222222,
      "grad_norm": 1.131561279296875,
      "learning_rate": 1.158129175946548e-05,
      "loss": 1.6093,
      "step": 4240
    },
    {
      "epoch": 0.9424444444444444,
      "grad_norm": 0.6806484460830688,
      "learning_rate": 1.1536748329621381e-05,
      "loss": 0.5989,
      "step": 4241
    },
    {
      "epoch": 0.9426666666666667,
      "grad_norm": 0.7716354727745056,
      "learning_rate": 1.1492204899777285e-05,
      "loss": 0.8757,
      "step": 4242
    },
    {
      "epoch": 0.9428888888888889,
      "grad_norm": 1.0691800117492676,
      "learning_rate": 1.1447661469933185e-05,
      "loss": 1.525,
      "step": 4243
    },
    {
      "epoch": 0.9431111111111111,
      "grad_norm": 1.1259671449661255,
      "learning_rate": 1.1403118040089087e-05,
      "loss": 1.3549,
      "step": 4244
    },
    {
      "epoch": 0.9433333333333334,
      "grad_norm": 1.147953987121582,
      "learning_rate": 1.1358574610244989e-05,
      "loss": 1.5969,
      "step": 4245
    },
    {
      "epoch": 0.9435555555555556,
      "grad_norm": 0.932826578617096,
      "learning_rate": 1.1314031180400891e-05,
      "loss": 1.3558,
      "step": 4246
    },
    {
      "epoch": 0.9437777777777778,
      "grad_norm": 1.2732888460159302,
      "learning_rate": 1.1269487750556795e-05,
      "loss": 1.2856,
      "step": 4247
    },
    {
      "epoch": 0.944,
      "grad_norm": 1.4992557764053345,
      "learning_rate": 1.1224944320712695e-05,
      "loss": 1.1909,
      "step": 4248
    },
    {
      "epoch": 0.9442222222222222,
      "grad_norm": 0.13996893167495728,
      "learning_rate": 1.1180400890868597e-05,
      "loss": 0.0324,
      "step": 4249
    },
    {
      "epoch": 0.9444444444444444,
      "grad_norm": 1.2993894815444946,
      "learning_rate": 1.11358574610245e-05,
      "loss": 1.1957,
      "step": 4250
    },
    {
      "epoch": 0.9446666666666667,
      "grad_norm": 0.5980258584022522,
      "learning_rate": 1.10913140311804e-05,
      "loss": 1.0878,
      "step": 4251
    },
    {
      "epoch": 0.9448888888888889,
      "grad_norm": 0.044246070086956024,
      "learning_rate": 1.1046770601336305e-05,
      "loss": 0.0103,
      "step": 4252
    },
    {
      "epoch": 0.9451111111111111,
      "grad_norm": 0.04438783973455429,
      "learning_rate": 1.1002227171492205e-05,
      "loss": 0.0103,
      "step": 4253
    },
    {
      "epoch": 0.9453333333333334,
      "grad_norm": 0.5702335834503174,
      "learning_rate": 1.0957683741648107e-05,
      "loss": 1.2407,
      "step": 4254
    },
    {
      "epoch": 0.9455555555555556,
      "grad_norm": 0.5994575023651123,
      "learning_rate": 1.091314031180401e-05,
      "loss": 0.974,
      "step": 4255
    },
    {
      "epoch": 0.9457777777777778,
      "grad_norm": 0.916534423828125,
      "learning_rate": 1.086859688195991e-05,
      "loss": 2.1087,
      "step": 4256
    },
    {
      "epoch": 0.946,
      "grad_norm": 0.8901073336601257,
      "learning_rate": 1.0824053452115813e-05,
      "loss": 2.4281,
      "step": 4257
    },
    {
      "epoch": 0.9462222222222222,
      "grad_norm": 0.9570392966270447,
      "learning_rate": 1.0779510022271716e-05,
      "loss": 1.9183,
      "step": 4258
    },
    {
      "epoch": 0.9464444444444444,
      "grad_norm": 0.880129873752594,
      "learning_rate": 1.0734966592427617e-05,
      "loss": 2.15,
      "step": 4259
    },
    {
      "epoch": 0.9466666666666667,
      "grad_norm": 0.06885567307472229,
      "learning_rate": 1.069042316258352e-05,
      "loss": 0.0106,
      "step": 4260
    },
    {
      "epoch": 0.9468888888888889,
      "grad_norm": 0.8540828824043274,
      "learning_rate": 1.064587973273942e-05,
      "loss": 1.9726,
      "step": 4261
    },
    {
      "epoch": 0.9471111111111111,
      "grad_norm": 0.9908187985420227,
      "learning_rate": 1.0601336302895323e-05,
      "loss": 2.4167,
      "step": 4262
    },
    {
      "epoch": 0.9473333333333334,
      "grad_norm": 0.9318075180053711,
      "learning_rate": 1.0556792873051226e-05,
      "loss": 1.9713,
      "step": 4263
    },
    {
      "epoch": 0.9475555555555556,
      "grad_norm": 0.9172812700271606,
      "learning_rate": 1.0512249443207126e-05,
      "loss": 1.9912,
      "step": 4264
    },
    {
      "epoch": 0.9477777777777778,
      "grad_norm": 0.6781771779060364,
      "learning_rate": 1.046770601336303e-05,
      "loss": 1.0824,
      "step": 4265
    },
    {
      "epoch": 0.948,
      "grad_norm": 0.7084238529205322,
      "learning_rate": 1.0423162583518932e-05,
      "loss": 0.9626,
      "step": 4266
    },
    {
      "epoch": 0.9482222222222222,
      "grad_norm": 1.27622389793396,
      "learning_rate": 1.0378619153674832e-05,
      "loss": 1.8587,
      "step": 4267
    },
    {
      "epoch": 0.9484444444444444,
      "grad_norm": 0.9670615792274475,
      "learning_rate": 1.0334075723830736e-05,
      "loss": 1.6965,
      "step": 4268
    },
    {
      "epoch": 0.9486666666666667,
      "grad_norm": 0.07310232520103455,
      "learning_rate": 1.0289532293986638e-05,
      "loss": 0.0157,
      "step": 4269
    },
    {
      "epoch": 0.9488888888888889,
      "grad_norm": 0.9369098544120789,
      "learning_rate": 1.024498886414254e-05,
      "loss": 1.7478,
      "step": 4270
    },
    {
      "epoch": 0.9491111111111111,
      "grad_norm": 0.7294790148735046,
      "learning_rate": 1.0200445434298442e-05,
      "loss": 1.1187,
      "step": 4271
    },
    {
      "epoch": 0.9493333333333334,
      "grad_norm": 0.9501145482063293,
      "learning_rate": 1.0155902004454342e-05,
      "loss": 1.6452,
      "step": 4272
    },
    {
      "epoch": 0.9495555555555556,
      "grad_norm": 1.1477452516555786,
      "learning_rate": 1.0111358574610246e-05,
      "loss": 1.8428,
      "step": 4273
    },
    {
      "epoch": 0.9497777777777778,
      "grad_norm": 0.9523744583129883,
      "learning_rate": 1.0066815144766148e-05,
      "loss": 1.8126,
      "step": 4274
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9534154534339905,
      "learning_rate": 1.002227171492205e-05,
      "loss": 1.7667,
      "step": 4275
    },
    {
      "epoch": 0.9502222222222222,
      "grad_norm": 1.2276133298873901,
      "learning_rate": 9.977728285077952e-06,
      "loss": 1.901,
      "step": 4276
    },
    {
      "epoch": 0.9504444444444444,
      "grad_norm": 0.7759425044059753,
      "learning_rate": 9.933184855233854e-06,
      "loss": 0.7691,
      "step": 4277
    },
    {
      "epoch": 0.9506666666666667,
      "grad_norm": 0.06685524433851242,
      "learning_rate": 9.888641425389756e-06,
      "loss": 0.0175,
      "step": 4278
    },
    {
      "epoch": 0.9508888888888889,
      "grad_norm": 0.7249411940574646,
      "learning_rate": 9.844097995545658e-06,
      "loss": 0.983,
      "step": 4279
    },
    {
      "epoch": 0.9511111111111111,
      "grad_norm": 0.6953089237213135,
      "learning_rate": 9.79955456570156e-06,
      "loss": 0.9221,
      "step": 4280
    },
    {
      "epoch": 0.9513333333333334,
      "grad_norm": 1.0122225284576416,
      "learning_rate": 9.755011135857462e-06,
      "loss": 1.6027,
      "step": 4281
    },
    {
      "epoch": 0.9515555555555556,
      "grad_norm": 1.0647549629211426,
      "learning_rate": 9.710467706013364e-06,
      "loss": 1.6956,
      "step": 4282
    },
    {
      "epoch": 0.9517777777777777,
      "grad_norm": 1.0427286624908447,
      "learning_rate": 9.665924276169266e-06,
      "loss": 1.6593,
      "step": 4283
    },
    {
      "epoch": 0.952,
      "grad_norm": 0.07722879201173782,
      "learning_rate": 9.621380846325168e-06,
      "loss": 0.0186,
      "step": 4284
    },
    {
      "epoch": 0.9522222222222222,
      "grad_norm": 0.7623983025550842,
      "learning_rate": 9.57683741648107e-06,
      "loss": 0.9881,
      "step": 4285
    },
    {
      "epoch": 0.9524444444444444,
      "grad_norm": 1.0861274003982544,
      "learning_rate": 9.532293986636972e-06,
      "loss": 1.5431,
      "step": 4286
    },
    {
      "epoch": 0.9526666666666667,
      "grad_norm": 0.9466423988342285,
      "learning_rate": 9.487750556792873e-06,
      "loss": 1.5866,
      "step": 4287
    },
    {
      "epoch": 0.9528888888888889,
      "grad_norm": 0.7006486058235168,
      "learning_rate": 9.443207126948775e-06,
      "loss": 0.8612,
      "step": 4288
    },
    {
      "epoch": 0.9531111111111111,
      "grad_norm": 0.9445701241493225,
      "learning_rate": 9.398663697104677e-06,
      "loss": 1.4541,
      "step": 4289
    },
    {
      "epoch": 0.9533333333333334,
      "grad_norm": 1.2460566759109497,
      "learning_rate": 9.35412026726058e-06,
      "loss": 1.6395,
      "step": 4290
    },
    {
      "epoch": 0.9535555555555556,
      "grad_norm": 1.0960421562194824,
      "learning_rate": 9.309576837416481e-06,
      "loss": 1.3166,
      "step": 4291
    },
    {
      "epoch": 0.9537777777777777,
      "grad_norm": 1.22000253200531,
      "learning_rate": 9.265033407572383e-06,
      "loss": 1.4362,
      "step": 4292
    },
    {
      "epoch": 0.954,
      "grad_norm": 1.0577735900878906,
      "learning_rate": 9.220489977728285e-06,
      "loss": 1.2362,
      "step": 4293
    },
    {
      "epoch": 0.9542222222222222,
      "grad_norm": 1.261118769645691,
      "learning_rate": 9.175946547884187e-06,
      "loss": 1.5433,
      "step": 4294
    },
    {
      "epoch": 0.9544444444444444,
      "grad_norm": 1.0835603475570679,
      "learning_rate": 9.13140311804009e-06,
      "loss": 1.0706,
      "step": 4295
    },
    {
      "epoch": 0.9546666666666667,
      "grad_norm": 0.1414426565170288,
      "learning_rate": 9.086859688195993e-06,
      "loss": 0.0324,
      "step": 4296
    },
    {
      "epoch": 0.9548888888888889,
      "grad_norm": 0.15208975970745087,
      "learning_rate": 9.042316258351893e-06,
      "loss": 0.0327,
      "step": 4297
    },
    {
      "epoch": 0.9551111111111111,
      "grad_norm": 0.942937970161438,
      "learning_rate": 8.997772828507795e-06,
      "loss": 0.9045,
      "step": 4298
    },
    {
      "epoch": 0.9553333333333334,
      "grad_norm": 0.9884275197982788,
      "learning_rate": 8.953229398663697e-06,
      "loss": 0.9895,
      "step": 4299
    },
    {
      "epoch": 0.9555555555555556,
      "grad_norm": 0.936667263507843,
      "learning_rate": 8.908685968819599e-06,
      "loss": 0.776,
      "step": 4300
    },
    {
      "epoch": 0.9557777777777777,
      "grad_norm": 0.6337212324142456,
      "learning_rate": 8.864142538975503e-06,
      "loss": 1.1042,
      "step": 4301
    },
    {
      "epoch": 0.956,
      "grad_norm": 0.8414755463600159,
      "learning_rate": 8.819599109131403e-06,
      "loss": 1.9703,
      "step": 4302
    },
    {
      "epoch": 0.9562222222222222,
      "grad_norm": 0.5726562738418579,
      "learning_rate": 8.775055679287305e-06,
      "loss": 0.8849,
      "step": 4303
    },
    {
      "epoch": 0.9564444444444444,
      "grad_norm": 0.6391728520393372,
      "learning_rate": 8.730512249443209e-06,
      "loss": 1.1577,
      "step": 4304
    },
    {
      "epoch": 0.9566666666666667,
      "grad_norm": 0.5593711137771606,
      "learning_rate": 8.685968819599109e-06,
      "loss": 0.8922,
      "step": 4305
    },
    {
      "epoch": 0.9568888888888889,
      "grad_norm": 0.7331346869468689,
      "learning_rate": 8.641425389755013e-06,
      "loss": 1.1386,
      "step": 4306
    },
    {
      "epoch": 0.9571111111111111,
      "grad_norm": 0.5739585757255554,
      "learning_rate": 8.596881959910913e-06,
      "loss": 0.8375,
      "step": 4307
    },
    {
      "epoch": 0.9573333333333334,
      "grad_norm": 0.07684678584337234,
      "learning_rate": 8.552338530066815e-06,
      "loss": 0.011,
      "step": 4308
    },
    {
      "epoch": 0.9575555555555556,
      "grad_norm": 0.07993515580892563,
      "learning_rate": 8.507795100222719e-06,
      "loss": 0.0111,
      "step": 4309
    },
    {
      "epoch": 0.9577777777777777,
      "grad_norm": 0.6196415424346924,
      "learning_rate": 8.463251670378619e-06,
      "loss": 1.0166,
      "step": 4310
    },
    {
      "epoch": 0.958,
      "grad_norm": 0.8932988047599792,
      "learning_rate": 8.41870824053452e-06,
      "loss": 2.3441,
      "step": 4311
    },
    {
      "epoch": 0.9582222222222222,
      "grad_norm": 1.2204405069351196,
      "learning_rate": 8.374164810690424e-06,
      "loss": 2.0743,
      "step": 4312
    },
    {
      "epoch": 0.9584444444444444,
      "grad_norm": 0.9031944870948792,
      "learning_rate": 8.329621380846325e-06,
      "loss": 1.9911,
      "step": 4313
    },
    {
      "epoch": 0.9586666666666667,
      "grad_norm": 1.0615592002868652,
      "learning_rate": 8.285077951002228e-06,
      "loss": 2.3094,
      "step": 4314
    },
    {
      "epoch": 0.9588888888888889,
      "grad_norm": 0.931339681148529,
      "learning_rate": 8.240534521158129e-06,
      "loss": 1.8745,
      "step": 4315
    },
    {
      "epoch": 0.9591111111111111,
      "grad_norm": 1.068681240081787,
      "learning_rate": 8.19599109131403e-06,
      "loss": 2.1088,
      "step": 4316
    },
    {
      "epoch": 0.9593333333333334,
      "grad_norm": 0.9199005365371704,
      "learning_rate": 8.151447661469934e-06,
      "loss": 1.9663,
      "step": 4317
    },
    {
      "epoch": 0.9595555555555556,
      "grad_norm": 0.9643719792366028,
      "learning_rate": 8.106904231625835e-06,
      "loss": 2.2083,
      "step": 4318
    },
    {
      "epoch": 0.9597777777777777,
      "grad_norm": 1.026021122932434,
      "learning_rate": 8.062360801781738e-06,
      "loss": 1.688,
      "step": 4319
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9671936631202698,
      "learning_rate": 8.01781737193764e-06,
      "loss": 2.0219,
      "step": 4320
    },
    {
      "epoch": 0.9602222222222222,
      "grad_norm": 0.9827919006347656,
      "learning_rate": 7.97327394209354e-06,
      "loss": 1.6401,
      "step": 4321
    },
    {
      "epoch": 0.9604444444444444,
      "grad_norm": 0.07093919813632965,
      "learning_rate": 7.928730512249444e-06,
      "loss": 0.0157,
      "step": 4322
    },
    {
      "epoch": 0.9606666666666667,
      "grad_norm": 0.07090691477060318,
      "learning_rate": 7.884187082405346e-06,
      "loss": 0.0154,
      "step": 4323
    },
    {
      "epoch": 0.9608888888888889,
      "grad_norm": 0.06990091502666473,
      "learning_rate": 7.839643652561248e-06,
      "loss": 0.0153,
      "step": 4324
    },
    {
      "epoch": 0.9611111111111111,
      "grad_norm": 0.06884946674108505,
      "learning_rate": 7.79510022271715e-06,
      "loss": 0.0152,
      "step": 4325
    },
    {
      "epoch": 0.9613333333333334,
      "grad_norm": 0.8022354245185852,
      "learning_rate": 7.75055679287305e-06,
      "loss": 0.8116,
      "step": 4326
    },
    {
      "epoch": 0.9615555555555556,
      "grad_norm": 1.062786340713501,
      "learning_rate": 7.706013363028954e-06,
      "loss": 1.7179,
      "step": 4327
    },
    {
      "epoch": 0.9617777777777777,
      "grad_norm": 1.0791099071502686,
      "learning_rate": 7.661469933184856e-06,
      "loss": 1.7474,
      "step": 4328
    },
    {
      "epoch": 0.962,
      "grad_norm": 1.0296615362167358,
      "learning_rate": 7.616926503340757e-06,
      "loss": 1.6771,
      "step": 4329
    },
    {
      "epoch": 0.9622222222222222,
      "grad_norm": 1.076456904411316,
      "learning_rate": 7.57238307349666e-06,
      "loss": 1.8226,
      "step": 4330
    },
    {
      "epoch": 0.9624444444444444,
      "grad_norm": 0.9866617321968079,
      "learning_rate": 7.527839643652562e-06,
      "loss": 1.887,
      "step": 4331
    },
    {
      "epoch": 0.9626666666666667,
      "grad_norm": 0.06448253244161606,
      "learning_rate": 7.483296213808463e-06,
      "loss": 0.0173,
      "step": 4332
    },
    {
      "epoch": 0.9628888888888889,
      "grad_norm": 0.06699193269014359,
      "learning_rate": 7.438752783964366e-06,
      "loss": 0.0175,
      "step": 4333
    },
    {
      "epoch": 0.9631111111111111,
      "grad_norm": 0.7605263590812683,
      "learning_rate": 7.394209354120267e-06,
      "loss": 0.8152,
      "step": 4334
    },
    {
      "epoch": 0.9633333333333334,
      "grad_norm": 1.077652096748352,
      "learning_rate": 7.34966592427617e-06,
      "loss": 1.8169,
      "step": 4335
    },
    {
      "epoch": 0.9635555555555556,
      "grad_norm": 0.07532133162021637,
      "learning_rate": 7.305122494432072e-06,
      "loss": 0.0182,
      "step": 4336
    },
    {
      "epoch": 0.9637777777777777,
      "grad_norm": 0.07628657668828964,
      "learning_rate": 7.260579064587973e-06,
      "loss": 0.0177,
      "step": 4337
    },
    {
      "epoch": 0.964,
      "grad_norm": 0.07550018280744553,
      "learning_rate": 7.216035634743876e-06,
      "loss": 0.0181,
      "step": 4338
    },
    {
      "epoch": 0.9642222222222222,
      "grad_norm": 0.9071139097213745,
      "learning_rate": 7.1714922048997785e-06,
      "loss": 0.8412,
      "step": 4339
    },
    {
      "epoch": 0.9644444444444444,
      "grad_norm": 0.9595382213592529,
      "learning_rate": 7.12694877505568e-06,
      "loss": 1.4946,
      "step": 4340
    },
    {
      "epoch": 0.9646666666666667,
      "grad_norm": 1.0608892440795898,
      "learning_rate": 7.082405345211582e-06,
      "loss": 1.6542,
      "step": 4341
    },
    {
      "epoch": 0.9648888888888889,
      "grad_norm": 1.1697642803192139,
      "learning_rate": 7.037861915367483e-06,
      "loss": 1.7757,
      "step": 4342
    },
    {
      "epoch": 0.9651111111111111,
      "grad_norm": 1.0154902935028076,
      "learning_rate": 6.9933184855233855e-06,
      "loss": 1.2555,
      "step": 4343
    },
    {
      "epoch": 0.9653333333333334,
      "grad_norm": 1.1377027034759521,
      "learning_rate": 6.948775055679288e-06,
      "loss": 1.7557,
      "step": 4344
    },
    {
      "epoch": 0.9655555555555555,
      "grad_norm": 1.1074367761611938,
      "learning_rate": 6.9042316258351895e-06,
      "loss": 1.5758,
      "step": 4345
    },
    {
      "epoch": 0.9657777777777777,
      "grad_norm": 0.6881236433982849,
      "learning_rate": 6.8596881959910914e-06,
      "loss": 0.7293,
      "step": 4346
    },
    {
      "epoch": 0.966,
      "grad_norm": 0.18515631556510925,
      "learning_rate": 6.815144766146994e-06,
      "loss": 0.0304,
      "step": 4347
    },
    {
      "epoch": 0.9662222222222222,
      "grad_norm": 1.1112456321716309,
      "learning_rate": 6.770601336302895e-06,
      "loss": 1.2363,
      "step": 4348
    },
    {
      "epoch": 0.9664444444444444,
      "grad_norm": 0.8793594837188721,
      "learning_rate": 6.726057906458798e-06,
      "loss": 0.8094,
      "step": 4349
    },
    {
      "epoch": 0.9666666666666667,
      "grad_norm": 0.9809714555740356,
      "learning_rate": 6.6815144766147e-06,
      "loss": 0.7478,
      "step": 4350
    },
    {
      "epoch": 0.9668888888888889,
      "grad_norm": 0.6749547123908997,
      "learning_rate": 6.636971046770601e-06,
      "loss": 1.0962,
      "step": 4351
    },
    {
      "epoch": 0.9671111111111111,
      "grad_norm": 0.8518489599227905,
      "learning_rate": 6.592427616926504e-06,
      "loss": 2.0981,
      "step": 4352
    },
    {
      "epoch": 0.9673333333333334,
      "grad_norm": 0.5904682874679565,
      "learning_rate": 6.547884187082405e-06,
      "loss": 0.8928,
      "step": 4353
    },
    {
      "epoch": 0.9675555555555555,
      "grad_norm": 0.6970412731170654,
      "learning_rate": 6.503340757238307e-06,
      "loss": 1.1904,
      "step": 4354
    },
    {
      "epoch": 0.9677777777777777,
      "grad_norm": 0.8849335312843323,
      "learning_rate": 6.45879732739421e-06,
      "loss": 1.9197,
      "step": 4355
    },
    {
      "epoch": 0.968,
      "grad_norm": 0.7975565791130066,
      "learning_rate": 6.414253897550111e-06,
      "loss": 2.0422,
      "step": 4356
    },
    {
      "epoch": 0.9682222222222222,
      "grad_norm": 0.898343026638031,
      "learning_rate": 6.369710467706014e-06,
      "loss": 2.0284,
      "step": 4357
    },
    {
      "epoch": 0.9684444444444444,
      "grad_norm": 0.08013878762722015,
      "learning_rate": 6.325167037861916e-06,
      "loss": 0.011,
      "step": 4358
    },
    {
      "epoch": 0.9686666666666667,
      "grad_norm": 0.0778137668967247,
      "learning_rate": 6.280623608017817e-06,
      "loss": 0.011,
      "step": 4359
    },
    {
      "epoch": 0.9688888888888889,
      "grad_norm": 0.075262151658535,
      "learning_rate": 6.23608017817372e-06,
      "loss": 0.0111,
      "step": 4360
    },
    {
      "epoch": 0.9691111111111111,
      "grad_norm": 0.8569869995117188,
      "learning_rate": 6.191536748329622e-06,
      "loss": 2.1552,
      "step": 4361
    },
    {
      "epoch": 0.9693333333333334,
      "grad_norm": 0.7958348393440247,
      "learning_rate": 6.146993318485524e-06,
      "loss": 2.1074,
      "step": 4362
    },
    {
      "epoch": 0.9695555555555555,
      "grad_norm": 0.861589789390564,
      "learning_rate": 6.102449888641426e-06,
      "loss": 1.8603,
      "step": 4363
    },
    {
      "epoch": 0.9697777777777777,
      "grad_norm": 0.870924174785614,
      "learning_rate": 6.057906458797328e-06,
      "loss": 1.6482,
      "step": 4364
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6811211109161377,
      "learning_rate": 6.01336302895323e-06,
      "loss": 0.9431,
      "step": 4365
    },
    {
      "epoch": 0.9702222222222222,
      "grad_norm": 1.2624512910842896,
      "learning_rate": 5.968819599109132e-06,
      "loss": 1.2395,
      "step": 4366
    },
    {
      "epoch": 0.9704444444444444,
      "grad_norm": 0.6082854866981506,
      "learning_rate": 5.924276169265034e-06,
      "loss": 0.9055,
      "step": 4367
    },
    {
      "epoch": 0.9706666666666667,
      "grad_norm": 0.990861713886261,
      "learning_rate": 5.879732739420936e-06,
      "loss": 1.96,
      "step": 4368
    },
    {
      "epoch": 0.9708888888888889,
      "grad_norm": 0.99041748046875,
      "learning_rate": 5.835189309576838e-06,
      "loss": 1.6842,
      "step": 4369
    },
    {
      "epoch": 0.9711111111111111,
      "grad_norm": 0.9319810271263123,
      "learning_rate": 5.79064587973274e-06,
      "loss": 1.9125,
      "step": 4370
    },
    {
      "epoch": 0.9713333333333334,
      "grad_norm": 1.01008141040802,
      "learning_rate": 5.746102449888642e-06,
      "loss": 1.9553,
      "step": 4371
    },
    {
      "epoch": 0.9715555555555555,
      "grad_norm": 0.7167505025863647,
      "learning_rate": 5.7015590200445435e-06,
      "loss": 0.972,
      "step": 4372
    },
    {
      "epoch": 0.9717777777777777,
      "grad_norm": 0.07200965285301208,
      "learning_rate": 5.6570155902004455e-06,
      "loss": 0.0151,
      "step": 4373
    },
    {
      "epoch": 0.972,
      "grad_norm": 0.6893488764762878,
      "learning_rate": 5.6124721603563475e-06,
      "loss": 0.8677,
      "step": 4374
    },
    {
      "epoch": 0.9722222222222222,
      "grad_norm": 0.12305945158004761,
      "learning_rate": 5.56792873051225e-06,
      "loss": 0.0204,
      "step": 4375
    },
    {
      "epoch": 0.9724444444444444,
      "grad_norm": 0.9432768821716309,
      "learning_rate": 5.523385300668152e-06,
      "loss": 1.7875,
      "step": 4376
    },
    {
      "epoch": 0.9726666666666667,
      "grad_norm": 1.0131165981292725,
      "learning_rate": 5.478841870824053e-06,
      "loss": 1.6501,
      "step": 4377
    },
    {
      "epoch": 0.9728888888888889,
      "grad_norm": 1.0048753023147583,
      "learning_rate": 5.434298440979955e-06,
      "loss": 1.6584,
      "step": 4378
    },
    {
      "epoch": 0.9731111111111111,
      "grad_norm": 0.9927910566329956,
      "learning_rate": 5.389755011135858e-06,
      "loss": 1.9151,
      "step": 4379
    },
    {
      "epoch": 0.9733333333333334,
      "grad_norm": 0.7287546396255493,
      "learning_rate": 5.34521158129176e-06,
      "loss": 0.7935,
      "step": 4380
    },
    {
      "epoch": 0.9735555555555555,
      "grad_norm": 0.06438437104225159,
      "learning_rate": 5.300668151447661e-06,
      "loss": 0.0174,
      "step": 4381
    },
    {
      "epoch": 0.9737777777777777,
      "grad_norm": 0.7063573002815247,
      "learning_rate": 5.256124721603563e-06,
      "loss": 0.794,
      "step": 4382
    },
    {
      "epoch": 0.974,
      "grad_norm": 0.07574823498725891,
      "learning_rate": 5.211581291759466e-06,
      "loss": 0.0178,
      "step": 4383
    },
    {
      "epoch": 0.9742222222222222,
      "grad_norm": 0.0726298987865448,
      "learning_rate": 5.167037861915368e-06,
      "loss": 0.0177,
      "step": 4384
    },
    {
      "epoch": 0.9744444444444444,
      "grad_norm": 0.7331129312515259,
      "learning_rate": 5.12249443207127e-06,
      "loss": 0.7402,
      "step": 4385
    },
    {
      "epoch": 0.9746666666666667,
      "grad_norm": 1.0017316341400146,
      "learning_rate": 5.077951002227171e-06,
      "loss": 1.6145,
      "step": 4386
    },
    {
      "epoch": 0.9748888888888889,
      "grad_norm": 0.9680055379867554,
      "learning_rate": 5.033407572383074e-06,
      "loss": 1.4337,
      "step": 4387
    },
    {
      "epoch": 0.9751111111111112,
      "grad_norm": 1.0141950845718384,
      "learning_rate": 4.988864142538976e-06,
      "loss": 1.549,
      "step": 4388
    },
    {
      "epoch": 0.9753333333333334,
      "grad_norm": 0.7489122748374939,
      "learning_rate": 4.944320712694878e-06,
      "loss": 0.6078,
      "step": 4389
    },
    {
      "epoch": 0.9755555555555555,
      "grad_norm": 0.9311794638633728,
      "learning_rate": 4.89977728285078e-06,
      "loss": 0.6076,
      "step": 4390
    },
    {
      "epoch": 0.9757777777777777,
      "grad_norm": 1.1416817903518677,
      "learning_rate": 4.855233853006682e-06,
      "loss": 1.7661,
      "step": 4391
    },
    {
      "epoch": 0.976,
      "grad_norm": 1.3029440641403198,
      "learning_rate": 4.810690423162584e-06,
      "loss": 1.5154,
      "step": 4392
    },
    {
      "epoch": 0.9762222222222222,
      "grad_norm": 1.1306506395339966,
      "learning_rate": 4.766146993318486e-06,
      "loss": 1.382,
      "step": 4393
    },
    {
      "epoch": 0.9764444444444444,
      "grad_norm": 0.9537327885627747,
      "learning_rate": 4.721603563474388e-06,
      "loss": 1.432,
      "step": 4394
    },
    {
      "epoch": 0.9766666666666667,
      "grad_norm": 0.9183233976364136,
      "learning_rate": 4.67706013363029e-06,
      "loss": 1.214,
      "step": 4395
    },
    {
      "epoch": 0.9768888888888889,
      "grad_norm": 0.9410824775695801,
      "learning_rate": 4.632516703786192e-06,
      "loss": 1.1194,
      "step": 4396
    },
    {
      "epoch": 0.9771111111111112,
      "grad_norm": 1.0940196514129639,
      "learning_rate": 4.587973273942094e-06,
      "loss": 1.1115,
      "step": 4397
    },
    {
      "epoch": 0.9773333333333334,
      "grad_norm": 1.0624735355377197,
      "learning_rate": 4.5434298440979965e-06,
      "loss": 1.0935,
      "step": 4398
    },
    {
      "epoch": 0.9775555555555555,
      "grad_norm": 1.0532846450805664,
      "learning_rate": 4.498886414253898e-06,
      "loss": 1.1252,
      "step": 4399
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 1.0047916173934937,
      "learning_rate": 4.4543429844097995e-06,
      "loss": 0.6994,
      "step": 4400
    },
    {
      "epoch": 0.978,
      "grad_norm": 0.7360401153564453,
      "learning_rate": 4.4097995545657015e-06,
      "loss": 1.1339,
      "step": 4401
    },
    {
      "epoch": 0.9782222222222222,
      "grad_norm": 0.8057011961936951,
      "learning_rate": 4.365256124721604e-06,
      "loss": 2.1264,
      "step": 4402
    },
    {
      "epoch": 0.9784444444444444,
      "grad_norm": 0.6433674097061157,
      "learning_rate": 4.320712694877506e-06,
      "loss": 1.1829,
      "step": 4403
    },
    {
      "epoch": 0.9786666666666667,
      "grad_norm": 0.5455031991004944,
      "learning_rate": 4.2761692650334074e-06,
      "loss": 1.2065,
      "step": 4404
    },
    {
      "epoch": 0.9788888888888889,
      "grad_norm": 0.883698582649231,
      "learning_rate": 4.231625835189309e-06,
      "loss": 2.0593,
      "step": 4405
    },
    {
      "epoch": 0.9791111111111112,
      "grad_norm": 0.6390405297279358,
      "learning_rate": 4.187082405345212e-06,
      "loss": 0.9819,
      "step": 4406
    },
    {
      "epoch": 0.9793333333333333,
      "grad_norm": 0.8261483311653137,
      "learning_rate": 4.142538975501114e-06,
      "loss": 2.308,
      "step": 4407
    },
    {
      "epoch": 0.9795555555555555,
      "grad_norm": 0.6975874900817871,
      "learning_rate": 4.097995545657015e-06,
      "loss": 1.0839,
      "step": 4408
    },
    {
      "epoch": 0.9797777777777777,
      "grad_norm": 0.6761125326156616,
      "learning_rate": 4.053452115812917e-06,
      "loss": 0.9341,
      "step": 4409
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7890470027923584,
      "learning_rate": 4.00890868596882e-06,
      "loss": 1.901,
      "step": 4410
    },
    {
      "epoch": 0.9802222222222222,
      "grad_norm": 0.9101024866104126,
      "learning_rate": 3.964365256124722e-06,
      "loss": 1.9368,
      "step": 4411
    },
    {
      "epoch": 0.9804444444444445,
      "grad_norm": 0.9786936640739441,
      "learning_rate": 3.919821826280624e-06,
      "loss": 2.1794,
      "step": 4412
    },
    {
      "epoch": 0.9806666666666667,
      "grad_norm": 0.7119241952896118,
      "learning_rate": 3.875278396436525e-06,
      "loss": 1.0641,
      "step": 4413
    },
    {
      "epoch": 0.9808888888888889,
      "grad_norm": 0.09762410819530487,
      "learning_rate": 3.830734966592428e-06,
      "loss": 0.0164,
      "step": 4414
    },
    {
      "epoch": 0.9811111111111112,
      "grad_norm": 0.7070305943489075,
      "learning_rate": 3.78619153674833e-06,
      "loss": 0.9271,
      "step": 4415
    },
    {
      "epoch": 0.9813333333333333,
      "grad_norm": 0.9111929535865784,
      "learning_rate": 3.7416481069042315e-06,
      "loss": 1.7253,
      "step": 4416
    },
    {
      "epoch": 0.9815555555555555,
      "grad_norm": 1.0224978923797607,
      "learning_rate": 3.6971046770601335e-06,
      "loss": 2.0652,
      "step": 4417
    },
    {
      "epoch": 0.9817777777777777,
      "grad_norm": 1.2484158277511597,
      "learning_rate": 3.652561247216036e-06,
      "loss": 2.263,
      "step": 4418
    },
    {
      "epoch": 0.982,
      "grad_norm": 0.6740500926971436,
      "learning_rate": 3.608017817371938e-06,
      "loss": 0.8257,
      "step": 4419
    },
    {
      "epoch": 0.9822222222222222,
      "grad_norm": 0.06990643590688705,
      "learning_rate": 3.56347438752784e-06,
      "loss": 0.0152,
      "step": 4420
    },
    {
      "epoch": 0.9824444444444445,
      "grad_norm": 0.728216826915741,
      "learning_rate": 3.5189309576837414e-06,
      "loss": 0.8901,
      "step": 4421
    },
    {
      "epoch": 0.9826666666666667,
      "grad_norm": 0.06907333433628082,
      "learning_rate": 3.474387527839644e-06,
      "loss": 0.0152,
      "step": 4422
    },
    {
      "epoch": 0.9828888888888889,
      "grad_norm": 0.792972981929779,
      "learning_rate": 3.4298440979955457e-06,
      "loss": 0.8383,
      "step": 4423
    },
    {
      "epoch": 0.9831111111111112,
      "grad_norm": 0.9240522384643555,
      "learning_rate": 3.3853006681514477e-06,
      "loss": 1.9004,
      "step": 4424
    },
    {
      "epoch": 0.9833333333333333,
      "grad_norm": 0.9684634208679199,
      "learning_rate": 3.34075723830735e-06,
      "loss": 1.7412,
      "step": 4425
    },
    {
      "epoch": 0.9835555555555555,
      "grad_norm": 1.075197696685791,
      "learning_rate": 3.296213808463252e-06,
      "loss": 1.4785,
      "step": 4426
    },
    {
      "epoch": 0.9837777777777778,
      "grad_norm": 0.9526484608650208,
      "learning_rate": 3.2516703786191536e-06,
      "loss": 1.6998,
      "step": 4427
    },
    {
      "epoch": 0.984,
      "grad_norm": 0.995002269744873,
      "learning_rate": 3.2071269487750556e-06,
      "loss": 1.5562,
      "step": 4428
    },
    {
      "epoch": 0.9842222222222222,
      "grad_norm": 1.0168581008911133,
      "learning_rate": 3.162583518930958e-06,
      "loss": 1.722,
      "step": 4429
    },
    {
      "epoch": 0.9844444444444445,
      "grad_norm": 0.06682226806879044,
      "learning_rate": 3.11804008908686e-06,
      "loss": 0.0176,
      "step": 4430
    },
    {
      "epoch": 0.9846666666666667,
      "grad_norm": 0.06983762979507446,
      "learning_rate": 3.073496659242762e-06,
      "loss": 0.0173,
      "step": 4431
    },
    {
      "epoch": 0.9848888888888889,
      "grad_norm": 0.06633459031581879,
      "learning_rate": 3.028953229398664e-06,
      "loss": 0.0174,
      "step": 4432
    },
    {
      "epoch": 0.9851111111111112,
      "grad_norm": 0.06410173326730728,
      "learning_rate": 2.984409799554566e-06,
      "loss": 0.0175,
      "step": 4433
    },
    {
      "epoch": 0.9853333333333333,
      "grad_norm": 0.8998127579689026,
      "learning_rate": 2.939866369710468e-06,
      "loss": 1.6707,
      "step": 4434
    },
    {
      "epoch": 0.9855555555555555,
      "grad_norm": 0.08308030664920807,
      "learning_rate": 2.89532293986637e-06,
      "loss": 0.0179,
      "step": 4435
    },
    {
      "epoch": 0.9857777777777778,
      "grad_norm": 0.8417572379112244,
      "learning_rate": 2.8507795100222718e-06,
      "loss": 0.966,
      "step": 4436
    },
    {
      "epoch": 0.986,
      "grad_norm": 1.395193338394165,
      "learning_rate": 2.8062360801781737e-06,
      "loss": 1.8783,
      "step": 4437
    },
    {
      "epoch": 0.9862222222222222,
      "grad_norm": 0.9416733384132385,
      "learning_rate": 2.761692650334076e-06,
      "loss": 1.5385,
      "step": 4438
    },
    {
      "epoch": 0.9864444444444445,
      "grad_norm": 1.100425362586975,
      "learning_rate": 2.7171492204899777e-06,
      "loss": 1.7733,
      "step": 4439
    },
    {
      "epoch": 0.9866666666666667,
      "grad_norm": 0.7649857401847839,
      "learning_rate": 2.67260579064588e-06,
      "loss": 0.7851,
      "step": 4440
    },
    {
      "epoch": 0.9868888888888889,
      "grad_norm": 1.1875056028366089,
      "learning_rate": 2.6280623608017816e-06,
      "loss": 1.6325,
      "step": 4441
    },
    {
      "epoch": 0.9871111111111112,
      "grad_norm": 1.1401832103729248,
      "learning_rate": 2.583518930957684e-06,
      "loss": 1.6937,
      "step": 4442
    },
    {
      "epoch": 0.9873333333333333,
      "grad_norm": 1.1035478115081787,
      "learning_rate": 2.5389755011135856e-06,
      "loss": 1.526,
      "step": 4443
    },
    {
      "epoch": 0.9875555555555555,
      "grad_norm": 0.8037136793136597,
      "learning_rate": 2.494432071269488e-06,
      "loss": 0.7856,
      "step": 4444
    },
    {
      "epoch": 0.9877777777777778,
      "grad_norm": 1.0584372282028198,
      "learning_rate": 2.44988864142539e-06,
      "loss": 1.3084,
      "step": 4445
    },
    {
      "epoch": 0.988,
      "grad_norm": 0.1836099475622177,
      "learning_rate": 2.405345211581292e-06,
      "loss": 0.0299,
      "step": 4446
    },
    {
      "epoch": 0.9882222222222222,
      "grad_norm": 1.108872413635254,
      "learning_rate": 2.360801781737194e-06,
      "loss": 1.0455,
      "step": 4447
    },
    {
      "epoch": 0.9884444444444445,
      "grad_norm": 0.6207655072212219,
      "learning_rate": 2.316258351893096e-06,
      "loss": 0.4939,
      "step": 4448
    },
    {
      "epoch": 0.9886666666666667,
      "grad_norm": 0.14554363489151,
      "learning_rate": 2.2717149220489982e-06,
      "loss": 0.0318,
      "step": 4449
    },
    {
      "epoch": 0.9888888888888889,
      "grad_norm": 1.2572603225708008,
      "learning_rate": 2.2271714922048998e-06,
      "loss": 1.1098,
      "step": 4450
    },
    {
      "epoch": 0.9891111111111112,
      "grad_norm": 0.04474545270204544,
      "learning_rate": 2.182628062360802e-06,
      "loss": 0.0103,
      "step": 4451
    },
    {
      "epoch": 0.9893333333333333,
      "grad_norm": 0.04636682942509651,
      "learning_rate": 2.1380846325167037e-06,
      "loss": 0.0101,
      "step": 4452
    },
    {
      "epoch": 0.9895555555555555,
      "grad_norm": 0.5653097033500671,
      "learning_rate": 2.093541202672606e-06,
      "loss": 1.0041,
      "step": 4453
    },
    {
      "epoch": 0.9897777777777778,
      "grad_norm": 0.4789440333843231,
      "learning_rate": 2.0489977728285077e-06,
      "loss": 0.979,
      "step": 4454
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8047142028808594,
      "learning_rate": 2.00445434298441e-06,
      "loss": 2.0886,
      "step": 4455
    },
    {
      "epoch": 0.9902222222222222,
      "grad_norm": 0.8989213109016418,
      "learning_rate": 1.959910913140312e-06,
      "loss": 2.1387,
      "step": 4456
    },
    {
      "epoch": 0.9904444444444445,
      "grad_norm": 0.06995019316673279,
      "learning_rate": 1.915367483296214e-06,
      "loss": 0.0109,
      "step": 4457
    },
    {
      "epoch": 0.9906666666666667,
      "grad_norm": 0.07215920835733414,
      "learning_rate": 1.8708240534521158e-06,
      "loss": 0.0108,
      "step": 4458
    },
    {
      "epoch": 0.9908888888888889,
      "grad_norm": 0.07202310115098953,
      "learning_rate": 1.826280623608018e-06,
      "loss": 0.0109,
      "step": 4459
    },
    {
      "epoch": 0.9911111111111112,
      "grad_norm": 0.9508035778999329,
      "learning_rate": 1.78173719376392e-06,
      "loss": 2.2415,
      "step": 4460
    },
    {
      "epoch": 0.9913333333333333,
      "grad_norm": 0.891727864742279,
      "learning_rate": 1.737193763919822e-06,
      "loss": 1.9116,
      "step": 4461
    },
    {
      "epoch": 0.9915555555555555,
      "grad_norm": 1.0234503746032715,
      "learning_rate": 1.6926503340757238e-06,
      "loss": 2.0408,
      "step": 4462
    },
    {
      "epoch": 0.9917777777777778,
      "grad_norm": 0.8998834490776062,
      "learning_rate": 1.648106904231626e-06,
      "loss": 2.0895,
      "step": 4463
    },
    {
      "epoch": 0.992,
      "grad_norm": 0.9309079051017761,
      "learning_rate": 1.6035634743875278e-06,
      "loss": 1.9546,
      "step": 4464
    },
    {
      "epoch": 0.9922222222222222,
      "grad_norm": 0.903396725654602,
      "learning_rate": 1.55902004454343e-06,
      "loss": 1.0776,
      "step": 4465
    },
    {
      "epoch": 0.9924444444444445,
      "grad_norm": 1.0036734342575073,
      "learning_rate": 1.514476614699332e-06,
      "loss": 1.7439,
      "step": 4466
    },
    {
      "epoch": 0.9926666666666667,
      "grad_norm": 0.9246737957000732,
      "learning_rate": 1.469933184855234e-06,
      "loss": 1.7637,
      "step": 4467
    },
    {
      "epoch": 0.9928888888888889,
      "grad_norm": 1.0618118047714233,
      "learning_rate": 1.4253897550111359e-06,
      "loss": 1.9589,
      "step": 4468
    },
    {
      "epoch": 0.9931111111111111,
      "grad_norm": 1.1122076511383057,
      "learning_rate": 1.380846325167038e-06,
      "loss": 1.9023,
      "step": 4469
    },
    {
      "epoch": 0.9933333333333333,
      "grad_norm": 1.027601957321167,
      "learning_rate": 1.33630289532294e-06,
      "loss": 1.9814,
      "step": 4470
    },
    {
      "epoch": 0.9935555555555555,
      "grad_norm": 0.06850501894950867,
      "learning_rate": 1.291759465478842e-06,
      "loss": 0.0152,
      "step": 4471
    },
    {
      "epoch": 0.9937777777777778,
      "grad_norm": 0.067985400557518,
      "learning_rate": 1.247216035634744e-06,
      "loss": 0.0152,
      "step": 4472
    },
    {
      "epoch": 0.994,
      "grad_norm": 1.0229130983352661,
      "learning_rate": 1.202672605790646e-06,
      "loss": 1.6158,
      "step": 4473
    },
    {
      "epoch": 0.9942222222222222,
      "grad_norm": 0.6642321944236755,
      "learning_rate": 1.158129175946548e-06,
      "loss": 0.7592,
      "step": 4474
    },
    {
      "epoch": 0.9944444444444445,
      "grad_norm": 1.025769829750061,
      "learning_rate": 1.1135857461024499e-06,
      "loss": 1.8864,
      "step": 4475
    },
    {
      "epoch": 0.9946666666666667,
      "grad_norm": 1.1777735948562622,
      "learning_rate": 1.0690423162583519e-06,
      "loss": 1.9098,
      "step": 4476
    },
    {
      "epoch": 0.9948888888888889,
      "grad_norm": 1.0232651233673096,
      "learning_rate": 1.0244988864142538e-06,
      "loss": 1.6622,
      "step": 4477
    },
    {
      "epoch": 0.9951111111111111,
      "grad_norm": 1.0267844200134277,
      "learning_rate": 9.79955456570156e-07,
      "loss": 1.8175,
      "step": 4478
    },
    {
      "epoch": 0.9953333333333333,
      "grad_norm": 0.7749679684638977,
      "learning_rate": 9.354120267260579e-07,
      "loss": 0.906,
      "step": 4479
    },
    {
      "epoch": 0.9955555555555555,
      "grad_norm": 0.06536448746919632,
      "learning_rate": 8.9086859688196e-07,
      "loss": 0.0173,
      "step": 4480
    },
    {
      "epoch": 0.9957777777777778,
      "grad_norm": 0.6798564195632935,
      "learning_rate": 8.463251670378619e-07,
      "loss": 0.7955,
      "step": 4481
    },
    {
      "epoch": 0.996,
      "grad_norm": 0.06655056774616241,
      "learning_rate": 8.017817371937639e-07,
      "loss": 0.0176,
      "step": 4482
    },
    {
      "epoch": 0.9962222222222222,
      "grad_norm": 0.7525641918182373,
      "learning_rate": 7.57238307349666e-07,
      "loss": 0.8103,
      "step": 4483
    },
    {
      "epoch": 0.9964444444444445,
      "grad_norm": 0.6724408268928528,
      "learning_rate": 7.126948775055679e-07,
      "loss": 0.7957,
      "step": 4484
    },
    {
      "epoch": 0.9966666666666667,
      "grad_norm": 0.99349445104599,
      "learning_rate": 6.6815144766147e-07,
      "loss": 1.691,
      "step": 4485
    },
    {
      "epoch": 0.9968888888888889,
      "grad_norm": 1.0608917474746704,
      "learning_rate": 6.23608017817372e-07,
      "loss": 1.6244,
      "step": 4486
    },
    {
      "epoch": 0.9971111111111111,
      "grad_norm": 0.07752467691898346,
      "learning_rate": 5.79064587973274e-07,
      "loss": 0.018,
      "step": 4487
    },
    {
      "epoch": 0.9973333333333333,
      "grad_norm": 0.7708075046539307,
      "learning_rate": 5.345211581291759e-07,
      "loss": 0.8414,
      "step": 4488
    },
    {
      "epoch": 0.9975555555555555,
      "grad_norm": 0.6976569890975952,
      "learning_rate": 4.89977728285078e-07,
      "loss": 0.7637,
      "step": 4489
    },
    {
      "epoch": 0.9977777777777778,
      "grad_norm": 1.0548564195632935,
      "learning_rate": 4.4543429844098e-07,
      "loss": 1.4826,
      "step": 4490
    },
    {
      "epoch": 0.998,
      "grad_norm": 0.6447573900222778,
      "learning_rate": 4.0089086859688195e-07,
      "loss": 0.6006,
      "step": 4491
    },
    {
      "epoch": 0.9982222222222222,
      "grad_norm": 1.2821402549743652,
      "learning_rate": 3.5634743875278397e-07,
      "loss": 1.8731,
      "step": 4492
    },
    {
      "epoch": 0.9984444444444445,
      "grad_norm": 1.1518702507019043,
      "learning_rate": 3.11804008908686e-07,
      "loss": 1.5884,
      "step": 4493
    },
    {
      "epoch": 0.9986666666666667,
      "grad_norm": 1.11997389793396,
      "learning_rate": 2.6726057906458796e-07,
      "loss": 1.4486,
      "step": 4494
    },
    {
      "epoch": 0.9988888888888889,
      "grad_norm": 1.1092532873153687,
      "learning_rate": 2.2271714922049e-07,
      "loss": 1.3308,
      "step": 4495
    },
    {
      "epoch": 0.9991111111111111,
      "grad_norm": 0.17926262319087982,
      "learning_rate": 1.7817371937639199e-07,
      "loss": 0.0297,
      "step": 4496
    },
    {
      "epoch": 0.9993333333333333,
      "grad_norm": 1.144982933998108,
      "learning_rate": 1.3363028953229398e-07,
      "loss": 1.2413,
      "step": 4497
    },
    {
      "epoch": 0.9995555555555555,
      "grad_norm": 1.1863489151000977,
      "learning_rate": 8.908685968819599e-08,
      "loss": 1.1693,
      "step": 4498
    },
    {
      "epoch": 0.9997777777777778,
      "grad_norm": 0.7812955975532532,
      "learning_rate": 4.4543429844097996e-08,
      "loss": 0.5973,
      "step": 4499
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0512616634368896,
      "learning_rate": 0.0,
      "loss": 0.8343,
      "step": 4500
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.1682192087173462,
      "eval_runtime": 240.8872,
      "eval_samples_per_second": 4.151,
      "eval_steps_per_second": 4.151,
      "step": 4500
    }
  ],
  "logging_steps": 1,
  "max_steps": 4500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.847769692985754e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}