{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999953931911365, "eval_steps": 500, "global_step": 10853, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.213617727000507e-05, "grad_norm": 5.052331877744982, "learning_rate": 9.208103130755065e-09, "loss": 0.3834, "step": 1 }, { "epoch": 0.00018427235454001014, "grad_norm": 4.968514973300749, "learning_rate": 1.841620626151013e-08, "loss": 0.3996, "step": 2 }, { "epoch": 0.0002764085318100152, "grad_norm": 5.173905523123122, "learning_rate": 2.7624309392265195e-08, "loss": 0.4053, "step": 3 }, { "epoch": 0.0003685447090800203, "grad_norm": 4.954247638334854, "learning_rate": 3.683241252302026e-08, "loss": 0.3794, "step": 4 }, { "epoch": 0.0004606808863500253, "grad_norm": 4.9278156311152586, "learning_rate": 4.604051565377533e-08, "loss": 0.3735, "step": 5 }, { "epoch": 0.0005528170636200304, "grad_norm": 4.865883765294612, "learning_rate": 5.524861878453039e-08, "loss": 0.3882, "step": 6 }, { "epoch": 0.0006449532408900354, "grad_norm": 4.8700873305276255, "learning_rate": 6.445672191528546e-08, "loss": 0.3758, "step": 7 }, { "epoch": 0.0007370894181600406, "grad_norm": 5.087648327313099, "learning_rate": 7.366482504604052e-08, "loss": 0.3758, "step": 8 }, { "epoch": 0.0008292255954300456, "grad_norm": 5.30641963090733, "learning_rate": 8.287292817679558e-08, "loss": 0.4019, "step": 9 }, { "epoch": 0.0009213617727000506, "grad_norm": 5.070246570827928, "learning_rate": 9.208103130755066e-08, "loss": 0.3827, "step": 10 }, { "epoch": 0.0010134979499700557, "grad_norm": 5.216031896054235, "learning_rate": 1.0128913443830572e-07, "loss": 0.3846, "step": 11 }, { "epoch": 0.0011056341272400608, "grad_norm": 4.876861133902156, "learning_rate": 1.1049723756906078e-07, "loss": 0.3843, "step": 12 }, { "epoch": 0.001197770304510066, "grad_norm": 5.023561914398882, "learning_rate": 1.1970534069981586e-07, "loss": 0.3511, "step": 13 }, { "epoch": 0.0012899064817800709, "grad_norm": 5.145451738529512, "learning_rate": 1.2891344383057092e-07, "loss": 0.3672, "step": 14 }, { "epoch": 0.001382042659050076, "grad_norm": 4.792948201942286, "learning_rate": 1.3812154696132598e-07, "loss": 0.3796, "step": 15 }, { "epoch": 0.0014741788363200811, "grad_norm": 4.551919134485809, "learning_rate": 1.4732965009208104e-07, "loss": 0.3803, "step": 16 }, { "epoch": 0.001566315013590086, "grad_norm": 4.8263836273678145, "learning_rate": 1.5653775322283613e-07, "loss": 0.3716, "step": 17 }, { "epoch": 0.0016584511908600912, "grad_norm": 4.740504995965325, "learning_rate": 1.6574585635359117e-07, "loss": 0.3838, "step": 18 }, { "epoch": 0.0017505873681300963, "grad_norm": 4.3895949096154805, "learning_rate": 1.7495395948434625e-07, "loss": 0.3516, "step": 19 }, { "epoch": 0.0018427235454001013, "grad_norm": 4.730549482054788, "learning_rate": 1.8416206261510132e-07, "loss": 0.3971, "step": 20 }, { "epoch": 0.0019348597226701064, "grad_norm": 4.326518829493085, "learning_rate": 1.9337016574585635e-07, "loss": 0.3621, "step": 21 }, { "epoch": 0.0020269958999401113, "grad_norm": 4.411579317357037, "learning_rate": 2.0257826887661144e-07, "loss": 0.3671, "step": 22 }, { "epoch": 0.0021191320772101165, "grad_norm": 4.406425398983609, "learning_rate": 2.1178637200736653e-07, "loss": 0.3586, "step": 23 }, { "epoch": 0.0022112682544801216, "grad_norm": 4.250027158629804, "learning_rate": 2.2099447513812156e-07, "loss": 0.3351, "step": 24 }, { "epoch": 0.0023034044317501268, "grad_norm": 4.453036416890763, "learning_rate": 2.3020257826887662e-07, "loss": 0.3547, "step": 25 }, { "epoch": 0.002395540609020132, "grad_norm": 4.338068169465262, "learning_rate": 2.394106813996317e-07, "loss": 0.3643, "step": 26 }, { "epoch": 0.002487676786290137, "grad_norm": 4.081751082657096, "learning_rate": 2.486187845303868e-07, "loss": 0.3735, "step": 27 }, { "epoch": 0.0025798129635601417, "grad_norm": 3.759847993257449, "learning_rate": 2.5782688766114184e-07, "loss": 0.3538, "step": 28 }, { "epoch": 0.002671949140830147, "grad_norm": 3.656922967111338, "learning_rate": 2.670349907918969e-07, "loss": 0.3466, "step": 29 }, { "epoch": 0.002764085318100152, "grad_norm": 3.5621860682515782, "learning_rate": 2.7624309392265196e-07, "loss": 0.3678, "step": 30 }, { "epoch": 0.002856221495370157, "grad_norm": 3.074394037601397, "learning_rate": 2.85451197053407e-07, "loss": 0.3472, "step": 31 }, { "epoch": 0.0029483576726401623, "grad_norm": 3.0932831564716263, "learning_rate": 2.946593001841621e-07, "loss": 0.3326, "step": 32 }, { "epoch": 0.0030404938499101674, "grad_norm": 3.145618703123189, "learning_rate": 3.0386740331491715e-07, "loss": 0.3714, "step": 33 }, { "epoch": 0.003132630027180172, "grad_norm": 2.847899608744511, "learning_rate": 3.1307550644567226e-07, "loss": 0.3392, "step": 34 }, { "epoch": 0.0032247662044501773, "grad_norm": 2.9243824195879493, "learning_rate": 3.2228360957642727e-07, "loss": 0.3501, "step": 35 }, { "epoch": 0.0033169023817201824, "grad_norm": 2.7832329637023085, "learning_rate": 3.3149171270718233e-07, "loss": 0.3466, "step": 36 }, { "epoch": 0.0034090385589901876, "grad_norm": 2.572516529700843, "learning_rate": 3.4069981583793745e-07, "loss": 0.3354, "step": 37 }, { "epoch": 0.0035011747362601927, "grad_norm": 2.2478775351473783, "learning_rate": 3.499079189686925e-07, "loss": 0.3341, "step": 38 }, { "epoch": 0.003593310913530198, "grad_norm": 2.4125693100520214, "learning_rate": 3.591160220994475e-07, "loss": 0.3518, "step": 39 }, { "epoch": 0.0036854470908002025, "grad_norm": 2.161167186955303, "learning_rate": 3.6832412523020263e-07, "loss": 0.2944, "step": 40 }, { "epoch": 0.0037775832680702077, "grad_norm": 2.340176936611769, "learning_rate": 3.775322283609577e-07, "loss": 0.3516, "step": 41 }, { "epoch": 0.003869719445340213, "grad_norm": 2.0582341569455407, "learning_rate": 3.867403314917127e-07, "loss": 0.2974, "step": 42 }, { "epoch": 0.003961855622610218, "grad_norm": 2.1235508876322244, "learning_rate": 3.959484346224678e-07, "loss": 0.3482, "step": 43 }, { "epoch": 0.004053991799880223, "grad_norm": 1.810336528178075, "learning_rate": 4.051565377532229e-07, "loss": 0.2894, "step": 44 }, { "epoch": 0.004146127977150228, "grad_norm": 1.9229985402496037, "learning_rate": 4.1436464088397794e-07, "loss": 0.3062, "step": 45 }, { "epoch": 0.004238264154420233, "grad_norm": 1.8631489706081992, "learning_rate": 4.2357274401473305e-07, "loss": 0.318, "step": 46 }, { "epoch": 0.0043304003316902385, "grad_norm": 1.7237134286009292, "learning_rate": 4.3278084714548806e-07, "loss": 0.3012, "step": 47 }, { "epoch": 0.004422536508960243, "grad_norm": 1.849534050723691, "learning_rate": 4.419889502762431e-07, "loss": 0.3069, "step": 48 }, { "epoch": 0.004514672686230248, "grad_norm": 1.7076022714184271, "learning_rate": 4.5119705340699824e-07, "loss": 0.3099, "step": 49 }, { "epoch": 0.0046068088635002535, "grad_norm": 1.8201092125421363, "learning_rate": 4.6040515653775325e-07, "loss": 0.3363, "step": 50 }, { "epoch": 0.004698945040770258, "grad_norm": 1.5793947312415075, "learning_rate": 4.696132596685083e-07, "loss": 0.2906, "step": 51 }, { "epoch": 0.004791081218040264, "grad_norm": 1.7063324415343504, "learning_rate": 4.788213627992634e-07, "loss": 0.3293, "step": 52 }, { "epoch": 0.0048832173953102685, "grad_norm": 1.5334969451288736, "learning_rate": 4.880294659300184e-07, "loss": 0.2983, "step": 53 }, { "epoch": 0.004975353572580274, "grad_norm": 1.5375570913421324, "learning_rate": 4.972375690607735e-07, "loss": 0.3062, "step": 54 }, { "epoch": 0.005067489749850279, "grad_norm": 1.531795714777244, "learning_rate": 5.064456721915287e-07, "loss": 0.2957, "step": 55 }, { "epoch": 0.0051596259271202835, "grad_norm": 1.6190246422343229, "learning_rate": 5.156537753222837e-07, "loss": 0.3123, "step": 56 }, { "epoch": 0.005251762104390289, "grad_norm": 1.550801896644375, "learning_rate": 5.248618784530387e-07, "loss": 0.3136, "step": 57 }, { "epoch": 0.005343898281660294, "grad_norm": 1.4783938737129982, "learning_rate": 5.340699815837938e-07, "loss": 0.2735, "step": 58 }, { "epoch": 0.005436034458930299, "grad_norm": 1.4621021369750078, "learning_rate": 5.432780847145488e-07, "loss": 0.2884, "step": 59 }, { "epoch": 0.005528170636200304, "grad_norm": 1.3921579835622648, "learning_rate": 5.524861878453039e-07, "loss": 0.2867, "step": 60 }, { "epoch": 0.005620306813470309, "grad_norm": 1.5262968398070378, "learning_rate": 5.61694290976059e-07, "loss": 0.298, "step": 61 }, { "epoch": 0.005712442990740314, "grad_norm": 1.5915698039983843, "learning_rate": 5.70902394106814e-07, "loss": 0.3088, "step": 62 }, { "epoch": 0.005804579168010319, "grad_norm": 1.5397162188270683, "learning_rate": 5.80110497237569e-07, "loss": 0.2788, "step": 63 }, { "epoch": 0.005896715345280325, "grad_norm": 1.4822801901893587, "learning_rate": 5.893186003683242e-07, "loss": 0.3007, "step": 64 }, { "epoch": 0.005988851522550329, "grad_norm": 1.4690507666454256, "learning_rate": 5.985267034990793e-07, "loss": 0.2784, "step": 65 }, { "epoch": 0.006080987699820335, "grad_norm": 1.3696146492094494, "learning_rate": 6.077348066298343e-07, "loss": 0.2824, "step": 66 }, { "epoch": 0.00617312387709034, "grad_norm": 1.4331132908765383, "learning_rate": 6.169429097605894e-07, "loss": 0.2809, "step": 67 }, { "epoch": 0.006265260054360344, "grad_norm": 1.3225884273908008, "learning_rate": 6.261510128913445e-07, "loss": 0.2803, "step": 68 }, { "epoch": 0.00635739623163035, "grad_norm": 1.3651533674561502, "learning_rate": 6.353591160220995e-07, "loss": 0.2722, "step": 69 }, { "epoch": 0.0064495324089003546, "grad_norm": 1.3972206619657161, "learning_rate": 6.445672191528545e-07, "loss": 0.2856, "step": 70 }, { "epoch": 0.00654166858617036, "grad_norm": 1.3786952654094349, "learning_rate": 6.537753222836097e-07, "loss": 0.2821, "step": 71 }, { "epoch": 0.006633804763440365, "grad_norm": 1.3835265992223351, "learning_rate": 6.629834254143647e-07, "loss": 0.2859, "step": 72 }, { "epoch": 0.0067259409407103695, "grad_norm": 1.3167354078664357, "learning_rate": 6.721915285451197e-07, "loss": 0.2693, "step": 73 }, { "epoch": 0.006818077117980375, "grad_norm": 1.2157572639965608, "learning_rate": 6.813996316758749e-07, "loss": 0.2678, "step": 74 }, { "epoch": 0.00691021329525038, "grad_norm": 1.4145127549666732, "learning_rate": 6.906077348066299e-07, "loss": 0.2752, "step": 75 }, { "epoch": 0.007002349472520385, "grad_norm": 1.3643122772858198, "learning_rate": 6.99815837937385e-07, "loss": 0.2893, "step": 76 }, { "epoch": 0.00709448564979039, "grad_norm": 1.292319390060687, "learning_rate": 7.0902394106814e-07, "loss": 0.26, "step": 77 }, { "epoch": 0.007186621827060396, "grad_norm": 1.4121119413990915, "learning_rate": 7.18232044198895e-07, "loss": 0.2838, "step": 78 }, { "epoch": 0.0072787580043304, "grad_norm": 1.3573902039549837, "learning_rate": 7.274401473296501e-07, "loss": 0.2673, "step": 79 }, { "epoch": 0.007370894181600405, "grad_norm": 1.251616266192662, "learning_rate": 7.366482504604053e-07, "loss": 0.2795, "step": 80 }, { "epoch": 0.007463030358870411, "grad_norm": 1.2676616962987843, "learning_rate": 7.458563535911603e-07, "loss": 0.274, "step": 81 }, { "epoch": 0.007555166536140415, "grad_norm": 1.2779375532844437, "learning_rate": 7.550644567219154e-07, "loss": 0.237, "step": 82 }, { "epoch": 0.007647302713410421, "grad_norm": 1.2823725432016053, "learning_rate": 7.642725598526704e-07, "loss": 0.2535, "step": 83 }, { "epoch": 0.007739438890680426, "grad_norm": 1.2914112825622275, "learning_rate": 7.734806629834254e-07, "loss": 0.2579, "step": 84 }, { "epoch": 0.00783157506795043, "grad_norm": 1.255061266268493, "learning_rate": 7.826887661141805e-07, "loss": 0.2553, "step": 85 }, { "epoch": 0.007923711245220436, "grad_norm": 1.37482617750355, "learning_rate": 7.918968692449356e-07, "loss": 0.2552, "step": 86 }, { "epoch": 0.008015847422490441, "grad_norm": 1.1987336596664846, "learning_rate": 8.011049723756907e-07, "loss": 0.2544, "step": 87 }, { "epoch": 0.008107983599760445, "grad_norm": 1.352509072072678, "learning_rate": 8.103130755064458e-07, "loss": 0.2697, "step": 88 }, { "epoch": 0.008200119777030451, "grad_norm": 1.2902205773865623, "learning_rate": 8.195211786372008e-07, "loss": 0.2477, "step": 89 }, { "epoch": 0.008292255954300456, "grad_norm": 1.395341337522817, "learning_rate": 8.287292817679559e-07, "loss": 0.2627, "step": 90 }, { "epoch": 0.00838439213157046, "grad_norm": 1.268160686732221, "learning_rate": 8.379373848987109e-07, "loss": 0.2851, "step": 91 }, { "epoch": 0.008476528308840466, "grad_norm": 1.3489234822086935, "learning_rate": 8.471454880294661e-07, "loss": 0.2619, "step": 92 }, { "epoch": 0.008568664486110471, "grad_norm": 1.3220216831015386, "learning_rate": 8.563535911602211e-07, "loss": 0.2579, "step": 93 }, { "epoch": 0.008660800663380477, "grad_norm": 1.1760530408428194, "learning_rate": 8.655616942909761e-07, "loss": 0.2461, "step": 94 }, { "epoch": 0.008752936840650481, "grad_norm": 1.5393872750676103, "learning_rate": 8.747697974217311e-07, "loss": 0.2838, "step": 95 }, { "epoch": 0.008845073017920486, "grad_norm": 1.4358175929752721, "learning_rate": 8.839779005524863e-07, "loss": 0.255, "step": 96 }, { "epoch": 0.008937209195190492, "grad_norm": 1.3687969807695604, "learning_rate": 8.931860036832413e-07, "loss": 0.2563, "step": 97 }, { "epoch": 0.009029345372460496, "grad_norm": 1.3219226525379928, "learning_rate": 9.023941068139965e-07, "loss": 0.2482, "step": 98 }, { "epoch": 0.009121481549730501, "grad_norm": 1.3519450540596627, "learning_rate": 9.116022099447515e-07, "loss": 0.2524, "step": 99 }, { "epoch": 0.009213617727000507, "grad_norm": 1.2749396142008642, "learning_rate": 9.208103130755065e-07, "loss": 0.2371, "step": 100 }, { "epoch": 0.009305753904270513, "grad_norm": 1.1980272664855356, "learning_rate": 9.300184162062616e-07, "loss": 0.2508, "step": 101 }, { "epoch": 0.009397890081540516, "grad_norm": 1.3779116744427602, "learning_rate": 9.392265193370166e-07, "loss": 0.2719, "step": 102 }, { "epoch": 0.009490026258810522, "grad_norm": 1.2481451142639794, "learning_rate": 9.484346224677716e-07, "loss": 0.2406, "step": 103 }, { "epoch": 0.009582162436080528, "grad_norm": 1.2456086574919798, "learning_rate": 9.576427255985269e-07, "loss": 0.2594, "step": 104 }, { "epoch": 0.009674298613350531, "grad_norm": 1.3052822180290655, "learning_rate": 9.66850828729282e-07, "loss": 0.2578, "step": 105 }, { "epoch": 0.009766434790620537, "grad_norm": 1.2798135957331098, "learning_rate": 9.760589318600369e-07, "loss": 0.2449, "step": 106 }, { "epoch": 0.009858570967890543, "grad_norm": 1.2233602450508594, "learning_rate": 9.85267034990792e-07, "loss": 0.23, "step": 107 }, { "epoch": 0.009950707145160548, "grad_norm": 1.27138699960983, "learning_rate": 9.94475138121547e-07, "loss": 0.2575, "step": 108 }, { "epoch": 0.010042843322430552, "grad_norm": 1.4590705423131205, "learning_rate": 1.003683241252302e-06, "loss": 0.2414, "step": 109 }, { "epoch": 0.010134979499700558, "grad_norm": 1.3032600900132378, "learning_rate": 1.0128913443830573e-06, "loss": 0.2422, "step": 110 }, { "epoch": 0.010227115676970563, "grad_norm": 1.246423626921792, "learning_rate": 1.0220994475138122e-06, "loss": 0.2504, "step": 111 }, { "epoch": 0.010319251854240567, "grad_norm": 1.3617978945476827, "learning_rate": 1.0313075506445673e-06, "loss": 0.2625, "step": 112 }, { "epoch": 0.010411388031510573, "grad_norm": 1.3138368592325604, "learning_rate": 1.0405156537753222e-06, "loss": 0.2717, "step": 113 }, { "epoch": 0.010503524208780578, "grad_norm": 1.45931937030065, "learning_rate": 1.0497237569060774e-06, "loss": 0.2338, "step": 114 }, { "epoch": 0.010595660386050582, "grad_norm": 1.2209072353641341, "learning_rate": 1.0589318600368325e-06, "loss": 0.2571, "step": 115 }, { "epoch": 0.010687796563320588, "grad_norm": 1.2221332342582498, "learning_rate": 1.0681399631675876e-06, "loss": 0.2181, "step": 116 }, { "epoch": 0.010779932740590593, "grad_norm": 1.3055782277521266, "learning_rate": 1.0773480662983427e-06, "loss": 0.2413, "step": 117 }, { "epoch": 0.010872068917860599, "grad_norm": 1.3001013433954538, "learning_rate": 1.0865561694290976e-06, "loss": 0.2561, "step": 118 }, { "epoch": 0.010964205095130602, "grad_norm": 1.4430759553426427, "learning_rate": 1.0957642725598527e-06, "loss": 0.2393, "step": 119 }, { "epoch": 0.011056341272400608, "grad_norm": 1.4425457370059072, "learning_rate": 1.1049723756906078e-06, "loss": 0.2349, "step": 120 }, { "epoch": 0.011148477449670614, "grad_norm": 1.235681217544338, "learning_rate": 1.114180478821363e-06, "loss": 0.2315, "step": 121 }, { "epoch": 0.011240613626940617, "grad_norm": 1.291133894680049, "learning_rate": 1.123388581952118e-06, "loss": 0.2442, "step": 122 }, { "epoch": 0.011332749804210623, "grad_norm": 1.2986607434244122, "learning_rate": 1.132596685082873e-06, "loss": 0.2427, "step": 123 }, { "epoch": 0.011424885981480629, "grad_norm": 1.3600935260637073, "learning_rate": 1.141804788213628e-06, "loss": 0.2507, "step": 124 }, { "epoch": 0.011517022158750634, "grad_norm": 1.2882709655715936, "learning_rate": 1.1510128913443832e-06, "loss": 0.2491, "step": 125 }, { "epoch": 0.011609158336020638, "grad_norm": 1.236349701513875, "learning_rate": 1.160220994475138e-06, "loss": 0.2238, "step": 126 }, { "epoch": 0.011701294513290644, "grad_norm": 1.277175622784304, "learning_rate": 1.1694290976058934e-06, "loss": 0.2306, "step": 127 }, { "epoch": 0.01179343069056065, "grad_norm": 1.3466287077359933, "learning_rate": 1.1786372007366483e-06, "loss": 0.2529, "step": 128 }, { "epoch": 0.011885566867830653, "grad_norm": 1.2600725855409367, "learning_rate": 1.1878453038674034e-06, "loss": 0.2297, "step": 129 }, { "epoch": 0.011977703045100659, "grad_norm": 1.1909522608327074, "learning_rate": 1.1970534069981586e-06, "loss": 0.2428, "step": 130 }, { "epoch": 0.012069839222370664, "grad_norm": 1.3275342654407982, "learning_rate": 1.2062615101289135e-06, "loss": 0.2387, "step": 131 }, { "epoch": 0.01216197539964067, "grad_norm": 1.3832794168368345, "learning_rate": 1.2154696132596686e-06, "loss": 0.2606, "step": 132 }, { "epoch": 0.012254111576910674, "grad_norm": 1.4083734454299084, "learning_rate": 1.2246777163904237e-06, "loss": 0.2558, "step": 133 }, { "epoch": 0.01234624775418068, "grad_norm": 1.3604330663851263, "learning_rate": 1.2338858195211788e-06, "loss": 0.2131, "step": 134 }, { "epoch": 0.012438383931450685, "grad_norm": 1.368946573958846, "learning_rate": 1.243093922651934e-06, "loss": 0.2607, "step": 135 }, { "epoch": 0.012530520108720689, "grad_norm": 1.4349854840515686, "learning_rate": 1.252302025782689e-06, "loss": 0.2543, "step": 136 }, { "epoch": 0.012622656285990694, "grad_norm": 1.3053177174437076, "learning_rate": 1.261510128913444e-06, "loss": 0.2066, "step": 137 }, { "epoch": 0.0127147924632607, "grad_norm": 1.4065693991109225, "learning_rate": 1.270718232044199e-06, "loss": 0.2428, "step": 138 }, { "epoch": 0.012806928640530704, "grad_norm": 1.3060084203827886, "learning_rate": 1.2799263351749542e-06, "loss": 0.2452, "step": 139 }, { "epoch": 0.012899064817800709, "grad_norm": 1.42770860862496, "learning_rate": 1.289134438305709e-06, "loss": 0.2375, "step": 140 }, { "epoch": 0.012991200995070715, "grad_norm": 1.3712130826622553, "learning_rate": 1.2983425414364642e-06, "loss": 0.2296, "step": 141 }, { "epoch": 0.01308333717234072, "grad_norm": 1.2949739115350103, "learning_rate": 1.3075506445672193e-06, "loss": 0.2249, "step": 142 }, { "epoch": 0.013175473349610724, "grad_norm": 1.4444498310803144, "learning_rate": 1.3167587476979742e-06, "loss": 0.2336, "step": 143 }, { "epoch": 0.01326760952688073, "grad_norm": 1.327765157794959, "learning_rate": 1.3259668508287293e-06, "loss": 0.2305, "step": 144 }, { "epoch": 0.013359745704150735, "grad_norm": 1.49483024693552, "learning_rate": 1.3351749539594844e-06, "loss": 0.2524, "step": 145 }, { "epoch": 0.013451881881420739, "grad_norm": 1.4128065918962016, "learning_rate": 1.3443830570902393e-06, "loss": 0.2421, "step": 146 }, { "epoch": 0.013544018058690745, "grad_norm": 1.458256896983337, "learning_rate": 1.3535911602209945e-06, "loss": 0.256, "step": 147 }, { "epoch": 0.01363615423596075, "grad_norm": 1.5761688856396325, "learning_rate": 1.3627992633517498e-06, "loss": 0.2283, "step": 148 }, { "epoch": 0.013728290413230756, "grad_norm": 1.4268159296492195, "learning_rate": 1.372007366482505e-06, "loss": 0.231, "step": 149 }, { "epoch": 0.01382042659050076, "grad_norm": 1.240181839931121, "learning_rate": 1.3812154696132598e-06, "loss": 0.2265, "step": 150 }, { "epoch": 0.013912562767770765, "grad_norm": 1.3560921208474808, "learning_rate": 1.390423572744015e-06, "loss": 0.2347, "step": 151 }, { "epoch": 0.01400469894504077, "grad_norm": 1.440218247026957, "learning_rate": 1.39963167587477e-06, "loss": 0.2265, "step": 152 }, { "epoch": 0.014096835122310775, "grad_norm": 1.3168656248813988, "learning_rate": 1.408839779005525e-06, "loss": 0.2361, "step": 153 }, { "epoch": 0.01418897129958078, "grad_norm": 1.387358557045741, "learning_rate": 1.41804788213628e-06, "loss": 0.2538, "step": 154 }, { "epoch": 0.014281107476850786, "grad_norm": 1.4226212454591165, "learning_rate": 1.4272559852670352e-06, "loss": 0.2386, "step": 155 }, { "epoch": 0.014373243654120791, "grad_norm": 1.4868929751549826, "learning_rate": 1.43646408839779e-06, "loss": 0.2503, "step": 156 }, { "epoch": 0.014465379831390795, "grad_norm": 1.3156667636135637, "learning_rate": 1.4456721915285452e-06, "loss": 0.2439, "step": 157 }, { "epoch": 0.0145575160086608, "grad_norm": 1.3284249384355258, "learning_rate": 1.4548802946593003e-06, "loss": 0.2325, "step": 158 }, { "epoch": 0.014649652185930806, "grad_norm": 1.2878557366716903, "learning_rate": 1.4640883977900552e-06, "loss": 0.2056, "step": 159 }, { "epoch": 0.01474178836320081, "grad_norm": 1.4793729308344177, "learning_rate": 1.4732965009208105e-06, "loss": 0.2571, "step": 160 }, { "epoch": 0.014833924540470816, "grad_norm": 1.2481891533067875, "learning_rate": 1.4825046040515656e-06, "loss": 0.2195, "step": 161 }, { "epoch": 0.014926060717740821, "grad_norm": 1.3944875094813025, "learning_rate": 1.4917127071823205e-06, "loss": 0.2245, "step": 162 }, { "epoch": 0.015018196895010825, "grad_norm": 1.37854617862289, "learning_rate": 1.5009208103130757e-06, "loss": 0.2298, "step": 163 }, { "epoch": 0.01511033307228083, "grad_norm": 1.3740996859347074, "learning_rate": 1.5101289134438308e-06, "loss": 0.2343, "step": 164 }, { "epoch": 0.015202469249550836, "grad_norm": 1.379655917316226, "learning_rate": 1.5193370165745857e-06, "loss": 0.2271, "step": 165 }, { "epoch": 0.015294605426820842, "grad_norm": 1.2845573670743051, "learning_rate": 1.5285451197053408e-06, "loss": 0.221, "step": 166 }, { "epoch": 0.015386741604090846, "grad_norm": 1.3382949270875386, "learning_rate": 1.537753222836096e-06, "loss": 0.2053, "step": 167 }, { "epoch": 0.015478877781360851, "grad_norm": 1.2241039135765772, "learning_rate": 1.5469613259668508e-06, "loss": 0.2051, "step": 168 }, { "epoch": 0.015571013958630857, "grad_norm": 1.353071391505974, "learning_rate": 1.556169429097606e-06, "loss": 0.2238, "step": 169 }, { "epoch": 0.01566315013590086, "grad_norm": 1.3108612555966297, "learning_rate": 1.565377532228361e-06, "loss": 0.2422, "step": 170 }, { "epoch": 0.015755286313170868, "grad_norm": 1.3798597771479884, "learning_rate": 1.574585635359116e-06, "loss": 0.211, "step": 171 }, { "epoch": 0.015847422490440872, "grad_norm": 1.2463211759017325, "learning_rate": 1.5837937384898713e-06, "loss": 0.2124, "step": 172 }, { "epoch": 0.015939558667710876, "grad_norm": 1.2337293449366062, "learning_rate": 1.5930018416206264e-06, "loss": 0.2183, "step": 173 }, { "epoch": 0.016031694844980883, "grad_norm": 1.2103763277878807, "learning_rate": 1.6022099447513815e-06, "loss": 0.2236, "step": 174 }, { "epoch": 0.016123831022250887, "grad_norm": 1.220903675064504, "learning_rate": 1.6114180478821364e-06, "loss": 0.2141, "step": 175 }, { "epoch": 0.01621596719952089, "grad_norm": 1.358619080502357, "learning_rate": 1.6206261510128915e-06, "loss": 0.2423, "step": 176 }, { "epoch": 0.016308103376790898, "grad_norm": 1.2782364766180747, "learning_rate": 1.6298342541436466e-06, "loss": 0.2346, "step": 177 }, { "epoch": 0.016400239554060902, "grad_norm": 1.3105220268621274, "learning_rate": 1.6390423572744015e-06, "loss": 0.2506, "step": 178 }, { "epoch": 0.016492375731330906, "grad_norm": 1.2782540222227745, "learning_rate": 1.6482504604051566e-06, "loss": 0.2216, "step": 179 }, { "epoch": 0.016584511908600913, "grad_norm": 1.5337856840982391, "learning_rate": 1.6574585635359118e-06, "loss": 0.2348, "step": 180 }, { "epoch": 0.016676648085870917, "grad_norm": 1.2855938394022077, "learning_rate": 1.6666666666666667e-06, "loss": 0.2321, "step": 181 }, { "epoch": 0.01676878426314092, "grad_norm": 1.3688482992570172, "learning_rate": 1.6758747697974218e-06, "loss": 0.2443, "step": 182 }, { "epoch": 0.016860920440410928, "grad_norm": 1.3343184731235973, "learning_rate": 1.685082872928177e-06, "loss": 0.2121, "step": 183 }, { "epoch": 0.016953056617680932, "grad_norm": 1.225401208028096, "learning_rate": 1.6942909760589322e-06, "loss": 0.2287, "step": 184 }, { "epoch": 0.01704519279495094, "grad_norm": 1.2179622098203036, "learning_rate": 1.7034990791896871e-06, "loss": 0.2049, "step": 185 }, { "epoch": 0.017137328972220943, "grad_norm": 1.5066030755860567, "learning_rate": 1.7127071823204422e-06, "loss": 0.2424, "step": 186 }, { "epoch": 0.017229465149490947, "grad_norm": 1.4045090484290212, "learning_rate": 1.7219152854511971e-06, "loss": 0.2227, "step": 187 }, { "epoch": 0.017321601326760954, "grad_norm": 1.272435969600215, "learning_rate": 1.7311233885819523e-06, "loss": 0.2431, "step": 188 }, { "epoch": 0.017413737504030958, "grad_norm": 1.3028523579116038, "learning_rate": 1.7403314917127074e-06, "loss": 0.2179, "step": 189 }, { "epoch": 0.017505873681300962, "grad_norm": 1.515833129596805, "learning_rate": 1.7495395948434623e-06, "loss": 0.2518, "step": 190 }, { "epoch": 0.01759800985857097, "grad_norm": 1.3640712213334758, "learning_rate": 1.7587476979742174e-06, "loss": 0.2519, "step": 191 }, { "epoch": 0.017690146035840973, "grad_norm": 1.2963567960878155, "learning_rate": 1.7679558011049725e-06, "loss": 0.2122, "step": 192 }, { "epoch": 0.017782282213110977, "grad_norm": 1.2385890307787466, "learning_rate": 1.7771639042357274e-06, "loss": 0.218, "step": 193 }, { "epoch": 0.017874418390380984, "grad_norm": 1.2918958910678935, "learning_rate": 1.7863720073664825e-06, "loss": 0.2275, "step": 194 }, { "epoch": 0.017966554567650988, "grad_norm": 1.3240547033002077, "learning_rate": 1.7955801104972378e-06, "loss": 0.2265, "step": 195 }, { "epoch": 0.01805869074492099, "grad_norm": 1.3215736623947212, "learning_rate": 1.804788213627993e-06, "loss": 0.2106, "step": 196 }, { "epoch": 0.018150826922191, "grad_norm": 1.3962463357518629, "learning_rate": 1.8139963167587479e-06, "loss": 0.2421, "step": 197 }, { "epoch": 0.018242963099461003, "grad_norm": 1.3634363454930103, "learning_rate": 1.823204419889503e-06, "loss": 0.2258, "step": 198 }, { "epoch": 0.018335099276731007, "grad_norm": 1.3838622302412065, "learning_rate": 1.832412523020258e-06, "loss": 0.2086, "step": 199 }, { "epoch": 0.018427235454001014, "grad_norm": 1.3181256821025102, "learning_rate": 1.841620626151013e-06, "loss": 0.2129, "step": 200 }, { "epoch": 0.018519371631271018, "grad_norm": 1.270539722225883, "learning_rate": 1.8508287292817681e-06, "loss": 0.2191, "step": 201 }, { "epoch": 0.018611507808541025, "grad_norm": 1.265711181176557, "learning_rate": 1.8600368324125232e-06, "loss": 0.2061, "step": 202 }, { "epoch": 0.01870364398581103, "grad_norm": 1.4039473787664178, "learning_rate": 1.8692449355432781e-06, "loss": 0.2277, "step": 203 }, { "epoch": 0.018795780163081033, "grad_norm": 1.345966851950806, "learning_rate": 1.8784530386740332e-06, "loss": 0.2065, "step": 204 }, { "epoch": 0.01888791634035104, "grad_norm": 1.3892265247643658, "learning_rate": 1.8876611418047884e-06, "loss": 0.2117, "step": 205 }, { "epoch": 0.018980052517621044, "grad_norm": 1.3391019958709516, "learning_rate": 1.8968692449355433e-06, "loss": 0.2241, "step": 206 }, { "epoch": 0.019072188694891048, "grad_norm": 1.3767301542758652, "learning_rate": 1.9060773480662986e-06, "loss": 0.2394, "step": 207 }, { "epoch": 0.019164324872161055, "grad_norm": 1.193499504261302, "learning_rate": 1.9152854511970537e-06, "loss": 0.2147, "step": 208 }, { "epoch": 0.01925646104943106, "grad_norm": 1.42744498061299, "learning_rate": 1.9244935543278086e-06, "loss": 0.2454, "step": 209 }, { "epoch": 0.019348597226701063, "grad_norm": 1.2070717468524428, "learning_rate": 1.933701657458564e-06, "loss": 0.2281, "step": 210 }, { "epoch": 0.01944073340397107, "grad_norm": 1.2184409700694656, "learning_rate": 1.942909760589319e-06, "loss": 0.2011, "step": 211 }, { "epoch": 0.019532869581241074, "grad_norm": 1.244082773508379, "learning_rate": 1.9521178637200737e-06, "loss": 0.2198, "step": 212 }, { "epoch": 0.019625005758511078, "grad_norm": 1.1946783073071228, "learning_rate": 1.961325966850829e-06, "loss": 0.2174, "step": 213 }, { "epoch": 0.019717141935781085, "grad_norm": 1.2601297485847678, "learning_rate": 1.970534069981584e-06, "loss": 0.2225, "step": 214 }, { "epoch": 0.01980927811305109, "grad_norm": 1.3124765793917974, "learning_rate": 1.979742173112339e-06, "loss": 0.2267, "step": 215 }, { "epoch": 0.019901414290321096, "grad_norm": 1.3267678443080182, "learning_rate": 1.988950276243094e-06, "loss": 0.2297, "step": 216 }, { "epoch": 0.0199935504675911, "grad_norm": 1.267293008421713, "learning_rate": 1.998158379373849e-06, "loss": 0.2181, "step": 217 }, { "epoch": 0.020085686644861104, "grad_norm": 1.2619101408630657, "learning_rate": 2.007366482504604e-06, "loss": 0.2397, "step": 218 }, { "epoch": 0.02017782282213111, "grad_norm": 1.3636244878125987, "learning_rate": 2.0165745856353593e-06, "loss": 0.2253, "step": 219 }, { "epoch": 0.020269958999401115, "grad_norm": 1.394830925894432, "learning_rate": 2.0257826887661147e-06, "loss": 0.2252, "step": 220 }, { "epoch": 0.02036209517667112, "grad_norm": 1.2983165359381221, "learning_rate": 2.0349907918968696e-06, "loss": 0.2278, "step": 221 }, { "epoch": 0.020454231353941126, "grad_norm": 1.2967437740330148, "learning_rate": 2.0441988950276245e-06, "loss": 0.2124, "step": 222 }, { "epoch": 0.02054636753121113, "grad_norm": 1.4482194246277718, "learning_rate": 2.0534069981583794e-06, "loss": 0.2216, "step": 223 }, { "epoch": 0.020638503708481134, "grad_norm": 1.310894495587751, "learning_rate": 2.0626151012891347e-06, "loss": 0.222, "step": 224 }, { "epoch": 0.02073063988575114, "grad_norm": 1.2475533975236348, "learning_rate": 2.0718232044198896e-06, "loss": 0.2043, "step": 225 }, { "epoch": 0.020822776063021145, "grad_norm": 1.4060174527930498, "learning_rate": 2.0810313075506445e-06, "loss": 0.222, "step": 226 }, { "epoch": 0.02091491224029115, "grad_norm": 1.4368485294275846, "learning_rate": 2.0902394106814e-06, "loss": 0.2425, "step": 227 }, { "epoch": 0.021007048417561156, "grad_norm": 1.259305482075362, "learning_rate": 2.0994475138121547e-06, "loss": 0.223, "step": 228 }, { "epoch": 0.02109918459483116, "grad_norm": 1.433635435091614, "learning_rate": 2.1086556169429096e-06, "loss": 0.2223, "step": 229 }, { "epoch": 0.021191320772101164, "grad_norm": 1.3258788470822962, "learning_rate": 2.117863720073665e-06, "loss": 0.223, "step": 230 }, { "epoch": 0.02128345694937117, "grad_norm": 1.3345971348097236, "learning_rate": 2.1270718232044203e-06, "loss": 0.2088, "step": 231 }, { "epoch": 0.021375593126641175, "grad_norm": 1.1506446317260917, "learning_rate": 2.136279926335175e-06, "loss": 0.2183, "step": 232 }, { "epoch": 0.021467729303911182, "grad_norm": 1.2501482254633949, "learning_rate": 2.14548802946593e-06, "loss": 0.2218, "step": 233 }, { "epoch": 0.021559865481181186, "grad_norm": 1.2715617957043448, "learning_rate": 2.1546961325966854e-06, "loss": 0.2167, "step": 234 }, { "epoch": 0.02165200165845119, "grad_norm": 1.4251050947489576, "learning_rate": 2.1639042357274403e-06, "loss": 0.2378, "step": 235 }, { "epoch": 0.021744137835721197, "grad_norm": 1.2400115125049491, "learning_rate": 2.1731123388581952e-06, "loss": 0.2294, "step": 236 }, { "epoch": 0.0218362740129912, "grad_norm": 1.3035788835712026, "learning_rate": 2.1823204419889505e-06, "loss": 0.1968, "step": 237 }, { "epoch": 0.021928410190261205, "grad_norm": 1.471849667145631, "learning_rate": 2.1915285451197054e-06, "loss": 0.2228, "step": 238 }, { "epoch": 0.022020546367531212, "grad_norm": 1.3647414373400866, "learning_rate": 2.2007366482504604e-06, "loss": 0.224, "step": 239 }, { "epoch": 0.022112682544801216, "grad_norm": 1.537121143452077, "learning_rate": 2.2099447513812157e-06, "loss": 0.2334, "step": 240 }, { "epoch": 0.02220481872207122, "grad_norm": 1.2899612307328876, "learning_rate": 2.2191528545119706e-06, "loss": 0.207, "step": 241 }, { "epoch": 0.022296954899341227, "grad_norm": 1.2217522545050996, "learning_rate": 2.228360957642726e-06, "loss": 0.2236, "step": 242 }, { "epoch": 0.02238909107661123, "grad_norm": 1.395775888810296, "learning_rate": 2.237569060773481e-06, "loss": 0.2244, "step": 243 }, { "epoch": 0.022481227253881235, "grad_norm": 1.284804900306348, "learning_rate": 2.246777163904236e-06, "loss": 0.2098, "step": 244 }, { "epoch": 0.022573363431151242, "grad_norm": 1.304346808920648, "learning_rate": 2.255985267034991e-06, "loss": 0.2038, "step": 245 }, { "epoch": 0.022665499608421246, "grad_norm": 1.3086929964965677, "learning_rate": 2.265193370165746e-06, "loss": 0.221, "step": 246 }, { "epoch": 0.022757635785691253, "grad_norm": 1.2371068797697236, "learning_rate": 2.2744014732965013e-06, "loss": 0.2059, "step": 247 }, { "epoch": 0.022849771962961257, "grad_norm": 1.237860938668767, "learning_rate": 2.283609576427256e-06, "loss": 0.2116, "step": 248 }, { "epoch": 0.02294190814023126, "grad_norm": 1.1873106876741861, "learning_rate": 2.292817679558011e-06, "loss": 0.2044, "step": 249 }, { "epoch": 0.02303404431750127, "grad_norm": 1.284075741757394, "learning_rate": 2.3020257826887664e-06, "loss": 0.2265, "step": 250 }, { "epoch": 0.023126180494771272, "grad_norm": 1.2554034425448573, "learning_rate": 2.3112338858195213e-06, "loss": 0.2098, "step": 251 }, { "epoch": 0.023218316672041276, "grad_norm": 1.3561997983957859, "learning_rate": 2.320441988950276e-06, "loss": 0.2274, "step": 252 }, { "epoch": 0.023310452849311283, "grad_norm": 1.289899655742179, "learning_rate": 2.3296500920810315e-06, "loss": 0.203, "step": 253 }, { "epoch": 0.023402589026581287, "grad_norm": 1.207952118169262, "learning_rate": 2.338858195211787e-06, "loss": 0.2124, "step": 254 }, { "epoch": 0.02349472520385129, "grad_norm": 1.3805095413072321, "learning_rate": 2.3480662983425418e-06, "loss": 0.2284, "step": 255 }, { "epoch": 0.0235868613811213, "grad_norm": 1.3972049227450618, "learning_rate": 2.3572744014732967e-06, "loss": 0.2212, "step": 256 }, { "epoch": 0.023678997558391302, "grad_norm": 1.2247515177867434, "learning_rate": 2.366482504604052e-06, "loss": 0.2158, "step": 257 }, { "epoch": 0.023771133735661306, "grad_norm": 1.1692729997781546, "learning_rate": 2.375690607734807e-06, "loss": 0.2223, "step": 258 }, { "epoch": 0.023863269912931313, "grad_norm": 1.4293157160410055, "learning_rate": 2.384898710865562e-06, "loss": 0.2355, "step": 259 }, { "epoch": 0.023955406090201317, "grad_norm": 1.2833231867557153, "learning_rate": 2.394106813996317e-06, "loss": 0.2093, "step": 260 }, { "epoch": 0.02404754226747132, "grad_norm": 1.430588872964235, "learning_rate": 2.403314917127072e-06, "loss": 0.2299, "step": 261 }, { "epoch": 0.02413967844474133, "grad_norm": 1.3955869018367655, "learning_rate": 2.412523020257827e-06, "loss": 0.2235, "step": 262 }, { "epoch": 0.024231814622011332, "grad_norm": 1.3209105842207622, "learning_rate": 2.4217311233885823e-06, "loss": 0.2314, "step": 263 }, { "epoch": 0.02432395079928134, "grad_norm": 1.3675314084283223, "learning_rate": 2.430939226519337e-06, "loss": 0.192, "step": 264 }, { "epoch": 0.024416086976551343, "grad_norm": 1.3043781570646351, "learning_rate": 2.440147329650092e-06, "loss": 0.2218, "step": 265 }, { "epoch": 0.024508223153821347, "grad_norm": 1.2993635785678224, "learning_rate": 2.4493554327808474e-06, "loss": 0.2003, "step": 266 }, { "epoch": 0.024600359331091354, "grad_norm": 1.2707418652729778, "learning_rate": 2.4585635359116027e-06, "loss": 0.2036, "step": 267 }, { "epoch": 0.02469249550836136, "grad_norm": 1.2834662882271706, "learning_rate": 2.4677716390423576e-06, "loss": 0.2168, "step": 268 }, { "epoch": 0.024784631685631362, "grad_norm": 1.3433418131656627, "learning_rate": 2.4769797421731125e-06, "loss": 0.2164, "step": 269 }, { "epoch": 0.02487676786290137, "grad_norm": 1.3675989386071368, "learning_rate": 2.486187845303868e-06, "loss": 0.2136, "step": 270 }, { "epoch": 0.024968904040171373, "grad_norm": 1.2819757220021681, "learning_rate": 2.4953959484346228e-06, "loss": 0.2217, "step": 271 }, { "epoch": 0.025061040217441377, "grad_norm": 1.4596357277210503, "learning_rate": 2.504604051565378e-06, "loss": 0.2018, "step": 272 }, { "epoch": 0.025153176394711384, "grad_norm": 1.2597067670175457, "learning_rate": 2.513812154696133e-06, "loss": 0.2315, "step": 273 }, { "epoch": 0.025245312571981388, "grad_norm": 1.2971255477113983, "learning_rate": 2.523020257826888e-06, "loss": 0.2087, "step": 274 }, { "epoch": 0.025337448749251392, "grad_norm": 1.332947894390514, "learning_rate": 2.5322283609576432e-06, "loss": 0.2127, "step": 275 }, { "epoch": 0.0254295849265214, "grad_norm": 1.1260373924980331, "learning_rate": 2.541436464088398e-06, "loss": 0.1973, "step": 276 }, { "epoch": 0.025521721103791403, "grad_norm": 1.2445947528995862, "learning_rate": 2.550644567219153e-06, "loss": 0.2099, "step": 277 }, { "epoch": 0.025613857281061407, "grad_norm": 1.3694045461593618, "learning_rate": 2.5598526703499083e-06, "loss": 0.2243, "step": 278 }, { "epoch": 0.025705993458331414, "grad_norm": 1.297962915474662, "learning_rate": 2.5690607734806632e-06, "loss": 0.2204, "step": 279 }, { "epoch": 0.025798129635601418, "grad_norm": 1.2909158781778913, "learning_rate": 2.578268876611418e-06, "loss": 0.2211, "step": 280 }, { "epoch": 0.025890265812871426, "grad_norm": 1.320578975065506, "learning_rate": 2.5874769797421735e-06, "loss": 0.1965, "step": 281 }, { "epoch": 0.02598240199014143, "grad_norm": 1.2210472413434825, "learning_rate": 2.5966850828729284e-06, "loss": 0.2134, "step": 282 }, { "epoch": 0.026074538167411433, "grad_norm": 1.1578898719596564, "learning_rate": 2.6058931860036833e-06, "loss": 0.2039, "step": 283 }, { "epoch": 0.02616667434468144, "grad_norm": 1.3398605179539043, "learning_rate": 2.6151012891344386e-06, "loss": 0.2091, "step": 284 }, { "epoch": 0.026258810521951444, "grad_norm": 1.2764074537081127, "learning_rate": 2.6243093922651935e-06, "loss": 0.2127, "step": 285 }, { "epoch": 0.026350946699221448, "grad_norm": 1.201092481830308, "learning_rate": 2.6335174953959484e-06, "loss": 0.2134, "step": 286 }, { "epoch": 0.026443082876491456, "grad_norm": 1.3808995093878993, "learning_rate": 2.6427255985267037e-06, "loss": 0.2229, "step": 287 }, { "epoch": 0.02653521905376146, "grad_norm": 1.3001384527969946, "learning_rate": 2.6519337016574586e-06, "loss": 0.2292, "step": 288 }, { "epoch": 0.026627355231031463, "grad_norm": 1.3053774770933373, "learning_rate": 2.6611418047882135e-06, "loss": 0.22, "step": 289 }, { "epoch": 0.02671949140830147, "grad_norm": 1.449134250644644, "learning_rate": 2.670349907918969e-06, "loss": 0.2194, "step": 290 }, { "epoch": 0.026811627585571474, "grad_norm": 1.437368533930005, "learning_rate": 2.6795580110497238e-06, "loss": 0.2239, "step": 291 }, { "epoch": 0.026903763762841478, "grad_norm": 1.2446885130520247, "learning_rate": 2.6887661141804787e-06, "loss": 0.2141, "step": 292 }, { "epoch": 0.026995899940111485, "grad_norm": 1.7040675956383482, "learning_rate": 2.697974217311234e-06, "loss": 0.2224, "step": 293 }, { "epoch": 0.02708803611738149, "grad_norm": 1.332041322961633, "learning_rate": 2.707182320441989e-06, "loss": 0.2214, "step": 294 }, { "epoch": 0.027180172294651497, "grad_norm": 1.2590166578500492, "learning_rate": 2.716390423572744e-06, "loss": 0.2179, "step": 295 }, { "epoch": 0.0272723084719215, "grad_norm": 1.4226861813160818, "learning_rate": 2.7255985267034996e-06, "loss": 0.2196, "step": 296 }, { "epoch": 0.027364444649191504, "grad_norm": 1.3934346019180117, "learning_rate": 2.7348066298342545e-06, "loss": 0.2201, "step": 297 }, { "epoch": 0.02745658082646151, "grad_norm": 1.2888343424352768, "learning_rate": 2.74401473296501e-06, "loss": 0.2109, "step": 298 }, { "epoch": 0.027548717003731515, "grad_norm": 1.4048452398008997, "learning_rate": 2.7532228360957647e-06, "loss": 0.22, "step": 299 }, { "epoch": 0.02764085318100152, "grad_norm": 1.2759644069246936, "learning_rate": 2.7624309392265196e-06, "loss": 0.2002, "step": 300 }, { "epoch": 0.027732989358271527, "grad_norm": 1.257513125532842, "learning_rate": 2.771639042357275e-06, "loss": 0.2016, "step": 301 }, { "epoch": 0.02782512553554153, "grad_norm": 1.1604132143904993, "learning_rate": 2.78084714548803e-06, "loss": 0.2001, "step": 302 }, { "epoch": 0.027917261712811534, "grad_norm": 1.300325855758057, "learning_rate": 2.7900552486187847e-06, "loss": 0.2034, "step": 303 }, { "epoch": 0.02800939789008154, "grad_norm": 1.3505529212765077, "learning_rate": 2.79926335174954e-06, "loss": 0.2161, "step": 304 }, { "epoch": 0.028101534067351545, "grad_norm": 1.2614356404469431, "learning_rate": 2.808471454880295e-06, "loss": 0.1892, "step": 305 }, { "epoch": 0.02819367024462155, "grad_norm": 1.461383179054911, "learning_rate": 2.81767955801105e-06, "loss": 0.2128, "step": 306 }, { "epoch": 0.028285806421891557, "grad_norm": 1.25242901824847, "learning_rate": 2.826887661141805e-06, "loss": 0.22, "step": 307 }, { "epoch": 0.02837794259916156, "grad_norm": 1.2270884655785574, "learning_rate": 2.83609576427256e-06, "loss": 0.1904, "step": 308 }, { "epoch": 0.028470078776431564, "grad_norm": 1.23710825518334, "learning_rate": 2.845303867403315e-06, "loss": 0.2057, "step": 309 }, { "epoch": 0.02856221495370157, "grad_norm": 1.2747541107670282, "learning_rate": 2.8545119705340703e-06, "loss": 0.2165, "step": 310 }, { "epoch": 0.028654351130971575, "grad_norm": 1.5134645452681845, "learning_rate": 2.8637200736648252e-06, "loss": 0.2277, "step": 311 }, { "epoch": 0.028746487308241583, "grad_norm": 1.2152059027151885, "learning_rate": 2.87292817679558e-06, "loss": 0.2047, "step": 312 }, { "epoch": 0.028838623485511587, "grad_norm": 1.3277121581950153, "learning_rate": 2.8821362799263355e-06, "loss": 0.2256, "step": 313 }, { "epoch": 0.02893075966278159, "grad_norm": 1.32730086437395, "learning_rate": 2.8913443830570904e-06, "loss": 0.2202, "step": 314 }, { "epoch": 0.029022895840051598, "grad_norm": 1.363811967060764, "learning_rate": 2.9005524861878453e-06, "loss": 0.2197, "step": 315 }, { "epoch": 0.0291150320173216, "grad_norm": 1.2590769976128895, "learning_rate": 2.9097605893186006e-06, "loss": 0.2082, "step": 316 }, { "epoch": 0.029207168194591605, "grad_norm": 1.2277995813074711, "learning_rate": 2.9189686924493555e-06, "loss": 0.2061, "step": 317 }, { "epoch": 0.029299304371861613, "grad_norm": 1.3793722738217344, "learning_rate": 2.9281767955801104e-06, "loss": 0.1971, "step": 318 }, { "epoch": 0.029391440549131616, "grad_norm": 1.361895665830025, "learning_rate": 2.937384898710866e-06, "loss": 0.2427, "step": 319 }, { "epoch": 0.02948357672640162, "grad_norm": 1.2966036850711782, "learning_rate": 2.946593001841621e-06, "loss": 0.2244, "step": 320 }, { "epoch": 0.029575712903671628, "grad_norm": 1.1971066961262655, "learning_rate": 2.955801104972376e-06, "loss": 0.2265, "step": 321 }, { "epoch": 0.02966784908094163, "grad_norm": 1.2180842224212953, "learning_rate": 2.9650092081031313e-06, "loss": 0.2238, "step": 322 }, { "epoch": 0.029759985258211635, "grad_norm": 1.1589180181774166, "learning_rate": 2.974217311233886e-06, "loss": 0.1929, "step": 323 }, { "epoch": 0.029852121435481643, "grad_norm": 1.2133350157084903, "learning_rate": 2.983425414364641e-06, "loss": 0.2134, "step": 324 }, { "epoch": 0.029944257612751646, "grad_norm": 1.1779599790119306, "learning_rate": 2.9926335174953964e-06, "loss": 0.2066, "step": 325 }, { "epoch": 0.03003639379002165, "grad_norm": 1.1847663049212864, "learning_rate": 3.0018416206261513e-06, "loss": 0.1822, "step": 326 }, { "epoch": 0.030128529967291658, "grad_norm": 1.2734701566041768, "learning_rate": 3.0110497237569062e-06, "loss": 0.2225, "step": 327 }, { "epoch": 0.03022066614456166, "grad_norm": 1.456216336880786, "learning_rate": 3.0202578268876615e-06, "loss": 0.2303, "step": 328 }, { "epoch": 0.03031280232183167, "grad_norm": 1.3359492802492923, "learning_rate": 3.0294659300184164e-06, "loss": 0.2327, "step": 329 }, { "epoch": 0.030404938499101673, "grad_norm": 1.3170633885393337, "learning_rate": 3.0386740331491713e-06, "loss": 0.2113, "step": 330 }, { "epoch": 0.030497074676371676, "grad_norm": 1.3663569659678911, "learning_rate": 3.0478821362799267e-06, "loss": 0.2292, "step": 331 }, { "epoch": 0.030589210853641684, "grad_norm": 1.3094102769500708, "learning_rate": 3.0570902394106816e-06, "loss": 0.2158, "step": 332 }, { "epoch": 0.030681347030911688, "grad_norm": 1.2497293850188491, "learning_rate": 3.0662983425414365e-06, "loss": 0.2161, "step": 333 }, { "epoch": 0.03077348320818169, "grad_norm": 1.1791717194673006, "learning_rate": 3.075506445672192e-06, "loss": 0.2042, "step": 334 }, { "epoch": 0.0308656193854517, "grad_norm": 1.2243353185304204, "learning_rate": 3.0847145488029467e-06, "loss": 0.2221, "step": 335 }, { "epoch": 0.030957755562721703, "grad_norm": 1.2144934202478015, "learning_rate": 3.0939226519337016e-06, "loss": 0.2091, "step": 336 }, { "epoch": 0.031049891739991706, "grad_norm": 1.3869201324530631, "learning_rate": 3.103130755064457e-06, "loss": 0.22, "step": 337 }, { "epoch": 0.031142027917261714, "grad_norm": 1.2522412871255026, "learning_rate": 3.112338858195212e-06, "loss": 0.2012, "step": 338 }, { "epoch": 0.031234164094531718, "grad_norm": 1.223982875197098, "learning_rate": 3.1215469613259667e-06, "loss": 0.2362, "step": 339 }, { "epoch": 0.03132630027180172, "grad_norm": 1.180490704761597, "learning_rate": 3.130755064456722e-06, "loss": 0.2006, "step": 340 }, { "epoch": 0.031418436449071725, "grad_norm": 1.334003641795415, "learning_rate": 3.139963167587477e-06, "loss": 0.217, "step": 341 }, { "epoch": 0.031510572626341736, "grad_norm": 1.2176716108720274, "learning_rate": 3.149171270718232e-06, "loss": 0.2124, "step": 342 }, { "epoch": 0.03160270880361174, "grad_norm": 1.3395536252432325, "learning_rate": 3.1583793738489876e-06, "loss": 0.2098, "step": 343 }, { "epoch": 0.031694844980881744, "grad_norm": 1.283429869492175, "learning_rate": 3.1675874769797425e-06, "loss": 0.2314, "step": 344 }, { "epoch": 0.03178698115815175, "grad_norm": 1.228230288659854, "learning_rate": 3.176795580110498e-06, "loss": 0.1965, "step": 345 }, { "epoch": 0.03187911733542175, "grad_norm": 1.2440315402589066, "learning_rate": 3.1860036832412528e-06, "loss": 0.2225, "step": 346 }, { "epoch": 0.031971253512691755, "grad_norm": 1.2499654799548787, "learning_rate": 3.1952117863720077e-06, "loss": 0.2206, "step": 347 }, { "epoch": 0.032063389689961766, "grad_norm": 1.2049177369182857, "learning_rate": 3.204419889502763e-06, "loss": 0.2035, "step": 348 }, { "epoch": 0.03215552586723177, "grad_norm": 1.2316769248478436, "learning_rate": 3.213627992633518e-06, "loss": 0.2145, "step": 349 }, { "epoch": 0.032247662044501774, "grad_norm": 1.2321172561939648, "learning_rate": 3.222836095764273e-06, "loss": 0.2112, "step": 350 }, { "epoch": 0.03233979822177178, "grad_norm": 1.1523176756649027, "learning_rate": 3.232044198895028e-06, "loss": 0.2056, "step": 351 }, { "epoch": 0.03243193439904178, "grad_norm": 1.1236159577981801, "learning_rate": 3.241252302025783e-06, "loss": 0.1955, "step": 352 }, { "epoch": 0.03252407057631179, "grad_norm": 1.2130769573681421, "learning_rate": 3.250460405156538e-06, "loss": 0.2054, "step": 353 }, { "epoch": 0.032616206753581796, "grad_norm": 1.1913432800528299, "learning_rate": 3.2596685082872933e-06, "loss": 0.1981, "step": 354 }, { "epoch": 0.0327083429308518, "grad_norm": 1.3859609731695497, "learning_rate": 3.268876611418048e-06, "loss": 0.2342, "step": 355 }, { "epoch": 0.032800479108121804, "grad_norm": 1.3072352238426874, "learning_rate": 3.278084714548803e-06, "loss": 0.2183, "step": 356 }, { "epoch": 0.03289261528539181, "grad_norm": 1.3473692917629814, "learning_rate": 3.2872928176795584e-06, "loss": 0.2029, "step": 357 }, { "epoch": 0.03298475146266181, "grad_norm": 1.2572639062134559, "learning_rate": 3.2965009208103133e-06, "loss": 0.2214, "step": 358 }, { "epoch": 0.03307688763993182, "grad_norm": 1.203052241995266, "learning_rate": 3.305709023941068e-06, "loss": 0.2026, "step": 359 }, { "epoch": 0.033169023817201826, "grad_norm": 1.2948231954080816, "learning_rate": 3.3149171270718235e-06, "loss": 0.2209, "step": 360 }, { "epoch": 0.03326115999447183, "grad_norm": 1.1961645679085242, "learning_rate": 3.3241252302025784e-06, "loss": 0.2119, "step": 361 }, { "epoch": 0.033353296171741834, "grad_norm": 1.2097174219855942, "learning_rate": 3.3333333333333333e-06, "loss": 0.2142, "step": 362 }, { "epoch": 0.03344543234901184, "grad_norm": 1.242640883301389, "learning_rate": 3.3425414364640887e-06, "loss": 0.21, "step": 363 }, { "epoch": 0.03353756852628184, "grad_norm": 1.1964795201693772, "learning_rate": 3.3517495395948436e-06, "loss": 0.2179, "step": 364 }, { "epoch": 0.03362970470355185, "grad_norm": 1.2315695513233953, "learning_rate": 3.3609576427255985e-06, "loss": 0.2066, "step": 365 }, { "epoch": 0.033721840880821856, "grad_norm": 1.112363729635578, "learning_rate": 3.370165745856354e-06, "loss": 0.2008, "step": 366 }, { "epoch": 0.03381397705809186, "grad_norm": 1.2655168156807526, "learning_rate": 3.379373848987109e-06, "loss": 0.193, "step": 367 }, { "epoch": 0.033906113235361864, "grad_norm": 1.1496127915515584, "learning_rate": 3.3885819521178644e-06, "loss": 0.1982, "step": 368 }, { "epoch": 0.03399824941263187, "grad_norm": 1.174710108082871, "learning_rate": 3.3977900552486193e-06, "loss": 0.1977, "step": 369 }, { "epoch": 0.03409038558990188, "grad_norm": 1.2707966093570304, "learning_rate": 3.4069981583793742e-06, "loss": 0.2018, "step": 370 }, { "epoch": 0.03418252176717188, "grad_norm": 1.2562422642370359, "learning_rate": 3.416206261510129e-06, "loss": 0.2088, "step": 371 }, { "epoch": 0.034274657944441886, "grad_norm": 1.2280848584726436, "learning_rate": 3.4254143646408845e-06, "loss": 0.2105, "step": 372 }, { "epoch": 0.03436679412171189, "grad_norm": 1.217850057779426, "learning_rate": 3.4346224677716394e-06, "loss": 0.2063, "step": 373 }, { "epoch": 0.034458930298981894, "grad_norm": 1.2091286639825245, "learning_rate": 3.4438305709023943e-06, "loss": 0.2128, "step": 374 }, { "epoch": 0.0345510664762519, "grad_norm": 1.1715733041783976, "learning_rate": 3.4530386740331496e-06, "loss": 0.2021, "step": 375 }, { "epoch": 0.03464320265352191, "grad_norm": 1.2001707885169832, "learning_rate": 3.4622467771639045e-06, "loss": 0.2257, "step": 376 }, { "epoch": 0.03473533883079191, "grad_norm": 1.2368175910547214, "learning_rate": 3.4714548802946594e-06, "loss": 0.2145, "step": 377 }, { "epoch": 0.034827475008061916, "grad_norm": 1.166337000359825, "learning_rate": 3.4806629834254147e-06, "loss": 0.2079, "step": 378 }, { "epoch": 0.03491961118533192, "grad_norm": 1.1572816320295447, "learning_rate": 3.4898710865561696e-06, "loss": 0.1901, "step": 379 }, { "epoch": 0.035011747362601923, "grad_norm": 1.1237321439729187, "learning_rate": 3.4990791896869245e-06, "loss": 0.2007, "step": 380 }, { "epoch": 0.03510388353987193, "grad_norm": 1.215928962970433, "learning_rate": 3.50828729281768e-06, "loss": 0.2195, "step": 381 }, { "epoch": 0.03519601971714194, "grad_norm": 1.251329116724435, "learning_rate": 3.5174953959484348e-06, "loss": 0.2066, "step": 382 }, { "epoch": 0.03528815589441194, "grad_norm": 1.2234531446138686, "learning_rate": 3.5267034990791897e-06, "loss": 0.2093, "step": 383 }, { "epoch": 0.035380292071681946, "grad_norm": 1.2252920368192435, "learning_rate": 3.535911602209945e-06, "loss": 0.2202, "step": 384 }, { "epoch": 0.03547242824895195, "grad_norm": 1.2609463141051451, "learning_rate": 3.5451197053407e-06, "loss": 0.2001, "step": 385 }, { "epoch": 0.03556456442622195, "grad_norm": 1.2927472157292323, "learning_rate": 3.554327808471455e-06, "loss": 0.2256, "step": 386 }, { "epoch": 0.035656700603491964, "grad_norm": 1.155641554981568, "learning_rate": 3.56353591160221e-06, "loss": 0.199, "step": 387 }, { "epoch": 0.03574883678076197, "grad_norm": 1.2654887045193257, "learning_rate": 3.572744014732965e-06, "loss": 0.2078, "step": 388 }, { "epoch": 0.03584097295803197, "grad_norm": 1.2166982932381427, "learning_rate": 3.58195211786372e-06, "loss": 0.2103, "step": 389 }, { "epoch": 0.035933109135301976, "grad_norm": 1.37669075363763, "learning_rate": 3.5911602209944757e-06, "loss": 0.2152, "step": 390 }, { "epoch": 0.03602524531257198, "grad_norm": 1.3416512571374115, "learning_rate": 3.6003683241252306e-06, "loss": 0.1992, "step": 391 }, { "epoch": 0.03611738148984198, "grad_norm": 1.229234386328588, "learning_rate": 3.609576427255986e-06, "loss": 0.2187, "step": 392 }, { "epoch": 0.036209517667111994, "grad_norm": 1.1832506871580368, "learning_rate": 3.618784530386741e-06, "loss": 0.1978, "step": 393 }, { "epoch": 0.036301653844382, "grad_norm": 1.2383981566876872, "learning_rate": 3.6279926335174957e-06, "loss": 0.207, "step": 394 }, { "epoch": 0.036393790021652, "grad_norm": 1.1413755558262018, "learning_rate": 3.637200736648251e-06, "loss": 0.2155, "step": 395 }, { "epoch": 0.036485926198922006, "grad_norm": 1.2450403165766708, "learning_rate": 3.646408839779006e-06, "loss": 0.2142, "step": 396 }, { "epoch": 0.03657806237619201, "grad_norm": 1.2177024569527306, "learning_rate": 3.655616942909761e-06, "loss": 0.2167, "step": 397 }, { "epoch": 0.03667019855346201, "grad_norm": 1.2723658885665146, "learning_rate": 3.664825046040516e-06, "loss": 0.2194, "step": 398 }, { "epoch": 0.036762334730732024, "grad_norm": 1.174670391591689, "learning_rate": 3.674033149171271e-06, "loss": 0.2089, "step": 399 }, { "epoch": 0.03685447090800203, "grad_norm": 1.2221763371090841, "learning_rate": 3.683241252302026e-06, "loss": 0.2106, "step": 400 }, { "epoch": 0.03694660708527203, "grad_norm": 1.2965457839204568, "learning_rate": 3.6924493554327813e-06, "loss": 0.2099, "step": 401 }, { "epoch": 0.037038743262542036, "grad_norm": 1.2033987453656643, "learning_rate": 3.7016574585635362e-06, "loss": 0.2095, "step": 402 }, { "epoch": 0.03713087943981204, "grad_norm": 1.2068890988997176, "learning_rate": 3.710865561694291e-06, "loss": 0.201, "step": 403 }, { "epoch": 0.03722301561708205, "grad_norm": 1.203262066657531, "learning_rate": 3.7200736648250464e-06, "loss": 0.2073, "step": 404 }, { "epoch": 0.037315151794352054, "grad_norm": 1.2129025553528847, "learning_rate": 3.7292817679558014e-06, "loss": 0.2214, "step": 405 }, { "epoch": 0.03740728797162206, "grad_norm": 1.2850805303315755, "learning_rate": 3.7384898710865563e-06, "loss": 0.2202, "step": 406 }, { "epoch": 0.03749942414889206, "grad_norm": 1.1849304905609503, "learning_rate": 3.7476979742173116e-06, "loss": 0.1994, "step": 407 }, { "epoch": 0.037591560326162066, "grad_norm": 1.1447297755974901, "learning_rate": 3.7569060773480665e-06, "loss": 0.1877, "step": 408 }, { "epoch": 0.03768369650343207, "grad_norm": 1.3731333236915204, "learning_rate": 3.7661141804788214e-06, "loss": 0.2068, "step": 409 }, { "epoch": 0.03777583268070208, "grad_norm": 1.3507563680134274, "learning_rate": 3.7753222836095767e-06, "loss": 0.2184, "step": 410 }, { "epoch": 0.037867968857972084, "grad_norm": 1.2967176512561887, "learning_rate": 3.7845303867403316e-06, "loss": 0.1949, "step": 411 }, { "epoch": 0.03796010503524209, "grad_norm": 1.1615648678608848, "learning_rate": 3.7937384898710865e-06, "loss": 0.1906, "step": 412 }, { "epoch": 0.03805224121251209, "grad_norm": 1.1806971928220382, "learning_rate": 3.802946593001842e-06, "loss": 0.1814, "step": 413 }, { "epoch": 0.038144377389782096, "grad_norm": 1.273886277912273, "learning_rate": 3.812154696132597e-06, "loss": 0.2248, "step": 414 }, { "epoch": 0.038236513567052106, "grad_norm": 1.1335712217012766, "learning_rate": 3.8213627992633525e-06, "loss": 0.2128, "step": 415 }, { "epoch": 0.03832864974432211, "grad_norm": 1.1793652832224044, "learning_rate": 3.830570902394107e-06, "loss": 0.1874, "step": 416 }, { "epoch": 0.038420785921592114, "grad_norm": 1.2334577784320606, "learning_rate": 3.839779005524862e-06, "loss": 0.2141, "step": 417 }, { "epoch": 0.03851292209886212, "grad_norm": 1.1609662659175002, "learning_rate": 3.848987108655617e-06, "loss": 0.2094, "step": 418 }, { "epoch": 0.03860505827613212, "grad_norm": 1.1745343514754072, "learning_rate": 3.858195211786372e-06, "loss": 0.1954, "step": 419 }, { "epoch": 0.038697194453402126, "grad_norm": 1.2526388364382148, "learning_rate": 3.867403314917128e-06, "loss": 0.2275, "step": 420 }, { "epoch": 0.038789330630672136, "grad_norm": 1.4330497133104598, "learning_rate": 3.876611418047883e-06, "loss": 0.2126, "step": 421 }, { "epoch": 0.03888146680794214, "grad_norm": 1.2562815683824222, "learning_rate": 3.885819521178638e-06, "loss": 0.2024, "step": 422 }, { "epoch": 0.038973602985212144, "grad_norm": 1.4160966441130192, "learning_rate": 3.8950276243093926e-06, "loss": 0.2131, "step": 423 }, { "epoch": 0.03906573916248215, "grad_norm": 1.3816473101884428, "learning_rate": 3.9042357274401475e-06, "loss": 0.2113, "step": 424 }, { "epoch": 0.03915787533975215, "grad_norm": 1.2554377015630513, "learning_rate": 3.913443830570902e-06, "loss": 0.2132, "step": 425 }, { "epoch": 0.039250011517022156, "grad_norm": 1.3398302209339412, "learning_rate": 3.922651933701658e-06, "loss": 0.2099, "step": 426 }, { "epoch": 0.039342147694292166, "grad_norm": 1.3603810885516354, "learning_rate": 3.931860036832413e-06, "loss": 0.2125, "step": 427 }, { "epoch": 0.03943428387156217, "grad_norm": 1.258039947717146, "learning_rate": 3.941068139963168e-06, "loss": 0.2057, "step": 428 }, { "epoch": 0.039526420048832174, "grad_norm": 1.2036522078809784, "learning_rate": 3.950276243093923e-06, "loss": 0.1912, "step": 429 }, { "epoch": 0.03961855622610218, "grad_norm": 1.2675274340086102, "learning_rate": 3.959484346224678e-06, "loss": 0.2224, "step": 430 }, { "epoch": 0.03971069240337218, "grad_norm": 1.3973388775812086, "learning_rate": 3.968692449355433e-06, "loss": 0.2296, "step": 431 }, { "epoch": 0.03980282858064219, "grad_norm": 1.543316930802054, "learning_rate": 3.977900552486188e-06, "loss": 0.2062, "step": 432 }, { "epoch": 0.039894964757912196, "grad_norm": 1.1195071306132687, "learning_rate": 3.987108655616943e-06, "loss": 0.2085, "step": 433 }, { "epoch": 0.0399871009351822, "grad_norm": 1.4042138133782505, "learning_rate": 3.996316758747698e-06, "loss": 0.1929, "step": 434 }, { "epoch": 0.040079237112452204, "grad_norm": 1.458286673287262, "learning_rate": 4.005524861878453e-06, "loss": 0.2097, "step": 435 }, { "epoch": 0.04017137328972221, "grad_norm": 1.2963948315513325, "learning_rate": 4.014732965009208e-06, "loss": 0.2118, "step": 436 }, { "epoch": 0.04026350946699221, "grad_norm": 1.559050465013681, "learning_rate": 4.023941068139964e-06, "loss": 0.2149, "step": 437 }, { "epoch": 0.04035564564426222, "grad_norm": 1.2769319583081065, "learning_rate": 4.033149171270719e-06, "loss": 0.2219, "step": 438 }, { "epoch": 0.040447781821532226, "grad_norm": 1.3096848506654442, "learning_rate": 4.0423572744014736e-06, "loss": 0.1945, "step": 439 }, { "epoch": 0.04053991799880223, "grad_norm": 1.185303690120011, "learning_rate": 4.051565377532229e-06, "loss": 0.1891, "step": 440 }, { "epoch": 0.040632054176072234, "grad_norm": 1.159162633689249, "learning_rate": 4.060773480662984e-06, "loss": 0.2175, "step": 441 }, { "epoch": 0.04072419035334224, "grad_norm": 1.2662580758209543, "learning_rate": 4.069981583793739e-06, "loss": 0.2064, "step": 442 }, { "epoch": 0.04081632653061224, "grad_norm": 1.1246911765480914, "learning_rate": 4.079189686924494e-06, "loss": 0.219, "step": 443 }, { "epoch": 0.04090846270788225, "grad_norm": 1.1350391555698733, "learning_rate": 4.088397790055249e-06, "loss": 0.203, "step": 444 }, { "epoch": 0.041000598885152256, "grad_norm": 1.4680786044597403, "learning_rate": 4.097605893186004e-06, "loss": 0.2131, "step": 445 }, { "epoch": 0.04109273506242226, "grad_norm": 1.2159762672968413, "learning_rate": 4.106813996316759e-06, "loss": 0.2029, "step": 446 }, { "epoch": 0.041184871239692264, "grad_norm": 1.289465041658357, "learning_rate": 4.1160220994475145e-06, "loss": 0.2069, "step": 447 }, { "epoch": 0.04127700741696227, "grad_norm": 1.577269595539687, "learning_rate": 4.125230202578269e-06, "loss": 0.2179, "step": 448 }, { "epoch": 0.04136914359423228, "grad_norm": 1.2817783420567126, "learning_rate": 4.134438305709024e-06, "loss": 0.2043, "step": 449 }, { "epoch": 0.04146127977150228, "grad_norm": 1.1310315648514593, "learning_rate": 4.143646408839779e-06, "loss": 0.2162, "step": 450 }, { "epoch": 0.041553415948772286, "grad_norm": 1.3119107380289001, "learning_rate": 4.152854511970534e-06, "loss": 0.2197, "step": 451 }, { "epoch": 0.04164555212604229, "grad_norm": 1.3865007259353652, "learning_rate": 4.162062615101289e-06, "loss": 0.24, "step": 452 }, { "epoch": 0.041737688303312294, "grad_norm": 1.1858485712993334, "learning_rate": 4.171270718232045e-06, "loss": 0.209, "step": 453 }, { "epoch": 0.0418298244805823, "grad_norm": 1.1178283987286062, "learning_rate": 4.1804788213628e-06, "loss": 0.1983, "step": 454 }, { "epoch": 0.04192196065785231, "grad_norm": 1.201810219473216, "learning_rate": 4.1896869244935545e-06, "loss": 0.2191, "step": 455 }, { "epoch": 0.04201409683512231, "grad_norm": 1.2475953832029654, "learning_rate": 4.1988950276243095e-06, "loss": 0.2023, "step": 456 }, { "epoch": 0.042106233012392316, "grad_norm": 1.1804605088793463, "learning_rate": 4.208103130755064e-06, "loss": 0.2032, "step": 457 }, { "epoch": 0.04219836918966232, "grad_norm": 1.2262939863172024, "learning_rate": 4.217311233885819e-06, "loss": 0.2193, "step": 458 }, { "epoch": 0.042290505366932324, "grad_norm": 1.2144298896507224, "learning_rate": 4.226519337016575e-06, "loss": 0.221, "step": 459 }, { "epoch": 0.04238264154420233, "grad_norm": 1.2934211101242432, "learning_rate": 4.23572744014733e-06, "loss": 0.2258, "step": 460 }, { "epoch": 0.04247477772147234, "grad_norm": 1.1425010577356645, "learning_rate": 4.244935543278086e-06, "loss": 0.2027, "step": 461 }, { "epoch": 0.04256691389874234, "grad_norm": 1.185579804260426, "learning_rate": 4.2541436464088406e-06, "loss": 0.2351, "step": 462 }, { "epoch": 0.042659050076012346, "grad_norm": 1.1729170129773308, "learning_rate": 4.2633517495395955e-06, "loss": 0.2035, "step": 463 }, { "epoch": 0.04275118625328235, "grad_norm": 1.1888699825576086, "learning_rate": 4.27255985267035e-06, "loss": 0.2234, "step": 464 }, { "epoch": 0.042843322430552354, "grad_norm": 1.0956917428584472, "learning_rate": 4.281767955801105e-06, "loss": 0.1724, "step": 465 }, { "epoch": 0.042935458607822365, "grad_norm": 1.2428999853822662, "learning_rate": 4.29097605893186e-06, "loss": 0.2288, "step": 466 }, { "epoch": 0.04302759478509237, "grad_norm": 1.1530798568369878, "learning_rate": 4.300184162062616e-06, "loss": 0.2017, "step": 467 }, { "epoch": 0.04311973096236237, "grad_norm": 1.3439487754256692, "learning_rate": 4.309392265193371e-06, "loss": 0.2096, "step": 468 }, { "epoch": 0.043211867139632376, "grad_norm": 1.1683549358961656, "learning_rate": 4.318600368324126e-06, "loss": 0.2148, "step": 469 }, { "epoch": 0.04330400331690238, "grad_norm": 1.2522700216036091, "learning_rate": 4.327808471454881e-06, "loss": 0.2164, "step": 470 }, { "epoch": 0.043396139494172384, "grad_norm": 1.2387500928446844, "learning_rate": 4.3370165745856355e-06, "loss": 0.2054, "step": 471 }, { "epoch": 0.043488275671442395, "grad_norm": 1.1954111876349953, "learning_rate": 4.3462246777163904e-06, "loss": 0.2119, "step": 472 }, { "epoch": 0.0435804118487124, "grad_norm": 1.6231312157976416, "learning_rate": 4.355432780847146e-06, "loss": 0.2158, "step": 473 }, { "epoch": 0.0436725480259824, "grad_norm": 1.3358159604896864, "learning_rate": 4.364640883977901e-06, "loss": 0.2089, "step": 474 }, { "epoch": 0.043764684203252406, "grad_norm": 1.4873292661852786, "learning_rate": 4.373848987108656e-06, "loss": 0.2184, "step": 475 }, { "epoch": 0.04385682038052241, "grad_norm": 1.5926804326802442, "learning_rate": 4.383057090239411e-06, "loss": 0.2198, "step": 476 }, { "epoch": 0.043948956557792414, "grad_norm": 1.1544457982412275, "learning_rate": 4.392265193370166e-06, "loss": 0.2047, "step": 477 }, { "epoch": 0.044041092735062425, "grad_norm": 1.3790644801344767, "learning_rate": 4.401473296500921e-06, "loss": 0.2104, "step": 478 }, { "epoch": 0.04413322891233243, "grad_norm": 1.4144109682196038, "learning_rate": 4.4106813996316765e-06, "loss": 0.2169, "step": 479 }, { "epoch": 0.04422536508960243, "grad_norm": 1.235543716580457, "learning_rate": 4.419889502762431e-06, "loss": 0.2173, "step": 480 }, { "epoch": 0.044317501266872436, "grad_norm": 1.2283735168259708, "learning_rate": 4.429097605893186e-06, "loss": 0.1962, "step": 481 }, { "epoch": 0.04440963744414244, "grad_norm": 1.4283017557499813, "learning_rate": 4.438305709023941e-06, "loss": 0.218, "step": 482 }, { "epoch": 0.04450177362141245, "grad_norm": 1.4867664072627964, "learning_rate": 4.447513812154696e-06, "loss": 0.2183, "step": 483 }, { "epoch": 0.044593909798682455, "grad_norm": 1.4108802496691264, "learning_rate": 4.456721915285452e-06, "loss": 0.2013, "step": 484 }, { "epoch": 0.04468604597595246, "grad_norm": 1.166986570586516, "learning_rate": 4.465930018416207e-06, "loss": 0.2002, "step": 485 }, { "epoch": 0.04477818215322246, "grad_norm": 1.1648717298249538, "learning_rate": 4.475138121546962e-06, "loss": 0.2209, "step": 486 }, { "epoch": 0.044870318330492466, "grad_norm": 1.2876822750663734, "learning_rate": 4.484346224677717e-06, "loss": 0.2046, "step": 487 }, { "epoch": 0.04496245450776247, "grad_norm": 1.2801973779891336, "learning_rate": 4.493554327808472e-06, "loss": 0.2199, "step": 488 }, { "epoch": 0.04505459068503248, "grad_norm": 1.298576742843075, "learning_rate": 4.502762430939227e-06, "loss": 0.2117, "step": 489 }, { "epoch": 0.045146726862302484, "grad_norm": 1.3131277756849138, "learning_rate": 4.511970534069982e-06, "loss": 0.2126, "step": 490 }, { "epoch": 0.04523886303957249, "grad_norm": 1.163802616612496, "learning_rate": 4.521178637200737e-06, "loss": 0.2079, "step": 491 }, { "epoch": 0.04533099921684249, "grad_norm": 1.223443299674731, "learning_rate": 4.530386740331492e-06, "loss": 0.2047, "step": 492 }, { "epoch": 0.045423135394112496, "grad_norm": 1.183473280850051, "learning_rate": 4.539594843462248e-06, "loss": 0.1852, "step": 493 }, { "epoch": 0.04551527157138251, "grad_norm": 1.1651402187049773, "learning_rate": 4.5488029465930025e-06, "loss": 0.2038, "step": 494 }, { "epoch": 0.04560740774865251, "grad_norm": 1.1460140058645047, "learning_rate": 4.5580110497237574e-06, "loss": 0.196, "step": 495 }, { "epoch": 0.045699543925922514, "grad_norm": 1.1776138222171473, "learning_rate": 4.567219152854512e-06, "loss": 0.1995, "step": 496 }, { "epoch": 0.04579168010319252, "grad_norm": 1.2204020301333764, "learning_rate": 4.576427255985267e-06, "loss": 0.2143, "step": 497 }, { "epoch": 0.04588381628046252, "grad_norm": 1.205241368495962, "learning_rate": 4.585635359116022e-06, "loss": 0.1979, "step": 498 }, { "epoch": 0.045975952457732526, "grad_norm": 1.1734468213424174, "learning_rate": 4.594843462246777e-06, "loss": 0.2078, "step": 499 }, { "epoch": 0.04606808863500254, "grad_norm": 1.1082444575732158, "learning_rate": 4.604051565377533e-06, "loss": 0.1961, "step": 500 }, { "epoch": 0.04606808863500254, "eval_loss": 0.20690900087356567, "eval_runtime": 299.5863, "eval_samples_per_second": 23.422, "eval_steps_per_second": 2.931, "step": 500 }, { "epoch": 0.04616022481227254, "grad_norm": 1.2088449274977096, "learning_rate": 4.613259668508288e-06, "loss": 0.2047, "step": 501 }, { "epoch": 0.046252360989542544, "grad_norm": 1.142903312485517, "learning_rate": 4.622467771639043e-06, "loss": 0.2019, "step": 502 }, { "epoch": 0.04634449716681255, "grad_norm": 1.2256105445368588, "learning_rate": 4.6316758747697975e-06, "loss": 0.1987, "step": 503 }, { "epoch": 0.04643663334408255, "grad_norm": 1.1904513269582884, "learning_rate": 4.640883977900552e-06, "loss": 0.2127, "step": 504 }, { "epoch": 0.046528769521352556, "grad_norm": 1.1457301085346001, "learning_rate": 4.650092081031307e-06, "loss": 0.197, "step": 505 }, { "epoch": 0.04662090569862257, "grad_norm": 1.3077547506039735, "learning_rate": 4.659300184162063e-06, "loss": 0.1919, "step": 506 }, { "epoch": 0.04671304187589257, "grad_norm": 1.0715902844691318, "learning_rate": 4.668508287292818e-06, "loss": 0.1938, "step": 507 }, { "epoch": 0.046805178053162574, "grad_norm": 1.1850578820772626, "learning_rate": 4.677716390423574e-06, "loss": 0.2025, "step": 508 }, { "epoch": 0.04689731423043258, "grad_norm": 1.4073409971969546, "learning_rate": 4.686924493554329e-06, "loss": 0.2144, "step": 509 }, { "epoch": 0.04698945040770258, "grad_norm": 1.2622270185118862, "learning_rate": 4.6961325966850835e-06, "loss": 0.2194, "step": 510 }, { "epoch": 0.04708158658497259, "grad_norm": 1.2236474583396226, "learning_rate": 4.7053406998158384e-06, "loss": 0.2092, "step": 511 }, { "epoch": 0.0471737227622426, "grad_norm": 1.1348264980475313, "learning_rate": 4.714548802946593e-06, "loss": 0.2052, "step": 512 }, { "epoch": 0.0472658589395126, "grad_norm": 1.3406440516081668, "learning_rate": 4.723756906077348e-06, "loss": 0.207, "step": 513 }, { "epoch": 0.047357995116782604, "grad_norm": 1.446315027401559, "learning_rate": 4.732965009208104e-06, "loss": 0.2169, "step": 514 }, { "epoch": 0.04745013129405261, "grad_norm": 1.2350193328918795, "learning_rate": 4.742173112338859e-06, "loss": 0.2123, "step": 515 }, { "epoch": 0.04754226747132261, "grad_norm": 1.3453002863844115, "learning_rate": 4.751381215469614e-06, "loss": 0.2213, "step": 516 }, { "epoch": 0.04763440364859262, "grad_norm": 1.3208799605546395, "learning_rate": 4.760589318600369e-06, "loss": 0.2248, "step": 517 }, { "epoch": 0.04772653982586263, "grad_norm": 1.197417531741685, "learning_rate": 4.769797421731124e-06, "loss": 0.2067, "step": 518 }, { "epoch": 0.04781867600313263, "grad_norm": 1.166778788170299, "learning_rate": 4.7790055248618785e-06, "loss": 0.2121, "step": 519 }, { "epoch": 0.047910812180402634, "grad_norm": 1.2056681536807465, "learning_rate": 4.788213627992634e-06, "loss": 0.2048, "step": 520 }, { "epoch": 0.04800294835767264, "grad_norm": 1.1862496944445569, "learning_rate": 4.797421731123389e-06, "loss": 0.2052, "step": 521 }, { "epoch": 0.04809508453494264, "grad_norm": 1.1650915820727847, "learning_rate": 4.806629834254144e-06, "loss": 0.2037, "step": 522 }, { "epoch": 0.04818722071221265, "grad_norm": 1.0997406935439868, "learning_rate": 4.815837937384899e-06, "loss": 0.2162, "step": 523 }, { "epoch": 0.04827935688948266, "grad_norm": 1.142599490179962, "learning_rate": 4.825046040515654e-06, "loss": 0.1947, "step": 524 }, { "epoch": 0.04837149306675266, "grad_norm": 1.3182614669715278, "learning_rate": 4.834254143646409e-06, "loss": 0.2224, "step": 525 }, { "epoch": 0.048463629244022664, "grad_norm": 1.1006337099413035, "learning_rate": 4.8434622467771645e-06, "loss": 0.2104, "step": 526 }, { "epoch": 0.04855576542129267, "grad_norm": 1.2851582791992746, "learning_rate": 4.852670349907919e-06, "loss": 0.2112, "step": 527 }, { "epoch": 0.04864790159856268, "grad_norm": 1.226468392041963, "learning_rate": 4.861878453038674e-06, "loss": 0.2075, "step": 528 }, { "epoch": 0.04874003777583268, "grad_norm": 1.1605259603239653, "learning_rate": 4.871086556169429e-06, "loss": 0.2143, "step": 529 }, { "epoch": 0.04883217395310269, "grad_norm": 1.1872303036487082, "learning_rate": 4.880294659300184e-06, "loss": 0.2131, "step": 530 }, { "epoch": 0.04892431013037269, "grad_norm": 1.2324441915157167, "learning_rate": 4.889502762430939e-06, "loss": 0.2198, "step": 531 }, { "epoch": 0.049016446307642694, "grad_norm": 1.084873626256991, "learning_rate": 4.898710865561695e-06, "loss": 0.1981, "step": 532 }, { "epoch": 0.0491085824849127, "grad_norm": 1.0866645738744949, "learning_rate": 4.90791896869245e-06, "loss": 0.2105, "step": 533 }, { "epoch": 0.04920071866218271, "grad_norm": 1.2290547614755443, "learning_rate": 4.9171270718232054e-06, "loss": 0.2274, "step": 534 }, { "epoch": 0.04929285483945271, "grad_norm": 1.1524273585439213, "learning_rate": 4.92633517495396e-06, "loss": 0.2047, "step": 535 }, { "epoch": 0.04938499101672272, "grad_norm": 1.061747508568193, "learning_rate": 4.935543278084715e-06, "loss": 0.1734, "step": 536 }, { "epoch": 0.04947712719399272, "grad_norm": 1.2916494032401462, "learning_rate": 4.94475138121547e-06, "loss": 0.2168, "step": 537 }, { "epoch": 0.049569263371262724, "grad_norm": 1.269578911667256, "learning_rate": 4.953959484346225e-06, "loss": 0.2109, "step": 538 }, { "epoch": 0.04966139954853273, "grad_norm": 1.378804388434808, "learning_rate": 4.96316758747698e-06, "loss": 0.2091, "step": 539 }, { "epoch": 0.04975353572580274, "grad_norm": 1.1049708603346846, "learning_rate": 4.972375690607736e-06, "loss": 0.2072, "step": 540 }, { "epoch": 0.04984567190307274, "grad_norm": 1.2404672009645543, "learning_rate": 4.981583793738491e-06, "loss": 0.2066, "step": 541 }, { "epoch": 0.04993780808034275, "grad_norm": 1.1799110644655362, "learning_rate": 4.9907918968692455e-06, "loss": 0.2128, "step": 542 }, { "epoch": 0.05002994425761275, "grad_norm": 1.1896138346028056, "learning_rate": 5e-06, "loss": 0.2015, "step": 543 }, { "epoch": 0.050122080434882754, "grad_norm": 1.2225805533773542, "learning_rate": 4.999999883937366e-06, "loss": 0.213, "step": 544 }, { "epoch": 0.050214216612152765, "grad_norm": 1.2114568339356038, "learning_rate": 4.999999535749473e-06, "loss": 0.2064, "step": 545 }, { "epoch": 0.05030635278942277, "grad_norm": 1.1217499299052078, "learning_rate": 4.999998955436354e-06, "loss": 0.2024, "step": 546 }, { "epoch": 0.05039848896669277, "grad_norm": 1.133686745541808, "learning_rate": 4.999998142998064e-06, "loss": 0.2001, "step": 547 }, { "epoch": 0.050490625143962777, "grad_norm": 1.2440473687463243, "learning_rate": 4.999997098434676e-06, "loss": 0.2089, "step": 548 }, { "epoch": 0.05058276132123278, "grad_norm": 1.1784492874879233, "learning_rate": 4.999995821746289e-06, "loss": 0.2084, "step": 549 }, { "epoch": 0.050674897498502784, "grad_norm": 1.188597001170747, "learning_rate": 4.9999943129330204e-06, "loss": 0.2187, "step": 550 }, { "epoch": 0.050767033675772795, "grad_norm": 1.295042393574803, "learning_rate": 4.999992571995011e-06, "loss": 0.1944, "step": 551 }, { "epoch": 0.0508591698530428, "grad_norm": 1.0516405339241583, "learning_rate": 4.999990598932423e-06, "loss": 0.2001, "step": 552 }, { "epoch": 0.0509513060303128, "grad_norm": 1.0917727740604706, "learning_rate": 4.999988393745438e-06, "loss": 0.2018, "step": 553 }, { "epoch": 0.051043442207582806, "grad_norm": 1.2785884522845907, "learning_rate": 4.999985956434263e-06, "loss": 0.2369, "step": 554 }, { "epoch": 0.05113557838485281, "grad_norm": 1.1693288663566566, "learning_rate": 4.999983286999121e-06, "loss": 0.1985, "step": 555 }, { "epoch": 0.051227714562122814, "grad_norm": 1.1206519528655148, "learning_rate": 4.999980385440262e-06, "loss": 0.2056, "step": 556 }, { "epoch": 0.051319850739392825, "grad_norm": 1.1626004840751911, "learning_rate": 4.999977251757956e-06, "loss": 0.2036, "step": 557 }, { "epoch": 0.05141198691666283, "grad_norm": 1.326436529000634, "learning_rate": 4.999973885952492e-06, "loss": 0.2105, "step": 558 }, { "epoch": 0.05150412309393283, "grad_norm": 1.1994189863044933, "learning_rate": 4.9999702880241855e-06, "loss": 0.2136, "step": 559 }, { "epoch": 0.051596259271202836, "grad_norm": 1.2267888006027625, "learning_rate": 4.999966457973367e-06, "loss": 0.2173, "step": 560 }, { "epoch": 0.05168839544847284, "grad_norm": 1.1688358053384447, "learning_rate": 4.999962395800395e-06, "loss": 0.2334, "step": 561 }, { "epoch": 0.05178053162574285, "grad_norm": 1.3029073613643016, "learning_rate": 4.999958101505645e-06, "loss": 0.2071, "step": 562 }, { "epoch": 0.051872667803012855, "grad_norm": 1.1677420008318726, "learning_rate": 4.999953575089516e-06, "loss": 0.2165, "step": 563 }, { "epoch": 0.05196480398028286, "grad_norm": 1.116469178905927, "learning_rate": 4.999948816552429e-06, "loss": 0.2057, "step": 564 }, { "epoch": 0.05205694015755286, "grad_norm": 1.3097286700249924, "learning_rate": 4.999943825894825e-06, "loss": 0.2254, "step": 565 }, { "epoch": 0.052149076334822866, "grad_norm": 1.1322032799911372, "learning_rate": 4.999938603117167e-06, "loss": 0.2032, "step": 566 }, { "epoch": 0.05224121251209287, "grad_norm": 1.2103922176557846, "learning_rate": 4.999933148219942e-06, "loss": 0.2353, "step": 567 }, { "epoch": 0.05233334868936288, "grad_norm": 1.3405882785620524, "learning_rate": 4.999927461203654e-06, "loss": 0.2122, "step": 568 }, { "epoch": 0.052425484866632885, "grad_norm": 1.1357985528866983, "learning_rate": 4.999921542068833e-06, "loss": 0.2023, "step": 569 }, { "epoch": 0.05251762104390289, "grad_norm": 1.1622773497726775, "learning_rate": 4.9999153908160285e-06, "loss": 0.1914, "step": 570 }, { "epoch": 0.05260975722117289, "grad_norm": 1.2580747807039376, "learning_rate": 4.999909007445809e-06, "loss": 0.2155, "step": 571 }, { "epoch": 0.052701893398442896, "grad_norm": 1.1608877424155948, "learning_rate": 4.99990239195877e-06, "loss": 0.2156, "step": 572 }, { "epoch": 0.0527940295757129, "grad_norm": 1.1581171779291344, "learning_rate": 4.999895544355525e-06, "loss": 0.2128, "step": 573 }, { "epoch": 0.05288616575298291, "grad_norm": 1.153474177296958, "learning_rate": 4.9998884646367094e-06, "loss": 0.1973, "step": 574 }, { "epoch": 0.052978301930252915, "grad_norm": 1.1874129378072187, "learning_rate": 4.999881152802981e-06, "loss": 0.2063, "step": 575 }, { "epoch": 0.05307043810752292, "grad_norm": 1.0913852863675626, "learning_rate": 4.999873608855019e-06, "loss": 0.2013, "step": 576 }, { "epoch": 0.05316257428479292, "grad_norm": 1.124914712901831, "learning_rate": 4.999865832793522e-06, "loss": 0.2111, "step": 577 }, { "epoch": 0.053254710462062926, "grad_norm": 1.1644687308281916, "learning_rate": 4.9998578246192155e-06, "loss": 0.2059, "step": 578 }, { "epoch": 0.05334684663933294, "grad_norm": 1.2942615223555596, "learning_rate": 4.9998495843328385e-06, "loss": 0.2221, "step": 579 }, { "epoch": 0.05343898281660294, "grad_norm": 1.123179821863574, "learning_rate": 4.9998411119351605e-06, "loss": 0.2102, "step": 580 }, { "epoch": 0.053531118993872945, "grad_norm": 1.2273638033386287, "learning_rate": 4.999832407426966e-06, "loss": 0.215, "step": 581 }, { "epoch": 0.05362325517114295, "grad_norm": 1.2098380897181231, "learning_rate": 4.999823470809062e-06, "loss": 0.2148, "step": 582 }, { "epoch": 0.05371539134841295, "grad_norm": 0.9746673941052318, "learning_rate": 4.999814302082281e-06, "loss": 0.1878, "step": 583 }, { "epoch": 0.053807527525682956, "grad_norm": 1.0967724336422364, "learning_rate": 4.999804901247472e-06, "loss": 0.2021, "step": 584 }, { "epoch": 0.05389966370295297, "grad_norm": 1.2264527641340204, "learning_rate": 4.99979526830551e-06, "loss": 0.2083, "step": 585 }, { "epoch": 0.05399179988022297, "grad_norm": 1.185566661438185, "learning_rate": 4.999785403257288e-06, "loss": 0.1993, "step": 586 }, { "epoch": 0.054083936057492975, "grad_norm": 1.176578670156522, "learning_rate": 4.9997753061037225e-06, "loss": 0.1965, "step": 587 }, { "epoch": 0.05417607223476298, "grad_norm": 1.4360254532578547, "learning_rate": 4.9997649768457505e-06, "loss": 0.2219, "step": 588 }, { "epoch": 0.05426820841203298, "grad_norm": 1.144309223073869, "learning_rate": 4.999754415484331e-06, "loss": 0.2147, "step": 589 }, { "epoch": 0.05436034458930299, "grad_norm": 1.0619722443303092, "learning_rate": 4.9997436220204455e-06, "loss": 0.2046, "step": 590 }, { "epoch": 0.054452480766573, "grad_norm": 1.1799893495434046, "learning_rate": 4.9997325964550945e-06, "loss": 0.2243, "step": 591 }, { "epoch": 0.054544616943843, "grad_norm": 1.1114971013751254, "learning_rate": 4.999721338789304e-06, "loss": 0.2069, "step": 592 }, { "epoch": 0.054636753121113005, "grad_norm": 1.094133912064876, "learning_rate": 4.999709849024118e-06, "loss": 0.2018, "step": 593 }, { "epoch": 0.05472888929838301, "grad_norm": 1.0666830212123013, "learning_rate": 4.999698127160604e-06, "loss": 0.1898, "step": 594 }, { "epoch": 0.05482102547565301, "grad_norm": 1.1414568181365667, "learning_rate": 4.999686173199849e-06, "loss": 0.2035, "step": 595 }, { "epoch": 0.05491316165292302, "grad_norm": 1.1433975732381854, "learning_rate": 4.999673987142964e-06, "loss": 0.2044, "step": 596 }, { "epoch": 0.05500529783019303, "grad_norm": 1.2889576924059074, "learning_rate": 4.999661568991081e-06, "loss": 0.2042, "step": 597 }, { "epoch": 0.05509743400746303, "grad_norm": 1.2353072072103293, "learning_rate": 4.999648918745352e-06, "loss": 0.2115, "step": 598 }, { "epoch": 0.055189570184733035, "grad_norm": 1.2571958587328962, "learning_rate": 4.999636036406951e-06, "loss": 0.2169, "step": 599 }, { "epoch": 0.05528170636200304, "grad_norm": 1.46152140513451, "learning_rate": 4.999622921977076e-06, "loss": 0.2131, "step": 600 }, { "epoch": 0.05537384253927304, "grad_norm": 1.1820367409058008, "learning_rate": 4.999609575456944e-06, "loss": 0.1844, "step": 601 }, { "epoch": 0.05546597871654305, "grad_norm": 1.1737151850144656, "learning_rate": 4.9995959968477926e-06, "loss": 0.2256, "step": 602 }, { "epoch": 0.05555811489381306, "grad_norm": 1.1214004446252206, "learning_rate": 4.9995821861508844e-06, "loss": 0.1867, "step": 603 }, { "epoch": 0.05565025107108306, "grad_norm": 1.1260505778426881, "learning_rate": 4.999568143367501e-06, "loss": 0.1964, "step": 604 }, { "epoch": 0.055742387248353065, "grad_norm": 1.167666210113292, "learning_rate": 4.999553868498948e-06, "loss": 0.2115, "step": 605 }, { "epoch": 0.05583452342562307, "grad_norm": 1.1045012954183473, "learning_rate": 4.999539361546547e-06, "loss": 0.1758, "step": 606 }, { "epoch": 0.05592665960289308, "grad_norm": 1.233219811553698, "learning_rate": 4.999524622511649e-06, "loss": 0.2164, "step": 607 }, { "epoch": 0.05601879578016308, "grad_norm": 1.2706612209541799, "learning_rate": 4.99950965139562e-06, "loss": 0.2008, "step": 608 }, { "epoch": 0.05611093195743309, "grad_norm": 1.2646084508514779, "learning_rate": 4.999494448199851e-06, "loss": 0.2092, "step": 609 }, { "epoch": 0.05620306813470309, "grad_norm": 1.149501357551309, "learning_rate": 4.9994790129257535e-06, "loss": 0.1984, "step": 610 }, { "epoch": 0.056295204311973095, "grad_norm": 1.3291468516688794, "learning_rate": 4.999463345574761e-06, "loss": 0.2162, "step": 611 }, { "epoch": 0.0563873404892431, "grad_norm": 1.205429054407277, "learning_rate": 4.999447446148328e-06, "loss": 0.2137, "step": 612 }, { "epoch": 0.05647947666651311, "grad_norm": 1.2381435162688017, "learning_rate": 4.999431314647929e-06, "loss": 0.2129, "step": 613 }, { "epoch": 0.05657161284378311, "grad_norm": 1.2673565020592805, "learning_rate": 4.999414951075065e-06, "loss": 0.2007, "step": 614 }, { "epoch": 0.05666374902105312, "grad_norm": 1.1037792471970673, "learning_rate": 4.999398355431253e-06, "loss": 0.2108, "step": 615 }, { "epoch": 0.05675588519832312, "grad_norm": 1.16253375115935, "learning_rate": 4.999381527718036e-06, "loss": 0.2098, "step": 616 }, { "epoch": 0.056848021375593125, "grad_norm": 1.1401961391380055, "learning_rate": 4.999364467936974e-06, "loss": 0.2076, "step": 617 }, { "epoch": 0.05694015755286313, "grad_norm": 1.070625343707576, "learning_rate": 4.999347176089653e-06, "loss": 0.1909, "step": 618 }, { "epoch": 0.05703229373013314, "grad_norm": 1.1323161172935006, "learning_rate": 4.999329652177677e-06, "loss": 0.2081, "step": 619 }, { "epoch": 0.05712442990740314, "grad_norm": 1.1839375977802664, "learning_rate": 4.9993118962026735e-06, "loss": 0.2152, "step": 620 }, { "epoch": 0.05721656608467315, "grad_norm": 1.1346559448427023, "learning_rate": 4.999293908166292e-06, "loss": 0.1946, "step": 621 }, { "epoch": 0.05730870226194315, "grad_norm": 1.2069917410518431, "learning_rate": 4.999275688070202e-06, "loss": 0.1944, "step": 622 }, { "epoch": 0.057400838439213155, "grad_norm": 1.1793130901944762, "learning_rate": 4.999257235916096e-06, "loss": 0.2065, "step": 623 }, { "epoch": 0.057492974616483165, "grad_norm": 1.1633227618690698, "learning_rate": 4.999238551705686e-06, "loss": 0.1944, "step": 624 }, { "epoch": 0.05758511079375317, "grad_norm": 1.173343604205386, "learning_rate": 4.9992196354407075e-06, "loss": 0.2122, "step": 625 }, { "epoch": 0.05767724697102317, "grad_norm": 1.1303138827921249, "learning_rate": 4.999200487122917e-06, "loss": 0.2187, "step": 626 }, { "epoch": 0.05776938314829318, "grad_norm": 1.0398772349211105, "learning_rate": 4.999181106754093e-06, "loss": 0.1956, "step": 627 }, { "epoch": 0.05786151932556318, "grad_norm": 1.1638532808388222, "learning_rate": 4.999161494336033e-06, "loss": 0.1927, "step": 628 }, { "epoch": 0.057953655502833185, "grad_norm": 1.108151966330762, "learning_rate": 4.99914164987056e-06, "loss": 0.1986, "step": 629 }, { "epoch": 0.058045791680103195, "grad_norm": 1.183480026573518, "learning_rate": 4.999121573359516e-06, "loss": 0.2119, "step": 630 }, { "epoch": 0.0581379278573732, "grad_norm": 1.1555341737613782, "learning_rate": 4.999101264804765e-06, "loss": 0.1911, "step": 631 }, { "epoch": 0.0582300640346432, "grad_norm": 1.1980595228638002, "learning_rate": 4.9990807242081915e-06, "loss": 0.2053, "step": 632 }, { "epoch": 0.05832220021191321, "grad_norm": 1.3207559140555372, "learning_rate": 4.999059951571705e-06, "loss": 0.2119, "step": 633 }, { "epoch": 0.05841433638918321, "grad_norm": 1.2652476003839102, "learning_rate": 4.9990389468972336e-06, "loss": 0.2239, "step": 634 }, { "epoch": 0.058506472566453215, "grad_norm": 1.1645579104689652, "learning_rate": 4.999017710186725e-06, "loss": 0.2233, "step": 635 }, { "epoch": 0.058598608743723225, "grad_norm": 1.188157223333697, "learning_rate": 4.998996241442155e-06, "loss": 0.2056, "step": 636 }, { "epoch": 0.05869074492099323, "grad_norm": 1.174226549897811, "learning_rate": 4.998974540665514e-06, "loss": 0.2078, "step": 637 }, { "epoch": 0.05878288109826323, "grad_norm": 1.026345579666321, "learning_rate": 4.998952607858818e-06, "loss": 0.1947, "step": 638 }, { "epoch": 0.05887501727553324, "grad_norm": 1.3408302898393387, "learning_rate": 4.998930443024103e-06, "loss": 0.1999, "step": 639 }, { "epoch": 0.05896715345280324, "grad_norm": 1.2737103319088758, "learning_rate": 4.9989080461634285e-06, "loss": 0.2139, "step": 640 }, { "epoch": 0.05905928963007325, "grad_norm": 1.013191424805071, "learning_rate": 4.9988854172788725e-06, "loss": 0.1945, "step": 641 }, { "epoch": 0.059151425807343255, "grad_norm": 1.2422989589513913, "learning_rate": 4.998862556372537e-06, "loss": 0.2146, "step": 642 }, { "epoch": 0.05924356198461326, "grad_norm": 1.0916578038400657, "learning_rate": 4.998839463446543e-06, "loss": 0.1872, "step": 643 }, { "epoch": 0.05933569816188326, "grad_norm": 1.2140922793923727, "learning_rate": 4.998816138503038e-06, "loss": 0.2128, "step": 644 }, { "epoch": 0.05942783433915327, "grad_norm": 1.187481640612988, "learning_rate": 4.9987925815441835e-06, "loss": 0.186, "step": 645 }, { "epoch": 0.05951997051642327, "grad_norm": 1.0595639803375902, "learning_rate": 4.99876879257217e-06, "loss": 0.1814, "step": 646 }, { "epoch": 0.05961210669369328, "grad_norm": 1.1918728406860226, "learning_rate": 4.9987447715892046e-06, "loss": 0.2033, "step": 647 }, { "epoch": 0.059704242870963285, "grad_norm": 1.1277418699802502, "learning_rate": 4.998720518597518e-06, "loss": 0.1976, "step": 648 }, { "epoch": 0.05979637904823329, "grad_norm": 1.2389827145068848, "learning_rate": 4.998696033599363e-06, "loss": 0.2214, "step": 649 }, { "epoch": 0.05988851522550329, "grad_norm": 1.1715668898340321, "learning_rate": 4.998671316597012e-06, "loss": 0.1957, "step": 650 }, { "epoch": 0.0599806514027733, "grad_norm": 1.1157040510100455, "learning_rate": 4.998646367592761e-06, "loss": 0.1977, "step": 651 }, { "epoch": 0.0600727875800433, "grad_norm": 1.2964622196527131, "learning_rate": 4.998621186588925e-06, "loss": 0.2137, "step": 652 }, { "epoch": 0.06016492375731331, "grad_norm": 1.1874309992271197, "learning_rate": 4.998595773587844e-06, "loss": 0.198, "step": 653 }, { "epoch": 0.060257059934583315, "grad_norm": 1.0821447628825356, "learning_rate": 4.998570128591875e-06, "loss": 0.1983, "step": 654 }, { "epoch": 0.06034919611185332, "grad_norm": 1.1274894688033879, "learning_rate": 4.998544251603402e-06, "loss": 0.197, "step": 655 }, { "epoch": 0.06044133228912332, "grad_norm": 1.2286272649030554, "learning_rate": 4.998518142624826e-06, "loss": 0.2165, "step": 656 }, { "epoch": 0.06053346846639333, "grad_norm": 1.0008382256801454, "learning_rate": 4.998491801658571e-06, "loss": 0.1782, "step": 657 }, { "epoch": 0.06062560464366334, "grad_norm": 1.1573290350734389, "learning_rate": 4.998465228707084e-06, "loss": 0.1976, "step": 658 }, { "epoch": 0.06071774082093334, "grad_norm": 1.1294960460081294, "learning_rate": 4.998438423772831e-06, "loss": 0.2195, "step": 659 }, { "epoch": 0.060809876998203345, "grad_norm": 1.154289522109689, "learning_rate": 4.998411386858303e-06, "loss": 0.2114, "step": 660 }, { "epoch": 0.06090201317547335, "grad_norm": 1.0669414151184684, "learning_rate": 4.998384117966007e-06, "loss": 0.1967, "step": 661 }, { "epoch": 0.06099414935274335, "grad_norm": 1.1281826805262178, "learning_rate": 4.998356617098478e-06, "loss": 0.2076, "step": 662 }, { "epoch": 0.06108628553001336, "grad_norm": 1.114308852904428, "learning_rate": 4.9983288842582665e-06, "loss": 0.2047, "step": 663 }, { "epoch": 0.06117842170728337, "grad_norm": 1.2071437766523476, "learning_rate": 4.9983009194479505e-06, "loss": 0.2026, "step": 664 }, { "epoch": 0.06127055788455337, "grad_norm": 1.1827937087808504, "learning_rate": 4.998272722670126e-06, "loss": 0.2008, "step": 665 }, { "epoch": 0.061362694061823375, "grad_norm": 1.0733621691103314, "learning_rate": 4.998244293927409e-06, "loss": 0.1813, "step": 666 }, { "epoch": 0.06145483023909338, "grad_norm": 1.1788970733123387, "learning_rate": 4.998215633222441e-06, "loss": 0.219, "step": 667 }, { "epoch": 0.06154696641636338, "grad_norm": 1.2172001325394024, "learning_rate": 4.998186740557882e-06, "loss": 0.1977, "step": 668 }, { "epoch": 0.061639102593633394, "grad_norm": 1.1195473427959475, "learning_rate": 4.998157615936416e-06, "loss": 0.1914, "step": 669 }, { "epoch": 0.0617312387709034, "grad_norm": 1.1674007562772488, "learning_rate": 4.998128259360747e-06, "loss": 0.2087, "step": 670 }, { "epoch": 0.0618233749481734, "grad_norm": 1.212028870572152, "learning_rate": 4.998098670833599e-06, "loss": 0.211, "step": 671 }, { "epoch": 0.061915511125443405, "grad_norm": 1.1367268757070708, "learning_rate": 4.998068850357721e-06, "loss": 0.1912, "step": 672 }, { "epoch": 0.06200764730271341, "grad_norm": 1.08789125897739, "learning_rate": 4.998038797935882e-06, "loss": 0.1952, "step": 673 }, { "epoch": 0.06209978347998341, "grad_norm": 1.1357554005551076, "learning_rate": 4.9980085135708715e-06, "loss": 0.2017, "step": 674 }, { "epoch": 0.062191919657253424, "grad_norm": 1.14464491950595, "learning_rate": 4.997977997265501e-06, "loss": 0.2049, "step": 675 }, { "epoch": 0.06228405583452343, "grad_norm": 1.0226569388533309, "learning_rate": 4.997947249022605e-06, "loss": 0.1861, "step": 676 }, { "epoch": 0.06237619201179343, "grad_norm": 1.12500761449028, "learning_rate": 4.997916268845038e-06, "loss": 0.1935, "step": 677 }, { "epoch": 0.062468328189063435, "grad_norm": 1.0950565167941166, "learning_rate": 4.997885056735677e-06, "loss": 0.1959, "step": 678 }, { "epoch": 0.06256046436633345, "grad_norm": 1.1475008577190142, "learning_rate": 4.99785361269742e-06, "loss": 0.1987, "step": 679 }, { "epoch": 0.06265260054360344, "grad_norm": 1.153601965067529, "learning_rate": 4.9978219367331856e-06, "loss": 0.2098, "step": 680 }, { "epoch": 0.06274473672087345, "grad_norm": 1.1044287968459916, "learning_rate": 4.997790028845916e-06, "loss": 0.2078, "step": 681 }, { "epoch": 0.06283687289814345, "grad_norm": 1.087079920318585, "learning_rate": 4.997757889038573e-06, "loss": 0.2082, "step": 682 }, { "epoch": 0.06292900907541346, "grad_norm": 1.163440565892733, "learning_rate": 4.9977255173141405e-06, "loss": 0.1917, "step": 683 }, { "epoch": 0.06302114525268347, "grad_norm": 1.0799809277476804, "learning_rate": 4.997692913675626e-06, "loss": 0.2071, "step": 684 }, { "epoch": 0.06311328142995347, "grad_norm": 1.0607317616778018, "learning_rate": 4.997660078126055e-06, "loss": 0.2097, "step": 685 }, { "epoch": 0.06320541760722348, "grad_norm": 1.079311486129121, "learning_rate": 4.997627010668477e-06, "loss": 0.2119, "step": 686 }, { "epoch": 0.06329755378449348, "grad_norm": 1.0586204104270576, "learning_rate": 4.997593711305963e-06, "loss": 0.1931, "step": 687 }, { "epoch": 0.06338968996176349, "grad_norm": 1.3372536926610412, "learning_rate": 4.997560180041604e-06, "loss": 0.2016, "step": 688 }, { "epoch": 0.0634818261390335, "grad_norm": 1.026145931825911, "learning_rate": 4.997526416878513e-06, "loss": 0.1991, "step": 689 }, { "epoch": 0.0635739623163035, "grad_norm": 1.0825949947992264, "learning_rate": 4.997492421819825e-06, "loss": 0.1901, "step": 690 }, { "epoch": 0.0636660984935735, "grad_norm": 1.1104887396834857, "learning_rate": 4.997458194868697e-06, "loss": 0.1964, "step": 691 }, { "epoch": 0.0637582346708435, "grad_norm": 1.1414256643477074, "learning_rate": 4.997423736028308e-06, "loss": 0.1942, "step": 692 }, { "epoch": 0.06385037084811351, "grad_norm": 1.0428539165758621, "learning_rate": 4.997389045301856e-06, "loss": 0.1803, "step": 693 }, { "epoch": 0.06394250702538351, "grad_norm": 1.1658483507123416, "learning_rate": 4.997354122692561e-06, "loss": 0.2028, "step": 694 }, { "epoch": 0.06403464320265352, "grad_norm": 0.9640305471456437, "learning_rate": 4.997318968203668e-06, "loss": 0.1639, "step": 695 }, { "epoch": 0.06412677937992353, "grad_norm": 0.9944000405107695, "learning_rate": 4.99728358183844e-06, "loss": 0.1952, "step": 696 }, { "epoch": 0.06421891555719353, "grad_norm": 1.0588180086567232, "learning_rate": 4.9972479636001625e-06, "loss": 0.1902, "step": 697 }, { "epoch": 0.06431105173446354, "grad_norm": 1.1141307683264974, "learning_rate": 4.9972121134921435e-06, "loss": 0.1993, "step": 698 }, { "epoch": 0.06440318791173354, "grad_norm": 1.2576518697394798, "learning_rate": 4.99717603151771e-06, "loss": 0.1968, "step": 699 }, { "epoch": 0.06449532408900355, "grad_norm": 1.0725923379796012, "learning_rate": 4.997139717680214e-06, "loss": 0.1962, "step": 700 }, { "epoch": 0.06458746026627356, "grad_norm": 1.1265762810463271, "learning_rate": 4.9971031719830255e-06, "loss": 0.2103, "step": 701 }, { "epoch": 0.06467959644354355, "grad_norm": 1.0764056898213448, "learning_rate": 4.9970663944295396e-06, "loss": 0.1929, "step": 702 }, { "epoch": 0.06477173262081357, "grad_norm": 1.1443294790794694, "learning_rate": 4.99702938502317e-06, "loss": 0.2004, "step": 703 }, { "epoch": 0.06486386879808356, "grad_norm": 1.1556168002401073, "learning_rate": 4.996992143767353e-06, "loss": 0.1955, "step": 704 }, { "epoch": 0.06495600497535357, "grad_norm": 1.1330344957803058, "learning_rate": 4.996954670665547e-06, "loss": 0.1971, "step": 705 }, { "epoch": 0.06504814115262358, "grad_norm": 1.1245253883139057, "learning_rate": 4.996916965721232e-06, "loss": 0.2186, "step": 706 }, { "epoch": 0.06514027732989358, "grad_norm": 1.1056427514136586, "learning_rate": 4.996879028937906e-06, "loss": 0.2043, "step": 707 }, { "epoch": 0.06523241350716359, "grad_norm": 1.1164453535256291, "learning_rate": 4.996840860319094e-06, "loss": 0.2042, "step": 708 }, { "epoch": 0.06532454968443359, "grad_norm": 1.0858019482374586, "learning_rate": 4.996802459868341e-06, "loss": 0.1927, "step": 709 }, { "epoch": 0.0654166858617036, "grad_norm": 1.1400184015748671, "learning_rate": 4.996763827589209e-06, "loss": 0.2088, "step": 710 }, { "epoch": 0.0655088220389736, "grad_norm": 1.2412575997017532, "learning_rate": 4.996724963485288e-06, "loss": 0.2067, "step": 711 }, { "epoch": 0.06560095821624361, "grad_norm": 1.102175865083544, "learning_rate": 4.996685867560186e-06, "loss": 0.2011, "step": 712 }, { "epoch": 0.06569309439351362, "grad_norm": 1.1335302279554809, "learning_rate": 4.996646539817531e-06, "loss": 0.2104, "step": 713 }, { "epoch": 0.06578523057078361, "grad_norm": 1.0993877114695203, "learning_rate": 4.996606980260977e-06, "loss": 0.2188, "step": 714 }, { "epoch": 0.06587736674805363, "grad_norm": 1.0510165045528403, "learning_rate": 4.9965671888941955e-06, "loss": 0.1866, "step": 715 }, { "epoch": 0.06596950292532362, "grad_norm": 1.1947642401183298, "learning_rate": 4.996527165720882e-06, "loss": 0.2027, "step": 716 }, { "epoch": 0.06606163910259363, "grad_norm": 1.1376782085741068, "learning_rate": 4.996486910744753e-06, "loss": 0.184, "step": 717 }, { "epoch": 0.06615377527986364, "grad_norm": 0.9570102832377015, "learning_rate": 4.996446423969546e-06, "loss": 0.1791, "step": 718 }, { "epoch": 0.06624591145713364, "grad_norm": 1.1353836050535746, "learning_rate": 4.9964057053990186e-06, "loss": 0.2073, "step": 719 }, { "epoch": 0.06633804763440365, "grad_norm": 1.2154219117874654, "learning_rate": 4.996364755036954e-06, "loss": 0.201, "step": 720 }, { "epoch": 0.06643018381167365, "grad_norm": 1.0109850083514942, "learning_rate": 4.996323572887153e-06, "loss": 0.1882, "step": 721 }, { "epoch": 0.06652231998894366, "grad_norm": 1.101603698240089, "learning_rate": 4.996282158953439e-06, "loss": 0.1845, "step": 722 }, { "epoch": 0.06661445616621367, "grad_norm": 1.1990367891628382, "learning_rate": 4.996240513239658e-06, "loss": 0.2094, "step": 723 }, { "epoch": 0.06670659234348367, "grad_norm": 1.1531297226861148, "learning_rate": 4.9961986357496775e-06, "loss": 0.2001, "step": 724 }, { "epoch": 0.06679872852075368, "grad_norm": 1.1181593399001286, "learning_rate": 4.996156526487383e-06, "loss": 0.1898, "step": 725 }, { "epoch": 0.06689086469802367, "grad_norm": 1.1830597611536493, "learning_rate": 4.996114185456688e-06, "loss": 0.1877, "step": 726 }, { "epoch": 0.06698300087529369, "grad_norm": 1.1232584394160454, "learning_rate": 4.996071612661523e-06, "loss": 0.1953, "step": 727 }, { "epoch": 0.06707513705256368, "grad_norm": 1.0889290193640835, "learning_rate": 4.996028808105838e-06, "loss": 0.2108, "step": 728 }, { "epoch": 0.0671672732298337, "grad_norm": 1.0995182161943409, "learning_rate": 4.995985771793611e-06, "loss": 0.1885, "step": 729 }, { "epoch": 0.0672594094071037, "grad_norm": 0.991728475400526, "learning_rate": 4.995942503728837e-06, "loss": 0.1945, "step": 730 }, { "epoch": 0.0673515455843737, "grad_norm": 1.1414476841467807, "learning_rate": 4.995899003915532e-06, "loss": 0.2073, "step": 731 }, { "epoch": 0.06744368176164371, "grad_norm": 1.1816046268935128, "learning_rate": 4.995855272357736e-06, "loss": 0.2024, "step": 732 }, { "epoch": 0.06753581793891371, "grad_norm": 1.0793429170100068, "learning_rate": 4.995811309059509e-06, "loss": 0.2027, "step": 733 }, { "epoch": 0.06762795411618372, "grad_norm": 1.0463184895901732, "learning_rate": 4.995767114024934e-06, "loss": 0.1931, "step": 734 }, { "epoch": 0.06772009029345373, "grad_norm": 1.11156175838415, "learning_rate": 4.995722687258113e-06, "loss": 0.2125, "step": 735 }, { "epoch": 0.06781222647072373, "grad_norm": 1.1620522039851269, "learning_rate": 4.995678028763172e-06, "loss": 0.1767, "step": 736 }, { "epoch": 0.06790436264799374, "grad_norm": 1.1748972006716736, "learning_rate": 4.995633138544258e-06, "loss": 0.1896, "step": 737 }, { "epoch": 0.06799649882526373, "grad_norm": 1.1248616510945046, "learning_rate": 4.995588016605539e-06, "loss": 0.1975, "step": 738 }, { "epoch": 0.06808863500253375, "grad_norm": 1.1490092157815965, "learning_rate": 4.995542662951203e-06, "loss": 0.2018, "step": 739 }, { "epoch": 0.06818077117980376, "grad_norm": 1.29389132222856, "learning_rate": 4.9954970775854626e-06, "loss": 0.2105, "step": 740 }, { "epoch": 0.06827290735707375, "grad_norm": 1.0475002924733825, "learning_rate": 4.99545126051255e-06, "loss": 0.1922, "step": 741 }, { "epoch": 0.06836504353434376, "grad_norm": 1.1500051467045975, "learning_rate": 4.99540521173672e-06, "loss": 0.2023, "step": 742 }, { "epoch": 0.06845717971161376, "grad_norm": 1.15475035543741, "learning_rate": 4.995358931262246e-06, "loss": 0.1969, "step": 743 }, { "epoch": 0.06854931588888377, "grad_norm": 1.1034626957034204, "learning_rate": 4.995312419093427e-06, "loss": 0.2191, "step": 744 }, { "epoch": 0.06864145206615377, "grad_norm": 1.1053020130496054, "learning_rate": 4.995265675234583e-06, "loss": 0.1987, "step": 745 }, { "epoch": 0.06873358824342378, "grad_norm": 1.1992876381966995, "learning_rate": 4.995218699690051e-06, "loss": 0.209, "step": 746 }, { "epoch": 0.06882572442069379, "grad_norm": 1.082494388880573, "learning_rate": 4.995171492464195e-06, "loss": 0.1994, "step": 747 }, { "epoch": 0.06891786059796379, "grad_norm": 1.129990460435263, "learning_rate": 4.995124053561396e-06, "loss": 0.2027, "step": 748 }, { "epoch": 0.0690099967752338, "grad_norm": 1.0028326159846859, "learning_rate": 4.9950763829860615e-06, "loss": 0.1868, "step": 749 }, { "epoch": 0.0691021329525038, "grad_norm": 1.0755633729487084, "learning_rate": 4.995028480742616e-06, "loss": 0.1966, "step": 750 }, { "epoch": 0.0691942691297738, "grad_norm": 1.1063468461219594, "learning_rate": 4.9949803468355075e-06, "loss": 0.1893, "step": 751 }, { "epoch": 0.06928640530704382, "grad_norm": 1.048084914654042, "learning_rate": 4.994931981269206e-06, "loss": 0.2112, "step": 752 }, { "epoch": 0.06937854148431381, "grad_norm": 1.0230459409513677, "learning_rate": 4.9948833840482e-06, "loss": 0.1965, "step": 753 }, { "epoch": 0.06947067766158382, "grad_norm": 1.0632313033802272, "learning_rate": 4.994834555177004e-06, "loss": 0.2011, "step": 754 }, { "epoch": 0.06956281383885382, "grad_norm": 1.0589252280154375, "learning_rate": 4.994785494660151e-06, "loss": 0.2018, "step": 755 }, { "epoch": 0.06965495001612383, "grad_norm": 1.1755938840081506, "learning_rate": 4.994736202502196e-06, "loss": 0.2266, "step": 756 }, { "epoch": 0.06974708619339384, "grad_norm": 1.0493776400120107, "learning_rate": 4.994686678707716e-06, "loss": 0.2197, "step": 757 }, { "epoch": 0.06983922237066384, "grad_norm": 0.9856328790985965, "learning_rate": 4.9946369232813104e-06, "loss": 0.1926, "step": 758 }, { "epoch": 0.06993135854793385, "grad_norm": 0.9684741371548949, "learning_rate": 4.994586936227598e-06, "loss": 0.1856, "step": 759 }, { "epoch": 0.07002349472520385, "grad_norm": 1.0235610225225114, "learning_rate": 4.99453671755122e-06, "loss": 0.1985, "step": 760 }, { "epoch": 0.07011563090247386, "grad_norm": 1.0056452861965137, "learning_rate": 4.994486267256839e-06, "loss": 0.2043, "step": 761 }, { "epoch": 0.07020776707974385, "grad_norm": 0.9480271147994421, "learning_rate": 4.994435585349139e-06, "loss": 0.1854, "step": 762 }, { "epoch": 0.07029990325701387, "grad_norm": 1.073899838330998, "learning_rate": 4.994384671832827e-06, "loss": 0.194, "step": 763 }, { "epoch": 0.07039203943428388, "grad_norm": 1.1042378147261156, "learning_rate": 4.994333526712629e-06, "loss": 0.2038, "step": 764 }, { "epoch": 0.07048417561155387, "grad_norm": 1.0929836003453606, "learning_rate": 4.994282149993296e-06, "loss": 0.1941, "step": 765 }, { "epoch": 0.07057631178882388, "grad_norm": 1.002112688526324, "learning_rate": 4.994230541679596e-06, "loss": 0.1952, "step": 766 }, { "epoch": 0.07066844796609388, "grad_norm": 1.0405743611774914, "learning_rate": 4.994178701776322e-06, "loss": 0.1973, "step": 767 }, { "epoch": 0.07076058414336389, "grad_norm": 1.0611508046462508, "learning_rate": 4.994126630288287e-06, "loss": 0.1998, "step": 768 }, { "epoch": 0.0708527203206339, "grad_norm": 1.0044887753865614, "learning_rate": 4.994074327220326e-06, "loss": 0.1912, "step": 769 }, { "epoch": 0.0709448564979039, "grad_norm": 1.0125081975049615, "learning_rate": 4.994021792577296e-06, "loss": 0.1996, "step": 770 }, { "epoch": 0.07103699267517391, "grad_norm": 1.1171803446967403, "learning_rate": 4.993969026364074e-06, "loss": 0.2068, "step": 771 }, { "epoch": 0.0711291288524439, "grad_norm": 1.148820539107491, "learning_rate": 4.993916028585559e-06, "loss": 0.198, "step": 772 }, { "epoch": 0.07122126502971392, "grad_norm": 1.0778949515489327, "learning_rate": 4.993862799246672e-06, "loss": 0.1984, "step": 773 }, { "epoch": 0.07131340120698393, "grad_norm": 1.1483231088801946, "learning_rate": 4.9938093383523565e-06, "loss": 0.2117, "step": 774 }, { "epoch": 0.07140553738425393, "grad_norm": 1.1566007468065098, "learning_rate": 4.993755645907575e-06, "loss": 0.2116, "step": 775 }, { "epoch": 0.07149767356152394, "grad_norm": 1.0319713754984687, "learning_rate": 4.993701721917314e-06, "loss": 0.2012, "step": 776 }, { "epoch": 0.07158980973879393, "grad_norm": 1.077311245060311, "learning_rate": 4.993647566386579e-06, "loss": 0.2059, "step": 777 }, { "epoch": 0.07168194591606394, "grad_norm": 1.1405163265066733, "learning_rate": 4.993593179320399e-06, "loss": 0.1983, "step": 778 }, { "epoch": 0.07177408209333394, "grad_norm": 1.0767703872817018, "learning_rate": 4.993538560723824e-06, "loss": 0.2079, "step": 779 }, { "epoch": 0.07186621827060395, "grad_norm": 1.2227974845354599, "learning_rate": 4.993483710601926e-06, "loss": 0.2136, "step": 780 }, { "epoch": 0.07195835444787396, "grad_norm": 1.0541963353313086, "learning_rate": 4.993428628959796e-06, "loss": 0.1913, "step": 781 }, { "epoch": 0.07205049062514396, "grad_norm": 1.0513503112741112, "learning_rate": 4.993373315802551e-06, "loss": 0.201, "step": 782 }, { "epoch": 0.07214262680241397, "grad_norm": 1.0201367211302232, "learning_rate": 4.993317771135324e-06, "loss": 0.1903, "step": 783 }, { "epoch": 0.07223476297968397, "grad_norm": 1.1327814545653032, "learning_rate": 4.993261994963275e-06, "loss": 0.2212, "step": 784 }, { "epoch": 0.07232689915695398, "grad_norm": 1.0740213154007985, "learning_rate": 4.99320598729158e-06, "loss": 0.1948, "step": 785 }, { "epoch": 0.07241903533422399, "grad_norm": 0.957971255991909, "learning_rate": 4.993149748125441e-06, "loss": 0.1928, "step": 786 }, { "epoch": 0.07251117151149399, "grad_norm": 1.0166766681701447, "learning_rate": 4.99309327747008e-06, "loss": 0.1938, "step": 787 }, { "epoch": 0.072603307688764, "grad_norm": 1.0764254895760437, "learning_rate": 4.99303657533074e-06, "loss": 0.2087, "step": 788 }, { "epoch": 0.07269544386603399, "grad_norm": 1.0328362715853223, "learning_rate": 4.9929796417126855e-06, "loss": 0.2004, "step": 789 }, { "epoch": 0.072787580043304, "grad_norm": 1.0993910395671507, "learning_rate": 4.992922476621203e-06, "loss": 0.1968, "step": 790 }, { "epoch": 0.07287971622057401, "grad_norm": 1.174010366680361, "learning_rate": 4.992865080061599e-06, "loss": 0.2078, "step": 791 }, { "epoch": 0.07297185239784401, "grad_norm": 1.0829664383846667, "learning_rate": 4.992807452039206e-06, "loss": 0.2075, "step": 792 }, { "epoch": 0.07306398857511402, "grad_norm": 1.0362818708953512, "learning_rate": 4.992749592559372e-06, "loss": 0.2064, "step": 793 }, { "epoch": 0.07315612475238402, "grad_norm": 1.1051992276067801, "learning_rate": 4.99269150162747e-06, "loss": 0.2035, "step": 794 }, { "epoch": 0.07324826092965403, "grad_norm": 1.0070264686302375, "learning_rate": 4.9926331792488935e-06, "loss": 0.2013, "step": 795 }, { "epoch": 0.07334039710692403, "grad_norm": 1.0971405705997572, "learning_rate": 4.992574625429059e-06, "loss": 0.1991, "step": 796 }, { "epoch": 0.07343253328419404, "grad_norm": 1.0608316209147346, "learning_rate": 4.992515840173401e-06, "loss": 0.205, "step": 797 }, { "epoch": 0.07352466946146405, "grad_norm": 1.0833481979795825, "learning_rate": 4.992456823487381e-06, "loss": 0.2123, "step": 798 }, { "epoch": 0.07361680563873405, "grad_norm": 1.130384094246514, "learning_rate": 4.992397575376474e-06, "loss": 0.206, "step": 799 }, { "epoch": 0.07370894181600406, "grad_norm": 1.032883544230814, "learning_rate": 4.992338095846185e-06, "loss": 0.1954, "step": 800 }, { "epoch": 0.07380107799327405, "grad_norm": 1.0325683668694534, "learning_rate": 4.992278384902036e-06, "loss": 0.1823, "step": 801 }, { "epoch": 0.07389321417054406, "grad_norm": 1.0856330855658867, "learning_rate": 4.992218442549571e-06, "loss": 0.1972, "step": 802 }, { "epoch": 0.07398535034781407, "grad_norm": 0.9419401027800425, "learning_rate": 4.992158268794355e-06, "loss": 0.174, "step": 803 }, { "epoch": 0.07407748652508407, "grad_norm": 0.9955805004494609, "learning_rate": 4.992097863641975e-06, "loss": 0.1921, "step": 804 }, { "epoch": 0.07416962270235408, "grad_norm": 1.0997153728401974, "learning_rate": 4.992037227098041e-06, "loss": 0.2003, "step": 805 }, { "epoch": 0.07426175887962408, "grad_norm": 1.1910036898293976, "learning_rate": 4.991976359168182e-06, "loss": 0.2154, "step": 806 }, { "epoch": 0.07435389505689409, "grad_norm": 1.1056827367049222, "learning_rate": 4.99191525985805e-06, "loss": 0.2022, "step": 807 }, { "epoch": 0.0744460312341641, "grad_norm": 1.042144172015194, "learning_rate": 4.991853929173318e-06, "loss": 0.1988, "step": 808 }, { "epoch": 0.0745381674114341, "grad_norm": 1.01022506354823, "learning_rate": 4.99179236711968e-06, "loss": 0.1876, "step": 809 }, { "epoch": 0.07463030358870411, "grad_norm": 1.0524961526541383, "learning_rate": 4.991730573702852e-06, "loss": 0.1909, "step": 810 }, { "epoch": 0.0747224397659741, "grad_norm": 1.1702706877003066, "learning_rate": 4.991668548928573e-06, "loss": 0.2195, "step": 811 }, { "epoch": 0.07481457594324412, "grad_norm": 1.0918293776434618, "learning_rate": 4.991606292802601e-06, "loss": 0.2199, "step": 812 }, { "epoch": 0.07490671212051413, "grad_norm": 1.0721208493063734, "learning_rate": 4.991543805330716e-06, "loss": 0.2144, "step": 813 }, { "epoch": 0.07499884829778412, "grad_norm": 1.0564964966450854, "learning_rate": 4.991481086518721e-06, "loss": 0.1924, "step": 814 }, { "epoch": 0.07509098447505413, "grad_norm": 1.028522715058719, "learning_rate": 4.9914181363724394e-06, "loss": 0.1979, "step": 815 }, { "epoch": 0.07518312065232413, "grad_norm": 1.0724064172531385, "learning_rate": 4.991354954897715e-06, "loss": 0.2131, "step": 816 }, { "epoch": 0.07527525682959414, "grad_norm": 1.0614207713972115, "learning_rate": 4.991291542100416e-06, "loss": 0.207, "step": 817 }, { "epoch": 0.07536739300686414, "grad_norm": 1.0449337855654208, "learning_rate": 4.991227897986428e-06, "loss": 0.1917, "step": 818 }, { "epoch": 0.07545952918413415, "grad_norm": 1.1357051354151935, "learning_rate": 4.991164022561662e-06, "loss": 0.2016, "step": 819 }, { "epoch": 0.07555166536140416, "grad_norm": 0.9585027095704082, "learning_rate": 4.991099915832048e-06, "loss": 0.1846, "step": 820 }, { "epoch": 0.07564380153867416, "grad_norm": 1.1083315681597967, "learning_rate": 4.9910355778035394e-06, "loss": 0.2127, "step": 821 }, { "epoch": 0.07573593771594417, "grad_norm": 1.1383747660532335, "learning_rate": 4.990971008482109e-06, "loss": 0.1968, "step": 822 }, { "epoch": 0.07582807389321417, "grad_norm": 1.104749280816824, "learning_rate": 4.990906207873753e-06, "loss": 0.1904, "step": 823 }, { "epoch": 0.07592021007048418, "grad_norm": 1.0561974215226457, "learning_rate": 4.990841175984486e-06, "loss": 0.2, "step": 824 }, { "epoch": 0.07601234624775419, "grad_norm": 1.0340894305919612, "learning_rate": 4.9907759128203485e-06, "loss": 0.1958, "step": 825 }, { "epoch": 0.07610448242502418, "grad_norm": 1.020276399091769, "learning_rate": 4.9907104183874e-06, "loss": 0.1939, "step": 826 }, { "epoch": 0.0761966186022942, "grad_norm": 1.074585805099351, "learning_rate": 4.990644692691721e-06, "loss": 0.1983, "step": 827 }, { "epoch": 0.07628875477956419, "grad_norm": 0.9840992754978488, "learning_rate": 4.990578735739413e-06, "loss": 0.1936, "step": 828 }, { "epoch": 0.0763808909568342, "grad_norm": 1.0576207176961585, "learning_rate": 4.990512547536602e-06, "loss": 0.196, "step": 829 }, { "epoch": 0.07647302713410421, "grad_norm": 1.054963331859563, "learning_rate": 4.990446128089434e-06, "loss": 0.1939, "step": 830 }, { "epoch": 0.07656516331137421, "grad_norm": 1.0430434986538253, "learning_rate": 4.990379477404073e-06, "loss": 0.1973, "step": 831 }, { "epoch": 0.07665729948864422, "grad_norm": 1.0974937731254035, "learning_rate": 4.9903125954867114e-06, "loss": 0.2066, "step": 832 }, { "epoch": 0.07674943566591422, "grad_norm": 1.0481385062164033, "learning_rate": 4.990245482343556e-06, "loss": 0.1749, "step": 833 }, { "epoch": 0.07684157184318423, "grad_norm": 1.1591353967604399, "learning_rate": 4.990178137980841e-06, "loss": 0.2042, "step": 834 }, { "epoch": 0.07693370802045423, "grad_norm": 1.020946978751914, "learning_rate": 4.990110562404817e-06, "loss": 0.1887, "step": 835 }, { "epoch": 0.07702584419772424, "grad_norm": 1.0521526449822267, "learning_rate": 4.990042755621759e-06, "loss": 0.1925, "step": 836 }, { "epoch": 0.07711798037499425, "grad_norm": 1.08541448464305, "learning_rate": 4.989974717637963e-06, "loss": 0.1917, "step": 837 }, { "epoch": 0.07721011655226424, "grad_norm": 1.044048482126402, "learning_rate": 4.989906448459748e-06, "loss": 0.2083, "step": 838 }, { "epoch": 0.07730225272953425, "grad_norm": 1.0120160811235484, "learning_rate": 4.98983794809345e-06, "loss": 0.1941, "step": 839 }, { "epoch": 0.07739438890680425, "grad_norm": 1.102690087406534, "learning_rate": 4.989769216545431e-06, "loss": 0.1952, "step": 840 }, { "epoch": 0.07748652508407426, "grad_norm": 1.0589664371165017, "learning_rate": 4.9897002538220715e-06, "loss": 0.1889, "step": 841 }, { "epoch": 0.07757866126134427, "grad_norm": 1.0781668139280136, "learning_rate": 4.989631059929777e-06, "loss": 0.2183, "step": 842 }, { "epoch": 0.07767079743861427, "grad_norm": 1.0272497094730775, "learning_rate": 4.989561634874969e-06, "loss": 0.1995, "step": 843 }, { "epoch": 0.07776293361588428, "grad_norm": 1.1132914451731781, "learning_rate": 4.9894919786640964e-06, "loss": 0.2053, "step": 844 }, { "epoch": 0.07785506979315428, "grad_norm": 1.1159855286710003, "learning_rate": 4.989422091303625e-06, "loss": 0.1962, "step": 845 }, { "epoch": 0.07794720597042429, "grad_norm": 1.164205836975356, "learning_rate": 4.989351972800045e-06, "loss": 0.2039, "step": 846 }, { "epoch": 0.0780393421476943, "grad_norm": 1.0153737620570222, "learning_rate": 4.989281623159866e-06, "loss": 0.205, "step": 847 }, { "epoch": 0.0781314783249643, "grad_norm": 1.1936452816832441, "learning_rate": 4.98921104238962e-06, "loss": 0.2094, "step": 848 }, { "epoch": 0.0782236145022343, "grad_norm": 1.0872486263629397, "learning_rate": 4.989140230495862e-06, "loss": 0.2017, "step": 849 }, { "epoch": 0.0783157506795043, "grad_norm": 1.0430936509820625, "learning_rate": 4.989069187485165e-06, "loss": 0.1959, "step": 850 }, { "epoch": 0.07840788685677431, "grad_norm": 1.043073581421324, "learning_rate": 4.988997913364126e-06, "loss": 0.1943, "step": 851 }, { "epoch": 0.07850002303404431, "grad_norm": 1.142893762388807, "learning_rate": 4.988926408139363e-06, "loss": 0.2164, "step": 852 }, { "epoch": 0.07859215921131432, "grad_norm": 1.1122064536056118, "learning_rate": 4.988854671817516e-06, "loss": 0.2032, "step": 853 }, { "epoch": 0.07868429538858433, "grad_norm": 1.0929495216494962, "learning_rate": 4.988782704405244e-06, "loss": 0.1949, "step": 854 }, { "epoch": 0.07877643156585433, "grad_norm": 1.2464150623403418, "learning_rate": 4.98871050590923e-06, "loss": 0.2328, "step": 855 }, { "epoch": 0.07886856774312434, "grad_norm": 1.0842337596440934, "learning_rate": 4.988638076336178e-06, "loss": 0.2011, "step": 856 }, { "epoch": 0.07896070392039434, "grad_norm": 1.0893100625521832, "learning_rate": 4.988565415692812e-06, "loss": 0.2067, "step": 857 }, { "epoch": 0.07905284009766435, "grad_norm": 0.9557182611203138, "learning_rate": 4.988492523985881e-06, "loss": 0.1788, "step": 858 }, { "epoch": 0.07914497627493436, "grad_norm": 1.0570627168797462, "learning_rate": 4.9884194012221496e-06, "loss": 0.2019, "step": 859 }, { "epoch": 0.07923711245220436, "grad_norm": 1.0179565473648933, "learning_rate": 4.98834604740841e-06, "loss": 0.2019, "step": 860 }, { "epoch": 0.07932924862947437, "grad_norm": 1.0519805334151477, "learning_rate": 4.988272462551471e-06, "loss": 0.204, "step": 861 }, { "epoch": 0.07942138480674436, "grad_norm": 0.9369015774026637, "learning_rate": 4.988198646658167e-06, "loss": 0.1809, "step": 862 }, { "epoch": 0.07951352098401437, "grad_norm": 1.1315810175239667, "learning_rate": 4.988124599735351e-06, "loss": 0.2129, "step": 863 }, { "epoch": 0.07960565716128438, "grad_norm": 1.073675633216396, "learning_rate": 4.988050321789898e-06, "loss": 0.1985, "step": 864 }, { "epoch": 0.07969779333855438, "grad_norm": 0.9730213494505685, "learning_rate": 4.987975812828704e-06, "loss": 0.1924, "step": 865 }, { "epoch": 0.07978992951582439, "grad_norm": 1.0859711498805535, "learning_rate": 4.987901072858689e-06, "loss": 0.1946, "step": 866 }, { "epoch": 0.07988206569309439, "grad_norm": 1.10664046990388, "learning_rate": 4.9878261018867915e-06, "loss": 0.2062, "step": 867 }, { "epoch": 0.0799742018703644, "grad_norm": 1.1293880063200286, "learning_rate": 4.9877508999199724e-06, "loss": 0.215, "step": 868 }, { "epoch": 0.0800663380476344, "grad_norm": 1.0544035282612745, "learning_rate": 4.987675466965215e-06, "loss": 0.1872, "step": 869 }, { "epoch": 0.08015847422490441, "grad_norm": 1.081822317114401, "learning_rate": 4.987599803029522e-06, "loss": 0.2051, "step": 870 }, { "epoch": 0.08025061040217442, "grad_norm": 1.0983981922980988, "learning_rate": 4.98752390811992e-06, "loss": 0.1988, "step": 871 }, { "epoch": 0.08034274657944442, "grad_norm": 1.0687075549507865, "learning_rate": 4.987447782243456e-06, "loss": 0.2103, "step": 872 }, { "epoch": 0.08043488275671443, "grad_norm": 1.129069762940162, "learning_rate": 4.9873714254071966e-06, "loss": 0.2106, "step": 873 }, { "epoch": 0.08052701893398442, "grad_norm": 1.0695215954521389, "learning_rate": 4.987294837618233e-06, "loss": 0.2056, "step": 874 }, { "epoch": 0.08061915511125443, "grad_norm": 1.006571818809714, "learning_rate": 4.987218018883676e-06, "loss": 0.2022, "step": 875 }, { "epoch": 0.08071129128852444, "grad_norm": 1.1207288976979883, "learning_rate": 4.987140969210659e-06, "loss": 0.2053, "step": 876 }, { "epoch": 0.08080342746579444, "grad_norm": 1.1091698949899884, "learning_rate": 4.987063688606335e-06, "loss": 0.2054, "step": 877 }, { "epoch": 0.08089556364306445, "grad_norm": 0.9936982863486912, "learning_rate": 4.98698617707788e-06, "loss": 0.1949, "step": 878 }, { "epoch": 0.08098769982033445, "grad_norm": 1.0218531560234463, "learning_rate": 4.98690843463249e-06, "loss": 0.183, "step": 879 }, { "epoch": 0.08107983599760446, "grad_norm": 1.0482785566667265, "learning_rate": 4.986830461277384e-06, "loss": 0.1962, "step": 880 }, { "epoch": 0.08117197217487447, "grad_norm": 1.0487002736259132, "learning_rate": 4.986752257019804e-06, "loss": 0.2063, "step": 881 }, { "epoch": 0.08126410835214447, "grad_norm": 1.0225704173071417, "learning_rate": 4.9866738218670075e-06, "loss": 0.1873, "step": 882 }, { "epoch": 0.08135624452941448, "grad_norm": 0.9922761716304604, "learning_rate": 4.986595155826279e-06, "loss": 0.1932, "step": 883 }, { "epoch": 0.08144838070668448, "grad_norm": 1.1203291309724843, "learning_rate": 4.986516258904923e-06, "loss": 0.2085, "step": 884 }, { "epoch": 0.08154051688395449, "grad_norm": 1.0137824728220541, "learning_rate": 4.986437131110265e-06, "loss": 0.1957, "step": 885 }, { "epoch": 0.08163265306122448, "grad_norm": 1.078763682867198, "learning_rate": 4.986357772449652e-06, "loss": 0.2051, "step": 886 }, { "epoch": 0.0817247892384945, "grad_norm": 1.1459363420840565, "learning_rate": 4.986278182930452e-06, "loss": 0.2071, "step": 887 }, { "epoch": 0.0818169254157645, "grad_norm": 1.0489858592922743, "learning_rate": 4.986198362560055e-06, "loss": 0.2049, "step": 888 }, { "epoch": 0.0819090615930345, "grad_norm": 1.1867738602471791, "learning_rate": 4.986118311345873e-06, "loss": 0.1922, "step": 889 }, { "epoch": 0.08200119777030451, "grad_norm": 1.0816042554701828, "learning_rate": 4.9860380292953375e-06, "loss": 0.1961, "step": 890 }, { "epoch": 0.08209333394757451, "grad_norm": 1.0776867351148292, "learning_rate": 4.985957516415903e-06, "loss": 0.2077, "step": 891 }, { "epoch": 0.08218547012484452, "grad_norm": 1.0266838629289732, "learning_rate": 4.985876772715047e-06, "loss": 0.1845, "step": 892 }, { "epoch": 0.08227760630211453, "grad_norm": 1.0714186396487406, "learning_rate": 4.985795798200265e-06, "loss": 0.1991, "step": 893 }, { "epoch": 0.08236974247938453, "grad_norm": 1.1565923591963108, "learning_rate": 4.9857145928790745e-06, "loss": 0.2053, "step": 894 }, { "epoch": 0.08246187865665454, "grad_norm": 1.0961658661114462, "learning_rate": 4.9856331567590175e-06, "loss": 0.2075, "step": 895 }, { "epoch": 0.08255401483392454, "grad_norm": 0.9307410162608049, "learning_rate": 4.985551489847654e-06, "loss": 0.1784, "step": 896 }, { "epoch": 0.08264615101119455, "grad_norm": 1.0401378502092977, "learning_rate": 4.985469592152567e-06, "loss": 0.1867, "step": 897 }, { "epoch": 0.08273828718846456, "grad_norm": 1.0339494204766255, "learning_rate": 4.985387463681361e-06, "loss": 0.1986, "step": 898 }, { "epoch": 0.08283042336573455, "grad_norm": 1.1980946192748525, "learning_rate": 4.985305104441661e-06, "loss": 0.2064, "step": 899 }, { "epoch": 0.08292255954300456, "grad_norm": 1.0323921168593682, "learning_rate": 4.9852225144411156e-06, "loss": 0.2084, "step": 900 }, { "epoch": 0.08301469572027456, "grad_norm": 1.0136930453828628, "learning_rate": 4.985139693687392e-06, "loss": 0.1888, "step": 901 }, { "epoch": 0.08310683189754457, "grad_norm": 0.9971484787535447, "learning_rate": 4.985056642188179e-06, "loss": 0.2017, "step": 902 }, { "epoch": 0.08319896807481457, "grad_norm": 0.9555914001671217, "learning_rate": 4.984973359951192e-06, "loss": 0.1815, "step": 903 }, { "epoch": 0.08329110425208458, "grad_norm": 1.0364717190048256, "learning_rate": 4.984889846984159e-06, "loss": 0.188, "step": 904 }, { "epoch": 0.08338324042935459, "grad_norm": 1.005245975401244, "learning_rate": 4.984806103294837e-06, "loss": 0.1874, "step": 905 }, { "epoch": 0.08347537660662459, "grad_norm": 1.0742626967198226, "learning_rate": 4.9847221288910004e-06, "loss": 0.2091, "step": 906 }, { "epoch": 0.0835675127838946, "grad_norm": 1.0572271589829996, "learning_rate": 4.984637923780448e-06, "loss": 0.1896, "step": 907 }, { "epoch": 0.0836596489611646, "grad_norm": 1.072817626504438, "learning_rate": 4.984553487970995e-06, "loss": 0.2027, "step": 908 }, { "epoch": 0.0837517851384346, "grad_norm": 1.1697331427088027, "learning_rate": 4.984468821470485e-06, "loss": 0.2009, "step": 909 }, { "epoch": 0.08384392131570462, "grad_norm": 1.099615722951779, "learning_rate": 4.984383924286776e-06, "loss": 0.2073, "step": 910 }, { "epoch": 0.08393605749297461, "grad_norm": 1.1067206700378336, "learning_rate": 4.984298796427754e-06, "loss": 0.2053, "step": 911 }, { "epoch": 0.08402819367024462, "grad_norm": 1.1386230456096778, "learning_rate": 4.984213437901321e-06, "loss": 0.2053, "step": 912 }, { "epoch": 0.08412032984751462, "grad_norm": 1.051467856985353, "learning_rate": 4.984127848715402e-06, "loss": 0.2002, "step": 913 }, { "epoch": 0.08421246602478463, "grad_norm": 1.0159847224874938, "learning_rate": 4.984042028877945e-06, "loss": 0.1739, "step": 914 }, { "epoch": 0.08430460220205464, "grad_norm": 1.1102387510829792, "learning_rate": 4.983955978396919e-06, "loss": 0.1952, "step": 915 }, { "epoch": 0.08439673837932464, "grad_norm": 1.044485002079122, "learning_rate": 4.983869697280312e-06, "loss": 0.2054, "step": 916 }, { "epoch": 0.08448887455659465, "grad_norm": 1.1181813544003023, "learning_rate": 4.983783185536137e-06, "loss": 0.1931, "step": 917 }, { "epoch": 0.08458101073386465, "grad_norm": 1.0630599308615696, "learning_rate": 4.983696443172426e-06, "loss": 0.1876, "step": 918 }, { "epoch": 0.08467314691113466, "grad_norm": 0.9944252347497624, "learning_rate": 4.983609470197233e-06, "loss": 0.1866, "step": 919 }, { "epoch": 0.08476528308840466, "grad_norm": 1.0413027140377702, "learning_rate": 4.983522266618633e-06, "loss": 0.196, "step": 920 }, { "epoch": 0.08485741926567467, "grad_norm": 1.1229738346239124, "learning_rate": 4.983434832444724e-06, "loss": 0.1916, "step": 921 }, { "epoch": 0.08494955544294468, "grad_norm": 1.033570992977324, "learning_rate": 4.983347167683623e-06, "loss": 0.1942, "step": 922 }, { "epoch": 0.08504169162021467, "grad_norm": 1.0615629937133617, "learning_rate": 4.98325927234347e-06, "loss": 0.1998, "step": 923 }, { "epoch": 0.08513382779748468, "grad_norm": 1.0603013903796452, "learning_rate": 4.983171146432427e-06, "loss": 0.1958, "step": 924 }, { "epoch": 0.08522596397475468, "grad_norm": 1.0836485017893724, "learning_rate": 4.983082789958675e-06, "loss": 0.1969, "step": 925 }, { "epoch": 0.08531810015202469, "grad_norm": 1.1918839586269157, "learning_rate": 4.9829942029304194e-06, "loss": 0.1979, "step": 926 }, { "epoch": 0.0854102363292947, "grad_norm": 1.0560845818273896, "learning_rate": 4.982905385355885e-06, "loss": 0.1971, "step": 927 }, { "epoch": 0.0855023725065647, "grad_norm": 1.0371941917693073, "learning_rate": 4.982816337243318e-06, "loss": 0.199, "step": 928 }, { "epoch": 0.08559450868383471, "grad_norm": 1.0181772522742907, "learning_rate": 4.982727058600987e-06, "loss": 0.1991, "step": 929 }, { "epoch": 0.08568664486110471, "grad_norm": 0.9936553320104157, "learning_rate": 4.98263754943718e-06, "loss": 0.1841, "step": 930 }, { "epoch": 0.08577878103837472, "grad_norm": 1.151859083100634, "learning_rate": 4.9825478097602115e-06, "loss": 0.19, "step": 931 }, { "epoch": 0.08587091721564473, "grad_norm": 1.1087091001896687, "learning_rate": 4.982457839578411e-06, "loss": 0.1975, "step": 932 }, { "epoch": 0.08596305339291473, "grad_norm": 1.0020521314066806, "learning_rate": 4.982367638900132e-06, "loss": 0.206, "step": 933 }, { "epoch": 0.08605518957018474, "grad_norm": 1.0393467185829126, "learning_rate": 4.982277207733751e-06, "loss": 0.1917, "step": 934 }, { "epoch": 0.08614732574745473, "grad_norm": 1.3665091289700992, "learning_rate": 4.982186546087665e-06, "loss": 0.2101, "step": 935 }, { "epoch": 0.08623946192472474, "grad_norm": 0.970728944149346, "learning_rate": 4.98209565397029e-06, "loss": 0.1853, "step": 936 }, { "epoch": 0.08633159810199474, "grad_norm": 1.0188718196066726, "learning_rate": 4.9820045313900675e-06, "loss": 0.1909, "step": 937 }, { "epoch": 0.08642373427926475, "grad_norm": 1.0079807293569125, "learning_rate": 4.981913178355456e-06, "loss": 0.1798, "step": 938 }, { "epoch": 0.08651587045653476, "grad_norm": 0.9823783578124373, "learning_rate": 4.981821594874939e-06, "loss": 0.1792, "step": 939 }, { "epoch": 0.08660800663380476, "grad_norm": 1.035277129236254, "learning_rate": 4.981729780957021e-06, "loss": 0.1908, "step": 940 }, { "epoch": 0.08670014281107477, "grad_norm": 1.0201631403526028, "learning_rate": 4.981637736610224e-06, "loss": 0.182, "step": 941 }, { "epoch": 0.08679227898834477, "grad_norm": 1.0623794729008158, "learning_rate": 4.981545461843098e-06, "loss": 0.1962, "step": 942 }, { "epoch": 0.08688441516561478, "grad_norm": 1.0952542593720789, "learning_rate": 4.9814529566642065e-06, "loss": 0.1876, "step": 943 }, { "epoch": 0.08697655134288479, "grad_norm": 1.0851540110165558, "learning_rate": 4.981360221082143e-06, "loss": 0.1981, "step": 944 }, { "epoch": 0.08706868752015479, "grad_norm": 1.117127612131708, "learning_rate": 4.9812672551055144e-06, "loss": 0.2034, "step": 945 }, { "epoch": 0.0871608236974248, "grad_norm": 1.210471734588479, "learning_rate": 4.981174058742955e-06, "loss": 0.201, "step": 946 }, { "epoch": 0.0872529598746948, "grad_norm": 1.1075330267974028, "learning_rate": 4.981080632003117e-06, "loss": 0.211, "step": 947 }, { "epoch": 0.0873450960519648, "grad_norm": 1.070657470857975, "learning_rate": 4.980986974894676e-06, "loss": 0.1781, "step": 948 }, { "epoch": 0.08743723222923482, "grad_norm": 1.0137187662237797, "learning_rate": 4.980893087426326e-06, "loss": 0.1832, "step": 949 }, { "epoch": 0.08752936840650481, "grad_norm": 1.2310842945020835, "learning_rate": 4.980798969606787e-06, "loss": 0.2071, "step": 950 }, { "epoch": 0.08762150458377482, "grad_norm": 0.9978182904900013, "learning_rate": 4.980704621444797e-06, "loss": 0.1889, "step": 951 }, { "epoch": 0.08771364076104482, "grad_norm": 1.1657157637314783, "learning_rate": 4.980610042949115e-06, "loss": 0.2151, "step": 952 }, { "epoch": 0.08780577693831483, "grad_norm": 1.0615920322109136, "learning_rate": 4.980515234128522e-06, "loss": 0.1894, "step": 953 }, { "epoch": 0.08789791311558483, "grad_norm": 0.9965168434781553, "learning_rate": 4.980420194991826e-06, "loss": 0.1723, "step": 954 }, { "epoch": 0.08799004929285484, "grad_norm": 1.0223984489495057, "learning_rate": 4.980324925547845e-06, "loss": 0.2016, "step": 955 }, { "epoch": 0.08808218547012485, "grad_norm": 1.0267736631959716, "learning_rate": 4.980229425805429e-06, "loss": 0.1948, "step": 956 }, { "epoch": 0.08817432164739485, "grad_norm": 1.0487644444256707, "learning_rate": 4.9801336957734435e-06, "loss": 0.1976, "step": 957 }, { "epoch": 0.08826645782466486, "grad_norm": 0.9800925451113316, "learning_rate": 4.980037735460778e-06, "loss": 0.1884, "step": 958 }, { "epoch": 0.08835859400193485, "grad_norm": 1.0512460138994213, "learning_rate": 4.9799415448763414e-06, "loss": 0.1905, "step": 959 }, { "epoch": 0.08845073017920486, "grad_norm": 1.0438604822998108, "learning_rate": 4.979845124029066e-06, "loss": 0.1997, "step": 960 }, { "epoch": 0.08854286635647488, "grad_norm": 1.1256703426732806, "learning_rate": 4.979748472927903e-06, "loss": 0.1826, "step": 961 }, { "epoch": 0.08863500253374487, "grad_norm": 1.1642895647997922, "learning_rate": 4.979651591581829e-06, "loss": 0.1938, "step": 962 }, { "epoch": 0.08872713871101488, "grad_norm": 1.061865516308078, "learning_rate": 4.979554479999836e-06, "loss": 0.1979, "step": 963 }, { "epoch": 0.08881927488828488, "grad_norm": 1.0511995629982513, "learning_rate": 4.979457138190944e-06, "loss": 0.1991, "step": 964 }, { "epoch": 0.08891141106555489, "grad_norm": 1.0627898550832569, "learning_rate": 4.979359566164189e-06, "loss": 0.1892, "step": 965 }, { "epoch": 0.0890035472428249, "grad_norm": 1.1520194609491567, "learning_rate": 4.979261763928632e-06, "loss": 0.2088, "step": 966 }, { "epoch": 0.0890956834200949, "grad_norm": 1.0750536562889166, "learning_rate": 4.979163731493354e-06, "loss": 0.2057, "step": 967 }, { "epoch": 0.08918781959736491, "grad_norm": 1.0233899360449537, "learning_rate": 4.979065468867456e-06, "loss": 0.1966, "step": 968 }, { "epoch": 0.0892799557746349, "grad_norm": 1.0189667297065197, "learning_rate": 4.978966976060062e-06, "loss": 0.1893, "step": 969 }, { "epoch": 0.08937209195190492, "grad_norm": 0.9615841945851383, "learning_rate": 4.978868253080318e-06, "loss": 0.1795, "step": 970 }, { "epoch": 0.08946422812917491, "grad_norm": 1.0374752006939945, "learning_rate": 4.9787692999373895e-06, "loss": 0.1927, "step": 971 }, { "epoch": 0.08955636430644492, "grad_norm": 0.9781931331030623, "learning_rate": 4.978670116640465e-06, "loss": 0.1886, "step": 972 }, { "epoch": 0.08964850048371494, "grad_norm": 1.0482569650890077, "learning_rate": 4.978570703198754e-06, "loss": 0.2073, "step": 973 }, { "epoch": 0.08974063666098493, "grad_norm": 1.0307954204033831, "learning_rate": 4.978471059621486e-06, "loss": 0.2001, "step": 974 }, { "epoch": 0.08983277283825494, "grad_norm": 0.9504730932711084, "learning_rate": 4.978371185917913e-06, "loss": 0.1871, "step": 975 }, { "epoch": 0.08992490901552494, "grad_norm": 1.0212992592289591, "learning_rate": 4.978271082097309e-06, "loss": 0.1865, "step": 976 }, { "epoch": 0.09001704519279495, "grad_norm": 1.0584402344944974, "learning_rate": 4.978170748168968e-06, "loss": 0.1827, "step": 977 }, { "epoch": 0.09010918137006496, "grad_norm": 1.0265243323297528, "learning_rate": 4.978070184142207e-06, "loss": 0.1955, "step": 978 }, { "epoch": 0.09020131754733496, "grad_norm": 0.99918479647745, "learning_rate": 4.977969390026362e-06, "loss": 0.1902, "step": 979 }, { "epoch": 0.09029345372460497, "grad_norm": 1.0545620541518919, "learning_rate": 4.9778683658307925e-06, "loss": 0.1904, "step": 980 }, { "epoch": 0.09038558990187497, "grad_norm": 1.0303710328312456, "learning_rate": 4.977767111564879e-06, "loss": 0.1922, "step": 981 }, { "epoch": 0.09047772607914498, "grad_norm": 1.0446625626228556, "learning_rate": 4.977665627238023e-06, "loss": 0.1855, "step": 982 }, { "epoch": 0.09056986225641499, "grad_norm": 1.0552315927395985, "learning_rate": 4.977563912859645e-06, "loss": 0.1869, "step": 983 }, { "epoch": 0.09066199843368498, "grad_norm": 1.003974927091642, "learning_rate": 4.977461968439193e-06, "loss": 0.1923, "step": 984 }, { "epoch": 0.090754134610955, "grad_norm": 1.0230489571997252, "learning_rate": 4.9773597939861294e-06, "loss": 0.1856, "step": 985 }, { "epoch": 0.09084627078822499, "grad_norm": 1.0105592868572502, "learning_rate": 4.977257389509943e-06, "loss": 0.1929, "step": 986 }, { "epoch": 0.090938406965495, "grad_norm": 1.0725121055688818, "learning_rate": 4.9771547550201414e-06, "loss": 0.1856, "step": 987 }, { "epoch": 0.09103054314276501, "grad_norm": 1.0409324997207798, "learning_rate": 4.977051890526254e-06, "loss": 0.1922, "step": 988 }, { "epoch": 0.09112267932003501, "grad_norm": 1.0338303485068927, "learning_rate": 4.976948796037831e-06, "loss": 0.194, "step": 989 }, { "epoch": 0.09121481549730502, "grad_norm": 1.0689018179069636, "learning_rate": 4.976845471564447e-06, "loss": 0.1924, "step": 990 }, { "epoch": 0.09130695167457502, "grad_norm": 1.0394659919045186, "learning_rate": 4.976741917115695e-06, "loss": 0.1917, "step": 991 }, { "epoch": 0.09139908785184503, "grad_norm": 1.0150076898279992, "learning_rate": 4.976638132701188e-06, "loss": 0.1842, "step": 992 }, { "epoch": 0.09149122402911503, "grad_norm": 1.0841944088787114, "learning_rate": 4.976534118330565e-06, "loss": 0.1788, "step": 993 }, { "epoch": 0.09158336020638504, "grad_norm": 1.0401076161846878, "learning_rate": 4.9764298740134814e-06, "loss": 0.1901, "step": 994 }, { "epoch": 0.09167549638365505, "grad_norm": 0.9971280819093569, "learning_rate": 4.976325399759619e-06, "loss": 0.1951, "step": 995 }, { "epoch": 0.09176763256092504, "grad_norm": 0.9976774105341277, "learning_rate": 4.976220695578675e-06, "loss": 0.1741, "step": 996 }, { "epoch": 0.09185976873819506, "grad_norm": 1.0794336104421778, "learning_rate": 4.976115761480373e-06, "loss": 0.2019, "step": 997 }, { "epoch": 0.09195190491546505, "grad_norm": 1.1589141007240227, "learning_rate": 4.9760105974744576e-06, "loss": 0.2021, "step": 998 }, { "epoch": 0.09204404109273506, "grad_norm": 1.0959230462918457, "learning_rate": 4.97590520357069e-06, "loss": 0.1871, "step": 999 }, { "epoch": 0.09213617727000507, "grad_norm": 1.0170637360982258, "learning_rate": 4.97579957977886e-06, "loss": 0.192, "step": 1000 }, { "epoch": 0.09213617727000507, "eval_loss": 0.19303320348262787, "eval_runtime": 299.1988, "eval_samples_per_second": 23.453, "eval_steps_per_second": 2.935, "step": 1000 }, { "epoch": 0.09222831344727507, "grad_norm": 1.1568554165939138, "learning_rate": 4.97569372610877e-06, "loss": 0.1831, "step": 1001 }, { "epoch": 0.09232044962454508, "grad_norm": 1.1944389893604717, "learning_rate": 4.975587642570252e-06, "loss": 0.1828, "step": 1002 }, { "epoch": 0.09241258580181508, "grad_norm": 1.0170283022489994, "learning_rate": 4.975481329173156e-06, "loss": 0.1856, "step": 1003 }, { "epoch": 0.09250472197908509, "grad_norm": 1.0558442749265609, "learning_rate": 4.975374785927351e-06, "loss": 0.1847, "step": 1004 }, { "epoch": 0.0925968581563551, "grad_norm": 1.0094563700785, "learning_rate": 4.975268012842732e-06, "loss": 0.1876, "step": 1005 }, { "epoch": 0.0926889943336251, "grad_norm": 1.1605189632893436, "learning_rate": 4.97516100992921e-06, "loss": 0.1964, "step": 1006 }, { "epoch": 0.09278113051089511, "grad_norm": 1.0938956132780517, "learning_rate": 4.975053777196723e-06, "loss": 0.2036, "step": 1007 }, { "epoch": 0.0928732666881651, "grad_norm": 1.0767350046907365, "learning_rate": 4.974946314655226e-06, "loss": 0.2035, "step": 1008 }, { "epoch": 0.09296540286543511, "grad_norm": 0.9971359022353502, "learning_rate": 4.974838622314698e-06, "loss": 0.1969, "step": 1009 }, { "epoch": 0.09305753904270511, "grad_norm": 1.093729265610002, "learning_rate": 4.974730700185136e-06, "loss": 0.2025, "step": 1010 }, { "epoch": 0.09314967521997512, "grad_norm": 1.0774952559409026, "learning_rate": 4.974622548276564e-06, "loss": 0.2024, "step": 1011 }, { "epoch": 0.09324181139724513, "grad_norm": 1.009403051152341, "learning_rate": 4.974514166599021e-06, "loss": 0.1936, "step": 1012 }, { "epoch": 0.09333394757451513, "grad_norm": 1.0763173811540299, "learning_rate": 4.974405555162571e-06, "loss": 0.1912, "step": 1013 }, { "epoch": 0.09342608375178514, "grad_norm": 1.1012649720307428, "learning_rate": 4.9742967139773e-06, "loss": 0.2018, "step": 1014 }, { "epoch": 0.09351821992905514, "grad_norm": 0.9708640152172979, "learning_rate": 4.974187643053312e-06, "loss": 0.1711, "step": 1015 }, { "epoch": 0.09361035610632515, "grad_norm": 1.109835431173934, "learning_rate": 4.9740783424007355e-06, "loss": 0.1957, "step": 1016 }, { "epoch": 0.09370249228359516, "grad_norm": 1.1759709250141979, "learning_rate": 4.973968812029718e-06, "loss": 0.21, "step": 1017 }, { "epoch": 0.09379462846086516, "grad_norm": 0.9865822952697535, "learning_rate": 4.973859051950431e-06, "loss": 0.175, "step": 1018 }, { "epoch": 0.09388676463813517, "grad_norm": 1.1361881263400175, "learning_rate": 4.973749062173065e-06, "loss": 0.1994, "step": 1019 }, { "epoch": 0.09397890081540516, "grad_norm": 1.109853232173025, "learning_rate": 4.973638842707831e-06, "loss": 0.1969, "step": 1020 }, { "epoch": 0.09407103699267517, "grad_norm": 0.9731737615905076, "learning_rate": 4.973528393564965e-06, "loss": 0.1752, "step": 1021 }, { "epoch": 0.09416317316994519, "grad_norm": 1.074193893659733, "learning_rate": 4.973417714754721e-06, "loss": 0.194, "step": 1022 }, { "epoch": 0.09425530934721518, "grad_norm": 1.024455833636712, "learning_rate": 4.973306806287376e-06, "loss": 0.1903, "step": 1023 }, { "epoch": 0.0943474455244852, "grad_norm": 0.9573557205309052, "learning_rate": 4.9731956681732284e-06, "loss": 0.1828, "step": 1024 }, { "epoch": 0.09443958170175519, "grad_norm": 1.0124926306215474, "learning_rate": 4.973084300422597e-06, "loss": 0.1931, "step": 1025 }, { "epoch": 0.0945317178790252, "grad_norm": 1.0815661147002018, "learning_rate": 4.972972703045822e-06, "loss": 0.195, "step": 1026 }, { "epoch": 0.0946238540562952, "grad_norm": 1.110883335805394, "learning_rate": 4.972860876053265e-06, "loss": 0.1982, "step": 1027 }, { "epoch": 0.09471599023356521, "grad_norm": 1.0693046793652852, "learning_rate": 4.97274881945531e-06, "loss": 0.1811, "step": 1028 }, { "epoch": 0.09480812641083522, "grad_norm": 1.1919130340115107, "learning_rate": 4.97263653326236e-06, "loss": 0.2073, "step": 1029 }, { "epoch": 0.09490026258810522, "grad_norm": 1.0577602885838477, "learning_rate": 4.972524017484842e-06, "loss": 0.1841, "step": 1030 }, { "epoch": 0.09499239876537523, "grad_norm": 1.1057953679804238, "learning_rate": 4.972411272133204e-06, "loss": 0.1848, "step": 1031 }, { "epoch": 0.09508453494264522, "grad_norm": 1.1472304844236627, "learning_rate": 4.972298297217913e-06, "loss": 0.1851, "step": 1032 }, { "epoch": 0.09517667111991523, "grad_norm": 1.0139962897450747, "learning_rate": 4.972185092749458e-06, "loss": 0.1888, "step": 1033 }, { "epoch": 0.09526880729718525, "grad_norm": 1.0237739212232981, "learning_rate": 4.972071658738352e-06, "loss": 0.1961, "step": 1034 }, { "epoch": 0.09536094347445524, "grad_norm": 1.1226050341955258, "learning_rate": 4.971957995195126e-06, "loss": 0.1919, "step": 1035 }, { "epoch": 0.09545307965172525, "grad_norm": 1.0615783114613073, "learning_rate": 4.971844102130334e-06, "loss": 0.1929, "step": 1036 }, { "epoch": 0.09554521582899525, "grad_norm": 1.016886377753397, "learning_rate": 4.971729979554551e-06, "loss": 0.1956, "step": 1037 }, { "epoch": 0.09563735200626526, "grad_norm": 1.0576392129215906, "learning_rate": 4.9716156274783746e-06, "loss": 0.186, "step": 1038 }, { "epoch": 0.09572948818353527, "grad_norm": 1.0326102477262193, "learning_rate": 4.9715010459124205e-06, "loss": 0.2068, "step": 1039 }, { "epoch": 0.09582162436080527, "grad_norm": 0.9670626996840229, "learning_rate": 4.971386234867328e-06, "loss": 0.197, "step": 1040 }, { "epoch": 0.09591376053807528, "grad_norm": 1.0570562814072233, "learning_rate": 4.971271194353757e-06, "loss": 0.1895, "step": 1041 }, { "epoch": 0.09600589671534528, "grad_norm": 1.0598769897437053, "learning_rate": 4.971155924382392e-06, "loss": 0.1856, "step": 1042 }, { "epoch": 0.09609803289261529, "grad_norm": 0.9717398940768603, "learning_rate": 4.971040424963931e-06, "loss": 0.1694, "step": 1043 }, { "epoch": 0.09619016906988528, "grad_norm": 1.0484387235197232, "learning_rate": 4.970924696109102e-06, "loss": 0.1974, "step": 1044 }, { "epoch": 0.0962823052471553, "grad_norm": 1.0682781178742418, "learning_rate": 4.970808737828648e-06, "loss": 0.214, "step": 1045 }, { "epoch": 0.0963744414244253, "grad_norm": 1.0480298626312177, "learning_rate": 4.970692550133337e-06, "loss": 0.1959, "step": 1046 }, { "epoch": 0.0964665776016953, "grad_norm": 1.1064752815320091, "learning_rate": 4.970576133033958e-06, "loss": 0.1924, "step": 1047 }, { "epoch": 0.09655871377896531, "grad_norm": 1.0614078272511498, "learning_rate": 4.970459486541318e-06, "loss": 0.2013, "step": 1048 }, { "epoch": 0.09665084995623531, "grad_norm": 1.064795035168974, "learning_rate": 4.970342610666249e-06, "loss": 0.1947, "step": 1049 }, { "epoch": 0.09674298613350532, "grad_norm": 0.9799709467241581, "learning_rate": 4.970225505419602e-06, "loss": 0.1769, "step": 1050 }, { "epoch": 0.09683512231077533, "grad_norm": 1.043741540422472, "learning_rate": 4.970108170812252e-06, "loss": 0.1953, "step": 1051 }, { "epoch": 0.09692725848804533, "grad_norm": 1.061623477697688, "learning_rate": 4.969990606855093e-06, "loss": 0.2071, "step": 1052 }, { "epoch": 0.09701939466531534, "grad_norm": 1.1020868177462027, "learning_rate": 4.969872813559039e-06, "loss": 0.1821, "step": 1053 }, { "epoch": 0.09711153084258534, "grad_norm": 0.988163345783669, "learning_rate": 4.9697547909350295e-06, "loss": 0.1987, "step": 1054 }, { "epoch": 0.09720366701985535, "grad_norm": 1.0267991125472582, "learning_rate": 4.969636538994021e-06, "loss": 0.1918, "step": 1055 }, { "epoch": 0.09729580319712536, "grad_norm": 1.070032876028796, "learning_rate": 4.969518057746995e-06, "loss": 0.2002, "step": 1056 }, { "epoch": 0.09738793937439535, "grad_norm": 0.954895748932761, "learning_rate": 4.969399347204951e-06, "loss": 0.1829, "step": 1057 }, { "epoch": 0.09748007555166537, "grad_norm": 1.0721201059228953, "learning_rate": 4.969280407378912e-06, "loss": 0.192, "step": 1058 }, { "epoch": 0.09757221172893536, "grad_norm": 1.0107730979473246, "learning_rate": 4.9691612382799215e-06, "loss": 0.194, "step": 1059 }, { "epoch": 0.09766434790620537, "grad_norm": 0.9854433143433244, "learning_rate": 4.969041839919044e-06, "loss": 0.1909, "step": 1060 }, { "epoch": 0.09775648408347537, "grad_norm": 1.0348738031722355, "learning_rate": 4.968922212307367e-06, "loss": 0.1922, "step": 1061 }, { "epoch": 0.09784862026074538, "grad_norm": 1.0040960545465387, "learning_rate": 4.968802355455995e-06, "loss": 0.1923, "step": 1062 }, { "epoch": 0.09794075643801539, "grad_norm": 1.0094386883868456, "learning_rate": 4.96868226937606e-06, "loss": 0.1751, "step": 1063 }, { "epoch": 0.09803289261528539, "grad_norm": 1.073963995133156, "learning_rate": 4.96856195407871e-06, "loss": 0.1931, "step": 1064 }, { "epoch": 0.0981250287925554, "grad_norm": 0.9703948692708834, "learning_rate": 4.968441409575117e-06, "loss": 0.1906, "step": 1065 }, { "epoch": 0.0982171649698254, "grad_norm": 1.06228397745977, "learning_rate": 4.968320635876473e-06, "loss": 0.1857, "step": 1066 }, { "epoch": 0.09830930114709541, "grad_norm": 1.0662602435123827, "learning_rate": 4.968199632993994e-06, "loss": 0.1943, "step": 1067 }, { "epoch": 0.09840143732436542, "grad_norm": 1.030389208026544, "learning_rate": 4.968078400938912e-06, "loss": 0.1981, "step": 1068 }, { "epoch": 0.09849357350163541, "grad_norm": 1.0075981112842045, "learning_rate": 4.967956939722485e-06, "loss": 0.1969, "step": 1069 }, { "epoch": 0.09858570967890543, "grad_norm": 0.958118698153524, "learning_rate": 4.967835249355991e-06, "loss": 0.1858, "step": 1070 }, { "epoch": 0.09867784585617542, "grad_norm": 0.9474495525140502, "learning_rate": 4.967713329850728e-06, "loss": 0.1859, "step": 1071 }, { "epoch": 0.09876998203344543, "grad_norm": 1.0554983849327597, "learning_rate": 4.967591181218017e-06, "loss": 0.192, "step": 1072 }, { "epoch": 0.09886211821071544, "grad_norm": 1.0152034702976793, "learning_rate": 4.967468803469199e-06, "loss": 0.195, "step": 1073 }, { "epoch": 0.09895425438798544, "grad_norm": 1.0168864653893954, "learning_rate": 4.967346196615638e-06, "loss": 0.1767, "step": 1074 }, { "epoch": 0.09904639056525545, "grad_norm": 1.043221440739377, "learning_rate": 4.967223360668716e-06, "loss": 0.1846, "step": 1075 }, { "epoch": 0.09913852674252545, "grad_norm": 1.0048446057039784, "learning_rate": 4.9671002956398395e-06, "loss": 0.1973, "step": 1076 }, { "epoch": 0.09923066291979546, "grad_norm": 0.9969031964290073, "learning_rate": 4.966977001540436e-06, "loss": 0.1926, "step": 1077 }, { "epoch": 0.09932279909706546, "grad_norm": 1.052777627221389, "learning_rate": 4.966853478381951e-06, "loss": 0.1995, "step": 1078 }, { "epoch": 0.09941493527433547, "grad_norm": 1.068967031109921, "learning_rate": 4.966729726175857e-06, "loss": 0.1848, "step": 1079 }, { "epoch": 0.09950707145160548, "grad_norm": 1.160503081694701, "learning_rate": 4.96660574493364e-06, "loss": 0.1954, "step": 1080 }, { "epoch": 0.09959920762887547, "grad_norm": 1.0796967030397735, "learning_rate": 4.9664815346668165e-06, "loss": 0.2055, "step": 1081 }, { "epoch": 0.09969134380614549, "grad_norm": 1.005831162809125, "learning_rate": 4.966357095386915e-06, "loss": 0.1972, "step": 1082 }, { "epoch": 0.09978347998341548, "grad_norm": 1.0052790015292061, "learning_rate": 4.966232427105493e-06, "loss": 0.1825, "step": 1083 }, { "epoch": 0.0998756161606855, "grad_norm": 1.0082971570804145, "learning_rate": 4.9661075298341245e-06, "loss": 0.1679, "step": 1084 }, { "epoch": 0.0999677523379555, "grad_norm": 1.002477114385746, "learning_rate": 4.965982403584406e-06, "loss": 0.1978, "step": 1085 }, { "epoch": 0.1000598885152255, "grad_norm": 1.0561649001397835, "learning_rate": 4.965857048367956e-06, "loss": 0.2016, "step": 1086 }, { "epoch": 0.10015202469249551, "grad_norm": 1.090128521697667, "learning_rate": 4.965731464196415e-06, "loss": 0.1981, "step": 1087 }, { "epoch": 0.10024416086976551, "grad_norm": 1.1605860564434374, "learning_rate": 4.96560565108144e-06, "loss": 0.2093, "step": 1088 }, { "epoch": 0.10033629704703552, "grad_norm": 0.9908203729796794, "learning_rate": 4.965479609034717e-06, "loss": 0.1761, "step": 1089 }, { "epoch": 0.10042843322430553, "grad_norm": 1.0689295025072343, "learning_rate": 4.9653533380679455e-06, "loss": 0.2124, "step": 1090 }, { "epoch": 0.10052056940157553, "grad_norm": 0.9557530326333923, "learning_rate": 4.965226838192852e-06, "loss": 0.1835, "step": 1091 }, { "epoch": 0.10061270557884554, "grad_norm": 0.9607802521798345, "learning_rate": 4.965100109421182e-06, "loss": 0.1779, "step": 1092 }, { "epoch": 0.10070484175611553, "grad_norm": 1.0016875203479627, "learning_rate": 4.9649731517647e-06, "loss": 0.1769, "step": 1093 }, { "epoch": 0.10079697793338555, "grad_norm": 1.0440688028642757, "learning_rate": 4.964845965235196e-06, "loss": 0.1934, "step": 1094 }, { "epoch": 0.10088911411065554, "grad_norm": 1.099885377144038, "learning_rate": 4.964718549844479e-06, "loss": 0.2077, "step": 1095 }, { "epoch": 0.10098125028792555, "grad_norm": 1.0364884967207673, "learning_rate": 4.964590905604379e-06, "loss": 0.1839, "step": 1096 }, { "epoch": 0.10107338646519556, "grad_norm": 1.119047007151761, "learning_rate": 4.964463032526749e-06, "loss": 0.1921, "step": 1097 }, { "epoch": 0.10116552264246556, "grad_norm": 1.0229016109535547, "learning_rate": 4.9643349306234615e-06, "loss": 0.1873, "step": 1098 }, { "epoch": 0.10125765881973557, "grad_norm": 1.0007547076017496, "learning_rate": 4.96420659990641e-06, "loss": 0.1809, "step": 1099 }, { "epoch": 0.10134979499700557, "grad_norm": 1.1215031197209377, "learning_rate": 4.9640780403875095e-06, "loss": 0.1995, "step": 1100 }, { "epoch": 0.10144193117427558, "grad_norm": 1.0890006150478866, "learning_rate": 4.963949252078698e-06, "loss": 0.1965, "step": 1101 }, { "epoch": 0.10153406735154559, "grad_norm": 1.0198293652323223, "learning_rate": 4.963820234991934e-06, "loss": 0.2028, "step": 1102 }, { "epoch": 0.10162620352881559, "grad_norm": 0.9681566672517501, "learning_rate": 4.963690989139196e-06, "loss": 0.1775, "step": 1103 }, { "epoch": 0.1017183397060856, "grad_norm": 0.9842979036405417, "learning_rate": 4.963561514532485e-06, "loss": 0.174, "step": 1104 }, { "epoch": 0.1018104758833556, "grad_norm": 1.056487078327593, "learning_rate": 4.963431811183821e-06, "loss": 0.1923, "step": 1105 }, { "epoch": 0.1019026120606256, "grad_norm": 0.9681514651053884, "learning_rate": 4.963301879105249e-06, "loss": 0.1735, "step": 1106 }, { "epoch": 0.10199474823789562, "grad_norm": 1.0279411063186674, "learning_rate": 4.963171718308833e-06, "loss": 0.1939, "step": 1107 }, { "epoch": 0.10208688441516561, "grad_norm": 1.0199638016460721, "learning_rate": 4.963041328806656e-06, "loss": 0.184, "step": 1108 }, { "epoch": 0.10217902059243562, "grad_norm": 1.0831824990470151, "learning_rate": 4.962910710610827e-06, "loss": 0.1919, "step": 1109 }, { "epoch": 0.10227115676970562, "grad_norm": 1.0908326206048449, "learning_rate": 4.962779863733475e-06, "loss": 0.1765, "step": 1110 }, { "epoch": 0.10236329294697563, "grad_norm": 1.0045235303450732, "learning_rate": 4.962648788186747e-06, "loss": 0.1892, "step": 1111 }, { "epoch": 0.10245542912424563, "grad_norm": 0.9971996009560316, "learning_rate": 4.9625174839828135e-06, "loss": 0.1818, "step": 1112 }, { "epoch": 0.10254756530151564, "grad_norm": 1.1008529906643778, "learning_rate": 4.9623859511338664e-06, "loss": 0.1859, "step": 1113 }, { "epoch": 0.10263970147878565, "grad_norm": 1.0675327190930683, "learning_rate": 4.962254189652119e-06, "loss": 0.1938, "step": 1114 }, { "epoch": 0.10273183765605565, "grad_norm": 0.9678039471099696, "learning_rate": 4.962122199549806e-06, "loss": 0.1842, "step": 1115 }, { "epoch": 0.10282397383332566, "grad_norm": 0.9783428439299713, "learning_rate": 4.96198998083918e-06, "loss": 0.1868, "step": 1116 }, { "epoch": 0.10291611001059565, "grad_norm": 1.0795078906373854, "learning_rate": 4.961857533532521e-06, "loss": 0.2017, "step": 1117 }, { "epoch": 0.10300824618786567, "grad_norm": 0.9862093313250959, "learning_rate": 4.961724857642125e-06, "loss": 0.188, "step": 1118 }, { "epoch": 0.10310038236513568, "grad_norm": 1.023997592371275, "learning_rate": 4.96159195318031e-06, "loss": 0.1807, "step": 1119 }, { "epoch": 0.10319251854240567, "grad_norm": 1.1350867035477064, "learning_rate": 4.9614588201594175e-06, "loss": 0.1962, "step": 1120 }, { "epoch": 0.10328465471967568, "grad_norm": 1.0486867617781612, "learning_rate": 4.961325458591809e-06, "loss": 0.1967, "step": 1121 }, { "epoch": 0.10337679089694568, "grad_norm": 0.9979044530364211, "learning_rate": 4.961191868489866e-06, "loss": 0.1847, "step": 1122 }, { "epoch": 0.10346892707421569, "grad_norm": 1.0898276661037767, "learning_rate": 4.961058049865994e-06, "loss": 0.1841, "step": 1123 }, { "epoch": 0.1035610632514857, "grad_norm": 1.0539187928170928, "learning_rate": 4.960924002732616e-06, "loss": 0.2036, "step": 1124 }, { "epoch": 0.1036531994287557, "grad_norm": 0.973520042589487, "learning_rate": 4.9607897271021815e-06, "loss": 0.1765, "step": 1125 }, { "epoch": 0.10374533560602571, "grad_norm": 1.0595893348731948, "learning_rate": 4.960655222987155e-06, "loss": 0.2013, "step": 1126 }, { "epoch": 0.1038374717832957, "grad_norm": 1.1099133446922225, "learning_rate": 4.960520490400026e-06, "loss": 0.1849, "step": 1127 }, { "epoch": 0.10392960796056572, "grad_norm": 0.9847206344296402, "learning_rate": 4.9603855293533045e-06, "loss": 0.1859, "step": 1128 }, { "epoch": 0.10402174413783571, "grad_norm": 1.0085016434462313, "learning_rate": 4.960250339859523e-06, "loss": 0.1922, "step": 1129 }, { "epoch": 0.10411388031510573, "grad_norm": 1.0132928372271228, "learning_rate": 4.960114921931231e-06, "loss": 0.1816, "step": 1130 }, { "epoch": 0.10420601649237574, "grad_norm": 0.945813808487549, "learning_rate": 4.959979275581005e-06, "loss": 0.1926, "step": 1131 }, { "epoch": 0.10429815266964573, "grad_norm": 1.091489002731477, "learning_rate": 4.959843400821438e-06, "loss": 0.187, "step": 1132 }, { "epoch": 0.10439028884691574, "grad_norm": 0.9602509824379453, "learning_rate": 4.959707297665146e-06, "loss": 0.1666, "step": 1133 }, { "epoch": 0.10448242502418574, "grad_norm": 1.070831104626253, "learning_rate": 4.959570966124768e-06, "loss": 0.1983, "step": 1134 }, { "epoch": 0.10457456120145575, "grad_norm": 1.0644935824954898, "learning_rate": 4.959434406212959e-06, "loss": 0.187, "step": 1135 }, { "epoch": 0.10466669737872576, "grad_norm": 1.18188816476157, "learning_rate": 4.959297617942403e-06, "loss": 0.2021, "step": 1136 }, { "epoch": 0.10475883355599576, "grad_norm": 1.0758197586073297, "learning_rate": 4.959160601325797e-06, "loss": 0.1852, "step": 1137 }, { "epoch": 0.10485096973326577, "grad_norm": 1.0990404429156002, "learning_rate": 4.959023356375866e-06, "loss": 0.1921, "step": 1138 }, { "epoch": 0.10494310591053577, "grad_norm": 1.028267704366153, "learning_rate": 4.9588858831053495e-06, "loss": 0.1953, "step": 1139 }, { "epoch": 0.10503524208780578, "grad_norm": 0.9471378455619729, "learning_rate": 4.958748181527016e-06, "loss": 0.1834, "step": 1140 }, { "epoch": 0.10512737826507579, "grad_norm": 1.0244112736454591, "learning_rate": 4.958610251653649e-06, "loss": 0.1766, "step": 1141 }, { "epoch": 0.10521951444234579, "grad_norm": 1.0541172839830792, "learning_rate": 4.958472093498055e-06, "loss": 0.1942, "step": 1142 }, { "epoch": 0.1053116506196158, "grad_norm": 0.9910686182968134, "learning_rate": 4.9583337070730625e-06, "loss": 0.1839, "step": 1143 }, { "epoch": 0.10540378679688579, "grad_norm": 0.9317050369769572, "learning_rate": 4.958195092391521e-06, "loss": 0.1908, "step": 1144 }, { "epoch": 0.1054959229741558, "grad_norm": 1.01507009801172, "learning_rate": 4.958056249466301e-06, "loss": 0.1772, "step": 1145 }, { "epoch": 0.1055880591514258, "grad_norm": 0.9887305632286719, "learning_rate": 4.957917178310293e-06, "loss": 0.1931, "step": 1146 }, { "epoch": 0.10568019532869581, "grad_norm": 1.0393372989945, "learning_rate": 4.957777878936411e-06, "loss": 0.1966, "step": 1147 }, { "epoch": 0.10577233150596582, "grad_norm": 0.9892641791079512, "learning_rate": 4.957638351357587e-06, "loss": 0.1931, "step": 1148 }, { "epoch": 0.10586446768323582, "grad_norm": 0.9559643694345603, "learning_rate": 4.957498595586779e-06, "loss": 0.1823, "step": 1149 }, { "epoch": 0.10595660386050583, "grad_norm": 0.979023807959273, "learning_rate": 4.957358611636962e-06, "loss": 0.178, "step": 1150 }, { "epoch": 0.10604874003777583, "grad_norm": 1.0774638507040097, "learning_rate": 4.957218399521133e-06, "loss": 0.1908, "step": 1151 }, { "epoch": 0.10614087621504584, "grad_norm": 0.9904806549507087, "learning_rate": 4.957077959252311e-06, "loss": 0.1955, "step": 1152 }, { "epoch": 0.10623301239231585, "grad_norm": 1.0784191373101655, "learning_rate": 4.956937290843537e-06, "loss": 0.1904, "step": 1153 }, { "epoch": 0.10632514856958585, "grad_norm": 1.1522667775208266, "learning_rate": 4.95679639430787e-06, "loss": 0.2227, "step": 1154 }, { "epoch": 0.10641728474685586, "grad_norm": 0.9904608761960886, "learning_rate": 4.956655269658393e-06, "loss": 0.1822, "step": 1155 }, { "epoch": 0.10650942092412585, "grad_norm": 1.0503960405331592, "learning_rate": 4.956513916908211e-06, "loss": 0.1937, "step": 1156 }, { "epoch": 0.10660155710139586, "grad_norm": 1.0262513807644829, "learning_rate": 4.956372336070448e-06, "loss": 0.1808, "step": 1157 }, { "epoch": 0.10669369327866587, "grad_norm": 1.0342363298395292, "learning_rate": 4.956230527158248e-06, "loss": 0.1749, "step": 1158 }, { "epoch": 0.10678582945593587, "grad_norm": 0.9789103538040284, "learning_rate": 4.95608849018478e-06, "loss": 0.1849, "step": 1159 }, { "epoch": 0.10687796563320588, "grad_norm": 1.0614565305811643, "learning_rate": 4.95594622516323e-06, "loss": 0.2029, "step": 1160 }, { "epoch": 0.10697010181047588, "grad_norm": 1.0560558999226575, "learning_rate": 4.95580373210681e-06, "loss": 0.2107, "step": 1161 }, { "epoch": 0.10706223798774589, "grad_norm": 1.0797810477588987, "learning_rate": 4.955661011028748e-06, "loss": 0.2075, "step": 1162 }, { "epoch": 0.1071543741650159, "grad_norm": 1.0029566214938326, "learning_rate": 4.955518061942298e-06, "loss": 0.1868, "step": 1163 }, { "epoch": 0.1072465103422859, "grad_norm": 1.0588961555129832, "learning_rate": 4.955374884860731e-06, "loss": 0.2038, "step": 1164 }, { "epoch": 0.10733864651955591, "grad_norm": 1.00256844787753, "learning_rate": 4.9552314797973426e-06, "loss": 0.2008, "step": 1165 }, { "epoch": 0.1074307826968259, "grad_norm": 1.0284873277323123, "learning_rate": 4.955087846765446e-06, "loss": 0.1886, "step": 1166 }, { "epoch": 0.10752291887409592, "grad_norm": 0.9660868860677466, "learning_rate": 4.954943985778379e-06, "loss": 0.1911, "step": 1167 }, { "epoch": 0.10761505505136591, "grad_norm": 1.006615188078031, "learning_rate": 4.954799896849499e-06, "loss": 0.1988, "step": 1168 }, { "epoch": 0.10770719122863592, "grad_norm": 1.0946500653930293, "learning_rate": 4.954655579992184e-06, "loss": 0.2008, "step": 1169 }, { "epoch": 0.10779932740590593, "grad_norm": 1.0096850735445058, "learning_rate": 4.954511035219835e-06, "loss": 0.1905, "step": 1170 }, { "epoch": 0.10789146358317593, "grad_norm": 0.9542605339416056, "learning_rate": 4.954366262545871e-06, "loss": 0.1893, "step": 1171 }, { "epoch": 0.10798359976044594, "grad_norm": 0.9358990406040504, "learning_rate": 4.954221261983736e-06, "loss": 0.1841, "step": 1172 }, { "epoch": 0.10807573593771594, "grad_norm": 1.0250006987149098, "learning_rate": 4.954076033546892e-06, "loss": 0.1942, "step": 1173 }, { "epoch": 0.10816787211498595, "grad_norm": 1.02972348562299, "learning_rate": 4.953930577248825e-06, "loss": 0.1924, "step": 1174 }, { "epoch": 0.10826000829225596, "grad_norm": 0.9230680708989243, "learning_rate": 4.95378489310304e-06, "loss": 0.1795, "step": 1175 }, { "epoch": 0.10835214446952596, "grad_norm": 1.006934556434401, "learning_rate": 4.953638981123063e-06, "loss": 0.1859, "step": 1176 }, { "epoch": 0.10844428064679597, "grad_norm": 0.9479125968654736, "learning_rate": 4.9534928413224424e-06, "loss": 0.1685, "step": 1177 }, { "epoch": 0.10853641682406596, "grad_norm": 1.002904725939237, "learning_rate": 4.953346473714748e-06, "loss": 0.1972, "step": 1178 }, { "epoch": 0.10862855300133598, "grad_norm": 0.9705753499726706, "learning_rate": 4.953199878313569e-06, "loss": 0.1833, "step": 1179 }, { "epoch": 0.10872068917860599, "grad_norm": 0.9865043838278399, "learning_rate": 4.953053055132518e-06, "loss": 0.1868, "step": 1180 }, { "epoch": 0.10881282535587598, "grad_norm": 0.9364729281823607, "learning_rate": 4.9529060041852264e-06, "loss": 0.1877, "step": 1181 }, { "epoch": 0.108904961533146, "grad_norm": 0.9197551228545804, "learning_rate": 4.9527587254853485e-06, "loss": 0.1765, "step": 1182 }, { "epoch": 0.10899709771041599, "grad_norm": 0.9930734696539932, "learning_rate": 4.952611219046559e-06, "loss": 0.1975, "step": 1183 }, { "epoch": 0.109089233887686, "grad_norm": 0.9438054002784088, "learning_rate": 4.952463484882553e-06, "loss": 0.1746, "step": 1184 }, { "epoch": 0.109181370064956, "grad_norm": 1.0275596921449845, "learning_rate": 4.9523155230070495e-06, "loss": 0.1882, "step": 1185 }, { "epoch": 0.10927350624222601, "grad_norm": 0.9827711445828464, "learning_rate": 4.952167333433785e-06, "loss": 0.1813, "step": 1186 }, { "epoch": 0.10936564241949602, "grad_norm": 1.0596005745086683, "learning_rate": 4.952018916176521e-06, "loss": 0.1867, "step": 1187 }, { "epoch": 0.10945777859676602, "grad_norm": 0.9426441584350082, "learning_rate": 4.9518702712490355e-06, "loss": 0.1697, "step": 1188 }, { "epoch": 0.10954991477403603, "grad_norm": 1.0978130786232543, "learning_rate": 4.951721398665131e-06, "loss": 0.195, "step": 1189 }, { "epoch": 0.10964205095130602, "grad_norm": 1.054271925935406, "learning_rate": 4.951572298438632e-06, "loss": 0.1778, "step": 1190 }, { "epoch": 0.10973418712857604, "grad_norm": 0.9363792710466154, "learning_rate": 4.95142297058338e-06, "loss": 0.182, "step": 1191 }, { "epoch": 0.10982632330584605, "grad_norm": 0.9582824053873974, "learning_rate": 4.951273415113243e-06, "loss": 0.191, "step": 1192 }, { "epoch": 0.10991845948311604, "grad_norm": 1.04768214799217, "learning_rate": 4.951123632042104e-06, "loss": 0.1876, "step": 1193 }, { "epoch": 0.11001059566038605, "grad_norm": 0.9511803599003008, "learning_rate": 4.950973621383873e-06, "loss": 0.1682, "step": 1194 }, { "epoch": 0.11010273183765605, "grad_norm": 1.0673589766836193, "learning_rate": 4.950823383152478e-06, "loss": 0.2048, "step": 1195 }, { "epoch": 0.11019486801492606, "grad_norm": 1.0445742808478182, "learning_rate": 4.9506729173618675e-06, "loss": 0.1819, "step": 1196 }, { "epoch": 0.11028700419219607, "grad_norm": 0.8806288475750527, "learning_rate": 4.950522224026012e-06, "loss": 0.1729, "step": 1197 }, { "epoch": 0.11037914036946607, "grad_norm": 1.076709708388022, "learning_rate": 4.950371303158905e-06, "loss": 0.1789, "step": 1198 }, { "epoch": 0.11047127654673608, "grad_norm": 0.9229384393059257, "learning_rate": 4.950220154774559e-06, "loss": 0.1733, "step": 1199 }, { "epoch": 0.11056341272400608, "grad_norm": 0.9186878701388156, "learning_rate": 4.950068778887007e-06, "loss": 0.1753, "step": 1200 }, { "epoch": 0.11065554890127609, "grad_norm": 0.9700339527796721, "learning_rate": 4.949917175510307e-06, "loss": 0.1912, "step": 1201 }, { "epoch": 0.11074768507854608, "grad_norm": 0.9264578935919071, "learning_rate": 4.949765344658532e-06, "loss": 0.1807, "step": 1202 }, { "epoch": 0.1108398212558161, "grad_norm": 0.9953730707901259, "learning_rate": 4.949613286345781e-06, "loss": 0.1897, "step": 1203 }, { "epoch": 0.1109319574330861, "grad_norm": 0.9958814097903571, "learning_rate": 4.9494610005861745e-06, "loss": 0.1855, "step": 1204 }, { "epoch": 0.1110240936103561, "grad_norm": 0.9974041687614713, "learning_rate": 4.949308487393849e-06, "loss": 0.1887, "step": 1205 }, { "epoch": 0.11111622978762611, "grad_norm": 1.049073106143341, "learning_rate": 4.949155746782966e-06, "loss": 0.2123, "step": 1206 }, { "epoch": 0.11120836596489611, "grad_norm": 0.9442037997859811, "learning_rate": 4.94900277876771e-06, "loss": 0.1698, "step": 1207 }, { "epoch": 0.11130050214216612, "grad_norm": 1.0030492263525004, "learning_rate": 4.948849583362282e-06, "loss": 0.1939, "step": 1208 }, { "epoch": 0.11139263831943613, "grad_norm": 1.0918873927109156, "learning_rate": 4.948696160580907e-06, "loss": 0.2061, "step": 1209 }, { "epoch": 0.11148477449670613, "grad_norm": 0.9850049521735987, "learning_rate": 4.948542510437829e-06, "loss": 0.1791, "step": 1210 }, { "epoch": 0.11157691067397614, "grad_norm": 0.8914351041716434, "learning_rate": 4.948388632947316e-06, "loss": 0.1618, "step": 1211 }, { "epoch": 0.11166904685124614, "grad_norm": 0.9481190935623166, "learning_rate": 4.948234528123655e-06, "loss": 0.1926, "step": 1212 }, { "epoch": 0.11176118302851615, "grad_norm": 1.0067784660769212, "learning_rate": 4.948080195981154e-06, "loss": 0.1871, "step": 1213 }, { "epoch": 0.11185331920578616, "grad_norm": 0.962562766383318, "learning_rate": 4.947925636534144e-06, "loss": 0.1781, "step": 1214 }, { "epoch": 0.11194545538305616, "grad_norm": 1.0027653781996462, "learning_rate": 4.947770849796975e-06, "loss": 0.1888, "step": 1215 }, { "epoch": 0.11203759156032617, "grad_norm": 0.9736579394329236, "learning_rate": 4.9476158357840194e-06, "loss": 0.1795, "step": 1216 }, { "epoch": 0.11212972773759616, "grad_norm": 1.0500111563474652, "learning_rate": 4.9474605945096695e-06, "loss": 0.2044, "step": 1217 }, { "epoch": 0.11222186391486617, "grad_norm": 1.0505396188236562, "learning_rate": 4.94730512598834e-06, "loss": 0.1849, "step": 1218 }, { "epoch": 0.11231400009213617, "grad_norm": 0.9764803395123072, "learning_rate": 4.947149430234467e-06, "loss": 0.1906, "step": 1219 }, { "epoch": 0.11240613626940618, "grad_norm": 1.0016094631221018, "learning_rate": 4.946993507262505e-06, "loss": 0.1858, "step": 1220 }, { "epoch": 0.11249827244667619, "grad_norm": 0.9867478645758117, "learning_rate": 4.946837357086933e-06, "loss": 0.1871, "step": 1221 }, { "epoch": 0.11259040862394619, "grad_norm": 1.0536305452882067, "learning_rate": 4.946680979722249e-06, "loss": 0.2072, "step": 1222 }, { "epoch": 0.1126825448012162, "grad_norm": 1.0001369286623907, "learning_rate": 4.946524375182973e-06, "loss": 0.1849, "step": 1223 }, { "epoch": 0.1127746809784862, "grad_norm": 0.9590582772180609, "learning_rate": 4.946367543483645e-06, "loss": 0.1948, "step": 1224 }, { "epoch": 0.11286681715575621, "grad_norm": 1.0317577767091315, "learning_rate": 4.946210484638827e-06, "loss": 0.1842, "step": 1225 }, { "epoch": 0.11295895333302622, "grad_norm": 0.9817570764988467, "learning_rate": 4.946053198663103e-06, "loss": 0.1647, "step": 1226 }, { "epoch": 0.11305108951029622, "grad_norm": 1.0826573918808229, "learning_rate": 4.945895685571076e-06, "loss": 0.2078, "step": 1227 }, { "epoch": 0.11314322568756623, "grad_norm": 0.9688980456746177, "learning_rate": 4.945737945377372e-06, "loss": 0.1812, "step": 1228 }, { "epoch": 0.11323536186483622, "grad_norm": 0.992886307038925, "learning_rate": 4.945579978096635e-06, "loss": 0.1841, "step": 1229 }, { "epoch": 0.11332749804210623, "grad_norm": 1.0512449225985512, "learning_rate": 4.945421783743535e-06, "loss": 0.1818, "step": 1230 }, { "epoch": 0.11341963421937624, "grad_norm": 1.0098209511209224, "learning_rate": 4.945263362332759e-06, "loss": 0.1857, "step": 1231 }, { "epoch": 0.11351177039664624, "grad_norm": 1.1196581888871462, "learning_rate": 4.945104713879017e-06, "loss": 0.1891, "step": 1232 }, { "epoch": 0.11360390657391625, "grad_norm": 0.933560568750918, "learning_rate": 4.9449458383970386e-06, "loss": 0.1791, "step": 1233 }, { "epoch": 0.11369604275118625, "grad_norm": 1.0192174357061985, "learning_rate": 4.944786735901576e-06, "loss": 0.1794, "step": 1234 }, { "epoch": 0.11378817892845626, "grad_norm": 1.0521293689124385, "learning_rate": 4.944627406407401e-06, "loss": 0.1932, "step": 1235 }, { "epoch": 0.11388031510572626, "grad_norm": 0.9583785459922292, "learning_rate": 4.94446784992931e-06, "loss": 0.17, "step": 1236 }, { "epoch": 0.11397245128299627, "grad_norm": 1.050868615441966, "learning_rate": 4.9443080664821156e-06, "loss": 0.1939, "step": 1237 }, { "epoch": 0.11406458746026628, "grad_norm": 1.0764625733452038, "learning_rate": 4.944148056080654e-06, "loss": 0.1994, "step": 1238 }, { "epoch": 0.11415672363753628, "grad_norm": 1.0277252761190465, "learning_rate": 4.943987818739782e-06, "loss": 0.1876, "step": 1239 }, { "epoch": 0.11424885981480629, "grad_norm": 1.0350109470595392, "learning_rate": 4.943827354474378e-06, "loss": 0.1894, "step": 1240 }, { "epoch": 0.11434099599207628, "grad_norm": 1.0146682594155343, "learning_rate": 4.943666663299341e-06, "loss": 0.1729, "step": 1241 }, { "epoch": 0.1144331321693463, "grad_norm": 1.028187292870586, "learning_rate": 4.943505745229592e-06, "loss": 0.1855, "step": 1242 }, { "epoch": 0.1145252683466163, "grad_norm": 1.099924483199594, "learning_rate": 4.943344600280071e-06, "loss": 0.2035, "step": 1243 }, { "epoch": 0.1146174045238863, "grad_norm": 1.1454243464465912, "learning_rate": 4.943183228465742e-06, "loss": 0.1993, "step": 1244 }, { "epoch": 0.11470954070115631, "grad_norm": 0.9485855285325462, "learning_rate": 4.943021629801586e-06, "loss": 0.1691, "step": 1245 }, { "epoch": 0.11480167687842631, "grad_norm": 1.0626072276859084, "learning_rate": 4.9428598043026085e-06, "loss": 0.1846, "step": 1246 }, { "epoch": 0.11489381305569632, "grad_norm": 0.9269346524433504, "learning_rate": 4.942697751983837e-06, "loss": 0.1773, "step": 1247 }, { "epoch": 0.11498594923296633, "grad_norm": 0.9535100536348952, "learning_rate": 4.942535472860315e-06, "loss": 0.1787, "step": 1248 }, { "epoch": 0.11507808541023633, "grad_norm": 1.0023957002528299, "learning_rate": 4.942372966947112e-06, "loss": 0.1866, "step": 1249 }, { "epoch": 0.11517022158750634, "grad_norm": 0.994322779502017, "learning_rate": 4.942210234259316e-06, "loss": 0.1778, "step": 1250 }, { "epoch": 0.11526235776477634, "grad_norm": 0.9575100289627279, "learning_rate": 4.9420472748120365e-06, "loss": 0.1941, "step": 1251 }, { "epoch": 0.11535449394204635, "grad_norm": 0.9484403113363623, "learning_rate": 4.941884088620405e-06, "loss": 0.1731, "step": 1252 }, { "epoch": 0.11544663011931634, "grad_norm": 1.0017289164577379, "learning_rate": 4.941720675699573e-06, "loss": 0.1819, "step": 1253 }, { "epoch": 0.11553876629658635, "grad_norm": 0.9694215635786955, "learning_rate": 4.941557036064714e-06, "loss": 0.1838, "step": 1254 }, { "epoch": 0.11563090247385636, "grad_norm": 1.021592492535489, "learning_rate": 4.9413931697310215e-06, "loss": 0.197, "step": 1255 }, { "epoch": 0.11572303865112636, "grad_norm": 1.0982720789156573, "learning_rate": 4.941229076713709e-06, "loss": 0.1999, "step": 1256 }, { "epoch": 0.11581517482839637, "grad_norm": 0.9286929084767928, "learning_rate": 4.9410647570280156e-06, "loss": 0.1699, "step": 1257 }, { "epoch": 0.11590731100566637, "grad_norm": 1.0284887867762864, "learning_rate": 4.940900210689196e-06, "loss": 0.1965, "step": 1258 }, { "epoch": 0.11599944718293638, "grad_norm": 1.0434528337767535, "learning_rate": 4.94073543771253e-06, "loss": 0.1971, "step": 1259 }, { "epoch": 0.11609158336020639, "grad_norm": 0.9721395364025261, "learning_rate": 4.940570438113315e-06, "loss": 0.1916, "step": 1260 }, { "epoch": 0.11618371953747639, "grad_norm": 1.026835672769356, "learning_rate": 4.940405211906872e-06, "loss": 0.19, "step": 1261 }, { "epoch": 0.1162758557147464, "grad_norm": 0.983921489168916, "learning_rate": 4.9402397591085435e-06, "loss": 0.1901, "step": 1262 }, { "epoch": 0.1163679918920164, "grad_norm": 1.006030490851194, "learning_rate": 4.94007407973369e-06, "loss": 0.2099, "step": 1263 }, { "epoch": 0.1164601280692864, "grad_norm": 0.946916078921123, "learning_rate": 4.939908173797696e-06, "loss": 0.1796, "step": 1264 }, { "epoch": 0.11655226424655642, "grad_norm": 0.9186429153859957, "learning_rate": 4.939742041315964e-06, "loss": 0.1764, "step": 1265 }, { "epoch": 0.11664440042382641, "grad_norm": 1.0162264460794295, "learning_rate": 4.939575682303923e-06, "loss": 0.1946, "step": 1266 }, { "epoch": 0.11673653660109642, "grad_norm": 0.939647214901781, "learning_rate": 4.939409096777017e-06, "loss": 0.1816, "step": 1267 }, { "epoch": 0.11682867277836642, "grad_norm": 1.0127923569806176, "learning_rate": 4.939242284750712e-06, "loss": 0.1787, "step": 1268 }, { "epoch": 0.11692080895563643, "grad_norm": 0.9696949793271437, "learning_rate": 4.9390752462405e-06, "loss": 0.182, "step": 1269 }, { "epoch": 0.11701294513290643, "grad_norm": 1.0188460893399238, "learning_rate": 4.938907981261889e-06, "loss": 0.1925, "step": 1270 }, { "epoch": 0.11710508131017644, "grad_norm": 1.047535048711083, "learning_rate": 4.938740489830409e-06, "loss": 0.1858, "step": 1271 }, { "epoch": 0.11719721748744645, "grad_norm": 0.9839924846788682, "learning_rate": 4.938572771961612e-06, "loss": 0.193, "step": 1272 }, { "epoch": 0.11728935366471645, "grad_norm": 1.0763836514639684, "learning_rate": 4.93840482767107e-06, "loss": 0.1942, "step": 1273 }, { "epoch": 0.11738148984198646, "grad_norm": 0.9660646463485411, "learning_rate": 4.938236656974378e-06, "loss": 0.1703, "step": 1274 }, { "epoch": 0.11747362601925646, "grad_norm": 0.9776665253257636, "learning_rate": 4.9380682598871505e-06, "loss": 0.1821, "step": 1275 }, { "epoch": 0.11756576219652647, "grad_norm": 1.002267232847209, "learning_rate": 4.937899636425022e-06, "loss": 0.2, "step": 1276 }, { "epoch": 0.11765789837379648, "grad_norm": 0.9378913381320999, "learning_rate": 4.9377307866036506e-06, "loss": 0.1895, "step": 1277 }, { "epoch": 0.11775003455106647, "grad_norm": 0.9960120707110922, "learning_rate": 4.9375617104387124e-06, "loss": 0.1887, "step": 1278 }, { "epoch": 0.11784217072833648, "grad_norm": 1.0253843069745778, "learning_rate": 4.9373924079459076e-06, "loss": 0.1895, "step": 1279 }, { "epoch": 0.11793430690560648, "grad_norm": 0.9694205674452752, "learning_rate": 4.937222879140955e-06, "loss": 0.1948, "step": 1280 }, { "epoch": 0.11802644308287649, "grad_norm": 0.9340661317572511, "learning_rate": 4.937053124039597e-06, "loss": 0.1793, "step": 1281 }, { "epoch": 0.1181185792601465, "grad_norm": 1.0174735224747125, "learning_rate": 4.9368831426575925e-06, "loss": 0.182, "step": 1282 }, { "epoch": 0.1182107154374165, "grad_norm": 0.9767629316407481, "learning_rate": 4.9367129350107265e-06, "loss": 0.1837, "step": 1283 }, { "epoch": 0.11830285161468651, "grad_norm": 1.054908188076334, "learning_rate": 4.936542501114803e-06, "loss": 0.1852, "step": 1284 }, { "epoch": 0.11839498779195651, "grad_norm": 0.9595958456149299, "learning_rate": 4.936371840985645e-06, "loss": 0.17, "step": 1285 }, { "epoch": 0.11848712396922652, "grad_norm": 1.0916556011220866, "learning_rate": 4.9362009546391e-06, "loss": 0.1717, "step": 1286 }, { "epoch": 0.11857926014649652, "grad_norm": 1.0532332905680384, "learning_rate": 4.9360298420910335e-06, "loss": 0.1647, "step": 1287 }, { "epoch": 0.11867139632376653, "grad_norm": 1.069006714331314, "learning_rate": 4.935858503357335e-06, "loss": 0.1872, "step": 1288 }, { "epoch": 0.11876353250103654, "grad_norm": 1.0146730910464414, "learning_rate": 4.935686938453912e-06, "loss": 0.19, "step": 1289 }, { "epoch": 0.11885566867830653, "grad_norm": 1.1013136967926964, "learning_rate": 4.935515147396695e-06, "loss": 0.1942, "step": 1290 }, { "epoch": 0.11894780485557654, "grad_norm": 1.022490231526319, "learning_rate": 4.935343130201633e-06, "loss": 0.1719, "step": 1291 }, { "epoch": 0.11903994103284654, "grad_norm": 1.0736875280791962, "learning_rate": 4.935170886884701e-06, "loss": 0.1981, "step": 1292 }, { "epoch": 0.11913207721011655, "grad_norm": 1.0171104588512216, "learning_rate": 4.934998417461888e-06, "loss": 0.1832, "step": 1293 }, { "epoch": 0.11922421338738656, "grad_norm": 0.984348887268898, "learning_rate": 4.9348257219492116e-06, "loss": 0.1683, "step": 1294 }, { "epoch": 0.11931634956465656, "grad_norm": 1.0769453299087037, "learning_rate": 4.934652800362704e-06, "loss": 0.2061, "step": 1295 }, { "epoch": 0.11940848574192657, "grad_norm": 1.0409966331483476, "learning_rate": 4.934479652718422e-06, "loss": 0.1865, "step": 1296 }, { "epoch": 0.11950062191919657, "grad_norm": 1.046150491665991, "learning_rate": 4.934306279032442e-06, "loss": 0.1836, "step": 1297 }, { "epoch": 0.11959275809646658, "grad_norm": 1.0300935796451935, "learning_rate": 4.934132679320863e-06, "loss": 0.1818, "step": 1298 }, { "epoch": 0.11968489427373659, "grad_norm": 1.0223005476376819, "learning_rate": 4.933958853599803e-06, "loss": 0.2019, "step": 1299 }, { "epoch": 0.11977703045100659, "grad_norm": 0.9953052620305401, "learning_rate": 4.9337848018854005e-06, "loss": 0.191, "step": 1300 }, { "epoch": 0.1198691666282766, "grad_norm": 0.9167793138450512, "learning_rate": 4.933610524193817e-06, "loss": 0.1575, "step": 1301 }, { "epoch": 0.1199613028055466, "grad_norm": 0.9501468070448875, "learning_rate": 4.933436020541235e-06, "loss": 0.1935, "step": 1302 }, { "epoch": 0.1200534389828166, "grad_norm": 0.9908154738642798, "learning_rate": 4.933261290943856e-06, "loss": 0.1634, "step": 1303 }, { "epoch": 0.1201455751600866, "grad_norm": 1.0252708980694958, "learning_rate": 4.933086335417905e-06, "loss": 0.1912, "step": 1304 }, { "epoch": 0.12023771133735661, "grad_norm": 0.964216175527814, "learning_rate": 4.932911153979626e-06, "loss": 0.1788, "step": 1305 }, { "epoch": 0.12032984751462662, "grad_norm": 0.9947350149300663, "learning_rate": 4.932735746645284e-06, "loss": 0.1872, "step": 1306 }, { "epoch": 0.12042198369189662, "grad_norm": 1.0222442690970204, "learning_rate": 4.9325601134311665e-06, "loss": 0.1716, "step": 1307 }, { "epoch": 0.12051411986916663, "grad_norm": 0.9714762563929501, "learning_rate": 4.932384254353581e-06, "loss": 0.2076, "step": 1308 }, { "epoch": 0.12060625604643663, "grad_norm": 1.0023546871765083, "learning_rate": 4.932208169428855e-06, "loss": 0.1822, "step": 1309 }, { "epoch": 0.12069839222370664, "grad_norm": 0.9454582559482887, "learning_rate": 4.932031858673338e-06, "loss": 0.1814, "step": 1310 }, { "epoch": 0.12079052840097665, "grad_norm": 0.9685169957367546, "learning_rate": 4.931855322103403e-06, "loss": 0.1932, "step": 1311 }, { "epoch": 0.12088266457824665, "grad_norm": 0.9805185074459721, "learning_rate": 4.9316785597354385e-06, "loss": 0.1805, "step": 1312 }, { "epoch": 0.12097480075551666, "grad_norm": 0.9804392769335055, "learning_rate": 4.931501571585858e-06, "loss": 0.1845, "step": 1313 }, { "epoch": 0.12106693693278665, "grad_norm": 1.00322430997519, "learning_rate": 4.931324357671095e-06, "loss": 0.1851, "step": 1314 }, { "epoch": 0.12115907311005666, "grad_norm": 0.9328218448764897, "learning_rate": 4.931146918007604e-06, "loss": 0.1836, "step": 1315 }, { "epoch": 0.12125120928732668, "grad_norm": 0.9179723457446634, "learning_rate": 4.93096925261186e-06, "loss": 0.1784, "step": 1316 }, { "epoch": 0.12134334546459667, "grad_norm": 1.0730735318120903, "learning_rate": 4.930791361500359e-06, "loss": 0.1995, "step": 1317 }, { "epoch": 0.12143548164186668, "grad_norm": 1.0073445260759302, "learning_rate": 4.930613244689618e-06, "loss": 0.1876, "step": 1318 }, { "epoch": 0.12152761781913668, "grad_norm": 0.9666047869313055, "learning_rate": 4.930434902196177e-06, "loss": 0.1844, "step": 1319 }, { "epoch": 0.12161975399640669, "grad_norm": 0.9566473238654624, "learning_rate": 4.930256334036593e-06, "loss": 0.1834, "step": 1320 }, { "epoch": 0.12171189017367669, "grad_norm": 1.0697107399386463, "learning_rate": 4.930077540227447e-06, "loss": 0.1792, "step": 1321 }, { "epoch": 0.1218040263509467, "grad_norm": 0.9209255293099364, "learning_rate": 4.92989852078534e-06, "loss": 0.1636, "step": 1322 }, { "epoch": 0.12189616252821671, "grad_norm": 1.1125165716627952, "learning_rate": 4.929719275726893e-06, "loss": 0.1838, "step": 1323 }, { "epoch": 0.1219882987054867, "grad_norm": 0.9940504687221988, "learning_rate": 4.9295398050687505e-06, "loss": 0.1737, "step": 1324 }, { "epoch": 0.12208043488275672, "grad_norm": 1.0103196274970314, "learning_rate": 4.929360108827575e-06, "loss": 0.1867, "step": 1325 }, { "epoch": 0.12217257106002671, "grad_norm": 1.0723418698938951, "learning_rate": 4.929180187020053e-06, "loss": 0.1873, "step": 1326 }, { "epoch": 0.12226470723729672, "grad_norm": 0.9596676997934364, "learning_rate": 4.9290000396628875e-06, "loss": 0.1845, "step": 1327 }, { "epoch": 0.12235684341456673, "grad_norm": 0.9962848010523647, "learning_rate": 4.928819666772808e-06, "loss": 0.1789, "step": 1328 }, { "epoch": 0.12244897959183673, "grad_norm": 1.0363419257999569, "learning_rate": 4.9286390683665615e-06, "loss": 0.1886, "step": 1329 }, { "epoch": 0.12254111576910674, "grad_norm": 0.9278551228569101, "learning_rate": 4.9284582444609156e-06, "loss": 0.1816, "step": 1330 }, { "epoch": 0.12263325194637674, "grad_norm": 1.0265744616787111, "learning_rate": 4.9282771950726605e-06, "loss": 0.1864, "step": 1331 }, { "epoch": 0.12272538812364675, "grad_norm": 0.9152490785424434, "learning_rate": 4.928095920218606e-06, "loss": 0.1797, "step": 1332 }, { "epoch": 0.12281752430091676, "grad_norm": 0.8831438931297253, "learning_rate": 4.927914419915585e-06, "loss": 0.1858, "step": 1333 }, { "epoch": 0.12290966047818676, "grad_norm": 0.9599196224749852, "learning_rate": 4.927732694180448e-06, "loss": 0.1894, "step": 1334 }, { "epoch": 0.12300179665545677, "grad_norm": 0.9345601055819366, "learning_rate": 4.9275507430300694e-06, "loss": 0.19, "step": 1335 }, { "epoch": 0.12309393283272677, "grad_norm": 0.9531446635108866, "learning_rate": 4.927368566481343e-06, "loss": 0.1768, "step": 1336 }, { "epoch": 0.12318606900999678, "grad_norm": 0.8954454298056077, "learning_rate": 4.927186164551184e-06, "loss": 0.1661, "step": 1337 }, { "epoch": 0.12327820518726679, "grad_norm": 1.0406211772874898, "learning_rate": 4.927003537256528e-06, "loss": 0.1896, "step": 1338 }, { "epoch": 0.12337034136453678, "grad_norm": 0.9427072884318984, "learning_rate": 4.926820684614333e-06, "loss": 0.1783, "step": 1339 }, { "epoch": 0.1234624775418068, "grad_norm": 1.063398218693465, "learning_rate": 4.9266376066415764e-06, "loss": 0.2, "step": 1340 }, { "epoch": 0.12355461371907679, "grad_norm": 1.014467337241986, "learning_rate": 4.926454303355256e-06, "loss": 0.1778, "step": 1341 }, { "epoch": 0.1236467498963468, "grad_norm": 0.9867211771794864, "learning_rate": 4.926270774772392e-06, "loss": 0.1868, "step": 1342 }, { "epoch": 0.1237388860736168, "grad_norm": 0.9314537671161427, "learning_rate": 4.926087020910027e-06, "loss": 0.1689, "step": 1343 }, { "epoch": 0.12383102225088681, "grad_norm": 0.9945656614235373, "learning_rate": 4.925903041785221e-06, "loss": 0.1915, "step": 1344 }, { "epoch": 0.12392315842815682, "grad_norm": 1.054070147484718, "learning_rate": 4.925718837415055e-06, "loss": 0.1874, "step": 1345 }, { "epoch": 0.12401529460542682, "grad_norm": 1.003037762311901, "learning_rate": 4.925534407816634e-06, "loss": 0.1769, "step": 1346 }, { "epoch": 0.12410743078269683, "grad_norm": 0.9398314757426698, "learning_rate": 4.925349753007083e-06, "loss": 0.1737, "step": 1347 }, { "epoch": 0.12419956695996683, "grad_norm": 1.0109341517521457, "learning_rate": 4.925164873003546e-06, "loss": 0.184, "step": 1348 }, { "epoch": 0.12429170313723684, "grad_norm": 1.045989030595084, "learning_rate": 4.92497976782319e-06, "loss": 0.1924, "step": 1349 }, { "epoch": 0.12438383931450685, "grad_norm": 0.9613180803344651, "learning_rate": 4.924794437483202e-06, "loss": 0.1723, "step": 1350 }, { "epoch": 0.12447597549177684, "grad_norm": 1.0326785619529568, "learning_rate": 4.924608882000789e-06, "loss": 0.1987, "step": 1351 }, { "epoch": 0.12456811166904685, "grad_norm": 0.9158713928345703, "learning_rate": 4.92442310139318e-06, "loss": 0.1765, "step": 1352 }, { "epoch": 0.12466024784631685, "grad_norm": 0.9681906696968335, "learning_rate": 4.924237095677625e-06, "loss": 0.1783, "step": 1353 }, { "epoch": 0.12475238402358686, "grad_norm": 1.0316433171773627, "learning_rate": 4.924050864871396e-06, "loss": 0.191, "step": 1354 }, { "epoch": 0.12484452020085687, "grad_norm": 1.0732753608787855, "learning_rate": 4.923864408991782e-06, "loss": 0.1873, "step": 1355 }, { "epoch": 0.12493665637812687, "grad_norm": 1.0115088181506764, "learning_rate": 4.923677728056098e-06, "loss": 0.184, "step": 1356 }, { "epoch": 0.12502879255539687, "grad_norm": 0.9831921194489737, "learning_rate": 4.923490822081675e-06, "loss": 0.1831, "step": 1357 }, { "epoch": 0.1251209287326669, "grad_norm": 0.9368075813037634, "learning_rate": 4.923303691085869e-06, "loss": 0.1716, "step": 1358 }, { "epoch": 0.1252130649099369, "grad_norm": 0.9158976927466699, "learning_rate": 4.9231163350860535e-06, "loss": 0.1734, "step": 1359 }, { "epoch": 0.12530520108720689, "grad_norm": 0.9316616396852874, "learning_rate": 4.922928754099626e-06, "loss": 0.1706, "step": 1360 }, { "epoch": 0.1253973372644769, "grad_norm": 1.1250088621018242, "learning_rate": 4.9227409481440034e-06, "loss": 0.2038, "step": 1361 }, { "epoch": 0.1254894734417469, "grad_norm": 0.9675671851618894, "learning_rate": 4.922552917236622e-06, "loss": 0.1746, "step": 1362 }, { "epoch": 0.1255816096190169, "grad_norm": 0.9821306778029524, "learning_rate": 4.922364661394943e-06, "loss": 0.1879, "step": 1363 }, { "epoch": 0.1256737457962869, "grad_norm": 0.896728621906593, "learning_rate": 4.922176180636443e-06, "loss": 0.1632, "step": 1364 }, { "epoch": 0.12576588197355693, "grad_norm": 0.9697621684195716, "learning_rate": 4.921987474978626e-06, "loss": 0.1888, "step": 1365 }, { "epoch": 0.12585801815082692, "grad_norm": 0.9522967075080769, "learning_rate": 4.921798544439009e-06, "loss": 0.1875, "step": 1366 }, { "epoch": 0.12595015432809692, "grad_norm": 1.051166642356011, "learning_rate": 4.921609389035138e-06, "loss": 0.1997, "step": 1367 }, { "epoch": 0.12604229050536694, "grad_norm": 1.0744627934977156, "learning_rate": 4.921420008784573e-06, "loss": 0.1905, "step": 1368 }, { "epoch": 0.12613442668263694, "grad_norm": 0.9960260593456565, "learning_rate": 4.9212304037049015e-06, "loss": 0.1829, "step": 1369 }, { "epoch": 0.12622656285990694, "grad_norm": 1.0202692128949833, "learning_rate": 4.921040573813726e-06, "loss": 0.1732, "step": 1370 }, { "epoch": 0.12631869903717693, "grad_norm": 1.0365842879051643, "learning_rate": 4.9208505191286714e-06, "loss": 0.1855, "step": 1371 }, { "epoch": 0.12641083521444696, "grad_norm": 1.1591917314275566, "learning_rate": 4.920660239667387e-06, "loss": 0.209, "step": 1372 }, { "epoch": 0.12650297139171696, "grad_norm": 0.9993800116845046, "learning_rate": 4.920469735447538e-06, "loss": 0.1883, "step": 1373 }, { "epoch": 0.12659510756898695, "grad_norm": 0.9608088730057035, "learning_rate": 4.920279006486815e-06, "loss": 0.1812, "step": 1374 }, { "epoch": 0.12668724374625698, "grad_norm": 0.9592626621524494, "learning_rate": 4.920088052802924e-06, "loss": 0.1908, "step": 1375 }, { "epoch": 0.12677937992352697, "grad_norm": 0.9979910451476817, "learning_rate": 4.919896874413597e-06, "loss": 0.1756, "step": 1376 }, { "epoch": 0.12687151610079697, "grad_norm": 0.9705279536103502, "learning_rate": 4.919705471336585e-06, "loss": 0.1696, "step": 1377 }, { "epoch": 0.126963652278067, "grad_norm": 1.02529637426661, "learning_rate": 4.919513843589661e-06, "loss": 0.1979, "step": 1378 }, { "epoch": 0.127055788455337, "grad_norm": 0.9709462058488644, "learning_rate": 4.919321991190614e-06, "loss": 0.1803, "step": 1379 }, { "epoch": 0.127147924632607, "grad_norm": 1.072594825084783, "learning_rate": 4.919129914157261e-06, "loss": 0.1933, "step": 1380 }, { "epoch": 0.127240060809877, "grad_norm": 1.1100361520927327, "learning_rate": 4.918937612507435e-06, "loss": 0.1769, "step": 1381 }, { "epoch": 0.127332196987147, "grad_norm": 0.9545515898210492, "learning_rate": 4.918745086258992e-06, "loss": 0.1717, "step": 1382 }, { "epoch": 0.127424333164417, "grad_norm": 0.9027879802968097, "learning_rate": 4.918552335429806e-06, "loss": 0.1644, "step": 1383 }, { "epoch": 0.127516469341687, "grad_norm": 0.9607434821952252, "learning_rate": 4.918359360037776e-06, "loss": 0.1759, "step": 1384 }, { "epoch": 0.12760860551895703, "grad_norm": 1.0212850726584626, "learning_rate": 4.918166160100819e-06, "loss": 0.1868, "step": 1385 }, { "epoch": 0.12770074169622703, "grad_norm": 0.9851648999704009, "learning_rate": 4.917972735636875e-06, "loss": 0.1792, "step": 1386 }, { "epoch": 0.12779287787349702, "grad_norm": 0.9860306797823638, "learning_rate": 4.9177790866639005e-06, "loss": 0.1576, "step": 1387 }, { "epoch": 0.12788501405076702, "grad_norm": 0.9616676315179897, "learning_rate": 4.917585213199878e-06, "loss": 0.1748, "step": 1388 }, { "epoch": 0.12797715022803705, "grad_norm": 1.0519830739673464, "learning_rate": 4.9173911152628095e-06, "loss": 0.202, "step": 1389 }, { "epoch": 0.12806928640530704, "grad_norm": 1.0777062892635907, "learning_rate": 4.917196792870715e-06, "loss": 0.1892, "step": 1390 }, { "epoch": 0.12816142258257704, "grad_norm": 0.9991283645825584, "learning_rate": 4.917002246041638e-06, "loss": 0.2017, "step": 1391 }, { "epoch": 0.12825355875984706, "grad_norm": 1.0345531811177093, "learning_rate": 4.916807474793643e-06, "loss": 0.2083, "step": 1392 }, { "epoch": 0.12834569493711706, "grad_norm": 0.9962307459016241, "learning_rate": 4.916612479144812e-06, "loss": 0.1898, "step": 1393 }, { "epoch": 0.12843783111438706, "grad_norm": 0.9750941523903573, "learning_rate": 4.916417259113254e-06, "loss": 0.186, "step": 1394 }, { "epoch": 0.12852996729165708, "grad_norm": 1.0734279770420394, "learning_rate": 4.916221814717092e-06, "loss": 0.1941, "step": 1395 }, { "epoch": 0.12862210346892708, "grad_norm": 1.1087277465598613, "learning_rate": 4.916026145974476e-06, "loss": 0.1863, "step": 1396 }, { "epoch": 0.12871423964619708, "grad_norm": 0.970503983339328, "learning_rate": 4.915830252903572e-06, "loss": 0.1809, "step": 1397 }, { "epoch": 0.12880637582346707, "grad_norm": 1.0270549073779653, "learning_rate": 4.915634135522569e-06, "loss": 0.1737, "step": 1398 }, { "epoch": 0.1288985120007371, "grad_norm": 1.017718130375212, "learning_rate": 4.915437793849676e-06, "loss": 0.1936, "step": 1399 }, { "epoch": 0.1289906481780071, "grad_norm": 0.9258354769873366, "learning_rate": 4.915241227903125e-06, "loss": 0.176, "step": 1400 }, { "epoch": 0.1290827843552771, "grad_norm": 1.1632022523711172, "learning_rate": 4.915044437701165e-06, "loss": 0.193, "step": 1401 }, { "epoch": 0.12917492053254712, "grad_norm": 0.9909882624273187, "learning_rate": 4.914847423262069e-06, "loss": 0.1772, "step": 1402 }, { "epoch": 0.1292670567098171, "grad_norm": 1.0227967032568668, "learning_rate": 4.9146501846041304e-06, "loss": 0.1813, "step": 1403 }, { "epoch": 0.1293591928870871, "grad_norm": 1.023350428596177, "learning_rate": 4.914452721745662e-06, "loss": 0.1751, "step": 1404 }, { "epoch": 0.1294513290643571, "grad_norm": 1.0953277903910312, "learning_rate": 4.914255034704998e-06, "loss": 0.2063, "step": 1405 }, { "epoch": 0.12954346524162713, "grad_norm": 1.0076641007495757, "learning_rate": 4.914057123500495e-06, "loss": 0.1864, "step": 1406 }, { "epoch": 0.12963560141889713, "grad_norm": 1.0569146161299305, "learning_rate": 4.913858988150528e-06, "loss": 0.1817, "step": 1407 }, { "epoch": 0.12972773759616713, "grad_norm": 1.0438485163527682, "learning_rate": 4.9136606286734945e-06, "loss": 0.1638, "step": 1408 }, { "epoch": 0.12981987377343715, "grad_norm": 0.9678728108018867, "learning_rate": 4.913462045087811e-06, "loss": 0.1735, "step": 1409 }, { "epoch": 0.12991200995070715, "grad_norm": 1.0572497449704699, "learning_rate": 4.9132632374119185e-06, "loss": 0.1859, "step": 1410 }, { "epoch": 0.13000414612797714, "grad_norm": 1.1213963454815326, "learning_rate": 4.913064205664273e-06, "loss": 0.18, "step": 1411 }, { "epoch": 0.13009628230524717, "grad_norm": 1.0531509919261324, "learning_rate": 4.912864949863358e-06, "loss": 0.1701, "step": 1412 }, { "epoch": 0.13018841848251717, "grad_norm": 1.0597007287157376, "learning_rate": 4.912665470027671e-06, "loss": 0.1975, "step": 1413 }, { "epoch": 0.13028055465978716, "grad_norm": 1.0209805064275213, "learning_rate": 4.912465766175736e-06, "loss": 0.1686, "step": 1414 }, { "epoch": 0.13037269083705716, "grad_norm": 0.9354404787118893, "learning_rate": 4.912265838326095e-06, "loss": 0.1649, "step": 1415 }, { "epoch": 0.13046482701432718, "grad_norm": 0.9685000508425466, "learning_rate": 4.912065686497312e-06, "loss": 0.1792, "step": 1416 }, { "epoch": 0.13055696319159718, "grad_norm": 0.9793173080999195, "learning_rate": 4.91186531070797e-06, "loss": 0.1733, "step": 1417 }, { "epoch": 0.13064909936886718, "grad_norm": 1.0267034852588974, "learning_rate": 4.911664710976674e-06, "loss": 0.1911, "step": 1418 }, { "epoch": 0.1307412355461372, "grad_norm": 1.0679568316502386, "learning_rate": 4.91146388732205e-06, "loss": 0.1959, "step": 1419 }, { "epoch": 0.1308333717234072, "grad_norm": 1.043916463767155, "learning_rate": 4.911262839762745e-06, "loss": 0.1844, "step": 1420 }, { "epoch": 0.1309255079006772, "grad_norm": 1.1426541467661104, "learning_rate": 4.911061568317425e-06, "loss": 0.1839, "step": 1421 }, { "epoch": 0.1310176440779472, "grad_norm": 1.0670712794431116, "learning_rate": 4.910860073004779e-06, "loss": 0.1893, "step": 1422 }, { "epoch": 0.13110978025521722, "grad_norm": 1.0217657650798537, "learning_rate": 4.910658353843517e-06, "loss": 0.1757, "step": 1423 }, { "epoch": 0.13120191643248721, "grad_norm": 1.042824076262155, "learning_rate": 4.910456410852367e-06, "loss": 0.163, "step": 1424 }, { "epoch": 0.1312940526097572, "grad_norm": 0.956885833442343, "learning_rate": 4.91025424405008e-06, "loss": 0.1723, "step": 1425 }, { "epoch": 0.13138618878702724, "grad_norm": 1.1449966036167125, "learning_rate": 4.910051853455426e-06, "loss": 0.1911, "step": 1426 }, { "epoch": 0.13147832496429723, "grad_norm": 1.036287110180777, "learning_rate": 4.909849239087199e-06, "loss": 0.1988, "step": 1427 }, { "epoch": 0.13157046114156723, "grad_norm": 0.9948309049567811, "learning_rate": 4.90964640096421e-06, "loss": 0.1951, "step": 1428 }, { "epoch": 0.13166259731883725, "grad_norm": 1.0844421392628185, "learning_rate": 4.9094433391052935e-06, "loss": 0.1812, "step": 1429 }, { "epoch": 0.13175473349610725, "grad_norm": 1.0495914214075481, "learning_rate": 4.909240053529304e-06, "loss": 0.1985, "step": 1430 }, { "epoch": 0.13184686967337725, "grad_norm": 0.9651180837150052, "learning_rate": 4.909036544255116e-06, "loss": 0.1702, "step": 1431 }, { "epoch": 0.13193900585064725, "grad_norm": 0.9955966957325579, "learning_rate": 4.908832811301626e-06, "loss": 0.1781, "step": 1432 }, { "epoch": 0.13203114202791727, "grad_norm": 1.0035279832126653, "learning_rate": 4.90862885468775e-06, "loss": 0.1743, "step": 1433 }, { "epoch": 0.13212327820518727, "grad_norm": 1.0358847851511948, "learning_rate": 4.908424674432425e-06, "loss": 0.1895, "step": 1434 }, { "epoch": 0.13221541438245726, "grad_norm": 0.9875222025539104, "learning_rate": 4.908220270554611e-06, "loss": 0.1809, "step": 1435 }, { "epoch": 0.1323075505597273, "grad_norm": 1.0278508432913862, "learning_rate": 4.908015643073285e-06, "loss": 0.1833, "step": 1436 }, { "epoch": 0.13239968673699729, "grad_norm": 1.0273581077755105, "learning_rate": 4.907810792007447e-06, "loss": 0.1984, "step": 1437 }, { "epoch": 0.13249182291426728, "grad_norm": 1.0560009720633898, "learning_rate": 4.907605717376118e-06, "loss": 0.1864, "step": 1438 }, { "epoch": 0.13258395909153728, "grad_norm": 0.9626044030041234, "learning_rate": 4.90740041919834e-06, "loss": 0.1758, "step": 1439 }, { "epoch": 0.1326760952688073, "grad_norm": 0.9672689247849761, "learning_rate": 4.907194897493173e-06, "loss": 0.1771, "step": 1440 }, { "epoch": 0.1327682314460773, "grad_norm": 1.0450504004609606, "learning_rate": 4.906989152279701e-06, "loss": 0.1905, "step": 1441 }, { "epoch": 0.1328603676233473, "grad_norm": 0.9482587626895994, "learning_rate": 4.9067831835770275e-06, "loss": 0.1738, "step": 1442 }, { "epoch": 0.13295250380061732, "grad_norm": 0.9463239494076845, "learning_rate": 4.906576991404276e-06, "loss": 0.1854, "step": 1443 }, { "epoch": 0.13304463997788732, "grad_norm": 1.0061874594493472, "learning_rate": 4.9063705757805915e-06, "loss": 0.1985, "step": 1444 }, { "epoch": 0.13313677615515732, "grad_norm": 0.9068357394587006, "learning_rate": 4.906163936725141e-06, "loss": 0.1595, "step": 1445 }, { "epoch": 0.13322891233242734, "grad_norm": 1.011124096617946, "learning_rate": 4.905957074257109e-06, "loss": 0.1716, "step": 1446 }, { "epoch": 0.13332104850969734, "grad_norm": 0.9635712912469494, "learning_rate": 4.905749988395704e-06, "loss": 0.1686, "step": 1447 }, { "epoch": 0.13341318468696733, "grad_norm": 0.9726084759736766, "learning_rate": 4.905542679160155e-06, "loss": 0.1682, "step": 1448 }, { "epoch": 0.13350532086423733, "grad_norm": 0.9982505363951202, "learning_rate": 4.905335146569707e-06, "loss": 0.1972, "step": 1449 }, { "epoch": 0.13359745704150736, "grad_norm": 1.0309683229900235, "learning_rate": 4.9051273906436335e-06, "loss": 0.1786, "step": 1450 }, { "epoch": 0.13368959321877735, "grad_norm": 1.014497711913621, "learning_rate": 4.904919411401222e-06, "loss": 0.1753, "step": 1451 }, { "epoch": 0.13378172939604735, "grad_norm": 0.9494034749604296, "learning_rate": 4.9047112088617855e-06, "loss": 0.1685, "step": 1452 }, { "epoch": 0.13387386557331737, "grad_norm": 0.9533159259651824, "learning_rate": 4.904502783044654e-06, "loss": 0.183, "step": 1453 }, { "epoch": 0.13396600175058737, "grad_norm": 1.0198352992862363, "learning_rate": 4.90429413396918e-06, "loss": 0.1997, "step": 1454 }, { "epoch": 0.13405813792785737, "grad_norm": 1.028575773022091, "learning_rate": 4.904085261654736e-06, "loss": 0.1874, "step": 1455 }, { "epoch": 0.13415027410512737, "grad_norm": 0.9643745703997726, "learning_rate": 4.903876166120718e-06, "loss": 0.186, "step": 1456 }, { "epoch": 0.1342424102823974, "grad_norm": 0.9918455781983645, "learning_rate": 4.903666847386539e-06, "loss": 0.1926, "step": 1457 }, { "epoch": 0.1343345464596674, "grad_norm": 1.0310036777983294, "learning_rate": 4.903457305471635e-06, "loss": 0.1888, "step": 1458 }, { "epoch": 0.13442668263693738, "grad_norm": 0.9804790007901808, "learning_rate": 4.90324754039546e-06, "loss": 0.1899, "step": 1459 }, { "epoch": 0.1345188188142074, "grad_norm": 0.873722915764326, "learning_rate": 4.903037552177494e-06, "loss": 0.1743, "step": 1460 }, { "epoch": 0.1346109549914774, "grad_norm": 0.9519299025740255, "learning_rate": 4.9028273408372315e-06, "loss": 0.1809, "step": 1461 }, { "epoch": 0.1347030911687474, "grad_norm": 0.9612320511182931, "learning_rate": 4.902616906394193e-06, "loss": 0.1636, "step": 1462 }, { "epoch": 0.13479522734601743, "grad_norm": 0.9937787642498676, "learning_rate": 4.9024062488679145e-06, "loss": 0.1743, "step": 1463 }, { "epoch": 0.13488736352328742, "grad_norm": 0.8938554298239793, "learning_rate": 4.9021953682779585e-06, "loss": 0.1561, "step": 1464 }, { "epoch": 0.13497949970055742, "grad_norm": 1.0398410959428166, "learning_rate": 4.901984264643904e-06, "loss": 0.1925, "step": 1465 }, { "epoch": 0.13507163587782742, "grad_norm": 1.0310209853854573, "learning_rate": 4.9017729379853515e-06, "loss": 0.1992, "step": 1466 }, { "epoch": 0.13516377205509744, "grad_norm": 1.0207343294570042, "learning_rate": 4.901561388321923e-06, "loss": 0.2045, "step": 1467 }, { "epoch": 0.13525590823236744, "grad_norm": 0.87864572441413, "learning_rate": 4.901349615673262e-06, "loss": 0.1572, "step": 1468 }, { "epoch": 0.13534804440963744, "grad_norm": 0.9196847849987159, "learning_rate": 4.90113762005903e-06, "loss": 0.1802, "step": 1469 }, { "epoch": 0.13544018058690746, "grad_norm": 0.9578454364771498, "learning_rate": 4.900925401498912e-06, "loss": 0.1858, "step": 1470 }, { "epoch": 0.13553231676417746, "grad_norm": 0.9488263656936775, "learning_rate": 4.900712960012612e-06, "loss": 0.1801, "step": 1471 }, { "epoch": 0.13562445294144745, "grad_norm": 0.9320142456071285, "learning_rate": 4.900500295619855e-06, "loss": 0.1808, "step": 1472 }, { "epoch": 0.13571658911871745, "grad_norm": 0.8749696112915272, "learning_rate": 4.900287408340387e-06, "loss": 0.1707, "step": 1473 }, { "epoch": 0.13580872529598748, "grad_norm": 0.9555507551898182, "learning_rate": 4.900074298193976e-06, "loss": 0.1826, "step": 1474 }, { "epoch": 0.13590086147325747, "grad_norm": 1.0200858753072042, "learning_rate": 4.899860965200407e-06, "loss": 0.1936, "step": 1475 }, { "epoch": 0.13599299765052747, "grad_norm": 1.046105205148264, "learning_rate": 4.89964740937949e-06, "loss": 0.1949, "step": 1476 }, { "epoch": 0.1360851338277975, "grad_norm": 0.9703684004608017, "learning_rate": 4.899433630751052e-06, "loss": 0.1812, "step": 1477 }, { "epoch": 0.1361772700050675, "grad_norm": 0.9358632265946936, "learning_rate": 4.8992196293349435e-06, "loss": 0.1692, "step": 1478 }, { "epoch": 0.1362694061823375, "grad_norm": 0.9421548138334379, "learning_rate": 4.899005405151034e-06, "loss": 0.173, "step": 1479 }, { "epoch": 0.1363615423596075, "grad_norm": 0.9874060354260804, "learning_rate": 4.898790958219215e-06, "loss": 0.1917, "step": 1480 }, { "epoch": 0.1364536785368775, "grad_norm": 1.0241768866081442, "learning_rate": 4.898576288559396e-06, "loss": 0.194, "step": 1481 }, { "epoch": 0.1365458147141475, "grad_norm": 0.967676077385443, "learning_rate": 4.898361396191512e-06, "loss": 0.1869, "step": 1482 }, { "epoch": 0.1366379508914175, "grad_norm": 1.063157649624893, "learning_rate": 4.898146281135514e-06, "loss": 0.1878, "step": 1483 }, { "epoch": 0.13673008706868753, "grad_norm": 1.0213338083271282, "learning_rate": 4.8979309434113745e-06, "loss": 0.171, "step": 1484 }, { "epoch": 0.13682222324595752, "grad_norm": 0.9752201110126685, "learning_rate": 4.89771538303909e-06, "loss": 0.176, "step": 1485 }, { "epoch": 0.13691435942322752, "grad_norm": 0.9992722204337339, "learning_rate": 4.897499600038673e-06, "loss": 0.1763, "step": 1486 }, { "epoch": 0.13700649560049755, "grad_norm": 0.9673495125040701, "learning_rate": 4.8972835944301615e-06, "loss": 0.181, "step": 1487 }, { "epoch": 0.13709863177776754, "grad_norm": 0.9172227987836397, "learning_rate": 4.89706736623361e-06, "loss": 0.1687, "step": 1488 }, { "epoch": 0.13719076795503754, "grad_norm": 0.9179738386385932, "learning_rate": 4.896850915469095e-06, "loss": 0.1708, "step": 1489 }, { "epoch": 0.13728290413230754, "grad_norm": 1.0866865421153868, "learning_rate": 4.896634242156715e-06, "loss": 0.1981, "step": 1490 }, { "epoch": 0.13737504030957756, "grad_norm": 1.1181657985966733, "learning_rate": 4.896417346316587e-06, "loss": 0.2006, "step": 1491 }, { "epoch": 0.13746717648684756, "grad_norm": 0.958948999492823, "learning_rate": 4.8962002279688514e-06, "loss": 0.176, "step": 1492 }, { "epoch": 0.13755931266411756, "grad_norm": 1.0453082102751459, "learning_rate": 4.8959828871336665e-06, "loss": 0.1824, "step": 1493 }, { "epoch": 0.13765144884138758, "grad_norm": 1.0704601468240302, "learning_rate": 4.895765323831212e-06, "loss": 0.1798, "step": 1494 }, { "epoch": 0.13774358501865758, "grad_norm": 1.046104125736917, "learning_rate": 4.895547538081691e-06, "loss": 0.1865, "step": 1495 }, { "epoch": 0.13783572119592757, "grad_norm": 0.9953218833475491, "learning_rate": 4.895329529905322e-06, "loss": 0.1798, "step": 1496 }, { "epoch": 0.1379278573731976, "grad_norm": 0.9993516892546119, "learning_rate": 4.895111299322348e-06, "loss": 0.1779, "step": 1497 }, { "epoch": 0.1380199935504676, "grad_norm": 1.1037371027487326, "learning_rate": 4.894892846353032e-06, "loss": 0.1718, "step": 1498 }, { "epoch": 0.1381121297277376, "grad_norm": 1.1042776255985305, "learning_rate": 4.8946741710176584e-06, "loss": 0.1769, "step": 1499 }, { "epoch": 0.1382042659050076, "grad_norm": 1.0159415033155752, "learning_rate": 4.894455273336531e-06, "loss": 0.1963, "step": 1500 }, { "epoch": 0.1382042659050076, "eval_loss": 0.1832522302865982, "eval_runtime": 300.5966, "eval_samples_per_second": 23.344, "eval_steps_per_second": 2.921, "step": 1500 }, { "epoch": 0.13829640208227761, "grad_norm": 1.0590835289628433, "learning_rate": 4.894236153329972e-06, "loss": 0.1821, "step": 1501 }, { "epoch": 0.1383885382595476, "grad_norm": 1.0055100832100328, "learning_rate": 4.894016811018329e-06, "loss": 0.164, "step": 1502 }, { "epoch": 0.1384806744368176, "grad_norm": 1.031947823782199, "learning_rate": 4.893797246421968e-06, "loss": 0.1967, "step": 1503 }, { "epoch": 0.13857281061408763, "grad_norm": 0.9897293422122585, "learning_rate": 4.893577459561274e-06, "loss": 0.1844, "step": 1504 }, { "epoch": 0.13866494679135763, "grad_norm": 1.0213676397867222, "learning_rate": 4.893357450456657e-06, "loss": 0.1896, "step": 1505 }, { "epoch": 0.13875708296862763, "grad_norm": 1.0049304145032214, "learning_rate": 4.893137219128542e-06, "loss": 0.1719, "step": 1506 }, { "epoch": 0.13884921914589762, "grad_norm": 0.9605931957652998, "learning_rate": 4.892916765597378e-06, "loss": 0.1735, "step": 1507 }, { "epoch": 0.13894135532316765, "grad_norm": 1.09891001647981, "learning_rate": 4.892696089883636e-06, "loss": 0.2017, "step": 1508 }, { "epoch": 0.13903349150043764, "grad_norm": 1.0245227870445939, "learning_rate": 4.8924751920078045e-06, "loss": 0.1845, "step": 1509 }, { "epoch": 0.13912562767770764, "grad_norm": 0.9744899746698248, "learning_rate": 4.892254071990393e-06, "loss": 0.1673, "step": 1510 }, { "epoch": 0.13921776385497767, "grad_norm": 1.007033072682871, "learning_rate": 4.892032729851934e-06, "loss": 0.1638, "step": 1511 }, { "epoch": 0.13930990003224766, "grad_norm": 1.0239978553664408, "learning_rate": 4.891811165612979e-06, "loss": 0.2006, "step": 1512 }, { "epoch": 0.13940203620951766, "grad_norm": 0.9688980878806478, "learning_rate": 4.8915893792941e-06, "loss": 0.1741, "step": 1513 }, { "epoch": 0.13949417238678768, "grad_norm": 1.0124497462752957, "learning_rate": 4.891367370915889e-06, "loss": 0.1853, "step": 1514 }, { "epoch": 0.13958630856405768, "grad_norm": 0.9443507545978798, "learning_rate": 4.89114514049896e-06, "loss": 0.1853, "step": 1515 }, { "epoch": 0.13967844474132768, "grad_norm": 0.8944876191974487, "learning_rate": 4.890922688063949e-06, "loss": 0.1713, "step": 1516 }, { "epoch": 0.13977058091859768, "grad_norm": 0.9278038889701954, "learning_rate": 4.8907000136315075e-06, "loss": 0.159, "step": 1517 }, { "epoch": 0.1398627170958677, "grad_norm": 0.9544401348206267, "learning_rate": 4.890477117222313e-06, "loss": 0.1646, "step": 1518 }, { "epoch": 0.1399548532731377, "grad_norm": 0.993320836333389, "learning_rate": 4.890253998857061e-06, "loss": 0.1712, "step": 1519 }, { "epoch": 0.1400469894504077, "grad_norm": 0.9672368448981169, "learning_rate": 4.890030658556467e-06, "loss": 0.1763, "step": 1520 }, { "epoch": 0.14013912562767772, "grad_norm": 1.0007096297433211, "learning_rate": 4.88980709634127e-06, "loss": 0.1778, "step": 1521 }, { "epoch": 0.14023126180494772, "grad_norm": 1.0380589027334104, "learning_rate": 4.889583312232227e-06, "loss": 0.2014, "step": 1522 }, { "epoch": 0.1403233979822177, "grad_norm": 1.0112883370951264, "learning_rate": 4.889359306250117e-06, "loss": 0.173, "step": 1523 }, { "epoch": 0.1404155341594877, "grad_norm": 0.9158439308113651, "learning_rate": 4.889135078415736e-06, "loss": 0.1703, "step": 1524 }, { "epoch": 0.14050767033675773, "grad_norm": 0.971656273295498, "learning_rate": 4.888910628749908e-06, "loss": 0.2035, "step": 1525 }, { "epoch": 0.14059980651402773, "grad_norm": 1.028981671131926, "learning_rate": 4.88868595727347e-06, "loss": 0.1804, "step": 1526 }, { "epoch": 0.14069194269129773, "grad_norm": 0.9897401268515336, "learning_rate": 4.888461064007284e-06, "loss": 0.1767, "step": 1527 }, { "epoch": 0.14078407886856775, "grad_norm": 0.9659819889789746, "learning_rate": 4.888235948972232e-06, "loss": 0.1853, "step": 1528 }, { "epoch": 0.14087621504583775, "grad_norm": 0.9421444160040869, "learning_rate": 4.888010612189213e-06, "loss": 0.1643, "step": 1529 }, { "epoch": 0.14096835122310775, "grad_norm": 1.0162138437056278, "learning_rate": 4.8877850536791535e-06, "loss": 0.191, "step": 1530 }, { "epoch": 0.14106048740037777, "grad_norm": 1.074012467266447, "learning_rate": 4.887559273462994e-06, "loss": 0.1941, "step": 1531 }, { "epoch": 0.14115262357764777, "grad_norm": 0.9711479225310997, "learning_rate": 4.8873332715617e-06, "loss": 0.1845, "step": 1532 }, { "epoch": 0.14124475975491776, "grad_norm": 1.0096948886121502, "learning_rate": 4.887107047996253e-06, "loss": 0.1911, "step": 1533 }, { "epoch": 0.14133689593218776, "grad_norm": 0.9516594505504503, "learning_rate": 4.886880602787661e-06, "loss": 0.1763, "step": 1534 }, { "epoch": 0.1414290321094578, "grad_norm": 0.9870978246474578, "learning_rate": 4.886653935956949e-06, "loss": 0.172, "step": 1535 }, { "epoch": 0.14152116828672778, "grad_norm": 0.9766472869579116, "learning_rate": 4.88642704752516e-06, "loss": 0.1664, "step": 1536 }, { "epoch": 0.14161330446399778, "grad_norm": 0.8996626934665198, "learning_rate": 4.886199937513365e-06, "loss": 0.1725, "step": 1537 }, { "epoch": 0.1417054406412678, "grad_norm": 1.1275854369973244, "learning_rate": 4.885972605942647e-06, "loss": 0.1811, "step": 1538 }, { "epoch": 0.1417975768185378, "grad_norm": 0.9686314114387246, "learning_rate": 4.8857450528341166e-06, "loss": 0.1725, "step": 1539 }, { "epoch": 0.1418897129958078, "grad_norm": 0.9318170462863459, "learning_rate": 4.8855172782089015e-06, "loss": 0.1632, "step": 1540 }, { "epoch": 0.1419818491730778, "grad_norm": 0.9707586029191749, "learning_rate": 4.88528928208815e-06, "loss": 0.1759, "step": 1541 }, { "epoch": 0.14207398535034782, "grad_norm": 1.0456594221249906, "learning_rate": 4.885061064493033e-06, "loss": 0.1786, "step": 1542 }, { "epoch": 0.14216612152761782, "grad_norm": 0.9707787392031242, "learning_rate": 4.884832625444738e-06, "loss": 0.1732, "step": 1543 }, { "epoch": 0.1422582577048878, "grad_norm": 0.9532507142755162, "learning_rate": 4.8846039649644785e-06, "loss": 0.1662, "step": 1544 }, { "epoch": 0.14235039388215784, "grad_norm": 0.9159276555645566, "learning_rate": 4.884375083073483e-06, "loss": 0.1715, "step": 1545 }, { "epoch": 0.14244253005942784, "grad_norm": 1.0166336431813243, "learning_rate": 4.8841459797930045e-06, "loss": 0.1841, "step": 1546 }, { "epoch": 0.14253466623669783, "grad_norm": 0.9938953514211266, "learning_rate": 4.8839166551443165e-06, "loss": 0.1917, "step": 1547 }, { "epoch": 0.14262680241396786, "grad_norm": 0.9985060362734772, "learning_rate": 4.883687109148709e-06, "loss": 0.1909, "step": 1548 }, { "epoch": 0.14271893859123785, "grad_norm": 0.9480753155210944, "learning_rate": 4.883457341827498e-06, "loss": 0.1634, "step": 1549 }, { "epoch": 0.14281107476850785, "grad_norm": 1.044475869424449, "learning_rate": 4.883227353202016e-06, "loss": 0.1905, "step": 1550 }, { "epoch": 0.14290321094577785, "grad_norm": 0.9751713676071151, "learning_rate": 4.882997143293617e-06, "loss": 0.1766, "step": 1551 }, { "epoch": 0.14299534712304787, "grad_norm": 1.0616405307473622, "learning_rate": 4.882766712123677e-06, "loss": 0.177, "step": 1552 }, { "epoch": 0.14308748330031787, "grad_norm": 0.9891993285446687, "learning_rate": 4.882536059713592e-06, "loss": 0.1902, "step": 1553 }, { "epoch": 0.14317961947758787, "grad_norm": 0.9812316312540941, "learning_rate": 4.882305186084777e-06, "loss": 0.1828, "step": 1554 }, { "epoch": 0.1432717556548579, "grad_norm": 1.0198101696782014, "learning_rate": 4.88207409125867e-06, "loss": 0.1784, "step": 1555 }, { "epoch": 0.1433638918321279, "grad_norm": 0.9787051486382536, "learning_rate": 4.881842775256726e-06, "loss": 0.1802, "step": 1556 }, { "epoch": 0.14345602800939788, "grad_norm": 0.9757898719198366, "learning_rate": 4.8816112381004245e-06, "loss": 0.1722, "step": 1557 }, { "epoch": 0.14354816418666788, "grad_norm": 0.994552243999101, "learning_rate": 4.881379479811263e-06, "loss": 0.183, "step": 1558 }, { "epoch": 0.1436403003639379, "grad_norm": 0.9879186122580522, "learning_rate": 4.881147500410761e-06, "loss": 0.1727, "step": 1559 }, { "epoch": 0.1437324365412079, "grad_norm": 0.9389846554223322, "learning_rate": 4.880915299920457e-06, "loss": 0.183, "step": 1560 }, { "epoch": 0.1438245727184779, "grad_norm": 0.9176926081504023, "learning_rate": 4.8806828783619106e-06, "loss": 0.1648, "step": 1561 }, { "epoch": 0.14391670889574792, "grad_norm": 1.0177887444327505, "learning_rate": 4.880450235756704e-06, "loss": 0.1858, "step": 1562 }, { "epoch": 0.14400884507301792, "grad_norm": 0.8823529515319729, "learning_rate": 4.880217372126436e-06, "loss": 0.1709, "step": 1563 }, { "epoch": 0.14410098125028792, "grad_norm": 0.9499161520652284, "learning_rate": 4.8799842874927285e-06, "loss": 0.1833, "step": 1564 }, { "epoch": 0.14419311742755794, "grad_norm": 0.9768941581657758, "learning_rate": 4.879750981877224e-06, "loss": 0.1751, "step": 1565 }, { "epoch": 0.14428525360482794, "grad_norm": 1.0382432327832614, "learning_rate": 4.879517455301585e-06, "loss": 0.1696, "step": 1566 }, { "epoch": 0.14437738978209794, "grad_norm": 1.0226195818276123, "learning_rate": 4.8792837077874945e-06, "loss": 0.1897, "step": 1567 }, { "epoch": 0.14446952595936793, "grad_norm": 0.9536502285629268, "learning_rate": 4.8790497393566546e-06, "loss": 0.1905, "step": 1568 }, { "epoch": 0.14456166213663796, "grad_norm": 0.9933028554235266, "learning_rate": 4.878815550030792e-06, "loss": 0.1772, "step": 1569 }, { "epoch": 0.14465379831390796, "grad_norm": 0.94927888214594, "learning_rate": 4.878581139831649e-06, "loss": 0.174, "step": 1570 }, { "epoch": 0.14474593449117795, "grad_norm": 0.9734027162264917, "learning_rate": 4.87834650878099e-06, "loss": 0.1878, "step": 1571 }, { "epoch": 0.14483807066844798, "grad_norm": 0.9927226017155047, "learning_rate": 4.8781116569006026e-06, "loss": 0.1783, "step": 1572 }, { "epoch": 0.14493020684571797, "grad_norm": 0.8755924113255371, "learning_rate": 4.877876584212292e-06, "loss": 0.1789, "step": 1573 }, { "epoch": 0.14502234302298797, "grad_norm": 0.8908355930191686, "learning_rate": 4.8776412907378845e-06, "loss": 0.1659, "step": 1574 }, { "epoch": 0.14511447920025797, "grad_norm": 0.9964829044139177, "learning_rate": 4.8774057764992275e-06, "loss": 0.1903, "step": 1575 }, { "epoch": 0.145206615377528, "grad_norm": 1.0087199814403942, "learning_rate": 4.877170041518187e-06, "loss": 0.1846, "step": 1576 }, { "epoch": 0.145298751554798, "grad_norm": 0.9427168363296254, "learning_rate": 4.876934085816654e-06, "loss": 0.1805, "step": 1577 }, { "epoch": 0.14539088773206799, "grad_norm": 1.0114845794628744, "learning_rate": 4.8766979094165346e-06, "loss": 0.1767, "step": 1578 }, { "epoch": 0.145483023909338, "grad_norm": 0.9339557450945638, "learning_rate": 4.8764615123397584e-06, "loss": 0.1773, "step": 1579 }, { "epoch": 0.145575160086608, "grad_norm": 1.0022634915094149, "learning_rate": 4.876224894608275e-06, "loss": 0.1836, "step": 1580 }, { "epoch": 0.145667296263878, "grad_norm": 1.0173079709967716, "learning_rate": 4.875988056244055e-06, "loss": 0.1951, "step": 1581 }, { "epoch": 0.14575943244114803, "grad_norm": 0.9166566621649066, "learning_rate": 4.875750997269088e-06, "loss": 0.1713, "step": 1582 }, { "epoch": 0.14585156861841803, "grad_norm": 0.9612055526441895, "learning_rate": 4.875513717705385e-06, "loss": 0.1803, "step": 1583 }, { "epoch": 0.14594370479568802, "grad_norm": 0.9483501489894158, "learning_rate": 4.875276217574978e-06, "loss": 0.1804, "step": 1584 }, { "epoch": 0.14603584097295802, "grad_norm": 0.9754513204683902, "learning_rate": 4.875038496899919e-06, "loss": 0.1776, "step": 1585 }, { "epoch": 0.14612797715022804, "grad_norm": 0.9887240406962738, "learning_rate": 4.874800555702278e-06, "loss": 0.1859, "step": 1586 }, { "epoch": 0.14622011332749804, "grad_norm": 0.9269334563736277, "learning_rate": 4.874562394004152e-06, "loss": 0.1852, "step": 1587 }, { "epoch": 0.14631224950476804, "grad_norm": 0.9301846330078121, "learning_rate": 4.874324011827651e-06, "loss": 0.1743, "step": 1588 }, { "epoch": 0.14640438568203806, "grad_norm": 1.0003398362769642, "learning_rate": 4.874085409194911e-06, "loss": 0.1961, "step": 1589 }, { "epoch": 0.14649652185930806, "grad_norm": 0.9124366367357205, "learning_rate": 4.873846586128083e-06, "loss": 0.1683, "step": 1590 }, { "epoch": 0.14658865803657806, "grad_norm": 0.9337326845444719, "learning_rate": 4.873607542649347e-06, "loss": 0.1814, "step": 1591 }, { "epoch": 0.14668079421384805, "grad_norm": 0.9707004361697791, "learning_rate": 4.873368278780893e-06, "loss": 0.1835, "step": 1592 }, { "epoch": 0.14677293039111808, "grad_norm": 0.9567094098967206, "learning_rate": 4.87312879454494e-06, "loss": 0.1687, "step": 1593 }, { "epoch": 0.14686506656838808, "grad_norm": 1.0388349235769987, "learning_rate": 4.872889089963723e-06, "loss": 0.1999, "step": 1594 }, { "epoch": 0.14695720274565807, "grad_norm": 1.0055808616791115, "learning_rate": 4.872649165059497e-06, "loss": 0.196, "step": 1595 }, { "epoch": 0.1470493389229281, "grad_norm": 0.9344422199590832, "learning_rate": 4.872409019854543e-06, "loss": 0.1674, "step": 1596 }, { "epoch": 0.1471414751001981, "grad_norm": 0.9358621830893651, "learning_rate": 4.872168654371155e-06, "loss": 0.1735, "step": 1597 }, { "epoch": 0.1472336112774681, "grad_norm": 0.9787417589126662, "learning_rate": 4.8719280686316524e-06, "loss": 0.1662, "step": 1598 }, { "epoch": 0.14732574745473812, "grad_norm": 0.969455208958826, "learning_rate": 4.871687262658373e-06, "loss": 0.1887, "step": 1599 }, { "epoch": 0.1474178836320081, "grad_norm": 0.9719521093017446, "learning_rate": 4.871446236473676e-06, "loss": 0.1807, "step": 1600 }, { "epoch": 0.1475100198092781, "grad_norm": 0.967409307452388, "learning_rate": 4.871204990099941e-06, "loss": 0.1689, "step": 1601 }, { "epoch": 0.1476021559865481, "grad_norm": 1.0137520395644053, "learning_rate": 4.870963523559567e-06, "loss": 0.1768, "step": 1602 }, { "epoch": 0.14769429216381813, "grad_norm": 1.0032661099951032, "learning_rate": 4.8707218368749755e-06, "loss": 0.2019, "step": 1603 }, { "epoch": 0.14778642834108813, "grad_norm": 0.994483855416401, "learning_rate": 4.870479930068607e-06, "loss": 0.1846, "step": 1604 }, { "epoch": 0.14787856451835812, "grad_norm": 0.9323151361272326, "learning_rate": 4.8702378031629204e-06, "loss": 0.1703, "step": 1605 }, { "epoch": 0.14797070069562815, "grad_norm": 0.9392494774290172, "learning_rate": 4.869995456180399e-06, "loss": 0.1724, "step": 1606 }, { "epoch": 0.14806283687289815, "grad_norm": 0.9840474087404582, "learning_rate": 4.869752889143544e-06, "loss": 0.1831, "step": 1607 }, { "epoch": 0.14815497305016814, "grad_norm": 0.9347889910562127, "learning_rate": 4.8695101020748796e-06, "loss": 0.1707, "step": 1608 }, { "epoch": 0.14824710922743817, "grad_norm": 0.9683681475620743, "learning_rate": 4.869267094996946e-06, "loss": 0.1821, "step": 1609 }, { "epoch": 0.14833924540470816, "grad_norm": 0.951231654892894, "learning_rate": 4.869023867932309e-06, "loss": 0.173, "step": 1610 }, { "epoch": 0.14843138158197816, "grad_norm": 0.9654930888260544, "learning_rate": 4.868780420903549e-06, "loss": 0.1802, "step": 1611 }, { "epoch": 0.14852351775924816, "grad_norm": 1.0105698517481636, "learning_rate": 4.868536753933273e-06, "loss": 0.1892, "step": 1612 }, { "epoch": 0.14861565393651818, "grad_norm": 0.9740568467052928, "learning_rate": 4.868292867044104e-06, "loss": 0.1802, "step": 1613 }, { "epoch": 0.14870779011378818, "grad_norm": 1.0226350980028478, "learning_rate": 4.868048760258688e-06, "loss": 0.1893, "step": 1614 }, { "epoch": 0.14879992629105818, "grad_norm": 0.952735105485474, "learning_rate": 4.86780443359969e-06, "loss": 0.1895, "step": 1615 }, { "epoch": 0.1488920624683282, "grad_norm": 0.9784818281571142, "learning_rate": 4.8675598870897945e-06, "loss": 0.1775, "step": 1616 }, { "epoch": 0.1489841986455982, "grad_norm": 0.9473189174777866, "learning_rate": 4.86731512075171e-06, "loss": 0.1766, "step": 1617 }, { "epoch": 0.1490763348228682, "grad_norm": 1.0432332046226518, "learning_rate": 4.86707013460816e-06, "loss": 0.1785, "step": 1618 }, { "epoch": 0.1491684710001382, "grad_norm": 1.036504291535368, "learning_rate": 4.866824928681895e-06, "loss": 0.1896, "step": 1619 }, { "epoch": 0.14926060717740822, "grad_norm": 1.0094182586165745, "learning_rate": 4.86657950299568e-06, "loss": 0.1908, "step": 1620 }, { "epoch": 0.1493527433546782, "grad_norm": 0.9844012875372663, "learning_rate": 4.866333857572303e-06, "loss": 0.1854, "step": 1621 }, { "epoch": 0.1494448795319482, "grad_norm": 0.9374114123884193, "learning_rate": 4.866087992434573e-06, "loss": 0.157, "step": 1622 }, { "epoch": 0.14953701570921824, "grad_norm": 1.006479066028266, "learning_rate": 4.865841907605319e-06, "loss": 0.1824, "step": 1623 }, { "epoch": 0.14962915188648823, "grad_norm": 0.9427763406675067, "learning_rate": 4.865595603107388e-06, "loss": 0.1777, "step": 1624 }, { "epoch": 0.14972128806375823, "grad_norm": 1.0306848153573909, "learning_rate": 4.865349078963652e-06, "loss": 0.1883, "step": 1625 }, { "epoch": 0.14981342424102825, "grad_norm": 1.0514858020888211, "learning_rate": 4.865102335196999e-06, "loss": 0.1851, "step": 1626 }, { "epoch": 0.14990556041829825, "grad_norm": 0.9753039893941937, "learning_rate": 4.8648553718303386e-06, "loss": 0.1826, "step": 1627 }, { "epoch": 0.14999769659556825, "grad_norm": 1.0084449553216943, "learning_rate": 4.864608188886603e-06, "loss": 0.1822, "step": 1628 }, { "epoch": 0.15008983277283824, "grad_norm": 1.006986138104111, "learning_rate": 4.8643607863887435e-06, "loss": 0.1828, "step": 1629 }, { "epoch": 0.15018196895010827, "grad_norm": 1.0297786557131936, "learning_rate": 4.8641131643597294e-06, "loss": 0.2041, "step": 1630 }, { "epoch": 0.15027410512737827, "grad_norm": 1.0037871234057218, "learning_rate": 4.863865322822553e-06, "loss": 0.1904, "step": 1631 }, { "epoch": 0.15036624130464826, "grad_norm": 0.9211265793923229, "learning_rate": 4.863617261800229e-06, "loss": 0.1853, "step": 1632 }, { "epoch": 0.1504583774819183, "grad_norm": 0.9938628142088426, "learning_rate": 4.863368981315786e-06, "loss": 0.1869, "step": 1633 }, { "epoch": 0.15055051365918828, "grad_norm": 1.017354171445112, "learning_rate": 4.86312048139228e-06, "loss": 0.1867, "step": 1634 }, { "epoch": 0.15064264983645828, "grad_norm": 0.9333495575889209, "learning_rate": 4.862871762052782e-06, "loss": 0.1667, "step": 1635 }, { "epoch": 0.15073478601372828, "grad_norm": 0.9518180727935168, "learning_rate": 4.862622823320388e-06, "loss": 0.1788, "step": 1636 }, { "epoch": 0.1508269221909983, "grad_norm": 1.009626987099678, "learning_rate": 4.862373665218209e-06, "loss": 0.1648, "step": 1637 }, { "epoch": 0.1509190583682683, "grad_norm": 0.9908827014313232, "learning_rate": 4.862124287769382e-06, "loss": 0.1888, "step": 1638 }, { "epoch": 0.1510111945455383, "grad_norm": 0.9522038223620974, "learning_rate": 4.86187469099706e-06, "loss": 0.1736, "step": 1639 }, { "epoch": 0.15110333072280832, "grad_norm": 1.0418156159165777, "learning_rate": 4.861624874924419e-06, "loss": 0.1875, "step": 1640 }, { "epoch": 0.15119546690007832, "grad_norm": 0.9646383512472281, "learning_rate": 4.861374839574654e-06, "loss": 0.1712, "step": 1641 }, { "epoch": 0.15128760307734831, "grad_norm": 1.0021890657671846, "learning_rate": 4.861124584970981e-06, "loss": 0.193, "step": 1642 }, { "epoch": 0.15137973925461834, "grad_norm": 1.0054203275956135, "learning_rate": 4.860874111136637e-06, "loss": 0.1775, "step": 1643 }, { "epoch": 0.15147187543188834, "grad_norm": 0.9488906602949967, "learning_rate": 4.860623418094877e-06, "loss": 0.1907, "step": 1644 }, { "epoch": 0.15156401160915833, "grad_norm": 1.0123184700678247, "learning_rate": 4.8603725058689785e-06, "loss": 0.1903, "step": 1645 }, { "epoch": 0.15165614778642833, "grad_norm": 1.0996320649763636, "learning_rate": 4.860121374482239e-06, "loss": 0.1722, "step": 1646 }, { "epoch": 0.15174828396369835, "grad_norm": 0.9861341443618606, "learning_rate": 4.859870023957976e-06, "loss": 0.1835, "step": 1647 }, { "epoch": 0.15184042014096835, "grad_norm": 0.9267396471067681, "learning_rate": 4.8596184543195265e-06, "loss": 0.1555, "step": 1648 }, { "epoch": 0.15193255631823835, "grad_norm": 1.2004716263814705, "learning_rate": 4.859366665590251e-06, "loss": 0.1959, "step": 1649 }, { "epoch": 0.15202469249550837, "grad_norm": 0.968519280731306, "learning_rate": 4.859114657793526e-06, "loss": 0.1843, "step": 1650 }, { "epoch": 0.15211682867277837, "grad_norm": 0.9437339279271343, "learning_rate": 4.858862430952751e-06, "loss": 0.1683, "step": 1651 }, { "epoch": 0.15220896485004837, "grad_norm": 1.1495872644096086, "learning_rate": 4.858609985091345e-06, "loss": 0.188, "step": 1652 }, { "epoch": 0.15230110102731836, "grad_norm": 0.9283484607685915, "learning_rate": 4.858357320232749e-06, "loss": 0.1684, "step": 1653 }, { "epoch": 0.1523932372045884, "grad_norm": 0.9333600768277015, "learning_rate": 4.858104436400422e-06, "loss": 0.1672, "step": 1654 }, { "epoch": 0.15248537338185839, "grad_norm": 1.0603988968811429, "learning_rate": 4.857851333617844e-06, "loss": 0.1833, "step": 1655 }, { "epoch": 0.15257750955912838, "grad_norm": 0.8944080350738115, "learning_rate": 4.857598011908515e-06, "loss": 0.1706, "step": 1656 }, { "epoch": 0.1526696457363984, "grad_norm": 1.0031502555495742, "learning_rate": 4.857344471295958e-06, "loss": 0.186, "step": 1657 }, { "epoch": 0.1527617819136684, "grad_norm": 1.0782002303208784, "learning_rate": 4.857090711803713e-06, "loss": 0.1812, "step": 1658 }, { "epoch": 0.1528539180909384, "grad_norm": 0.9937919230014062, "learning_rate": 4.856836733455341e-06, "loss": 0.1638, "step": 1659 }, { "epoch": 0.15294605426820843, "grad_norm": 0.9962883028859182, "learning_rate": 4.8565825362744255e-06, "loss": 0.1753, "step": 1660 }, { "epoch": 0.15303819044547842, "grad_norm": 0.8589383960068907, "learning_rate": 4.8563281202845666e-06, "loss": 0.1608, "step": 1661 }, { "epoch": 0.15313032662274842, "grad_norm": 0.9811901617708566, "learning_rate": 4.85607348550939e-06, "loss": 0.1871, "step": 1662 }, { "epoch": 0.15322246280001842, "grad_norm": 0.9399582788710602, "learning_rate": 4.855818631972535e-06, "loss": 0.1832, "step": 1663 }, { "epoch": 0.15331459897728844, "grad_norm": 0.9372479897951055, "learning_rate": 4.855563559697668e-06, "loss": 0.1684, "step": 1664 }, { "epoch": 0.15340673515455844, "grad_norm": 0.9427635954759986, "learning_rate": 4.855308268708469e-06, "loss": 0.1647, "step": 1665 }, { "epoch": 0.15349887133182843, "grad_norm": 0.9627536164774635, "learning_rate": 4.8550527590286455e-06, "loss": 0.1803, "step": 1666 }, { "epoch": 0.15359100750909846, "grad_norm": 0.9739001116786681, "learning_rate": 4.85479703068192e-06, "loss": 0.168, "step": 1667 }, { "epoch": 0.15368314368636846, "grad_norm": 0.9356987155049376, "learning_rate": 4.854541083692036e-06, "loss": 0.1746, "step": 1668 }, { "epoch": 0.15377527986363845, "grad_norm": 1.0169478043112947, "learning_rate": 4.854284918082759e-06, "loss": 0.1695, "step": 1669 }, { "epoch": 0.15386741604090845, "grad_norm": 0.977947892285413, "learning_rate": 4.854028533877874e-06, "loss": 0.1782, "step": 1670 }, { "epoch": 0.15395955221817847, "grad_norm": 0.9697331096055015, "learning_rate": 4.8537719311011865e-06, "loss": 0.1767, "step": 1671 }, { "epoch": 0.15405168839544847, "grad_norm": 0.9882267562435134, "learning_rate": 4.853515109776522e-06, "loss": 0.1781, "step": 1672 }, { "epoch": 0.15414382457271847, "grad_norm": 0.9743762721224799, "learning_rate": 4.8532580699277256e-06, "loss": 0.1655, "step": 1673 }, { "epoch": 0.1542359607499885, "grad_norm": 0.8948799120507229, "learning_rate": 4.853000811578665e-06, "loss": 0.1713, "step": 1674 }, { "epoch": 0.1543280969272585, "grad_norm": 0.9685729986401584, "learning_rate": 4.852743334753226e-06, "loss": 0.1772, "step": 1675 }, { "epoch": 0.1544202331045285, "grad_norm": 1.0010420081338955, "learning_rate": 4.852485639475314e-06, "loss": 0.1827, "step": 1676 }, { "epoch": 0.1545123692817985, "grad_norm": 0.9126092389102316, "learning_rate": 4.852227725768857e-06, "loss": 0.1753, "step": 1677 }, { "epoch": 0.1546045054590685, "grad_norm": 0.9552177894021503, "learning_rate": 4.8519695936578045e-06, "loss": 0.1828, "step": 1678 }, { "epoch": 0.1546966416363385, "grad_norm": 0.9534993774757963, "learning_rate": 4.851711243166121e-06, "loss": 0.1808, "step": 1679 }, { "epoch": 0.1547887778136085, "grad_norm": 0.9956457154947068, "learning_rate": 4.851452674317795e-06, "loss": 0.1898, "step": 1680 }, { "epoch": 0.15488091399087853, "grad_norm": 0.9164339311946711, "learning_rate": 4.851193887136835e-06, "loss": 0.1635, "step": 1681 }, { "epoch": 0.15497305016814852, "grad_norm": 0.95908455887589, "learning_rate": 4.850934881647271e-06, "loss": 0.1802, "step": 1682 }, { "epoch": 0.15506518634541852, "grad_norm": 1.0414535544577295, "learning_rate": 4.850675657873149e-06, "loss": 0.2002, "step": 1683 }, { "epoch": 0.15515732252268855, "grad_norm": 0.9468141742151894, "learning_rate": 4.850416215838539e-06, "loss": 0.1796, "step": 1684 }, { "epoch": 0.15524945869995854, "grad_norm": 1.0035960741903245, "learning_rate": 4.850156555567531e-06, "loss": 0.1758, "step": 1685 }, { "epoch": 0.15534159487722854, "grad_norm": 0.9767439731982273, "learning_rate": 4.849896677084234e-06, "loss": 0.1774, "step": 1686 }, { "epoch": 0.15543373105449854, "grad_norm": 1.034221041451684, "learning_rate": 4.849636580412778e-06, "loss": 0.2034, "step": 1687 }, { "epoch": 0.15552586723176856, "grad_norm": 0.9576078272674569, "learning_rate": 4.849376265577312e-06, "loss": 0.1867, "step": 1688 }, { "epoch": 0.15561800340903856, "grad_norm": 1.0442517742292943, "learning_rate": 4.849115732602006e-06, "loss": 0.2, "step": 1689 }, { "epoch": 0.15571013958630855, "grad_norm": 0.8993671722695789, "learning_rate": 4.848854981511053e-06, "loss": 0.1634, "step": 1690 }, { "epoch": 0.15580227576357858, "grad_norm": 0.9595537498820411, "learning_rate": 4.848594012328661e-06, "loss": 0.1763, "step": 1691 }, { "epoch": 0.15589441194084858, "grad_norm": 0.9627114516540672, "learning_rate": 4.848332825079063e-06, "loss": 0.1853, "step": 1692 }, { "epoch": 0.15598654811811857, "grad_norm": 0.9229982001738194, "learning_rate": 4.848071419786509e-06, "loss": 0.1715, "step": 1693 }, { "epoch": 0.1560786842953886, "grad_norm": 0.9070563011756574, "learning_rate": 4.847809796475271e-06, "loss": 0.1743, "step": 1694 }, { "epoch": 0.1561708204726586, "grad_norm": 0.912107518148019, "learning_rate": 4.8475479551696405e-06, "loss": 0.1714, "step": 1695 }, { "epoch": 0.1562629566499286, "grad_norm": 1.031131238472556, "learning_rate": 4.847285895893931e-06, "loss": 0.1881, "step": 1696 }, { "epoch": 0.1563550928271986, "grad_norm": 0.8837742757425879, "learning_rate": 4.847023618672472e-06, "loss": 0.1628, "step": 1697 }, { "epoch": 0.1564472290044686, "grad_norm": 0.8733263411942864, "learning_rate": 4.846761123529618e-06, "loss": 0.1644, "step": 1698 }, { "epoch": 0.1565393651817386, "grad_norm": 0.9097551228910571, "learning_rate": 4.846498410489741e-06, "loss": 0.1682, "step": 1699 }, { "epoch": 0.1566315013590086, "grad_norm": 0.9915359550647295, "learning_rate": 4.846235479577234e-06, "loss": 0.1853, "step": 1700 }, { "epoch": 0.15672363753627863, "grad_norm": 1.030370760761187, "learning_rate": 4.845972330816511e-06, "loss": 0.1927, "step": 1701 }, { "epoch": 0.15681577371354863, "grad_norm": 1.0241222156942211, "learning_rate": 4.845708964232003e-06, "loss": 0.1904, "step": 1702 }, { "epoch": 0.15690790989081863, "grad_norm": 0.9367671000205519, "learning_rate": 4.845445379848167e-06, "loss": 0.1812, "step": 1703 }, { "epoch": 0.15700004606808862, "grad_norm": 0.969147645704287, "learning_rate": 4.845181577689474e-06, "loss": 0.1826, "step": 1704 }, { "epoch": 0.15709218224535865, "grad_norm": 0.914293608169453, "learning_rate": 4.844917557780419e-06, "loss": 0.1794, "step": 1705 }, { "epoch": 0.15718431842262864, "grad_norm": 1.007827722207317, "learning_rate": 4.844653320145517e-06, "loss": 0.2025, "step": 1706 }, { "epoch": 0.15727645459989864, "grad_norm": 0.9844002604666592, "learning_rate": 4.844388864809302e-06, "loss": 0.1754, "step": 1707 }, { "epoch": 0.15736859077716867, "grad_norm": 1.0507807427487061, "learning_rate": 4.844124191796328e-06, "loss": 0.1871, "step": 1708 }, { "epoch": 0.15746072695443866, "grad_norm": 1.0395668195003018, "learning_rate": 4.843859301131171e-06, "loss": 0.2118, "step": 1709 }, { "epoch": 0.15755286313170866, "grad_norm": 1.0186142609766382, "learning_rate": 4.843594192838425e-06, "loss": 0.1991, "step": 1710 }, { "epoch": 0.15764499930897868, "grad_norm": 0.9191028208060649, "learning_rate": 4.8433288669427055e-06, "loss": 0.1673, "step": 1711 }, { "epoch": 0.15773713548624868, "grad_norm": 1.0107944706696486, "learning_rate": 4.84306332346865e-06, "loss": 0.1813, "step": 1712 }, { "epoch": 0.15782927166351868, "grad_norm": 0.8938007261528311, "learning_rate": 4.842797562440913e-06, "loss": 0.1716, "step": 1713 }, { "epoch": 0.15792140784078867, "grad_norm": 0.9842512427881709, "learning_rate": 4.842531583884168e-06, "loss": 0.1797, "step": 1714 }, { "epoch": 0.1580135440180587, "grad_norm": 1.0168585547681084, "learning_rate": 4.842265387823115e-06, "loss": 0.1949, "step": 1715 }, { "epoch": 0.1581056801953287, "grad_norm": 0.9344893607216574, "learning_rate": 4.841998974282469e-06, "loss": 0.1687, "step": 1716 }, { "epoch": 0.1581978163725987, "grad_norm": 0.9569816615949233, "learning_rate": 4.841732343286965e-06, "loss": 0.1737, "step": 1717 }, { "epoch": 0.15828995254986872, "grad_norm": 0.937606627263079, "learning_rate": 4.841465494861362e-06, "loss": 0.1811, "step": 1718 }, { "epoch": 0.15838208872713871, "grad_norm": 1.0238800385610571, "learning_rate": 4.841198429030435e-06, "loss": 0.1739, "step": 1719 }, { "epoch": 0.1584742249044087, "grad_norm": 0.9619074678640244, "learning_rate": 4.840931145818982e-06, "loss": 0.1632, "step": 1720 }, { "epoch": 0.1585663610816787, "grad_norm": 0.9303762249991079, "learning_rate": 4.84066364525182e-06, "loss": 0.1721, "step": 1721 }, { "epoch": 0.15865849725894873, "grad_norm": 0.9418596387167747, "learning_rate": 4.8403959273537875e-06, "loss": 0.1739, "step": 1722 }, { "epoch": 0.15875063343621873, "grad_norm": 1.087199652929513, "learning_rate": 4.8401279921497405e-06, "loss": 0.1902, "step": 1723 }, { "epoch": 0.15884276961348873, "grad_norm": 0.9637578112894941, "learning_rate": 4.839859839664557e-06, "loss": 0.198, "step": 1724 }, { "epoch": 0.15893490579075875, "grad_norm": 0.8742090828607239, "learning_rate": 4.839591469923137e-06, "loss": 0.1676, "step": 1725 }, { "epoch": 0.15902704196802875, "grad_norm": 0.9493609600429447, "learning_rate": 4.8393228829503966e-06, "loss": 0.1838, "step": 1726 }, { "epoch": 0.15911917814529875, "grad_norm": 0.9210297680349159, "learning_rate": 4.839054078771275e-06, "loss": 0.1838, "step": 1727 }, { "epoch": 0.15921131432256877, "grad_norm": 0.9447500968465715, "learning_rate": 4.83878505741073e-06, "loss": 0.1835, "step": 1728 }, { "epoch": 0.15930345049983877, "grad_norm": 0.9540678329702622, "learning_rate": 4.838515818893741e-06, "loss": 0.172, "step": 1729 }, { "epoch": 0.15939558667710876, "grad_norm": 1.0123215650229256, "learning_rate": 4.838246363245306e-06, "loss": 0.1641, "step": 1730 }, { "epoch": 0.15948772285437876, "grad_norm": 0.9966400481188536, "learning_rate": 4.837976690490445e-06, "loss": 0.1905, "step": 1731 }, { "epoch": 0.15957985903164879, "grad_norm": 0.9095553032746777, "learning_rate": 4.837706800654197e-06, "loss": 0.1769, "step": 1732 }, { "epoch": 0.15967199520891878, "grad_norm": 1.0303333847098155, "learning_rate": 4.83743669376162e-06, "loss": 0.1803, "step": 1733 }, { "epoch": 0.15976413138618878, "grad_norm": 0.963888249747459, "learning_rate": 4.8371663698377955e-06, "loss": 0.1683, "step": 1734 }, { "epoch": 0.1598562675634588, "grad_norm": 0.9576505684732173, "learning_rate": 4.836895828907822e-06, "loss": 0.1851, "step": 1735 }, { "epoch": 0.1599484037407288, "grad_norm": 1.0554891537306332, "learning_rate": 4.836625070996818e-06, "loss": 0.188, "step": 1736 }, { "epoch": 0.1600405399179988, "grad_norm": 1.030216874614439, "learning_rate": 4.836354096129926e-06, "loss": 0.1659, "step": 1737 }, { "epoch": 0.1601326760952688, "grad_norm": 1.0900760942160297, "learning_rate": 4.8360829043323046e-06, "loss": 0.1832, "step": 1738 }, { "epoch": 0.16022481227253882, "grad_norm": 0.9371181926316801, "learning_rate": 4.835811495629134e-06, "loss": 0.175, "step": 1739 }, { "epoch": 0.16031694844980882, "grad_norm": 0.9495594727001155, "learning_rate": 4.835539870045613e-06, "loss": 0.1992, "step": 1740 }, { "epoch": 0.1604090846270788, "grad_norm": 0.917342942507763, "learning_rate": 4.8352680276069654e-06, "loss": 0.1653, "step": 1741 }, { "epoch": 0.16050122080434884, "grad_norm": 0.90525029954931, "learning_rate": 4.83499596833843e-06, "loss": 0.1795, "step": 1742 }, { "epoch": 0.16059335698161883, "grad_norm": 1.0455965458196492, "learning_rate": 4.834723692265268e-06, "loss": 0.1817, "step": 1743 }, { "epoch": 0.16068549315888883, "grad_norm": 0.959693505057054, "learning_rate": 4.834451199412759e-06, "loss": 0.1781, "step": 1744 }, { "epoch": 0.16077762933615886, "grad_norm": 0.9527369490107168, "learning_rate": 4.8341784898062056e-06, "loss": 0.1819, "step": 1745 }, { "epoch": 0.16086976551342885, "grad_norm": 0.9431052982892322, "learning_rate": 4.833905563470928e-06, "loss": 0.1706, "step": 1746 }, { "epoch": 0.16096190169069885, "grad_norm": 1.0048907347623248, "learning_rate": 4.833632420432267e-06, "loss": 0.1831, "step": 1747 }, { "epoch": 0.16105403786796885, "grad_norm": 1.0442032356650304, "learning_rate": 4.833359060715586e-06, "loss": 0.19, "step": 1748 }, { "epoch": 0.16114617404523887, "grad_norm": 1.037538480498732, "learning_rate": 4.8330854843462635e-06, "loss": 0.1804, "step": 1749 }, { "epoch": 0.16123831022250887, "grad_norm": 0.9676875781513236, "learning_rate": 4.832811691349703e-06, "loss": 0.1804, "step": 1750 }, { "epoch": 0.16133044639977887, "grad_norm": 1.0135987525084371, "learning_rate": 4.832537681751327e-06, "loss": 0.1827, "step": 1751 }, { "epoch": 0.1614225825770489, "grad_norm": 1.018385777602988, "learning_rate": 4.832263455576576e-06, "loss": 0.1972, "step": 1752 }, { "epoch": 0.1615147187543189, "grad_norm": 1.1980934939018493, "learning_rate": 4.8319890128509115e-06, "loss": 0.1822, "step": 1753 }, { "epoch": 0.16160685493158888, "grad_norm": 1.0080490206517214, "learning_rate": 4.831714353599817e-06, "loss": 0.1886, "step": 1754 }, { "epoch": 0.16169899110885888, "grad_norm": 0.941493167404903, "learning_rate": 4.831439477848793e-06, "loss": 0.1725, "step": 1755 }, { "epoch": 0.1617911272861289, "grad_norm": 0.9039272326667056, "learning_rate": 4.831164385623362e-06, "loss": 0.1589, "step": 1756 }, { "epoch": 0.1618832634633989, "grad_norm": 0.9530095888259483, "learning_rate": 4.830889076949069e-06, "loss": 0.1746, "step": 1757 }, { "epoch": 0.1619753996406689, "grad_norm": 0.905148587751177, "learning_rate": 4.830613551851473e-06, "loss": 0.1681, "step": 1758 }, { "epoch": 0.16206753581793892, "grad_norm": 0.9308074744746171, "learning_rate": 4.830337810356157e-06, "loss": 0.1776, "step": 1759 }, { "epoch": 0.16215967199520892, "grad_norm": 0.9875824921474106, "learning_rate": 4.830061852488726e-06, "loss": 0.1828, "step": 1760 }, { "epoch": 0.16225180817247892, "grad_norm": 0.9813374242149387, "learning_rate": 4.829785678274801e-06, "loss": 0.1839, "step": 1761 }, { "epoch": 0.16234394434974894, "grad_norm": 0.931234653866775, "learning_rate": 4.829509287740024e-06, "loss": 0.1788, "step": 1762 }, { "epoch": 0.16243608052701894, "grad_norm": 0.9585368043301006, "learning_rate": 4.82923268091006e-06, "loss": 0.188, "step": 1763 }, { "epoch": 0.16252821670428894, "grad_norm": 1.0074529442860354, "learning_rate": 4.828955857810591e-06, "loss": 0.191, "step": 1764 }, { "epoch": 0.16262035288155893, "grad_norm": 0.9410361170473834, "learning_rate": 4.828678818467319e-06, "loss": 0.1763, "step": 1765 }, { "epoch": 0.16271248905882896, "grad_norm": 1.0208109082183012, "learning_rate": 4.828401562905969e-06, "loss": 0.2075, "step": 1766 }, { "epoch": 0.16280462523609895, "grad_norm": 0.9269680529094783, "learning_rate": 4.828124091152283e-06, "loss": 0.1737, "step": 1767 }, { "epoch": 0.16289676141336895, "grad_norm": 1.0100855715032826, "learning_rate": 4.827846403232024e-06, "loss": 0.1928, "step": 1768 }, { "epoch": 0.16298889759063898, "grad_norm": 0.9905609107994238, "learning_rate": 4.827568499170977e-06, "loss": 0.1722, "step": 1769 }, { "epoch": 0.16308103376790897, "grad_norm": 1.0136821739673834, "learning_rate": 4.8272903789949435e-06, "loss": 0.1732, "step": 1770 }, { "epoch": 0.16317316994517897, "grad_norm": 1.014178191785531, "learning_rate": 4.8270120427297485e-06, "loss": 0.1892, "step": 1771 }, { "epoch": 0.16326530612244897, "grad_norm": 0.9882369844302558, "learning_rate": 4.8267334904012345e-06, "loss": 0.1798, "step": 1772 }, { "epoch": 0.163357442299719, "grad_norm": 0.9940919783308403, "learning_rate": 4.8264547220352655e-06, "loss": 0.1862, "step": 1773 }, { "epoch": 0.163449578476989, "grad_norm": 1.0582977739314734, "learning_rate": 4.826175737657725e-06, "loss": 0.1913, "step": 1774 }, { "epoch": 0.16354171465425899, "grad_norm": 1.0787650948609633, "learning_rate": 4.825896537294518e-06, "loss": 0.1854, "step": 1775 }, { "epoch": 0.163633850831529, "grad_norm": 1.0181516343182249, "learning_rate": 4.825617120971566e-06, "loss": 0.1747, "step": 1776 }, { "epoch": 0.163725987008799, "grad_norm": 0.9365780887109146, "learning_rate": 4.825337488714814e-06, "loss": 0.185, "step": 1777 }, { "epoch": 0.163818123186069, "grad_norm": 1.034805261001223, "learning_rate": 4.825057640550226e-06, "loss": 0.1875, "step": 1778 }, { "epoch": 0.16391025936333903, "grad_norm": 1.0004955506710502, "learning_rate": 4.824777576503786e-06, "loss": 0.1832, "step": 1779 }, { "epoch": 0.16400239554060903, "grad_norm": 1.045133789322791, "learning_rate": 4.824497296601499e-06, "loss": 0.1743, "step": 1780 }, { "epoch": 0.16409453171787902, "grad_norm": 0.929674001723364, "learning_rate": 4.8242168008693864e-06, "loss": 0.1688, "step": 1781 }, { "epoch": 0.16418666789514902, "grad_norm": 0.8909382328694191, "learning_rate": 4.823936089333494e-06, "loss": 0.1679, "step": 1782 }, { "epoch": 0.16427880407241904, "grad_norm": 0.8714078813260372, "learning_rate": 4.823655162019886e-06, "loss": 0.1616, "step": 1783 }, { "epoch": 0.16437094024968904, "grad_norm": 0.9117516027805643, "learning_rate": 4.823374018954646e-06, "loss": 0.1665, "step": 1784 }, { "epoch": 0.16446307642695904, "grad_norm": 0.8647610420636894, "learning_rate": 4.823092660163878e-06, "loss": 0.1528, "step": 1785 }, { "epoch": 0.16455521260422906, "grad_norm": 0.9530561108679019, "learning_rate": 4.822811085673706e-06, "loss": 0.1691, "step": 1786 }, { "epoch": 0.16464734878149906, "grad_norm": 0.8638732798559001, "learning_rate": 4.822529295510276e-06, "loss": 0.1568, "step": 1787 }, { "epoch": 0.16473948495876906, "grad_norm": 0.9641560836422756, "learning_rate": 4.82224728969975e-06, "loss": 0.1783, "step": 1788 }, { "epoch": 0.16483162113603905, "grad_norm": 0.924759701849841, "learning_rate": 4.821965068268314e-06, "loss": 0.1727, "step": 1789 }, { "epoch": 0.16492375731330908, "grad_norm": 0.9362299471705925, "learning_rate": 4.82168263124217e-06, "loss": 0.1741, "step": 1790 }, { "epoch": 0.16501589349057907, "grad_norm": 0.981951133307446, "learning_rate": 4.8213999786475455e-06, "loss": 0.186, "step": 1791 }, { "epoch": 0.16510802966784907, "grad_norm": 0.8704969480803476, "learning_rate": 4.821117110510683e-06, "loss": 0.1634, "step": 1792 }, { "epoch": 0.1652001658451191, "grad_norm": 0.9063406667410021, "learning_rate": 4.820834026857846e-06, "loss": 0.1793, "step": 1793 }, { "epoch": 0.1652923020223891, "grad_norm": 1.0657161087119753, "learning_rate": 4.820550727715321e-06, "loss": 0.1889, "step": 1794 }, { "epoch": 0.1653844381996591, "grad_norm": 0.9127685674712928, "learning_rate": 4.820267213109409e-06, "loss": 0.1724, "step": 1795 }, { "epoch": 0.16547657437692911, "grad_norm": 0.9727997916206534, "learning_rate": 4.8199834830664395e-06, "loss": 0.1777, "step": 1796 }, { "epoch": 0.1655687105541991, "grad_norm": 0.9452854756210244, "learning_rate": 4.819699537612752e-06, "loss": 0.1615, "step": 1797 }, { "epoch": 0.1656608467314691, "grad_norm": 0.9578005282418585, "learning_rate": 4.819415376774714e-06, "loss": 0.1827, "step": 1798 }, { "epoch": 0.1657529829087391, "grad_norm": 1.0597663881266928, "learning_rate": 4.819131000578707e-06, "loss": 0.1843, "step": 1799 }, { "epoch": 0.16584511908600913, "grad_norm": 0.9342840629471308, "learning_rate": 4.818846409051139e-06, "loss": 0.1813, "step": 1800 }, { "epoch": 0.16593725526327913, "grad_norm": 0.98252920230672, "learning_rate": 4.818561602218431e-06, "loss": 0.1838, "step": 1801 }, { "epoch": 0.16602939144054912, "grad_norm": 0.9424563329013992, "learning_rate": 4.818276580107029e-06, "loss": 0.1702, "step": 1802 }, { "epoch": 0.16612152761781915, "grad_norm": 0.9202413050049445, "learning_rate": 4.817991342743396e-06, "loss": 0.1735, "step": 1803 }, { "epoch": 0.16621366379508914, "grad_norm": 0.9549194113487504, "learning_rate": 4.81770589015402e-06, "loss": 0.1796, "step": 1804 }, { "epoch": 0.16630579997235914, "grad_norm": 0.9465694214625567, "learning_rate": 4.8174202223654e-06, "loss": 0.1843, "step": 1805 }, { "epoch": 0.16639793614962914, "grad_norm": 0.9349905458192107, "learning_rate": 4.8171343394040645e-06, "loss": 0.1745, "step": 1806 }, { "epoch": 0.16649007232689916, "grad_norm": 0.8796702727017657, "learning_rate": 4.816848241296556e-06, "loss": 0.1695, "step": 1807 }, { "epoch": 0.16658220850416916, "grad_norm": 0.9207017502379715, "learning_rate": 4.816561928069439e-06, "loss": 0.1825, "step": 1808 }, { "epoch": 0.16667434468143916, "grad_norm": 0.8798529220695297, "learning_rate": 4.8162753997492965e-06, "loss": 0.1758, "step": 1809 }, { "epoch": 0.16676648085870918, "grad_norm": 1.010096798746228, "learning_rate": 4.815988656362735e-06, "loss": 0.1908, "step": 1810 }, { "epoch": 0.16685861703597918, "grad_norm": 0.906477990452234, "learning_rate": 4.815701697936377e-06, "loss": 0.1817, "step": 1811 }, { "epoch": 0.16695075321324918, "grad_norm": 0.9584264334027195, "learning_rate": 4.815414524496867e-06, "loss": 0.1689, "step": 1812 }, { "epoch": 0.1670428893905192, "grad_norm": 1.0568204132693753, "learning_rate": 4.8151271360708704e-06, "loss": 0.1817, "step": 1813 }, { "epoch": 0.1671350255677892, "grad_norm": 1.0170922070435067, "learning_rate": 4.814839532685069e-06, "loss": 0.1794, "step": 1814 }, { "epoch": 0.1672271617450592, "grad_norm": 0.9452343091927007, "learning_rate": 4.814551714366168e-06, "loss": 0.1775, "step": 1815 }, { "epoch": 0.1673192979223292, "grad_norm": 1.0103963129300657, "learning_rate": 4.814263681140892e-06, "loss": 0.1789, "step": 1816 }, { "epoch": 0.16741143409959922, "grad_norm": 1.057193287404099, "learning_rate": 4.813975433035984e-06, "loss": 0.18, "step": 1817 }, { "epoch": 0.1675035702768692, "grad_norm": 1.0599542152721877, "learning_rate": 4.813686970078207e-06, "loss": 0.1861, "step": 1818 }, { "epoch": 0.1675957064541392, "grad_norm": 0.9410257979008102, "learning_rate": 4.813398292294345e-06, "loss": 0.1782, "step": 1819 }, { "epoch": 0.16768784263140923, "grad_norm": 0.9106844898045353, "learning_rate": 4.813109399711204e-06, "loss": 0.1605, "step": 1820 }, { "epoch": 0.16777997880867923, "grad_norm": 0.9510723876689734, "learning_rate": 4.812820292355607e-06, "loss": 0.1768, "step": 1821 }, { "epoch": 0.16787211498594923, "grad_norm": 0.9713976398794669, "learning_rate": 4.812530970254396e-06, "loss": 0.1768, "step": 1822 }, { "epoch": 0.16796425116321922, "grad_norm": 0.9726574938485535, "learning_rate": 4.812241433434436e-06, "loss": 0.188, "step": 1823 }, { "epoch": 0.16805638734048925, "grad_norm": 1.0053761517023432, "learning_rate": 4.81195168192261e-06, "loss": 0.1749, "step": 1824 }, { "epoch": 0.16814852351775925, "grad_norm": 1.0033991069529102, "learning_rate": 4.81166171574582e-06, "loss": 0.1782, "step": 1825 }, { "epoch": 0.16824065969502924, "grad_norm": 0.9968269303113185, "learning_rate": 4.811371534930993e-06, "loss": 0.1805, "step": 1826 }, { "epoch": 0.16833279587229927, "grad_norm": 0.9615227789782784, "learning_rate": 4.8110811395050695e-06, "loss": 0.1696, "step": 1827 }, { "epoch": 0.16842493204956926, "grad_norm": 0.9821799299380174, "learning_rate": 4.810790529495013e-06, "loss": 0.1791, "step": 1828 }, { "epoch": 0.16851706822683926, "grad_norm": 0.9811162841683847, "learning_rate": 4.810499704927808e-06, "loss": 0.1723, "step": 1829 }, { "epoch": 0.1686092044041093, "grad_norm": 1.1244318132152042, "learning_rate": 4.810208665830456e-06, "loss": 0.1907, "step": 1830 }, { "epoch": 0.16870134058137928, "grad_norm": 0.9817177234015824, "learning_rate": 4.809917412229981e-06, "loss": 0.1788, "step": 1831 }, { "epoch": 0.16879347675864928, "grad_norm": 0.9184917085261609, "learning_rate": 4.809625944153425e-06, "loss": 0.1617, "step": 1832 }, { "epoch": 0.16888561293591928, "grad_norm": 0.9234623678622588, "learning_rate": 4.8093342616278525e-06, "loss": 0.1809, "step": 1833 }, { "epoch": 0.1689777491131893, "grad_norm": 0.9581615643494954, "learning_rate": 4.809042364680345e-06, "loss": 0.1634, "step": 1834 }, { "epoch": 0.1690698852904593, "grad_norm": 0.9697985187154081, "learning_rate": 4.808750253338006e-06, "loss": 0.1871, "step": 1835 }, { "epoch": 0.1691620214677293, "grad_norm": 0.9221846846576924, "learning_rate": 4.8084579276279565e-06, "loss": 0.176, "step": 1836 }, { "epoch": 0.16925415764499932, "grad_norm": 0.962272543721862, "learning_rate": 4.80816538757734e-06, "loss": 0.1954, "step": 1837 }, { "epoch": 0.16934629382226932, "grad_norm": 0.8740315446603265, "learning_rate": 4.80787263321332e-06, "loss": 0.1638, "step": 1838 }, { "epoch": 0.16943842999953931, "grad_norm": 0.9401843869180659, "learning_rate": 4.8075796645630764e-06, "loss": 0.1839, "step": 1839 }, { "epoch": 0.1695305661768093, "grad_norm": 0.9919651044339668, "learning_rate": 4.807286481653813e-06, "loss": 0.1719, "step": 1840 }, { "epoch": 0.16962270235407934, "grad_norm": 0.931631600254304, "learning_rate": 4.806993084512752e-06, "loss": 0.1786, "step": 1841 }, { "epoch": 0.16971483853134933, "grad_norm": 1.0388517287914885, "learning_rate": 4.806699473167134e-06, "loss": 0.1916, "step": 1842 }, { "epoch": 0.16980697470861933, "grad_norm": 0.8913690534162603, "learning_rate": 4.806405647644222e-06, "loss": 0.1826, "step": 1843 }, { "epoch": 0.16989911088588935, "grad_norm": 1.0257086366954042, "learning_rate": 4.806111607971298e-06, "loss": 0.1751, "step": 1844 }, { "epoch": 0.16999124706315935, "grad_norm": 0.9335569005047046, "learning_rate": 4.805817354175663e-06, "loss": 0.1715, "step": 1845 }, { "epoch": 0.17008338324042935, "grad_norm": 0.9491430087260864, "learning_rate": 4.805522886284637e-06, "loss": 0.1731, "step": 1846 }, { "epoch": 0.17017551941769937, "grad_norm": 0.8936870287779184, "learning_rate": 4.8052282043255635e-06, "loss": 0.1777, "step": 1847 }, { "epoch": 0.17026765559496937, "grad_norm": 0.8902565793090995, "learning_rate": 4.804933308325804e-06, "loss": 0.1694, "step": 1848 }, { "epoch": 0.17035979177223937, "grad_norm": 0.9985363911822385, "learning_rate": 4.8046381983127385e-06, "loss": 0.2018, "step": 1849 }, { "epoch": 0.17045192794950936, "grad_norm": 0.9426301604024907, "learning_rate": 4.8043428743137675e-06, "loss": 0.1867, "step": 1850 }, { "epoch": 0.1705440641267794, "grad_norm": 0.8550027014618022, "learning_rate": 4.8040473363563136e-06, "loss": 0.1637, "step": 1851 }, { "epoch": 0.17063620030404938, "grad_norm": 0.9734804002388993, "learning_rate": 4.8037515844678165e-06, "loss": 0.1866, "step": 1852 }, { "epoch": 0.17072833648131938, "grad_norm": 0.9928693625462343, "learning_rate": 4.803455618675736e-06, "loss": 0.1784, "step": 1853 }, { "epoch": 0.1708204726585894, "grad_norm": 0.9402222477752512, "learning_rate": 4.803159439007554e-06, "loss": 0.1678, "step": 1854 }, { "epoch": 0.1709126088358594, "grad_norm": 0.9737747885881045, "learning_rate": 4.80286304549077e-06, "loss": 0.1844, "step": 1855 }, { "epoch": 0.1710047450131294, "grad_norm": 0.8979782554294682, "learning_rate": 4.802566438152904e-06, "loss": 0.1707, "step": 1856 }, { "epoch": 0.1710968811903994, "grad_norm": 1.0231193949257447, "learning_rate": 4.802269617021497e-06, "loss": 0.1965, "step": 1857 }, { "epoch": 0.17118901736766942, "grad_norm": 0.9882485601160279, "learning_rate": 4.801972582124108e-06, "loss": 0.1855, "step": 1858 }, { "epoch": 0.17128115354493942, "grad_norm": 0.9804779701977544, "learning_rate": 4.801675333488317e-06, "loss": 0.1846, "step": 1859 }, { "epoch": 0.17137328972220942, "grad_norm": 0.9887115862803393, "learning_rate": 4.801377871141723e-06, "loss": 0.1818, "step": 1860 }, { "epoch": 0.17146542589947944, "grad_norm": 0.9908343960616961, "learning_rate": 4.801080195111948e-06, "loss": 0.1728, "step": 1861 }, { "epoch": 0.17155756207674944, "grad_norm": 0.9121926413556142, "learning_rate": 4.800782305426628e-06, "loss": 0.1736, "step": 1862 }, { "epoch": 0.17164969825401943, "grad_norm": 1.0130289652715774, "learning_rate": 4.800484202113423e-06, "loss": 0.1725, "step": 1863 }, { "epoch": 0.17174183443128946, "grad_norm": 0.9997676957196446, "learning_rate": 4.800185885200013e-06, "loss": 0.1723, "step": 1864 }, { "epoch": 0.17183397060855946, "grad_norm": 0.9695560315706072, "learning_rate": 4.7998873547140954e-06, "loss": 0.1721, "step": 1865 }, { "epoch": 0.17192610678582945, "grad_norm": 0.9735293306486302, "learning_rate": 4.799588610683389e-06, "loss": 0.1845, "step": 1866 }, { "epoch": 0.17201824296309945, "grad_norm": 0.9944144850116116, "learning_rate": 4.799289653135633e-06, "loss": 0.1756, "step": 1867 }, { "epoch": 0.17211037914036947, "grad_norm": 0.9316812929228392, "learning_rate": 4.7989904820985854e-06, "loss": 0.1747, "step": 1868 }, { "epoch": 0.17220251531763947, "grad_norm": 0.9486629348319279, "learning_rate": 4.798691097600024e-06, "loss": 0.1783, "step": 1869 }, { "epoch": 0.17229465149490947, "grad_norm": 0.9921250810385875, "learning_rate": 4.798391499667747e-06, "loss": 0.1806, "step": 1870 }, { "epoch": 0.1723867876721795, "grad_norm": 0.9467281601207117, "learning_rate": 4.798091688329572e-06, "loss": 0.1792, "step": 1871 }, { "epoch": 0.1724789238494495, "grad_norm": 0.8938437016708146, "learning_rate": 4.7977916636133365e-06, "loss": 0.166, "step": 1872 }, { "epoch": 0.17257106002671949, "grad_norm": 0.9033608634661399, "learning_rate": 4.797491425546898e-06, "loss": 0.1624, "step": 1873 }, { "epoch": 0.17266319620398948, "grad_norm": 1.0093173143492151, "learning_rate": 4.797190974158133e-06, "loss": 0.1804, "step": 1874 }, { "epoch": 0.1727553323812595, "grad_norm": 0.9269759911665011, "learning_rate": 4.796890309474938e-06, "loss": 0.1924, "step": 1875 }, { "epoch": 0.1728474685585295, "grad_norm": 0.9511087442600202, "learning_rate": 4.796589431525232e-06, "loss": 0.1717, "step": 1876 }, { "epoch": 0.1729396047357995, "grad_norm": 0.9727630535856038, "learning_rate": 4.796288340336949e-06, "loss": 0.1824, "step": 1877 }, { "epoch": 0.17303174091306953, "grad_norm": 0.9098476882737225, "learning_rate": 4.795987035938047e-06, "loss": 0.1598, "step": 1878 }, { "epoch": 0.17312387709033952, "grad_norm": 0.9586208598334143, "learning_rate": 4.795685518356501e-06, "loss": 0.176, "step": 1879 }, { "epoch": 0.17321601326760952, "grad_norm": 0.9342244991941684, "learning_rate": 4.795383787620308e-06, "loss": 0.1622, "step": 1880 }, { "epoch": 0.17330814944487954, "grad_norm": 0.9062253340794538, "learning_rate": 4.795081843757483e-06, "loss": 0.1579, "step": 1881 }, { "epoch": 0.17340028562214954, "grad_norm": 0.93803547801233, "learning_rate": 4.794779686796062e-06, "loss": 0.166, "step": 1882 }, { "epoch": 0.17349242179941954, "grad_norm": 0.9996096252341287, "learning_rate": 4.794477316764101e-06, "loss": 0.1842, "step": 1883 }, { "epoch": 0.17358455797668954, "grad_norm": 0.9514554221599457, "learning_rate": 4.794174733689672e-06, "loss": 0.1672, "step": 1884 }, { "epoch": 0.17367669415395956, "grad_norm": 0.9379721576841644, "learning_rate": 4.793871937600874e-06, "loss": 0.1689, "step": 1885 }, { "epoch": 0.17376883033122956, "grad_norm": 0.9887485087873079, "learning_rate": 4.7935689285258195e-06, "loss": 0.1695, "step": 1886 }, { "epoch": 0.17386096650849955, "grad_norm": 0.9185524648048645, "learning_rate": 4.793265706492643e-06, "loss": 0.175, "step": 1887 }, { "epoch": 0.17395310268576958, "grad_norm": 0.944094137645397, "learning_rate": 4.792962271529499e-06, "loss": 0.1694, "step": 1888 }, { "epoch": 0.17404523886303958, "grad_norm": 0.9907587903074953, "learning_rate": 4.792658623664561e-06, "loss": 0.1788, "step": 1889 }, { "epoch": 0.17413737504030957, "grad_norm": 0.8874671631140197, "learning_rate": 4.792354762926023e-06, "loss": 0.1604, "step": 1890 }, { "epoch": 0.17422951121757957, "grad_norm": 0.917699450885086, "learning_rate": 4.792050689342098e-06, "loss": 0.1882, "step": 1891 }, { "epoch": 0.1743216473948496, "grad_norm": 0.8728825937831589, "learning_rate": 4.791746402941021e-06, "loss": 0.1583, "step": 1892 }, { "epoch": 0.1744137835721196, "grad_norm": 0.999026083350806, "learning_rate": 4.791441903751043e-06, "loss": 0.1835, "step": 1893 }, { "epoch": 0.1745059197493896, "grad_norm": 0.9922845901509115, "learning_rate": 4.791137191800438e-06, "loss": 0.1817, "step": 1894 }, { "epoch": 0.1745980559266596, "grad_norm": 0.9063701103169883, "learning_rate": 4.790832267117498e-06, "loss": 0.1633, "step": 1895 }, { "epoch": 0.1746901921039296, "grad_norm": 0.9812700186466582, "learning_rate": 4.790527129730536e-06, "loss": 0.1825, "step": 1896 }, { "epoch": 0.1747823282811996, "grad_norm": 0.9791016126703898, "learning_rate": 4.790221779667883e-06, "loss": 0.1834, "step": 1897 }, { "epoch": 0.17487446445846963, "grad_norm": 1.019954906964735, "learning_rate": 4.789916216957892e-06, "loss": 0.1793, "step": 1898 }, { "epoch": 0.17496660063573963, "grad_norm": 1.0658787511106087, "learning_rate": 4.789610441628932e-06, "loss": 0.1805, "step": 1899 }, { "epoch": 0.17505873681300962, "grad_norm": 0.9131631203861305, "learning_rate": 4.789304453709398e-06, "loss": 0.1692, "step": 1900 }, { "epoch": 0.17515087299027962, "grad_norm": 0.9370079179863117, "learning_rate": 4.788998253227698e-06, "loss": 0.1774, "step": 1901 }, { "epoch": 0.17524300916754965, "grad_norm": 0.9204477318927515, "learning_rate": 4.788691840212264e-06, "loss": 0.1696, "step": 1902 }, { "epoch": 0.17533514534481964, "grad_norm": 0.9702024390215709, "learning_rate": 4.788385214691546e-06, "loss": 0.1758, "step": 1903 }, { "epoch": 0.17542728152208964, "grad_norm": 1.0381178605677073, "learning_rate": 4.788078376694017e-06, "loss": 0.18, "step": 1904 }, { "epoch": 0.17551941769935966, "grad_norm": 0.9955510617085207, "learning_rate": 4.787771326248162e-06, "loss": 0.1738, "step": 1905 }, { "epoch": 0.17561155387662966, "grad_norm": 0.8831789806878049, "learning_rate": 4.787464063382493e-06, "loss": 0.157, "step": 1906 }, { "epoch": 0.17570369005389966, "grad_norm": 0.9652670549320401, "learning_rate": 4.787156588125541e-06, "loss": 0.1738, "step": 1907 }, { "epoch": 0.17579582623116966, "grad_norm": 1.016867250112242, "learning_rate": 4.786848900505852e-06, "loss": 0.197, "step": 1908 }, { "epoch": 0.17588796240843968, "grad_norm": 0.8937479093266593, "learning_rate": 4.786541000551997e-06, "loss": 0.1709, "step": 1909 }, { "epoch": 0.17598009858570968, "grad_norm": 0.9348137203113728, "learning_rate": 4.786232888292564e-06, "loss": 0.1696, "step": 1910 }, { "epoch": 0.17607223476297967, "grad_norm": 0.9606862462398517, "learning_rate": 4.785924563756162e-06, "loss": 0.1778, "step": 1911 }, { "epoch": 0.1761643709402497, "grad_norm": 0.9522177279868732, "learning_rate": 4.785616026971418e-06, "loss": 0.1802, "step": 1912 }, { "epoch": 0.1762565071175197, "grad_norm": 0.9383905997510276, "learning_rate": 4.78530727796698e-06, "loss": 0.1735, "step": 1913 }, { "epoch": 0.1763486432947897, "grad_norm": 0.9510793409227706, "learning_rate": 4.784998316771515e-06, "loss": 0.1739, "step": 1914 }, { "epoch": 0.17644077947205972, "grad_norm": 1.0062514891320191, "learning_rate": 4.784689143413711e-06, "loss": 0.1829, "step": 1915 }, { "epoch": 0.1765329156493297, "grad_norm": 0.906545128875752, "learning_rate": 4.784379757922273e-06, "loss": 0.1647, "step": 1916 }, { "epoch": 0.1766250518265997, "grad_norm": 0.9973752258860914, "learning_rate": 4.78407016032593e-06, "loss": 0.183, "step": 1917 }, { "epoch": 0.1767171880038697, "grad_norm": 0.9141803308976617, "learning_rate": 4.783760350653426e-06, "loss": 0.1575, "step": 1918 }, { "epoch": 0.17680932418113973, "grad_norm": 0.9502312399313518, "learning_rate": 4.783450328933527e-06, "loss": 0.1746, "step": 1919 }, { "epoch": 0.17690146035840973, "grad_norm": 1.0723880151854668, "learning_rate": 4.78314009519502e-06, "loss": 0.1945, "step": 1920 }, { "epoch": 0.17699359653567973, "grad_norm": 0.8964554327793061, "learning_rate": 4.782829649466709e-06, "loss": 0.1613, "step": 1921 }, { "epoch": 0.17708573271294975, "grad_norm": 0.9319472469096941, "learning_rate": 4.78251899177742e-06, "loss": 0.1661, "step": 1922 }, { "epoch": 0.17717786889021975, "grad_norm": 1.0237495405647998, "learning_rate": 4.7822081221559965e-06, "loss": 0.1866, "step": 1923 }, { "epoch": 0.17727000506748974, "grad_norm": 0.8369732203496116, "learning_rate": 4.781897040631304e-06, "loss": 0.1652, "step": 1924 }, { "epoch": 0.17736214124475974, "grad_norm": 1.0220020925106441, "learning_rate": 4.781585747232224e-06, "loss": 0.1963, "step": 1925 }, { "epoch": 0.17745427742202977, "grad_norm": 0.9377498492935152, "learning_rate": 4.781274241987664e-06, "loss": 0.1687, "step": 1926 }, { "epoch": 0.17754641359929976, "grad_norm": 0.9221667519044533, "learning_rate": 4.7809625249265436e-06, "loss": 0.1633, "step": 1927 }, { "epoch": 0.17763854977656976, "grad_norm": 1.096594760082437, "learning_rate": 4.780650596077808e-06, "loss": 0.1839, "step": 1928 }, { "epoch": 0.17773068595383978, "grad_norm": 1.0274699250823436, "learning_rate": 4.780338455470419e-06, "loss": 0.1784, "step": 1929 }, { "epoch": 0.17782282213110978, "grad_norm": 0.9173298175308037, "learning_rate": 4.780026103133358e-06, "loss": 0.1671, "step": 1930 }, { "epoch": 0.17791495830837978, "grad_norm": 1.0241515678846906, "learning_rate": 4.7797135390956294e-06, "loss": 0.1667, "step": 1931 }, { "epoch": 0.1780070944856498, "grad_norm": 1.0382108734004492, "learning_rate": 4.779400763386253e-06, "loss": 0.1912, "step": 1932 }, { "epoch": 0.1780992306629198, "grad_norm": 0.9695327271031154, "learning_rate": 4.77908777603427e-06, "loss": 0.1753, "step": 1933 }, { "epoch": 0.1781913668401898, "grad_norm": 1.038422251537585, "learning_rate": 4.778774577068741e-06, "loss": 0.1856, "step": 1934 }, { "epoch": 0.1782835030174598, "grad_norm": 0.8941524559172731, "learning_rate": 4.778461166518748e-06, "loss": 0.162, "step": 1935 }, { "epoch": 0.17837563919472982, "grad_norm": 0.9313317705606107, "learning_rate": 4.778147544413392e-06, "loss": 0.1728, "step": 1936 }, { "epoch": 0.17846777537199982, "grad_norm": 1.0519701170000495, "learning_rate": 4.777833710781789e-06, "loss": 0.1957, "step": 1937 }, { "epoch": 0.1785599115492698, "grad_norm": 1.0387981318355688, "learning_rate": 4.777519665653082e-06, "loss": 0.1878, "step": 1938 }, { "epoch": 0.17865204772653984, "grad_norm": 0.9444838158399083, "learning_rate": 4.777205409056429e-06, "loss": 0.1779, "step": 1939 }, { "epoch": 0.17874418390380983, "grad_norm": 0.9575307026752148, "learning_rate": 4.776890941021008e-06, "loss": 0.2017, "step": 1940 }, { "epoch": 0.17883632008107983, "grad_norm": 1.0234915939858733, "learning_rate": 4.776576261576018e-06, "loss": 0.1722, "step": 1941 }, { "epoch": 0.17892845625834983, "grad_norm": 0.9803379275090085, "learning_rate": 4.776261370750678e-06, "loss": 0.1706, "step": 1942 }, { "epoch": 0.17902059243561985, "grad_norm": 0.9358718176258201, "learning_rate": 4.775946268574224e-06, "loss": 0.1763, "step": 1943 }, { "epoch": 0.17911272861288985, "grad_norm": 0.9786414947095523, "learning_rate": 4.775630955075915e-06, "loss": 0.1992, "step": 1944 }, { "epoch": 0.17920486479015985, "grad_norm": 1.0318275409794424, "learning_rate": 4.775315430285026e-06, "loss": 0.1981, "step": 1945 }, { "epoch": 0.17929700096742987, "grad_norm": 0.9631276712451919, "learning_rate": 4.7749996942308546e-06, "loss": 0.1888, "step": 1946 }, { "epoch": 0.17938913714469987, "grad_norm": 1.0003266569020262, "learning_rate": 4.774683746942717e-06, "loss": 0.1842, "step": 1947 }, { "epoch": 0.17948127332196986, "grad_norm": 0.9265546188995342, "learning_rate": 4.774367588449948e-06, "loss": 0.1578, "step": 1948 }, { "epoch": 0.1795734094992399, "grad_norm": 0.9009805935055492, "learning_rate": 4.774051218781904e-06, "loss": 0.1632, "step": 1949 }, { "epoch": 0.17966554567650989, "grad_norm": 0.9809819232416425, "learning_rate": 4.77373463796796e-06, "loss": 0.1786, "step": 1950 }, { "epoch": 0.17975768185377988, "grad_norm": 1.0260467345453894, "learning_rate": 4.7734178460375105e-06, "loss": 0.1657, "step": 1951 }, { "epoch": 0.17984981803104988, "grad_norm": 0.9280186899004385, "learning_rate": 4.773100843019969e-06, "loss": 0.1662, "step": 1952 }, { "epoch": 0.1799419542083199, "grad_norm": 0.9861324889794633, "learning_rate": 4.7727836289447685e-06, "loss": 0.1815, "step": 1953 }, { "epoch": 0.1800340903855899, "grad_norm": 0.9184406579084488, "learning_rate": 4.7724662038413646e-06, "loss": 0.1805, "step": 1954 }, { "epoch": 0.1801262265628599, "grad_norm": 1.065223211061827, "learning_rate": 4.772148567739229e-06, "loss": 0.172, "step": 1955 }, { "epoch": 0.18021836274012992, "grad_norm": 0.9539061206964681, "learning_rate": 4.7718307206678535e-06, "loss": 0.1828, "step": 1956 }, { "epoch": 0.18031049891739992, "grad_norm": 0.9148835147838171, "learning_rate": 4.7715126626567525e-06, "loss": 0.1541, "step": 1957 }, { "epoch": 0.18040263509466992, "grad_norm": 0.9232041932852285, "learning_rate": 4.7711943937354555e-06, "loss": 0.1751, "step": 1958 }, { "epoch": 0.18049477127193994, "grad_norm": 0.9216980915355115, "learning_rate": 4.770875913933515e-06, "loss": 0.1851, "step": 1959 }, { "epoch": 0.18058690744920994, "grad_norm": 0.9540739614506976, "learning_rate": 4.770557223280501e-06, "loss": 0.1919, "step": 1960 }, { "epoch": 0.18067904362647993, "grad_norm": 0.9591787025956806, "learning_rate": 4.7702383218060044e-06, "loss": 0.1772, "step": 1961 }, { "epoch": 0.18077117980374993, "grad_norm": 1.059574276391285, "learning_rate": 4.769919209539635e-06, "loss": 0.2, "step": 1962 }, { "epoch": 0.18086331598101996, "grad_norm": 1.0242113087334594, "learning_rate": 4.769599886511024e-06, "loss": 0.1847, "step": 1963 }, { "epoch": 0.18095545215828995, "grad_norm": 0.8678928771391421, "learning_rate": 4.769280352749817e-06, "loss": 0.1636, "step": 1964 }, { "epoch": 0.18104758833555995, "grad_norm": 0.9217600598060104, "learning_rate": 4.768960608285688e-06, "loss": 0.1729, "step": 1965 }, { "epoch": 0.18113972451282997, "grad_norm": 0.8802794485229976, "learning_rate": 4.76864065314832e-06, "loss": 0.1748, "step": 1966 }, { "epoch": 0.18123186069009997, "grad_norm": 0.9870852789685146, "learning_rate": 4.768320487367424e-06, "loss": 0.1683, "step": 1967 }, { "epoch": 0.18132399686736997, "grad_norm": 0.9296981723017914, "learning_rate": 4.768000110972727e-06, "loss": 0.1689, "step": 1968 }, { "epoch": 0.18141613304463997, "grad_norm": 0.9677522771888776, "learning_rate": 4.767679523993976e-06, "loss": 0.1883, "step": 1969 }, { "epoch": 0.18150826922191, "grad_norm": 0.9016253481332323, "learning_rate": 4.767358726460936e-06, "loss": 0.1605, "step": 1970 }, { "epoch": 0.18160040539918, "grad_norm": 0.9058389324448571, "learning_rate": 4.7670377184033944e-06, "loss": 0.1687, "step": 1971 }, { "epoch": 0.18169254157644998, "grad_norm": 1.017821513904742, "learning_rate": 4.7667164998511574e-06, "loss": 0.1911, "step": 1972 }, { "epoch": 0.18178467775372, "grad_norm": 0.9469304159614529, "learning_rate": 4.766395070834049e-06, "loss": 0.1783, "step": 1973 }, { "epoch": 0.18187681393099, "grad_norm": 0.9290326425937189, "learning_rate": 4.7660734313819135e-06, "loss": 0.1666, "step": 1974 }, { "epoch": 0.18196895010826, "grad_norm": 0.907021416145737, "learning_rate": 4.765751581524617e-06, "loss": 0.1551, "step": 1975 }, { "epoch": 0.18206108628553003, "grad_norm": 0.9225218369066349, "learning_rate": 4.765429521292042e-06, "loss": 0.1812, "step": 1976 }, { "epoch": 0.18215322246280002, "grad_norm": 0.9316138857926441, "learning_rate": 4.765107250714093e-06, "loss": 0.17, "step": 1977 }, { "epoch": 0.18224535864007002, "grad_norm": 0.9764313721990456, "learning_rate": 4.764784769820691e-06, "loss": 0.1843, "step": 1978 }, { "epoch": 0.18233749481734002, "grad_norm": 0.9237308847508986, "learning_rate": 4.76446207864178e-06, "loss": 0.1797, "step": 1979 }, { "epoch": 0.18242963099461004, "grad_norm": 0.9644001296709762, "learning_rate": 4.764139177207321e-06, "loss": 0.187, "step": 1980 }, { "epoch": 0.18252176717188004, "grad_norm": 0.8821755436379798, "learning_rate": 4.763816065547295e-06, "loss": 0.1686, "step": 1981 }, { "epoch": 0.18261390334915004, "grad_norm": 0.9503521212580078, "learning_rate": 4.763492743691705e-06, "loss": 0.1591, "step": 1982 }, { "epoch": 0.18270603952642006, "grad_norm": 0.9082913689491118, "learning_rate": 4.7631692116705695e-06, "loss": 0.176, "step": 1983 }, { "epoch": 0.18279817570369006, "grad_norm": 0.9100042108564763, "learning_rate": 4.76284546951393e-06, "loss": 0.1638, "step": 1984 }, { "epoch": 0.18289031188096005, "grad_norm": 1.0130194245152968, "learning_rate": 4.762521517251844e-06, "loss": 0.1889, "step": 1985 }, { "epoch": 0.18298244805823005, "grad_norm": 0.9483601310990681, "learning_rate": 4.762197354914391e-06, "loss": 0.1789, "step": 1986 }, { "epoch": 0.18307458423550008, "grad_norm": 0.9978614650039507, "learning_rate": 4.761872982531671e-06, "loss": 0.188, "step": 1987 }, { "epoch": 0.18316672041277007, "grad_norm": 1.0247173947884123, "learning_rate": 4.761548400133801e-06, "loss": 0.1727, "step": 1988 }, { "epoch": 0.18325885659004007, "grad_norm": 1.043154599032093, "learning_rate": 4.761223607750919e-06, "loss": 0.1821, "step": 1989 }, { "epoch": 0.1833509927673101, "grad_norm": 1.069427452926741, "learning_rate": 4.760898605413182e-06, "loss": 0.1953, "step": 1990 }, { "epoch": 0.1834431289445801, "grad_norm": 0.9171673945079573, "learning_rate": 4.760573393150766e-06, "loss": 0.1682, "step": 1991 }, { "epoch": 0.1835352651218501, "grad_norm": 0.9707474375781835, "learning_rate": 4.760247970993867e-06, "loss": 0.187, "step": 1992 }, { "epoch": 0.1836274012991201, "grad_norm": 0.9115866782437404, "learning_rate": 4.7599223389727e-06, "loss": 0.166, "step": 1993 }, { "epoch": 0.1837195374763901, "grad_norm": 0.9093587060651217, "learning_rate": 4.759596497117501e-06, "loss": 0.1621, "step": 1994 }, { "epoch": 0.1838116736536601, "grad_norm": 0.939985698078961, "learning_rate": 4.759270445458524e-06, "loss": 0.186, "step": 1995 }, { "epoch": 0.1839038098309301, "grad_norm": 0.9930713127947033, "learning_rate": 4.758944184026043e-06, "loss": 0.1755, "step": 1996 }, { "epoch": 0.18399594600820013, "grad_norm": 0.9060670257127519, "learning_rate": 4.758617712850352e-06, "loss": 0.1656, "step": 1997 }, { "epoch": 0.18408808218547013, "grad_norm": 0.9596014968387875, "learning_rate": 4.758291031961763e-06, "loss": 0.1774, "step": 1998 }, { "epoch": 0.18418021836274012, "grad_norm": 0.9071035450847232, "learning_rate": 4.757964141390609e-06, "loss": 0.1779, "step": 1999 }, { "epoch": 0.18427235454001015, "grad_norm": 0.916038591656752, "learning_rate": 4.75763704116724e-06, "loss": 0.1701, "step": 2000 }, { "epoch": 0.18427235454001015, "eval_loss": 0.1747845858335495, "eval_runtime": 299.1226, "eval_samples_per_second": 23.459, "eval_steps_per_second": 2.935, "step": 2000 }, { "epoch": 0.18436449071728014, "grad_norm": 0.9620227340872637, "learning_rate": 4.757309731322029e-06, "loss": 0.1766, "step": 2001 }, { "epoch": 0.18445662689455014, "grad_norm": 0.9936552481938895, "learning_rate": 4.756982211885368e-06, "loss": 0.185, "step": 2002 }, { "epoch": 0.18454876307182014, "grad_norm": 0.9243700651653634, "learning_rate": 4.756654482887665e-06, "loss": 0.1629, "step": 2003 }, { "epoch": 0.18464089924909016, "grad_norm": 0.9388336836899591, "learning_rate": 4.756326544359351e-06, "loss": 0.1677, "step": 2004 }, { "epoch": 0.18473303542636016, "grad_norm": 0.9691896762005906, "learning_rate": 4.7559983963308735e-06, "loss": 0.159, "step": 2005 }, { "epoch": 0.18482517160363016, "grad_norm": 0.91871871300768, "learning_rate": 4.755670038832703e-06, "loss": 0.1657, "step": 2006 }, { "epoch": 0.18491730778090018, "grad_norm": 0.959152484145046, "learning_rate": 4.755341471895325e-06, "loss": 0.1813, "step": 2007 }, { "epoch": 0.18500944395817018, "grad_norm": 0.9236567392647953, "learning_rate": 4.75501269554925e-06, "loss": 0.1655, "step": 2008 }, { "epoch": 0.18510158013544017, "grad_norm": 0.917760585288594, "learning_rate": 4.754683709825003e-06, "loss": 0.1762, "step": 2009 }, { "epoch": 0.1851937163127102, "grad_norm": 0.8881508952042282, "learning_rate": 4.7543545147531314e-06, "loss": 0.1677, "step": 2010 }, { "epoch": 0.1852858524899802, "grad_norm": 0.9048943834112922, "learning_rate": 4.754025110364201e-06, "loss": 0.1648, "step": 2011 }, { "epoch": 0.1853779886672502, "grad_norm": 0.9174376650297095, "learning_rate": 4.753695496688795e-06, "loss": 0.1641, "step": 2012 }, { "epoch": 0.1854701248445202, "grad_norm": 0.9517016421197806, "learning_rate": 4.753365673757521e-06, "loss": 0.1783, "step": 2013 }, { "epoch": 0.18556226102179021, "grad_norm": 0.9098170442182718, "learning_rate": 4.7530356416010004e-06, "loss": 0.1584, "step": 2014 }, { "epoch": 0.1856543971990602, "grad_norm": 0.9253465519598166, "learning_rate": 4.7527054002498785e-06, "loss": 0.1692, "step": 2015 }, { "epoch": 0.1857465333763302, "grad_norm": 0.9304349614009866, "learning_rate": 4.752374949734818e-06, "loss": 0.1764, "step": 2016 }, { "epoch": 0.18583866955360023, "grad_norm": 0.9380871995269432, "learning_rate": 4.752044290086501e-06, "loss": 0.174, "step": 2017 }, { "epoch": 0.18593080573087023, "grad_norm": 0.9037755594294037, "learning_rate": 4.75171342133563e-06, "loss": 0.1584, "step": 2018 }, { "epoch": 0.18602294190814023, "grad_norm": 0.9530499133921239, "learning_rate": 4.751382343512924e-06, "loss": 0.1765, "step": 2019 }, { "epoch": 0.18611507808541022, "grad_norm": 0.9837126600118509, "learning_rate": 4.751051056649126e-06, "loss": 0.1754, "step": 2020 }, { "epoch": 0.18620721426268025, "grad_norm": 0.9610159521960262, "learning_rate": 4.750719560774994e-06, "loss": 0.1713, "step": 2021 }, { "epoch": 0.18629935043995025, "grad_norm": 0.9192661443054622, "learning_rate": 4.75038785592131e-06, "loss": 0.1624, "step": 2022 }, { "epoch": 0.18639148661722024, "grad_norm": 0.9674669683439728, "learning_rate": 4.750055942118871e-06, "loss": 0.1772, "step": 2023 }, { "epoch": 0.18648362279449027, "grad_norm": 0.9778177795518106, "learning_rate": 4.749723819398496e-06, "loss": 0.1693, "step": 2024 }, { "epoch": 0.18657575897176026, "grad_norm": 0.9154309692494186, "learning_rate": 4.749391487791021e-06, "loss": 0.167, "step": 2025 }, { "epoch": 0.18666789514903026, "grad_norm": 0.9903773650928431, "learning_rate": 4.749058947327306e-06, "loss": 0.1675, "step": 2026 }, { "epoch": 0.18676003132630029, "grad_norm": 0.9298684751001484, "learning_rate": 4.7487261980382235e-06, "loss": 0.1686, "step": 2027 }, { "epoch": 0.18685216750357028, "grad_norm": 0.9751804305795381, "learning_rate": 4.748393239954674e-06, "loss": 0.1811, "step": 2028 }, { "epoch": 0.18694430368084028, "grad_norm": 0.916861137524208, "learning_rate": 4.748060073107568e-06, "loss": 0.1852, "step": 2029 }, { "epoch": 0.18703643985811028, "grad_norm": 0.9351359370165698, "learning_rate": 4.747726697527844e-06, "loss": 0.1744, "step": 2030 }, { "epoch": 0.1871285760353803, "grad_norm": 0.9126688237293801, "learning_rate": 4.747393113246453e-06, "loss": 0.1643, "step": 2031 }, { "epoch": 0.1872207122126503, "grad_norm": 0.9693557147267255, "learning_rate": 4.74705932029437e-06, "loss": 0.1742, "step": 2032 }, { "epoch": 0.1873128483899203, "grad_norm": 0.9757807922463323, "learning_rate": 4.746725318702587e-06, "loss": 0.166, "step": 2033 }, { "epoch": 0.18740498456719032, "grad_norm": 1.0355600426168787, "learning_rate": 4.746391108502116e-06, "loss": 0.1829, "step": 2034 }, { "epoch": 0.18749712074446032, "grad_norm": 0.9040436676391728, "learning_rate": 4.7460566897239905e-06, "loss": 0.1662, "step": 2035 }, { "epoch": 0.1875892569217303, "grad_norm": 1.0315787072138687, "learning_rate": 4.745722062399258e-06, "loss": 0.1904, "step": 2036 }, { "epoch": 0.1876813930990003, "grad_norm": 0.8845840894873757, "learning_rate": 4.745387226558991e-06, "loss": 0.1578, "step": 2037 }, { "epoch": 0.18777352927627033, "grad_norm": 0.9747265772515474, "learning_rate": 4.745052182234278e-06, "loss": 0.1845, "step": 2038 }, { "epoch": 0.18786566545354033, "grad_norm": 0.8418652499136062, "learning_rate": 4.744716929456229e-06, "loss": 0.1648, "step": 2039 }, { "epoch": 0.18795780163081033, "grad_norm": 0.9120095457085243, "learning_rate": 4.744381468255971e-06, "loss": 0.1719, "step": 2040 }, { "epoch": 0.18804993780808035, "grad_norm": 0.9253441991466294, "learning_rate": 4.7440457986646525e-06, "loss": 0.1741, "step": 2041 }, { "epoch": 0.18814207398535035, "grad_norm": 0.892395716534565, "learning_rate": 4.743709920713439e-06, "loss": 0.1623, "step": 2042 }, { "epoch": 0.18823421016262035, "grad_norm": 0.9443349478550868, "learning_rate": 4.743373834433519e-06, "loss": 0.1722, "step": 2043 }, { "epoch": 0.18832634633989037, "grad_norm": 0.9663983479939164, "learning_rate": 4.743037539856097e-06, "loss": 0.1874, "step": 2044 }, { "epoch": 0.18841848251716037, "grad_norm": 0.8949159341792653, "learning_rate": 4.742701037012397e-06, "loss": 0.1707, "step": 2045 }, { "epoch": 0.18851061869443037, "grad_norm": 0.9418778718283833, "learning_rate": 4.7423643259336656e-06, "loss": 0.1754, "step": 2046 }, { "epoch": 0.18860275487170036, "grad_norm": 0.9752328192488944, "learning_rate": 4.742027406651164e-06, "loss": 0.1647, "step": 2047 }, { "epoch": 0.1886948910489704, "grad_norm": 1.100330463505503, "learning_rate": 4.741690279196178e-06, "loss": 0.1866, "step": 2048 }, { "epoch": 0.18878702722624038, "grad_norm": 0.9272295304956722, "learning_rate": 4.741352943600007e-06, "loss": 0.1817, "step": 2049 }, { "epoch": 0.18887916340351038, "grad_norm": 0.9627565386987668, "learning_rate": 4.741015399893974e-06, "loss": 0.176, "step": 2050 }, { "epoch": 0.1889712995807804, "grad_norm": 1.00925483113557, "learning_rate": 4.740677648109421e-06, "loss": 0.1825, "step": 2051 }, { "epoch": 0.1890634357580504, "grad_norm": 0.8733725473410048, "learning_rate": 4.740339688277707e-06, "loss": 0.1708, "step": 2052 }, { "epoch": 0.1891555719353204, "grad_norm": 0.8996489754962257, "learning_rate": 4.7400015204302105e-06, "loss": 0.1527, "step": 2053 }, { "epoch": 0.1892477081125904, "grad_norm": 1.215286111800239, "learning_rate": 4.739663144598333e-06, "loss": 0.1734, "step": 2054 }, { "epoch": 0.18933984428986042, "grad_norm": 0.9496525339998125, "learning_rate": 4.739324560813491e-06, "loss": 0.1641, "step": 2055 }, { "epoch": 0.18943198046713042, "grad_norm": 1.02856117189786, "learning_rate": 4.738985769107123e-06, "loss": 0.2055, "step": 2056 }, { "epoch": 0.18952411664440041, "grad_norm": 0.9598011558340099, "learning_rate": 4.738646769510685e-06, "loss": 0.1707, "step": 2057 }, { "epoch": 0.18961625282167044, "grad_norm": 1.006354046062315, "learning_rate": 4.738307562055653e-06, "loss": 0.1777, "step": 2058 }, { "epoch": 0.18970838899894044, "grad_norm": 0.9327924509299893, "learning_rate": 4.737968146773524e-06, "loss": 0.1703, "step": 2059 }, { "epoch": 0.18980052517621043, "grad_norm": 0.9457591674178287, "learning_rate": 4.737628523695811e-06, "loss": 0.1727, "step": 2060 }, { "epoch": 0.18989266135348046, "grad_norm": 0.9430303988478719, "learning_rate": 4.737288692854049e-06, "loss": 0.1813, "step": 2061 }, { "epoch": 0.18998479753075045, "grad_norm": 0.9261016048878836, "learning_rate": 4.736948654279791e-06, "loss": 0.1772, "step": 2062 }, { "epoch": 0.19007693370802045, "grad_norm": 0.9157174430306279, "learning_rate": 4.73660840800461e-06, "loss": 0.1589, "step": 2063 }, { "epoch": 0.19016906988529045, "grad_norm": 0.9654944454044344, "learning_rate": 4.736267954060097e-06, "loss": 0.1712, "step": 2064 }, { "epoch": 0.19026120606256047, "grad_norm": 0.9651362786809399, "learning_rate": 4.735927292477864e-06, "loss": 0.1814, "step": 2065 }, { "epoch": 0.19035334223983047, "grad_norm": 0.9044877928218676, "learning_rate": 4.735586423289542e-06, "loss": 0.168, "step": 2066 }, { "epoch": 0.19044547841710047, "grad_norm": 0.9307633438458481, "learning_rate": 4.735245346526779e-06, "loss": 0.1755, "step": 2067 }, { "epoch": 0.1905376145943705, "grad_norm": 0.9901411171327417, "learning_rate": 4.734904062221246e-06, "loss": 0.1798, "step": 2068 }, { "epoch": 0.1906297507716405, "grad_norm": 0.9888693596561464, "learning_rate": 4.734562570404629e-06, "loss": 0.1725, "step": 2069 }, { "epoch": 0.19072188694891049, "grad_norm": 0.9139611246565976, "learning_rate": 4.734220871108638e-06, "loss": 0.1639, "step": 2070 }, { "epoch": 0.19081402312618048, "grad_norm": 0.9049028935278389, "learning_rate": 4.733878964364998e-06, "loss": 0.1762, "step": 2071 }, { "epoch": 0.1909061593034505, "grad_norm": 0.931420719373214, "learning_rate": 4.7335368502054564e-06, "loss": 0.1787, "step": 2072 }, { "epoch": 0.1909982954807205, "grad_norm": 0.9522269497391289, "learning_rate": 4.733194528661778e-06, "loss": 0.1751, "step": 2073 }, { "epoch": 0.1910904316579905, "grad_norm": 0.9549558070156487, "learning_rate": 4.732851999765747e-06, "loss": 0.1684, "step": 2074 }, { "epoch": 0.19118256783526053, "grad_norm": 0.9653026251701582, "learning_rate": 4.732509263549167e-06, "loss": 0.1713, "step": 2075 }, { "epoch": 0.19127470401253052, "grad_norm": 0.9395360707584086, "learning_rate": 4.732166320043862e-06, "loss": 0.1595, "step": 2076 }, { "epoch": 0.19136684018980052, "grad_norm": 0.9011041007700445, "learning_rate": 4.731823169281674e-06, "loss": 0.1726, "step": 2077 }, { "epoch": 0.19145897636707054, "grad_norm": 0.9982033798259724, "learning_rate": 4.731479811294464e-06, "loss": 0.1802, "step": 2078 }, { "epoch": 0.19155111254434054, "grad_norm": 0.9653945847759827, "learning_rate": 4.731136246114114e-06, "loss": 0.1786, "step": 2079 }, { "epoch": 0.19164324872161054, "grad_norm": 0.9572793258606881, "learning_rate": 4.730792473772523e-06, "loss": 0.17, "step": 2080 }, { "epoch": 0.19173538489888053, "grad_norm": 1.017565135723274, "learning_rate": 4.730448494301612e-06, "loss": 0.174, "step": 2081 }, { "epoch": 0.19182752107615056, "grad_norm": 0.9507693595346857, "learning_rate": 4.7301043077333165e-06, "loss": 0.1805, "step": 2082 }, { "epoch": 0.19191965725342056, "grad_norm": 0.9061693205241929, "learning_rate": 4.729759914099597e-06, "loss": 0.1636, "step": 2083 }, { "epoch": 0.19201179343069055, "grad_norm": 0.9924987247261973, "learning_rate": 4.729415313432429e-06, "loss": 0.17, "step": 2084 }, { "epoch": 0.19210392960796058, "grad_norm": 0.9801999793423163, "learning_rate": 4.729070505763809e-06, "loss": 0.1798, "step": 2085 }, { "epoch": 0.19219606578523057, "grad_norm": 0.9452879034672409, "learning_rate": 4.728725491125753e-06, "loss": 0.1726, "step": 2086 }, { "epoch": 0.19228820196250057, "grad_norm": 0.9947615608484313, "learning_rate": 4.728380269550296e-06, "loss": 0.1876, "step": 2087 }, { "epoch": 0.19238033813977057, "grad_norm": 1.0462479131548408, "learning_rate": 4.7280348410694905e-06, "loss": 0.1865, "step": 2088 }, { "epoch": 0.1924724743170406, "grad_norm": 0.9309592228183027, "learning_rate": 4.72768920571541e-06, "loss": 0.1876, "step": 2089 }, { "epoch": 0.1925646104943106, "grad_norm": 0.9042067215573306, "learning_rate": 4.727343363520147e-06, "loss": 0.1715, "step": 2090 }, { "epoch": 0.1926567466715806, "grad_norm": 0.9590467825615356, "learning_rate": 4.7269973145158134e-06, "loss": 0.168, "step": 2091 }, { "epoch": 0.1927488828488506, "grad_norm": 0.9612689987095125, "learning_rate": 4.7266510587345395e-06, "loss": 0.1712, "step": 2092 }, { "epoch": 0.1928410190261206, "grad_norm": 0.9089807471667561, "learning_rate": 4.726304596208475e-06, "loss": 0.1624, "step": 2093 }, { "epoch": 0.1929331552033906, "grad_norm": 0.8967926180786936, "learning_rate": 4.725957926969789e-06, "loss": 0.1564, "step": 2094 }, { "epoch": 0.19302529138066063, "grad_norm": 0.9384858000388413, "learning_rate": 4.72561105105067e-06, "loss": 0.169, "step": 2095 }, { "epoch": 0.19311742755793063, "grad_norm": 0.9604763558849385, "learning_rate": 4.7252639684833255e-06, "loss": 0.1687, "step": 2096 }, { "epoch": 0.19320956373520062, "grad_norm": 0.9183646669661314, "learning_rate": 4.724916679299982e-06, "loss": 0.1664, "step": 2097 }, { "epoch": 0.19330169991247062, "grad_norm": 0.917100174808335, "learning_rate": 4.7245691835328855e-06, "loss": 0.1676, "step": 2098 }, { "epoch": 0.19339383608974064, "grad_norm": 0.9365272475470837, "learning_rate": 4.724221481214301e-06, "loss": 0.1673, "step": 2099 }, { "epoch": 0.19348597226701064, "grad_norm": 1.0141662835470497, "learning_rate": 4.723873572376512e-06, "loss": 0.185, "step": 2100 }, { "epoch": 0.19357810844428064, "grad_norm": 0.9558839300005915, "learning_rate": 4.723525457051823e-06, "loss": 0.1795, "step": 2101 }, { "epoch": 0.19367024462155066, "grad_norm": 0.9411376137788801, "learning_rate": 4.723177135272556e-06, "loss": 0.1752, "step": 2102 }, { "epoch": 0.19376238079882066, "grad_norm": 0.8955626892165673, "learning_rate": 4.7228286070710525e-06, "loss": 0.1726, "step": 2103 }, { "epoch": 0.19385451697609066, "grad_norm": 0.9072698897943006, "learning_rate": 4.722479872479674e-06, "loss": 0.1588, "step": 2104 }, { "epoch": 0.19394665315336065, "grad_norm": 0.921780763256833, "learning_rate": 4.7221309315308e-06, "loss": 0.1765, "step": 2105 }, { "epoch": 0.19403878933063068, "grad_norm": 0.8677649999621102, "learning_rate": 4.721781784256829e-06, "loss": 0.1535, "step": 2106 }, { "epoch": 0.19413092550790068, "grad_norm": 1.0361240936326264, "learning_rate": 4.721432430690181e-06, "loss": 0.18, "step": 2107 }, { "epoch": 0.19422306168517067, "grad_norm": 1.0506692873586285, "learning_rate": 4.721082870863293e-06, "loss": 0.1744, "step": 2108 }, { "epoch": 0.1943151978624407, "grad_norm": 0.9632736944905299, "learning_rate": 4.720733104808621e-06, "loss": 0.1563, "step": 2109 }, { "epoch": 0.1944073340397107, "grad_norm": 0.9477773210722018, "learning_rate": 4.720383132558641e-06, "loss": 0.174, "step": 2110 }, { "epoch": 0.1944994702169807, "grad_norm": 1.105717006171067, "learning_rate": 4.720032954145849e-06, "loss": 0.1774, "step": 2111 }, { "epoch": 0.19459160639425072, "grad_norm": 0.9737476883844909, "learning_rate": 4.719682569602757e-06, "loss": 0.1692, "step": 2112 }, { "epoch": 0.1946837425715207, "grad_norm": 1.0352161292591884, "learning_rate": 4.7193319789619e-06, "loss": 0.1729, "step": 2113 }, { "epoch": 0.1947758787487907, "grad_norm": 0.893049222834855, "learning_rate": 4.718981182255831e-06, "loss": 0.167, "step": 2114 }, { "epoch": 0.1948680149260607, "grad_norm": 0.9409597426616632, "learning_rate": 4.71863017951712e-06, "loss": 0.1851, "step": 2115 }, { "epoch": 0.19496015110333073, "grad_norm": 0.932584725623311, "learning_rate": 4.718278970778357e-06, "loss": 0.1704, "step": 2116 }, { "epoch": 0.19505228728060073, "grad_norm": 0.9747757753227256, "learning_rate": 4.717927556072153e-06, "loss": 0.175, "step": 2117 }, { "epoch": 0.19514442345787072, "grad_norm": 0.964675319367432, "learning_rate": 4.717575935431138e-06, "loss": 0.1741, "step": 2118 }, { "epoch": 0.19523655963514075, "grad_norm": 1.0099187231751314, "learning_rate": 4.7172241088879575e-06, "loss": 0.1854, "step": 2119 }, { "epoch": 0.19532869581241075, "grad_norm": 0.9059186999651861, "learning_rate": 4.716872076475281e-06, "loss": 0.1786, "step": 2120 }, { "epoch": 0.19542083198968074, "grad_norm": 0.9383878112214665, "learning_rate": 4.7165198382257926e-06, "loss": 0.1691, "step": 2121 }, { "epoch": 0.19551296816695074, "grad_norm": 0.9390251394741452, "learning_rate": 4.716167394172198e-06, "loss": 0.1747, "step": 2122 }, { "epoch": 0.19560510434422076, "grad_norm": 0.9179061143840483, "learning_rate": 4.715814744347224e-06, "loss": 0.1699, "step": 2123 }, { "epoch": 0.19569724052149076, "grad_norm": 0.8956384202715454, "learning_rate": 4.715461888783612e-06, "loss": 0.1581, "step": 2124 }, { "epoch": 0.19578937669876076, "grad_norm": 0.9922412568775244, "learning_rate": 4.715108827514125e-06, "loss": 0.1852, "step": 2125 }, { "epoch": 0.19588151287603078, "grad_norm": 0.9201841509112736, "learning_rate": 4.714755560571545e-06, "loss": 0.1806, "step": 2126 }, { "epoch": 0.19597364905330078, "grad_norm": 0.9291530212197076, "learning_rate": 4.7144020879886736e-06, "loss": 0.1678, "step": 2127 }, { "epoch": 0.19606578523057078, "grad_norm": 0.8892646385384433, "learning_rate": 4.714048409798328e-06, "loss": 0.1588, "step": 2128 }, { "epoch": 0.1961579214078408, "grad_norm": 0.9578647584230859, "learning_rate": 4.713694526033351e-06, "loss": 0.1696, "step": 2129 }, { "epoch": 0.1962500575851108, "grad_norm": 0.9303263756088295, "learning_rate": 4.713340436726599e-06, "loss": 0.1876, "step": 2130 }, { "epoch": 0.1963421937623808, "grad_norm": 1.0143043159123855, "learning_rate": 4.712986141910948e-06, "loss": 0.1833, "step": 2131 }, { "epoch": 0.1964343299396508, "grad_norm": 1.0257791241751693, "learning_rate": 4.712631641619297e-06, "loss": 0.1813, "step": 2132 }, { "epoch": 0.19652646611692082, "grad_norm": 0.9326155949771587, "learning_rate": 4.7122769358845595e-06, "loss": 0.1834, "step": 2133 }, { "epoch": 0.19661860229419081, "grad_norm": 0.9501425332398825, "learning_rate": 4.71192202473967e-06, "loss": 0.1883, "step": 2134 }, { "epoch": 0.1967107384714608, "grad_norm": 0.92519734244431, "learning_rate": 4.711566908217583e-06, "loss": 0.1733, "step": 2135 }, { "epoch": 0.19680287464873084, "grad_norm": 0.9141559769713349, "learning_rate": 4.71121158635127e-06, "loss": 0.1693, "step": 2136 }, { "epoch": 0.19689501082600083, "grad_norm": 0.8635507856697013, "learning_rate": 4.710856059173723e-06, "loss": 0.1597, "step": 2137 }, { "epoch": 0.19698714700327083, "grad_norm": 0.9169944069705963, "learning_rate": 4.710500326717954e-06, "loss": 0.1789, "step": 2138 }, { "epoch": 0.19707928318054083, "grad_norm": 0.9519571879140262, "learning_rate": 4.7101443890169915e-06, "loss": 0.1681, "step": 2139 }, { "epoch": 0.19717141935781085, "grad_norm": 0.9514507211318625, "learning_rate": 4.7097882461038845e-06, "loss": 0.1725, "step": 2140 }, { "epoch": 0.19726355553508085, "grad_norm": 0.9761404726228229, "learning_rate": 4.7094318980117005e-06, "loss": 0.1759, "step": 2141 }, { "epoch": 0.19735569171235084, "grad_norm": 0.9768124223352279, "learning_rate": 4.709075344773527e-06, "loss": 0.179, "step": 2142 }, { "epoch": 0.19744782788962087, "grad_norm": 0.9305036698633852, "learning_rate": 4.70871858642247e-06, "loss": 0.1669, "step": 2143 }, { "epoch": 0.19753996406689087, "grad_norm": 0.9332637277566961, "learning_rate": 4.708361622991656e-06, "loss": 0.164, "step": 2144 }, { "epoch": 0.19763210024416086, "grad_norm": 0.9666366370430729, "learning_rate": 4.708004454514226e-06, "loss": 0.183, "step": 2145 }, { "epoch": 0.1977242364214309, "grad_norm": 0.9087507122980967, "learning_rate": 4.7076470810233455e-06, "loss": 0.1715, "step": 2146 }, { "epoch": 0.19781637259870088, "grad_norm": 0.9415921197844276, "learning_rate": 4.707289502552196e-06, "loss": 0.1791, "step": 2147 }, { "epoch": 0.19790850877597088, "grad_norm": 0.8989464774107121, "learning_rate": 4.706931719133978e-06, "loss": 0.1655, "step": 2148 }, { "epoch": 0.19800064495324088, "grad_norm": 1.0465683898896436, "learning_rate": 4.706573730801913e-06, "loss": 0.189, "step": 2149 }, { "epoch": 0.1980927811305109, "grad_norm": 0.920378805791493, "learning_rate": 4.706215537589239e-06, "loss": 0.1758, "step": 2150 }, { "epoch": 0.1981849173077809, "grad_norm": 0.8983349357724887, "learning_rate": 4.705857139529215e-06, "loss": 0.1497, "step": 2151 }, { "epoch": 0.1982770534850509, "grad_norm": 0.9580161374361805, "learning_rate": 4.705498536655119e-06, "loss": 0.1821, "step": 2152 }, { "epoch": 0.19836918966232092, "grad_norm": 0.9231662092252367, "learning_rate": 4.705139729000246e-06, "loss": 0.1692, "step": 2153 }, { "epoch": 0.19846132583959092, "grad_norm": 0.9511535860655734, "learning_rate": 4.704780716597912e-06, "loss": 0.177, "step": 2154 }, { "epoch": 0.19855346201686092, "grad_norm": 0.9592527774843944, "learning_rate": 4.7044214994814505e-06, "loss": 0.1872, "step": 2155 }, { "epoch": 0.1986455981941309, "grad_norm": 0.941224979580375, "learning_rate": 4.704062077684216e-06, "loss": 0.1692, "step": 2156 }, { "epoch": 0.19873773437140094, "grad_norm": 0.8864677413252431, "learning_rate": 4.703702451239582e-06, "loss": 0.1711, "step": 2157 }, { "epoch": 0.19882987054867093, "grad_norm": 0.9760104907836626, "learning_rate": 4.703342620180936e-06, "loss": 0.1891, "step": 2158 }, { "epoch": 0.19892200672594093, "grad_norm": 0.925158598268592, "learning_rate": 4.702982584541691e-06, "loss": 0.1695, "step": 2159 }, { "epoch": 0.19901414290321096, "grad_norm": 0.969472216082636, "learning_rate": 4.702622344355276e-06, "loss": 0.1771, "step": 2160 }, { "epoch": 0.19910627908048095, "grad_norm": 1.0508981404224167, "learning_rate": 4.702261899655139e-06, "loss": 0.177, "step": 2161 }, { "epoch": 0.19919841525775095, "grad_norm": 0.9847930314760027, "learning_rate": 4.701901250474748e-06, "loss": 0.1827, "step": 2162 }, { "epoch": 0.19929055143502097, "grad_norm": 0.966509026140173, "learning_rate": 4.70154039684759e-06, "loss": 0.1706, "step": 2163 }, { "epoch": 0.19938268761229097, "grad_norm": 1.0693603050159732, "learning_rate": 4.701179338807168e-06, "loss": 0.1866, "step": 2164 }, { "epoch": 0.19947482378956097, "grad_norm": 0.9131224498772273, "learning_rate": 4.7008180763870075e-06, "loss": 0.1569, "step": 2165 }, { "epoch": 0.19956695996683096, "grad_norm": 0.9114405522217581, "learning_rate": 4.700456609620652e-06, "loss": 0.1687, "step": 2166 }, { "epoch": 0.199659096144101, "grad_norm": 0.9712061010659172, "learning_rate": 4.700094938541664e-06, "loss": 0.169, "step": 2167 }, { "epoch": 0.199751232321371, "grad_norm": 0.9870353066400023, "learning_rate": 4.6997330631836235e-06, "loss": 0.1736, "step": 2168 }, { "epoch": 0.19984336849864098, "grad_norm": 1.0457732545812704, "learning_rate": 4.699370983580132e-06, "loss": 0.1676, "step": 2169 }, { "epoch": 0.199935504675911, "grad_norm": 0.9582776467618426, "learning_rate": 4.699008699764807e-06, "loss": 0.1622, "step": 2170 }, { "epoch": 0.200027640853181, "grad_norm": 0.924560477144988, "learning_rate": 4.698646211771287e-06, "loss": 0.1778, "step": 2171 }, { "epoch": 0.200119777030451, "grad_norm": 0.9603418732544574, "learning_rate": 4.698283519633231e-06, "loss": 0.173, "step": 2172 }, { "epoch": 0.200211913207721, "grad_norm": 0.9318117654352273, "learning_rate": 4.6979206233843136e-06, "loss": 0.1763, "step": 2173 }, { "epoch": 0.20030404938499102, "grad_norm": 0.8500574809360554, "learning_rate": 4.697557523058229e-06, "loss": 0.1612, "step": 2174 }, { "epoch": 0.20039618556226102, "grad_norm": 0.9321952438861371, "learning_rate": 4.6971942186886925e-06, "loss": 0.1657, "step": 2175 }, { "epoch": 0.20048832173953102, "grad_norm": 0.8878808978445922, "learning_rate": 4.696830710309437e-06, "loss": 0.1669, "step": 2176 }, { "epoch": 0.20058045791680104, "grad_norm": 0.979230897783306, "learning_rate": 4.696466997954212e-06, "loss": 0.1746, "step": 2177 }, { "epoch": 0.20067259409407104, "grad_norm": 0.9049009072181234, "learning_rate": 4.696103081656791e-06, "loss": 0.1701, "step": 2178 }, { "epoch": 0.20076473027134104, "grad_norm": 0.9108225412943807, "learning_rate": 4.695738961450962e-06, "loss": 0.1588, "step": 2179 }, { "epoch": 0.20085686644861106, "grad_norm": 0.9517811181905188, "learning_rate": 4.695374637370534e-06, "loss": 0.173, "step": 2180 }, { "epoch": 0.20094900262588106, "grad_norm": 0.9484256391007594, "learning_rate": 4.695010109449335e-06, "loss": 0.174, "step": 2181 }, { "epoch": 0.20104113880315105, "grad_norm": 0.9833242975609692, "learning_rate": 4.694645377721211e-06, "loss": 0.1824, "step": 2182 }, { "epoch": 0.20113327498042105, "grad_norm": 0.9384489452108485, "learning_rate": 4.694280442220027e-06, "loss": 0.1606, "step": 2183 }, { "epoch": 0.20122541115769108, "grad_norm": 0.9458788028655759, "learning_rate": 4.693915302979669e-06, "loss": 0.1865, "step": 2184 }, { "epoch": 0.20131754733496107, "grad_norm": 0.9715149119245432, "learning_rate": 4.693549960034038e-06, "loss": 0.1758, "step": 2185 }, { "epoch": 0.20140968351223107, "grad_norm": 0.8895745071235284, "learning_rate": 4.693184413417058e-06, "loss": 0.1652, "step": 2186 }, { "epoch": 0.2015018196895011, "grad_norm": 0.9179333420101142, "learning_rate": 4.692818663162668e-06, "loss": 0.1668, "step": 2187 }, { "epoch": 0.2015939558667711, "grad_norm": 1.0524495392370385, "learning_rate": 4.69245270930483e-06, "loss": 0.1626, "step": 2188 }, { "epoch": 0.2016860920440411, "grad_norm": 0.9301977089994403, "learning_rate": 4.6920865518775214e-06, "loss": 0.161, "step": 2189 }, { "epoch": 0.20177822822131108, "grad_norm": 0.9799561176585696, "learning_rate": 4.6917201909147415e-06, "loss": 0.1838, "step": 2190 }, { "epoch": 0.2018703643985811, "grad_norm": 0.9996105143573751, "learning_rate": 4.691353626450505e-06, "loss": 0.1726, "step": 2191 }, { "epoch": 0.2019625005758511, "grad_norm": 1.025059301876579, "learning_rate": 4.690986858518849e-06, "loss": 0.1822, "step": 2192 }, { "epoch": 0.2020546367531211, "grad_norm": 0.9165762821501996, "learning_rate": 4.6906198871538265e-06, "loss": 0.1639, "step": 2193 }, { "epoch": 0.20214677293039113, "grad_norm": 1.0481638600088068, "learning_rate": 4.690252712389513e-06, "loss": 0.1855, "step": 2194 }, { "epoch": 0.20223890910766112, "grad_norm": 0.9492628877870042, "learning_rate": 4.6898853342599994e-06, "loss": 0.1567, "step": 2195 }, { "epoch": 0.20233104528493112, "grad_norm": 1.02721472974448, "learning_rate": 4.689517752799396e-06, "loss": 0.1904, "step": 2196 }, { "epoch": 0.20242318146220115, "grad_norm": 0.9749242814804707, "learning_rate": 4.689149968041834e-06, "loss": 0.1844, "step": 2197 }, { "epoch": 0.20251531763947114, "grad_norm": 1.0038104998977666, "learning_rate": 4.6887819800214615e-06, "loss": 0.1743, "step": 2198 }, { "epoch": 0.20260745381674114, "grad_norm": 1.005188089752521, "learning_rate": 4.688413788772447e-06, "loss": 0.1788, "step": 2199 }, { "epoch": 0.20269958999401114, "grad_norm": 0.9150762628236749, "learning_rate": 4.688045394328976e-06, "loss": 0.1737, "step": 2200 }, { "epoch": 0.20279172617128116, "grad_norm": 0.9398977054241704, "learning_rate": 4.687676796725256e-06, "loss": 0.1634, "step": 2201 }, { "epoch": 0.20288386234855116, "grad_norm": 0.9427101062525798, "learning_rate": 4.687307995995509e-06, "loss": 0.1661, "step": 2202 }, { "epoch": 0.20297599852582116, "grad_norm": 0.9673031921019366, "learning_rate": 4.68693899217398e-06, "loss": 0.1687, "step": 2203 }, { "epoch": 0.20306813470309118, "grad_norm": 0.9799184390341139, "learning_rate": 4.6865697852949285e-06, "loss": 0.1661, "step": 2204 }, { "epoch": 0.20316027088036118, "grad_norm": 0.9799685128302213, "learning_rate": 4.686200375392639e-06, "loss": 0.182, "step": 2205 }, { "epoch": 0.20325240705763117, "grad_norm": 0.9174324923130279, "learning_rate": 4.6858307625014084e-06, "loss": 0.1579, "step": 2206 }, { "epoch": 0.20334454323490117, "grad_norm": 0.9515024330765529, "learning_rate": 4.685460946655556e-06, "loss": 0.1784, "step": 2207 }, { "epoch": 0.2034366794121712, "grad_norm": 1.0194734725774568, "learning_rate": 4.68509092788942e-06, "loss": 0.1754, "step": 2208 }, { "epoch": 0.2035288155894412, "grad_norm": 0.9333275447322845, "learning_rate": 4.684720706237356e-06, "loss": 0.1666, "step": 2209 }, { "epoch": 0.2036209517667112, "grad_norm": 0.90507569208577, "learning_rate": 4.68435028173374e-06, "loss": 0.1643, "step": 2210 }, { "epoch": 0.2037130879439812, "grad_norm": 1.047547987412547, "learning_rate": 4.683979654412965e-06, "loss": 0.1965, "step": 2211 }, { "epoch": 0.2038052241212512, "grad_norm": 0.8708624834050619, "learning_rate": 4.683608824309443e-06, "loss": 0.1751, "step": 2212 }, { "epoch": 0.2038973602985212, "grad_norm": 0.9282192492999448, "learning_rate": 4.683237791457608e-06, "loss": 0.1772, "step": 2213 }, { "epoch": 0.20398949647579123, "grad_norm": 0.9310633455335762, "learning_rate": 4.682866555891908e-06, "loss": 0.1808, "step": 2214 }, { "epoch": 0.20408163265306123, "grad_norm": 0.8971755268118722, "learning_rate": 4.6824951176468134e-06, "loss": 0.1652, "step": 2215 }, { "epoch": 0.20417376883033123, "grad_norm": 0.9276435656284576, "learning_rate": 4.682123476756813e-06, "loss": 0.1685, "step": 2216 }, { "epoch": 0.20426590500760122, "grad_norm": 0.8795940253862768, "learning_rate": 4.681751633256413e-06, "loss": 0.1668, "step": 2217 }, { "epoch": 0.20435804118487125, "grad_norm": 0.9207697016900108, "learning_rate": 4.681379587180138e-06, "loss": 0.175, "step": 2218 }, { "epoch": 0.20445017736214124, "grad_norm": 0.9294762082235091, "learning_rate": 4.681007338562535e-06, "loss": 0.1796, "step": 2219 }, { "epoch": 0.20454231353941124, "grad_norm": 0.9734872584164359, "learning_rate": 4.680634887438165e-06, "loss": 0.1733, "step": 2220 }, { "epoch": 0.20463444971668127, "grad_norm": 0.924133430170834, "learning_rate": 4.6802622338416115e-06, "loss": 0.161, "step": 2221 }, { "epoch": 0.20472658589395126, "grad_norm": 0.8720069613724715, "learning_rate": 4.679889377807475e-06, "loss": 0.1514, "step": 2222 }, { "epoch": 0.20481872207122126, "grad_norm": 0.8737950896382805, "learning_rate": 4.679516319370374e-06, "loss": 0.1527, "step": 2223 }, { "epoch": 0.20491085824849126, "grad_norm": 0.9553532936958365, "learning_rate": 4.679143058564949e-06, "loss": 0.1844, "step": 2224 }, { "epoch": 0.20500299442576128, "grad_norm": 1.0052618454260092, "learning_rate": 4.678769595425856e-06, "loss": 0.1941, "step": 2225 }, { "epoch": 0.20509513060303128, "grad_norm": 0.9698993550808772, "learning_rate": 4.6783959299877725e-06, "loss": 0.1606, "step": 2226 }, { "epoch": 0.20518726678030128, "grad_norm": 0.9874980391024155, "learning_rate": 4.678022062285392e-06, "loss": 0.1753, "step": 2227 }, { "epoch": 0.2052794029575713, "grad_norm": 0.9071206997289588, "learning_rate": 4.677647992353428e-06, "loss": 0.1654, "step": 2228 }, { "epoch": 0.2053715391348413, "grad_norm": 0.9335882793666567, "learning_rate": 4.677273720226615e-06, "loss": 0.1701, "step": 2229 }, { "epoch": 0.2054636753121113, "grad_norm": 0.8977947704353159, "learning_rate": 4.6768992459397015e-06, "loss": 0.1505, "step": 2230 }, { "epoch": 0.20555581148938132, "grad_norm": 0.9250319869015167, "learning_rate": 4.67652456952746e-06, "loss": 0.1564, "step": 2231 }, { "epoch": 0.20564794766665132, "grad_norm": 0.9457739881833873, "learning_rate": 4.6761496910246766e-06, "loss": 0.1708, "step": 2232 }, { "epoch": 0.2057400838439213, "grad_norm": 0.8952312643772637, "learning_rate": 4.6757746104661606e-06, "loss": 0.159, "step": 2233 }, { "epoch": 0.2058322200211913, "grad_norm": 0.9126755394162147, "learning_rate": 4.675399327886738e-06, "loss": 0.1598, "step": 2234 }, { "epoch": 0.20592435619846133, "grad_norm": 0.9376971680289053, "learning_rate": 4.675023843321254e-06, "loss": 0.1645, "step": 2235 }, { "epoch": 0.20601649237573133, "grad_norm": 0.9812020676495905, "learning_rate": 4.674648156804571e-06, "loss": 0.1508, "step": 2236 }, { "epoch": 0.20610862855300133, "grad_norm": 1.0616534309734076, "learning_rate": 4.674272268371574e-06, "loss": 0.177, "step": 2237 }, { "epoch": 0.20620076473027135, "grad_norm": 0.9902707787686815, "learning_rate": 4.673896178057162e-06, "loss": 0.1725, "step": 2238 }, { "epoch": 0.20629290090754135, "grad_norm": 0.9329391926904818, "learning_rate": 4.673519885896256e-06, "loss": 0.182, "step": 2239 }, { "epoch": 0.20638503708481135, "grad_norm": 0.9761864416718115, "learning_rate": 4.673143391923794e-06, "loss": 0.1788, "step": 2240 }, { "epoch": 0.20647717326208134, "grad_norm": 0.9435942541496123, "learning_rate": 4.672766696174736e-06, "loss": 0.1664, "step": 2241 }, { "epoch": 0.20656930943935137, "grad_norm": 0.8962267290534224, "learning_rate": 4.672389798684055e-06, "loss": 0.173, "step": 2242 }, { "epoch": 0.20666144561662136, "grad_norm": 0.9259279692548622, "learning_rate": 4.672012699486748e-06, "loss": 0.1722, "step": 2243 }, { "epoch": 0.20675358179389136, "grad_norm": 0.9124221318809105, "learning_rate": 4.671635398617828e-06, "loss": 0.1616, "step": 2244 }, { "epoch": 0.20684571797116139, "grad_norm": 0.8905141403926174, "learning_rate": 4.671257896112327e-06, "loss": 0.1604, "step": 2245 }, { "epoch": 0.20693785414843138, "grad_norm": 0.9232672218716768, "learning_rate": 4.670880192005298e-06, "loss": 0.165, "step": 2246 }, { "epoch": 0.20702999032570138, "grad_norm": 0.9323052407767077, "learning_rate": 4.670502286331809e-06, "loss": 0.1805, "step": 2247 }, { "epoch": 0.2071221265029714, "grad_norm": 0.9634973629389704, "learning_rate": 4.670124179126948e-06, "loss": 0.1761, "step": 2248 }, { "epoch": 0.2072142626802414, "grad_norm": 0.9278126931365306, "learning_rate": 4.669745870425824e-06, "loss": 0.172, "step": 2249 }, { "epoch": 0.2073063988575114, "grad_norm": 0.849220352694767, "learning_rate": 4.669367360263563e-06, "loss": 0.1517, "step": 2250 }, { "epoch": 0.2073985350347814, "grad_norm": 0.9132996602721539, "learning_rate": 4.668988648675309e-06, "loss": 0.164, "step": 2251 }, { "epoch": 0.20749067121205142, "grad_norm": 0.8700626009502423, "learning_rate": 4.668609735696225e-06, "loss": 0.1628, "step": 2252 }, { "epoch": 0.20758280738932142, "grad_norm": 0.9505619550445175, "learning_rate": 4.668230621361494e-06, "loss": 0.1642, "step": 2253 }, { "epoch": 0.2076749435665914, "grad_norm": 0.9806376626717213, "learning_rate": 4.667851305706316e-06, "loss": 0.1875, "step": 2254 }, { "epoch": 0.20776707974386144, "grad_norm": 0.952808410032759, "learning_rate": 4.667471788765911e-06, "loss": 0.169, "step": 2255 }, { "epoch": 0.20785921592113143, "grad_norm": 1.0121652933247183, "learning_rate": 4.667092070575518e-06, "loss": 0.1769, "step": 2256 }, { "epoch": 0.20795135209840143, "grad_norm": 0.8796668160626014, "learning_rate": 4.666712151170392e-06, "loss": 0.151, "step": 2257 }, { "epoch": 0.20804348827567143, "grad_norm": 0.9275469318790555, "learning_rate": 4.6663320305858106e-06, "loss": 0.1674, "step": 2258 }, { "epoch": 0.20813562445294145, "grad_norm": 0.9732098865299389, "learning_rate": 4.665951708857066e-06, "loss": 0.1701, "step": 2259 }, { "epoch": 0.20822776063021145, "grad_norm": 0.9026738441040866, "learning_rate": 4.665571186019473e-06, "loss": 0.159, "step": 2260 }, { "epoch": 0.20831989680748145, "grad_norm": 0.9593697452130802, "learning_rate": 4.665190462108362e-06, "loss": 0.1625, "step": 2261 }, { "epoch": 0.20841203298475147, "grad_norm": 1.0499408869674476, "learning_rate": 4.664809537159084e-06, "loss": 0.179, "step": 2262 }, { "epoch": 0.20850416916202147, "grad_norm": 0.9468448196539467, "learning_rate": 4.664428411207007e-06, "loss": 0.1548, "step": 2263 }, { "epoch": 0.20859630533929147, "grad_norm": 0.9702256795456764, "learning_rate": 4.664047084287518e-06, "loss": 0.171, "step": 2264 }, { "epoch": 0.2086884415165615, "grad_norm": 0.98262891832084, "learning_rate": 4.663665556436025e-06, "loss": 0.1658, "step": 2265 }, { "epoch": 0.2087805776938315, "grad_norm": 0.9745838646564671, "learning_rate": 4.663283827687953e-06, "loss": 0.1773, "step": 2266 }, { "epoch": 0.20887271387110148, "grad_norm": 0.9604527581018515, "learning_rate": 4.662901898078746e-06, "loss": 0.1737, "step": 2267 }, { "epoch": 0.20896485004837148, "grad_norm": 0.9844126821220404, "learning_rate": 4.662519767643863e-06, "loss": 0.1678, "step": 2268 }, { "epoch": 0.2090569862256415, "grad_norm": 0.957428150635439, "learning_rate": 4.662137436418786e-06, "loss": 0.1804, "step": 2269 }, { "epoch": 0.2091491224029115, "grad_norm": 0.9349317076208526, "learning_rate": 4.661754904439018e-06, "loss": 0.177, "step": 2270 }, { "epoch": 0.2092412585801815, "grad_norm": 0.9298674765899402, "learning_rate": 4.661372171740073e-06, "loss": 0.1702, "step": 2271 }, { "epoch": 0.20933339475745152, "grad_norm": 0.9608961443235141, "learning_rate": 4.660989238357489e-06, "loss": 0.1725, "step": 2272 }, { "epoch": 0.20942553093472152, "grad_norm": 0.9192686971637103, "learning_rate": 4.660606104326822e-06, "loss": 0.1676, "step": 2273 }, { "epoch": 0.20951766711199152, "grad_norm": 0.9484587340929831, "learning_rate": 4.660222769683645e-06, "loss": 0.1689, "step": 2274 }, { "epoch": 0.20960980328926151, "grad_norm": 0.9166918556649847, "learning_rate": 4.659839234463552e-06, "loss": 0.1666, "step": 2275 }, { "epoch": 0.20970193946653154, "grad_norm": 0.9401238964664081, "learning_rate": 4.659455498702154e-06, "loss": 0.1676, "step": 2276 }, { "epoch": 0.20979407564380154, "grad_norm": 0.9401025662602998, "learning_rate": 4.65907156243508e-06, "loss": 0.1778, "step": 2277 }, { "epoch": 0.20988621182107153, "grad_norm": 0.9752374373591519, "learning_rate": 4.65868742569798e-06, "loss": 0.1672, "step": 2278 }, { "epoch": 0.20997834799834156, "grad_norm": 0.9809510171831634, "learning_rate": 4.658303088526519e-06, "loss": 0.184, "step": 2279 }, { "epoch": 0.21007048417561155, "grad_norm": 0.8925811679962771, "learning_rate": 4.657918550956384e-06, "loss": 0.1697, "step": 2280 }, { "epoch": 0.21016262035288155, "grad_norm": 0.9315080343767318, "learning_rate": 4.65753381302328e-06, "loss": 0.1737, "step": 2281 }, { "epoch": 0.21025475653015158, "grad_norm": 0.8436564944976962, "learning_rate": 4.657148874762929e-06, "loss": 0.1501, "step": 2282 }, { "epoch": 0.21034689270742157, "grad_norm": 0.9222611884806714, "learning_rate": 4.656763736211073e-06, "loss": 0.17, "step": 2283 }, { "epoch": 0.21043902888469157, "grad_norm": 0.9276655656851056, "learning_rate": 4.656378397403472e-06, "loss": 0.1628, "step": 2284 }, { "epoch": 0.21053116506196157, "grad_norm": 0.9146059613952714, "learning_rate": 4.655992858375904e-06, "loss": 0.17, "step": 2285 }, { "epoch": 0.2106233012392316, "grad_norm": 0.9490196294298086, "learning_rate": 4.655607119164168e-06, "loss": 0.1624, "step": 2286 }, { "epoch": 0.2107154374165016, "grad_norm": 0.9463987771147194, "learning_rate": 4.655221179804078e-06, "loss": 0.1729, "step": 2287 }, { "epoch": 0.21080757359377159, "grad_norm": 0.9384821744611376, "learning_rate": 4.65483504033147e-06, "loss": 0.1709, "step": 2288 }, { "epoch": 0.2108997097710416, "grad_norm": 0.9991342293889612, "learning_rate": 4.654448700782197e-06, "loss": 0.173, "step": 2289 }, { "epoch": 0.2109918459483116, "grad_norm": 0.9473308951074153, "learning_rate": 4.65406216119213e-06, "loss": 0.1816, "step": 2290 }, { "epoch": 0.2110839821255816, "grad_norm": 0.9682808078755373, "learning_rate": 4.653675421597159e-06, "loss": 0.1666, "step": 2291 }, { "epoch": 0.2111761183028516, "grad_norm": 0.9236695196106048, "learning_rate": 4.653288482033194e-06, "loss": 0.1725, "step": 2292 }, { "epoch": 0.21126825448012163, "grad_norm": 0.9706887089132648, "learning_rate": 4.652901342536162e-06, "loss": 0.1893, "step": 2293 }, { "epoch": 0.21136039065739162, "grad_norm": 1.0069353159222993, "learning_rate": 4.652514003142008e-06, "loss": 0.1593, "step": 2294 }, { "epoch": 0.21145252683466162, "grad_norm": 1.0053797728020337, "learning_rate": 4.652126463886697e-06, "loss": 0.1876, "step": 2295 }, { "epoch": 0.21154466301193164, "grad_norm": 0.9440259393861917, "learning_rate": 4.651738724806213e-06, "loss": 0.1547, "step": 2296 }, { "epoch": 0.21163679918920164, "grad_norm": 0.8471903564795712, "learning_rate": 4.651350785936556e-06, "loss": 0.1456, "step": 2297 }, { "epoch": 0.21172893536647164, "grad_norm": 0.9864966435131894, "learning_rate": 4.650962647313747e-06, "loss": 0.1737, "step": 2298 }, { "epoch": 0.21182107154374166, "grad_norm": 1.018468284517782, "learning_rate": 4.650574308973826e-06, "loss": 0.1832, "step": 2299 }, { "epoch": 0.21191320772101166, "grad_norm": 1.0098138817405005, "learning_rate": 4.6501857709528475e-06, "loss": 0.1904, "step": 2300 }, { "epoch": 0.21200534389828166, "grad_norm": 1.0461555531592517, "learning_rate": 4.649797033286889e-06, "loss": 0.1821, "step": 2301 }, { "epoch": 0.21209748007555165, "grad_norm": 0.9609524234503527, "learning_rate": 4.6494080960120444e-06, "loss": 0.1708, "step": 2302 }, { "epoch": 0.21218961625282168, "grad_norm": 0.8956399373758597, "learning_rate": 4.6490189591644274e-06, "loss": 0.1596, "step": 2303 }, { "epoch": 0.21228175243009167, "grad_norm": 0.9086186604621006, "learning_rate": 4.648629622780169e-06, "loss": 0.1742, "step": 2304 }, { "epoch": 0.21237388860736167, "grad_norm": 0.9054650267010966, "learning_rate": 4.648240086895418e-06, "loss": 0.1585, "step": 2305 }, { "epoch": 0.2124660247846317, "grad_norm": 0.973003929436934, "learning_rate": 4.647850351546345e-06, "loss": 0.1628, "step": 2306 }, { "epoch": 0.2125581609619017, "grad_norm": 0.920435551963916, "learning_rate": 4.647460416769134e-06, "loss": 0.1639, "step": 2307 }, { "epoch": 0.2126502971391717, "grad_norm": 1.010065351946995, "learning_rate": 4.647070282599994e-06, "loss": 0.171, "step": 2308 }, { "epoch": 0.2127424333164417, "grad_norm": 1.0122412359159743, "learning_rate": 4.646679949075146e-06, "loss": 0.1932, "step": 2309 }, { "epoch": 0.2128345694937117, "grad_norm": 0.9360254209298183, "learning_rate": 4.646289416230834e-06, "loss": 0.1694, "step": 2310 }, { "epoch": 0.2129267056709817, "grad_norm": 1.0354873116708483, "learning_rate": 4.645898684103318e-06, "loss": 0.1771, "step": 2311 }, { "epoch": 0.2130188418482517, "grad_norm": 0.9613647007080832, "learning_rate": 4.6455077527288795e-06, "loss": 0.18, "step": 2312 }, { "epoch": 0.21311097802552173, "grad_norm": 0.9226544949065173, "learning_rate": 4.6451166221438145e-06, "loss": 0.1657, "step": 2313 }, { "epoch": 0.21320311420279173, "grad_norm": 1.0212738272454622, "learning_rate": 4.644725292384441e-06, "loss": 0.1681, "step": 2314 }, { "epoch": 0.21329525038006172, "grad_norm": 0.9275529095731044, "learning_rate": 4.6443337634870926e-06, "loss": 0.1597, "step": 2315 }, { "epoch": 0.21338738655733175, "grad_norm": 0.9251203990872005, "learning_rate": 4.643942035488123e-06, "loss": 0.1665, "step": 2316 }, { "epoch": 0.21347952273460175, "grad_norm": 0.8799100754109026, "learning_rate": 4.643550108423905e-06, "loss": 0.1609, "step": 2317 }, { "epoch": 0.21357165891187174, "grad_norm": 0.9864751850142733, "learning_rate": 4.64315798233083e-06, "loss": 0.1724, "step": 2318 }, { "epoch": 0.21366379508914174, "grad_norm": 0.9305526319557794, "learning_rate": 4.642765657245304e-06, "loss": 0.1703, "step": 2319 }, { "epoch": 0.21375593126641176, "grad_norm": 0.9165264859623368, "learning_rate": 4.642373133203757e-06, "loss": 0.1597, "step": 2320 }, { "epoch": 0.21384806744368176, "grad_norm": 0.9499805101389946, "learning_rate": 4.641980410242634e-06, "loss": 0.1678, "step": 2321 }, { "epoch": 0.21394020362095176, "grad_norm": 0.9481987039539749, "learning_rate": 4.6415874883983995e-06, "loss": 0.1672, "step": 2322 }, { "epoch": 0.21403233979822178, "grad_norm": 0.8697561079038395, "learning_rate": 4.641194367707535e-06, "loss": 0.1636, "step": 2323 }, { "epoch": 0.21412447597549178, "grad_norm": 0.9583502223016455, "learning_rate": 4.640801048206545e-06, "loss": 0.1702, "step": 2324 }, { "epoch": 0.21421661215276178, "grad_norm": 0.9973479221639489, "learning_rate": 4.6404075299319465e-06, "loss": 0.1651, "step": 2325 }, { "epoch": 0.2143087483300318, "grad_norm": 0.8790869012185121, "learning_rate": 4.640013812920278e-06, "loss": 0.1584, "step": 2326 }, { "epoch": 0.2144008845073018, "grad_norm": 0.8205862861509154, "learning_rate": 4.639619897208097e-06, "loss": 0.1542, "step": 2327 }, { "epoch": 0.2144930206845718, "grad_norm": 0.970252112224293, "learning_rate": 4.639225782831978e-06, "loss": 0.1679, "step": 2328 }, { "epoch": 0.2145851568618418, "grad_norm": 0.9452446931266928, "learning_rate": 4.638831469828515e-06, "loss": 0.1701, "step": 2329 }, { "epoch": 0.21467729303911182, "grad_norm": 0.9891248909941764, "learning_rate": 4.638436958234321e-06, "loss": 0.1851, "step": 2330 }, { "epoch": 0.2147694292163818, "grad_norm": 0.8998482758735131, "learning_rate": 4.638042248086023e-06, "loss": 0.1676, "step": 2331 }, { "epoch": 0.2148615653936518, "grad_norm": 1.0211991710865993, "learning_rate": 4.637647339420273e-06, "loss": 0.172, "step": 2332 }, { "epoch": 0.21495370157092183, "grad_norm": 1.0033330908470734, "learning_rate": 4.637252232273738e-06, "loss": 0.1764, "step": 2333 }, { "epoch": 0.21504583774819183, "grad_norm": 0.8913172960585651, "learning_rate": 4.6368569266831035e-06, "loss": 0.1557, "step": 2334 }, { "epoch": 0.21513797392546183, "grad_norm": 1.043087204088054, "learning_rate": 4.636461422685072e-06, "loss": 0.1745, "step": 2335 }, { "epoch": 0.21523011010273183, "grad_norm": 0.8909211705779044, "learning_rate": 4.63606572031637e-06, "loss": 0.1649, "step": 2336 }, { "epoch": 0.21532224628000185, "grad_norm": 0.9538343349636392, "learning_rate": 4.635669819613734e-06, "loss": 0.1642, "step": 2337 }, { "epoch": 0.21541438245727185, "grad_norm": 0.9715472692776371, "learning_rate": 4.635273720613925e-06, "loss": 0.1712, "step": 2338 }, { "epoch": 0.21550651863454184, "grad_norm": 0.9349140770076221, "learning_rate": 4.634877423353723e-06, "loss": 0.1721, "step": 2339 }, { "epoch": 0.21559865481181187, "grad_norm": 0.9606087264919402, "learning_rate": 4.634480927869921e-06, "loss": 0.1668, "step": 2340 }, { "epoch": 0.21569079098908187, "grad_norm": 0.8532265613158703, "learning_rate": 4.634084234199335e-06, "loss": 0.1369, "step": 2341 }, { "epoch": 0.21578292716635186, "grad_norm": 0.8989400773969781, "learning_rate": 4.633687342378799e-06, "loss": 0.1698, "step": 2342 }, { "epoch": 0.2158750633436219, "grad_norm": 1.0121332804629664, "learning_rate": 4.633290252445164e-06, "loss": 0.1878, "step": 2343 }, { "epoch": 0.21596719952089188, "grad_norm": 1.030553980973197, "learning_rate": 4.632892964435299e-06, "loss": 0.1805, "step": 2344 }, { "epoch": 0.21605933569816188, "grad_norm": 0.9629513385761489, "learning_rate": 4.632495478386092e-06, "loss": 0.1634, "step": 2345 }, { "epoch": 0.21615147187543188, "grad_norm": 0.906324183769925, "learning_rate": 4.632097794334451e-06, "loss": 0.172, "step": 2346 }, { "epoch": 0.2162436080527019, "grad_norm": 0.9551279217162751, "learning_rate": 4.631699912317301e-06, "loss": 0.1656, "step": 2347 }, { "epoch": 0.2163357442299719, "grad_norm": 0.9365362458955379, "learning_rate": 4.631301832371584e-06, "loss": 0.1759, "step": 2348 }, { "epoch": 0.2164278804072419, "grad_norm": 0.9881321567680756, "learning_rate": 4.630903554534262e-06, "loss": 0.1685, "step": 2349 }, { "epoch": 0.21652001658451192, "grad_norm": 0.9674707035004548, "learning_rate": 4.630505078842317e-06, "loss": 0.1823, "step": 2350 }, { "epoch": 0.21661215276178192, "grad_norm": 0.9793487730071272, "learning_rate": 4.630106405332745e-06, "loss": 0.1827, "step": 2351 }, { "epoch": 0.21670428893905191, "grad_norm": 0.9409326842175851, "learning_rate": 4.629707534042564e-06, "loss": 0.1639, "step": 2352 }, { "epoch": 0.2167964251163219, "grad_norm": 1.0087420208183764, "learning_rate": 4.6293084650088095e-06, "loss": 0.1795, "step": 2353 }, { "epoch": 0.21688856129359194, "grad_norm": 1.044876791324977, "learning_rate": 4.628909198268534e-06, "loss": 0.169, "step": 2354 }, { "epoch": 0.21698069747086193, "grad_norm": 0.9389750409437341, "learning_rate": 4.628509733858813e-06, "loss": 0.1679, "step": 2355 }, { "epoch": 0.21707283364813193, "grad_norm": 0.8676673622848394, "learning_rate": 4.628110071816732e-06, "loss": 0.1624, "step": 2356 }, { "epoch": 0.21716496982540195, "grad_norm": 0.9154277930953563, "learning_rate": 4.6277102121794015e-06, "loss": 0.1751, "step": 2357 }, { "epoch": 0.21725710600267195, "grad_norm": 0.9619309892433496, "learning_rate": 4.62731015498395e-06, "loss": 0.1738, "step": 2358 }, { "epoch": 0.21734924217994195, "grad_norm": 0.926667275696638, "learning_rate": 4.626909900267521e-06, "loss": 0.1743, "step": 2359 }, { "epoch": 0.21744137835721197, "grad_norm": 0.9403910037068345, "learning_rate": 4.626509448067279e-06, "loss": 0.1655, "step": 2360 }, { "epoch": 0.21753351453448197, "grad_norm": 0.8818296140485228, "learning_rate": 4.626108798420406e-06, "loss": 0.1574, "step": 2361 }, { "epoch": 0.21762565071175197, "grad_norm": 0.9323796067026165, "learning_rate": 4.625707951364102e-06, "loss": 0.1781, "step": 2362 }, { "epoch": 0.21771778688902196, "grad_norm": 0.9243899591524739, "learning_rate": 4.625306906935586e-06, "loss": 0.1743, "step": 2363 }, { "epoch": 0.217809923066292, "grad_norm": 0.9527407527487539, "learning_rate": 4.624905665172095e-06, "loss": 0.1642, "step": 2364 }, { "epoch": 0.21790205924356199, "grad_norm": 0.8815844393739036, "learning_rate": 4.6245042261108845e-06, "loss": 0.1668, "step": 2365 }, { "epoch": 0.21799419542083198, "grad_norm": 0.9479977436616651, "learning_rate": 4.6241025897892275e-06, "loss": 0.1842, "step": 2366 }, { "epoch": 0.218086331598102, "grad_norm": 1.0207347307047026, "learning_rate": 4.623700756244417e-06, "loss": 0.1859, "step": 2367 }, { "epoch": 0.218178467775372, "grad_norm": 0.9197498111886677, "learning_rate": 4.6232987255137625e-06, "loss": 0.1515, "step": 2368 }, { "epoch": 0.218270603952642, "grad_norm": 0.8838719986716687, "learning_rate": 4.622896497634593e-06, "loss": 0.1483, "step": 2369 }, { "epoch": 0.218362740129912, "grad_norm": 0.900451505897105, "learning_rate": 4.622494072644255e-06, "loss": 0.1735, "step": 2370 }, { "epoch": 0.21845487630718202, "grad_norm": 0.8934791164057562, "learning_rate": 4.622091450580114e-06, "loss": 0.1656, "step": 2371 }, { "epoch": 0.21854701248445202, "grad_norm": 0.895686185724979, "learning_rate": 4.621688631479554e-06, "loss": 0.1653, "step": 2372 }, { "epoch": 0.21863914866172202, "grad_norm": 0.9833309677439741, "learning_rate": 4.621285615379976e-06, "loss": 0.1743, "step": 2373 }, { "epoch": 0.21873128483899204, "grad_norm": 0.9372570832200562, "learning_rate": 4.620882402318799e-06, "loss": 0.1736, "step": 2374 }, { "epoch": 0.21882342101626204, "grad_norm": 0.8761533867954667, "learning_rate": 4.620478992333463e-06, "loss": 0.1618, "step": 2375 }, { "epoch": 0.21891555719353203, "grad_norm": 0.8919996604272982, "learning_rate": 4.620075385461426e-06, "loss": 0.1608, "step": 2376 }, { "epoch": 0.21900769337080206, "grad_norm": 0.8657870181470666, "learning_rate": 4.61967158174016e-06, "loss": 0.1508, "step": 2377 }, { "epoch": 0.21909982954807206, "grad_norm": 0.923022145836738, "learning_rate": 4.61926758120716e-06, "loss": 0.1579, "step": 2378 }, { "epoch": 0.21919196572534205, "grad_norm": 0.9272554038487848, "learning_rate": 4.618863383899937e-06, "loss": 0.1696, "step": 2379 }, { "epoch": 0.21928410190261205, "grad_norm": 0.9473433534596402, "learning_rate": 4.618458989856021e-06, "loss": 0.1765, "step": 2380 }, { "epoch": 0.21937623807988207, "grad_norm": 0.923103183354492, "learning_rate": 4.618054399112959e-06, "loss": 0.1675, "step": 2381 }, { "epoch": 0.21946837425715207, "grad_norm": 0.9369984252173658, "learning_rate": 4.617649611708318e-06, "loss": 0.1711, "step": 2382 }, { "epoch": 0.21956051043442207, "grad_norm": 1.0737046620535462, "learning_rate": 4.617244627679684e-06, "loss": 0.1829, "step": 2383 }, { "epoch": 0.2196526466116921, "grad_norm": 0.9481793986325134, "learning_rate": 4.6168394470646575e-06, "loss": 0.1639, "step": 2384 }, { "epoch": 0.2197447827889621, "grad_norm": 1.0068030260607288, "learning_rate": 4.61643406990086e-06, "loss": 0.1714, "step": 2385 }, { "epoch": 0.2198369189662321, "grad_norm": 0.9234873321825561, "learning_rate": 4.616028496225933e-06, "loss": 0.165, "step": 2386 }, { "epoch": 0.21992905514350208, "grad_norm": 0.9145551409494252, "learning_rate": 4.6156227260775314e-06, "loss": 0.1637, "step": 2387 }, { "epoch": 0.2200211913207721, "grad_norm": 0.9326334048386388, "learning_rate": 4.615216759493332e-06, "loss": 0.1835, "step": 2388 }, { "epoch": 0.2201133274980421, "grad_norm": 0.8781396547544478, "learning_rate": 4.614810596511028e-06, "loss": 0.1571, "step": 2389 }, { "epoch": 0.2202054636753121, "grad_norm": 0.9321770521056157, "learning_rate": 4.614404237168334e-06, "loss": 0.1606, "step": 2390 }, { "epoch": 0.22029759985258213, "grad_norm": 0.9109442841057521, "learning_rate": 4.613997681502977e-06, "loss": 0.1523, "step": 2391 }, { "epoch": 0.22038973602985212, "grad_norm": 0.9073470892509928, "learning_rate": 4.61359092955271e-06, "loss": 0.1544, "step": 2392 }, { "epoch": 0.22048187220712212, "grad_norm": 0.9727998985444354, "learning_rate": 4.613183981355297e-06, "loss": 0.1606, "step": 2393 }, { "epoch": 0.22057400838439215, "grad_norm": 0.9071330785105896, "learning_rate": 4.612776836948524e-06, "loss": 0.1543, "step": 2394 }, { "epoch": 0.22066614456166214, "grad_norm": 0.9097721335699841, "learning_rate": 4.612369496370194e-06, "loss": 0.1574, "step": 2395 }, { "epoch": 0.22075828073893214, "grad_norm": 0.9304001670063471, "learning_rate": 4.611961959658129e-06, "loss": 0.1568, "step": 2396 }, { "epoch": 0.22085041691620214, "grad_norm": 0.9445814202450755, "learning_rate": 4.611554226850168e-06, "loss": 0.1821, "step": 2397 }, { "epoch": 0.22094255309347216, "grad_norm": 0.9995514602468918, "learning_rate": 4.6111462979841704e-06, "loss": 0.1621, "step": 2398 }, { "epoch": 0.22103468927074216, "grad_norm": 0.9832850713868987, "learning_rate": 4.610738173098012e-06, "loss": 0.1711, "step": 2399 }, { "epoch": 0.22112682544801215, "grad_norm": 0.9329304727726307, "learning_rate": 4.610329852229587e-06, "loss": 0.1566, "step": 2400 }, { "epoch": 0.22121896162528218, "grad_norm": 0.9107929098618456, "learning_rate": 4.6099213354168085e-06, "loss": 0.1735, "step": 2401 }, { "epoch": 0.22131109780255218, "grad_norm": 0.9163785528280652, "learning_rate": 4.609512622697606e-06, "loss": 0.1564, "step": 2402 }, { "epoch": 0.22140323397982217, "grad_norm": 0.9260260556809167, "learning_rate": 4.609103714109931e-06, "loss": 0.1583, "step": 2403 }, { "epoch": 0.22149537015709217, "grad_norm": 0.995726300308974, "learning_rate": 4.608694609691747e-06, "loss": 0.1728, "step": 2404 }, { "epoch": 0.2215875063343622, "grad_norm": 0.9277977487816504, "learning_rate": 4.608285309481043e-06, "loss": 0.1636, "step": 2405 }, { "epoch": 0.2216796425116322, "grad_norm": 0.9227743080386674, "learning_rate": 4.607875813515821e-06, "loss": 0.1707, "step": 2406 }, { "epoch": 0.2217717786889022, "grad_norm": 0.8860722109582515, "learning_rate": 4.607466121834103e-06, "loss": 0.1579, "step": 2407 }, { "epoch": 0.2218639148661722, "grad_norm": 0.920265036722145, "learning_rate": 4.607056234473928e-06, "loss": 0.1578, "step": 2408 }, { "epoch": 0.2219560510434422, "grad_norm": 0.8811025661892827, "learning_rate": 4.606646151473355e-06, "loss": 0.1675, "step": 2409 }, { "epoch": 0.2220481872207122, "grad_norm": 0.8916127939474685, "learning_rate": 4.606235872870461e-06, "loss": 0.1773, "step": 2410 }, { "epoch": 0.22214032339798223, "grad_norm": 0.9900333498792695, "learning_rate": 4.605825398703339e-06, "loss": 0.1751, "step": 2411 }, { "epoch": 0.22223245957525223, "grad_norm": 0.9706997511826468, "learning_rate": 4.605414729010102e-06, "loss": 0.1808, "step": 2412 }, { "epoch": 0.22232459575252222, "grad_norm": 0.9164289847295664, "learning_rate": 4.605003863828881e-06, "loss": 0.1609, "step": 2413 }, { "epoch": 0.22241673192979222, "grad_norm": 0.9547324103869823, "learning_rate": 4.604592803197825e-06, "loss": 0.1822, "step": 2414 }, { "epoch": 0.22250886810706225, "grad_norm": 0.8917918711512808, "learning_rate": 4.6041815471551e-06, "loss": 0.1579, "step": 2415 }, { "epoch": 0.22260100428433224, "grad_norm": 0.9874142973301107, "learning_rate": 4.603770095738892e-06, "loss": 0.1781, "step": 2416 }, { "epoch": 0.22269314046160224, "grad_norm": 0.9212348176350641, "learning_rate": 4.603358448987405e-06, "loss": 0.1673, "step": 2417 }, { "epoch": 0.22278527663887226, "grad_norm": 0.9412577615662902, "learning_rate": 4.602946606938858e-06, "loss": 0.1729, "step": 2418 }, { "epoch": 0.22287741281614226, "grad_norm": 0.8402429294824661, "learning_rate": 4.6025345696314935e-06, "loss": 0.1637, "step": 2419 }, { "epoch": 0.22296954899341226, "grad_norm": 1.0231878663886147, "learning_rate": 4.602122337103568e-06, "loss": 0.1861, "step": 2420 }, { "epoch": 0.22306168517068226, "grad_norm": 0.9267704717175042, "learning_rate": 4.601709909393357e-06, "loss": 0.1789, "step": 2421 }, { "epoch": 0.22315382134795228, "grad_norm": 0.9402886812499314, "learning_rate": 4.601297286539155e-06, "loss": 0.1844, "step": 2422 }, { "epoch": 0.22324595752522228, "grad_norm": 0.9232675247411815, "learning_rate": 4.600884468579273e-06, "loss": 0.1718, "step": 2423 }, { "epoch": 0.22333809370249227, "grad_norm": 0.8850107261659447, "learning_rate": 4.600471455552043e-06, "loss": 0.1545, "step": 2424 }, { "epoch": 0.2234302298797623, "grad_norm": 0.9001963815880184, "learning_rate": 4.600058247495812e-06, "loss": 0.168, "step": 2425 }, { "epoch": 0.2235223660570323, "grad_norm": 1.0125379821849876, "learning_rate": 4.599644844448946e-06, "loss": 0.175, "step": 2426 }, { "epoch": 0.2236145022343023, "grad_norm": 0.9715366729797272, "learning_rate": 4.599231246449831e-06, "loss": 0.1705, "step": 2427 }, { "epoch": 0.22370663841157232, "grad_norm": 0.983021655050111, "learning_rate": 4.5988174535368686e-06, "loss": 0.1568, "step": 2428 }, { "epoch": 0.22379877458884231, "grad_norm": 0.9512948661524263, "learning_rate": 4.59840346574848e-06, "loss": 0.1809, "step": 2429 }, { "epoch": 0.2238909107661123, "grad_norm": 0.9968867206661762, "learning_rate": 4.597989283123104e-06, "loss": 0.1843, "step": 2430 }, { "epoch": 0.2239830469433823, "grad_norm": 1.0375629522642, "learning_rate": 4.597574905699196e-06, "loss": 0.1685, "step": 2431 }, { "epoch": 0.22407518312065233, "grad_norm": 1.0263084195890293, "learning_rate": 4.597160333515233e-06, "loss": 0.1767, "step": 2432 }, { "epoch": 0.22416731929792233, "grad_norm": 0.9303738801673548, "learning_rate": 4.596745566609707e-06, "loss": 0.1598, "step": 2433 }, { "epoch": 0.22425945547519233, "grad_norm": 0.927679806741521, "learning_rate": 4.5963306050211296e-06, "loss": 0.1649, "step": 2434 }, { "epoch": 0.22435159165246235, "grad_norm": 0.9658005546537741, "learning_rate": 4.595915448788031e-06, "loss": 0.1818, "step": 2435 }, { "epoch": 0.22444372782973235, "grad_norm": 0.9499725030180026, "learning_rate": 4.5955000979489565e-06, "loss": 0.1766, "step": 2436 }, { "epoch": 0.22453586400700234, "grad_norm": 0.9462439750542999, "learning_rate": 4.595084552542472e-06, "loss": 0.1728, "step": 2437 }, { "epoch": 0.22462800018427234, "grad_norm": 0.9720158256266674, "learning_rate": 4.594668812607162e-06, "loss": 0.1667, "step": 2438 }, { "epoch": 0.22472013636154237, "grad_norm": 0.9173112612757395, "learning_rate": 4.594252878181627e-06, "loss": 0.163, "step": 2439 }, { "epoch": 0.22481227253881236, "grad_norm": 0.940867147194557, "learning_rate": 4.593836749304487e-06, "loss": 0.1511, "step": 2440 }, { "epoch": 0.22490440871608236, "grad_norm": 0.9835880434614503, "learning_rate": 4.59342042601438e-06, "loss": 0.1839, "step": 2441 }, { "epoch": 0.22499654489335238, "grad_norm": 0.9879031852310044, "learning_rate": 4.59300390834996e-06, "loss": 0.178, "step": 2442 }, { "epoch": 0.22508868107062238, "grad_norm": 0.9081333169346187, "learning_rate": 4.592587196349902e-06, "loss": 0.1798, "step": 2443 }, { "epoch": 0.22518081724789238, "grad_norm": 0.8798711443957609, "learning_rate": 4.592170290052898e-06, "loss": 0.1654, "step": 2444 }, { "epoch": 0.2252729534251624, "grad_norm": 0.9326071476847042, "learning_rate": 4.591753189497658e-06, "loss": 0.1544, "step": 2445 }, { "epoch": 0.2253650896024324, "grad_norm": 1.1534455607712155, "learning_rate": 4.591335894722909e-06, "loss": 0.1937, "step": 2446 }, { "epoch": 0.2254572257797024, "grad_norm": 0.9028689022204759, "learning_rate": 4.5909184057673976e-06, "loss": 0.1604, "step": 2447 }, { "epoch": 0.2255493619569724, "grad_norm": 0.8946199825952447, "learning_rate": 4.590500722669886e-06, "loss": 0.1697, "step": 2448 }, { "epoch": 0.22564149813424242, "grad_norm": 0.9077763847398155, "learning_rate": 4.590082845469158e-06, "loss": 0.1507, "step": 2449 }, { "epoch": 0.22573363431151242, "grad_norm": 0.9592178561443785, "learning_rate": 4.589664774204013e-06, "loss": 0.1696, "step": 2450 }, { "epoch": 0.2258257704887824, "grad_norm": 0.9613902738050768, "learning_rate": 4.589246508913267e-06, "loss": 0.1774, "step": 2451 }, { "epoch": 0.22591790666605244, "grad_norm": 0.8930021527771865, "learning_rate": 4.58882804963576e-06, "loss": 0.1676, "step": 2452 }, { "epoch": 0.22601004284332243, "grad_norm": 0.9254850657625637, "learning_rate": 4.588409396410342e-06, "loss": 0.1784, "step": 2453 }, { "epoch": 0.22610217902059243, "grad_norm": 0.8648068592964746, "learning_rate": 4.587990549275889e-06, "loss": 0.1647, "step": 2454 }, { "epoch": 0.22619431519786243, "grad_norm": 0.9218315395309605, "learning_rate": 4.587571508271288e-06, "loss": 0.1643, "step": 2455 }, { "epoch": 0.22628645137513245, "grad_norm": 0.9359752443930277, "learning_rate": 4.587152273435447e-06, "loss": 0.166, "step": 2456 }, { "epoch": 0.22637858755240245, "grad_norm": 0.9467255151495074, "learning_rate": 4.586732844807293e-06, "loss": 0.1695, "step": 2457 }, { "epoch": 0.22647072372967245, "grad_norm": 0.9775875127483362, "learning_rate": 4.58631322242577e-06, "loss": 0.1672, "step": 2458 }, { "epoch": 0.22656285990694247, "grad_norm": 1.0104603751151366, "learning_rate": 4.58589340632984e-06, "loss": 0.1704, "step": 2459 }, { "epoch": 0.22665499608421247, "grad_norm": 0.9989237155219942, "learning_rate": 4.585473396558482e-06, "loss": 0.1896, "step": 2460 }, { "epoch": 0.22674713226148246, "grad_norm": 0.9295942764578942, "learning_rate": 4.585053193150695e-06, "loss": 0.148, "step": 2461 }, { "epoch": 0.2268392684387525, "grad_norm": 0.9990328294585157, "learning_rate": 4.584632796145495e-06, "loss": 0.1617, "step": 2462 }, { "epoch": 0.2269314046160225, "grad_norm": 1.0342567607730049, "learning_rate": 4.584212205581915e-06, "loss": 0.1736, "step": 2463 }, { "epoch": 0.22702354079329248, "grad_norm": 0.9424632951659273, "learning_rate": 4.5837914214990085e-06, "loss": 0.1685, "step": 2464 }, { "epoch": 0.22711567697056248, "grad_norm": 0.8664763361776535, "learning_rate": 4.583370443935843e-06, "loss": 0.1574, "step": 2465 }, { "epoch": 0.2272078131478325, "grad_norm": 0.9720244211223175, "learning_rate": 4.582949272931508e-06, "loss": 0.1708, "step": 2466 }, { "epoch": 0.2272999493251025, "grad_norm": 0.9624471682686935, "learning_rate": 4.582527908525109e-06, "loss": 0.1701, "step": 2467 }, { "epoch": 0.2273920855023725, "grad_norm": 0.9668421091692699, "learning_rate": 4.5821063507557695e-06, "loss": 0.1771, "step": 2468 }, { "epoch": 0.22748422167964252, "grad_norm": 0.9317703673514961, "learning_rate": 4.581684599662632e-06, "loss": 0.1706, "step": 2469 }, { "epoch": 0.22757635785691252, "grad_norm": 0.9286142161019658, "learning_rate": 4.581262655284854e-06, "loss": 0.1661, "step": 2470 }, { "epoch": 0.22766849403418252, "grad_norm": 0.9792223907797291, "learning_rate": 4.580840517661615e-06, "loss": 0.1513, "step": 2471 }, { "epoch": 0.2277606302114525, "grad_norm": 0.8942474980522034, "learning_rate": 4.58041818683211e-06, "loss": 0.1607, "step": 2472 }, { "epoch": 0.22785276638872254, "grad_norm": 0.9241481692834103, "learning_rate": 4.579995662835552e-06, "loss": 0.1693, "step": 2473 }, { "epoch": 0.22794490256599254, "grad_norm": 0.9423013205176731, "learning_rate": 4.579572945711174e-06, "loss": 0.1548, "step": 2474 }, { "epoch": 0.22803703874326253, "grad_norm": 1.0187746405002311, "learning_rate": 4.579150035498223e-06, "loss": 0.1702, "step": 2475 }, { "epoch": 0.22812917492053256, "grad_norm": 1.0143035138735677, "learning_rate": 4.578726932235969e-06, "loss": 0.1638, "step": 2476 }, { "epoch": 0.22822131109780255, "grad_norm": 0.9930147973024229, "learning_rate": 4.5783036359636935e-06, "loss": 0.1677, "step": 2477 }, { "epoch": 0.22831344727507255, "grad_norm": 0.9625773296101545, "learning_rate": 4.5778801467207035e-06, "loss": 0.1699, "step": 2478 }, { "epoch": 0.22840558345234258, "grad_norm": 0.9811159556227943, "learning_rate": 4.577456464546317e-06, "loss": 0.1745, "step": 2479 }, { "epoch": 0.22849771962961257, "grad_norm": 0.9546391442575929, "learning_rate": 4.5770325894798754e-06, "loss": 0.1701, "step": 2480 }, { "epoch": 0.22858985580688257, "grad_norm": 0.8864094301933726, "learning_rate": 4.5766085215607335e-06, "loss": 0.1704, "step": 2481 }, { "epoch": 0.22868199198415257, "grad_norm": 0.9331663904936708, "learning_rate": 4.576184260828267e-06, "loss": 0.1603, "step": 2482 }, { "epoch": 0.2287741281614226, "grad_norm": 1.0563673952258763, "learning_rate": 4.575759807321869e-06, "loss": 0.179, "step": 2483 }, { "epoch": 0.2288662643386926, "grad_norm": 0.9305700277198957, "learning_rate": 4.575335161080948e-06, "loss": 0.1595, "step": 2484 }, { "epoch": 0.22895840051596258, "grad_norm": 0.9856789510726491, "learning_rate": 4.574910322144935e-06, "loss": 0.1785, "step": 2485 }, { "epoch": 0.2290505366932326, "grad_norm": 0.9650074532401066, "learning_rate": 4.574485290553276e-06, "loss": 0.1671, "step": 2486 }, { "epoch": 0.2291426728705026, "grad_norm": 0.9194999600270781, "learning_rate": 4.574060066345434e-06, "loss": 0.1794, "step": 2487 }, { "epoch": 0.2292348090477726, "grad_norm": 0.9499206998494362, "learning_rate": 4.573634649560891e-06, "loss": 0.1651, "step": 2488 }, { "epoch": 0.2293269452250426, "grad_norm": 0.9880827662100659, "learning_rate": 4.573209040239148e-06, "loss": 0.172, "step": 2489 }, { "epoch": 0.22941908140231262, "grad_norm": 0.9704437917207153, "learning_rate": 4.572783238419723e-06, "loss": 0.165, "step": 2490 }, { "epoch": 0.22951121757958262, "grad_norm": 0.9473886009713728, "learning_rate": 4.572357244142151e-06, "loss": 0.1687, "step": 2491 }, { "epoch": 0.22960335375685262, "grad_norm": 1.0115985599601431, "learning_rate": 4.5719310574459846e-06, "loss": 0.1661, "step": 2492 }, { "epoch": 0.22969548993412264, "grad_norm": 1.0523002183614862, "learning_rate": 4.5715046783707976e-06, "loss": 0.161, "step": 2493 }, { "epoch": 0.22978762611139264, "grad_norm": 0.9012474784949154, "learning_rate": 4.571078106956178e-06, "loss": 0.1588, "step": 2494 }, { "epoch": 0.22987976228866264, "grad_norm": 1.0733611855009282, "learning_rate": 4.570651343241733e-06, "loss": 0.1731, "step": 2495 }, { "epoch": 0.22997189846593266, "grad_norm": 1.0563609033763628, "learning_rate": 4.570224387267089e-06, "loss": 0.179, "step": 2496 }, { "epoch": 0.23006403464320266, "grad_norm": 0.9549484688378221, "learning_rate": 4.569797239071887e-06, "loss": 0.1831, "step": 2497 }, { "epoch": 0.23015617082047266, "grad_norm": 1.0449068654366036, "learning_rate": 4.569369898695789e-06, "loss": 0.1741, "step": 2498 }, { "epoch": 0.23024830699774265, "grad_norm": 0.9602792119218708, "learning_rate": 4.568942366178473e-06, "loss": 0.1623, "step": 2499 }, { "epoch": 0.23034044317501268, "grad_norm": 0.9618534029768782, "learning_rate": 4.568514641559636e-06, "loss": 0.1647, "step": 2500 }, { "epoch": 0.23034044317501268, "eval_loss": 0.16871164739131927, "eval_runtime": 300.164, "eval_samples_per_second": 23.377, "eval_steps_per_second": 2.925, "step": 2500 }, { "epoch": 0.23043257935228267, "grad_norm": 0.9533742219678426, "learning_rate": 4.5680867248789916e-06, "loss": 0.1741, "step": 2501 }, { "epoch": 0.23052471552955267, "grad_norm": 0.9298655405790685, "learning_rate": 4.567658616176273e-06, "loss": 0.1708, "step": 2502 }, { "epoch": 0.2306168517068227, "grad_norm": 0.9479029081415975, "learning_rate": 4.5672303154912275e-06, "loss": 0.1682, "step": 2503 }, { "epoch": 0.2307089878840927, "grad_norm": 0.8851274933549781, "learning_rate": 4.566801822863626e-06, "loss": 0.1666, "step": 2504 }, { "epoch": 0.2308011240613627, "grad_norm": 0.8924494397459016, "learning_rate": 4.566373138333253e-06, "loss": 0.1619, "step": 2505 }, { "epoch": 0.23089326023863269, "grad_norm": 0.9435939583116043, "learning_rate": 4.565944261939911e-06, "loss": 0.163, "step": 2506 }, { "epoch": 0.2309853964159027, "grad_norm": 0.8879874757562365, "learning_rate": 4.565515193723423e-06, "loss": 0.1756, "step": 2507 }, { "epoch": 0.2310775325931727, "grad_norm": 0.9376920867552287, "learning_rate": 4.5650859337236256e-06, "loss": 0.1614, "step": 2508 }, { "epoch": 0.2311696687704427, "grad_norm": 0.9673814720139658, "learning_rate": 4.564656481980378e-06, "loss": 0.1733, "step": 2509 }, { "epoch": 0.23126180494771273, "grad_norm": 0.9152447120818502, "learning_rate": 4.564226838533553e-06, "loss": 0.1586, "step": 2510 }, { "epoch": 0.23135394112498273, "grad_norm": 0.8820356484085558, "learning_rate": 4.563797003423045e-06, "loss": 0.1673, "step": 2511 }, { "epoch": 0.23144607730225272, "grad_norm": 0.9391183631130411, "learning_rate": 4.563366976688762e-06, "loss": 0.1761, "step": 2512 }, { "epoch": 0.23153821347952275, "grad_norm": 0.9636950958740574, "learning_rate": 4.562936758370634e-06, "loss": 0.1637, "step": 2513 }, { "epoch": 0.23163034965679274, "grad_norm": 1.037525619091228, "learning_rate": 4.5625063485086065e-06, "loss": 0.1925, "step": 2514 }, { "epoch": 0.23172248583406274, "grad_norm": 0.94368533310796, "learning_rate": 4.562075747142641e-06, "loss": 0.1921, "step": 2515 }, { "epoch": 0.23181462201133274, "grad_norm": 0.9306148789692448, "learning_rate": 4.561644954312721e-06, "loss": 0.1609, "step": 2516 }, { "epoch": 0.23190675818860276, "grad_norm": 0.8962214850100089, "learning_rate": 4.561213970058845e-06, "loss": 0.1568, "step": 2517 }, { "epoch": 0.23199889436587276, "grad_norm": 0.9376759104456283, "learning_rate": 4.560782794421031e-06, "loss": 0.1733, "step": 2518 }, { "epoch": 0.23209103054314276, "grad_norm": 0.9774780989550023, "learning_rate": 4.5603514274393125e-06, "loss": 0.1688, "step": 2519 }, { "epoch": 0.23218316672041278, "grad_norm": 0.8467491530853, "learning_rate": 4.559919869153742e-06, "loss": 0.1526, "step": 2520 }, { "epoch": 0.23227530289768278, "grad_norm": 0.9208731668470556, "learning_rate": 4.559488119604389e-06, "loss": 0.1625, "step": 2521 }, { "epoch": 0.23236743907495278, "grad_norm": 1.0214193792829334, "learning_rate": 4.5590561788313435e-06, "loss": 0.1769, "step": 2522 }, { "epoch": 0.23245957525222277, "grad_norm": 0.9530626148431194, "learning_rate": 4.55862404687471e-06, "loss": 0.1768, "step": 2523 }, { "epoch": 0.2325517114294928, "grad_norm": 0.9441286615058059, "learning_rate": 4.558191723774612e-06, "loss": 0.1662, "step": 2524 }, { "epoch": 0.2326438476067628, "grad_norm": 0.9633992603927569, "learning_rate": 4.557759209571191e-06, "loss": 0.1649, "step": 2525 }, { "epoch": 0.2327359837840328, "grad_norm": 0.8883571013742829, "learning_rate": 4.557326504304606e-06, "loss": 0.1669, "step": 2526 }, { "epoch": 0.23282811996130282, "grad_norm": 0.9452980355449644, "learning_rate": 4.556893608015034e-06, "loss": 0.1695, "step": 2527 }, { "epoch": 0.2329202561385728, "grad_norm": 0.9022590301416014, "learning_rate": 4.556460520742669e-06, "loss": 0.1571, "step": 2528 }, { "epoch": 0.2330123923158428, "grad_norm": 0.929852406290282, "learning_rate": 4.556027242527723e-06, "loss": 0.1743, "step": 2529 }, { "epoch": 0.23310452849311283, "grad_norm": 0.966516869274674, "learning_rate": 4.555593773410426e-06, "loss": 0.1702, "step": 2530 }, { "epoch": 0.23319666467038283, "grad_norm": 0.8832043346632404, "learning_rate": 4.555160113431027e-06, "loss": 0.1653, "step": 2531 }, { "epoch": 0.23328880084765283, "grad_norm": 0.9671050400227129, "learning_rate": 4.554726262629789e-06, "loss": 0.1752, "step": 2532 }, { "epoch": 0.23338093702492282, "grad_norm": 0.8813498900554017, "learning_rate": 4.554292221046997e-06, "loss": 0.1535, "step": 2533 }, { "epoch": 0.23347307320219285, "grad_norm": 0.928958558583173, "learning_rate": 4.553857988722951e-06, "loss": 0.1611, "step": 2534 }, { "epoch": 0.23356520937946285, "grad_norm": 0.9717573709876711, "learning_rate": 4.55342356569797e-06, "loss": 0.1683, "step": 2535 }, { "epoch": 0.23365734555673284, "grad_norm": 0.9072668433737212, "learning_rate": 4.5529889520123896e-06, "loss": 0.17, "step": 2536 }, { "epoch": 0.23374948173400287, "grad_norm": 0.9099867455316708, "learning_rate": 4.5525541477065644e-06, "loss": 0.1746, "step": 2537 }, { "epoch": 0.23384161791127286, "grad_norm": 0.9050044764166337, "learning_rate": 4.552119152820866e-06, "loss": 0.1606, "step": 2538 }, { "epoch": 0.23393375408854286, "grad_norm": 1.0216026559059102, "learning_rate": 4.551683967395683e-06, "loss": 0.1692, "step": 2539 }, { "epoch": 0.23402589026581286, "grad_norm": 0.9592363893092558, "learning_rate": 4.5512485914714225e-06, "loss": 0.1649, "step": 2540 }, { "epoch": 0.23411802644308288, "grad_norm": 0.9168069315815013, "learning_rate": 4.55081302508851e-06, "loss": 0.165, "step": 2541 }, { "epoch": 0.23421016262035288, "grad_norm": 0.918494290815671, "learning_rate": 4.550377268287387e-06, "loss": 0.1698, "step": 2542 }, { "epoch": 0.23430229879762288, "grad_norm": 0.9922874023864924, "learning_rate": 4.549941321108514e-06, "loss": 0.1714, "step": 2543 }, { "epoch": 0.2343944349748929, "grad_norm": 1.0043624274520886, "learning_rate": 4.549505183592368e-06, "loss": 0.1849, "step": 2544 }, { "epoch": 0.2344865711521629, "grad_norm": 0.9095985103827297, "learning_rate": 4.549068855779447e-06, "loss": 0.1526, "step": 2545 }, { "epoch": 0.2345787073294329, "grad_norm": 0.9878365543468702, "learning_rate": 4.5486323377102615e-06, "loss": 0.1765, "step": 2546 }, { "epoch": 0.23467084350670292, "grad_norm": 0.8155329876941606, "learning_rate": 4.548195629425343e-06, "loss": 0.1394, "step": 2547 }, { "epoch": 0.23476297968397292, "grad_norm": 0.9100479491854085, "learning_rate": 4.547758730965239e-06, "loss": 0.1598, "step": 2548 }, { "epoch": 0.2348551158612429, "grad_norm": 0.9992825507444053, "learning_rate": 4.547321642370517e-06, "loss": 0.1826, "step": 2549 }, { "epoch": 0.2349472520385129, "grad_norm": 0.9061188471199144, "learning_rate": 4.5468843636817605e-06, "loss": 0.1576, "step": 2550 }, { "epoch": 0.23503938821578294, "grad_norm": 0.9626546650825835, "learning_rate": 4.54644689493957e-06, "loss": 0.1722, "step": 2551 }, { "epoch": 0.23513152439305293, "grad_norm": 0.9163551094540457, "learning_rate": 4.546009236184565e-06, "loss": 0.1463, "step": 2552 }, { "epoch": 0.23522366057032293, "grad_norm": 0.9758440630401635, "learning_rate": 4.545571387457382e-06, "loss": 0.1688, "step": 2553 }, { "epoch": 0.23531579674759295, "grad_norm": 0.9592381292109499, "learning_rate": 4.545133348798677e-06, "loss": 0.1758, "step": 2554 }, { "epoch": 0.23540793292486295, "grad_norm": 1.0261072582249307, "learning_rate": 4.54469512024912e-06, "loss": 0.1713, "step": 2555 }, { "epoch": 0.23550006910213295, "grad_norm": 0.9247050273249168, "learning_rate": 4.5442567018494e-06, "loss": 0.1625, "step": 2556 }, { "epoch": 0.23559220527940294, "grad_norm": 0.8985845157903278, "learning_rate": 4.543818093640226e-06, "loss": 0.174, "step": 2557 }, { "epoch": 0.23568434145667297, "grad_norm": 0.9681388653177704, "learning_rate": 4.543379295662322e-06, "loss": 0.1713, "step": 2558 }, { "epoch": 0.23577647763394297, "grad_norm": 0.9448366597258528, "learning_rate": 4.54294030795643e-06, "loss": 0.1573, "step": 2559 }, { "epoch": 0.23586861381121296, "grad_norm": 0.9314244476538863, "learning_rate": 4.5425011305633106e-06, "loss": 0.1792, "step": 2560 }, { "epoch": 0.235960749988483, "grad_norm": 1.0225904621056787, "learning_rate": 4.542061763523742e-06, "loss": 0.1756, "step": 2561 }, { "epoch": 0.23605288616575298, "grad_norm": 0.9305300082933172, "learning_rate": 4.541622206878519e-06, "loss": 0.1614, "step": 2562 }, { "epoch": 0.23614502234302298, "grad_norm": 0.9248622005950687, "learning_rate": 4.541182460668453e-06, "loss": 0.1735, "step": 2563 }, { "epoch": 0.236237158520293, "grad_norm": 0.9311672044363726, "learning_rate": 4.540742524934377e-06, "loss": 0.1728, "step": 2564 }, { "epoch": 0.236329294697563, "grad_norm": 0.9839928437689336, "learning_rate": 4.540302399717138e-06, "loss": 0.1706, "step": 2565 }, { "epoch": 0.236421430874833, "grad_norm": 0.9426141389876795, "learning_rate": 4.5398620850576016e-06, "loss": 0.162, "step": 2566 }, { "epoch": 0.236513567052103, "grad_norm": 0.8881256101693608, "learning_rate": 4.53942158099665e-06, "loss": 0.1562, "step": 2567 }, { "epoch": 0.23660570322937302, "grad_norm": 0.9105265463755824, "learning_rate": 4.538980887575187e-06, "loss": 0.1598, "step": 2568 }, { "epoch": 0.23669783940664302, "grad_norm": 0.8733143352802549, "learning_rate": 4.538540004834128e-06, "loss": 0.155, "step": 2569 }, { "epoch": 0.23678997558391301, "grad_norm": 0.885143091633039, "learning_rate": 4.53809893281441e-06, "loss": 0.1525, "step": 2570 }, { "epoch": 0.23688211176118304, "grad_norm": 0.9585313460757532, "learning_rate": 4.537657671556987e-06, "loss": 0.1646, "step": 2571 }, { "epoch": 0.23697424793845304, "grad_norm": 0.9948560820566639, "learning_rate": 4.5372162211028305e-06, "loss": 0.1807, "step": 2572 }, { "epoch": 0.23706638411572303, "grad_norm": 0.9080172248460118, "learning_rate": 4.536774581492928e-06, "loss": 0.1494, "step": 2573 }, { "epoch": 0.23715852029299303, "grad_norm": 0.9556030654445338, "learning_rate": 4.5363327527682855e-06, "loss": 0.1593, "step": 2574 }, { "epoch": 0.23725065647026305, "grad_norm": 0.919600867467294, "learning_rate": 4.535890734969929e-06, "loss": 0.1704, "step": 2575 }, { "epoch": 0.23734279264753305, "grad_norm": 1.0317406838521088, "learning_rate": 4.535448528138899e-06, "loss": 0.176, "step": 2576 }, { "epoch": 0.23743492882480305, "grad_norm": 0.943160391590626, "learning_rate": 4.535006132316253e-06, "loss": 0.1911, "step": 2577 }, { "epoch": 0.23752706500207307, "grad_norm": 0.8917538056406878, "learning_rate": 4.534563547543069e-06, "loss": 0.1606, "step": 2578 }, { "epoch": 0.23761920117934307, "grad_norm": 0.9851079183758169, "learning_rate": 4.53412077386044e-06, "loss": 0.1817, "step": 2579 }, { "epoch": 0.23771133735661307, "grad_norm": 0.9027548501120674, "learning_rate": 4.533677811309479e-06, "loss": 0.1672, "step": 2580 }, { "epoch": 0.2378034735338831, "grad_norm": 0.945909808985898, "learning_rate": 4.533234659931315e-06, "loss": 0.1845, "step": 2581 }, { "epoch": 0.2378956097111531, "grad_norm": 0.8521446168221071, "learning_rate": 4.532791319767093e-06, "loss": 0.1434, "step": 2582 }, { "epoch": 0.23798774588842309, "grad_norm": 0.9503670223175602, "learning_rate": 4.532347790857978e-06, "loss": 0.1808, "step": 2583 }, { "epoch": 0.23807988206569308, "grad_norm": 0.9063415363527835, "learning_rate": 4.531904073245152e-06, "loss": 0.1701, "step": 2584 }, { "epoch": 0.2381720182429631, "grad_norm": 0.8789860283816323, "learning_rate": 4.531460166969816e-06, "loss": 0.1668, "step": 2585 }, { "epoch": 0.2382641544202331, "grad_norm": 0.9391306489506216, "learning_rate": 4.531016072073182e-06, "loss": 0.1697, "step": 2586 }, { "epoch": 0.2383562905975031, "grad_norm": 0.9586292371328816, "learning_rate": 4.530571788596489e-06, "loss": 0.166, "step": 2587 }, { "epoch": 0.23844842677477313, "grad_norm": 0.9708360436328097, "learning_rate": 4.530127316580986e-06, "loss": 0.1775, "step": 2588 }, { "epoch": 0.23854056295204312, "grad_norm": 0.8840283994456286, "learning_rate": 4.5296826560679445e-06, "loss": 0.1493, "step": 2589 }, { "epoch": 0.23863269912931312, "grad_norm": 0.9292858225886818, "learning_rate": 4.529237807098649e-06, "loss": 0.1625, "step": 2590 }, { "epoch": 0.23872483530658312, "grad_norm": 0.8977136056150155, "learning_rate": 4.528792769714404e-06, "loss": 0.1758, "step": 2591 }, { "epoch": 0.23881697148385314, "grad_norm": 1.0151817811358843, "learning_rate": 4.528347543956533e-06, "loss": 0.1596, "step": 2592 }, { "epoch": 0.23890910766112314, "grad_norm": 0.9335523901294361, "learning_rate": 4.527902129866374e-06, "loss": 0.1732, "step": 2593 }, { "epoch": 0.23900124383839313, "grad_norm": 0.9256864277979158, "learning_rate": 4.527456527485284e-06, "loss": 0.1683, "step": 2594 }, { "epoch": 0.23909338001566316, "grad_norm": 1.0210336286294275, "learning_rate": 4.527010736854637e-06, "loss": 0.1601, "step": 2595 }, { "epoch": 0.23918551619293316, "grad_norm": 0.960562630209394, "learning_rate": 4.526564758015825e-06, "loss": 0.1597, "step": 2596 }, { "epoch": 0.23927765237020315, "grad_norm": 0.9923091348049548, "learning_rate": 4.5261185910102575e-06, "loss": 0.1749, "step": 2597 }, { "epoch": 0.23936978854747318, "grad_norm": 0.9185996987364619, "learning_rate": 4.525672235879361e-06, "loss": 0.1757, "step": 2598 }, { "epoch": 0.23946192472474317, "grad_norm": 0.9504821525147511, "learning_rate": 4.5252256926645786e-06, "loss": 0.1733, "step": 2599 }, { "epoch": 0.23955406090201317, "grad_norm": 0.8817193190901266, "learning_rate": 4.5247789614073725e-06, "loss": 0.1518, "step": 2600 }, { "epoch": 0.23964619707928317, "grad_norm": 0.8998627255833563, "learning_rate": 4.524332042149223e-06, "loss": 0.1598, "step": 2601 }, { "epoch": 0.2397383332565532, "grad_norm": 0.8916195806919052, "learning_rate": 4.523884934931624e-06, "loss": 0.1536, "step": 2602 }, { "epoch": 0.2398304694338232, "grad_norm": 0.8969910004039, "learning_rate": 4.523437639796092e-06, "loss": 0.1558, "step": 2603 }, { "epoch": 0.2399226056110932, "grad_norm": 0.9579370144538515, "learning_rate": 4.522990156784157e-06, "loss": 0.1696, "step": 2604 }, { "epoch": 0.2400147417883632, "grad_norm": 0.9256485173070821, "learning_rate": 4.522542485937369e-06, "loss": 0.165, "step": 2605 }, { "epoch": 0.2401068779656332, "grad_norm": 0.8763097473674882, "learning_rate": 4.522094627297293e-06, "loss": 0.1518, "step": 2606 }, { "epoch": 0.2401990141429032, "grad_norm": 0.9590186614529309, "learning_rate": 4.521646580905513e-06, "loss": 0.1663, "step": 2607 }, { "epoch": 0.2402911503201732, "grad_norm": 0.8953321886812519, "learning_rate": 4.521198346803631e-06, "loss": 0.156, "step": 2608 }, { "epoch": 0.24038328649744323, "grad_norm": 0.9207768164702115, "learning_rate": 4.520749925033264e-06, "loss": 0.1607, "step": 2609 }, { "epoch": 0.24047542267471322, "grad_norm": 0.919939995459796, "learning_rate": 4.52030131563605e-06, "loss": 0.1725, "step": 2610 }, { "epoch": 0.24056755885198322, "grad_norm": 0.9646077134116254, "learning_rate": 4.519852518653641e-06, "loss": 0.167, "step": 2611 }, { "epoch": 0.24065969502925325, "grad_norm": 0.905875090197065, "learning_rate": 4.519403534127709e-06, "loss": 0.1651, "step": 2612 }, { "epoch": 0.24075183120652324, "grad_norm": 0.9733708732781958, "learning_rate": 4.51895436209994e-06, "loss": 0.1682, "step": 2613 }, { "epoch": 0.24084396738379324, "grad_norm": 0.9138941832811526, "learning_rate": 4.5185050026120425e-06, "loss": 0.1581, "step": 2614 }, { "epoch": 0.24093610356106326, "grad_norm": 0.8714836624140244, "learning_rate": 4.5180554557057376e-06, "loss": 0.1607, "step": 2615 }, { "epoch": 0.24102823973833326, "grad_norm": 0.896212162592746, "learning_rate": 4.5176057214227665e-06, "loss": 0.1557, "step": 2616 }, { "epoch": 0.24112037591560326, "grad_norm": 0.9795900304013812, "learning_rate": 4.517155799804888e-06, "loss": 0.1594, "step": 2617 }, { "epoch": 0.24121251209287325, "grad_norm": 0.9620751253748585, "learning_rate": 4.516705690893874e-06, "loss": 0.1746, "step": 2618 }, { "epoch": 0.24130464827014328, "grad_norm": 0.9766714887522009, "learning_rate": 4.516255394731522e-06, "loss": 0.1655, "step": 2619 }, { "epoch": 0.24139678444741328, "grad_norm": 0.9644944957480073, "learning_rate": 4.515804911359639e-06, "loss": 0.1604, "step": 2620 }, { "epoch": 0.24148892062468327, "grad_norm": 1.0028858940407497, "learning_rate": 4.5153542408200524e-06, "loss": 0.1666, "step": 2621 }, { "epoch": 0.2415810568019533, "grad_norm": 0.9909913147953266, "learning_rate": 4.514903383154608e-06, "loss": 0.1715, "step": 2622 }, { "epoch": 0.2416731929792233, "grad_norm": 0.9720807397909752, "learning_rate": 4.5144523384051675e-06, "loss": 0.1704, "step": 2623 }, { "epoch": 0.2417653291564933, "grad_norm": 0.9983520531859589, "learning_rate": 4.514001106613611e-06, "loss": 0.1673, "step": 2624 }, { "epoch": 0.2418574653337633, "grad_norm": 0.871023507107817, "learning_rate": 4.513549687821834e-06, "loss": 0.1598, "step": 2625 }, { "epoch": 0.2419496015110333, "grad_norm": 0.875727688350462, "learning_rate": 4.513098082071753e-06, "loss": 0.1796, "step": 2626 }, { "epoch": 0.2420417376883033, "grad_norm": 0.9984200375498948, "learning_rate": 4.512646289405298e-06, "loss": 0.1737, "step": 2627 }, { "epoch": 0.2421338738655733, "grad_norm": 0.9219824808671676, "learning_rate": 4.5121943098644185e-06, "loss": 0.1727, "step": 2628 }, { "epoch": 0.24222601004284333, "grad_norm": 0.903236560497671, "learning_rate": 4.5117421434910805e-06, "loss": 0.1563, "step": 2629 }, { "epoch": 0.24231814622011333, "grad_norm": 1.1088373746993974, "learning_rate": 4.511289790327268e-06, "loss": 0.1795, "step": 2630 }, { "epoch": 0.24241028239738333, "grad_norm": 0.9099632964748022, "learning_rate": 4.510837250414982e-06, "loss": 0.1802, "step": 2631 }, { "epoch": 0.24250241857465335, "grad_norm": 0.9625764621719629, "learning_rate": 4.5103845237962405e-06, "loss": 0.1824, "step": 2632 }, { "epoch": 0.24259455475192335, "grad_norm": 0.9045197623683646, "learning_rate": 4.509931610513081e-06, "loss": 0.1535, "step": 2633 }, { "epoch": 0.24268669092919334, "grad_norm": 0.8883996221321223, "learning_rate": 4.509478510607553e-06, "loss": 0.162, "step": 2634 }, { "epoch": 0.24277882710646334, "grad_norm": 0.8733565291886327, "learning_rate": 4.509025224121732e-06, "loss": 0.1488, "step": 2635 }, { "epoch": 0.24287096328373337, "grad_norm": 0.9323593147387864, "learning_rate": 4.5085717510977e-06, "loss": 0.1832, "step": 2636 }, { "epoch": 0.24296309946100336, "grad_norm": 0.8330195920047772, "learning_rate": 4.508118091577566e-06, "loss": 0.1578, "step": 2637 }, { "epoch": 0.24305523563827336, "grad_norm": 1.0178111918491635, "learning_rate": 4.507664245603451e-06, "loss": 0.1661, "step": 2638 }, { "epoch": 0.24314737181554338, "grad_norm": 0.9034888786182202, "learning_rate": 4.507210213217495e-06, "loss": 0.1695, "step": 2639 }, { "epoch": 0.24323950799281338, "grad_norm": 0.9159922991117291, "learning_rate": 4.506755994461853e-06, "loss": 0.1803, "step": 2640 }, { "epoch": 0.24333164417008338, "grad_norm": 0.9496166120744165, "learning_rate": 4.506301589378703e-06, "loss": 0.1754, "step": 2641 }, { "epoch": 0.24342378034735337, "grad_norm": 0.927288722206701, "learning_rate": 4.5058469980102336e-06, "loss": 0.1608, "step": 2642 }, { "epoch": 0.2435159165246234, "grad_norm": 0.9357101673472532, "learning_rate": 4.505392220398655e-06, "loss": 0.1743, "step": 2643 }, { "epoch": 0.2436080527018934, "grad_norm": 0.8967894186666624, "learning_rate": 4.504937256586192e-06, "loss": 0.1614, "step": 2644 }, { "epoch": 0.2437001888791634, "grad_norm": 1.008390819829681, "learning_rate": 4.50448210661509e-06, "loss": 0.18, "step": 2645 }, { "epoch": 0.24379232505643342, "grad_norm": 0.8959777511685124, "learning_rate": 4.504026770527607e-06, "loss": 0.1625, "step": 2646 }, { "epoch": 0.24388446123370341, "grad_norm": 0.883975734164676, "learning_rate": 4.503571248366024e-06, "loss": 0.1644, "step": 2647 }, { "epoch": 0.2439765974109734, "grad_norm": 0.9447762260150692, "learning_rate": 4.503115540172636e-06, "loss": 0.1766, "step": 2648 }, { "epoch": 0.24406873358824344, "grad_norm": 0.9023969503174291, "learning_rate": 4.502659645989753e-06, "loss": 0.1517, "step": 2649 }, { "epoch": 0.24416086976551343, "grad_norm": 0.8786423359540836, "learning_rate": 4.502203565859706e-06, "loss": 0.1548, "step": 2650 }, { "epoch": 0.24425300594278343, "grad_norm": 0.9129953675126846, "learning_rate": 4.501747299824843e-06, "loss": 0.1769, "step": 2651 }, { "epoch": 0.24434514212005343, "grad_norm": 0.8685998545587346, "learning_rate": 4.501290847927529e-06, "loss": 0.158, "step": 2652 }, { "epoch": 0.24443727829732345, "grad_norm": 0.9169127256515894, "learning_rate": 4.500834210210143e-06, "loss": 0.1536, "step": 2653 }, { "epoch": 0.24452941447459345, "grad_norm": 0.9004372536575974, "learning_rate": 4.500377386715086e-06, "loss": 0.1561, "step": 2654 }, { "epoch": 0.24462155065186345, "grad_norm": 0.9171923195813083, "learning_rate": 4.499920377484772e-06, "loss": 0.1561, "step": 2655 }, { "epoch": 0.24471368682913347, "grad_norm": 0.9970316417826179, "learning_rate": 4.499463182561637e-06, "loss": 0.1781, "step": 2656 }, { "epoch": 0.24480582300640347, "grad_norm": 0.8986690107368408, "learning_rate": 4.49900580198813e-06, "loss": 0.163, "step": 2657 }, { "epoch": 0.24489795918367346, "grad_norm": 0.8609935269764124, "learning_rate": 4.498548235806719e-06, "loss": 0.1552, "step": 2658 }, { "epoch": 0.24499009536094346, "grad_norm": 0.8736993468630774, "learning_rate": 4.4980904840598894e-06, "loss": 0.1552, "step": 2659 }, { "epoch": 0.24508223153821349, "grad_norm": 0.9081273596019058, "learning_rate": 4.497632546790143e-06, "loss": 0.1512, "step": 2660 }, { "epoch": 0.24517436771548348, "grad_norm": 0.9102031151269082, "learning_rate": 4.49717442404e-06, "loss": 0.1595, "step": 2661 }, { "epoch": 0.24526650389275348, "grad_norm": 0.9021316326942003, "learning_rate": 4.496716115851996e-06, "loss": 0.1717, "step": 2662 }, { "epoch": 0.2453586400700235, "grad_norm": 0.9719049140168042, "learning_rate": 4.496257622268687e-06, "loss": 0.1615, "step": 2663 }, { "epoch": 0.2454507762472935, "grad_norm": 0.9727092079229075, "learning_rate": 4.495798943332642e-06, "loss": 0.1579, "step": 2664 }, { "epoch": 0.2455429124245635, "grad_norm": 0.8986708563300652, "learning_rate": 4.495340079086451e-06, "loss": 0.169, "step": 2665 }, { "epoch": 0.24563504860183352, "grad_norm": 0.949471301620958, "learning_rate": 4.494881029572718e-06, "loss": 0.1704, "step": 2666 }, { "epoch": 0.24572718477910352, "grad_norm": 0.9152647484984884, "learning_rate": 4.494421794834068e-06, "loss": 0.1605, "step": 2667 }, { "epoch": 0.24581932095637352, "grad_norm": 1.0020699225934224, "learning_rate": 4.4939623749131385e-06, "loss": 0.1787, "step": 2668 }, { "epoch": 0.2459114571336435, "grad_norm": 0.9225724208528915, "learning_rate": 4.493502769852589e-06, "loss": 0.1645, "step": 2669 }, { "epoch": 0.24600359331091354, "grad_norm": 0.9282588859001433, "learning_rate": 4.493042979695092e-06, "loss": 0.1706, "step": 2670 }, { "epoch": 0.24609572948818353, "grad_norm": 0.950681578689861, "learning_rate": 4.4925830044833405e-06, "loss": 0.1632, "step": 2671 }, { "epoch": 0.24618786566545353, "grad_norm": 0.9578740886589785, "learning_rate": 4.492122844260042e-06, "loss": 0.1645, "step": 2672 }, { "epoch": 0.24628000184272356, "grad_norm": 0.9183491188538061, "learning_rate": 4.491662499067923e-06, "loss": 0.1684, "step": 2673 }, { "epoch": 0.24637213801999355, "grad_norm": 0.886920316373167, "learning_rate": 4.491201968949726e-06, "loss": 0.1425, "step": 2674 }, { "epoch": 0.24646427419726355, "grad_norm": 0.9515433294511995, "learning_rate": 4.490741253948213e-06, "loss": 0.1617, "step": 2675 }, { "epoch": 0.24655641037453357, "grad_norm": 0.9513213792540711, "learning_rate": 4.49028035410616e-06, "loss": 0.161, "step": 2676 }, { "epoch": 0.24664854655180357, "grad_norm": 0.9553852848159456, "learning_rate": 4.489819269466362e-06, "loss": 0.1646, "step": 2677 }, { "epoch": 0.24674068272907357, "grad_norm": 1.0489521515871678, "learning_rate": 4.489358000071631e-06, "loss": 0.1591, "step": 2678 }, { "epoch": 0.24683281890634357, "grad_norm": 0.9143172794273362, "learning_rate": 4.488896545964795e-06, "loss": 0.1642, "step": 2679 }, { "epoch": 0.2469249550836136, "grad_norm": 0.8760022568204034, "learning_rate": 4.4884349071887e-06, "loss": 0.1506, "step": 2680 }, { "epoch": 0.2470170912608836, "grad_norm": 0.9208733869067626, "learning_rate": 4.487973083786211e-06, "loss": 0.1668, "step": 2681 }, { "epoch": 0.24710922743815358, "grad_norm": 0.940764657196804, "learning_rate": 4.4875110758002076e-06, "loss": 0.1731, "step": 2682 }, { "epoch": 0.2472013636154236, "grad_norm": 0.975836350447163, "learning_rate": 4.487048883273586e-06, "loss": 0.162, "step": 2683 }, { "epoch": 0.2472934997926936, "grad_norm": 0.9104989847581315, "learning_rate": 4.486586506249262e-06, "loss": 0.1656, "step": 2684 }, { "epoch": 0.2473856359699636, "grad_norm": 0.8994565355652719, "learning_rate": 4.486123944770166e-06, "loss": 0.1649, "step": 2685 }, { "epoch": 0.2474777721472336, "grad_norm": 0.9958969115514426, "learning_rate": 4.48566119887925e-06, "loss": 0.1773, "step": 2686 }, { "epoch": 0.24756990832450362, "grad_norm": 0.8853726143372311, "learning_rate": 4.4851982686194775e-06, "loss": 0.1567, "step": 2687 }, { "epoch": 0.24766204450177362, "grad_norm": 0.9261982366343027, "learning_rate": 4.484735154033831e-06, "loss": 0.1593, "step": 2688 }, { "epoch": 0.24775418067904362, "grad_norm": 0.9154568975953823, "learning_rate": 4.484271855165312e-06, "loss": 0.1694, "step": 2689 }, { "epoch": 0.24784631685631364, "grad_norm": 0.9484967646834224, "learning_rate": 4.483808372056939e-06, "loss": 0.1714, "step": 2690 }, { "epoch": 0.24793845303358364, "grad_norm": 0.9634197695354942, "learning_rate": 4.483344704751745e-06, "loss": 0.1791, "step": 2691 }, { "epoch": 0.24803058921085364, "grad_norm": 0.9700386725862131, "learning_rate": 4.48288085329278e-06, "loss": 0.1642, "step": 2692 }, { "epoch": 0.24812272538812366, "grad_norm": 0.8622018190901424, "learning_rate": 4.482416817723115e-06, "loss": 0.1619, "step": 2693 }, { "epoch": 0.24821486156539366, "grad_norm": 0.9328281803476557, "learning_rate": 4.481952598085836e-06, "loss": 0.1816, "step": 2694 }, { "epoch": 0.24830699774266365, "grad_norm": 0.8818479936219914, "learning_rate": 4.481488194424044e-06, "loss": 0.1502, "step": 2695 }, { "epoch": 0.24839913391993365, "grad_norm": 0.9726880839105281, "learning_rate": 4.481023606780861e-06, "loss": 0.1681, "step": 2696 }, { "epoch": 0.24849127009720368, "grad_norm": 0.9194992416431299, "learning_rate": 4.480558835199422e-06, "loss": 0.1611, "step": 2697 }, { "epoch": 0.24858340627447367, "grad_norm": 0.8483395943971769, "learning_rate": 4.4800938797228825e-06, "loss": 0.1508, "step": 2698 }, { "epoch": 0.24867554245174367, "grad_norm": 0.863379180784911, "learning_rate": 4.479628740394412e-06, "loss": 0.1424, "step": 2699 }, { "epoch": 0.2487676786290137, "grad_norm": 0.918067359903417, "learning_rate": 4.4791634172572015e-06, "loss": 0.1557, "step": 2700 }, { "epoch": 0.2488598148062837, "grad_norm": 0.9248335136551753, "learning_rate": 4.478697910354455e-06, "loss": 0.1647, "step": 2701 }, { "epoch": 0.2489519509835537, "grad_norm": 0.9227062178559122, "learning_rate": 4.4782322197293935e-06, "loss": 0.1508, "step": 2702 }, { "epoch": 0.24904408716082369, "grad_norm": 0.9355481138879411, "learning_rate": 4.477766345425257e-06, "loss": 0.1669, "step": 2703 }, { "epoch": 0.2491362233380937, "grad_norm": 0.9262215971301258, "learning_rate": 4.4773002874853035e-06, "loss": 0.166, "step": 2704 }, { "epoch": 0.2492283595153637, "grad_norm": 0.939773739473119, "learning_rate": 4.476834045952805e-06, "loss": 0.1692, "step": 2705 }, { "epoch": 0.2493204956926337, "grad_norm": 0.9233835051003554, "learning_rate": 4.476367620871053e-06, "loss": 0.1678, "step": 2706 }, { "epoch": 0.24941263186990373, "grad_norm": 0.9959179911656888, "learning_rate": 4.475901012283354e-06, "loss": 0.1719, "step": 2707 }, { "epoch": 0.24950476804717373, "grad_norm": 0.9906403922433501, "learning_rate": 4.475434220233034e-06, "loss": 0.171, "step": 2708 }, { "epoch": 0.24959690422444372, "grad_norm": 0.9654043551251009, "learning_rate": 4.474967244763434e-06, "loss": 0.1524, "step": 2709 }, { "epoch": 0.24968904040171375, "grad_norm": 0.8859687185368176, "learning_rate": 4.474500085917912e-06, "loss": 0.1626, "step": 2710 }, { "epoch": 0.24978117657898374, "grad_norm": 0.9742371995625955, "learning_rate": 4.474032743739846e-06, "loss": 0.1681, "step": 2711 }, { "epoch": 0.24987331275625374, "grad_norm": 0.8800994362044134, "learning_rate": 4.4735652182726265e-06, "loss": 0.1534, "step": 2712 }, { "epoch": 0.24996544893352374, "grad_norm": 0.9751280188862264, "learning_rate": 4.473097509559664e-06, "loss": 0.1629, "step": 2713 }, { "epoch": 0.25005758511079373, "grad_norm": 0.9597390984328364, "learning_rate": 4.472629617644385e-06, "loss": 0.1634, "step": 2714 }, { "epoch": 0.25014972128806373, "grad_norm": 0.8782469623492604, "learning_rate": 4.472161542570234e-06, "loss": 0.1465, "step": 2715 }, { "epoch": 0.2502418574653338, "grad_norm": 1.0126355329165513, "learning_rate": 4.4716932843806715e-06, "loss": 0.1885, "step": 2716 }, { "epoch": 0.2503339936426038, "grad_norm": 0.9689860924375248, "learning_rate": 4.471224843119176e-06, "loss": 0.1622, "step": 2717 }, { "epoch": 0.2504261298198738, "grad_norm": 1.0502120884934425, "learning_rate": 4.470756218829241e-06, "loss": 0.1806, "step": 2718 }, { "epoch": 0.2505182659971438, "grad_norm": 0.8987777249287304, "learning_rate": 4.470287411554379e-06, "loss": 0.1517, "step": 2719 }, { "epoch": 0.25061040217441377, "grad_norm": 1.0238603313699401, "learning_rate": 4.469818421338119e-06, "loss": 0.1562, "step": 2720 }, { "epoch": 0.25070253835168377, "grad_norm": 1.0334880871837124, "learning_rate": 4.469349248224007e-06, "loss": 0.1943, "step": 2721 }, { "epoch": 0.2507946745289538, "grad_norm": 0.9433556577855376, "learning_rate": 4.468879892255604e-06, "loss": 0.155, "step": 2722 }, { "epoch": 0.2508868107062238, "grad_norm": 0.9291327476122262, "learning_rate": 4.4684103534764925e-06, "loss": 0.1684, "step": 2723 }, { "epoch": 0.2509789468834938, "grad_norm": 0.9294984576948387, "learning_rate": 4.467940631930267e-06, "loss": 0.1643, "step": 2724 }, { "epoch": 0.2510710830607638, "grad_norm": 0.8568728555781507, "learning_rate": 4.467470727660543e-06, "loss": 0.161, "step": 2725 }, { "epoch": 0.2511632192380338, "grad_norm": 0.9282762970010767, "learning_rate": 4.467000640710949e-06, "loss": 0.1657, "step": 2726 }, { "epoch": 0.2512553554153038, "grad_norm": 0.9137764648161802, "learning_rate": 4.466530371125135e-06, "loss": 0.1627, "step": 2727 }, { "epoch": 0.2513474915925738, "grad_norm": 0.9720124135320236, "learning_rate": 4.4660599189467634e-06, "loss": 0.1748, "step": 2728 }, { "epoch": 0.25143962776984385, "grad_norm": 1.0387321685221882, "learning_rate": 4.465589284219517e-06, "loss": 0.1823, "step": 2729 }, { "epoch": 0.25153176394711385, "grad_norm": 0.953514571101409, "learning_rate": 4.465118466987094e-06, "loss": 0.1749, "step": 2730 }, { "epoch": 0.25162390012438385, "grad_norm": 0.9653536317377608, "learning_rate": 4.4646474672932105e-06, "loss": 0.1688, "step": 2731 }, { "epoch": 0.25171603630165384, "grad_norm": 0.9214114453191518, "learning_rate": 4.464176285181597e-06, "loss": 0.1707, "step": 2732 }, { "epoch": 0.25180817247892384, "grad_norm": 0.9310991093879136, "learning_rate": 4.4637049206960055e-06, "loss": 0.1664, "step": 2733 }, { "epoch": 0.25190030865619384, "grad_norm": 0.8764637830155427, "learning_rate": 4.4632333738802e-06, "loss": 0.1636, "step": 2734 }, { "epoch": 0.25199244483346384, "grad_norm": 0.8625927177967262, "learning_rate": 4.462761644777964e-06, "loss": 0.1619, "step": 2735 }, { "epoch": 0.2520845810107339, "grad_norm": 0.90143610722121, "learning_rate": 4.4622897334330985e-06, "loss": 0.1643, "step": 2736 }, { "epoch": 0.2521767171880039, "grad_norm": 0.9695051236534774, "learning_rate": 4.4618176398894205e-06, "loss": 0.1621, "step": 2737 }, { "epoch": 0.2522688533652739, "grad_norm": 0.9623621029230007, "learning_rate": 4.4613453641907634e-06, "loss": 0.169, "step": 2738 }, { "epoch": 0.2523609895425439, "grad_norm": 0.9804724158084682, "learning_rate": 4.460872906380977e-06, "loss": 0.1667, "step": 2739 }, { "epoch": 0.2524531257198139, "grad_norm": 0.9440309923131297, "learning_rate": 4.460400266503932e-06, "loss": 0.1788, "step": 2740 }, { "epoch": 0.2525452618970839, "grad_norm": 0.8943689360394186, "learning_rate": 4.4599274446035104e-06, "loss": 0.1634, "step": 2741 }, { "epoch": 0.25263739807435387, "grad_norm": 0.9078445150328539, "learning_rate": 4.459454440723614e-06, "loss": 0.1674, "step": 2742 }, { "epoch": 0.2527295342516239, "grad_norm": 1.0362519468921287, "learning_rate": 4.4589812549081624e-06, "loss": 0.1754, "step": 2743 }, { "epoch": 0.2528216704288939, "grad_norm": 0.9085550271912034, "learning_rate": 4.458507887201091e-06, "loss": 0.1622, "step": 2744 }, { "epoch": 0.2529138066061639, "grad_norm": 0.9723393303756631, "learning_rate": 4.458034337646351e-06, "loss": 0.1717, "step": 2745 }, { "epoch": 0.2530059427834339, "grad_norm": 0.8992954032616152, "learning_rate": 4.4575606062879115e-06, "loss": 0.1591, "step": 2746 }, { "epoch": 0.2530980789607039, "grad_norm": 0.94354265969286, "learning_rate": 4.45708669316976e-06, "loss": 0.167, "step": 2747 }, { "epoch": 0.2531902151379739, "grad_norm": 0.9230334147360164, "learning_rate": 4.456612598335898e-06, "loss": 0.1684, "step": 2748 }, { "epoch": 0.2532823513152439, "grad_norm": 0.8605421993979968, "learning_rate": 4.4561383218303455e-06, "loss": 0.1448, "step": 2749 }, { "epoch": 0.25337448749251396, "grad_norm": 0.9734131686816277, "learning_rate": 4.45566386369714e-06, "loss": 0.1796, "step": 2750 }, { "epoch": 0.25346662366978395, "grad_norm": 0.9752093359656303, "learning_rate": 4.455189223980333e-06, "loss": 0.1666, "step": 2751 }, { "epoch": 0.25355875984705395, "grad_norm": 0.9671629645895793, "learning_rate": 4.454714402723997e-06, "loss": 0.1672, "step": 2752 }, { "epoch": 0.25365089602432395, "grad_norm": 0.9912354137997301, "learning_rate": 4.4542393999722184e-06, "loss": 0.1749, "step": 2753 }, { "epoch": 0.25374303220159394, "grad_norm": 0.9629544614386797, "learning_rate": 4.453764215769101e-06, "loss": 0.1617, "step": 2754 }, { "epoch": 0.25383516837886394, "grad_norm": 0.903837560697107, "learning_rate": 4.4532888501587655e-06, "loss": 0.1691, "step": 2755 }, { "epoch": 0.253927304556134, "grad_norm": 0.9369456487961056, "learning_rate": 4.452813303185351e-06, "loss": 0.1628, "step": 2756 }, { "epoch": 0.254019440733404, "grad_norm": 0.9460665893734913, "learning_rate": 4.452337574893011e-06, "loss": 0.1602, "step": 2757 }, { "epoch": 0.254111576910674, "grad_norm": 0.8699779142398438, "learning_rate": 4.451861665325916e-06, "loss": 0.1473, "step": 2758 }, { "epoch": 0.254203713087944, "grad_norm": 0.9002005591816477, "learning_rate": 4.451385574528256e-06, "loss": 0.1511, "step": 2759 }, { "epoch": 0.254295849265214, "grad_norm": 0.9945308777773249, "learning_rate": 4.450909302544235e-06, "loss": 0.1645, "step": 2760 }, { "epoch": 0.254387985442484, "grad_norm": 0.9477308554562681, "learning_rate": 4.450432849418076e-06, "loss": 0.1698, "step": 2761 }, { "epoch": 0.254480121619754, "grad_norm": 0.975609887655622, "learning_rate": 4.449956215194017e-06, "loss": 0.1733, "step": 2762 }, { "epoch": 0.254572257797024, "grad_norm": 1.045564596726802, "learning_rate": 4.4494793999163125e-06, "loss": 0.1485, "step": 2763 }, { "epoch": 0.254664393974294, "grad_norm": 0.9691806406972214, "learning_rate": 4.449002403629237e-06, "loss": 0.1715, "step": 2764 }, { "epoch": 0.254756530151564, "grad_norm": 0.8882406299274819, "learning_rate": 4.448525226377078e-06, "loss": 0.1599, "step": 2765 }, { "epoch": 0.254848666328834, "grad_norm": 1.0071041342757574, "learning_rate": 4.448047868204143e-06, "loss": 0.1663, "step": 2766 }, { "epoch": 0.254940802506104, "grad_norm": 0.9867383224082461, "learning_rate": 4.447570329154752e-06, "loss": 0.159, "step": 2767 }, { "epoch": 0.255032938683374, "grad_norm": 0.9594025105717243, "learning_rate": 4.447092609273248e-06, "loss": 0.1672, "step": 2768 }, { "epoch": 0.255125074860644, "grad_norm": 0.9711693026778082, "learning_rate": 4.446614708603985e-06, "loss": 0.175, "step": 2769 }, { "epoch": 0.25521721103791406, "grad_norm": 0.9495706212576188, "learning_rate": 4.446136627191337e-06, "loss": 0.1688, "step": 2770 }, { "epoch": 0.25530934721518406, "grad_norm": 0.8719052281328445, "learning_rate": 4.445658365079693e-06, "loss": 0.1548, "step": 2771 }, { "epoch": 0.25540148339245405, "grad_norm": 0.9026874716661581, "learning_rate": 4.4451799223134615e-06, "loss": 0.1588, "step": 2772 }, { "epoch": 0.25549361956972405, "grad_norm": 0.9205413159506892, "learning_rate": 4.444701298937064e-06, "loss": 0.1647, "step": 2773 }, { "epoch": 0.25558575574699405, "grad_norm": 0.8853201950547462, "learning_rate": 4.444222494994942e-06, "loss": 0.1607, "step": 2774 }, { "epoch": 0.25567789192426404, "grad_norm": 0.9112603810645689, "learning_rate": 4.443743510531552e-06, "loss": 0.1581, "step": 2775 }, { "epoch": 0.25577002810153404, "grad_norm": 0.9430372314730556, "learning_rate": 4.443264345591368e-06, "loss": 0.1602, "step": 2776 }, { "epoch": 0.2558621642788041, "grad_norm": 0.9940607095386159, "learning_rate": 4.442785000218881e-06, "loss": 0.1669, "step": 2777 }, { "epoch": 0.2559543004560741, "grad_norm": 0.874978128077645, "learning_rate": 4.442305474458596e-06, "loss": 0.1598, "step": 2778 }, { "epoch": 0.2560464366333441, "grad_norm": 0.870158754636262, "learning_rate": 4.4418257683550405e-06, "loss": 0.1537, "step": 2779 }, { "epoch": 0.2561385728106141, "grad_norm": 0.9790466686822947, "learning_rate": 4.441345881952752e-06, "loss": 0.169, "step": 2780 }, { "epoch": 0.2562307089878841, "grad_norm": 1.0139628897985031, "learning_rate": 4.44086581529629e-06, "loss": 0.1769, "step": 2781 }, { "epoch": 0.2563228451651541, "grad_norm": 0.9463222270409243, "learning_rate": 4.440385568430228e-06, "loss": 0.1544, "step": 2782 }, { "epoch": 0.2564149813424241, "grad_norm": 0.9747904707531557, "learning_rate": 4.439905141399157e-06, "loss": 0.1797, "step": 2783 }, { "epoch": 0.25650711751969413, "grad_norm": 0.9977583516525407, "learning_rate": 4.439424534247686e-06, "loss": 0.162, "step": 2784 }, { "epoch": 0.2565992536969641, "grad_norm": 1.009039762592302, "learning_rate": 4.438943747020437e-06, "loss": 0.1606, "step": 2785 }, { "epoch": 0.2566913898742341, "grad_norm": 0.917807551416993, "learning_rate": 4.438462779762052e-06, "loss": 0.1589, "step": 2786 }, { "epoch": 0.2567835260515041, "grad_norm": 1.0105522924525019, "learning_rate": 4.437981632517191e-06, "loss": 0.1691, "step": 2787 }, { "epoch": 0.2568756622287741, "grad_norm": 0.8741885257091299, "learning_rate": 4.437500305330526e-06, "loss": 0.1627, "step": 2788 }, { "epoch": 0.2569677984060441, "grad_norm": 0.9656983545728868, "learning_rate": 4.437018798246749e-06, "loss": 0.1533, "step": 2789 }, { "epoch": 0.25705993458331416, "grad_norm": 0.9585997069738847, "learning_rate": 4.436537111310568e-06, "loss": 0.1806, "step": 2790 }, { "epoch": 0.25715207076058416, "grad_norm": 0.891640608831986, "learning_rate": 4.436055244566708e-06, "loss": 0.1585, "step": 2791 }, { "epoch": 0.25724420693785416, "grad_norm": 0.9842157286945457, "learning_rate": 4.4355731980599105e-06, "loss": 0.1579, "step": 2792 }, { "epoch": 0.25733634311512416, "grad_norm": 1.0030576180364315, "learning_rate": 4.435090971834933e-06, "loss": 0.1709, "step": 2793 }, { "epoch": 0.25742847929239415, "grad_norm": 0.9517782001478897, "learning_rate": 4.43460856593655e-06, "loss": 0.1718, "step": 2794 }, { "epoch": 0.25752061546966415, "grad_norm": 0.8794981685031267, "learning_rate": 4.434125980409553e-06, "loss": 0.1562, "step": 2795 }, { "epoch": 0.25761275164693415, "grad_norm": 0.8812764377019595, "learning_rate": 4.433643215298753e-06, "loss": 0.1577, "step": 2796 }, { "epoch": 0.2577048878242042, "grad_norm": 0.8945685024582577, "learning_rate": 4.433160270648971e-06, "loss": 0.1613, "step": 2797 }, { "epoch": 0.2577970240014742, "grad_norm": 0.8853629308772607, "learning_rate": 4.432677146505049e-06, "loss": 0.1643, "step": 2798 }, { "epoch": 0.2578891601787442, "grad_norm": 0.9325081834935269, "learning_rate": 4.432193842911846e-06, "loss": 0.1807, "step": 2799 }, { "epoch": 0.2579812963560142, "grad_norm": 0.9434955095871754, "learning_rate": 4.431710359914238e-06, "loss": 0.1653, "step": 2800 }, { "epoch": 0.2580734325332842, "grad_norm": 0.9005501179330212, "learning_rate": 4.4312266975571145e-06, "loss": 0.1608, "step": 2801 }, { "epoch": 0.2581655687105542, "grad_norm": 0.9051611437276726, "learning_rate": 4.430742855885384e-06, "loss": 0.1595, "step": 2802 }, { "epoch": 0.2582577048878242, "grad_norm": 0.8541816832770728, "learning_rate": 4.430258834943973e-06, "loss": 0.1435, "step": 2803 }, { "epoch": 0.25834984106509423, "grad_norm": 0.8819731313821468, "learning_rate": 4.429774634777819e-06, "loss": 0.1708, "step": 2804 }, { "epoch": 0.25844197724236423, "grad_norm": 0.8660324638072526, "learning_rate": 4.429290255431884e-06, "loss": 0.1548, "step": 2805 }, { "epoch": 0.2585341134196342, "grad_norm": 0.9404639070221896, "learning_rate": 4.428805696951141e-06, "loss": 0.1691, "step": 2806 }, { "epoch": 0.2586262495969042, "grad_norm": 0.8740371398676113, "learning_rate": 4.428320959380581e-06, "loss": 0.1684, "step": 2807 }, { "epoch": 0.2587183857741742, "grad_norm": 0.8545366276085776, "learning_rate": 4.427836042765213e-06, "loss": 0.1448, "step": 2808 }, { "epoch": 0.2588105219514442, "grad_norm": 0.8662838141494884, "learning_rate": 4.4273509471500606e-06, "loss": 0.148, "step": 2809 }, { "epoch": 0.2589026581287142, "grad_norm": 0.9313287920052562, "learning_rate": 4.426865672580166e-06, "loss": 0.1506, "step": 2810 }, { "epoch": 0.25899479430598427, "grad_norm": 0.8542806501069565, "learning_rate": 4.426380219100585e-06, "loss": 0.1531, "step": 2811 }, { "epoch": 0.25908693048325426, "grad_norm": 1.0367999062687907, "learning_rate": 4.425894586756394e-06, "loss": 0.196, "step": 2812 }, { "epoch": 0.25917906666052426, "grad_norm": 0.9720196319247114, "learning_rate": 4.425408775592684e-06, "loss": 0.1574, "step": 2813 }, { "epoch": 0.25927120283779426, "grad_norm": 0.8971796959776257, "learning_rate": 4.424922785654561e-06, "loss": 0.1557, "step": 2814 }, { "epoch": 0.25936333901506425, "grad_norm": 0.8716438967042835, "learning_rate": 4.424436616987151e-06, "loss": 0.1534, "step": 2815 }, { "epoch": 0.25945547519233425, "grad_norm": 0.9366795506332969, "learning_rate": 4.423950269635594e-06, "loss": 0.1722, "step": 2816 }, { "epoch": 0.25954761136960425, "grad_norm": 0.8750053562323363, "learning_rate": 4.4234637436450465e-06, "loss": 0.1667, "step": 2817 }, { "epoch": 0.2596397475468743, "grad_norm": 0.8688375268172122, "learning_rate": 4.422977039060684e-06, "loss": 0.1551, "step": 2818 }, { "epoch": 0.2597318837241443, "grad_norm": 0.9164926259231396, "learning_rate": 4.422490155927696e-06, "loss": 0.1547, "step": 2819 }, { "epoch": 0.2598240199014143, "grad_norm": 0.8784819223059159, "learning_rate": 4.422003094291291e-06, "loss": 0.1553, "step": 2820 }, { "epoch": 0.2599161560786843, "grad_norm": 0.8480220147126285, "learning_rate": 4.421515854196692e-06, "loss": 0.1421, "step": 2821 }, { "epoch": 0.2600082922559543, "grad_norm": 0.9812963150064519, "learning_rate": 4.421028435689138e-06, "loss": 0.1739, "step": 2822 }, { "epoch": 0.2601004284332243, "grad_norm": 0.966210027789589, "learning_rate": 4.420540838813887e-06, "loss": 0.1726, "step": 2823 }, { "epoch": 0.26019256461049434, "grad_norm": 0.8640742533975352, "learning_rate": 4.420053063616214e-06, "loss": 0.1597, "step": 2824 }, { "epoch": 0.26028470078776433, "grad_norm": 0.9021044826230841, "learning_rate": 4.419565110141406e-06, "loss": 0.1669, "step": 2825 }, { "epoch": 0.26037683696503433, "grad_norm": 0.8744446068310874, "learning_rate": 4.419076978434771e-06, "loss": 0.1587, "step": 2826 }, { "epoch": 0.2604689731423043, "grad_norm": 0.9002597276951575, "learning_rate": 4.418588668541632e-06, "loss": 0.1496, "step": 2827 }, { "epoch": 0.2605611093195743, "grad_norm": 0.8705763804623955, "learning_rate": 4.41810018050733e-06, "loss": 0.1612, "step": 2828 }, { "epoch": 0.2606532454968443, "grad_norm": 0.8590574476522304, "learning_rate": 4.417611514377218e-06, "loss": 0.1541, "step": 2829 }, { "epoch": 0.2607453816741143, "grad_norm": 0.9420489301767392, "learning_rate": 4.417122670196672e-06, "loss": 0.1662, "step": 2830 }, { "epoch": 0.26083751785138437, "grad_norm": 0.9855215157889966, "learning_rate": 4.416633648011079e-06, "loss": 0.1719, "step": 2831 }, { "epoch": 0.26092965402865437, "grad_norm": 0.9015482825979495, "learning_rate": 4.416144447865845e-06, "loss": 0.1594, "step": 2832 }, { "epoch": 0.26102179020592436, "grad_norm": 0.9186528341173594, "learning_rate": 4.4156550698063935e-06, "loss": 0.1503, "step": 2833 }, { "epoch": 0.26111392638319436, "grad_norm": 0.888306584206573, "learning_rate": 4.4151655138781625e-06, "loss": 0.1477, "step": 2834 }, { "epoch": 0.26120606256046436, "grad_norm": 1.0019447633808933, "learning_rate": 4.414675780126607e-06, "loss": 0.1655, "step": 2835 }, { "epoch": 0.26129819873773436, "grad_norm": 0.9867728137209706, "learning_rate": 4.4141858685972e-06, "loss": 0.1709, "step": 2836 }, { "epoch": 0.26139033491500435, "grad_norm": 0.9522319296236781, "learning_rate": 4.413695779335428e-06, "loss": 0.1604, "step": 2837 }, { "epoch": 0.2614824710922744, "grad_norm": 0.9150332712571578, "learning_rate": 4.413205512386798e-06, "loss": 0.1683, "step": 2838 }, { "epoch": 0.2615746072695444, "grad_norm": 0.9783738508165877, "learning_rate": 4.41271506779683e-06, "loss": 0.1781, "step": 2839 }, { "epoch": 0.2616667434468144, "grad_norm": 0.8955606749537997, "learning_rate": 4.412224445611062e-06, "loss": 0.1636, "step": 2840 }, { "epoch": 0.2617588796240844, "grad_norm": 0.8984379268924941, "learning_rate": 4.411733645875048e-06, "loss": 0.1586, "step": 2841 }, { "epoch": 0.2618510158013544, "grad_norm": 0.9029965233138934, "learning_rate": 4.41124266863436e-06, "loss": 0.1666, "step": 2842 }, { "epoch": 0.2619431519786244, "grad_norm": 0.9174022488679581, "learning_rate": 4.410751513934585e-06, "loss": 0.1725, "step": 2843 }, { "epoch": 0.2620352881558944, "grad_norm": 0.9756876452431863, "learning_rate": 4.410260181821325e-06, "loss": 0.1763, "step": 2844 }, { "epoch": 0.26212742433316444, "grad_norm": 0.9244170802721333, "learning_rate": 4.409768672340202e-06, "loss": 0.1463, "step": 2845 }, { "epoch": 0.26221956051043444, "grad_norm": 0.9041971028491017, "learning_rate": 4.409276985536852e-06, "loss": 0.173, "step": 2846 }, { "epoch": 0.26231169668770443, "grad_norm": 0.9359224044219522, "learning_rate": 4.408785121456929e-06, "loss": 0.1613, "step": 2847 }, { "epoch": 0.26240383286497443, "grad_norm": 0.8730622181165766, "learning_rate": 4.408293080146101e-06, "loss": 0.1474, "step": 2848 }, { "epoch": 0.2624959690422444, "grad_norm": 0.9145537165320866, "learning_rate": 4.407800861650056e-06, "loss": 0.1586, "step": 2849 }, { "epoch": 0.2625881052195144, "grad_norm": 0.8977352205218089, "learning_rate": 4.407308466014496e-06, "loss": 0.1486, "step": 2850 }, { "epoch": 0.2626802413967845, "grad_norm": 0.9106247178852294, "learning_rate": 4.406815893285139e-06, "loss": 0.1653, "step": 2851 }, { "epoch": 0.26277237757405447, "grad_norm": 1.0115187545849416, "learning_rate": 4.406323143507721e-06, "loss": 0.1612, "step": 2852 }, { "epoch": 0.26286451375132447, "grad_norm": 0.9432111102878478, "learning_rate": 4.405830216727995e-06, "loss": 0.1501, "step": 2853 }, { "epoch": 0.26295664992859447, "grad_norm": 0.8641598652236985, "learning_rate": 4.405337112991728e-06, "loss": 0.1483, "step": 2854 }, { "epoch": 0.26304878610586446, "grad_norm": 0.8641392406260407, "learning_rate": 4.404843832344704e-06, "loss": 0.1653, "step": 2855 }, { "epoch": 0.26314092228313446, "grad_norm": 0.9564993972527854, "learning_rate": 4.404350374832725e-06, "loss": 0.1611, "step": 2856 }, { "epoch": 0.26323305846040446, "grad_norm": 0.8752998536245241, "learning_rate": 4.40385674050161e-06, "loss": 0.1537, "step": 2857 }, { "epoch": 0.2633251946376745, "grad_norm": 0.8546718345682361, "learning_rate": 4.403362929397191e-06, "loss": 0.1559, "step": 2858 }, { "epoch": 0.2634173308149445, "grad_norm": 0.8856269547457143, "learning_rate": 4.40286894156532e-06, "loss": 0.1531, "step": 2859 }, { "epoch": 0.2635094669922145, "grad_norm": 0.8966875055653404, "learning_rate": 4.402374777051862e-06, "loss": 0.1676, "step": 2860 }, { "epoch": 0.2636016031694845, "grad_norm": 0.9282672278947397, "learning_rate": 4.401880435902701e-06, "loss": 0.1676, "step": 2861 }, { "epoch": 0.2636937393467545, "grad_norm": 1.0165990167745647, "learning_rate": 4.401385918163737e-06, "loss": 0.1859, "step": 2862 }, { "epoch": 0.2637858755240245, "grad_norm": 0.8764188958015945, "learning_rate": 4.400891223880888e-06, "loss": 0.1488, "step": 2863 }, { "epoch": 0.2638780117012945, "grad_norm": 0.8798735349276867, "learning_rate": 4.400396353100081e-06, "loss": 0.1599, "step": 2864 }, { "epoch": 0.26397014787856454, "grad_norm": 0.9295437632531321, "learning_rate": 4.39990130586727e-06, "loss": 0.1597, "step": 2865 }, { "epoch": 0.26406228405583454, "grad_norm": 0.8596446043652288, "learning_rate": 4.399406082228418e-06, "loss": 0.1533, "step": 2866 }, { "epoch": 0.26415442023310454, "grad_norm": 0.9534196281125018, "learning_rate": 4.398910682229507e-06, "loss": 0.1718, "step": 2867 }, { "epoch": 0.26424655641037453, "grad_norm": 0.9279560321709486, "learning_rate": 4.398415105916535e-06, "loss": 0.1646, "step": 2868 }, { "epoch": 0.26433869258764453, "grad_norm": 0.8749195173976764, "learning_rate": 4.397919353335516e-06, "loss": 0.1513, "step": 2869 }, { "epoch": 0.2644308287649145, "grad_norm": 0.9097910090711311, "learning_rate": 4.3974234245324795e-06, "loss": 0.1705, "step": 2870 }, { "epoch": 0.2645229649421845, "grad_norm": 0.9171732739684699, "learning_rate": 4.396927319553476e-06, "loss": 0.1718, "step": 2871 }, { "epoch": 0.2646151011194546, "grad_norm": 0.8748961263126276, "learning_rate": 4.396431038444565e-06, "loss": 0.1576, "step": 2872 }, { "epoch": 0.2647072372967246, "grad_norm": 0.9224132153233574, "learning_rate": 4.3959345812518285e-06, "loss": 0.1666, "step": 2873 }, { "epoch": 0.26479937347399457, "grad_norm": 0.943886286209004, "learning_rate": 4.395437948021362e-06, "loss": 0.1623, "step": 2874 }, { "epoch": 0.26489150965126457, "grad_norm": 0.9233832263520497, "learning_rate": 4.394941138799278e-06, "loss": 0.1654, "step": 2875 }, { "epoch": 0.26498364582853456, "grad_norm": 0.8726674273082234, "learning_rate": 4.3944441536317055e-06, "loss": 0.1425, "step": 2876 }, { "epoch": 0.26507578200580456, "grad_norm": 1.01831125875492, "learning_rate": 4.3939469925647895e-06, "loss": 0.1705, "step": 2877 }, { "epoch": 0.26516791818307456, "grad_norm": 0.8873960103462912, "learning_rate": 4.3934496556446916e-06, "loss": 0.1475, "step": 2878 }, { "epoch": 0.2652600543603446, "grad_norm": 0.890705716689305, "learning_rate": 4.3929521429175895e-06, "loss": 0.1523, "step": 2879 }, { "epoch": 0.2653521905376146, "grad_norm": 0.8998449017070324, "learning_rate": 4.392454454429676e-06, "loss": 0.1703, "step": 2880 }, { "epoch": 0.2654443267148846, "grad_norm": 0.8815761058067845, "learning_rate": 4.391956590227164e-06, "loss": 0.1603, "step": 2881 }, { "epoch": 0.2655364628921546, "grad_norm": 0.9434933743462255, "learning_rate": 4.391458550356278e-06, "loss": 0.1619, "step": 2882 }, { "epoch": 0.2656285990694246, "grad_norm": 0.9342716887434068, "learning_rate": 4.390960334863263e-06, "loss": 0.1533, "step": 2883 }, { "epoch": 0.2657207352466946, "grad_norm": 0.9839182352177902, "learning_rate": 4.390461943794377e-06, "loss": 0.1775, "step": 2884 }, { "epoch": 0.26581287142396465, "grad_norm": 0.8677512839404379, "learning_rate": 4.389963377195896e-06, "loss": 0.162, "step": 2885 }, { "epoch": 0.26590500760123464, "grad_norm": 0.886479980018512, "learning_rate": 4.389464635114112e-06, "loss": 0.1563, "step": 2886 }, { "epoch": 0.26599714377850464, "grad_norm": 0.9815790472166931, "learning_rate": 4.388965717595334e-06, "loss": 0.1867, "step": 2887 }, { "epoch": 0.26608927995577464, "grad_norm": 0.8801062088344992, "learning_rate": 4.3884666246858846e-06, "loss": 0.1565, "step": 2888 }, { "epoch": 0.26618141613304463, "grad_norm": 0.9784498336027216, "learning_rate": 4.387967356432107e-06, "loss": 0.17, "step": 2889 }, { "epoch": 0.26627355231031463, "grad_norm": 0.9091918449537576, "learning_rate": 4.3874679128803565e-06, "loss": 0.1535, "step": 2890 }, { "epoch": 0.26636568848758463, "grad_norm": 0.9058091255127974, "learning_rate": 4.386968294077007e-06, "loss": 0.1599, "step": 2891 }, { "epoch": 0.2664578246648547, "grad_norm": 0.9314719924346809, "learning_rate": 4.386468500068449e-06, "loss": 0.1663, "step": 2892 }, { "epoch": 0.2665499608421247, "grad_norm": 0.9439398416546473, "learning_rate": 4.385968530901087e-06, "loss": 0.1737, "step": 2893 }, { "epoch": 0.2666420970193947, "grad_norm": 0.9627391950050143, "learning_rate": 4.3854683866213445e-06, "loss": 0.1613, "step": 2894 }, { "epoch": 0.26673423319666467, "grad_norm": 0.9709489598335683, "learning_rate": 4.384968067275659e-06, "loss": 0.1749, "step": 2895 }, { "epoch": 0.26682636937393467, "grad_norm": 0.9876304597584598, "learning_rate": 4.384467572910486e-06, "loss": 0.1744, "step": 2896 }, { "epoch": 0.26691850555120467, "grad_norm": 0.9806142595268034, "learning_rate": 4.383966903572295e-06, "loss": 0.1669, "step": 2897 }, { "epoch": 0.26701064172847466, "grad_norm": 0.8390793688951929, "learning_rate": 4.383466059307576e-06, "loss": 0.1529, "step": 2898 }, { "epoch": 0.2671027779057447, "grad_norm": 0.9378584287300362, "learning_rate": 4.382965040162829e-06, "loss": 0.1678, "step": 2899 }, { "epoch": 0.2671949140830147, "grad_norm": 0.9185936272777794, "learning_rate": 4.3824638461845764e-06, "loss": 0.1624, "step": 2900 }, { "epoch": 0.2672870502602847, "grad_norm": 0.8849129207015696, "learning_rate": 4.381962477419352e-06, "loss": 0.1621, "step": 2901 }, { "epoch": 0.2673791864375547, "grad_norm": 1.020300634639128, "learning_rate": 4.3814609339137105e-06, "loss": 0.1697, "step": 2902 }, { "epoch": 0.2674713226148247, "grad_norm": 0.9478899087466803, "learning_rate": 4.380959215714218e-06, "loss": 0.1625, "step": 2903 }, { "epoch": 0.2675634587920947, "grad_norm": 1.0037603195239564, "learning_rate": 4.380457322867461e-06, "loss": 0.1615, "step": 2904 }, { "epoch": 0.2676555949693647, "grad_norm": 0.9466541604314458, "learning_rate": 4.379955255420037e-06, "loss": 0.1636, "step": 2905 }, { "epoch": 0.26774773114663475, "grad_norm": 0.8951466466126106, "learning_rate": 4.379453013418567e-06, "loss": 0.1722, "step": 2906 }, { "epoch": 0.26783986732390475, "grad_norm": 0.9318669615219027, "learning_rate": 4.378950596909683e-06, "loss": 0.1661, "step": 2907 }, { "epoch": 0.26793200350117474, "grad_norm": 1.0399320939535317, "learning_rate": 4.378448005940031e-06, "loss": 0.1661, "step": 2908 }, { "epoch": 0.26802413967844474, "grad_norm": 1.0157712583133751, "learning_rate": 4.377945240556282e-06, "loss": 0.1779, "step": 2909 }, { "epoch": 0.26811627585571474, "grad_norm": 0.9301603867530266, "learning_rate": 4.3774423008051145e-06, "loss": 0.1634, "step": 2910 }, { "epoch": 0.26820841203298473, "grad_norm": 0.9296110650376944, "learning_rate": 4.376939186733227e-06, "loss": 0.1516, "step": 2911 }, { "epoch": 0.26830054821025473, "grad_norm": 1.0043841351727716, "learning_rate": 4.376435898387334e-06, "loss": 0.1632, "step": 2912 }, { "epoch": 0.2683926843875248, "grad_norm": 0.9963071461738237, "learning_rate": 4.375932435814167e-06, "loss": 0.1846, "step": 2913 }, { "epoch": 0.2684848205647948, "grad_norm": 0.9703723999360877, "learning_rate": 4.37542879906047e-06, "loss": 0.1675, "step": 2914 }, { "epoch": 0.2685769567420648, "grad_norm": 0.8830939679657934, "learning_rate": 4.374924988173008e-06, "loss": 0.1608, "step": 2915 }, { "epoch": 0.2686690929193348, "grad_norm": 0.9540176107967079, "learning_rate": 4.374421003198559e-06, "loss": 0.1575, "step": 2916 }, { "epoch": 0.26876122909660477, "grad_norm": 0.9928441377656393, "learning_rate": 4.373916844183918e-06, "loss": 0.1587, "step": 2917 }, { "epoch": 0.26885336527387477, "grad_norm": 0.9455759756236314, "learning_rate": 4.373412511175897e-06, "loss": 0.1575, "step": 2918 }, { "epoch": 0.2689455014511448, "grad_norm": 0.9637716298737328, "learning_rate": 4.372908004221322e-06, "loss": 0.1687, "step": 2919 }, { "epoch": 0.2690376376284148, "grad_norm": 0.9425788092717383, "learning_rate": 4.372403323367037e-06, "loss": 0.1538, "step": 2920 }, { "epoch": 0.2691297738056848, "grad_norm": 0.935187533150534, "learning_rate": 4.371898468659903e-06, "loss": 0.1701, "step": 2921 }, { "epoch": 0.2692219099829548, "grad_norm": 0.9553637112591826, "learning_rate": 4.371393440146794e-06, "loss": 0.1654, "step": 2922 }, { "epoch": 0.2693140461602248, "grad_norm": 0.9022261913400997, "learning_rate": 4.370888237874602e-06, "loss": 0.1494, "step": 2923 }, { "epoch": 0.2694061823374948, "grad_norm": 0.8851623605601451, "learning_rate": 4.370382861890237e-06, "loss": 0.1459, "step": 2924 }, { "epoch": 0.2694983185147648, "grad_norm": 0.9899899922542382, "learning_rate": 4.369877312240621e-06, "loss": 0.17, "step": 2925 }, { "epoch": 0.26959045469203485, "grad_norm": 0.8982399301119234, "learning_rate": 4.369371588972696e-06, "loss": 0.1581, "step": 2926 }, { "epoch": 0.26968259086930485, "grad_norm": 0.9199727306968912, "learning_rate": 4.368865692133417e-06, "loss": 0.1628, "step": 2927 }, { "epoch": 0.26977472704657485, "grad_norm": 0.916780158787217, "learning_rate": 4.3683596217697585e-06, "loss": 0.1578, "step": 2928 }, { "epoch": 0.26986686322384484, "grad_norm": 1.0499551006536163, "learning_rate": 4.367853377928707e-06, "loss": 0.1777, "step": 2929 }, { "epoch": 0.26995899940111484, "grad_norm": 0.9431552116102009, "learning_rate": 4.367346960657269e-06, "loss": 0.1523, "step": 2930 }, { "epoch": 0.27005113557838484, "grad_norm": 0.9014934644247001, "learning_rate": 4.366840370002465e-06, "loss": 0.1602, "step": 2931 }, { "epoch": 0.27014327175565483, "grad_norm": 1.0011043047400634, "learning_rate": 4.366333606011331e-06, "loss": 0.1674, "step": 2932 }, { "epoch": 0.2702354079329249, "grad_norm": 1.0340166384513643, "learning_rate": 4.365826668730921e-06, "loss": 0.1662, "step": 2933 }, { "epoch": 0.2703275441101949, "grad_norm": 0.9098261155448165, "learning_rate": 4.365319558208304e-06, "loss": 0.1656, "step": 2934 }, { "epoch": 0.2704196802874649, "grad_norm": 1.0370324645257154, "learning_rate": 4.3648122744905654e-06, "loss": 0.1918, "step": 2935 }, { "epoch": 0.2705118164647349, "grad_norm": 0.9814751176480726, "learning_rate": 4.364304817624806e-06, "loss": 0.1629, "step": 2936 }, { "epoch": 0.2706039526420049, "grad_norm": 0.8429499591444676, "learning_rate": 4.363797187658144e-06, "loss": 0.1524, "step": 2937 }, { "epoch": 0.27069608881927487, "grad_norm": 1.0208847925208175, "learning_rate": 4.363289384637713e-06, "loss": 0.1677, "step": 2938 }, { "epoch": 0.27078822499654487, "grad_norm": 0.9106234440617202, "learning_rate": 4.362781408610662e-06, "loss": 0.1596, "step": 2939 }, { "epoch": 0.2708803611738149, "grad_norm": 0.8962143929787811, "learning_rate": 4.362273259624156e-06, "loss": 0.1587, "step": 2940 }, { "epoch": 0.2709724973510849, "grad_norm": 0.9695209952030241, "learning_rate": 4.3617649377253775e-06, "loss": 0.1713, "step": 2941 }, { "epoch": 0.2710646335283549, "grad_norm": 0.9189259739380802, "learning_rate": 4.361256442961524e-06, "loss": 0.1703, "step": 2942 }, { "epoch": 0.2711567697056249, "grad_norm": 1.001396554375545, "learning_rate": 4.360747775379811e-06, "loss": 0.1704, "step": 2943 }, { "epoch": 0.2712489058828949, "grad_norm": 0.9123763202978219, "learning_rate": 4.3602389350274656e-06, "loss": 0.1591, "step": 2944 }, { "epoch": 0.2713410420601649, "grad_norm": 0.9308820506862105, "learning_rate": 4.359729921951735e-06, "loss": 0.1668, "step": 2945 }, { "epoch": 0.2714331782374349, "grad_norm": 0.9350597956598543, "learning_rate": 4.3592207361998815e-06, "loss": 0.1692, "step": 2946 }, { "epoch": 0.27152531441470495, "grad_norm": 0.9473956643289153, "learning_rate": 4.358711377819181e-06, "loss": 0.1562, "step": 2947 }, { "epoch": 0.27161745059197495, "grad_norm": 0.9222103923820115, "learning_rate": 4.358201846856931e-06, "loss": 0.1578, "step": 2948 }, { "epoch": 0.27170958676924495, "grad_norm": 0.8718693024219112, "learning_rate": 4.357692143360438e-06, "loss": 0.1602, "step": 2949 }, { "epoch": 0.27180172294651495, "grad_norm": 0.8969897252603595, "learning_rate": 4.35718226737703e-06, "loss": 0.1607, "step": 2950 }, { "epoch": 0.27189385912378494, "grad_norm": 0.9386779288297119, "learning_rate": 4.35667221895405e-06, "loss": 0.1724, "step": 2951 }, { "epoch": 0.27198599530105494, "grad_norm": 0.9094775211356984, "learning_rate": 4.356161998138853e-06, "loss": 0.1724, "step": 2952 }, { "epoch": 0.272078131478325, "grad_norm": 0.8950162107788308, "learning_rate": 4.355651604978815e-06, "loss": 0.1597, "step": 2953 }, { "epoch": 0.272170267655595, "grad_norm": 0.8761295316756618, "learning_rate": 4.355141039521325e-06, "loss": 0.16, "step": 2954 }, { "epoch": 0.272262403832865, "grad_norm": 0.862109488387714, "learning_rate": 4.3546303018137915e-06, "loss": 0.1512, "step": 2955 }, { "epoch": 0.272354540010135, "grad_norm": 0.9616828736913641, "learning_rate": 4.354119391903634e-06, "loss": 0.1654, "step": 2956 }, { "epoch": 0.272446676187405, "grad_norm": 0.8860353296993886, "learning_rate": 4.353608309838292e-06, "loss": 0.1403, "step": 2957 }, { "epoch": 0.272538812364675, "grad_norm": 0.9628950022603051, "learning_rate": 4.353097055665219e-06, "loss": 0.1566, "step": 2958 }, { "epoch": 0.272630948541945, "grad_norm": 0.9596948029087774, "learning_rate": 4.352585629431883e-06, "loss": 0.165, "step": 2959 }, { "epoch": 0.272723084719215, "grad_norm": 0.8557258049846965, "learning_rate": 4.352074031185774e-06, "loss": 0.1573, "step": 2960 }, { "epoch": 0.272815220896485, "grad_norm": 0.9269594433765076, "learning_rate": 4.351562260974391e-06, "loss": 0.1749, "step": 2961 }, { "epoch": 0.272907357073755, "grad_norm": 0.9173990269761251, "learning_rate": 4.3510503188452535e-06, "loss": 0.1693, "step": 2962 }, { "epoch": 0.272999493251025, "grad_norm": 0.9427467660265008, "learning_rate": 4.350538204845895e-06, "loss": 0.1641, "step": 2963 }, { "epoch": 0.273091629428295, "grad_norm": 0.9381162953466216, "learning_rate": 4.350025919023864e-06, "loss": 0.1714, "step": 2964 }, { "epoch": 0.273183765605565, "grad_norm": 0.8947538202851707, "learning_rate": 4.349513461426728e-06, "loss": 0.1777, "step": 2965 }, { "epoch": 0.273275901782835, "grad_norm": 0.9430666551395096, "learning_rate": 4.349000832102067e-06, "loss": 0.1606, "step": 2966 }, { "epoch": 0.27336803796010506, "grad_norm": 0.9077354832576249, "learning_rate": 4.348488031097481e-06, "loss": 0.1603, "step": 2967 }, { "epoch": 0.27346017413737506, "grad_norm": 0.9182053591196929, "learning_rate": 4.3479750584605814e-06, "loss": 0.16, "step": 2968 }, { "epoch": 0.27355231031464505, "grad_norm": 0.9057263697989837, "learning_rate": 4.347461914238999e-06, "loss": 0.1662, "step": 2969 }, { "epoch": 0.27364444649191505, "grad_norm": 0.9164232624325025, "learning_rate": 4.34694859848038e-06, "loss": 0.162, "step": 2970 }, { "epoch": 0.27373658266918505, "grad_norm": 0.9369492838326873, "learning_rate": 4.346435111232383e-06, "loss": 0.1673, "step": 2971 }, { "epoch": 0.27382871884645504, "grad_norm": 0.9215110313361893, "learning_rate": 4.345921452542689e-06, "loss": 0.1578, "step": 2972 }, { "epoch": 0.27392085502372504, "grad_norm": 0.9152849266863357, "learning_rate": 4.345407622458988e-06, "loss": 0.1673, "step": 2973 }, { "epoch": 0.2740129912009951, "grad_norm": 0.9041598610869491, "learning_rate": 4.3448936210289916e-06, "loss": 0.1457, "step": 2974 }, { "epoch": 0.2741051273782651, "grad_norm": 0.9426336536924195, "learning_rate": 4.344379448300423e-06, "loss": 0.1704, "step": 2975 }, { "epoch": 0.2741972635555351, "grad_norm": 0.8841261534075182, "learning_rate": 4.343865104321026e-06, "loss": 0.1546, "step": 2976 }, { "epoch": 0.2742893997328051, "grad_norm": 0.8945634429252811, "learning_rate": 4.3433505891385534e-06, "loss": 0.1668, "step": 2977 }, { "epoch": 0.2743815359100751, "grad_norm": 0.8960031810119188, "learning_rate": 4.342835902800782e-06, "loss": 0.1692, "step": 2978 }, { "epoch": 0.2744736720873451, "grad_norm": 0.9376536707630264, "learning_rate": 4.342321045355498e-06, "loss": 0.1679, "step": 2979 }, { "epoch": 0.2745658082646151, "grad_norm": 0.9040938912188998, "learning_rate": 4.341806016850506e-06, "loss": 0.1564, "step": 2980 }, { "epoch": 0.2746579444418851, "grad_norm": 0.9501565919833976, "learning_rate": 4.341290817333628e-06, "loss": 0.173, "step": 2981 }, { "epoch": 0.2747500806191551, "grad_norm": 0.8828775263021356, "learning_rate": 4.340775446852699e-06, "loss": 0.1559, "step": 2982 }, { "epoch": 0.2748422167964251, "grad_norm": 0.850067548106956, "learning_rate": 4.340259905455572e-06, "loss": 0.1516, "step": 2983 }, { "epoch": 0.2749343529736951, "grad_norm": 0.9603013308750544, "learning_rate": 4.339744193190114e-06, "loss": 0.1713, "step": 2984 }, { "epoch": 0.2750264891509651, "grad_norm": 1.0113076687699456, "learning_rate": 4.339228310104211e-06, "loss": 0.1827, "step": 2985 }, { "epoch": 0.2751186253282351, "grad_norm": 0.9572529546575894, "learning_rate": 4.338712256245761e-06, "loss": 0.1586, "step": 2986 }, { "epoch": 0.27521076150550516, "grad_norm": 0.9569799508875555, "learning_rate": 4.3381960316626795e-06, "loss": 0.1591, "step": 2987 }, { "epoch": 0.27530289768277516, "grad_norm": 0.8482125697273889, "learning_rate": 4.337679636402898e-06, "loss": 0.1595, "step": 2988 }, { "epoch": 0.27539503386004516, "grad_norm": 0.8838971352649014, "learning_rate": 4.3371630705143665e-06, "loss": 0.155, "step": 2989 }, { "epoch": 0.27548717003731515, "grad_norm": 0.9940918802102938, "learning_rate": 4.336646334045045e-06, "loss": 0.1565, "step": 2990 }, { "epoch": 0.27557930621458515, "grad_norm": 0.8826585591020982, "learning_rate": 4.336129427042913e-06, "loss": 0.1486, "step": 2991 }, { "epoch": 0.27567144239185515, "grad_norm": 0.9645792000767957, "learning_rate": 4.335612349555967e-06, "loss": 0.1706, "step": 2992 }, { "epoch": 0.27576357856912515, "grad_norm": 0.9157431148804065, "learning_rate": 4.335095101632217e-06, "loss": 0.1703, "step": 2993 }, { "epoch": 0.2758557147463952, "grad_norm": 0.9536633356430607, "learning_rate": 4.334577683319689e-06, "loss": 0.1515, "step": 2994 }, { "epoch": 0.2759478509236652, "grad_norm": 0.9246001577573807, "learning_rate": 4.334060094666426e-06, "loss": 0.1623, "step": 2995 }, { "epoch": 0.2760399871009352, "grad_norm": 0.9440481150724693, "learning_rate": 4.333542335720485e-06, "loss": 0.1736, "step": 2996 }, { "epoch": 0.2761321232782052, "grad_norm": 1.1704161241743252, "learning_rate": 4.3330244065299424e-06, "loss": 0.1663, "step": 2997 }, { "epoch": 0.2762242594554752, "grad_norm": 0.9360542946940662, "learning_rate": 4.332506307142885e-06, "loss": 0.1557, "step": 2998 }, { "epoch": 0.2763163956327452, "grad_norm": 0.8877005689033058, "learning_rate": 4.33198803760742e-06, "loss": 0.1623, "step": 2999 }, { "epoch": 0.2764085318100152, "grad_norm": 0.8917013234852214, "learning_rate": 4.3314695979716684e-06, "loss": 0.1507, "step": 3000 }, { "epoch": 0.2764085318100152, "eval_loss": 0.1629796177148819, "eval_runtime": 299.43, "eval_samples_per_second": 23.435, "eval_steps_per_second": 2.932, "step": 3000 }, { "epoch": 0.27650066798728523, "grad_norm": 0.9388370081009483, "learning_rate": 4.330950988283767e-06, "loss": 0.1647, "step": 3001 }, { "epoch": 0.27659280416455523, "grad_norm": 0.8715944561126756, "learning_rate": 4.330432208591871e-06, "loss": 0.1517, "step": 3002 }, { "epoch": 0.2766849403418252, "grad_norm": 0.9239641362222594, "learning_rate": 4.329913258944146e-06, "loss": 0.1629, "step": 3003 }, { "epoch": 0.2767770765190952, "grad_norm": 0.9607187053118423, "learning_rate": 4.329394139388779e-06, "loss": 0.1547, "step": 3004 }, { "epoch": 0.2768692126963652, "grad_norm": 0.9877687003566187, "learning_rate": 4.328874849973968e-06, "loss": 0.166, "step": 3005 }, { "epoch": 0.2769613488736352, "grad_norm": 0.9366916037002174, "learning_rate": 4.328355390747931e-06, "loss": 0.1685, "step": 3006 }, { "epoch": 0.2770534850509052, "grad_norm": 0.9333814868699946, "learning_rate": 4.3278357617589e-06, "loss": 0.1578, "step": 3007 }, { "epoch": 0.27714562122817527, "grad_norm": 0.967607732134371, "learning_rate": 4.327315963055121e-06, "loss": 0.1584, "step": 3008 }, { "epoch": 0.27723775740544526, "grad_norm": 0.9058729702202627, "learning_rate": 4.326795994684858e-06, "loss": 0.1629, "step": 3009 }, { "epoch": 0.27732989358271526, "grad_norm": 0.8914686442256584, "learning_rate": 4.326275856696391e-06, "loss": 0.1462, "step": 3010 }, { "epoch": 0.27742202975998526, "grad_norm": 0.9350169815870337, "learning_rate": 4.325755549138014e-06, "loss": 0.1626, "step": 3011 }, { "epoch": 0.27751416593725525, "grad_norm": 0.986767040362785, "learning_rate": 4.325235072058037e-06, "loss": 0.1682, "step": 3012 }, { "epoch": 0.27760630211452525, "grad_norm": 0.9304736612317988, "learning_rate": 4.324714425504788e-06, "loss": 0.1681, "step": 3013 }, { "epoch": 0.27769843829179525, "grad_norm": 1.0173816582289463, "learning_rate": 4.324193609526607e-06, "loss": 0.1493, "step": 3014 }, { "epoch": 0.2777905744690653, "grad_norm": 0.9479003398421398, "learning_rate": 4.323672624171854e-06, "loss": 0.1716, "step": 3015 }, { "epoch": 0.2778827106463353, "grad_norm": 0.9340243186194851, "learning_rate": 4.323151469488902e-06, "loss": 0.1596, "step": 3016 }, { "epoch": 0.2779748468236053, "grad_norm": 0.8575012092454355, "learning_rate": 4.322630145526139e-06, "loss": 0.1603, "step": 3017 }, { "epoch": 0.2780669830008753, "grad_norm": 0.8844930689599675, "learning_rate": 4.322108652331971e-06, "loss": 0.147, "step": 3018 }, { "epoch": 0.2781591191781453, "grad_norm": 0.9506160437467802, "learning_rate": 4.321586989954819e-06, "loss": 0.152, "step": 3019 }, { "epoch": 0.2782512553554153, "grad_norm": 0.9913035500170787, "learning_rate": 4.3210651584431186e-06, "loss": 0.1708, "step": 3020 }, { "epoch": 0.27834339153268534, "grad_norm": 0.9403771168318222, "learning_rate": 4.320543157845321e-06, "loss": 0.1664, "step": 3021 }, { "epoch": 0.27843552770995533, "grad_norm": 0.9888529408708953, "learning_rate": 4.320020988209898e-06, "loss": 0.1524, "step": 3022 }, { "epoch": 0.27852766388722533, "grad_norm": 1.0197212346707907, "learning_rate": 4.319498649585329e-06, "loss": 0.1837, "step": 3023 }, { "epoch": 0.2786198000644953, "grad_norm": 1.041097294154436, "learning_rate": 4.318976142020113e-06, "loss": 0.1815, "step": 3024 }, { "epoch": 0.2787119362417653, "grad_norm": 0.9489619575370245, "learning_rate": 4.318453465562768e-06, "loss": 0.1648, "step": 3025 }, { "epoch": 0.2788040724190353, "grad_norm": 0.8724471409134508, "learning_rate": 4.317930620261823e-06, "loss": 0.1541, "step": 3026 }, { "epoch": 0.2788962085963053, "grad_norm": 0.9104960457396443, "learning_rate": 4.317407606165825e-06, "loss": 0.1693, "step": 3027 }, { "epoch": 0.27898834477357537, "grad_norm": 0.9707721567063771, "learning_rate": 4.3168844233233345e-06, "loss": 0.1672, "step": 3028 }, { "epoch": 0.27908048095084537, "grad_norm": 0.9680840756993969, "learning_rate": 4.316361071782929e-06, "loss": 0.1674, "step": 3029 }, { "epoch": 0.27917261712811536, "grad_norm": 0.9071358782871782, "learning_rate": 4.315837551593203e-06, "loss": 0.1566, "step": 3030 }, { "epoch": 0.27926475330538536, "grad_norm": 0.8903474335652056, "learning_rate": 4.315313862802766e-06, "loss": 0.156, "step": 3031 }, { "epoch": 0.27935688948265536, "grad_norm": 0.9724942706639386, "learning_rate": 4.31479000546024e-06, "loss": 0.1767, "step": 3032 }, { "epoch": 0.27944902565992535, "grad_norm": 0.9528363083561447, "learning_rate": 4.314265979614267e-06, "loss": 0.1708, "step": 3033 }, { "epoch": 0.27954116183719535, "grad_norm": 0.8441521393036625, "learning_rate": 4.313741785313503e-06, "loss": 0.1554, "step": 3034 }, { "epoch": 0.2796332980144654, "grad_norm": 0.8782369763374204, "learning_rate": 4.313217422606618e-06, "loss": 0.1569, "step": 3035 }, { "epoch": 0.2797254341917354, "grad_norm": 0.9718559574608084, "learning_rate": 4.312692891542302e-06, "loss": 0.1661, "step": 3036 }, { "epoch": 0.2798175703690054, "grad_norm": 0.9446994124364056, "learning_rate": 4.312168192169254e-06, "loss": 0.1603, "step": 3037 }, { "epoch": 0.2799097065462754, "grad_norm": 0.8951970904849386, "learning_rate": 4.311643324536195e-06, "loss": 0.1624, "step": 3038 }, { "epoch": 0.2800018427235454, "grad_norm": 0.9198959941328185, "learning_rate": 4.311118288691859e-06, "loss": 0.1684, "step": 3039 }, { "epoch": 0.2800939789008154, "grad_norm": 0.9587061003460235, "learning_rate": 4.3105930846849945e-06, "loss": 0.1714, "step": 3040 }, { "epoch": 0.2801861150780854, "grad_norm": 0.8302605952313523, "learning_rate": 4.310067712564367e-06, "loss": 0.1448, "step": 3041 }, { "epoch": 0.28027825125535544, "grad_norm": 0.9283440649700089, "learning_rate": 4.3095421723787585e-06, "loss": 0.1672, "step": 3042 }, { "epoch": 0.28037038743262543, "grad_norm": 0.9372916319568307, "learning_rate": 4.309016464176964e-06, "loss": 0.1653, "step": 3043 }, { "epoch": 0.28046252360989543, "grad_norm": 0.9278315387270265, "learning_rate": 4.308490588007796e-06, "loss": 0.1634, "step": 3044 }, { "epoch": 0.28055465978716543, "grad_norm": 0.9396311360757985, "learning_rate": 4.307964543920083e-06, "loss": 0.1662, "step": 3045 }, { "epoch": 0.2806467959644354, "grad_norm": 0.9113997709540158, "learning_rate": 4.3074383319626655e-06, "loss": 0.1594, "step": 3046 }, { "epoch": 0.2807389321417054, "grad_norm": 0.8784488154165262, "learning_rate": 4.306911952184406e-06, "loss": 0.1487, "step": 3047 }, { "epoch": 0.2808310683189754, "grad_norm": 0.9661023236004905, "learning_rate": 4.306385404634177e-06, "loss": 0.1786, "step": 3048 }, { "epoch": 0.28092320449624547, "grad_norm": 0.9097267950273097, "learning_rate": 4.305858689360869e-06, "loss": 0.1492, "step": 3049 }, { "epoch": 0.28101534067351547, "grad_norm": 0.9366899670263596, "learning_rate": 4.3053318064133864e-06, "loss": 0.1637, "step": 3050 }, { "epoch": 0.28110747685078546, "grad_norm": 0.9021081218041649, "learning_rate": 4.3048047558406525e-06, "loss": 0.159, "step": 3051 }, { "epoch": 0.28119961302805546, "grad_norm": 0.9412954829333593, "learning_rate": 4.304277537691602e-06, "loss": 0.1589, "step": 3052 }, { "epoch": 0.28129174920532546, "grad_norm": 0.9059656008131649, "learning_rate": 4.303750152015188e-06, "loss": 0.1675, "step": 3053 }, { "epoch": 0.28138388538259546, "grad_norm": 0.8794175338311734, "learning_rate": 4.3032225988603786e-06, "loss": 0.1537, "step": 3054 }, { "epoch": 0.2814760215598655, "grad_norm": 0.913108805336839, "learning_rate": 4.302694878276157e-06, "loss": 0.1679, "step": 3055 }, { "epoch": 0.2815681577371355, "grad_norm": 0.9989015886002992, "learning_rate": 4.302166990311522e-06, "loss": 0.1825, "step": 3056 }, { "epoch": 0.2816602939144055, "grad_norm": 0.9152409790134869, "learning_rate": 4.301638935015487e-06, "loss": 0.1456, "step": 3057 }, { "epoch": 0.2817524300916755, "grad_norm": 0.8561207996336198, "learning_rate": 4.3011107124370835e-06, "loss": 0.1498, "step": 3058 }, { "epoch": 0.2818445662689455, "grad_norm": 0.9062836968107347, "learning_rate": 4.300582322625356e-06, "loss": 0.1524, "step": 3059 }, { "epoch": 0.2819367024462155, "grad_norm": 1.0121945627292734, "learning_rate": 4.300053765629367e-06, "loss": 0.1708, "step": 3060 }, { "epoch": 0.2820288386234855, "grad_norm": 0.8853915734328843, "learning_rate": 4.299525041498192e-06, "loss": 0.149, "step": 3061 }, { "epoch": 0.28212097480075554, "grad_norm": 0.8736829341581167, "learning_rate": 4.298996150280923e-06, "loss": 0.1581, "step": 3062 }, { "epoch": 0.28221311097802554, "grad_norm": 1.031079870711207, "learning_rate": 4.298467092026668e-06, "loss": 0.1677, "step": 3063 }, { "epoch": 0.28230524715529554, "grad_norm": 0.9303524529887373, "learning_rate": 4.29793786678455e-06, "loss": 0.1517, "step": 3064 }, { "epoch": 0.28239738333256553, "grad_norm": 1.0035979953985423, "learning_rate": 4.297408474603707e-06, "loss": 0.1738, "step": 3065 }, { "epoch": 0.28248951950983553, "grad_norm": 0.9516958507458324, "learning_rate": 4.296878915533294e-06, "loss": 0.1703, "step": 3066 }, { "epoch": 0.2825816556871055, "grad_norm": 0.8632532319262257, "learning_rate": 4.2963491896224806e-06, "loss": 0.1541, "step": 3067 }, { "epoch": 0.2826737918643755, "grad_norm": 0.9264065893787056, "learning_rate": 4.295819296920451e-06, "loss": 0.1602, "step": 3068 }, { "epoch": 0.2827659280416456, "grad_norm": 0.9289191168966096, "learning_rate": 4.295289237476407e-06, "loss": 0.1623, "step": 3069 }, { "epoch": 0.2828580642189156, "grad_norm": 0.8903370320980405, "learning_rate": 4.294759011339564e-06, "loss": 0.1574, "step": 3070 }, { "epoch": 0.28295020039618557, "grad_norm": 0.9863779141764555, "learning_rate": 4.294228618559153e-06, "loss": 0.178, "step": 3071 }, { "epoch": 0.28304233657345557, "grad_norm": 0.8660438063688297, "learning_rate": 4.293698059184423e-06, "loss": 0.1549, "step": 3072 }, { "epoch": 0.28313447275072556, "grad_norm": 0.8464393507295984, "learning_rate": 4.293167333264634e-06, "loss": 0.1516, "step": 3073 }, { "epoch": 0.28322660892799556, "grad_norm": 0.911983641200305, "learning_rate": 4.292636440849065e-06, "loss": 0.1559, "step": 3074 }, { "epoch": 0.28331874510526556, "grad_norm": 0.8761141663534089, "learning_rate": 4.292105381987011e-06, "loss": 0.1531, "step": 3075 }, { "epoch": 0.2834108812825356, "grad_norm": 0.9819629945606868, "learning_rate": 4.291574156727778e-06, "loss": 0.1823, "step": 3076 }, { "epoch": 0.2835030174598056, "grad_norm": 0.92229535525268, "learning_rate": 4.291042765120693e-06, "loss": 0.1551, "step": 3077 }, { "epoch": 0.2835951536370756, "grad_norm": 0.8734506454229878, "learning_rate": 4.290511207215093e-06, "loss": 0.1562, "step": 3078 }, { "epoch": 0.2836872898143456, "grad_norm": 0.9258218141455302, "learning_rate": 4.289979483060336e-06, "loss": 0.1643, "step": 3079 }, { "epoch": 0.2837794259916156, "grad_norm": 0.8863624060949952, "learning_rate": 4.289447592705791e-06, "loss": 0.151, "step": 3080 }, { "epoch": 0.2838715621688856, "grad_norm": 0.9360893187784172, "learning_rate": 4.2889155362008435e-06, "loss": 0.1724, "step": 3081 }, { "epoch": 0.2839636983461556, "grad_norm": 0.9114271743045955, "learning_rate": 4.288383313594897e-06, "loss": 0.1571, "step": 3082 }, { "epoch": 0.28405583452342564, "grad_norm": 0.9140713488133063, "learning_rate": 4.287850924937367e-06, "loss": 0.1625, "step": 3083 }, { "epoch": 0.28414797070069564, "grad_norm": 0.8729642931002742, "learning_rate": 4.287318370277686e-06, "loss": 0.1598, "step": 3084 }, { "epoch": 0.28424010687796564, "grad_norm": 0.8690224870912999, "learning_rate": 4.286785649665302e-06, "loss": 0.1428, "step": 3085 }, { "epoch": 0.28433224305523563, "grad_norm": 0.942851685881234, "learning_rate": 4.286252763149679e-06, "loss": 0.1726, "step": 3086 }, { "epoch": 0.28442437923250563, "grad_norm": 1.0015144979953934, "learning_rate": 4.2857197107802936e-06, "loss": 0.1628, "step": 3087 }, { "epoch": 0.2845165154097756, "grad_norm": 0.8880299301894348, "learning_rate": 4.285186492606641e-06, "loss": 0.1553, "step": 3088 }, { "epoch": 0.2846086515870457, "grad_norm": 0.8486996924762646, "learning_rate": 4.2846531086782315e-06, "loss": 0.1475, "step": 3089 }, { "epoch": 0.2847007877643157, "grad_norm": 0.9489188020398197, "learning_rate": 4.2841195590445875e-06, "loss": 0.1622, "step": 3090 }, { "epoch": 0.2847929239415857, "grad_norm": 1.0150637526631434, "learning_rate": 4.283585843755251e-06, "loss": 0.1593, "step": 3091 }, { "epoch": 0.28488506011885567, "grad_norm": 0.9502841585717497, "learning_rate": 4.283051962859776e-06, "loss": 0.1587, "step": 3092 }, { "epoch": 0.28497719629612567, "grad_norm": 0.9644289682928954, "learning_rate": 4.2825179164077365e-06, "loss": 0.1807, "step": 3093 }, { "epoch": 0.28506933247339566, "grad_norm": 0.893244872345484, "learning_rate": 4.281983704448715e-06, "loss": 0.1543, "step": 3094 }, { "epoch": 0.28516146865066566, "grad_norm": 0.8971291446766364, "learning_rate": 4.281449327032315e-06, "loss": 0.1634, "step": 3095 }, { "epoch": 0.2852536048279357, "grad_norm": 0.8839743026411959, "learning_rate": 4.2809147842081535e-06, "loss": 0.1497, "step": 3096 }, { "epoch": 0.2853457410052057, "grad_norm": 0.8814626684254067, "learning_rate": 4.280380076025863e-06, "loss": 0.1553, "step": 3097 }, { "epoch": 0.2854378771824757, "grad_norm": 0.9336844802468615, "learning_rate": 4.27984520253509e-06, "loss": 0.1746, "step": 3098 }, { "epoch": 0.2855300133597457, "grad_norm": 0.8813308242067851, "learning_rate": 4.279310163785499e-06, "loss": 0.1672, "step": 3099 }, { "epoch": 0.2856221495370157, "grad_norm": 0.92626645246821, "learning_rate": 4.278774959826768e-06, "loss": 0.154, "step": 3100 }, { "epoch": 0.2857142857142857, "grad_norm": 1.0123665912926623, "learning_rate": 4.2782395907085894e-06, "loss": 0.1978, "step": 3101 }, { "epoch": 0.2858064218915557, "grad_norm": 0.9242315379627385, "learning_rate": 4.277704056480674e-06, "loss": 0.1711, "step": 3102 }, { "epoch": 0.28589855806882575, "grad_norm": 0.8996561918227259, "learning_rate": 4.2771683571927455e-06, "loss": 0.1785, "step": 3103 }, { "epoch": 0.28599069424609574, "grad_norm": 0.9052116172117433, "learning_rate": 4.276632492894544e-06, "loss": 0.1484, "step": 3104 }, { "epoch": 0.28608283042336574, "grad_norm": 0.9657865678405, "learning_rate": 4.276096463635825e-06, "loss": 0.1653, "step": 3105 }, { "epoch": 0.28617496660063574, "grad_norm": 0.93706574283875, "learning_rate": 4.275560269466358e-06, "loss": 0.1673, "step": 3106 }, { "epoch": 0.28626710277790574, "grad_norm": 0.9397267517974345, "learning_rate": 4.275023910435928e-06, "loss": 0.1537, "step": 3107 }, { "epoch": 0.28635923895517573, "grad_norm": 0.9004009633111321, "learning_rate": 4.274487386594338e-06, "loss": 0.1538, "step": 3108 }, { "epoch": 0.28645137513244573, "grad_norm": 0.9613421096528934, "learning_rate": 4.273950697991402e-06, "loss": 0.162, "step": 3109 }, { "epoch": 0.2865435113097158, "grad_norm": 0.8638174797582878, "learning_rate": 4.273413844676953e-06, "loss": 0.1519, "step": 3110 }, { "epoch": 0.2866356474869858, "grad_norm": 0.9219609665483445, "learning_rate": 4.272876826700838e-06, "loss": 0.1581, "step": 3111 }, { "epoch": 0.2867277836642558, "grad_norm": 0.9123932775435808, "learning_rate": 4.27233964411292e-06, "loss": 0.1677, "step": 3112 }, { "epoch": 0.28681991984152577, "grad_norm": 0.8596332032462187, "learning_rate": 4.271802296963073e-06, "loss": 0.1476, "step": 3113 }, { "epoch": 0.28691205601879577, "grad_norm": 0.9236043959851093, "learning_rate": 4.271264785301194e-06, "loss": 0.1564, "step": 3114 }, { "epoch": 0.28700419219606577, "grad_norm": 0.9562016916199808, "learning_rate": 4.270727109177188e-06, "loss": 0.17, "step": 3115 }, { "epoch": 0.28709632837333576, "grad_norm": 0.9001979247167449, "learning_rate": 4.270189268640979e-06, "loss": 0.1626, "step": 3116 }, { "epoch": 0.2871884645506058, "grad_norm": 0.8700019211927976, "learning_rate": 4.269651263742507e-06, "loss": 0.1634, "step": 3117 }, { "epoch": 0.2872806007278758, "grad_norm": 0.9996977490972816, "learning_rate": 4.269113094531724e-06, "loss": 0.1679, "step": 3118 }, { "epoch": 0.2873727369051458, "grad_norm": 0.9392105076354225, "learning_rate": 4.268574761058601e-06, "loss": 0.1763, "step": 3119 }, { "epoch": 0.2874648730824158, "grad_norm": 0.9496915781676928, "learning_rate": 4.26803626337312e-06, "loss": 0.1546, "step": 3120 }, { "epoch": 0.2875570092596858, "grad_norm": 0.9427266315727539, "learning_rate": 4.267497601525281e-06, "loss": 0.1687, "step": 3121 }, { "epoch": 0.2876491454369558, "grad_norm": 0.8473345537965931, "learning_rate": 4.266958775565101e-06, "loss": 0.1401, "step": 3122 }, { "epoch": 0.28774128161422585, "grad_norm": 0.9460675493514902, "learning_rate": 4.266419785542607e-06, "loss": 0.1626, "step": 3123 }, { "epoch": 0.28783341779149585, "grad_norm": 0.986061711219792, "learning_rate": 4.265880631507847e-06, "loss": 0.162, "step": 3124 }, { "epoch": 0.28792555396876585, "grad_norm": 0.9533196068906457, "learning_rate": 4.265341313510879e-06, "loss": 0.163, "step": 3125 }, { "epoch": 0.28801769014603584, "grad_norm": 0.9354306059737347, "learning_rate": 4.264801831601781e-06, "loss": 0.17, "step": 3126 }, { "epoch": 0.28810982632330584, "grad_norm": 0.8667926960766736, "learning_rate": 4.264262185830643e-06, "loss": 0.1498, "step": 3127 }, { "epoch": 0.28820196250057584, "grad_norm": 1.0641447059766413, "learning_rate": 4.263722376247571e-06, "loss": 0.1683, "step": 3128 }, { "epoch": 0.28829409867784583, "grad_norm": 0.9032416434200741, "learning_rate": 4.263182402902687e-06, "loss": 0.154, "step": 3129 }, { "epoch": 0.2883862348551159, "grad_norm": 0.8697261860817109, "learning_rate": 4.262642265846127e-06, "loss": 0.1479, "step": 3130 }, { "epoch": 0.2884783710323859, "grad_norm": 0.9237533351121093, "learning_rate": 4.262101965128042e-06, "loss": 0.159, "step": 3131 }, { "epoch": 0.2885705072096559, "grad_norm": 0.9107264611849066, "learning_rate": 4.261561500798601e-06, "loss": 0.1612, "step": 3132 }, { "epoch": 0.2886626433869259, "grad_norm": 0.8700605038024919, "learning_rate": 4.261020872907985e-06, "loss": 0.1582, "step": 3133 }, { "epoch": 0.2887547795641959, "grad_norm": 0.9432745501419812, "learning_rate": 4.26048008150639e-06, "loss": 0.164, "step": 3134 }, { "epoch": 0.28884691574146587, "grad_norm": 0.9282434523299004, "learning_rate": 4.259939126644032e-06, "loss": 0.1679, "step": 3135 }, { "epoch": 0.28893905191873587, "grad_norm": 0.9423611364408566, "learning_rate": 4.259398008371136e-06, "loss": 0.1676, "step": 3136 }, { "epoch": 0.2890311880960059, "grad_norm": 0.8685127726333524, "learning_rate": 4.258856726737945e-06, "loss": 0.1492, "step": 3137 }, { "epoch": 0.2891233242732759, "grad_norm": 0.9771353056369224, "learning_rate": 4.258315281794718e-06, "loss": 0.1631, "step": 3138 }, { "epoch": 0.2892154604505459, "grad_norm": 0.9624194484481019, "learning_rate": 4.257773673591728e-06, "loss": 0.1498, "step": 3139 }, { "epoch": 0.2893075966278159, "grad_norm": 0.9092065792466625, "learning_rate": 4.257231902179263e-06, "loss": 0.1504, "step": 3140 }, { "epoch": 0.2893997328050859, "grad_norm": 0.956521824952388, "learning_rate": 4.256689967607627e-06, "loss": 0.1725, "step": 3141 }, { "epoch": 0.2894918689823559, "grad_norm": 0.9338296990138528, "learning_rate": 4.256147869927137e-06, "loss": 0.1581, "step": 3142 }, { "epoch": 0.2895840051596259, "grad_norm": 0.888802037271971, "learning_rate": 4.25560560918813e-06, "loss": 0.1602, "step": 3143 }, { "epoch": 0.28967614133689595, "grad_norm": 0.9319211913936077, "learning_rate": 4.255063185440953e-06, "loss": 0.1654, "step": 3144 }, { "epoch": 0.28976827751416595, "grad_norm": 0.9786469213747607, "learning_rate": 4.254520598735971e-06, "loss": 0.1824, "step": 3145 }, { "epoch": 0.28986041369143595, "grad_norm": 0.9157519191307902, "learning_rate": 4.253977849123561e-06, "loss": 0.1612, "step": 3146 }, { "epoch": 0.28995254986870594, "grad_norm": 0.9298000384411869, "learning_rate": 4.25343493665412e-06, "loss": 0.15, "step": 3147 }, { "epoch": 0.29004468604597594, "grad_norm": 0.9180849563596113, "learning_rate": 4.252891861378056e-06, "loss": 0.1682, "step": 3148 }, { "epoch": 0.29013682222324594, "grad_norm": 0.970214702091066, "learning_rate": 4.252348623345794e-06, "loss": 0.1724, "step": 3149 }, { "epoch": 0.29022895840051594, "grad_norm": 0.8712777245705906, "learning_rate": 4.2518052226077734e-06, "loss": 0.1471, "step": 3150 }, { "epoch": 0.290321094577786, "grad_norm": 0.8772532104660963, "learning_rate": 4.25126165921445e-06, "loss": 0.1586, "step": 3151 }, { "epoch": 0.290413230755056, "grad_norm": 0.8603518107957989, "learning_rate": 4.250717933216293e-06, "loss": 0.1485, "step": 3152 }, { "epoch": 0.290505366932326, "grad_norm": 0.9476717820121503, "learning_rate": 4.250174044663787e-06, "loss": 0.1641, "step": 3153 }, { "epoch": 0.290597503109596, "grad_norm": 0.9518921904154757, "learning_rate": 4.249629993607433e-06, "loss": 0.1602, "step": 3154 }, { "epoch": 0.290689639286866, "grad_norm": 0.8289642643179239, "learning_rate": 4.249085780097746e-06, "loss": 0.1506, "step": 3155 }, { "epoch": 0.29078177546413597, "grad_norm": 0.9148601434226283, "learning_rate": 4.248541404185255e-06, "loss": 0.1575, "step": 3156 }, { "epoch": 0.290873911641406, "grad_norm": 0.944322099578078, "learning_rate": 4.247996865920509e-06, "loss": 0.1676, "step": 3157 }, { "epoch": 0.290966047818676, "grad_norm": 1.0149429041739264, "learning_rate": 4.247452165354064e-06, "loss": 0.1757, "step": 3158 }, { "epoch": 0.291058183995946, "grad_norm": 0.8897165675585696, "learning_rate": 4.246907302536497e-06, "loss": 0.1503, "step": 3159 }, { "epoch": 0.291150320173216, "grad_norm": 0.9069588057960449, "learning_rate": 4.246362277518399e-06, "loss": 0.1633, "step": 3160 }, { "epoch": 0.291242456350486, "grad_norm": 0.8724677368674314, "learning_rate": 4.245817090350377e-06, "loss": 0.1507, "step": 3161 }, { "epoch": 0.291334592527756, "grad_norm": 0.8859929836695598, "learning_rate": 4.245271741083049e-06, "loss": 0.1669, "step": 3162 }, { "epoch": 0.291426728705026, "grad_norm": 0.990522122817011, "learning_rate": 4.244726229767052e-06, "loss": 0.1826, "step": 3163 }, { "epoch": 0.29151886488229606, "grad_norm": 0.8546328526513989, "learning_rate": 4.2441805564530366e-06, "loss": 0.1501, "step": 3164 }, { "epoch": 0.29161100105956606, "grad_norm": 0.9211953614605264, "learning_rate": 4.2436347211916695e-06, "loss": 0.1639, "step": 3165 }, { "epoch": 0.29170313723683605, "grad_norm": 1.1441021991526923, "learning_rate": 4.243088724033632e-06, "loss": 0.1586, "step": 3166 }, { "epoch": 0.29179527341410605, "grad_norm": 0.9121448781527541, "learning_rate": 4.242542565029617e-06, "loss": 0.1676, "step": 3167 }, { "epoch": 0.29188740959137605, "grad_norm": 0.8570822660987969, "learning_rate": 4.241996244230338e-06, "loss": 0.1572, "step": 3168 }, { "epoch": 0.29197954576864604, "grad_norm": 0.8798918108299641, "learning_rate": 4.24144976168652e-06, "loss": 0.1552, "step": 3169 }, { "epoch": 0.29207168194591604, "grad_norm": 0.9012805667976503, "learning_rate": 4.240903117448904e-06, "loss": 0.1608, "step": 3170 }, { "epoch": 0.2921638181231861, "grad_norm": 0.8268739629189876, "learning_rate": 4.240356311568247e-06, "loss": 0.1454, "step": 3171 }, { "epoch": 0.2922559543004561, "grad_norm": 0.9519215570676918, "learning_rate": 4.239809344095319e-06, "loss": 0.1645, "step": 3172 }, { "epoch": 0.2923480904777261, "grad_norm": 0.9360762926839049, "learning_rate": 4.239262215080906e-06, "loss": 0.1584, "step": 3173 }, { "epoch": 0.2924402266549961, "grad_norm": 0.9743162418622031, "learning_rate": 4.238714924575809e-06, "loss": 0.185, "step": 3174 }, { "epoch": 0.2925323628322661, "grad_norm": 0.8853535313691572, "learning_rate": 4.238167472630844e-06, "loss": 0.1475, "step": 3175 }, { "epoch": 0.2926244990095361, "grad_norm": 0.9429373674217792, "learning_rate": 4.237619859296842e-06, "loss": 0.1615, "step": 3176 }, { "epoch": 0.2927166351868061, "grad_norm": 0.8754838330157808, "learning_rate": 4.237072084624649e-06, "loss": 0.141, "step": 3177 }, { "epoch": 0.2928087713640761, "grad_norm": 0.9265393674777754, "learning_rate": 4.2365241486651275e-06, "loss": 0.1543, "step": 3178 }, { "epoch": 0.2929009075413461, "grad_norm": 0.892789764988484, "learning_rate": 4.235976051469151e-06, "loss": 0.1626, "step": 3179 }, { "epoch": 0.2929930437186161, "grad_norm": 0.8348631663386511, "learning_rate": 4.23542779308761e-06, "loss": 0.1429, "step": 3180 }, { "epoch": 0.2930851798958861, "grad_norm": 0.8821394531522185, "learning_rate": 4.234879373571413e-06, "loss": 0.162, "step": 3181 }, { "epoch": 0.2931773160731561, "grad_norm": 0.8806914298120246, "learning_rate": 4.234330792971479e-06, "loss": 0.1632, "step": 3182 }, { "epoch": 0.2932694522504261, "grad_norm": 0.8918627316336875, "learning_rate": 4.233782051338745e-06, "loss": 0.1617, "step": 3183 }, { "epoch": 0.2933615884276961, "grad_norm": 0.8678930306994892, "learning_rate": 4.23323314872416e-06, "loss": 0.1533, "step": 3184 }, { "epoch": 0.29345372460496616, "grad_norm": 0.9178630543984329, "learning_rate": 4.232684085178691e-06, "loss": 0.1649, "step": 3185 }, { "epoch": 0.29354586078223616, "grad_norm": 0.8753634315475634, "learning_rate": 4.232134860753318e-06, "loss": 0.1673, "step": 3186 }, { "epoch": 0.29363799695950615, "grad_norm": 0.8870097305860352, "learning_rate": 4.231585475499037e-06, "loss": 0.1448, "step": 3187 }, { "epoch": 0.29373013313677615, "grad_norm": 0.9544469402594513, "learning_rate": 4.231035929466858e-06, "loss": 0.1595, "step": 3188 }, { "epoch": 0.29382226931404615, "grad_norm": 0.9241039149441995, "learning_rate": 4.230486222707807e-06, "loss": 0.1527, "step": 3189 }, { "epoch": 0.29391440549131614, "grad_norm": 0.8843626519655292, "learning_rate": 4.229936355272924e-06, "loss": 0.1617, "step": 3190 }, { "epoch": 0.2940065416685862, "grad_norm": 0.9271307349122019, "learning_rate": 4.229386327213264e-06, "loss": 0.1611, "step": 3191 }, { "epoch": 0.2940986778458562, "grad_norm": 0.9492531025198703, "learning_rate": 4.228836138579897e-06, "loss": 0.1729, "step": 3192 }, { "epoch": 0.2941908140231262, "grad_norm": 0.9295851214577007, "learning_rate": 4.2282857894239085e-06, "loss": 0.1687, "step": 3193 }, { "epoch": 0.2942829502003962, "grad_norm": 0.977256783995922, "learning_rate": 4.227735279796399e-06, "loss": 0.1628, "step": 3194 }, { "epoch": 0.2943750863776662, "grad_norm": 0.8930681892980293, "learning_rate": 4.227184609748483e-06, "loss": 0.1693, "step": 3195 }, { "epoch": 0.2944672225549362, "grad_norm": 0.8933894768496947, "learning_rate": 4.226633779331289e-06, "loss": 0.1508, "step": 3196 }, { "epoch": 0.2945593587322062, "grad_norm": 0.8927683830142663, "learning_rate": 4.226082788595965e-06, "loss": 0.1453, "step": 3197 }, { "epoch": 0.29465149490947623, "grad_norm": 0.9104305630689763, "learning_rate": 4.225531637593666e-06, "loss": 0.1563, "step": 3198 }, { "epoch": 0.2947436310867462, "grad_norm": 0.9241605921153313, "learning_rate": 4.2249803263755695e-06, "loss": 0.1743, "step": 3199 }, { "epoch": 0.2948357672640162, "grad_norm": 0.9296113117121186, "learning_rate": 4.2244288549928645e-06, "loss": 0.1516, "step": 3200 }, { "epoch": 0.2949279034412862, "grad_norm": 0.9199239821431868, "learning_rate": 4.223877223496754e-06, "loss": 0.1663, "step": 3201 }, { "epoch": 0.2950200396185562, "grad_norm": 0.9000094471075423, "learning_rate": 4.223325431938459e-06, "loss": 0.1564, "step": 3202 }, { "epoch": 0.2951121757958262, "grad_norm": 0.9194190986912426, "learning_rate": 4.2227734803692115e-06, "loss": 0.1555, "step": 3203 }, { "epoch": 0.2952043119730962, "grad_norm": 0.8767195760502394, "learning_rate": 4.2222213688402605e-06, "loss": 0.1386, "step": 3204 }, { "epoch": 0.29529644815036626, "grad_norm": 0.8871918600848787, "learning_rate": 4.22166909740287e-06, "loss": 0.144, "step": 3205 }, { "epoch": 0.29538858432763626, "grad_norm": 1.0100555949711532, "learning_rate": 4.221116666108319e-06, "loss": 0.171, "step": 3206 }, { "epoch": 0.29548072050490626, "grad_norm": 0.907837346659232, "learning_rate": 4.2205640750079e-06, "loss": 0.1585, "step": 3207 }, { "epoch": 0.29557285668217625, "grad_norm": 0.9685782224087519, "learning_rate": 4.220011324152922e-06, "loss": 0.1694, "step": 3208 }, { "epoch": 0.29566499285944625, "grad_norm": 0.9318424056577996, "learning_rate": 4.219458413594707e-06, "loss": 0.1661, "step": 3209 }, { "epoch": 0.29575712903671625, "grad_norm": 0.975106382055604, "learning_rate": 4.218905343384593e-06, "loss": 0.1648, "step": 3210 }, { "epoch": 0.29584926521398625, "grad_norm": 0.8829421015276901, "learning_rate": 4.218352113573933e-06, "loss": 0.161, "step": 3211 }, { "epoch": 0.2959414013912563, "grad_norm": 0.9457196742302185, "learning_rate": 4.217798724214094e-06, "loss": 0.176, "step": 3212 }, { "epoch": 0.2960335375685263, "grad_norm": 0.9877734213208268, "learning_rate": 4.21724517535646e-06, "loss": 0.161, "step": 3213 }, { "epoch": 0.2961256737457963, "grad_norm": 0.8864723853789074, "learning_rate": 4.216691467052426e-06, "loss": 0.1501, "step": 3214 }, { "epoch": 0.2962178099230663, "grad_norm": 0.8725650110211445, "learning_rate": 4.216137599353404e-06, "loss": 0.149, "step": 3215 }, { "epoch": 0.2963099461003363, "grad_norm": 0.9069598790303386, "learning_rate": 4.215583572310821e-06, "loss": 0.1522, "step": 3216 }, { "epoch": 0.2964020822776063, "grad_norm": 0.9359483520411078, "learning_rate": 4.2150293859761196e-06, "loss": 0.1575, "step": 3217 }, { "epoch": 0.29649421845487633, "grad_norm": 0.9376534373883524, "learning_rate": 4.214475040400755e-06, "loss": 0.1693, "step": 3218 }, { "epoch": 0.29658635463214633, "grad_norm": 0.9605366150327874, "learning_rate": 4.213920535636198e-06, "loss": 0.1555, "step": 3219 }, { "epoch": 0.29667849080941633, "grad_norm": 0.9239353702038833, "learning_rate": 4.213365871733934e-06, "loss": 0.1589, "step": 3220 }, { "epoch": 0.2967706269866863, "grad_norm": 0.9265223393518568, "learning_rate": 4.212811048745467e-06, "loss": 0.1625, "step": 3221 }, { "epoch": 0.2968627631639563, "grad_norm": 0.9788953150847244, "learning_rate": 4.212256066722307e-06, "loss": 0.1648, "step": 3222 }, { "epoch": 0.2969548993412263, "grad_norm": 0.8587737017236943, "learning_rate": 4.211700925715988e-06, "loss": 0.1434, "step": 3223 }, { "epoch": 0.2970470355184963, "grad_norm": 0.9571360236107946, "learning_rate": 4.211145625778054e-06, "loss": 0.1718, "step": 3224 }, { "epoch": 0.29713917169576637, "grad_norm": 0.8948875668309092, "learning_rate": 4.2105901669600645e-06, "loss": 0.1493, "step": 3225 }, { "epoch": 0.29723130787303637, "grad_norm": 0.8698739984411084, "learning_rate": 4.210034549313594e-06, "loss": 0.1537, "step": 3226 }, { "epoch": 0.29732344405030636, "grad_norm": 0.9127957190567569, "learning_rate": 4.2094787728902305e-06, "loss": 0.154, "step": 3227 }, { "epoch": 0.29741558022757636, "grad_norm": 0.9743876075133446, "learning_rate": 4.20892283774158e-06, "loss": 0.1653, "step": 3228 }, { "epoch": 0.29750771640484636, "grad_norm": 0.9697707594140141, "learning_rate": 4.20836674391926e-06, "loss": 0.1628, "step": 3229 }, { "epoch": 0.29759985258211635, "grad_norm": 0.9921447822593994, "learning_rate": 4.207810491474904e-06, "loss": 0.1741, "step": 3230 }, { "epoch": 0.29769198875938635, "grad_norm": 1.0175502207785552, "learning_rate": 4.207254080460161e-06, "loss": 0.1759, "step": 3231 }, { "epoch": 0.2977841249366564, "grad_norm": 0.8958978146833146, "learning_rate": 4.206697510926691e-06, "loss": 0.1538, "step": 3232 }, { "epoch": 0.2978762611139264, "grad_norm": 0.950880039603536, "learning_rate": 4.206140782926174e-06, "loss": 0.1721, "step": 3233 }, { "epoch": 0.2979683972911964, "grad_norm": 0.925327176872321, "learning_rate": 4.205583896510303e-06, "loss": 0.1595, "step": 3234 }, { "epoch": 0.2980605334684664, "grad_norm": 0.9532665759717548, "learning_rate": 4.2050268517307816e-06, "loss": 0.1639, "step": 3235 }, { "epoch": 0.2981526696457364, "grad_norm": 0.9291134573772569, "learning_rate": 4.204469648639335e-06, "loss": 0.1715, "step": 3236 }, { "epoch": 0.2982448058230064, "grad_norm": 0.8797274289945238, "learning_rate": 4.203912287287697e-06, "loss": 0.1604, "step": 3237 }, { "epoch": 0.2983369420002764, "grad_norm": 0.9040168151293431, "learning_rate": 4.203354767727621e-06, "loss": 0.1658, "step": 3238 }, { "epoch": 0.29842907817754644, "grad_norm": 0.985469121655086, "learning_rate": 4.202797090010871e-06, "loss": 0.1692, "step": 3239 }, { "epoch": 0.29852121435481643, "grad_norm": 0.8991733097245737, "learning_rate": 4.202239254189228e-06, "loss": 0.1527, "step": 3240 }, { "epoch": 0.29861335053208643, "grad_norm": 0.9968726368094237, "learning_rate": 4.2016812603144865e-06, "loss": 0.1768, "step": 3241 }, { "epoch": 0.2987054867093564, "grad_norm": 0.9261811611053946, "learning_rate": 4.201123108438457e-06, "loss": 0.1609, "step": 3242 }, { "epoch": 0.2987976228866264, "grad_norm": 0.9805890401028196, "learning_rate": 4.2005647986129635e-06, "loss": 0.163, "step": 3243 }, { "epoch": 0.2988897590638964, "grad_norm": 0.9154281842963401, "learning_rate": 4.2000063308898466e-06, "loss": 0.154, "step": 3244 }, { "epoch": 0.2989818952411664, "grad_norm": 0.9263190062990657, "learning_rate": 4.199447705320958e-06, "loss": 0.159, "step": 3245 }, { "epoch": 0.29907403141843647, "grad_norm": 0.9684841984778471, "learning_rate": 4.1988889219581676e-06, "loss": 0.1809, "step": 3246 }, { "epoch": 0.29916616759570647, "grad_norm": 0.8439439005356527, "learning_rate": 4.198329980853357e-06, "loss": 0.1386, "step": 3247 }, { "epoch": 0.29925830377297646, "grad_norm": 0.8805133731749528, "learning_rate": 4.1977708820584265e-06, "loss": 0.1609, "step": 3248 }, { "epoch": 0.29935043995024646, "grad_norm": 0.9299348501988777, "learning_rate": 4.197211625625285e-06, "loss": 0.1614, "step": 3249 }, { "epoch": 0.29944257612751646, "grad_norm": 0.953423157440807, "learning_rate": 4.196652211605863e-06, "loss": 0.1753, "step": 3250 }, { "epoch": 0.29953471230478645, "grad_norm": 0.9650855610206487, "learning_rate": 4.196092640052099e-06, "loss": 0.1743, "step": 3251 }, { "epoch": 0.2996268484820565, "grad_norm": 0.8158109293633011, "learning_rate": 4.195532911015952e-06, "loss": 0.149, "step": 3252 }, { "epoch": 0.2997189846593265, "grad_norm": 1.0066905218223139, "learning_rate": 4.1949730245493915e-06, "loss": 0.166, "step": 3253 }, { "epoch": 0.2998111208365965, "grad_norm": 0.981891494249524, "learning_rate": 4.194412980704403e-06, "loss": 0.1683, "step": 3254 }, { "epoch": 0.2999032570138665, "grad_norm": 0.9388757636396333, "learning_rate": 4.1938527795329875e-06, "loss": 0.1695, "step": 3255 }, { "epoch": 0.2999953931911365, "grad_norm": 0.861978941124501, "learning_rate": 4.1932924210871585e-06, "loss": 0.1584, "step": 3256 }, { "epoch": 0.3000875293684065, "grad_norm": 0.9093301324713368, "learning_rate": 4.192731905418947e-06, "loss": 0.1612, "step": 3257 }, { "epoch": 0.3001796655456765, "grad_norm": 0.9074202593094116, "learning_rate": 4.192171232580395e-06, "loss": 0.1613, "step": 3258 }, { "epoch": 0.30027180172294654, "grad_norm": 0.8515153187239818, "learning_rate": 4.191610402623561e-06, "loss": 0.1501, "step": 3259 }, { "epoch": 0.30036393790021654, "grad_norm": 0.9327983127436237, "learning_rate": 4.191049415600521e-06, "loss": 0.1732, "step": 3260 }, { "epoch": 0.30045607407748653, "grad_norm": 0.96624826925591, "learning_rate": 4.19048827156336e-06, "loss": 0.1695, "step": 3261 }, { "epoch": 0.30054821025475653, "grad_norm": 0.9123411497937238, "learning_rate": 4.189926970564181e-06, "loss": 0.1404, "step": 3262 }, { "epoch": 0.30064034643202653, "grad_norm": 0.9116714759394909, "learning_rate": 4.189365512655101e-06, "loss": 0.1605, "step": 3263 }, { "epoch": 0.3007324826092965, "grad_norm": 0.9543230905818623, "learning_rate": 4.188803897888251e-06, "loss": 0.1489, "step": 3264 }, { "epoch": 0.3008246187865665, "grad_norm": 0.946421886980175, "learning_rate": 4.188242126315778e-06, "loss": 0.1636, "step": 3265 }, { "epoch": 0.3009167549638366, "grad_norm": 0.9416980687774688, "learning_rate": 4.187680197989841e-06, "loss": 0.1693, "step": 3266 }, { "epoch": 0.30100889114110657, "grad_norm": 0.8628506075375213, "learning_rate": 4.187118112962616e-06, "loss": 0.1471, "step": 3267 }, { "epoch": 0.30110102731837657, "grad_norm": 0.9110568486745181, "learning_rate": 4.186555871286293e-06, "loss": 0.1405, "step": 3268 }, { "epoch": 0.30119316349564657, "grad_norm": 0.8937401299625113, "learning_rate": 4.185993473013076e-06, "loss": 0.1494, "step": 3269 }, { "epoch": 0.30128529967291656, "grad_norm": 0.9063871070755407, "learning_rate": 4.185430918195184e-06, "loss": 0.1538, "step": 3270 }, { "epoch": 0.30137743585018656, "grad_norm": 0.912457399295779, "learning_rate": 4.184868206884849e-06, "loss": 0.1485, "step": 3271 }, { "epoch": 0.30146957202745656, "grad_norm": 0.9949777663011297, "learning_rate": 4.18430533913432e-06, "loss": 0.1771, "step": 3272 }, { "epoch": 0.3015617082047266, "grad_norm": 0.9784479373923843, "learning_rate": 4.183742314995859e-06, "loss": 0.1618, "step": 3273 }, { "epoch": 0.3016538443819966, "grad_norm": 0.9112650889954245, "learning_rate": 4.183179134521743e-06, "loss": 0.1513, "step": 3274 }, { "epoch": 0.3017459805592666, "grad_norm": 0.9355584324086121, "learning_rate": 4.1826157977642634e-06, "loss": 0.1484, "step": 3275 }, { "epoch": 0.3018381167365366, "grad_norm": 0.8931369194300064, "learning_rate": 4.1820523047757246e-06, "loss": 0.1656, "step": 3276 }, { "epoch": 0.3019302529138066, "grad_norm": 0.9956521555709148, "learning_rate": 4.18148865560845e-06, "loss": 0.1626, "step": 3277 }, { "epoch": 0.3020223890910766, "grad_norm": 0.8874050966915844, "learning_rate": 4.180924850314771e-06, "loss": 0.1575, "step": 3278 }, { "epoch": 0.3021145252683466, "grad_norm": 0.9523402862573914, "learning_rate": 4.180360888947041e-06, "loss": 0.1703, "step": 3279 }, { "epoch": 0.30220666144561664, "grad_norm": 0.9287874514545038, "learning_rate": 4.179796771557619e-06, "loss": 0.1669, "step": 3280 }, { "epoch": 0.30229879762288664, "grad_norm": 0.8673579238655271, "learning_rate": 4.179232498198888e-06, "loss": 0.1503, "step": 3281 }, { "epoch": 0.30239093380015664, "grad_norm": 0.8794126748973937, "learning_rate": 4.178668068923238e-06, "loss": 0.1578, "step": 3282 }, { "epoch": 0.30248306997742663, "grad_norm": 0.9275772205249678, "learning_rate": 4.178103483783077e-06, "loss": 0.1525, "step": 3283 }, { "epoch": 0.30257520615469663, "grad_norm": 0.8968088357348379, "learning_rate": 4.177538742830828e-06, "loss": 0.1547, "step": 3284 }, { "epoch": 0.3026673423319666, "grad_norm": 0.8928338403213077, "learning_rate": 4.1769738461189245e-06, "loss": 0.1653, "step": 3285 }, { "epoch": 0.3027594785092367, "grad_norm": 0.9542122073006188, "learning_rate": 4.176408793699821e-06, "loss": 0.1528, "step": 3286 }, { "epoch": 0.3028516146865067, "grad_norm": 0.9825041852507498, "learning_rate": 4.1758435856259784e-06, "loss": 0.1642, "step": 3287 }, { "epoch": 0.3029437508637767, "grad_norm": 0.9378090275370146, "learning_rate": 4.17527822194988e-06, "loss": 0.1532, "step": 3288 }, { "epoch": 0.30303588704104667, "grad_norm": 0.925622043346032, "learning_rate": 4.174712702724017e-06, "loss": 0.1638, "step": 3289 }, { "epoch": 0.30312802321831667, "grad_norm": 0.8605309241655659, "learning_rate": 4.174147028000901e-06, "loss": 0.1538, "step": 3290 }, { "epoch": 0.30322015939558666, "grad_norm": 0.9165943989447812, "learning_rate": 4.173581197833052e-06, "loss": 0.1482, "step": 3291 }, { "epoch": 0.30331229557285666, "grad_norm": 0.8668274344509658, "learning_rate": 4.173015212273009e-06, "loss": 0.1509, "step": 3292 }, { "epoch": 0.3034044317501267, "grad_norm": 0.9491082207852997, "learning_rate": 4.1724490713733246e-06, "loss": 0.1514, "step": 3293 }, { "epoch": 0.3034965679273967, "grad_norm": 0.9456871597563634, "learning_rate": 4.171882775186563e-06, "loss": 0.1619, "step": 3294 }, { "epoch": 0.3035887041046667, "grad_norm": 0.9315199674334298, "learning_rate": 4.1713163237653055e-06, "loss": 0.1582, "step": 3295 }, { "epoch": 0.3036808402819367, "grad_norm": 0.9989797250234418, "learning_rate": 4.170749717162148e-06, "loss": 0.1632, "step": 3296 }, { "epoch": 0.3037729764592067, "grad_norm": 0.875493273649281, "learning_rate": 4.170182955429699e-06, "loss": 0.1616, "step": 3297 }, { "epoch": 0.3038651126364767, "grad_norm": 0.9281100714565997, "learning_rate": 4.169616038620583e-06, "loss": 0.1456, "step": 3298 }, { "epoch": 0.3039572488137467, "grad_norm": 0.8493906414552408, "learning_rate": 4.169048966787438e-06, "loss": 0.1478, "step": 3299 }, { "epoch": 0.30404938499101675, "grad_norm": 0.8952927753283536, "learning_rate": 4.168481739982917e-06, "loss": 0.1636, "step": 3300 }, { "epoch": 0.30414152116828674, "grad_norm": 0.8655868115020314, "learning_rate": 4.167914358259687e-06, "loss": 0.1548, "step": 3301 }, { "epoch": 0.30423365734555674, "grad_norm": 0.9919065938123784, "learning_rate": 4.167346821670429e-06, "loss": 0.1751, "step": 3302 }, { "epoch": 0.30432579352282674, "grad_norm": 0.9493609688016562, "learning_rate": 4.166779130267839e-06, "loss": 0.1665, "step": 3303 }, { "epoch": 0.30441792970009673, "grad_norm": 0.8983868029734609, "learning_rate": 4.166211284104629e-06, "loss": 0.1572, "step": 3304 }, { "epoch": 0.30451006587736673, "grad_norm": 0.9007939752199932, "learning_rate": 4.16564328323352e-06, "loss": 0.1698, "step": 3305 }, { "epoch": 0.30460220205463673, "grad_norm": 0.8913045218728333, "learning_rate": 4.165075127707254e-06, "loss": 0.1603, "step": 3306 }, { "epoch": 0.3046943382319068, "grad_norm": 0.8821994473863105, "learning_rate": 4.164506817578582e-06, "loss": 0.1536, "step": 3307 }, { "epoch": 0.3047864744091768, "grad_norm": 0.8662864009408308, "learning_rate": 4.163938352900274e-06, "loss": 0.1631, "step": 3308 }, { "epoch": 0.3048786105864468, "grad_norm": 0.9322076782697044, "learning_rate": 4.16336973372511e-06, "loss": 0.1707, "step": 3309 }, { "epoch": 0.30497074676371677, "grad_norm": 0.8791872015663708, "learning_rate": 4.162800960105889e-06, "loss": 0.1453, "step": 3310 }, { "epoch": 0.30506288294098677, "grad_norm": 0.9335680774636453, "learning_rate": 4.162232032095418e-06, "loss": 0.1639, "step": 3311 }, { "epoch": 0.30515501911825677, "grad_norm": 0.938972469326503, "learning_rate": 4.1616629497465245e-06, "loss": 0.1542, "step": 3312 }, { "epoch": 0.30524715529552676, "grad_norm": 0.9437698259785928, "learning_rate": 4.1610937131120474e-06, "loss": 0.1788, "step": 3313 }, { "epoch": 0.3053392914727968, "grad_norm": 0.8838873717685708, "learning_rate": 4.16052432224484e-06, "loss": 0.1577, "step": 3314 }, { "epoch": 0.3054314276500668, "grad_norm": 0.9391124996158003, "learning_rate": 4.159954777197771e-06, "loss": 0.1574, "step": 3315 }, { "epoch": 0.3055235638273368, "grad_norm": 0.9675529515690404, "learning_rate": 4.159385078023722e-06, "loss": 0.1664, "step": 3316 }, { "epoch": 0.3056157000046068, "grad_norm": 0.9691361853056687, "learning_rate": 4.15881522477559e-06, "loss": 0.1581, "step": 3317 }, { "epoch": 0.3057078361818768, "grad_norm": 0.9497540299432615, "learning_rate": 4.1582452175062854e-06, "loss": 0.1766, "step": 3318 }, { "epoch": 0.3057999723591468, "grad_norm": 0.9331780873537096, "learning_rate": 4.157675056268735e-06, "loss": 0.1581, "step": 3319 }, { "epoch": 0.30589210853641685, "grad_norm": 0.9282528349029902, "learning_rate": 4.157104741115876e-06, "loss": 0.1542, "step": 3320 }, { "epoch": 0.30598424471368685, "grad_norm": 0.936802223907317, "learning_rate": 4.156534272100664e-06, "loss": 0.1827, "step": 3321 }, { "epoch": 0.30607638089095685, "grad_norm": 0.9002015504623913, "learning_rate": 4.155963649276066e-06, "loss": 0.1593, "step": 3322 }, { "epoch": 0.30616851706822684, "grad_norm": 0.8840297918677589, "learning_rate": 4.155392872695066e-06, "loss": 0.1578, "step": 3323 }, { "epoch": 0.30626065324549684, "grad_norm": 0.8926966395195797, "learning_rate": 4.154821942410659e-06, "loss": 0.1528, "step": 3324 }, { "epoch": 0.30635278942276684, "grad_norm": 0.8927711731854681, "learning_rate": 4.154250858475857e-06, "loss": 0.1653, "step": 3325 }, { "epoch": 0.30644492560003683, "grad_norm": 0.9700319830098186, "learning_rate": 4.1536796209436835e-06, "loss": 0.1659, "step": 3326 }, { "epoch": 0.3065370617773069, "grad_norm": 0.9483174494393913, "learning_rate": 4.153108229867181e-06, "loss": 0.1665, "step": 3327 }, { "epoch": 0.3066291979545769, "grad_norm": 0.8748630059230683, "learning_rate": 4.1525366852994e-06, "loss": 0.1554, "step": 3328 }, { "epoch": 0.3067213341318469, "grad_norm": 0.8582455329358799, "learning_rate": 4.151964987293411e-06, "loss": 0.143, "step": 3329 }, { "epoch": 0.3068134703091169, "grad_norm": 0.8715858478170836, "learning_rate": 4.151393135902294e-06, "loss": 0.1583, "step": 3330 }, { "epoch": 0.3069056064863869, "grad_norm": 0.9516812753474873, "learning_rate": 4.150821131179148e-06, "loss": 0.1645, "step": 3331 }, { "epoch": 0.30699774266365687, "grad_norm": 0.9084710244319857, "learning_rate": 4.150248973177081e-06, "loss": 0.1507, "step": 3332 }, { "epoch": 0.30708987884092687, "grad_norm": 0.9165140829229909, "learning_rate": 4.14967666194922e-06, "loss": 0.1578, "step": 3333 }, { "epoch": 0.3071820150181969, "grad_norm": 0.8704866370447997, "learning_rate": 4.149104197548703e-06, "loss": 0.1517, "step": 3334 }, { "epoch": 0.3072741511954669, "grad_norm": 0.8936771673676237, "learning_rate": 4.148531580028685e-06, "loss": 0.1527, "step": 3335 }, { "epoch": 0.3073662873727369, "grad_norm": 0.9689189902531499, "learning_rate": 4.147958809442331e-06, "loss": 0.1379, "step": 3336 }, { "epoch": 0.3074584235500069, "grad_norm": 0.9222655925574594, "learning_rate": 4.147385885842824e-06, "loss": 0.1536, "step": 3337 }, { "epoch": 0.3075505597272769, "grad_norm": 0.9481217418968965, "learning_rate": 4.146812809283361e-06, "loss": 0.1663, "step": 3338 }, { "epoch": 0.3076426959045469, "grad_norm": 0.9071299429191669, "learning_rate": 4.14623957981715e-06, "loss": 0.1547, "step": 3339 }, { "epoch": 0.3077348320818169, "grad_norm": 0.9272233789397594, "learning_rate": 4.1456661974974185e-06, "loss": 0.1385, "step": 3340 }, { "epoch": 0.30782696825908695, "grad_norm": 0.9741525591963978, "learning_rate": 4.145092662377403e-06, "loss": 0.1641, "step": 3341 }, { "epoch": 0.30791910443635695, "grad_norm": 0.9253540203356433, "learning_rate": 4.144518974510358e-06, "loss": 0.17, "step": 3342 }, { "epoch": 0.30801124061362695, "grad_norm": 0.8975724542139679, "learning_rate": 4.143945133949547e-06, "loss": 0.1479, "step": 3343 }, { "epoch": 0.30810337679089694, "grad_norm": 0.9632358843689789, "learning_rate": 4.1433711407482544e-06, "loss": 0.1731, "step": 3344 }, { "epoch": 0.30819551296816694, "grad_norm": 1.0076660275039135, "learning_rate": 4.142796994959775e-06, "loss": 0.1857, "step": 3345 }, { "epoch": 0.30828764914543694, "grad_norm": 0.9007248222676459, "learning_rate": 4.142222696637417e-06, "loss": 0.1653, "step": 3346 }, { "epoch": 0.30837978532270693, "grad_norm": 0.8293657995382075, "learning_rate": 4.141648245834505e-06, "loss": 0.1557, "step": 3347 }, { "epoch": 0.308471921499977, "grad_norm": 0.9018556933312604, "learning_rate": 4.141073642604377e-06, "loss": 0.1507, "step": 3348 }, { "epoch": 0.308564057677247, "grad_norm": 1.0206437869118763, "learning_rate": 4.140498887000385e-06, "loss": 0.1612, "step": 3349 }, { "epoch": 0.308656193854517, "grad_norm": 0.8628938425077515, "learning_rate": 4.139923979075894e-06, "loss": 0.1537, "step": 3350 }, { "epoch": 0.308748330031787, "grad_norm": 0.9050313968713789, "learning_rate": 4.139348918884285e-06, "loss": 0.1655, "step": 3351 }, { "epoch": 0.308840466209057, "grad_norm": 0.9103712066570067, "learning_rate": 4.138773706478953e-06, "loss": 0.151, "step": 3352 }, { "epoch": 0.30893260238632697, "grad_norm": 0.92340284255135, "learning_rate": 4.138198341913305e-06, "loss": 0.1493, "step": 3353 }, { "epoch": 0.309024738563597, "grad_norm": 0.8688924319396153, "learning_rate": 4.137622825240767e-06, "loss": 0.1574, "step": 3354 }, { "epoch": 0.309116874740867, "grad_norm": 0.8993317455096554, "learning_rate": 4.1370471565147715e-06, "loss": 0.1575, "step": 3355 }, { "epoch": 0.309209010918137, "grad_norm": 0.928602372486308, "learning_rate": 4.1364713357887715e-06, "loss": 0.1543, "step": 3356 }, { "epoch": 0.309301147095407, "grad_norm": 1.061155027790231, "learning_rate": 4.1358953631162314e-06, "loss": 0.1598, "step": 3357 }, { "epoch": 0.309393283272677, "grad_norm": 0.9084105597716158, "learning_rate": 4.135319238550632e-06, "loss": 0.1559, "step": 3358 }, { "epoch": 0.309485419449947, "grad_norm": 0.9097245918542545, "learning_rate": 4.1347429621454645e-06, "loss": 0.1491, "step": 3359 }, { "epoch": 0.309577555627217, "grad_norm": 1.0176830209153604, "learning_rate": 4.134166533954238e-06, "loss": 0.1677, "step": 3360 }, { "epoch": 0.30966969180448706, "grad_norm": 0.9599751677181546, "learning_rate": 4.1335899540304715e-06, "loss": 0.1648, "step": 3361 }, { "epoch": 0.30976182798175705, "grad_norm": 0.9575989184802846, "learning_rate": 4.133013222427703e-06, "loss": 0.1663, "step": 3362 }, { "epoch": 0.30985396415902705, "grad_norm": 0.9533677266914239, "learning_rate": 4.132436339199481e-06, "loss": 0.1527, "step": 3363 }, { "epoch": 0.30994610033629705, "grad_norm": 0.943371411328731, "learning_rate": 4.131859304399368e-06, "loss": 0.1645, "step": 3364 }, { "epoch": 0.31003823651356704, "grad_norm": 0.9108660115026544, "learning_rate": 4.1312821180809445e-06, "loss": 0.1672, "step": 3365 }, { "epoch": 0.31013037269083704, "grad_norm": 0.930016137545348, "learning_rate": 4.130704780297801e-06, "loss": 0.1498, "step": 3366 }, { "epoch": 0.31022250886810704, "grad_norm": 0.9522554481405724, "learning_rate": 4.130127291103542e-06, "loss": 0.1644, "step": 3367 }, { "epoch": 0.3103146450453771, "grad_norm": 0.8842450520060663, "learning_rate": 4.129549650551788e-06, "loss": 0.1453, "step": 3368 }, { "epoch": 0.3104067812226471, "grad_norm": 0.9676290587085921, "learning_rate": 4.1289718586961755e-06, "loss": 0.1627, "step": 3369 }, { "epoch": 0.3104989173999171, "grad_norm": 0.8760194541205237, "learning_rate": 4.12839391559035e-06, "loss": 0.1573, "step": 3370 }, { "epoch": 0.3105910535771871, "grad_norm": 1.0120308223548502, "learning_rate": 4.127815821287973e-06, "loss": 0.1691, "step": 3371 }, { "epoch": 0.3106831897544571, "grad_norm": 1.0230226420993533, "learning_rate": 4.127237575842723e-06, "loss": 0.1727, "step": 3372 }, { "epoch": 0.3107753259317271, "grad_norm": 0.9527995171561136, "learning_rate": 4.126659179308289e-06, "loss": 0.167, "step": 3373 }, { "epoch": 0.3108674621089971, "grad_norm": 0.8802401223513894, "learning_rate": 4.126080631738374e-06, "loss": 0.1577, "step": 3374 }, { "epoch": 0.3109595982862671, "grad_norm": 1.0054768017771973, "learning_rate": 4.125501933186699e-06, "loss": 0.152, "step": 3375 }, { "epoch": 0.3110517344635371, "grad_norm": 0.9339483620066793, "learning_rate": 4.124923083706993e-06, "loss": 0.169, "step": 3376 }, { "epoch": 0.3111438706408071, "grad_norm": 0.891809746192897, "learning_rate": 4.124344083353005e-06, "loss": 0.1604, "step": 3377 }, { "epoch": 0.3112360068180771, "grad_norm": 0.9405125779039759, "learning_rate": 4.123764932178492e-06, "loss": 0.1537, "step": 3378 }, { "epoch": 0.3113281429953471, "grad_norm": 0.9114646595770788, "learning_rate": 4.123185630237233e-06, "loss": 0.1631, "step": 3379 }, { "epoch": 0.3114202791726171, "grad_norm": 0.9156889815669623, "learning_rate": 4.122606177583012e-06, "loss": 0.1571, "step": 3380 }, { "epoch": 0.3115124153498871, "grad_norm": 0.9232508366180471, "learning_rate": 4.122026574269633e-06, "loss": 0.1623, "step": 3381 }, { "epoch": 0.31160455152715716, "grad_norm": 0.9173313455540943, "learning_rate": 4.121446820350911e-06, "loss": 0.153, "step": 3382 }, { "epoch": 0.31169668770442716, "grad_norm": 0.9332944353788419, "learning_rate": 4.12086691588068e-06, "loss": 0.1565, "step": 3383 }, { "epoch": 0.31178882388169715, "grad_norm": 0.867962487245611, "learning_rate": 4.120286860912779e-06, "loss": 0.1477, "step": 3384 }, { "epoch": 0.31188096005896715, "grad_norm": 0.9925637374051337, "learning_rate": 4.11970665550107e-06, "loss": 0.1599, "step": 3385 }, { "epoch": 0.31197309623623715, "grad_norm": 0.9567564373610871, "learning_rate": 4.119126299699422e-06, "loss": 0.1695, "step": 3386 }, { "epoch": 0.31206523241350714, "grad_norm": 0.8915249939767821, "learning_rate": 4.118545793561724e-06, "loss": 0.1473, "step": 3387 }, { "epoch": 0.3121573685907772, "grad_norm": 0.9671789476742289, "learning_rate": 4.117965137141875e-06, "loss": 0.1586, "step": 3388 }, { "epoch": 0.3122495047680472, "grad_norm": 0.9078171347172409, "learning_rate": 4.117384330493789e-06, "loss": 0.143, "step": 3389 }, { "epoch": 0.3123416409453172, "grad_norm": 0.9564013420176967, "learning_rate": 4.1168033736713934e-06, "loss": 0.1657, "step": 3390 }, { "epoch": 0.3124337771225872, "grad_norm": 0.9466706325040476, "learning_rate": 4.116222266728631e-06, "loss": 0.1646, "step": 3391 }, { "epoch": 0.3125259132998572, "grad_norm": 0.9132831841251625, "learning_rate": 4.115641009719456e-06, "loss": 0.1468, "step": 3392 }, { "epoch": 0.3126180494771272, "grad_norm": 0.8936981888983082, "learning_rate": 4.11505960269784e-06, "loss": 0.1555, "step": 3393 }, { "epoch": 0.3127101856543972, "grad_norm": 0.9496755272541821, "learning_rate": 4.114478045717767e-06, "loss": 0.1644, "step": 3394 }, { "epoch": 0.31280232183166723, "grad_norm": 0.9503775314773306, "learning_rate": 4.113896338833233e-06, "loss": 0.1553, "step": 3395 }, { "epoch": 0.3128944580089372, "grad_norm": 1.0006972046347853, "learning_rate": 4.11331448209825e-06, "loss": 0.1795, "step": 3396 }, { "epoch": 0.3129865941862072, "grad_norm": 0.8495640118383059, "learning_rate": 4.112732475566844e-06, "loss": 0.1525, "step": 3397 }, { "epoch": 0.3130787303634772, "grad_norm": 0.9164288932535483, "learning_rate": 4.112150319293055e-06, "loss": 0.1616, "step": 3398 }, { "epoch": 0.3131708665407472, "grad_norm": 0.9069820705502305, "learning_rate": 4.111568013330933e-06, "loss": 0.1549, "step": 3399 }, { "epoch": 0.3132630027180172, "grad_norm": 0.8560780037635382, "learning_rate": 4.110985557734549e-06, "loss": 0.1411, "step": 3400 }, { "epoch": 0.3133551388952872, "grad_norm": 0.9304730799382744, "learning_rate": 4.110402952557982e-06, "loss": 0.1589, "step": 3401 }, { "epoch": 0.31344727507255726, "grad_norm": 0.9225084800079449, "learning_rate": 4.109820197855329e-06, "loss": 0.1565, "step": 3402 }, { "epoch": 0.31353941124982726, "grad_norm": 0.9449480858494125, "learning_rate": 4.109237293680697e-06, "loss": 0.1578, "step": 3403 }, { "epoch": 0.31363154742709726, "grad_norm": 0.7922549635079016, "learning_rate": 4.108654240088208e-06, "loss": 0.1331, "step": 3404 }, { "epoch": 0.31372368360436725, "grad_norm": 0.9651881798215124, "learning_rate": 4.1080710371319995e-06, "loss": 0.1516, "step": 3405 }, { "epoch": 0.31381581978163725, "grad_norm": 0.9823047227787114, "learning_rate": 4.107487684866224e-06, "loss": 0.1681, "step": 3406 }, { "epoch": 0.31390795595890725, "grad_norm": 0.8984657664496137, "learning_rate": 4.106904183345042e-06, "loss": 0.1454, "step": 3407 }, { "epoch": 0.31400009213617724, "grad_norm": 1.0948214742805147, "learning_rate": 4.106320532622635e-06, "loss": 0.1789, "step": 3408 }, { "epoch": 0.3140922283134473, "grad_norm": 1.0380192093032585, "learning_rate": 4.105736732753193e-06, "loss": 0.176, "step": 3409 }, { "epoch": 0.3141843644907173, "grad_norm": 0.8916083716066878, "learning_rate": 4.1051527837909225e-06, "loss": 0.1372, "step": 3410 }, { "epoch": 0.3142765006679873, "grad_norm": 0.9012931626518442, "learning_rate": 4.104568685790043e-06, "loss": 0.1487, "step": 3411 }, { "epoch": 0.3143686368452573, "grad_norm": 0.9137654798095866, "learning_rate": 4.103984438804789e-06, "loss": 0.1538, "step": 3412 }, { "epoch": 0.3144607730225273, "grad_norm": 0.9481939846524343, "learning_rate": 4.103400042889407e-06, "loss": 0.1637, "step": 3413 }, { "epoch": 0.3145529091997973, "grad_norm": 0.9071124252098365, "learning_rate": 4.102815498098159e-06, "loss": 0.1578, "step": 3414 }, { "epoch": 0.3146450453770673, "grad_norm": 0.8270852439467992, "learning_rate": 4.102230804485318e-06, "loss": 0.1495, "step": 3415 }, { "epoch": 0.31473718155433733, "grad_norm": 0.9502681634882719, "learning_rate": 4.101645962105176e-06, "loss": 0.163, "step": 3416 }, { "epoch": 0.3148293177316073, "grad_norm": 0.951552464274314, "learning_rate": 4.101060971012033e-06, "loss": 0.1591, "step": 3417 }, { "epoch": 0.3149214539088773, "grad_norm": 0.8949602924889337, "learning_rate": 4.100475831260208e-06, "loss": 0.1444, "step": 3418 }, { "epoch": 0.3150135900861473, "grad_norm": 0.9491012105424428, "learning_rate": 4.099890542904028e-06, "loss": 0.1576, "step": 3419 }, { "epoch": 0.3151057262634173, "grad_norm": 0.9076824336858172, "learning_rate": 4.0993051059978405e-06, "loss": 0.1549, "step": 3420 }, { "epoch": 0.3151978624406873, "grad_norm": 0.9139002533602671, "learning_rate": 4.098719520596e-06, "loss": 0.1417, "step": 3421 }, { "epoch": 0.31528999861795737, "grad_norm": 0.9239054987071181, "learning_rate": 4.098133786752881e-06, "loss": 0.1456, "step": 3422 }, { "epoch": 0.31538213479522736, "grad_norm": 0.9467114150066259, "learning_rate": 4.097547904522869e-06, "loss": 0.1505, "step": 3423 }, { "epoch": 0.31547427097249736, "grad_norm": 0.9140901271401887, "learning_rate": 4.09696187396036e-06, "loss": 0.1493, "step": 3424 }, { "epoch": 0.31556640714976736, "grad_norm": 0.9424231683243252, "learning_rate": 4.0963756951197695e-06, "loss": 0.1606, "step": 3425 }, { "epoch": 0.31565854332703736, "grad_norm": 0.9403766923503943, "learning_rate": 4.095789368055525e-06, "loss": 0.1664, "step": 3426 }, { "epoch": 0.31575067950430735, "grad_norm": 0.9218638685999803, "learning_rate": 4.095202892822066e-06, "loss": 0.1499, "step": 3427 }, { "epoch": 0.31584281568157735, "grad_norm": 0.9485215914433966, "learning_rate": 4.094616269473846e-06, "loss": 0.1698, "step": 3428 }, { "epoch": 0.3159349518588474, "grad_norm": 0.9196371332910769, "learning_rate": 4.0940294980653335e-06, "loss": 0.1596, "step": 3429 }, { "epoch": 0.3160270880361174, "grad_norm": 0.886507100543908, "learning_rate": 4.093442578651011e-06, "loss": 0.151, "step": 3430 }, { "epoch": 0.3161192242133874, "grad_norm": 0.8484028923939285, "learning_rate": 4.092855511285373e-06, "loss": 0.1416, "step": 3431 }, { "epoch": 0.3162113603906574, "grad_norm": 0.9073737296535767, "learning_rate": 4.09226829602293e-06, "loss": 0.1493, "step": 3432 }, { "epoch": 0.3163034965679274, "grad_norm": 0.967529401555553, "learning_rate": 4.091680932918205e-06, "loss": 0.1633, "step": 3433 }, { "epoch": 0.3163956327451974, "grad_norm": 0.9725392063430839, "learning_rate": 4.091093422025733e-06, "loss": 0.1687, "step": 3434 }, { "epoch": 0.3164877689224674, "grad_norm": 0.9128546072500832, "learning_rate": 4.090505763400065e-06, "loss": 0.1624, "step": 3435 }, { "epoch": 0.31657990509973744, "grad_norm": 0.8497060725564581, "learning_rate": 4.089917957095767e-06, "loss": 0.1473, "step": 3436 }, { "epoch": 0.31667204127700743, "grad_norm": 0.936515788653949, "learning_rate": 4.089330003167416e-06, "loss": 0.1532, "step": 3437 }, { "epoch": 0.31676417745427743, "grad_norm": 0.9506803605861734, "learning_rate": 4.088741901669601e-06, "loss": 0.1418, "step": 3438 }, { "epoch": 0.3168563136315474, "grad_norm": 0.9006173689048483, "learning_rate": 4.088153652656932e-06, "loss": 0.1606, "step": 3439 }, { "epoch": 0.3169484498088174, "grad_norm": 0.9289187640854032, "learning_rate": 4.087565256184024e-06, "loss": 0.1566, "step": 3440 }, { "epoch": 0.3170405859860874, "grad_norm": 1.0591258145699214, "learning_rate": 4.086976712305511e-06, "loss": 0.1799, "step": 3441 }, { "epoch": 0.3171327221633574, "grad_norm": 0.9400847693025363, "learning_rate": 4.08638802107604e-06, "loss": 0.1616, "step": 3442 }, { "epoch": 0.31722485834062747, "grad_norm": 0.9298557372660742, "learning_rate": 4.0857991825502696e-06, "loss": 0.1676, "step": 3443 }, { "epoch": 0.31731699451789747, "grad_norm": 0.9800661183069009, "learning_rate": 4.085210196782875e-06, "loss": 0.1604, "step": 3444 }, { "epoch": 0.31740913069516746, "grad_norm": 1.0754863588778008, "learning_rate": 4.084621063828544e-06, "loss": 0.1738, "step": 3445 }, { "epoch": 0.31750126687243746, "grad_norm": 0.9468907496454307, "learning_rate": 4.0840317837419754e-06, "loss": 0.1716, "step": 3446 }, { "epoch": 0.31759340304970746, "grad_norm": 0.9500032382526354, "learning_rate": 4.083442356577886e-06, "loss": 0.1568, "step": 3447 }, { "epoch": 0.31768553922697745, "grad_norm": 1.03361821973418, "learning_rate": 4.082852782391003e-06, "loss": 0.1646, "step": 3448 }, { "epoch": 0.31777767540424745, "grad_norm": 0.9627491594944207, "learning_rate": 4.0822630612360685e-06, "loss": 0.159, "step": 3449 }, { "epoch": 0.3178698115815175, "grad_norm": 0.8662665250817492, "learning_rate": 4.081673193167839e-06, "loss": 0.1522, "step": 3450 }, { "epoch": 0.3179619477587875, "grad_norm": 0.8937920972998911, "learning_rate": 4.081083178241083e-06, "loss": 0.1493, "step": 3451 }, { "epoch": 0.3180540839360575, "grad_norm": 0.9479929284339497, "learning_rate": 4.080493016510583e-06, "loss": 0.1639, "step": 3452 }, { "epoch": 0.3181462201133275, "grad_norm": 0.9187532135497326, "learning_rate": 4.079902708031137e-06, "loss": 0.1601, "step": 3453 }, { "epoch": 0.3182383562905975, "grad_norm": 0.9144021033769467, "learning_rate": 4.079312252857556e-06, "loss": 0.1541, "step": 3454 }, { "epoch": 0.3183304924678675, "grad_norm": 0.9191213571527707, "learning_rate": 4.07872165104466e-06, "loss": 0.1623, "step": 3455 }, { "epoch": 0.31842262864513754, "grad_norm": 0.9511674669274025, "learning_rate": 4.07813090264729e-06, "loss": 0.1597, "step": 3456 }, { "epoch": 0.31851476482240754, "grad_norm": 0.9798337339733897, "learning_rate": 4.077540007720295e-06, "loss": 0.1722, "step": 3457 }, { "epoch": 0.31860690099967753, "grad_norm": 0.8879213314696275, "learning_rate": 4.076948966318542e-06, "loss": 0.1491, "step": 3458 }, { "epoch": 0.31869903717694753, "grad_norm": 0.8721348850025189, "learning_rate": 4.076357778496906e-06, "loss": 0.1519, "step": 3459 }, { "epoch": 0.3187911733542175, "grad_norm": 0.9450063837612857, "learning_rate": 4.075766444310282e-06, "loss": 0.1484, "step": 3460 }, { "epoch": 0.3188833095314875, "grad_norm": 0.9015677020824875, "learning_rate": 4.075174963813574e-06, "loss": 0.1551, "step": 3461 }, { "epoch": 0.3189754457087575, "grad_norm": 0.9398397674223026, "learning_rate": 4.0745833370617e-06, "loss": 0.1598, "step": 3462 }, { "epoch": 0.3190675818860276, "grad_norm": 0.9545054887697545, "learning_rate": 4.073991564109595e-06, "loss": 0.1535, "step": 3463 }, { "epoch": 0.31915971806329757, "grad_norm": 0.935516966266871, "learning_rate": 4.073399645012203e-06, "loss": 0.1534, "step": 3464 }, { "epoch": 0.31925185424056757, "grad_norm": 0.9004407095982404, "learning_rate": 4.072807579824485e-06, "loss": 0.1729, "step": 3465 }, { "epoch": 0.31934399041783756, "grad_norm": 0.9059623238664691, "learning_rate": 4.072215368601414e-06, "loss": 0.1619, "step": 3466 }, { "epoch": 0.31943612659510756, "grad_norm": 0.8742229100315807, "learning_rate": 4.0716230113979766e-06, "loss": 0.1502, "step": 3467 }, { "epoch": 0.31952826277237756, "grad_norm": 1.0188211123923578, "learning_rate": 4.071030508269173e-06, "loss": 0.1699, "step": 3468 }, { "epoch": 0.31962039894964756, "grad_norm": 0.8903732181525056, "learning_rate": 4.070437859270019e-06, "loss": 0.1637, "step": 3469 }, { "epoch": 0.3197125351269176, "grad_norm": 0.9856901177717559, "learning_rate": 4.06984506445554e-06, "loss": 0.1581, "step": 3470 }, { "epoch": 0.3198046713041876, "grad_norm": 0.9279233569958244, "learning_rate": 4.069252123880777e-06, "loss": 0.1619, "step": 3471 }, { "epoch": 0.3198968074814576, "grad_norm": 0.9172143149312972, "learning_rate": 4.068659037600786e-06, "loss": 0.1542, "step": 3472 }, { "epoch": 0.3199889436587276, "grad_norm": 0.8131584393868659, "learning_rate": 4.068065805670635e-06, "loss": 0.1417, "step": 3473 }, { "epoch": 0.3200810798359976, "grad_norm": 0.9034854285537024, "learning_rate": 4.067472428145405e-06, "loss": 0.1494, "step": 3474 }, { "epoch": 0.3201732160132676, "grad_norm": 0.8541692198778182, "learning_rate": 4.066878905080191e-06, "loss": 0.1396, "step": 3475 }, { "epoch": 0.3202653521905376, "grad_norm": 0.9365299991439768, "learning_rate": 4.066285236530103e-06, "loss": 0.1597, "step": 3476 }, { "epoch": 0.32035748836780764, "grad_norm": 0.9592597644111814, "learning_rate": 4.065691422550261e-06, "loss": 0.1512, "step": 3477 }, { "epoch": 0.32044962454507764, "grad_norm": 0.8863394888331791, "learning_rate": 4.065097463195803e-06, "loss": 0.1592, "step": 3478 }, { "epoch": 0.32054176072234764, "grad_norm": 0.8543865524815278, "learning_rate": 4.064503358521876e-06, "loss": 0.1459, "step": 3479 }, { "epoch": 0.32063389689961763, "grad_norm": 0.8962676770429008, "learning_rate": 4.063909108583644e-06, "loss": 0.1553, "step": 3480 }, { "epoch": 0.32072603307688763, "grad_norm": 0.871977731951131, "learning_rate": 4.063314713436283e-06, "loss": 0.1656, "step": 3481 }, { "epoch": 0.3208181692541576, "grad_norm": 1.001308123062294, "learning_rate": 4.062720173134983e-06, "loss": 0.1709, "step": 3482 }, { "epoch": 0.3209103054314276, "grad_norm": 0.8730340921167583, "learning_rate": 4.062125487734947e-06, "loss": 0.151, "step": 3483 }, { "epoch": 0.3210024416086977, "grad_norm": 0.9399620155764555, "learning_rate": 4.06153065729139e-06, "loss": 0.1477, "step": 3484 }, { "epoch": 0.32109457778596767, "grad_norm": 0.9154550750631788, "learning_rate": 4.060935681859545e-06, "loss": 0.1509, "step": 3485 }, { "epoch": 0.32118671396323767, "grad_norm": 0.9360146752265455, "learning_rate": 4.060340561494654e-06, "loss": 0.1606, "step": 3486 }, { "epoch": 0.32127885014050767, "grad_norm": 0.9870281583727557, "learning_rate": 4.059745296251972e-06, "loss": 0.1612, "step": 3487 }, { "epoch": 0.32137098631777766, "grad_norm": 0.9446020683476672, "learning_rate": 4.059149886186773e-06, "loss": 0.1586, "step": 3488 }, { "epoch": 0.32146312249504766, "grad_norm": 0.9370826914836032, "learning_rate": 4.058554331354339e-06, "loss": 0.1608, "step": 3489 }, { "epoch": 0.3215552586723177, "grad_norm": 0.9222591973137391, "learning_rate": 4.057958631809967e-06, "loss": 0.1549, "step": 3490 }, { "epoch": 0.3216473948495877, "grad_norm": 0.915092079185541, "learning_rate": 4.057362787608969e-06, "loss": 0.1546, "step": 3491 }, { "epoch": 0.3217395310268577, "grad_norm": 0.9413157010417782, "learning_rate": 4.056766798806668e-06, "loss": 0.1484, "step": 3492 }, { "epoch": 0.3218316672041277, "grad_norm": 0.9019348756364096, "learning_rate": 4.056170665458403e-06, "loss": 0.1511, "step": 3493 }, { "epoch": 0.3219238033813977, "grad_norm": 0.9600914185028901, "learning_rate": 4.055574387619524e-06, "loss": 0.1733, "step": 3494 }, { "epoch": 0.3220159395586677, "grad_norm": 0.911767587313382, "learning_rate": 4.054977965345396e-06, "loss": 0.1658, "step": 3495 }, { "epoch": 0.3221080757359377, "grad_norm": 0.9238148568525669, "learning_rate": 4.054381398691396e-06, "loss": 0.1651, "step": 3496 }, { "epoch": 0.32220021191320775, "grad_norm": 0.8606339124055112, "learning_rate": 4.053784687712916e-06, "loss": 0.1521, "step": 3497 }, { "epoch": 0.32229234809047774, "grad_norm": 0.8899508622289252, "learning_rate": 4.05318783246536e-06, "loss": 0.1452, "step": 3498 }, { "epoch": 0.32238448426774774, "grad_norm": 0.8955622026127069, "learning_rate": 4.052590833004147e-06, "loss": 0.1554, "step": 3499 }, { "epoch": 0.32247662044501774, "grad_norm": 0.824248867010417, "learning_rate": 4.051993689384709e-06, "loss": 0.1421, "step": 3500 }, { "epoch": 0.32247662044501774, "eval_loss": 0.15785863995552063, "eval_runtime": 300.4481, "eval_samples_per_second": 23.355, "eval_steps_per_second": 2.922, "step": 3500 }, { "epoch": 0.32256875662228773, "grad_norm": 0.8684284205940074, "learning_rate": 4.051396401662489e-06, "loss": 0.1431, "step": 3501 }, { "epoch": 0.32266089279955773, "grad_norm": 1.0345536582306027, "learning_rate": 4.050798969892946e-06, "loss": 0.1672, "step": 3502 }, { "epoch": 0.3227530289768277, "grad_norm": 0.8593899874719152, "learning_rate": 4.050201394131551e-06, "loss": 0.1409, "step": 3503 }, { "epoch": 0.3228451651540978, "grad_norm": 0.9223597678224997, "learning_rate": 4.049603674433791e-06, "loss": 0.168, "step": 3504 }, { "epoch": 0.3229373013313678, "grad_norm": 0.888252713703813, "learning_rate": 4.049005810855163e-06, "loss": 0.1679, "step": 3505 }, { "epoch": 0.3230294375086378, "grad_norm": 0.9718784879317928, "learning_rate": 4.048407803451178e-06, "loss": 0.1591, "step": 3506 }, { "epoch": 0.32312157368590777, "grad_norm": 0.8838193993880871, "learning_rate": 4.047809652277362e-06, "loss": 0.1505, "step": 3507 }, { "epoch": 0.32321370986317777, "grad_norm": 0.8915524847095565, "learning_rate": 4.047211357389254e-06, "loss": 0.1685, "step": 3508 }, { "epoch": 0.32330584604044776, "grad_norm": 0.873930483845192, "learning_rate": 4.046612918842405e-06, "loss": 0.1543, "step": 3509 }, { "epoch": 0.32339798221771776, "grad_norm": 0.8735696475644171, "learning_rate": 4.0460143366923785e-06, "loss": 0.1578, "step": 3510 }, { "epoch": 0.3234901183949878, "grad_norm": 0.8588041851689829, "learning_rate": 4.045415610994755e-06, "loss": 0.157, "step": 3511 }, { "epoch": 0.3235822545722578, "grad_norm": 0.8657789207118362, "learning_rate": 4.044816741805127e-06, "loss": 0.1471, "step": 3512 }, { "epoch": 0.3236743907495278, "grad_norm": 0.859079362850816, "learning_rate": 4.044217729179097e-06, "loss": 0.1583, "step": 3513 }, { "epoch": 0.3237665269267978, "grad_norm": 0.9071782136735049, "learning_rate": 4.043618573172286e-06, "loss": 0.1645, "step": 3514 }, { "epoch": 0.3238586631040678, "grad_norm": 0.9391842378302793, "learning_rate": 4.043019273840323e-06, "loss": 0.1599, "step": 3515 }, { "epoch": 0.3239507992813378, "grad_norm": 0.9179852158772025, "learning_rate": 4.042419831238855e-06, "loss": 0.1671, "step": 3516 }, { "epoch": 0.3240429354586078, "grad_norm": 0.936687179289209, "learning_rate": 4.041820245423539e-06, "loss": 0.1489, "step": 3517 }, { "epoch": 0.32413507163587785, "grad_norm": 0.8751747372246792, "learning_rate": 4.041220516450048e-06, "loss": 0.1429, "step": 3518 }, { "epoch": 0.32422720781314784, "grad_norm": 0.9622948464619779, "learning_rate": 4.040620644374066e-06, "loss": 0.1714, "step": 3519 }, { "epoch": 0.32431934399041784, "grad_norm": 0.8881873414992707, "learning_rate": 4.0400206292512914e-06, "loss": 0.1358, "step": 3520 }, { "epoch": 0.32441148016768784, "grad_norm": 0.9365575801168411, "learning_rate": 4.039420471137435e-06, "loss": 0.1687, "step": 3521 }, { "epoch": 0.32450361634495783, "grad_norm": 0.9525223912847359, "learning_rate": 4.038820170088223e-06, "loss": 0.162, "step": 3522 }, { "epoch": 0.32459575252222783, "grad_norm": 0.8896341990118014, "learning_rate": 4.0382197261593925e-06, "loss": 0.1672, "step": 3523 }, { "epoch": 0.3246878886994979, "grad_norm": 1.0189292647998855, "learning_rate": 4.037619139406695e-06, "loss": 0.1676, "step": 3524 }, { "epoch": 0.3247800248767679, "grad_norm": 0.902643694703534, "learning_rate": 4.037018409885894e-06, "loss": 0.1641, "step": 3525 }, { "epoch": 0.3248721610540379, "grad_norm": 0.867488131643773, "learning_rate": 4.036417537652769e-06, "loss": 0.1431, "step": 3526 }, { "epoch": 0.3249642972313079, "grad_norm": 0.9114760774953333, "learning_rate": 4.03581652276311e-06, "loss": 0.1563, "step": 3527 }, { "epoch": 0.32505643340857787, "grad_norm": 0.847393565850545, "learning_rate": 4.035215365272722e-06, "loss": 0.1403, "step": 3528 }, { "epoch": 0.32514856958584787, "grad_norm": 0.9601802498635071, "learning_rate": 4.034614065237421e-06, "loss": 0.1717, "step": 3529 }, { "epoch": 0.32524070576311787, "grad_norm": 0.8451113466394516, "learning_rate": 4.034012622713041e-06, "loss": 0.1323, "step": 3530 }, { "epoch": 0.3253328419403879, "grad_norm": 0.9540096973545658, "learning_rate": 4.033411037755422e-06, "loss": 0.163, "step": 3531 }, { "epoch": 0.3254249781176579, "grad_norm": 0.9029760011942667, "learning_rate": 4.032809310420424e-06, "loss": 0.1638, "step": 3532 }, { "epoch": 0.3255171142949279, "grad_norm": 0.8653493696897545, "learning_rate": 4.032207440763915e-06, "loss": 0.1482, "step": 3533 }, { "epoch": 0.3256092504721979, "grad_norm": 0.9002695038758083, "learning_rate": 4.0316054288417825e-06, "loss": 0.1626, "step": 3534 }, { "epoch": 0.3257013866494679, "grad_norm": 0.9292035928119098, "learning_rate": 4.031003274709919e-06, "loss": 0.1565, "step": 3535 }, { "epoch": 0.3257935228267379, "grad_norm": 0.9373344296369921, "learning_rate": 4.0304009784242385e-06, "loss": 0.1767, "step": 3536 }, { "epoch": 0.3258856590040079, "grad_norm": 0.8566406083660746, "learning_rate": 4.029798540040661e-06, "loss": 0.1393, "step": 3537 }, { "epoch": 0.32597779518127795, "grad_norm": 0.9507352638450343, "learning_rate": 4.029195959615125e-06, "loss": 0.1465, "step": 3538 }, { "epoch": 0.32606993135854795, "grad_norm": 0.907988452302607, "learning_rate": 4.02859323720358e-06, "loss": 0.1587, "step": 3539 }, { "epoch": 0.32616206753581795, "grad_norm": 0.9271425625270764, "learning_rate": 4.027990372861989e-06, "loss": 0.1569, "step": 3540 }, { "epoch": 0.32625420371308794, "grad_norm": 0.9450417681972197, "learning_rate": 4.027387366646326e-06, "loss": 0.1594, "step": 3541 }, { "epoch": 0.32634633989035794, "grad_norm": 0.9513022280524657, "learning_rate": 4.026784218612581e-06, "loss": 0.1675, "step": 3542 }, { "epoch": 0.32643847606762794, "grad_norm": 0.9200859147183892, "learning_rate": 4.026180928816759e-06, "loss": 0.1609, "step": 3543 }, { "epoch": 0.32653061224489793, "grad_norm": 1.0053375898691645, "learning_rate": 4.0255774973148735e-06, "loss": 0.1638, "step": 3544 }, { "epoch": 0.326622748422168, "grad_norm": 0.9025225354859596, "learning_rate": 4.024973924162952e-06, "loss": 0.1462, "step": 3545 }, { "epoch": 0.326714884599438, "grad_norm": 0.9576990817667488, "learning_rate": 4.024370209417037e-06, "loss": 0.1593, "step": 3546 }, { "epoch": 0.326807020776708, "grad_norm": 0.9525658664676164, "learning_rate": 4.0237663531331855e-06, "loss": 0.1598, "step": 3547 }, { "epoch": 0.326899156953978, "grad_norm": 0.8995010015032531, "learning_rate": 4.023162355367464e-06, "loss": 0.1483, "step": 3548 }, { "epoch": 0.326991293131248, "grad_norm": 0.9037322000936574, "learning_rate": 4.022558216175953e-06, "loss": 0.1507, "step": 3549 }, { "epoch": 0.32708342930851797, "grad_norm": 0.8937200904467155, "learning_rate": 4.021953935614748e-06, "loss": 0.1442, "step": 3550 }, { "epoch": 0.32717556548578797, "grad_norm": 0.9984501569219848, "learning_rate": 4.021349513739956e-06, "loss": 0.1755, "step": 3551 }, { "epoch": 0.327267701663058, "grad_norm": 0.8965440594336742, "learning_rate": 4.020744950607699e-06, "loss": 0.1573, "step": 3552 }, { "epoch": 0.327359837840328, "grad_norm": 0.9156102418861106, "learning_rate": 4.020140246274109e-06, "loss": 0.1599, "step": 3553 }, { "epoch": 0.327451974017598, "grad_norm": 0.9645827093836922, "learning_rate": 4.019535400795333e-06, "loss": 0.1746, "step": 3554 }, { "epoch": 0.327544110194868, "grad_norm": 0.8814855920188206, "learning_rate": 4.018930414227533e-06, "loss": 0.1558, "step": 3555 }, { "epoch": 0.327636246372138, "grad_norm": 0.983414207184116, "learning_rate": 4.018325286626879e-06, "loss": 0.1592, "step": 3556 }, { "epoch": 0.327728382549408, "grad_norm": 0.919976809129956, "learning_rate": 4.017720018049559e-06, "loss": 0.1532, "step": 3557 }, { "epoch": 0.32782051872667806, "grad_norm": 0.9291452608705151, "learning_rate": 4.017114608551772e-06, "loss": 0.1601, "step": 3558 }, { "epoch": 0.32791265490394805, "grad_norm": 0.8852683619080132, "learning_rate": 4.016509058189731e-06, "loss": 0.1505, "step": 3559 }, { "epoch": 0.32800479108121805, "grad_norm": 0.9186159403047259, "learning_rate": 4.0159033670196605e-06, "loss": 0.1629, "step": 3560 }, { "epoch": 0.32809692725848805, "grad_norm": 0.9844743262655814, "learning_rate": 4.0152975350978e-06, "loss": 0.1588, "step": 3561 }, { "epoch": 0.32818906343575804, "grad_norm": 0.8595561775845129, "learning_rate": 4.0146915624803985e-06, "loss": 0.1556, "step": 3562 }, { "epoch": 0.32828119961302804, "grad_norm": 0.9446841396028918, "learning_rate": 4.014085449223724e-06, "loss": 0.1581, "step": 3563 }, { "epoch": 0.32837333579029804, "grad_norm": 1.0023786637300742, "learning_rate": 4.013479195384051e-06, "loss": 0.1686, "step": 3564 }, { "epoch": 0.3284654719675681, "grad_norm": 0.9068328433386368, "learning_rate": 4.012872801017673e-06, "loss": 0.156, "step": 3565 }, { "epoch": 0.3285576081448381, "grad_norm": 0.9627651441762631, "learning_rate": 4.012266266180892e-06, "loss": 0.1477, "step": 3566 }, { "epoch": 0.3286497443221081, "grad_norm": 0.8659019527581846, "learning_rate": 4.011659590930026e-06, "loss": 0.1515, "step": 3567 }, { "epoch": 0.3287418804993781, "grad_norm": 0.8270948116690996, "learning_rate": 4.011052775321405e-06, "loss": 0.1422, "step": 3568 }, { "epoch": 0.3288340166766481, "grad_norm": 0.9782404984989339, "learning_rate": 4.010445819411369e-06, "loss": 0.1684, "step": 3569 }, { "epoch": 0.3289261528539181, "grad_norm": 0.8829362934235169, "learning_rate": 4.009838723256278e-06, "loss": 0.1522, "step": 3570 }, { "epoch": 0.32901828903118807, "grad_norm": 0.9407625480923458, "learning_rate": 4.009231486912498e-06, "loss": 0.1656, "step": 3571 }, { "epoch": 0.3291104252084581, "grad_norm": 0.8848548864415823, "learning_rate": 4.008624110436413e-06, "loss": 0.1527, "step": 3572 }, { "epoch": 0.3292025613857281, "grad_norm": 0.9301909339470641, "learning_rate": 4.008016593884416e-06, "loss": 0.1546, "step": 3573 }, { "epoch": 0.3292946975629981, "grad_norm": 0.9320406389327086, "learning_rate": 4.0074089373129165e-06, "loss": 0.1522, "step": 3574 }, { "epoch": 0.3293868337402681, "grad_norm": 0.8886539369086722, "learning_rate": 4.006801140778335e-06, "loss": 0.1472, "step": 3575 }, { "epoch": 0.3294789699175381, "grad_norm": 0.9537007448167414, "learning_rate": 4.006193204337106e-06, "loss": 0.1604, "step": 3576 }, { "epoch": 0.3295711060948081, "grad_norm": 0.8619622908907076, "learning_rate": 4.005585128045675e-06, "loss": 0.1431, "step": 3577 }, { "epoch": 0.3296632422720781, "grad_norm": 0.9035882099898597, "learning_rate": 4.004976911960503e-06, "loss": 0.1629, "step": 3578 }, { "epoch": 0.32975537844934816, "grad_norm": 0.9564187518876925, "learning_rate": 4.004368556138062e-06, "loss": 0.1551, "step": 3579 }, { "epoch": 0.32984751462661815, "grad_norm": 0.8805677607577047, "learning_rate": 4.003760060634839e-06, "loss": 0.1552, "step": 3580 }, { "epoch": 0.32993965080388815, "grad_norm": 0.8140917034555333, "learning_rate": 4.003151425507333e-06, "loss": 0.1515, "step": 3581 }, { "epoch": 0.33003178698115815, "grad_norm": 0.9419332145704289, "learning_rate": 4.002542650812056e-06, "loss": 0.1705, "step": 3582 }, { "epoch": 0.33012392315842815, "grad_norm": 0.8431186709355069, "learning_rate": 4.001933736605531e-06, "loss": 0.1577, "step": 3583 }, { "epoch": 0.33021605933569814, "grad_norm": 0.9288861117347793, "learning_rate": 4.001324682944297e-06, "loss": 0.1623, "step": 3584 }, { "epoch": 0.3303081955129682, "grad_norm": 0.9538522526825776, "learning_rate": 4.000715489884906e-06, "loss": 0.1561, "step": 3585 }, { "epoch": 0.3304003316902382, "grad_norm": 0.8871395638894523, "learning_rate": 4.000106157483919e-06, "loss": 0.1475, "step": 3586 }, { "epoch": 0.3304924678675082, "grad_norm": 0.9324362611728906, "learning_rate": 3.999496685797914e-06, "loss": 0.1643, "step": 3587 }, { "epoch": 0.3305846040447782, "grad_norm": 0.9712778225713625, "learning_rate": 3.998887074883481e-06, "loss": 0.16, "step": 3588 }, { "epoch": 0.3306767402220482, "grad_norm": 0.9202172732232993, "learning_rate": 3.9982773247972204e-06, "loss": 0.1718, "step": 3589 }, { "epoch": 0.3307688763993182, "grad_norm": 0.9304776376651178, "learning_rate": 3.99766743559575e-06, "loss": 0.1617, "step": 3590 }, { "epoch": 0.3308610125765882, "grad_norm": 0.8970303287240191, "learning_rate": 3.997057407335697e-06, "loss": 0.1537, "step": 3591 }, { "epoch": 0.33095314875385823, "grad_norm": 0.9275835584300335, "learning_rate": 3.996447240073702e-06, "loss": 0.1617, "step": 3592 }, { "epoch": 0.3310452849311282, "grad_norm": 0.917024390620923, "learning_rate": 3.995836933866421e-06, "loss": 0.1493, "step": 3593 }, { "epoch": 0.3311374211083982, "grad_norm": 0.8871456134644057, "learning_rate": 3.995226488770519e-06, "loss": 0.1603, "step": 3594 }, { "epoch": 0.3312295572856682, "grad_norm": 0.9237380887364842, "learning_rate": 3.994615904842676e-06, "loss": 0.1612, "step": 3595 }, { "epoch": 0.3313216934629382, "grad_norm": 0.990513202871709, "learning_rate": 3.994005182139586e-06, "loss": 0.1813, "step": 3596 }, { "epoch": 0.3314138296402082, "grad_norm": 0.8947980698760637, "learning_rate": 3.993394320717952e-06, "loss": 0.1597, "step": 3597 }, { "epoch": 0.3315059658174782, "grad_norm": 0.8966130531365468, "learning_rate": 3.992783320634498e-06, "loss": 0.1464, "step": 3598 }, { "epoch": 0.33159810199474826, "grad_norm": 0.9026903822003577, "learning_rate": 3.992172181945951e-06, "loss": 0.1658, "step": 3599 }, { "epoch": 0.33169023817201826, "grad_norm": 0.9037038636134237, "learning_rate": 3.991560904709055e-06, "loss": 0.166, "step": 3600 }, { "epoch": 0.33178237434928826, "grad_norm": 0.9647233382895593, "learning_rate": 3.990949488980569e-06, "loss": 0.161, "step": 3601 }, { "epoch": 0.33187451052655825, "grad_norm": 0.9559015439299133, "learning_rate": 3.990337934817263e-06, "loss": 0.1685, "step": 3602 }, { "epoch": 0.33196664670382825, "grad_norm": 0.8896214610619697, "learning_rate": 3.989726242275919e-06, "loss": 0.1489, "step": 3603 }, { "epoch": 0.33205878288109825, "grad_norm": 0.9415012706281953, "learning_rate": 3.989114411413333e-06, "loss": 0.1636, "step": 3604 }, { "epoch": 0.33215091905836824, "grad_norm": 0.9134402537640711, "learning_rate": 3.988502442286314e-06, "loss": 0.1496, "step": 3605 }, { "epoch": 0.3322430552356383, "grad_norm": 0.8943155472774135, "learning_rate": 3.987890334951683e-06, "loss": 0.1521, "step": 3606 }, { "epoch": 0.3323351914129083, "grad_norm": 0.9865044679709729, "learning_rate": 3.987278089466274e-06, "loss": 0.1754, "step": 3607 }, { "epoch": 0.3324273275901783, "grad_norm": 0.9439032580162637, "learning_rate": 3.986665705886934e-06, "loss": 0.1557, "step": 3608 }, { "epoch": 0.3325194637674483, "grad_norm": 0.8531927782870514, "learning_rate": 3.986053184270524e-06, "loss": 0.1541, "step": 3609 }, { "epoch": 0.3326115999447183, "grad_norm": 0.9242320549076447, "learning_rate": 3.9854405246739155e-06, "loss": 0.1632, "step": 3610 }, { "epoch": 0.3327037361219883, "grad_norm": 0.9688725514867598, "learning_rate": 3.984827727153995e-06, "loss": 0.1606, "step": 3611 }, { "epoch": 0.3327958722992583, "grad_norm": 0.9171240245963733, "learning_rate": 3.984214791767659e-06, "loss": 0.1548, "step": 3612 }, { "epoch": 0.33288800847652833, "grad_norm": 0.9001926653232694, "learning_rate": 3.983601718571821e-06, "loss": 0.1577, "step": 3613 }, { "epoch": 0.3329801446537983, "grad_norm": 0.8394494549911041, "learning_rate": 3.982988507623403e-06, "loss": 0.1532, "step": 3614 }, { "epoch": 0.3330722808310683, "grad_norm": 0.8980142878215236, "learning_rate": 3.982375158979344e-06, "loss": 0.1573, "step": 3615 }, { "epoch": 0.3331644170083383, "grad_norm": 0.883954929348229, "learning_rate": 3.98176167269659e-06, "loss": 0.1479, "step": 3616 }, { "epoch": 0.3332565531856083, "grad_norm": 0.9567135747589431, "learning_rate": 3.981148048832106e-06, "loss": 0.1608, "step": 3617 }, { "epoch": 0.3333486893628783, "grad_norm": 0.8848169559034494, "learning_rate": 3.980534287442866e-06, "loss": 0.1521, "step": 3618 }, { "epoch": 0.33344082554014837, "grad_norm": 0.9362944149112743, "learning_rate": 3.9799203885858584e-06, "loss": 0.1519, "step": 3619 }, { "epoch": 0.33353296171741836, "grad_norm": 0.8514694979616128, "learning_rate": 3.979306352318083e-06, "loss": 0.1425, "step": 3620 }, { "epoch": 0.33362509789468836, "grad_norm": 0.8970641312249314, "learning_rate": 3.978692178696555e-06, "loss": 0.1494, "step": 3621 }, { "epoch": 0.33371723407195836, "grad_norm": 0.9766601140568355, "learning_rate": 3.9780778677782974e-06, "loss": 0.1709, "step": 3622 }, { "epoch": 0.33380937024922835, "grad_norm": 0.9966149520552534, "learning_rate": 3.977463419620352e-06, "loss": 0.1779, "step": 3623 }, { "epoch": 0.33390150642649835, "grad_norm": 0.9568555752316749, "learning_rate": 3.976848834279767e-06, "loss": 0.1615, "step": 3624 }, { "epoch": 0.33399364260376835, "grad_norm": 0.919492432719039, "learning_rate": 3.976234111813611e-06, "loss": 0.1568, "step": 3625 }, { "epoch": 0.3340857787810384, "grad_norm": 0.9378481548085038, "learning_rate": 3.975619252278958e-06, "loss": 0.1648, "step": 3626 }, { "epoch": 0.3341779149583084, "grad_norm": 0.9358170612115819, "learning_rate": 3.9750042557328986e-06, "loss": 0.1494, "step": 3627 }, { "epoch": 0.3342700511355784, "grad_norm": 0.892577374929836, "learning_rate": 3.974389122232536e-06, "loss": 0.1505, "step": 3628 }, { "epoch": 0.3343621873128484, "grad_norm": 0.8907012935380049, "learning_rate": 3.973773851834983e-06, "loss": 0.1441, "step": 3629 }, { "epoch": 0.3344543234901184, "grad_norm": 0.9104396913688365, "learning_rate": 3.973158444597371e-06, "loss": 0.1566, "step": 3630 }, { "epoch": 0.3345464596673884, "grad_norm": 1.0018321492907019, "learning_rate": 3.972542900576838e-06, "loss": 0.1642, "step": 3631 }, { "epoch": 0.3346385958446584, "grad_norm": 1.0043463523252036, "learning_rate": 3.9719272198305385e-06, "loss": 0.1546, "step": 3632 }, { "epoch": 0.33473073202192843, "grad_norm": 0.9422176961049771, "learning_rate": 3.971311402415638e-06, "loss": 0.154, "step": 3633 }, { "epoch": 0.33482286819919843, "grad_norm": 0.9556246249227124, "learning_rate": 3.970695448389315e-06, "loss": 0.1566, "step": 3634 }, { "epoch": 0.33491500437646843, "grad_norm": 0.9922729925416031, "learning_rate": 3.970079357808763e-06, "loss": 0.1522, "step": 3635 }, { "epoch": 0.3350071405537384, "grad_norm": 0.9364461746995512, "learning_rate": 3.969463130731183e-06, "loss": 0.1658, "step": 3636 }, { "epoch": 0.3350992767310084, "grad_norm": 0.9077754495843189, "learning_rate": 3.968846767213794e-06, "loss": 0.1638, "step": 3637 }, { "epoch": 0.3351914129082784, "grad_norm": 0.9642592983646728, "learning_rate": 3.968230267313824e-06, "loss": 0.1607, "step": 3638 }, { "epoch": 0.3352835490855484, "grad_norm": 0.9523862140574059, "learning_rate": 3.967613631088516e-06, "loss": 0.1602, "step": 3639 }, { "epoch": 0.33537568526281847, "grad_norm": 0.853738339970035, "learning_rate": 3.966996858595123e-06, "loss": 0.1621, "step": 3640 }, { "epoch": 0.33546782144008847, "grad_norm": 0.9435129135901393, "learning_rate": 3.966379949890916e-06, "loss": 0.1479, "step": 3641 }, { "epoch": 0.33555995761735846, "grad_norm": 1.0237409152349508, "learning_rate": 3.965762905033171e-06, "loss": 0.1442, "step": 3642 }, { "epoch": 0.33565209379462846, "grad_norm": 1.004827274729739, "learning_rate": 3.965145724079184e-06, "loss": 0.1727, "step": 3643 }, { "epoch": 0.33574422997189846, "grad_norm": 0.9012970937284178, "learning_rate": 3.964528407086259e-06, "loss": 0.1582, "step": 3644 }, { "epoch": 0.33583636614916845, "grad_norm": 0.9084801893441767, "learning_rate": 3.963910954111712e-06, "loss": 0.1427, "step": 3645 }, { "epoch": 0.33592850232643845, "grad_norm": 1.0458487380125516, "learning_rate": 3.9632933652128765e-06, "loss": 0.1634, "step": 3646 }, { "epoch": 0.3360206385037085, "grad_norm": 0.9627427187542494, "learning_rate": 3.962675640447094e-06, "loss": 0.1768, "step": 3647 }, { "epoch": 0.3361127746809785, "grad_norm": 0.942436602646475, "learning_rate": 3.962057779871722e-06, "loss": 0.169, "step": 3648 }, { "epoch": 0.3362049108582485, "grad_norm": 1.0233983022133986, "learning_rate": 3.961439783544126e-06, "loss": 0.1548, "step": 3649 }, { "epoch": 0.3362970470355185, "grad_norm": 0.8673192470735941, "learning_rate": 3.960821651521691e-06, "loss": 0.1531, "step": 3650 }, { "epoch": 0.3363891832127885, "grad_norm": 0.9154833282149483, "learning_rate": 3.960203383861807e-06, "loss": 0.1662, "step": 3651 }, { "epoch": 0.3364813193900585, "grad_norm": 0.9145509274938834, "learning_rate": 3.959584980621883e-06, "loss": 0.1519, "step": 3652 }, { "epoch": 0.33657345556732854, "grad_norm": 0.9274327627736129, "learning_rate": 3.958966441859335e-06, "loss": 0.161, "step": 3653 }, { "epoch": 0.33666559174459854, "grad_norm": 0.93938164864867, "learning_rate": 3.958347767631595e-06, "loss": 0.1478, "step": 3654 }, { "epoch": 0.33675772792186853, "grad_norm": 1.0533654788708426, "learning_rate": 3.95772895799611e-06, "loss": 0.1577, "step": 3655 }, { "epoch": 0.33684986409913853, "grad_norm": 0.8832765620148048, "learning_rate": 3.957110013010333e-06, "loss": 0.1574, "step": 3656 }, { "epoch": 0.3369420002764085, "grad_norm": 0.9261190045148706, "learning_rate": 3.9564909327317355e-06, "loss": 0.1531, "step": 3657 }, { "epoch": 0.3370341364536785, "grad_norm": 0.9237710676896893, "learning_rate": 3.955871717217797e-06, "loss": 0.1433, "step": 3658 }, { "epoch": 0.3371262726309485, "grad_norm": 0.9487715568740481, "learning_rate": 3.955252366526014e-06, "loss": 0.1626, "step": 3659 }, { "epoch": 0.3372184088082186, "grad_norm": 1.0072562563839196, "learning_rate": 3.954632880713891e-06, "loss": 0.1569, "step": 3660 }, { "epoch": 0.33731054498548857, "grad_norm": 0.9446053903813388, "learning_rate": 3.954013259838949e-06, "loss": 0.1612, "step": 3661 }, { "epoch": 0.33740268116275857, "grad_norm": 0.8729387006885364, "learning_rate": 3.95339350395872e-06, "loss": 0.1407, "step": 3662 }, { "epoch": 0.33749481734002856, "grad_norm": 0.9221098538932074, "learning_rate": 3.952773613130747e-06, "loss": 0.177, "step": 3663 }, { "epoch": 0.33758695351729856, "grad_norm": 0.86712001263126, "learning_rate": 3.9521535874125875e-06, "loss": 0.1529, "step": 3664 }, { "epoch": 0.33767908969456856, "grad_norm": 0.8922847012391321, "learning_rate": 3.951533426861812e-06, "loss": 0.1519, "step": 3665 }, { "epoch": 0.33777122587183855, "grad_norm": 0.940770601309715, "learning_rate": 3.950913131536001e-06, "loss": 0.1431, "step": 3666 }, { "epoch": 0.3378633620491086, "grad_norm": 0.9514263978222713, "learning_rate": 3.950292701492749e-06, "loss": 0.1693, "step": 3667 }, { "epoch": 0.3379554982263786, "grad_norm": 0.8291431585281795, "learning_rate": 3.949672136789665e-06, "loss": 0.138, "step": 3668 }, { "epoch": 0.3380476344036486, "grad_norm": 0.9627031886750439, "learning_rate": 3.949051437484367e-06, "loss": 0.1486, "step": 3669 }, { "epoch": 0.3381397705809186, "grad_norm": 0.9203209058165884, "learning_rate": 3.948430603634486e-06, "loss": 0.1483, "step": 3670 }, { "epoch": 0.3382319067581886, "grad_norm": 0.8850216723638966, "learning_rate": 3.947809635297668e-06, "loss": 0.1517, "step": 3671 }, { "epoch": 0.3383240429354586, "grad_norm": 0.9675759754223425, "learning_rate": 3.9471885325315695e-06, "loss": 0.1727, "step": 3672 }, { "epoch": 0.3384161791127286, "grad_norm": 0.9573188172538333, "learning_rate": 3.94656729539386e-06, "loss": 0.1555, "step": 3673 }, { "epoch": 0.33850831528999864, "grad_norm": 0.9600208385443318, "learning_rate": 3.945945923942221e-06, "loss": 0.1635, "step": 3674 }, { "epoch": 0.33860045146726864, "grad_norm": 0.9380222087962892, "learning_rate": 3.945324418234349e-06, "loss": 0.1637, "step": 3675 }, { "epoch": 0.33869258764453863, "grad_norm": 0.9400532586305578, "learning_rate": 3.944702778327948e-06, "loss": 0.1479, "step": 3676 }, { "epoch": 0.33878472382180863, "grad_norm": 0.8849537883410138, "learning_rate": 3.944081004280738e-06, "loss": 0.1577, "step": 3677 }, { "epoch": 0.33887685999907863, "grad_norm": 0.893123818162334, "learning_rate": 3.943459096150452e-06, "loss": 0.1505, "step": 3678 }, { "epoch": 0.3389689961763486, "grad_norm": 0.902068247100491, "learning_rate": 3.942837053994834e-06, "loss": 0.1561, "step": 3679 }, { "epoch": 0.3390611323536186, "grad_norm": 0.9182592324604583, "learning_rate": 3.942214877871639e-06, "loss": 0.1631, "step": 3680 }, { "epoch": 0.3391532685308887, "grad_norm": 0.9270539702602028, "learning_rate": 3.941592567838638e-06, "loss": 0.1599, "step": 3681 }, { "epoch": 0.33924540470815867, "grad_norm": 0.869777337642264, "learning_rate": 3.940970123953613e-06, "loss": 0.1481, "step": 3682 }, { "epoch": 0.33933754088542867, "grad_norm": 0.86276183059958, "learning_rate": 3.940347546274355e-06, "loss": 0.156, "step": 3683 }, { "epoch": 0.33942967706269866, "grad_norm": 0.9137393832409828, "learning_rate": 3.9397248348586735e-06, "loss": 0.1548, "step": 3684 }, { "epoch": 0.33952181323996866, "grad_norm": 0.8612576797475836, "learning_rate": 3.939101989764386e-06, "loss": 0.1437, "step": 3685 }, { "epoch": 0.33961394941723866, "grad_norm": 0.8495987603787247, "learning_rate": 3.938479011049324e-06, "loss": 0.1368, "step": 3686 }, { "epoch": 0.3397060855945087, "grad_norm": 0.9270752934332105, "learning_rate": 3.937855898771331e-06, "loss": 0.1525, "step": 3687 }, { "epoch": 0.3397982217717787, "grad_norm": 0.9485150781196504, "learning_rate": 3.9372326529882635e-06, "loss": 0.172, "step": 3688 }, { "epoch": 0.3398903579490487, "grad_norm": 0.9116673543666898, "learning_rate": 3.936609273757988e-06, "loss": 0.1527, "step": 3689 }, { "epoch": 0.3399824941263187, "grad_norm": 0.9229394432273127, "learning_rate": 3.935985761138388e-06, "loss": 0.1564, "step": 3690 }, { "epoch": 0.3400746303035887, "grad_norm": 0.8605203140126699, "learning_rate": 3.935362115187356e-06, "loss": 0.1446, "step": 3691 }, { "epoch": 0.3401667664808587, "grad_norm": 0.9058126161505262, "learning_rate": 3.934738335962796e-06, "loss": 0.1689, "step": 3692 }, { "epoch": 0.3402589026581287, "grad_norm": 0.8737393124424059, "learning_rate": 3.934114423522627e-06, "loss": 0.1464, "step": 3693 }, { "epoch": 0.34035103883539874, "grad_norm": 0.9203947578164606, "learning_rate": 3.93349037792478e-06, "loss": 0.1603, "step": 3694 }, { "epoch": 0.34044317501266874, "grad_norm": 0.871739564957307, "learning_rate": 3.932866199227196e-06, "loss": 0.1527, "step": 3695 }, { "epoch": 0.34053531118993874, "grad_norm": 0.8450165346429883, "learning_rate": 3.932241887487834e-06, "loss": 0.1608, "step": 3696 }, { "epoch": 0.34062744736720874, "grad_norm": 0.9199114792500082, "learning_rate": 3.931617442764656e-06, "loss": 0.1627, "step": 3697 }, { "epoch": 0.34071958354447873, "grad_norm": 0.9050247845325442, "learning_rate": 3.930992865115645e-06, "loss": 0.1604, "step": 3698 }, { "epoch": 0.34081171972174873, "grad_norm": 0.9139956710219851, "learning_rate": 3.930368154598793e-06, "loss": 0.1572, "step": 3699 }, { "epoch": 0.3409038558990187, "grad_norm": 0.8631013927561422, "learning_rate": 3.929743311272104e-06, "loss": 0.1598, "step": 3700 }, { "epoch": 0.3409959920762888, "grad_norm": 0.8943273656977337, "learning_rate": 3.929118335193594e-06, "loss": 0.1538, "step": 3701 }, { "epoch": 0.3410881282535588, "grad_norm": 0.8850875675008057, "learning_rate": 3.9284932264212925e-06, "loss": 0.1595, "step": 3702 }, { "epoch": 0.3411802644308288, "grad_norm": 0.9126308299525047, "learning_rate": 3.927867985013242e-06, "loss": 0.1596, "step": 3703 }, { "epoch": 0.34127240060809877, "grad_norm": 0.9408328545751462, "learning_rate": 3.9272426110274955e-06, "loss": 0.1655, "step": 3704 }, { "epoch": 0.34136453678536877, "grad_norm": 0.9617011692840643, "learning_rate": 3.926617104522118e-06, "loss": 0.1766, "step": 3705 }, { "epoch": 0.34145667296263876, "grad_norm": 0.9237258704007577, "learning_rate": 3.92599146555519e-06, "loss": 0.1627, "step": 3706 }, { "epoch": 0.34154880913990876, "grad_norm": 0.9690704139681786, "learning_rate": 3.9253656941848e-06, "loss": 0.1639, "step": 3707 }, { "epoch": 0.3416409453171788, "grad_norm": 0.9982076700789548, "learning_rate": 3.9247397904690526e-06, "loss": 0.1669, "step": 3708 }, { "epoch": 0.3417330814944488, "grad_norm": 0.9471486226369275, "learning_rate": 3.924113754466062e-06, "loss": 0.1528, "step": 3709 }, { "epoch": 0.3418252176717188, "grad_norm": 0.9104829825095512, "learning_rate": 3.923487586233956e-06, "loss": 0.153, "step": 3710 }, { "epoch": 0.3419173538489888, "grad_norm": 0.956036911754247, "learning_rate": 3.922861285830874e-06, "loss": 0.1599, "step": 3711 }, { "epoch": 0.3420094900262588, "grad_norm": 0.8919804779273773, "learning_rate": 3.922234853314969e-06, "loss": 0.1563, "step": 3712 }, { "epoch": 0.3421016262035288, "grad_norm": 0.8444794902726142, "learning_rate": 3.921608288744405e-06, "loss": 0.1571, "step": 3713 }, { "epoch": 0.3421937623807988, "grad_norm": 0.8359544466312437, "learning_rate": 3.920981592177358e-06, "loss": 0.1327, "step": 3714 }, { "epoch": 0.34228589855806885, "grad_norm": 0.9597616019869197, "learning_rate": 3.920354763672017e-06, "loss": 0.1589, "step": 3715 }, { "epoch": 0.34237803473533884, "grad_norm": 0.838642030459525, "learning_rate": 3.9197278032865835e-06, "loss": 0.146, "step": 3716 }, { "epoch": 0.34247017091260884, "grad_norm": 0.9531398921954629, "learning_rate": 3.919100711079271e-06, "loss": 0.1544, "step": 3717 }, { "epoch": 0.34256230708987884, "grad_norm": 0.9489530931832708, "learning_rate": 3.918473487108305e-06, "loss": 0.1521, "step": 3718 }, { "epoch": 0.34265444326714883, "grad_norm": 0.9305730738295115, "learning_rate": 3.917846131431923e-06, "loss": 0.1532, "step": 3719 }, { "epoch": 0.34274657944441883, "grad_norm": 0.9741820408835091, "learning_rate": 3.917218644108375e-06, "loss": 0.1548, "step": 3720 }, { "epoch": 0.3428387156216889, "grad_norm": 1.0039496017353446, "learning_rate": 3.916591025195923e-06, "loss": 0.1618, "step": 3721 }, { "epoch": 0.3429308517989589, "grad_norm": 0.975277791866181, "learning_rate": 3.915963274752842e-06, "loss": 0.1598, "step": 3722 }, { "epoch": 0.3430229879762289, "grad_norm": 0.9510130783589238, "learning_rate": 3.915335392837418e-06, "loss": 0.1608, "step": 3723 }, { "epoch": 0.3431151241534989, "grad_norm": 0.9307546556982514, "learning_rate": 3.914707379507952e-06, "loss": 0.1429, "step": 3724 }, { "epoch": 0.34320726033076887, "grad_norm": 0.9390570411218914, "learning_rate": 3.914079234822752e-06, "loss": 0.1562, "step": 3725 }, { "epoch": 0.34329939650803887, "grad_norm": 0.8986706044646279, "learning_rate": 3.913450958840144e-06, "loss": 0.1487, "step": 3726 }, { "epoch": 0.34339153268530886, "grad_norm": 0.9437748108420285, "learning_rate": 3.912822551618461e-06, "loss": 0.1618, "step": 3727 }, { "epoch": 0.3434836688625789, "grad_norm": 0.8769176358148374, "learning_rate": 3.912194013216053e-06, "loss": 0.1582, "step": 3728 }, { "epoch": 0.3435758050398489, "grad_norm": 0.8809803011658778, "learning_rate": 3.911565343691279e-06, "loss": 0.1441, "step": 3729 }, { "epoch": 0.3436679412171189, "grad_norm": 1.0717667235658699, "learning_rate": 3.910936543102511e-06, "loss": 0.1624, "step": 3730 }, { "epoch": 0.3437600773943889, "grad_norm": 0.8995225605847589, "learning_rate": 3.910307611508133e-06, "loss": 0.1491, "step": 3731 }, { "epoch": 0.3438522135716589, "grad_norm": 0.9128137084403737, "learning_rate": 3.9096785489665405e-06, "loss": 0.146, "step": 3732 }, { "epoch": 0.3439443497489289, "grad_norm": 0.9476732140767965, "learning_rate": 3.9090493555361445e-06, "loss": 0.1623, "step": 3733 }, { "epoch": 0.3440364859261989, "grad_norm": 0.8873546249523853, "learning_rate": 3.908420031275363e-06, "loss": 0.1448, "step": 3734 }, { "epoch": 0.34412862210346895, "grad_norm": 0.9694870317753858, "learning_rate": 3.907790576242631e-06, "loss": 0.159, "step": 3735 }, { "epoch": 0.34422075828073895, "grad_norm": 0.8987721407984873, "learning_rate": 3.907160990496392e-06, "loss": 0.1518, "step": 3736 }, { "epoch": 0.34431289445800894, "grad_norm": 0.8835955419714732, "learning_rate": 3.9065312740951035e-06, "loss": 0.1551, "step": 3737 }, { "epoch": 0.34440503063527894, "grad_norm": 0.9036042800205629, "learning_rate": 3.905901427097235e-06, "loss": 0.1389, "step": 3738 }, { "epoch": 0.34449716681254894, "grad_norm": 0.9288485278514401, "learning_rate": 3.9052714495612675e-06, "loss": 0.1479, "step": 3739 }, { "epoch": 0.34458930298981894, "grad_norm": 0.832981619207698, "learning_rate": 3.904641341545694e-06, "loss": 0.143, "step": 3740 }, { "epoch": 0.34468143916708893, "grad_norm": 0.8967549193356065, "learning_rate": 3.904011103109022e-06, "loss": 0.1571, "step": 3741 }, { "epoch": 0.344773575344359, "grad_norm": 1.0107971066325179, "learning_rate": 3.903380734309767e-06, "loss": 0.1667, "step": 3742 }, { "epoch": 0.344865711521629, "grad_norm": 0.9379380617255512, "learning_rate": 3.90275023520646e-06, "loss": 0.1539, "step": 3743 }, { "epoch": 0.344957847698899, "grad_norm": 0.8811406590696291, "learning_rate": 3.902119605857644e-06, "loss": 0.1546, "step": 3744 }, { "epoch": 0.345049983876169, "grad_norm": 0.8930279758698314, "learning_rate": 3.90148884632187e-06, "loss": 0.135, "step": 3745 }, { "epoch": 0.34514212005343897, "grad_norm": 0.9641434925117012, "learning_rate": 3.900857956657707e-06, "loss": 0.1506, "step": 3746 }, { "epoch": 0.34523425623070897, "grad_norm": 0.9518783618290042, "learning_rate": 3.900226936923731e-06, "loss": 0.1406, "step": 3747 }, { "epoch": 0.34532639240797897, "grad_norm": 0.9648681621137352, "learning_rate": 3.899595787178534e-06, "loss": 0.1551, "step": 3748 }, { "epoch": 0.345418528585249, "grad_norm": 0.92389069120726, "learning_rate": 3.898964507480717e-06, "loss": 0.147, "step": 3749 }, { "epoch": 0.345510664762519, "grad_norm": 0.9264829787786033, "learning_rate": 3.8983330978888955e-06, "loss": 0.1523, "step": 3750 }, { "epoch": 0.345602800939789, "grad_norm": 0.9856484497779876, "learning_rate": 3.897701558461695e-06, "loss": 0.1709, "step": 3751 }, { "epoch": 0.345694937117059, "grad_norm": 0.9104132120981927, "learning_rate": 3.897069889257754e-06, "loss": 0.1615, "step": 3752 }, { "epoch": 0.345787073294329, "grad_norm": 0.8928954674177243, "learning_rate": 3.8964380903357244e-06, "loss": 0.1491, "step": 3753 }, { "epoch": 0.345879209471599, "grad_norm": 0.9773042752590833, "learning_rate": 3.895806161754267e-06, "loss": 0.1555, "step": 3754 }, { "epoch": 0.34597134564886906, "grad_norm": 0.9934249525249464, "learning_rate": 3.895174103572057e-06, "loss": 0.1739, "step": 3755 }, { "epoch": 0.34606348182613905, "grad_norm": 0.9725688234311249, "learning_rate": 3.894541915847783e-06, "loss": 0.1661, "step": 3756 }, { "epoch": 0.34615561800340905, "grad_norm": 0.9447443585635159, "learning_rate": 3.89390959864014e-06, "loss": 0.159, "step": 3757 }, { "epoch": 0.34624775418067905, "grad_norm": 0.8845525119997683, "learning_rate": 3.893277152007842e-06, "loss": 0.1549, "step": 3758 }, { "epoch": 0.34633989035794904, "grad_norm": 0.8523390340352258, "learning_rate": 3.89264457600961e-06, "loss": 0.1441, "step": 3759 }, { "epoch": 0.34643202653521904, "grad_norm": 0.9244438703386956, "learning_rate": 3.892011870704179e-06, "loss": 0.1581, "step": 3760 }, { "epoch": 0.34652416271248904, "grad_norm": 0.9038773135362822, "learning_rate": 3.891379036150297e-06, "loss": 0.1408, "step": 3761 }, { "epoch": 0.3466162988897591, "grad_norm": 0.8333273764873563, "learning_rate": 3.89074607240672e-06, "loss": 0.1503, "step": 3762 }, { "epoch": 0.3467084350670291, "grad_norm": 0.9480337430008207, "learning_rate": 3.890112979532222e-06, "loss": 0.1541, "step": 3763 }, { "epoch": 0.3468005712442991, "grad_norm": 0.9599410814646354, "learning_rate": 3.889479757585584e-06, "loss": 0.1665, "step": 3764 }, { "epoch": 0.3468927074215691, "grad_norm": 0.9433260473515176, "learning_rate": 3.888846406625601e-06, "loss": 0.1454, "step": 3765 }, { "epoch": 0.3469848435988391, "grad_norm": 0.9380561623033904, "learning_rate": 3.888212926711079e-06, "loss": 0.1573, "step": 3766 }, { "epoch": 0.3470769797761091, "grad_norm": 1.0103890768413186, "learning_rate": 3.887579317900838e-06, "loss": 0.1512, "step": 3767 }, { "epoch": 0.34716911595337907, "grad_norm": 0.9545274750885263, "learning_rate": 3.886945580253708e-06, "loss": 0.1507, "step": 3768 }, { "epoch": 0.3472612521306491, "grad_norm": 0.8545779025060927, "learning_rate": 3.886311713828531e-06, "loss": 0.134, "step": 3769 }, { "epoch": 0.3473533883079191, "grad_norm": 0.9256233241238948, "learning_rate": 3.885677718684163e-06, "loss": 0.1652, "step": 3770 }, { "epoch": 0.3474455244851891, "grad_norm": 0.8881952279984489, "learning_rate": 3.885043594879469e-06, "loss": 0.1515, "step": 3771 }, { "epoch": 0.3475376606624591, "grad_norm": 0.9239248201411169, "learning_rate": 3.884409342473329e-06, "loss": 0.1722, "step": 3772 }, { "epoch": 0.3476297968397291, "grad_norm": 0.9022748249991301, "learning_rate": 3.883774961524632e-06, "loss": 0.1591, "step": 3773 }, { "epoch": 0.3477219330169991, "grad_norm": 0.9727583904335542, "learning_rate": 3.88314045209228e-06, "loss": 0.1586, "step": 3774 }, { "epoch": 0.3478140691942691, "grad_norm": 0.8946050022186862, "learning_rate": 3.8825058142351895e-06, "loss": 0.1474, "step": 3775 }, { "epoch": 0.34790620537153916, "grad_norm": 0.9516275063497269, "learning_rate": 3.881871048012285e-06, "loss": 0.1534, "step": 3776 }, { "epoch": 0.34799834154880915, "grad_norm": 0.9403615984819307, "learning_rate": 3.881236153482505e-06, "loss": 0.1684, "step": 3777 }, { "epoch": 0.34809047772607915, "grad_norm": 0.9788364338685362, "learning_rate": 3.880601130704799e-06, "loss": 0.1594, "step": 3778 }, { "epoch": 0.34818261390334915, "grad_norm": 0.8769684675894291, "learning_rate": 3.87996597973813e-06, "loss": 0.1494, "step": 3779 }, { "epoch": 0.34827475008061914, "grad_norm": 0.9241894076679832, "learning_rate": 3.879330700641471e-06, "loss": 0.1516, "step": 3780 }, { "epoch": 0.34836688625788914, "grad_norm": 1.010356419388737, "learning_rate": 3.878695293473809e-06, "loss": 0.1786, "step": 3781 }, { "epoch": 0.34845902243515914, "grad_norm": 0.8481571310124365, "learning_rate": 3.878059758294139e-06, "loss": 0.1566, "step": 3782 }, { "epoch": 0.3485511586124292, "grad_norm": 0.9237936486021854, "learning_rate": 3.877424095161473e-06, "loss": 0.1643, "step": 3783 }, { "epoch": 0.3486432947896992, "grad_norm": 0.9049124285799792, "learning_rate": 3.8767883041348305e-06, "loss": 0.1524, "step": 3784 }, { "epoch": 0.3487354309669692, "grad_norm": 0.8886044487962574, "learning_rate": 3.8761523852732475e-06, "loss": 0.1486, "step": 3785 }, { "epoch": 0.3488275671442392, "grad_norm": 0.8609799128855903, "learning_rate": 3.875516338635766e-06, "loss": 0.1456, "step": 3786 }, { "epoch": 0.3489197033215092, "grad_norm": 0.8265506676375848, "learning_rate": 3.874880164281446e-06, "loss": 0.1403, "step": 3787 }, { "epoch": 0.3490118394987792, "grad_norm": 0.8677006052855488, "learning_rate": 3.874243862269353e-06, "loss": 0.1505, "step": 3788 }, { "epoch": 0.3491039756760492, "grad_norm": 0.8970711140727137, "learning_rate": 3.87360743265857e-06, "loss": 0.1628, "step": 3789 }, { "epoch": 0.3491961118533192, "grad_norm": 1.0145950719528463, "learning_rate": 3.87297087550819e-06, "loss": 0.1624, "step": 3790 }, { "epoch": 0.3492882480305892, "grad_norm": 0.888008623820799, "learning_rate": 3.872334190877316e-06, "loss": 0.1459, "step": 3791 }, { "epoch": 0.3493803842078592, "grad_norm": 0.8672174025069913, "learning_rate": 3.8716973788250645e-06, "loss": 0.1444, "step": 3792 }, { "epoch": 0.3494725203851292, "grad_norm": 0.9186929663280052, "learning_rate": 3.871060439410563e-06, "loss": 0.1463, "step": 3793 }, { "epoch": 0.3495646565623992, "grad_norm": 0.8865568286318863, "learning_rate": 3.870423372692953e-06, "loss": 0.147, "step": 3794 }, { "epoch": 0.3496567927396692, "grad_norm": 0.8920318935417838, "learning_rate": 3.869786178731386e-06, "loss": 0.1471, "step": 3795 }, { "epoch": 0.34974892891693926, "grad_norm": 1.0001696409280874, "learning_rate": 3.869148857585024e-06, "loss": 0.1567, "step": 3796 }, { "epoch": 0.34984106509420926, "grad_norm": 0.9044038444981554, "learning_rate": 3.8685114093130436e-06, "loss": 0.1605, "step": 3797 }, { "epoch": 0.34993320127147926, "grad_norm": 0.9419109233515364, "learning_rate": 3.867873833974631e-06, "loss": 0.1628, "step": 3798 }, { "epoch": 0.35002533744874925, "grad_norm": 0.9098145882514128, "learning_rate": 3.867236131628985e-06, "loss": 0.147, "step": 3799 }, { "epoch": 0.35011747362601925, "grad_norm": 0.9675498143428517, "learning_rate": 3.8665983023353195e-06, "loss": 0.1613, "step": 3800 }, { "epoch": 0.35020960980328925, "grad_norm": 0.9152259025758308, "learning_rate": 3.865960346152853e-06, "loss": 0.1575, "step": 3801 }, { "epoch": 0.35030174598055924, "grad_norm": 0.9096318752456122, "learning_rate": 3.865322263140821e-06, "loss": 0.1603, "step": 3802 }, { "epoch": 0.3503938821578293, "grad_norm": 0.9042773380336094, "learning_rate": 3.86468405335847e-06, "loss": 0.1593, "step": 3803 }, { "epoch": 0.3504860183350993, "grad_norm": 0.8810883239456601, "learning_rate": 3.864045716865059e-06, "loss": 0.1564, "step": 3804 }, { "epoch": 0.3505781545123693, "grad_norm": 0.8969771666725758, "learning_rate": 3.863407253719855e-06, "loss": 0.1658, "step": 3805 }, { "epoch": 0.3506702906896393, "grad_norm": 0.8859714505584699, "learning_rate": 3.8627686639821415e-06, "loss": 0.1524, "step": 3806 }, { "epoch": 0.3507624268669093, "grad_norm": 0.9396107844353678, "learning_rate": 3.8621299477112105e-06, "loss": 0.1594, "step": 3807 }, { "epoch": 0.3508545630441793, "grad_norm": 0.9568604332583132, "learning_rate": 3.861491104966368e-06, "loss": 0.143, "step": 3808 }, { "epoch": 0.3509466992214493, "grad_norm": 0.9548184489453653, "learning_rate": 3.860852135806929e-06, "loss": 0.1612, "step": 3809 }, { "epoch": 0.35103883539871933, "grad_norm": 0.9162268999810167, "learning_rate": 3.860213040292224e-06, "loss": 0.1516, "step": 3810 }, { "epoch": 0.3511309715759893, "grad_norm": 0.965092172228159, "learning_rate": 3.85957381848159e-06, "loss": 0.1462, "step": 3811 }, { "epoch": 0.3512231077532593, "grad_norm": 0.9578818766366897, "learning_rate": 3.858934470434381e-06, "loss": 0.1489, "step": 3812 }, { "epoch": 0.3513152439305293, "grad_norm": 0.9123089429516482, "learning_rate": 3.858294996209961e-06, "loss": 0.1563, "step": 3813 }, { "epoch": 0.3514073801077993, "grad_norm": 0.9948072100858789, "learning_rate": 3.857655395867704e-06, "loss": 0.1571, "step": 3814 }, { "epoch": 0.3514995162850693, "grad_norm": 0.9216205578818107, "learning_rate": 3.857015669466998e-06, "loss": 0.157, "step": 3815 }, { "epoch": 0.3515916524623393, "grad_norm": 0.9864385412189361, "learning_rate": 3.856375817067241e-06, "loss": 0.1627, "step": 3816 }, { "epoch": 0.35168378863960936, "grad_norm": 0.8407221015580862, "learning_rate": 3.855735838727842e-06, "loss": 0.1417, "step": 3817 }, { "epoch": 0.35177592481687936, "grad_norm": 0.8902927685530272, "learning_rate": 3.855095734508225e-06, "loss": 0.1379, "step": 3818 }, { "epoch": 0.35186806099414936, "grad_norm": 0.9232807543583181, "learning_rate": 3.854455504467824e-06, "loss": 0.1494, "step": 3819 }, { "epoch": 0.35196019717141935, "grad_norm": 0.8747114191669552, "learning_rate": 3.853815148666084e-06, "loss": 0.1392, "step": 3820 }, { "epoch": 0.35205233334868935, "grad_norm": 0.9007112346102228, "learning_rate": 3.85317466716246e-06, "loss": 0.1459, "step": 3821 }, { "epoch": 0.35214446952595935, "grad_norm": 0.9703594906332523, "learning_rate": 3.852534060016424e-06, "loss": 0.1514, "step": 3822 }, { "epoch": 0.3522366057032294, "grad_norm": 0.9876709700858648, "learning_rate": 3.8518933272874546e-06, "loss": 0.1556, "step": 3823 }, { "epoch": 0.3523287418804994, "grad_norm": 0.9085359041715336, "learning_rate": 3.851252469035044e-06, "loss": 0.1647, "step": 3824 }, { "epoch": 0.3524208780577694, "grad_norm": 0.8843618900771493, "learning_rate": 3.850611485318696e-06, "loss": 0.1526, "step": 3825 }, { "epoch": 0.3525130142350394, "grad_norm": 0.9384375911161207, "learning_rate": 3.8499703761979276e-06, "loss": 0.1504, "step": 3826 }, { "epoch": 0.3526051504123094, "grad_norm": 0.8748805623679292, "learning_rate": 3.849329141732263e-06, "loss": 0.1539, "step": 3827 }, { "epoch": 0.3526972865895794, "grad_norm": 0.8627817633209965, "learning_rate": 3.848687781981243e-06, "loss": 0.1552, "step": 3828 }, { "epoch": 0.3527894227668494, "grad_norm": 0.9059527689927666, "learning_rate": 3.848046297004417e-06, "loss": 0.1447, "step": 3829 }, { "epoch": 0.35288155894411943, "grad_norm": 0.8805324891957399, "learning_rate": 3.847404686861348e-06, "loss": 0.1439, "step": 3830 }, { "epoch": 0.35297369512138943, "grad_norm": 0.9344084024801386, "learning_rate": 3.846762951611608e-06, "loss": 0.1552, "step": 3831 }, { "epoch": 0.3530658312986594, "grad_norm": 0.9363214449601064, "learning_rate": 3.846121091314783e-06, "loss": 0.1545, "step": 3832 }, { "epoch": 0.3531579674759294, "grad_norm": 0.8539977014512892, "learning_rate": 3.84547910603047e-06, "loss": 0.1405, "step": 3833 }, { "epoch": 0.3532501036531994, "grad_norm": 0.9995946565004439, "learning_rate": 3.8448369958182775e-06, "loss": 0.1757, "step": 3834 }, { "epoch": 0.3533422398304694, "grad_norm": 0.9308525526025416, "learning_rate": 3.844194760737825e-06, "loss": 0.163, "step": 3835 }, { "epoch": 0.3534343760077394, "grad_norm": 0.8951960585707758, "learning_rate": 3.843552400848744e-06, "loss": 0.1492, "step": 3836 }, { "epoch": 0.35352651218500947, "grad_norm": 0.8647013473773321, "learning_rate": 3.842909916210678e-06, "loss": 0.143, "step": 3837 }, { "epoch": 0.35361864836227946, "grad_norm": 0.9713686917245324, "learning_rate": 3.842267306883283e-06, "loss": 0.1589, "step": 3838 }, { "epoch": 0.35371078453954946, "grad_norm": 0.8925126993308895, "learning_rate": 3.8416245729262225e-06, "loss": 0.1415, "step": 3839 }, { "epoch": 0.35380292071681946, "grad_norm": 1.0428020798287154, "learning_rate": 3.840981714399177e-06, "loss": 0.1517, "step": 3840 }, { "epoch": 0.35389505689408945, "grad_norm": 0.9157111788231413, "learning_rate": 3.840338731361834e-06, "loss": 0.1494, "step": 3841 }, { "epoch": 0.35398719307135945, "grad_norm": 0.9316243217330057, "learning_rate": 3.839695623873896e-06, "loss": 0.1558, "step": 3842 }, { "epoch": 0.35407932924862945, "grad_norm": 0.9232650712470144, "learning_rate": 3.839052391995076e-06, "loss": 0.1542, "step": 3843 }, { "epoch": 0.3541714654258995, "grad_norm": 0.8754883534686446, "learning_rate": 3.8384090357850964e-06, "loss": 0.1544, "step": 3844 }, { "epoch": 0.3542636016031695, "grad_norm": 0.9115494168855587, "learning_rate": 3.837765555303694e-06, "loss": 0.1538, "step": 3845 }, { "epoch": 0.3543557377804395, "grad_norm": 0.8999621586897121, "learning_rate": 3.837121950610616e-06, "loss": 0.1479, "step": 3846 }, { "epoch": 0.3544478739577095, "grad_norm": 0.8276305518580209, "learning_rate": 3.8364782217656205e-06, "loss": 0.1324, "step": 3847 }, { "epoch": 0.3545400101349795, "grad_norm": 0.9024826007607681, "learning_rate": 3.835834368828479e-06, "loss": 0.1557, "step": 3848 }, { "epoch": 0.3546321463122495, "grad_norm": 0.9065730873524084, "learning_rate": 3.835190391858972e-06, "loss": 0.1607, "step": 3849 }, { "epoch": 0.3547242824895195, "grad_norm": 0.8661100603350929, "learning_rate": 3.834546290916893e-06, "loss": 0.1565, "step": 3850 }, { "epoch": 0.35481641866678953, "grad_norm": 0.8664911463390288, "learning_rate": 3.833902066062049e-06, "loss": 0.14, "step": 3851 }, { "epoch": 0.35490855484405953, "grad_norm": 0.9297572513132248, "learning_rate": 3.833257717354253e-06, "loss": 0.162, "step": 3852 }, { "epoch": 0.35500069102132953, "grad_norm": 0.9334632150840317, "learning_rate": 3.832613244853335e-06, "loss": 0.1549, "step": 3853 }, { "epoch": 0.3550928271985995, "grad_norm": 0.9117192881469401, "learning_rate": 3.831968648619133e-06, "loss": 0.1643, "step": 3854 }, { "epoch": 0.3551849633758695, "grad_norm": 0.8679690659313138, "learning_rate": 3.8313239287115e-06, "loss": 0.148, "step": 3855 }, { "epoch": 0.3552770995531395, "grad_norm": 0.9554686130719982, "learning_rate": 3.830679085190296e-06, "loss": 0.1585, "step": 3856 }, { "epoch": 0.35536923573040957, "grad_norm": 0.9674925680074199, "learning_rate": 3.830034118115396e-06, "loss": 0.1582, "step": 3857 }, { "epoch": 0.35546137190767957, "grad_norm": 0.8961305393810277, "learning_rate": 3.829389027546685e-06, "loss": 0.1437, "step": 3858 }, { "epoch": 0.35555350808494957, "grad_norm": 0.8747439392004103, "learning_rate": 3.828743813544059e-06, "loss": 0.1432, "step": 3859 }, { "epoch": 0.35564564426221956, "grad_norm": 0.9511837265747041, "learning_rate": 3.8280984761674286e-06, "loss": 0.1554, "step": 3860 }, { "epoch": 0.35573778043948956, "grad_norm": 0.9271027409618041, "learning_rate": 3.82745301547671e-06, "loss": 0.1604, "step": 3861 }, { "epoch": 0.35582991661675956, "grad_norm": 0.8952219446892987, "learning_rate": 3.8268074315318375e-06, "loss": 0.1585, "step": 3862 }, { "epoch": 0.35592205279402955, "grad_norm": 0.8927757851183054, "learning_rate": 3.826161724392751e-06, "loss": 0.142, "step": 3863 }, { "epoch": 0.3560141889712996, "grad_norm": 0.8424179319437527, "learning_rate": 3.8255158941194066e-06, "loss": 0.1413, "step": 3864 }, { "epoch": 0.3561063251485696, "grad_norm": 0.8740217468806033, "learning_rate": 3.824869940771768e-06, "loss": 0.1448, "step": 3865 }, { "epoch": 0.3561984613258396, "grad_norm": 0.8540961364919858, "learning_rate": 3.824223864409813e-06, "loss": 0.1508, "step": 3866 }, { "epoch": 0.3562905975031096, "grad_norm": 0.883177257480543, "learning_rate": 3.823577665093529e-06, "loss": 0.1583, "step": 3867 }, { "epoch": 0.3563827336803796, "grad_norm": 0.8970386523940295, "learning_rate": 3.822931342882918e-06, "loss": 0.154, "step": 3868 }, { "epoch": 0.3564748698576496, "grad_norm": 0.9148582364371604, "learning_rate": 3.822284897837989e-06, "loss": 0.1574, "step": 3869 }, { "epoch": 0.3565670060349196, "grad_norm": 0.9835888081454008, "learning_rate": 3.821638330018764e-06, "loss": 0.1613, "step": 3870 }, { "epoch": 0.35665914221218964, "grad_norm": 0.9487618778033893, "learning_rate": 3.820991639485279e-06, "loss": 0.15, "step": 3871 }, { "epoch": 0.35675127838945964, "grad_norm": 0.9581745160471137, "learning_rate": 3.820344826297577e-06, "loss": 0.1482, "step": 3872 }, { "epoch": 0.35684341456672963, "grad_norm": 0.9939597183184489, "learning_rate": 3.819697890515717e-06, "loss": 0.1741, "step": 3873 }, { "epoch": 0.35693555074399963, "grad_norm": 0.9341386337680994, "learning_rate": 3.819050832199766e-06, "loss": 0.1534, "step": 3874 }, { "epoch": 0.3570276869212696, "grad_norm": 0.9627433592601693, "learning_rate": 3.818403651409801e-06, "loss": 0.166, "step": 3875 }, { "epoch": 0.3571198230985396, "grad_norm": 0.8966016704437211, "learning_rate": 3.817756348205917e-06, "loss": 0.1534, "step": 3876 }, { "epoch": 0.3572119592758096, "grad_norm": 0.831950783650535, "learning_rate": 3.817108922648214e-06, "loss": 0.1479, "step": 3877 }, { "epoch": 0.3573040954530797, "grad_norm": 0.9242438801830429, "learning_rate": 3.816461374796805e-06, "loss": 0.1581, "step": 3878 }, { "epoch": 0.35739623163034967, "grad_norm": 0.9027636442769159, "learning_rate": 3.815813704711816e-06, "loss": 0.1535, "step": 3879 }, { "epoch": 0.35748836780761967, "grad_norm": 0.796082355005034, "learning_rate": 3.815165912453383e-06, "loss": 0.1377, "step": 3880 }, { "epoch": 0.35758050398488966, "grad_norm": 0.9768643954402325, "learning_rate": 3.814517998081654e-06, "loss": 0.1672, "step": 3881 }, { "epoch": 0.35767264016215966, "grad_norm": 0.924729896863386, "learning_rate": 3.8138699616567875e-06, "loss": 0.1619, "step": 3882 }, { "epoch": 0.35776477633942966, "grad_norm": 0.8615406672854165, "learning_rate": 3.8132218032389524e-06, "loss": 0.1438, "step": 3883 }, { "epoch": 0.35785691251669965, "grad_norm": 0.8715405718747457, "learning_rate": 3.812573522888332e-06, "loss": 0.1516, "step": 3884 }, { "epoch": 0.3579490486939697, "grad_norm": 0.9561787493207595, "learning_rate": 3.81192512066512e-06, "loss": 0.1721, "step": 3885 }, { "epoch": 0.3580411848712397, "grad_norm": 0.8752491679156089, "learning_rate": 3.811276596629518e-06, "loss": 0.1502, "step": 3886 }, { "epoch": 0.3581333210485097, "grad_norm": 0.9660899512247874, "learning_rate": 3.810627950841743e-06, "loss": 0.1586, "step": 3887 }, { "epoch": 0.3582254572257797, "grad_norm": 0.9448658827634945, "learning_rate": 3.8099791833620214e-06, "loss": 0.1533, "step": 3888 }, { "epoch": 0.3583175934030497, "grad_norm": 0.9004121487474659, "learning_rate": 3.8093302942505935e-06, "loss": 0.1585, "step": 3889 }, { "epoch": 0.3584097295803197, "grad_norm": 0.906632158147621, "learning_rate": 3.8086812835677044e-06, "loss": 0.1624, "step": 3890 }, { "epoch": 0.35850186575758974, "grad_norm": 0.9183195616043317, "learning_rate": 3.808032151373619e-06, "loss": 0.1618, "step": 3891 }, { "epoch": 0.35859400193485974, "grad_norm": 0.8553552558472457, "learning_rate": 3.807382897728607e-06, "loss": 0.1444, "step": 3892 }, { "epoch": 0.35868613811212974, "grad_norm": 0.902571763244677, "learning_rate": 3.8067335226929523e-06, "loss": 0.1554, "step": 3893 }, { "epoch": 0.35877827428939973, "grad_norm": 0.8831422504068266, "learning_rate": 3.8060840263269494e-06, "loss": 0.1553, "step": 3894 }, { "epoch": 0.35887041046666973, "grad_norm": 0.8695419644636682, "learning_rate": 3.8054344086909043e-06, "loss": 0.1539, "step": 3895 }, { "epoch": 0.35896254664393973, "grad_norm": 0.8634127120614579, "learning_rate": 3.804784669845133e-06, "loss": 0.1438, "step": 3896 }, { "epoch": 0.3590546828212097, "grad_norm": 0.8608209569312256, "learning_rate": 3.8041348098499655e-06, "loss": 0.1397, "step": 3897 }, { "epoch": 0.3591468189984798, "grad_norm": 0.9012937821304821, "learning_rate": 3.8034848287657403e-06, "loss": 0.153, "step": 3898 }, { "epoch": 0.3592389551757498, "grad_norm": 0.913635814588998, "learning_rate": 3.802834726652809e-06, "loss": 0.1583, "step": 3899 }, { "epoch": 0.35933109135301977, "grad_norm": 0.9246153282899159, "learning_rate": 3.802184503571532e-06, "loss": 0.1508, "step": 3900 }, { "epoch": 0.35942322753028977, "grad_norm": 0.9563799822615789, "learning_rate": 3.801534159582285e-06, "loss": 0.1728, "step": 3901 }, { "epoch": 0.35951536370755977, "grad_norm": 0.8893729611997498, "learning_rate": 3.80088369474545e-06, "loss": 0.1571, "step": 3902 }, { "epoch": 0.35960749988482976, "grad_norm": 0.8977935109197466, "learning_rate": 3.800233109121425e-06, "loss": 0.1395, "step": 3903 }, { "epoch": 0.35969963606209976, "grad_norm": 0.9174627801219775, "learning_rate": 3.7995824027706152e-06, "loss": 0.1475, "step": 3904 }, { "epoch": 0.3597917722393698, "grad_norm": 0.9695866259137063, "learning_rate": 3.7989315757534397e-06, "loss": 0.1549, "step": 3905 }, { "epoch": 0.3598839084166398, "grad_norm": 0.9840473979899322, "learning_rate": 3.7982806281303276e-06, "loss": 0.1672, "step": 3906 }, { "epoch": 0.3599760445939098, "grad_norm": 0.8725606551036027, "learning_rate": 3.797629559961719e-06, "loss": 0.1436, "step": 3907 }, { "epoch": 0.3600681807711798, "grad_norm": 0.8950596885234003, "learning_rate": 3.7969783713080665e-06, "loss": 0.1595, "step": 3908 }, { "epoch": 0.3601603169484498, "grad_norm": 0.8905525328504984, "learning_rate": 3.796327062229833e-06, "loss": 0.1572, "step": 3909 }, { "epoch": 0.3602524531257198, "grad_norm": 0.8923575257402443, "learning_rate": 3.7956756327874912e-06, "loss": 0.1623, "step": 3910 }, { "epoch": 0.3603445893029898, "grad_norm": 0.9554499080768074, "learning_rate": 3.7950240830415286e-06, "loss": 0.1594, "step": 3911 }, { "epoch": 0.36043672548025985, "grad_norm": 0.9192802900436988, "learning_rate": 3.79437241305244e-06, "loss": 0.157, "step": 3912 }, { "epoch": 0.36052886165752984, "grad_norm": 0.8913340818064148, "learning_rate": 3.7937206228807333e-06, "loss": 0.1491, "step": 3913 }, { "epoch": 0.36062099783479984, "grad_norm": 0.8862949193565367, "learning_rate": 3.793068712586928e-06, "loss": 0.1456, "step": 3914 }, { "epoch": 0.36071313401206984, "grad_norm": 0.9419844735143882, "learning_rate": 3.7924166822315535e-06, "loss": 0.1605, "step": 3915 }, { "epoch": 0.36080527018933983, "grad_norm": 0.9360333851589002, "learning_rate": 3.791764531875151e-06, "loss": 0.1638, "step": 3916 }, { "epoch": 0.36089740636660983, "grad_norm": 0.8885011416795211, "learning_rate": 3.7911122615782727e-06, "loss": 0.1557, "step": 3917 }, { "epoch": 0.3609895425438799, "grad_norm": 0.890939735479181, "learning_rate": 3.790459871401482e-06, "loss": 0.1624, "step": 3918 }, { "epoch": 0.3610816787211499, "grad_norm": 0.8438507583767467, "learning_rate": 3.7898073614053527e-06, "loss": 0.1455, "step": 3919 }, { "epoch": 0.3611738148984199, "grad_norm": 0.9064039983475835, "learning_rate": 3.7891547316504716e-06, "loss": 0.146, "step": 3920 }, { "epoch": 0.3612659510756899, "grad_norm": 0.8475645530494015, "learning_rate": 3.788501982197435e-06, "loss": 0.1392, "step": 3921 }, { "epoch": 0.36135808725295987, "grad_norm": 0.9057176590949076, "learning_rate": 3.787849113106851e-06, "loss": 0.159, "step": 3922 }, { "epoch": 0.36145022343022987, "grad_norm": 0.9600989104067679, "learning_rate": 3.787196124439337e-06, "loss": 0.1694, "step": 3923 }, { "epoch": 0.36154235960749986, "grad_norm": 0.9044900537237868, "learning_rate": 3.7865430162555255e-06, "loss": 0.1639, "step": 3924 }, { "epoch": 0.3616344957847699, "grad_norm": 0.8508738273375758, "learning_rate": 3.7858897886160562e-06, "loss": 0.1453, "step": 3925 }, { "epoch": 0.3617266319620399, "grad_norm": 0.9620587536792758, "learning_rate": 3.785236441581581e-06, "loss": 0.1674, "step": 3926 }, { "epoch": 0.3618187681393099, "grad_norm": 0.8942864413844239, "learning_rate": 3.784582975212765e-06, "loss": 0.1456, "step": 3927 }, { "epoch": 0.3619109043165799, "grad_norm": 0.840135666464938, "learning_rate": 3.783929389570281e-06, "loss": 0.1455, "step": 3928 }, { "epoch": 0.3620030404938499, "grad_norm": 0.9114590346869866, "learning_rate": 3.7832756847148146e-06, "loss": 0.1616, "step": 3929 }, { "epoch": 0.3620951766711199, "grad_norm": 0.983234958010798, "learning_rate": 3.782621860707063e-06, "loss": 0.1725, "step": 3930 }, { "epoch": 0.3621873128483899, "grad_norm": 0.8225486926457173, "learning_rate": 3.781967917607734e-06, "loss": 0.1351, "step": 3931 }, { "epoch": 0.36227944902565995, "grad_norm": 0.8813300984579409, "learning_rate": 3.7813138554775454e-06, "loss": 0.1477, "step": 3932 }, { "epoch": 0.36237158520292995, "grad_norm": 0.8821804198238675, "learning_rate": 3.780659674377227e-06, "loss": 0.1596, "step": 3933 }, { "epoch": 0.36246372138019994, "grad_norm": 0.8722528057807815, "learning_rate": 3.7800053743675213e-06, "loss": 0.1419, "step": 3934 }, { "epoch": 0.36255585755746994, "grad_norm": 0.9010612330891158, "learning_rate": 3.779350955509178e-06, "loss": 0.1565, "step": 3935 }, { "epoch": 0.36264799373473994, "grad_norm": 0.8503750455245771, "learning_rate": 3.7786964178629613e-06, "loss": 0.1398, "step": 3936 }, { "epoch": 0.36274012991200993, "grad_norm": 0.8822053261660605, "learning_rate": 3.7780417614896438e-06, "loss": 0.1573, "step": 3937 }, { "epoch": 0.36283226608927993, "grad_norm": 0.895341082099772, "learning_rate": 3.777386986450012e-06, "loss": 0.1549, "step": 3938 }, { "epoch": 0.36292440226655, "grad_norm": 0.8502920032613639, "learning_rate": 3.77673209280486e-06, "loss": 0.1537, "step": 3939 }, { "epoch": 0.36301653844382, "grad_norm": 0.904481467548156, "learning_rate": 3.776077080614997e-06, "loss": 0.14, "step": 3940 }, { "epoch": 0.36310867462109, "grad_norm": 0.850345613001194, "learning_rate": 3.7754219499412393e-06, "loss": 0.1416, "step": 3941 }, { "epoch": 0.36320081079836, "grad_norm": 0.896357927903997, "learning_rate": 3.7747667008444154e-06, "loss": 0.1442, "step": 3942 }, { "epoch": 0.36329294697562997, "grad_norm": 0.9476496642343656, "learning_rate": 3.7741113333853673e-06, "loss": 0.1625, "step": 3943 }, { "epoch": 0.36338508315289997, "grad_norm": 0.9442071990268597, "learning_rate": 3.773455847624944e-06, "loss": 0.1602, "step": 3944 }, { "epoch": 0.36347721933016996, "grad_norm": 0.8869444492762905, "learning_rate": 3.7728002436240086e-06, "loss": 0.1449, "step": 3945 }, { "epoch": 0.36356935550744, "grad_norm": 0.8691908645382854, "learning_rate": 3.772144521443434e-06, "loss": 0.1466, "step": 3946 }, { "epoch": 0.36366149168471, "grad_norm": 0.8722709796332478, "learning_rate": 3.7714886811441033e-06, "loss": 0.1544, "step": 3947 }, { "epoch": 0.36375362786198, "grad_norm": 0.9781950813418121, "learning_rate": 3.7708327227869113e-06, "loss": 0.1619, "step": 3948 }, { "epoch": 0.36384576403925, "grad_norm": 0.9741482528206916, "learning_rate": 3.770176646432765e-06, "loss": 0.1611, "step": 3949 }, { "epoch": 0.36393790021652, "grad_norm": 0.9059945224642333, "learning_rate": 3.76952045214258e-06, "loss": 0.1474, "step": 3950 }, { "epoch": 0.36403003639379, "grad_norm": 0.8904357001863795, "learning_rate": 3.7688641399772842e-06, "loss": 0.1556, "step": 3951 }, { "epoch": 0.36412217257106005, "grad_norm": 0.9321752585797721, "learning_rate": 3.7682077099978163e-06, "loss": 0.1646, "step": 3952 }, { "epoch": 0.36421430874833005, "grad_norm": 0.9378488103126844, "learning_rate": 3.767551162265126e-06, "loss": 0.1535, "step": 3953 }, { "epoch": 0.36430644492560005, "grad_norm": 0.9011582010590111, "learning_rate": 3.7668944968401743e-06, "loss": 0.1544, "step": 3954 }, { "epoch": 0.36439858110287005, "grad_norm": 0.8790970485023327, "learning_rate": 3.7662377137839323e-06, "loss": 0.1442, "step": 3955 }, { "epoch": 0.36449071728014004, "grad_norm": 0.908621835636859, "learning_rate": 3.7655808131573823e-06, "loss": 0.1511, "step": 3956 }, { "epoch": 0.36458285345741004, "grad_norm": 0.8903756339669455, "learning_rate": 3.7649237950215178e-06, "loss": 0.1387, "step": 3957 }, { "epoch": 0.36467498963468004, "grad_norm": 1.0374094432276606, "learning_rate": 3.764266659437342e-06, "loss": 0.1752, "step": 3958 }, { "epoch": 0.3647671258119501, "grad_norm": 0.8597914758339518, "learning_rate": 3.763609406465872e-06, "loss": 0.1442, "step": 3959 }, { "epoch": 0.3648592619892201, "grad_norm": 0.9149822828048013, "learning_rate": 3.7629520361681317e-06, "loss": 0.1613, "step": 3960 }, { "epoch": 0.3649513981664901, "grad_norm": 0.9028396433291438, "learning_rate": 3.7622945486051585e-06, "loss": 0.1412, "step": 3961 }, { "epoch": 0.3650435343437601, "grad_norm": 0.9537831433092164, "learning_rate": 3.7616369438380014e-06, "loss": 0.1592, "step": 3962 }, { "epoch": 0.3651356705210301, "grad_norm": 0.9595063973112449, "learning_rate": 3.760979221927718e-06, "loss": 0.1501, "step": 3963 }, { "epoch": 0.3652278066983001, "grad_norm": 0.9440368618491344, "learning_rate": 3.760321382935378e-06, "loss": 0.1633, "step": 3964 }, { "epoch": 0.36531994287557007, "grad_norm": 0.8716304492059451, "learning_rate": 3.759663426922062e-06, "loss": 0.1406, "step": 3965 }, { "epoch": 0.3654120790528401, "grad_norm": 0.9317503191937407, "learning_rate": 3.7590053539488613e-06, "loss": 0.1573, "step": 3966 }, { "epoch": 0.3655042152301101, "grad_norm": 0.8833226489364178, "learning_rate": 3.758347164076879e-06, "loss": 0.1561, "step": 3967 }, { "epoch": 0.3655963514073801, "grad_norm": 0.9745417396103699, "learning_rate": 3.7576888573672254e-06, "loss": 0.1592, "step": 3968 }, { "epoch": 0.3656884875846501, "grad_norm": 0.9530059653507331, "learning_rate": 3.757030433881027e-06, "loss": 0.1468, "step": 3969 }, { "epoch": 0.3657806237619201, "grad_norm": 0.9366752553028688, "learning_rate": 3.7563718936794176e-06, "loss": 0.1567, "step": 3970 }, { "epoch": 0.3658727599391901, "grad_norm": 0.9622765132112847, "learning_rate": 3.755713236823542e-06, "loss": 0.1589, "step": 3971 }, { "epoch": 0.3659648961164601, "grad_norm": 0.8902165328288483, "learning_rate": 3.755054463374558e-06, "loss": 0.1525, "step": 3972 }, { "epoch": 0.36605703229373016, "grad_norm": 0.9688432772053249, "learning_rate": 3.754395573393631e-06, "loss": 0.1728, "step": 3973 }, { "epoch": 0.36614916847100015, "grad_norm": 0.9026315201949281, "learning_rate": 3.7537365669419413e-06, "loss": 0.1518, "step": 3974 }, { "epoch": 0.36624130464827015, "grad_norm": 0.8789867884729393, "learning_rate": 3.7530774440806757e-06, "loss": 0.1473, "step": 3975 }, { "epoch": 0.36633344082554015, "grad_norm": 0.8515286072012973, "learning_rate": 3.7524182048710343e-06, "loss": 0.1439, "step": 3976 }, { "epoch": 0.36642557700281014, "grad_norm": 0.8828903306684278, "learning_rate": 3.751758849374228e-06, "loss": 0.1413, "step": 3977 }, { "epoch": 0.36651771318008014, "grad_norm": 0.884530916429444, "learning_rate": 3.7510993776514786e-06, "loss": 0.1513, "step": 3978 }, { "epoch": 0.36660984935735014, "grad_norm": 0.8848921511295754, "learning_rate": 3.7504397897640165e-06, "loss": 0.1499, "step": 3979 }, { "epoch": 0.3667019855346202, "grad_norm": 0.9206901760475932, "learning_rate": 3.7497800857730854e-06, "loss": 0.1526, "step": 3980 }, { "epoch": 0.3667941217118902, "grad_norm": 0.8989347990602119, "learning_rate": 3.749120265739939e-06, "loss": 0.1416, "step": 3981 }, { "epoch": 0.3668862578891602, "grad_norm": 0.9478265392367967, "learning_rate": 3.7484603297258413e-06, "loss": 0.1526, "step": 3982 }, { "epoch": 0.3669783940664302, "grad_norm": 0.9804580570371507, "learning_rate": 3.747800277792068e-06, "loss": 0.1547, "step": 3983 }, { "epoch": 0.3670705302437002, "grad_norm": 0.9347412913063416, "learning_rate": 3.7471401099999044e-06, "loss": 0.152, "step": 3984 }, { "epoch": 0.3671626664209702, "grad_norm": 0.9714621448950228, "learning_rate": 3.7464798264106474e-06, "loss": 0.1546, "step": 3985 }, { "epoch": 0.3672548025982402, "grad_norm": 0.9361573736104657, "learning_rate": 3.7458194270856046e-06, "loss": 0.1496, "step": 3986 }, { "epoch": 0.3673469387755102, "grad_norm": 0.9435489839258917, "learning_rate": 3.745158912086093e-06, "loss": 0.166, "step": 3987 }, { "epoch": 0.3674390749527802, "grad_norm": 0.9511965958836167, "learning_rate": 3.744498281473443e-06, "loss": 0.1558, "step": 3988 }, { "epoch": 0.3675312111300502, "grad_norm": 0.8979980364324908, "learning_rate": 3.743837535308994e-06, "loss": 0.1401, "step": 3989 }, { "epoch": 0.3676233473073202, "grad_norm": 0.9236641015351937, "learning_rate": 3.7431766736540958e-06, "loss": 0.1482, "step": 3990 }, { "epoch": 0.3677154834845902, "grad_norm": 0.9291040410569116, "learning_rate": 3.74251569657011e-06, "loss": 0.1511, "step": 3991 }, { "epoch": 0.3678076196618602, "grad_norm": 1.014189124142894, "learning_rate": 3.7418546041184074e-06, "loss": 0.1467, "step": 3992 }, { "epoch": 0.36789975583913026, "grad_norm": 0.9383118379868695, "learning_rate": 3.7411933963603706e-06, "loss": 0.1655, "step": 3993 }, { "epoch": 0.36799189201640026, "grad_norm": 0.8876646385129865, "learning_rate": 3.7405320733573948e-06, "loss": 0.1433, "step": 3994 }, { "epoch": 0.36808402819367025, "grad_norm": 0.9951541805502094, "learning_rate": 3.739870635170881e-06, "loss": 0.1674, "step": 3995 }, { "epoch": 0.36817616437094025, "grad_norm": 0.9346916827824021, "learning_rate": 3.739209081862247e-06, "loss": 0.1408, "step": 3996 }, { "epoch": 0.36826830054821025, "grad_norm": 0.9003306272523681, "learning_rate": 3.738547413492916e-06, "loss": 0.1445, "step": 3997 }, { "epoch": 0.36836043672548024, "grad_norm": 0.9747753339711037, "learning_rate": 3.7378856301243233e-06, "loss": 0.1558, "step": 3998 }, { "epoch": 0.36845257290275024, "grad_norm": 0.9296953912478665, "learning_rate": 3.7372237318179172e-06, "loss": 0.1533, "step": 3999 }, { "epoch": 0.3685447090800203, "grad_norm": 0.886098729035054, "learning_rate": 3.7365617186351538e-06, "loss": 0.1403, "step": 4000 }, { "epoch": 0.3685447090800203, "eval_loss": 0.15275675058364868, "eval_runtime": 299.8799, "eval_samples_per_second": 23.399, "eval_steps_per_second": 2.928, "step": 4000 }, { "epoch": 0.3686368452572903, "grad_norm": 0.9026680591810774, "learning_rate": 3.735899590637503e-06, "loss": 0.1374, "step": 4001 }, { "epoch": 0.3687289814345603, "grad_norm": 0.9006265105747577, "learning_rate": 3.735237347886441e-06, "loss": 0.141, "step": 4002 }, { "epoch": 0.3688211176118303, "grad_norm": 0.9632222935393651, "learning_rate": 3.7345749904434593e-06, "loss": 0.1463, "step": 4003 }, { "epoch": 0.3689132537891003, "grad_norm": 0.8573673467809124, "learning_rate": 3.733912518370056e-06, "loss": 0.1416, "step": 4004 }, { "epoch": 0.3690053899663703, "grad_norm": 0.9167873217872394, "learning_rate": 3.7332499317277432e-06, "loss": 0.1514, "step": 4005 }, { "epoch": 0.3690975261436403, "grad_norm": 0.9053492707952349, "learning_rate": 3.732587230578041e-06, "loss": 0.1498, "step": 4006 }, { "epoch": 0.36918966232091033, "grad_norm": 0.9514143436948048, "learning_rate": 3.7319244149824825e-06, "loss": 0.1502, "step": 4007 }, { "epoch": 0.3692817984981803, "grad_norm": 0.9243371986561314, "learning_rate": 3.7312614850026086e-06, "loss": 0.1647, "step": 4008 }, { "epoch": 0.3693739346754503, "grad_norm": 0.9274365702949716, "learning_rate": 3.730598440699974e-06, "loss": 0.1508, "step": 4009 }, { "epoch": 0.3694660708527203, "grad_norm": 0.8933690616443185, "learning_rate": 3.729935282136142e-06, "loss": 0.1527, "step": 4010 }, { "epoch": 0.3695582070299903, "grad_norm": 0.9779518506144268, "learning_rate": 3.729272009372686e-06, "loss": 0.1528, "step": 4011 }, { "epoch": 0.3696503432072603, "grad_norm": 0.9370281031601337, "learning_rate": 3.7286086224711916e-06, "loss": 0.1455, "step": 4012 }, { "epoch": 0.3697424793845303, "grad_norm": 0.8777378271793664, "learning_rate": 3.727945121493255e-06, "loss": 0.1496, "step": 4013 }, { "epoch": 0.36983461556180036, "grad_norm": 0.9393712387806363, "learning_rate": 3.7272815065004808e-06, "loss": 0.1641, "step": 4014 }, { "epoch": 0.36992675173907036, "grad_norm": 1.0039688517917074, "learning_rate": 3.7266177775544877e-06, "loss": 0.1511, "step": 4015 }, { "epoch": 0.37001888791634036, "grad_norm": 0.8739356202824567, "learning_rate": 3.7259539347169015e-06, "loss": 0.1371, "step": 4016 }, { "epoch": 0.37011102409361035, "grad_norm": 0.888356334870733, "learning_rate": 3.72528997804936e-06, "loss": 0.1519, "step": 4017 }, { "epoch": 0.37020316027088035, "grad_norm": 0.993257143098972, "learning_rate": 3.724625907613513e-06, "loss": 0.1565, "step": 4018 }, { "epoch": 0.37029529644815035, "grad_norm": 0.9754754956762344, "learning_rate": 3.7239617234710185e-06, "loss": 0.1413, "step": 4019 }, { "epoch": 0.3703874326254204, "grad_norm": 0.843187808737617, "learning_rate": 3.7232974256835457e-06, "loss": 0.1433, "step": 4020 }, { "epoch": 0.3704795688026904, "grad_norm": 0.8997900277623461, "learning_rate": 3.7226330143127765e-06, "loss": 0.1468, "step": 4021 }, { "epoch": 0.3705717049799604, "grad_norm": 0.9570942103840279, "learning_rate": 3.721968489420399e-06, "loss": 0.1358, "step": 4022 }, { "epoch": 0.3706638411572304, "grad_norm": 0.9574979970822548, "learning_rate": 3.721303851068116e-06, "loss": 0.1602, "step": 4023 }, { "epoch": 0.3707559773345004, "grad_norm": 0.8884651421400183, "learning_rate": 3.7206390993176395e-06, "loss": 0.1414, "step": 4024 }, { "epoch": 0.3708481135117704, "grad_norm": 0.9861288907351328, "learning_rate": 3.719974234230691e-06, "loss": 0.1542, "step": 4025 }, { "epoch": 0.3709402496890404, "grad_norm": 1.0008506973241864, "learning_rate": 3.7193092558690036e-06, "loss": 0.1479, "step": 4026 }, { "epoch": 0.37103238586631043, "grad_norm": 0.9941006684988769, "learning_rate": 3.7186441642943206e-06, "loss": 0.1483, "step": 4027 }, { "epoch": 0.37112452204358043, "grad_norm": 0.9438400436931483, "learning_rate": 3.7179789595683954e-06, "loss": 0.1474, "step": 4028 }, { "epoch": 0.3712166582208504, "grad_norm": 0.9935959286704832, "learning_rate": 3.717313641752993e-06, "loss": 0.147, "step": 4029 }, { "epoch": 0.3713087943981204, "grad_norm": 1.011789007873907, "learning_rate": 3.7166482109098878e-06, "loss": 0.1558, "step": 4030 }, { "epoch": 0.3714009305753904, "grad_norm": 0.9405959335230354, "learning_rate": 3.715982667100866e-06, "loss": 0.1637, "step": 4031 }, { "epoch": 0.3714930667526604, "grad_norm": 0.909937263307173, "learning_rate": 3.7153170103877216e-06, "loss": 0.1431, "step": 4032 }, { "epoch": 0.3715852029299304, "grad_norm": 0.9554612213688228, "learning_rate": 3.7146512408322623e-06, "loss": 0.1652, "step": 4033 }, { "epoch": 0.37167733910720047, "grad_norm": 1.0065983816320472, "learning_rate": 3.7139853584963054e-06, "loss": 0.1552, "step": 4034 }, { "epoch": 0.37176947528447046, "grad_norm": 0.9020436912692089, "learning_rate": 3.7133193634416766e-06, "loss": 0.148, "step": 4035 }, { "epoch": 0.37186161146174046, "grad_norm": 0.9803335179141888, "learning_rate": 3.7126532557302144e-06, "loss": 0.1667, "step": 4036 }, { "epoch": 0.37195374763901046, "grad_norm": 0.9175684244894553, "learning_rate": 3.711987035423767e-06, "loss": 0.1566, "step": 4037 }, { "epoch": 0.37204588381628045, "grad_norm": 0.9262653613986682, "learning_rate": 3.711320702584193e-06, "loss": 0.154, "step": 4038 }, { "epoch": 0.37213801999355045, "grad_norm": 0.9086129584913225, "learning_rate": 3.710654257273361e-06, "loss": 0.1507, "step": 4039 }, { "epoch": 0.37223015617082045, "grad_norm": 1.0044929480816431, "learning_rate": 3.7099876995531515e-06, "loss": 0.1568, "step": 4040 }, { "epoch": 0.3723222923480905, "grad_norm": 0.9511895454298042, "learning_rate": 3.709321029485453e-06, "loss": 0.1596, "step": 4041 }, { "epoch": 0.3724144285253605, "grad_norm": 0.9457430568964129, "learning_rate": 3.708654247132168e-06, "loss": 0.1473, "step": 4042 }, { "epoch": 0.3725065647026305, "grad_norm": 0.9216559389726534, "learning_rate": 3.7079873525552053e-06, "loss": 0.1471, "step": 4043 }, { "epoch": 0.3725987008799005, "grad_norm": 0.9511420633348692, "learning_rate": 3.707320345816487e-06, "loss": 0.151, "step": 4044 }, { "epoch": 0.3726908370571705, "grad_norm": 0.9025153243738641, "learning_rate": 3.7066532269779444e-06, "loss": 0.142, "step": 4045 }, { "epoch": 0.3727829732344405, "grad_norm": 0.9011326279630237, "learning_rate": 3.7059859961015205e-06, "loss": 0.1437, "step": 4046 }, { "epoch": 0.3728751094117105, "grad_norm": 0.964381694542699, "learning_rate": 3.705318653249166e-06, "loss": 0.1624, "step": 4047 }, { "epoch": 0.37296724558898053, "grad_norm": 0.9491833954146554, "learning_rate": 3.704651198482846e-06, "loss": 0.151, "step": 4048 }, { "epoch": 0.37305938176625053, "grad_norm": 0.9783666245098148, "learning_rate": 3.703983631864532e-06, "loss": 0.1651, "step": 4049 }, { "epoch": 0.3731515179435205, "grad_norm": 0.9566763043838267, "learning_rate": 3.703315953456208e-06, "loss": 0.1457, "step": 4050 }, { "epoch": 0.3732436541207905, "grad_norm": 0.97352017386297, "learning_rate": 3.7026481633198687e-06, "loss": 0.1631, "step": 4051 }, { "epoch": 0.3733357902980605, "grad_norm": 0.9425864394639262, "learning_rate": 3.701980261517518e-06, "loss": 0.1661, "step": 4052 }, { "epoch": 0.3734279264753305, "grad_norm": 0.9059232717573347, "learning_rate": 3.70131224811117e-06, "loss": 0.1579, "step": 4053 }, { "epoch": 0.37352006265260057, "grad_norm": 0.9281068034699287, "learning_rate": 3.7006441231628517e-06, "loss": 0.1474, "step": 4054 }, { "epoch": 0.37361219882987057, "grad_norm": 0.9670493816154251, "learning_rate": 3.699975886734596e-06, "loss": 0.1638, "step": 4055 }, { "epoch": 0.37370433500714056, "grad_norm": 0.8970057217264027, "learning_rate": 3.6993075388884507e-06, "loss": 0.1477, "step": 4056 }, { "epoch": 0.37379647118441056, "grad_norm": 0.9142284746518392, "learning_rate": 3.698639079686471e-06, "loss": 0.1577, "step": 4057 }, { "epoch": 0.37388860736168056, "grad_norm": 0.9045178818037406, "learning_rate": 3.6979705091907244e-06, "loss": 0.158, "step": 4058 }, { "epoch": 0.37398074353895056, "grad_norm": 0.8711812616573087, "learning_rate": 3.6973018274632865e-06, "loss": 0.1529, "step": 4059 }, { "epoch": 0.37407287971622055, "grad_norm": 0.8729978347669862, "learning_rate": 3.696633034566245e-06, "loss": 0.157, "step": 4060 }, { "epoch": 0.3741650158934906, "grad_norm": 0.8990542116703788, "learning_rate": 3.6959641305616984e-06, "loss": 0.1503, "step": 4061 }, { "epoch": 0.3742571520707606, "grad_norm": 0.8904444740765883, "learning_rate": 3.695295115511752e-06, "loss": 0.1516, "step": 4062 }, { "epoch": 0.3743492882480306, "grad_norm": 0.9753397041967755, "learning_rate": 3.694625989478527e-06, "loss": 0.1631, "step": 4063 }, { "epoch": 0.3744414244253006, "grad_norm": 0.9049757714025393, "learning_rate": 3.69395675252415e-06, "loss": 0.1423, "step": 4064 }, { "epoch": 0.3745335606025706, "grad_norm": 0.8526472535969689, "learning_rate": 3.6932874047107597e-06, "loss": 0.1411, "step": 4065 }, { "epoch": 0.3746256967798406, "grad_norm": 0.9655334358530389, "learning_rate": 3.6926179461005056e-06, "loss": 0.1569, "step": 4066 }, { "epoch": 0.3747178329571106, "grad_norm": 0.9250687407747031, "learning_rate": 3.691948376755547e-06, "loss": 0.1572, "step": 4067 }, { "epoch": 0.37480996913438064, "grad_norm": 0.9047372516849579, "learning_rate": 3.6912786967380528e-06, "loss": 0.1562, "step": 4068 }, { "epoch": 0.37490210531165064, "grad_norm": 0.859181189210115, "learning_rate": 3.6906089061102043e-06, "loss": 0.1413, "step": 4069 }, { "epoch": 0.37499424148892063, "grad_norm": 0.9373737963751739, "learning_rate": 3.6899390049341893e-06, "loss": 0.1587, "step": 4070 }, { "epoch": 0.37508637766619063, "grad_norm": 0.9447926915063538, "learning_rate": 3.68926899327221e-06, "loss": 0.1514, "step": 4071 }, { "epoch": 0.3751785138434606, "grad_norm": 0.9193281338428294, "learning_rate": 3.6885988711864777e-06, "loss": 0.1583, "step": 4072 }, { "epoch": 0.3752706500207306, "grad_norm": 0.9827375591842401, "learning_rate": 3.6879286387392122e-06, "loss": 0.1512, "step": 4073 }, { "epoch": 0.3753627861980006, "grad_norm": 0.8550091267938758, "learning_rate": 3.687258295992644e-06, "loss": 0.1334, "step": 4074 }, { "epoch": 0.37545492237527067, "grad_norm": 0.9004154950704388, "learning_rate": 3.686587843009016e-06, "loss": 0.1479, "step": 4075 }, { "epoch": 0.37554705855254067, "grad_norm": 0.9743962905621874, "learning_rate": 3.685917279850578e-06, "loss": 0.1606, "step": 4076 }, { "epoch": 0.37563919472981067, "grad_norm": 0.9150974523990754, "learning_rate": 3.685246606579594e-06, "loss": 0.1479, "step": 4077 }, { "epoch": 0.37573133090708066, "grad_norm": 0.8927057804303226, "learning_rate": 3.684575823258334e-06, "loss": 0.1553, "step": 4078 }, { "epoch": 0.37582346708435066, "grad_norm": 0.8912269318519523, "learning_rate": 3.683904929949082e-06, "loss": 0.1606, "step": 4079 }, { "epoch": 0.37591560326162066, "grad_norm": 0.8714988941859493, "learning_rate": 3.68323392671413e-06, "loss": 0.1512, "step": 4080 }, { "epoch": 0.37600773943889065, "grad_norm": 0.9066092812200909, "learning_rate": 3.6825628136157805e-06, "loss": 0.1645, "step": 4081 }, { "epoch": 0.3760998756161607, "grad_norm": 0.9353430946436311, "learning_rate": 3.6818915907163456e-06, "loss": 0.1546, "step": 4082 }, { "epoch": 0.3761920117934307, "grad_norm": 0.824207106952847, "learning_rate": 3.6812202580781507e-06, "loss": 0.1357, "step": 4083 }, { "epoch": 0.3762841479707007, "grad_norm": 0.9127969613382955, "learning_rate": 3.680548815763527e-06, "loss": 0.1486, "step": 4084 }, { "epoch": 0.3763762841479707, "grad_norm": 0.9223650712476852, "learning_rate": 3.6798772638348186e-06, "loss": 0.1452, "step": 4085 }, { "epoch": 0.3764684203252407, "grad_norm": 0.9504996208651127, "learning_rate": 3.679205602354379e-06, "loss": 0.1593, "step": 4086 }, { "epoch": 0.3765605565025107, "grad_norm": 0.9697927009717019, "learning_rate": 3.6785338313845725e-06, "loss": 0.17, "step": 4087 }, { "epoch": 0.37665269267978074, "grad_norm": 0.9399208083149008, "learning_rate": 3.677861950987773e-06, "loss": 0.1476, "step": 4088 }, { "epoch": 0.37674482885705074, "grad_norm": 0.8869065172245069, "learning_rate": 3.677189961226365e-06, "loss": 0.1477, "step": 4089 }, { "epoch": 0.37683696503432074, "grad_norm": 0.9534644978626714, "learning_rate": 3.6765178621627418e-06, "loss": 0.1616, "step": 4090 }, { "epoch": 0.37692910121159073, "grad_norm": 0.85557009388475, "learning_rate": 3.675845653859309e-06, "loss": 0.1403, "step": 4091 }, { "epoch": 0.37702123738886073, "grad_norm": 0.9242645580752875, "learning_rate": 3.6751733363784804e-06, "loss": 0.1523, "step": 4092 }, { "epoch": 0.3771133735661307, "grad_norm": 0.8425672883949519, "learning_rate": 3.6745009097826813e-06, "loss": 0.1412, "step": 4093 }, { "epoch": 0.3772055097434007, "grad_norm": 0.8877254768400884, "learning_rate": 3.6738283741343463e-06, "loss": 0.1518, "step": 4094 }, { "epoch": 0.3772976459206708, "grad_norm": 0.9544665857522291, "learning_rate": 3.6731557294959196e-06, "loss": 0.156, "step": 4095 }, { "epoch": 0.3773897820979408, "grad_norm": 0.8985137299961812, "learning_rate": 3.6724829759298585e-06, "loss": 0.1567, "step": 4096 }, { "epoch": 0.37748191827521077, "grad_norm": 0.9207976569709534, "learning_rate": 3.671810113498626e-06, "loss": 0.1514, "step": 4097 }, { "epoch": 0.37757405445248077, "grad_norm": 0.9250476073445184, "learning_rate": 3.6711371422646984e-06, "loss": 0.1529, "step": 4098 }, { "epoch": 0.37766619062975076, "grad_norm": 0.9270432357066251, "learning_rate": 3.6704640622905617e-06, "loss": 0.1632, "step": 4099 }, { "epoch": 0.37775832680702076, "grad_norm": 0.9510100858087815, "learning_rate": 3.6697908736387105e-06, "loss": 0.1664, "step": 4100 }, { "epoch": 0.37785046298429076, "grad_norm": 0.8715401290229666, "learning_rate": 3.669117576371651e-06, "loss": 0.1355, "step": 4101 }, { "epoch": 0.3779425991615608, "grad_norm": 0.9679288812456934, "learning_rate": 3.668444170551898e-06, "loss": 0.1607, "step": 4102 }, { "epoch": 0.3780347353388308, "grad_norm": 0.9808453390014398, "learning_rate": 3.6677706562419784e-06, "loss": 0.1641, "step": 4103 }, { "epoch": 0.3781268715161008, "grad_norm": 0.9312962652606193, "learning_rate": 3.667097033504428e-06, "loss": 0.1692, "step": 4104 }, { "epoch": 0.3782190076933708, "grad_norm": 0.9276051374101398, "learning_rate": 3.666423302401792e-06, "loss": 0.1667, "step": 4105 }, { "epoch": 0.3783111438706408, "grad_norm": 0.8760055981989053, "learning_rate": 3.6657494629966274e-06, "loss": 0.144, "step": 4106 }, { "epoch": 0.3784032800479108, "grad_norm": 0.8706088602855692, "learning_rate": 3.6650755153514993e-06, "loss": 0.1451, "step": 4107 }, { "epoch": 0.3784954162251808, "grad_norm": 0.9489960109369865, "learning_rate": 3.664401459528984e-06, "loss": 0.1522, "step": 4108 }, { "epoch": 0.37858755240245084, "grad_norm": 0.9511363844631635, "learning_rate": 3.663727295591668e-06, "loss": 0.1603, "step": 4109 }, { "epoch": 0.37867968857972084, "grad_norm": 0.984138360362065, "learning_rate": 3.6630530236021478e-06, "loss": 0.1527, "step": 4110 }, { "epoch": 0.37877182475699084, "grad_norm": 0.9177359028717674, "learning_rate": 3.6623786436230287e-06, "loss": 0.1487, "step": 4111 }, { "epoch": 0.37886396093426083, "grad_norm": 0.9387063111522116, "learning_rate": 3.6617041557169282e-06, "loss": 0.1429, "step": 4112 }, { "epoch": 0.37895609711153083, "grad_norm": 0.9434138295220491, "learning_rate": 3.6610295599464707e-06, "loss": 0.1474, "step": 4113 }, { "epoch": 0.37904823328880083, "grad_norm": 1.0168889669868741, "learning_rate": 3.660354856374294e-06, "loss": 0.1462, "step": 4114 }, { "epoch": 0.3791403694660708, "grad_norm": 0.9435621753939302, "learning_rate": 3.6596800450630445e-06, "loss": 0.1488, "step": 4115 }, { "epoch": 0.3792325056433409, "grad_norm": 1.0039934347610908, "learning_rate": 3.659005126075377e-06, "loss": 0.1606, "step": 4116 }, { "epoch": 0.3793246418206109, "grad_norm": 0.9742949418469122, "learning_rate": 3.65833009947396e-06, "loss": 0.1618, "step": 4117 }, { "epoch": 0.37941677799788087, "grad_norm": 0.97062878459915, "learning_rate": 3.657654965321468e-06, "loss": 0.151, "step": 4118 }, { "epoch": 0.37950891417515087, "grad_norm": 0.9727659249762055, "learning_rate": 3.6569797236805877e-06, "loss": 0.1531, "step": 4119 }, { "epoch": 0.37960105035242087, "grad_norm": 0.9530211060763032, "learning_rate": 3.656304374614016e-06, "loss": 0.1608, "step": 4120 }, { "epoch": 0.37969318652969086, "grad_norm": 0.9892002262495758, "learning_rate": 3.6556289181844582e-06, "loss": 0.1614, "step": 4121 }, { "epoch": 0.3797853227069609, "grad_norm": 0.9853737375495374, "learning_rate": 3.654953354454631e-06, "loss": 0.1658, "step": 4122 }, { "epoch": 0.3798774588842309, "grad_norm": 0.8190640589995611, "learning_rate": 3.654277683487261e-06, "loss": 0.1318, "step": 4123 }, { "epoch": 0.3799695950615009, "grad_norm": 0.9400423177734195, "learning_rate": 3.6536019053450834e-06, "loss": 0.161, "step": 4124 }, { "epoch": 0.3800617312387709, "grad_norm": 0.9517573566439758, "learning_rate": 3.652926020090845e-06, "loss": 0.1518, "step": 4125 }, { "epoch": 0.3801538674160409, "grad_norm": 0.926377462587662, "learning_rate": 3.6522500277873017e-06, "loss": 0.1413, "step": 4126 }, { "epoch": 0.3802460035933109, "grad_norm": 0.8943962669159516, "learning_rate": 3.651573928497219e-06, "loss": 0.1545, "step": 4127 }, { "epoch": 0.3803381397705809, "grad_norm": 0.9300678578368041, "learning_rate": 3.6508977222833737e-06, "loss": 0.1546, "step": 4128 }, { "epoch": 0.38043027594785095, "grad_norm": 0.9643638642908029, "learning_rate": 3.6502214092085504e-06, "loss": 0.1529, "step": 4129 }, { "epoch": 0.38052241212512095, "grad_norm": 0.9714822619040409, "learning_rate": 3.649544989335545e-06, "loss": 0.1542, "step": 4130 }, { "epoch": 0.38061454830239094, "grad_norm": 0.9773020057604235, "learning_rate": 3.648868462727165e-06, "loss": 0.1575, "step": 4131 }, { "epoch": 0.38070668447966094, "grad_norm": 0.9255103386023259, "learning_rate": 3.6481918294462237e-06, "loss": 0.1533, "step": 4132 }, { "epoch": 0.38079882065693094, "grad_norm": 0.9723738172144644, "learning_rate": 3.647515089555548e-06, "loss": 0.1587, "step": 4133 }, { "epoch": 0.38089095683420093, "grad_norm": 0.9349434163742851, "learning_rate": 3.6468382431179717e-06, "loss": 0.1435, "step": 4134 }, { "epoch": 0.38098309301147093, "grad_norm": 0.9636701484737411, "learning_rate": 3.646161290196342e-06, "loss": 0.1529, "step": 4135 }, { "epoch": 0.381075229188741, "grad_norm": 0.9754765196777119, "learning_rate": 3.645484230853513e-06, "loss": 0.1707, "step": 4136 }, { "epoch": 0.381167365366011, "grad_norm": 0.9895673826813174, "learning_rate": 3.64480706515235e-06, "loss": 0.1673, "step": 4137 }, { "epoch": 0.381259501543281, "grad_norm": 0.9674314673920184, "learning_rate": 3.6441297931557274e-06, "loss": 0.1552, "step": 4138 }, { "epoch": 0.381351637720551, "grad_norm": 0.9225542238948127, "learning_rate": 3.643452414926531e-06, "loss": 0.1594, "step": 4139 }, { "epoch": 0.38144377389782097, "grad_norm": 0.8649506707868612, "learning_rate": 3.6427749305276537e-06, "loss": 0.1415, "step": 4140 }, { "epoch": 0.38153591007509097, "grad_norm": 0.8950199000553921, "learning_rate": 3.6420973400220016e-06, "loss": 0.16, "step": 4141 }, { "epoch": 0.38162804625236096, "grad_norm": 0.9225797065582534, "learning_rate": 3.641419643472489e-06, "loss": 0.1576, "step": 4142 }, { "epoch": 0.381720182429631, "grad_norm": 0.9392880937976605, "learning_rate": 3.640741840942039e-06, "loss": 0.1504, "step": 4143 }, { "epoch": 0.381812318606901, "grad_norm": 0.8928198640604731, "learning_rate": 3.640063932493588e-06, "loss": 0.1525, "step": 4144 }, { "epoch": 0.381904454784171, "grad_norm": 0.899053773871494, "learning_rate": 3.639385918190076e-06, "loss": 0.1527, "step": 4145 }, { "epoch": 0.381996590961441, "grad_norm": 0.9202410829435287, "learning_rate": 3.6387077980944595e-06, "loss": 0.1621, "step": 4146 }, { "epoch": 0.382088727138711, "grad_norm": 1.0028767320926162, "learning_rate": 3.6380295722697023e-06, "loss": 0.1505, "step": 4147 }, { "epoch": 0.382180863315981, "grad_norm": 0.8458601915921796, "learning_rate": 3.637351240778776e-06, "loss": 0.1354, "step": 4148 }, { "epoch": 0.382272999493251, "grad_norm": 0.8827937855379273, "learning_rate": 3.6366728036846647e-06, "loss": 0.1546, "step": 4149 }, { "epoch": 0.38236513567052105, "grad_norm": 0.8949424057254047, "learning_rate": 3.635994261050362e-06, "loss": 0.1544, "step": 4150 }, { "epoch": 0.38245727184779105, "grad_norm": 0.9282648917790803, "learning_rate": 3.6353156129388683e-06, "loss": 0.1522, "step": 4151 }, { "epoch": 0.38254940802506104, "grad_norm": 0.8870218985004921, "learning_rate": 3.634636859413199e-06, "loss": 0.1393, "step": 4152 }, { "epoch": 0.38264154420233104, "grad_norm": 0.9425406478284127, "learning_rate": 3.633958000536375e-06, "loss": 0.1567, "step": 4153 }, { "epoch": 0.38273368037960104, "grad_norm": 0.9264991414341093, "learning_rate": 3.633279036371429e-06, "loss": 0.1542, "step": 4154 }, { "epoch": 0.38282581655687103, "grad_norm": 0.9064377119313413, "learning_rate": 3.6325999669814014e-06, "loss": 0.1477, "step": 4155 }, { "epoch": 0.3829179527341411, "grad_norm": 0.9228052083944193, "learning_rate": 3.631920792429346e-06, "loss": 0.1534, "step": 4156 }, { "epoch": 0.3830100889114111, "grad_norm": 0.8711501223021223, "learning_rate": 3.6312415127783228e-06, "loss": 0.1436, "step": 4157 }, { "epoch": 0.3831022250886811, "grad_norm": 0.9402770645056095, "learning_rate": 3.630562128091403e-06, "loss": 0.1546, "step": 4158 }, { "epoch": 0.3831943612659511, "grad_norm": 0.9619983038446867, "learning_rate": 3.6298826384316684e-06, "loss": 0.1514, "step": 4159 }, { "epoch": 0.3832864974432211, "grad_norm": 0.8653963516789092, "learning_rate": 3.6292030438622093e-06, "loss": 0.1424, "step": 4160 }, { "epoch": 0.38337863362049107, "grad_norm": 0.938124421714208, "learning_rate": 3.6285233444461255e-06, "loss": 0.1644, "step": 4161 }, { "epoch": 0.38347076979776107, "grad_norm": 0.8478622536698076, "learning_rate": 3.6278435402465283e-06, "loss": 0.1357, "step": 4162 }, { "epoch": 0.3835629059750311, "grad_norm": 0.9507324930194543, "learning_rate": 3.6271636313265368e-06, "loss": 0.1697, "step": 4163 }, { "epoch": 0.3836550421523011, "grad_norm": 0.9054939912598036, "learning_rate": 3.6264836177492812e-06, "loss": 0.1469, "step": 4164 }, { "epoch": 0.3837471783295711, "grad_norm": 0.8788456687040019, "learning_rate": 3.6258034995778994e-06, "loss": 0.1342, "step": 4165 }, { "epoch": 0.3838393145068411, "grad_norm": 0.9015870187596965, "learning_rate": 3.6251232768755428e-06, "loss": 0.154, "step": 4166 }, { "epoch": 0.3839314506841111, "grad_norm": 0.8909983172995234, "learning_rate": 3.6244429497053678e-06, "loss": 0.1507, "step": 4167 }, { "epoch": 0.3840235868613811, "grad_norm": 0.8913316867597605, "learning_rate": 3.623762518130545e-06, "loss": 0.1474, "step": 4168 }, { "epoch": 0.3841157230386511, "grad_norm": 0.9361153707666292, "learning_rate": 3.6230819822142504e-06, "loss": 0.1416, "step": 4169 }, { "epoch": 0.38420785921592115, "grad_norm": 0.9455562022520799, "learning_rate": 3.6224013420196734e-06, "loss": 0.1586, "step": 4170 }, { "epoch": 0.38429999539319115, "grad_norm": 0.8593562197121624, "learning_rate": 3.621720597610011e-06, "loss": 0.1431, "step": 4171 }, { "epoch": 0.38439213157046115, "grad_norm": 0.8832913395812277, "learning_rate": 3.62103974904847e-06, "loss": 0.1383, "step": 4172 }, { "epoch": 0.38448426774773115, "grad_norm": 0.8483230097860568, "learning_rate": 3.620358796398268e-06, "loss": 0.1438, "step": 4173 }, { "epoch": 0.38457640392500114, "grad_norm": 0.9683562509779495, "learning_rate": 3.6196777397226314e-06, "loss": 0.1439, "step": 4174 }, { "epoch": 0.38466854010227114, "grad_norm": 0.9822738998325394, "learning_rate": 3.618996579084796e-06, "loss": 0.1706, "step": 4175 }, { "epoch": 0.38476067627954114, "grad_norm": 0.9318590340850513, "learning_rate": 3.6183153145480075e-06, "loss": 0.1502, "step": 4176 }, { "epoch": 0.3848528124568112, "grad_norm": 0.91227748896253, "learning_rate": 3.6176339461755217e-06, "loss": 0.1544, "step": 4177 }, { "epoch": 0.3849449486340812, "grad_norm": 0.849625877446819, "learning_rate": 3.6169524740306038e-06, "loss": 0.1269, "step": 4178 }, { "epoch": 0.3850370848113512, "grad_norm": 0.8533351529502364, "learning_rate": 3.6162708981765294e-06, "loss": 0.1392, "step": 4179 }, { "epoch": 0.3851292209886212, "grad_norm": 0.9224771053382539, "learning_rate": 3.6155892186765805e-06, "loss": 0.1477, "step": 4180 }, { "epoch": 0.3852213571658912, "grad_norm": 0.8713099758237273, "learning_rate": 3.6149074355940533e-06, "loss": 0.1398, "step": 4181 }, { "epoch": 0.3853134933431612, "grad_norm": 0.9272503453478166, "learning_rate": 3.614225548992251e-06, "loss": 0.1531, "step": 4182 }, { "epoch": 0.38540562952043117, "grad_norm": 0.9314307699112695, "learning_rate": 3.6135435589344857e-06, "loss": 0.1451, "step": 4183 }, { "epoch": 0.3854977656977012, "grad_norm": 0.9222458730386164, "learning_rate": 3.612861465484082e-06, "loss": 0.1572, "step": 4184 }, { "epoch": 0.3855899018749712, "grad_norm": 0.9132259187920421, "learning_rate": 3.612179268704371e-06, "loss": 0.1673, "step": 4185 }, { "epoch": 0.3856820380522412, "grad_norm": 0.8399624159184574, "learning_rate": 3.611496968658695e-06, "loss": 0.1397, "step": 4186 }, { "epoch": 0.3857741742295112, "grad_norm": 0.8945593972634557, "learning_rate": 3.6108145654104065e-06, "loss": 0.1539, "step": 4187 }, { "epoch": 0.3858663104067812, "grad_norm": 0.8969467662175643, "learning_rate": 3.610132059022865e-06, "loss": 0.151, "step": 4188 }, { "epoch": 0.3859584465840512, "grad_norm": 0.8663822959838602, "learning_rate": 3.6094494495594435e-06, "loss": 0.1457, "step": 4189 }, { "epoch": 0.38605058276132126, "grad_norm": 0.9056864522351286, "learning_rate": 3.6087667370835213e-06, "loss": 0.1533, "step": 4190 }, { "epoch": 0.38614271893859126, "grad_norm": 0.9201003973845128, "learning_rate": 3.6080839216584875e-06, "loss": 0.1463, "step": 4191 }, { "epoch": 0.38623485511586125, "grad_norm": 0.9124350794797036, "learning_rate": 3.6074010033477425e-06, "loss": 0.1473, "step": 4192 }, { "epoch": 0.38632699129313125, "grad_norm": 0.9708223579259208, "learning_rate": 3.606717982214695e-06, "loss": 0.1629, "step": 4193 }, { "epoch": 0.38641912747040125, "grad_norm": 0.869245957927959, "learning_rate": 3.6060348583227635e-06, "loss": 0.1282, "step": 4194 }, { "epoch": 0.38651126364767124, "grad_norm": 0.9028555853054538, "learning_rate": 3.6053516317353777e-06, "loss": 0.1499, "step": 4195 }, { "epoch": 0.38660339982494124, "grad_norm": 0.8716710099401325, "learning_rate": 3.6046683025159722e-06, "loss": 0.1315, "step": 4196 }, { "epoch": 0.3866955360022113, "grad_norm": 0.995012950602247, "learning_rate": 3.6039848707279965e-06, "loss": 0.1675, "step": 4197 }, { "epoch": 0.3867876721794813, "grad_norm": 0.8635770358105515, "learning_rate": 3.6033013364349074e-06, "loss": 0.1433, "step": 4198 }, { "epoch": 0.3868798083567513, "grad_norm": 0.9387478709393704, "learning_rate": 3.60261769970017e-06, "loss": 0.1602, "step": 4199 }, { "epoch": 0.3869719445340213, "grad_norm": 0.9106186310143041, "learning_rate": 3.6019339605872604e-06, "loss": 0.1423, "step": 4200 }, { "epoch": 0.3870640807112913, "grad_norm": 0.829738795239621, "learning_rate": 3.6012501191596637e-06, "loss": 0.129, "step": 4201 }, { "epoch": 0.3871562168885613, "grad_norm": 0.8274379621149167, "learning_rate": 3.6005661754808755e-06, "loss": 0.1306, "step": 4202 }, { "epoch": 0.3872483530658313, "grad_norm": 1.0033242130316309, "learning_rate": 3.5998821296143995e-06, "loss": 0.1647, "step": 4203 }, { "epoch": 0.3873404892431013, "grad_norm": 0.8796576048924489, "learning_rate": 3.5991979816237495e-06, "loss": 0.1398, "step": 4204 }, { "epoch": 0.3874326254203713, "grad_norm": 0.9147076443755038, "learning_rate": 3.5985137315724476e-06, "loss": 0.1467, "step": 4205 }, { "epoch": 0.3875247615976413, "grad_norm": 0.9113754827450924, "learning_rate": 3.597829379524029e-06, "loss": 0.1527, "step": 4206 }, { "epoch": 0.3876168977749113, "grad_norm": 0.8575385170167867, "learning_rate": 3.5971449255420334e-06, "loss": 0.1429, "step": 4207 }, { "epoch": 0.3877090339521813, "grad_norm": 0.971025350124052, "learning_rate": 3.5964603696900137e-06, "loss": 0.1767, "step": 4208 }, { "epoch": 0.3878011701294513, "grad_norm": 0.8814985588682502, "learning_rate": 3.59577571203153e-06, "loss": 0.1477, "step": 4209 }, { "epoch": 0.3878933063067213, "grad_norm": 0.885856410302397, "learning_rate": 3.5950909526301543e-06, "loss": 0.1435, "step": 4210 }, { "epoch": 0.38798544248399136, "grad_norm": 0.8978747474508465, "learning_rate": 3.5944060915494656e-06, "loss": 0.1496, "step": 4211 }, { "epoch": 0.38807757866126136, "grad_norm": 0.8340715669280159, "learning_rate": 3.5937211288530536e-06, "loss": 0.1414, "step": 4212 }, { "epoch": 0.38816971483853135, "grad_norm": 0.9322207725295413, "learning_rate": 3.5930360646045165e-06, "loss": 0.144, "step": 4213 }, { "epoch": 0.38826185101580135, "grad_norm": 0.9764968494745287, "learning_rate": 3.5923508988674643e-06, "loss": 0.1531, "step": 4214 }, { "epoch": 0.38835398719307135, "grad_norm": 0.929979392627363, "learning_rate": 3.591665631705512e-06, "loss": 0.1583, "step": 4215 }, { "epoch": 0.38844612337034135, "grad_norm": 0.8855391143671449, "learning_rate": 3.59098026318229e-06, "loss": 0.1392, "step": 4216 }, { "epoch": 0.38853825954761134, "grad_norm": 0.8787069684232223, "learning_rate": 3.5902947933614317e-06, "loss": 0.1394, "step": 4217 }, { "epoch": 0.3886303957248814, "grad_norm": 0.9634827487742722, "learning_rate": 3.5896092223065854e-06, "loss": 0.1541, "step": 4218 }, { "epoch": 0.3887225319021514, "grad_norm": 0.9325862623957322, "learning_rate": 3.5889235500814055e-06, "loss": 0.1542, "step": 4219 }, { "epoch": 0.3888146680794214, "grad_norm": 0.9023366000754964, "learning_rate": 3.588237776749557e-06, "loss": 0.1501, "step": 4220 }, { "epoch": 0.3889068042566914, "grad_norm": 0.8986206366257142, "learning_rate": 3.5875519023747125e-06, "loss": 0.1453, "step": 4221 }, { "epoch": 0.3889989404339614, "grad_norm": 0.917397921943575, "learning_rate": 3.5868659270205584e-06, "loss": 0.1493, "step": 4222 }, { "epoch": 0.3890910766112314, "grad_norm": 0.9143535754483454, "learning_rate": 3.586179850750785e-06, "loss": 0.1544, "step": 4223 }, { "epoch": 0.38918321278850143, "grad_norm": 0.9044475607995964, "learning_rate": 3.5854936736290956e-06, "loss": 0.1495, "step": 4224 }, { "epoch": 0.38927534896577143, "grad_norm": 0.8377084041288603, "learning_rate": 3.584807395719202e-06, "loss": 0.1316, "step": 4225 }, { "epoch": 0.3893674851430414, "grad_norm": 0.8703025793104591, "learning_rate": 3.584121017084825e-06, "loss": 0.162, "step": 4226 }, { "epoch": 0.3894596213203114, "grad_norm": 0.8891808936863865, "learning_rate": 3.5834345377896953e-06, "loss": 0.1534, "step": 4227 }, { "epoch": 0.3895517574975814, "grad_norm": 0.8490427643718715, "learning_rate": 3.5827479578975523e-06, "loss": 0.1485, "step": 4228 }, { "epoch": 0.3896438936748514, "grad_norm": 0.9253620813691368, "learning_rate": 3.582061277472144e-06, "loss": 0.142, "step": 4229 }, { "epoch": 0.3897360298521214, "grad_norm": 0.9100820857060963, "learning_rate": 3.5813744965772296e-06, "loss": 0.1496, "step": 4230 }, { "epoch": 0.38982816602939147, "grad_norm": 0.8307256013094806, "learning_rate": 3.580687615276577e-06, "loss": 0.1349, "step": 4231 }, { "epoch": 0.38992030220666146, "grad_norm": 0.9088847447095758, "learning_rate": 3.580000633633963e-06, "loss": 0.1495, "step": 4232 }, { "epoch": 0.39001243838393146, "grad_norm": 1.1234634530976173, "learning_rate": 3.579313551713175e-06, "loss": 0.1616, "step": 4233 }, { "epoch": 0.39010457456120146, "grad_norm": 0.9343623593094748, "learning_rate": 3.578626369578006e-06, "loss": 0.145, "step": 4234 }, { "epoch": 0.39019671073847145, "grad_norm": 0.8849146327309522, "learning_rate": 3.5779390872922637e-06, "loss": 0.1542, "step": 4235 }, { "epoch": 0.39028884691574145, "grad_norm": 0.9700332080648529, "learning_rate": 3.5772517049197602e-06, "loss": 0.1668, "step": 4236 }, { "epoch": 0.39038098309301145, "grad_norm": 0.9073077760008486, "learning_rate": 3.5765642225243204e-06, "loss": 0.1501, "step": 4237 }, { "epoch": 0.3904731192702815, "grad_norm": 0.8560186434769101, "learning_rate": 3.575876640169777e-06, "loss": 0.1526, "step": 4238 }, { "epoch": 0.3905652554475515, "grad_norm": 0.9149195393802323, "learning_rate": 3.5751889579199715e-06, "loss": 0.1519, "step": 4239 }, { "epoch": 0.3906573916248215, "grad_norm": 0.9313907437239937, "learning_rate": 3.574501175838755e-06, "loss": 0.147, "step": 4240 }, { "epoch": 0.3907495278020915, "grad_norm": 0.9774674402082485, "learning_rate": 3.5738132939899895e-06, "loss": 0.1593, "step": 4241 }, { "epoch": 0.3908416639793615, "grad_norm": 0.9249259871090791, "learning_rate": 3.573125312437544e-06, "loss": 0.1571, "step": 4242 }, { "epoch": 0.3909338001566315, "grad_norm": 0.8517631758062792, "learning_rate": 3.572437231245297e-06, "loss": 0.1469, "step": 4243 }, { "epoch": 0.3910259363339015, "grad_norm": 0.9075554502422322, "learning_rate": 3.5717490504771386e-06, "loss": 0.1549, "step": 4244 }, { "epoch": 0.39111807251117153, "grad_norm": 0.8860237957842017, "learning_rate": 3.571060770196965e-06, "loss": 0.1426, "step": 4245 }, { "epoch": 0.39121020868844153, "grad_norm": 0.8585811165821868, "learning_rate": 3.570372390468684e-06, "loss": 0.1373, "step": 4246 }, { "epoch": 0.3913023448657115, "grad_norm": 0.9061281656675514, "learning_rate": 3.569683911356211e-06, "loss": 0.147, "step": 4247 }, { "epoch": 0.3913944810429815, "grad_norm": 0.9101609658494008, "learning_rate": 3.568995332923472e-06, "loss": 0.1557, "step": 4248 }, { "epoch": 0.3914866172202515, "grad_norm": 1.0288124302622148, "learning_rate": 3.568306655234401e-06, "loss": 0.1653, "step": 4249 }, { "epoch": 0.3915787533975215, "grad_norm": 0.9003424685543512, "learning_rate": 3.567617878352942e-06, "loss": 0.1434, "step": 4250 }, { "epoch": 0.3916708895747915, "grad_norm": 0.8803026788568897, "learning_rate": 3.566929002343048e-06, "loss": 0.145, "step": 4251 }, { "epoch": 0.39176302575206157, "grad_norm": 0.9300702343272207, "learning_rate": 3.5662400272686813e-06, "loss": 0.1505, "step": 4252 }, { "epoch": 0.39185516192933156, "grad_norm": 0.9166481799372976, "learning_rate": 3.5655509531938143e-06, "loss": 0.1491, "step": 4253 }, { "epoch": 0.39194729810660156, "grad_norm": 0.8898318583085889, "learning_rate": 3.5648617801824257e-06, "loss": 0.1474, "step": 4254 }, { "epoch": 0.39203943428387156, "grad_norm": 0.8754326393842209, "learning_rate": 3.5641725082985066e-06, "loss": 0.1385, "step": 4255 }, { "epoch": 0.39213157046114155, "grad_norm": 0.8837064377392709, "learning_rate": 3.5634831376060554e-06, "loss": 0.1483, "step": 4256 }, { "epoch": 0.39222370663841155, "grad_norm": 0.82973390186192, "learning_rate": 3.5627936681690804e-06, "loss": 0.1385, "step": 4257 }, { "epoch": 0.3923158428156816, "grad_norm": 0.8980341352139759, "learning_rate": 3.562104100051599e-06, "loss": 0.1502, "step": 4258 }, { "epoch": 0.3924079789929516, "grad_norm": 0.8826296993941916, "learning_rate": 3.561414433317637e-06, "loss": 0.144, "step": 4259 }, { "epoch": 0.3925001151702216, "grad_norm": 0.8825113800199493, "learning_rate": 3.560724668031231e-06, "loss": 0.1535, "step": 4260 }, { "epoch": 0.3925922513474916, "grad_norm": 0.9439974689177584, "learning_rate": 3.560034804256426e-06, "loss": 0.1575, "step": 4261 }, { "epoch": 0.3926843875247616, "grad_norm": 0.8261955307735191, "learning_rate": 3.5593448420572753e-06, "loss": 0.1352, "step": 4262 }, { "epoch": 0.3927765237020316, "grad_norm": 0.9212672496243489, "learning_rate": 3.558654781497841e-06, "loss": 0.1351, "step": 4263 }, { "epoch": 0.3928686598793016, "grad_norm": 0.9109157673678041, "learning_rate": 3.557964622642197e-06, "loss": 0.1488, "step": 4264 }, { "epoch": 0.39296079605657164, "grad_norm": 0.9448156747304166, "learning_rate": 3.557274365554424e-06, "loss": 0.1651, "step": 4265 }, { "epoch": 0.39305293223384163, "grad_norm": 0.8808161080550717, "learning_rate": 3.5565840102986128e-06, "loss": 0.1377, "step": 4266 }, { "epoch": 0.39314506841111163, "grad_norm": 0.8536571695891909, "learning_rate": 3.555893556938862e-06, "loss": 0.1401, "step": 4267 }, { "epoch": 0.39323720458838163, "grad_norm": 0.8420334900764369, "learning_rate": 3.5552030055392805e-06, "loss": 0.1333, "step": 4268 }, { "epoch": 0.3933293407656516, "grad_norm": 0.9069556452168558, "learning_rate": 3.554512356163986e-06, "loss": 0.152, "step": 4269 }, { "epoch": 0.3934214769429216, "grad_norm": 0.8525624922991532, "learning_rate": 3.553821608877107e-06, "loss": 0.1355, "step": 4270 }, { "epoch": 0.3935136131201916, "grad_norm": 0.8460506248401927, "learning_rate": 3.5531307637427774e-06, "loss": 0.1461, "step": 4271 }, { "epoch": 0.39360574929746167, "grad_norm": 0.866204338149706, "learning_rate": 3.552439820825143e-06, "loss": 0.1475, "step": 4272 }, { "epoch": 0.39369788547473167, "grad_norm": 0.9409368306978433, "learning_rate": 3.5517487801883587e-06, "loss": 0.1617, "step": 4273 }, { "epoch": 0.39379002165200166, "grad_norm": 0.8316147873982872, "learning_rate": 3.5510576418965862e-06, "loss": 0.1383, "step": 4274 }, { "epoch": 0.39388215782927166, "grad_norm": 0.8920309395123601, "learning_rate": 3.5503664060139987e-06, "loss": 0.1514, "step": 4275 }, { "epoch": 0.39397429400654166, "grad_norm": 0.8663059385692813, "learning_rate": 3.549675072604778e-06, "loss": 0.1327, "step": 4276 }, { "epoch": 0.39406643018381166, "grad_norm": 0.908796183232886, "learning_rate": 3.548983641733113e-06, "loss": 0.1525, "step": 4277 }, { "epoch": 0.39415856636108165, "grad_norm": 0.9156866300447943, "learning_rate": 3.5482921134632043e-06, "loss": 0.1509, "step": 4278 }, { "epoch": 0.3942507025383517, "grad_norm": 0.885423965354652, "learning_rate": 3.54760048785926e-06, "loss": 0.1454, "step": 4279 }, { "epoch": 0.3943428387156217, "grad_norm": 0.9269220799512766, "learning_rate": 3.546908764985498e-06, "loss": 0.1604, "step": 4280 }, { "epoch": 0.3944349748928917, "grad_norm": 0.968797661863933, "learning_rate": 3.5462169449061445e-06, "loss": 0.1433, "step": 4281 }, { "epoch": 0.3945271110701617, "grad_norm": 0.9613953673270564, "learning_rate": 3.5455250276854348e-06, "loss": 0.1445, "step": 4282 }, { "epoch": 0.3946192472474317, "grad_norm": 0.8605829725128497, "learning_rate": 3.544833013387613e-06, "loss": 0.1269, "step": 4283 }, { "epoch": 0.3947113834247017, "grad_norm": 0.9087732510897352, "learning_rate": 3.5441409020769347e-06, "loss": 0.1496, "step": 4284 }, { "epoch": 0.39480351960197174, "grad_norm": 0.9422059956909788, "learning_rate": 3.5434486938176606e-06, "loss": 0.1407, "step": 4285 }, { "epoch": 0.39489565577924174, "grad_norm": 0.9356627540900108, "learning_rate": 3.5427563886740633e-06, "loss": 0.1519, "step": 4286 }, { "epoch": 0.39498779195651174, "grad_norm": 0.9822939505174967, "learning_rate": 3.542063986710423e-06, "loss": 0.1582, "step": 4287 }, { "epoch": 0.39507992813378173, "grad_norm": 0.9727146799740334, "learning_rate": 3.5413714879910287e-06, "loss": 0.1435, "step": 4288 }, { "epoch": 0.39517206431105173, "grad_norm": 0.9789445530371959, "learning_rate": 3.540678892580181e-06, "loss": 0.146, "step": 4289 }, { "epoch": 0.3952642004883217, "grad_norm": 0.9347182617511673, "learning_rate": 3.539986200542185e-06, "loss": 0.148, "step": 4290 }, { "epoch": 0.3953563366655917, "grad_norm": 0.8953433756495065, "learning_rate": 3.539293411941359e-06, "loss": 0.1486, "step": 4291 }, { "epoch": 0.3954484728428618, "grad_norm": 0.8971317416449771, "learning_rate": 3.5386005268420277e-06, "loss": 0.1507, "step": 4292 }, { "epoch": 0.3955406090201318, "grad_norm": 0.9427770172715076, "learning_rate": 3.5379075453085256e-06, "loss": 0.1572, "step": 4293 }, { "epoch": 0.39563274519740177, "grad_norm": 0.8767799263466138, "learning_rate": 3.5372144674051963e-06, "loss": 0.1366, "step": 4294 }, { "epoch": 0.39572488137467177, "grad_norm": 0.9997094521941999, "learning_rate": 3.536521293196392e-06, "loss": 0.1654, "step": 4295 }, { "epoch": 0.39581701755194176, "grad_norm": 0.9105426819568583, "learning_rate": 3.5358280227464735e-06, "loss": 0.1624, "step": 4296 }, { "epoch": 0.39590915372921176, "grad_norm": 0.8461159177852656, "learning_rate": 3.535134656119813e-06, "loss": 0.1367, "step": 4297 }, { "epoch": 0.39600128990648176, "grad_norm": 0.8914245299115292, "learning_rate": 3.534441193380787e-06, "loss": 0.142, "step": 4298 }, { "epoch": 0.3960934260837518, "grad_norm": 0.894727223944378, "learning_rate": 3.5337476345937853e-06, "loss": 0.1462, "step": 4299 }, { "epoch": 0.3961855622610218, "grad_norm": 0.9194710796710017, "learning_rate": 3.5330539798232044e-06, "loss": 0.1333, "step": 4300 }, { "epoch": 0.3962776984382918, "grad_norm": 0.9067339161473813, "learning_rate": 3.5323602291334508e-06, "loss": 0.1525, "step": 4301 }, { "epoch": 0.3963698346155618, "grad_norm": 0.9207199566595172, "learning_rate": 3.5316663825889384e-06, "loss": 0.1462, "step": 4302 }, { "epoch": 0.3964619707928318, "grad_norm": 0.893790793559201, "learning_rate": 3.530972440254092e-06, "loss": 0.1465, "step": 4303 }, { "epoch": 0.3965541069701018, "grad_norm": 0.9323229957009004, "learning_rate": 3.530278402193342e-06, "loss": 0.1501, "step": 4304 }, { "epoch": 0.3966462431473718, "grad_norm": 0.93288274935138, "learning_rate": 3.5295842684711334e-06, "loss": 0.1487, "step": 4305 }, { "epoch": 0.39673837932464184, "grad_norm": 0.8904180096398162, "learning_rate": 3.528890039151913e-06, "loss": 0.1374, "step": 4306 }, { "epoch": 0.39683051550191184, "grad_norm": 0.9609819910977235, "learning_rate": 3.5281957143001426e-06, "loss": 0.165, "step": 4307 }, { "epoch": 0.39692265167918184, "grad_norm": 0.9827893129703009, "learning_rate": 3.5275012939802895e-06, "loss": 0.1546, "step": 4308 }, { "epoch": 0.39701478785645183, "grad_norm": 0.9057704871681684, "learning_rate": 3.5268067782568306e-06, "loss": 0.1433, "step": 4309 }, { "epoch": 0.39710692403372183, "grad_norm": 0.9032061609195176, "learning_rate": 3.5261121671942515e-06, "loss": 0.1475, "step": 4310 }, { "epoch": 0.39719906021099183, "grad_norm": 0.9355446728513368, "learning_rate": 3.525417460857048e-06, "loss": 0.1468, "step": 4311 }, { "epoch": 0.3972911963882618, "grad_norm": 0.9825199046051186, "learning_rate": 3.524722659309722e-06, "loss": 0.1642, "step": 4312 }, { "epoch": 0.3973833325655319, "grad_norm": 0.915612075494406, "learning_rate": 3.5240277626167875e-06, "loss": 0.1434, "step": 4313 }, { "epoch": 0.3974754687428019, "grad_norm": 0.8754977403678704, "learning_rate": 3.5233327708427638e-06, "loss": 0.1382, "step": 4314 }, { "epoch": 0.39756760492007187, "grad_norm": 0.8479881511273041, "learning_rate": 3.522637684052184e-06, "loss": 0.1358, "step": 4315 }, { "epoch": 0.39765974109734187, "grad_norm": 0.9382658935005335, "learning_rate": 3.5219425023095837e-06, "loss": 0.1547, "step": 4316 }, { "epoch": 0.39775187727461186, "grad_norm": 0.8891053914875621, "learning_rate": 3.5212472256795122e-06, "loss": 0.151, "step": 4317 }, { "epoch": 0.39784401345188186, "grad_norm": 0.8883971554082362, "learning_rate": 3.5205518542265265e-06, "loss": 0.1536, "step": 4318 }, { "epoch": 0.3979361496291519, "grad_norm": 1.0232040373202294, "learning_rate": 3.5198563880151913e-06, "loss": 0.1609, "step": 4319 }, { "epoch": 0.3980282858064219, "grad_norm": 0.9193288491755802, "learning_rate": 3.519160827110081e-06, "loss": 0.1391, "step": 4320 }, { "epoch": 0.3981204219836919, "grad_norm": 0.9096900817977849, "learning_rate": 3.5184651715757772e-06, "loss": 0.148, "step": 4321 }, { "epoch": 0.3982125581609619, "grad_norm": 0.8684172311676668, "learning_rate": 3.517769421476873e-06, "loss": 0.1324, "step": 4322 }, { "epoch": 0.3983046943382319, "grad_norm": 0.924708159192272, "learning_rate": 3.5170735768779683e-06, "loss": 0.1576, "step": 4323 }, { "epoch": 0.3983968305155019, "grad_norm": 0.8836737240145939, "learning_rate": 3.5163776378436736e-06, "loss": 0.1509, "step": 4324 }, { "epoch": 0.3984889666927719, "grad_norm": 0.92612250459839, "learning_rate": 3.515681604438605e-06, "loss": 0.1584, "step": 4325 }, { "epoch": 0.39858110287004195, "grad_norm": 0.9261909259373187, "learning_rate": 3.5149854767273904e-06, "loss": 0.163, "step": 4326 }, { "epoch": 0.39867323904731194, "grad_norm": 0.8606719880644568, "learning_rate": 3.5142892547746647e-06, "loss": 0.1507, "step": 4327 }, { "epoch": 0.39876537522458194, "grad_norm": 0.9505649437739544, "learning_rate": 3.513592938645073e-06, "loss": 0.1598, "step": 4328 }, { "epoch": 0.39885751140185194, "grad_norm": 0.8572409396930928, "learning_rate": 3.5128965284032677e-06, "loss": 0.1378, "step": 4329 }, { "epoch": 0.39894964757912194, "grad_norm": 0.9370424084273961, "learning_rate": 3.512200024113911e-06, "loss": 0.1586, "step": 4330 }, { "epoch": 0.39904178375639193, "grad_norm": 0.9345899795920014, "learning_rate": 3.511503425841672e-06, "loss": 0.158, "step": 4331 }, { "epoch": 0.39913391993366193, "grad_norm": 0.8384552894192423, "learning_rate": 3.5108067336512325e-06, "loss": 0.1327, "step": 4332 }, { "epoch": 0.399226056110932, "grad_norm": 0.9193822845561663, "learning_rate": 3.5101099476072776e-06, "loss": 0.1561, "step": 4333 }, { "epoch": 0.399318192288202, "grad_norm": 0.9288838613780442, "learning_rate": 3.5094130677745065e-06, "loss": 0.1564, "step": 4334 }, { "epoch": 0.399410328465472, "grad_norm": 0.9365564041095639, "learning_rate": 3.5087160942176228e-06, "loss": 0.1551, "step": 4335 }, { "epoch": 0.399502464642742, "grad_norm": 0.9228486860598516, "learning_rate": 3.5080190270013415e-06, "loss": 0.149, "step": 4336 }, { "epoch": 0.39959460082001197, "grad_norm": 0.9585047222693666, "learning_rate": 3.5073218661903852e-06, "loss": 0.1491, "step": 4337 }, { "epoch": 0.39968673699728197, "grad_norm": 0.9608521329567735, "learning_rate": 3.5066246118494847e-06, "loss": 0.1651, "step": 4338 }, { "epoch": 0.39977887317455196, "grad_norm": 0.9241350643720966, "learning_rate": 3.5059272640433808e-06, "loss": 0.1455, "step": 4339 }, { "epoch": 0.399871009351822, "grad_norm": 0.9336561932328415, "learning_rate": 3.5052298228368227e-06, "loss": 0.1585, "step": 4340 }, { "epoch": 0.399963145529092, "grad_norm": 0.9287712735257532, "learning_rate": 3.5045322882945666e-06, "loss": 0.1511, "step": 4341 }, { "epoch": 0.400055281706362, "grad_norm": 0.8132984018015555, "learning_rate": 3.5038346604813796e-06, "loss": 0.1208, "step": 4342 }, { "epoch": 0.400147417883632, "grad_norm": 0.9102128798105348, "learning_rate": 3.5031369394620364e-06, "loss": 0.1461, "step": 4343 }, { "epoch": 0.400239554060902, "grad_norm": 0.8849265666537798, "learning_rate": 3.5024391253013206e-06, "loss": 0.1492, "step": 4344 }, { "epoch": 0.400331690238172, "grad_norm": 0.9810479880919115, "learning_rate": 3.5017412180640243e-06, "loss": 0.1521, "step": 4345 }, { "epoch": 0.400423826415442, "grad_norm": 0.956181986827905, "learning_rate": 3.5010432178149473e-06, "loss": 0.1609, "step": 4346 }, { "epoch": 0.40051596259271205, "grad_norm": 0.9029488074220976, "learning_rate": 3.5003451246189003e-06, "loss": 0.1482, "step": 4347 }, { "epoch": 0.40060809876998205, "grad_norm": 0.8824563585798078, "learning_rate": 3.499646938540701e-06, "loss": 0.146, "step": 4348 }, { "epoch": 0.40070023494725204, "grad_norm": 0.9269831174400178, "learning_rate": 3.498948659645176e-06, "loss": 0.1484, "step": 4349 }, { "epoch": 0.40079237112452204, "grad_norm": 0.9216669167926768, "learning_rate": 3.4982502879971596e-06, "loss": 0.1429, "step": 4350 }, { "epoch": 0.40088450730179204, "grad_norm": 0.9119424481041382, "learning_rate": 3.497551823661498e-06, "loss": 0.1438, "step": 4351 }, { "epoch": 0.40097664347906203, "grad_norm": 0.9521206551490597, "learning_rate": 3.4968532667030408e-06, "loss": 0.1684, "step": 4352 }, { "epoch": 0.4010687796563321, "grad_norm": 0.9169723849076772, "learning_rate": 3.496154617186651e-06, "loss": 0.1452, "step": 4353 }, { "epoch": 0.4011609158336021, "grad_norm": 0.9816498659771387, "learning_rate": 3.4954558751771976e-06, "loss": 0.1611, "step": 4354 }, { "epoch": 0.4012530520108721, "grad_norm": 1.0011798661122229, "learning_rate": 3.4947570407395593e-06, "loss": 0.1568, "step": 4355 }, { "epoch": 0.4013451881881421, "grad_norm": 0.8784800818378974, "learning_rate": 3.494058113938623e-06, "loss": 0.1454, "step": 4356 }, { "epoch": 0.4014373243654121, "grad_norm": 0.9091950278863203, "learning_rate": 3.493359094839284e-06, "loss": 0.1454, "step": 4357 }, { "epoch": 0.40152946054268207, "grad_norm": 1.003839121809296, "learning_rate": 3.4926599835064446e-06, "loss": 0.1527, "step": 4358 }, { "epoch": 0.40162159671995207, "grad_norm": 0.8735840233392277, "learning_rate": 3.491960780005021e-06, "loss": 0.1488, "step": 4359 }, { "epoch": 0.4017137328972221, "grad_norm": 0.9279416109721896, "learning_rate": 3.4912614843999304e-06, "loss": 0.1519, "step": 4360 }, { "epoch": 0.4018058690744921, "grad_norm": 0.9292349005371296, "learning_rate": 3.490562096756105e-06, "loss": 0.145, "step": 4361 }, { "epoch": 0.4018980052517621, "grad_norm": 0.8176160899793758, "learning_rate": 3.4898626171384823e-06, "loss": 0.1319, "step": 4362 }, { "epoch": 0.4019901414290321, "grad_norm": 0.8663661790382975, "learning_rate": 3.4891630456120098e-06, "loss": 0.1478, "step": 4363 }, { "epoch": 0.4020822776063021, "grad_norm": 0.8951011796955761, "learning_rate": 3.4884633822416412e-06, "loss": 0.1411, "step": 4364 }, { "epoch": 0.4021744137835721, "grad_norm": 0.9513840457401349, "learning_rate": 3.4877636270923416e-06, "loss": 0.1557, "step": 4365 }, { "epoch": 0.4022665499608421, "grad_norm": 0.9631571432658622, "learning_rate": 3.4870637802290817e-06, "loss": 0.1528, "step": 4366 }, { "epoch": 0.40235868613811215, "grad_norm": 0.9009246331376555, "learning_rate": 3.4863638417168455e-06, "loss": 0.1406, "step": 4367 }, { "epoch": 0.40245082231538215, "grad_norm": 0.9418153307876809, "learning_rate": 3.4856638116206194e-06, "loss": 0.1522, "step": 4368 }, { "epoch": 0.40254295849265215, "grad_norm": 0.938253466177827, "learning_rate": 3.4849636900054023e-06, "loss": 0.1414, "step": 4369 }, { "epoch": 0.40263509466992214, "grad_norm": 0.949524744273461, "learning_rate": 3.484263476936201e-06, "loss": 0.1527, "step": 4370 }, { "epoch": 0.40272723084719214, "grad_norm": 0.9037729435498303, "learning_rate": 3.4835631724780296e-06, "loss": 0.1445, "step": 4371 }, { "epoch": 0.40281936702446214, "grad_norm": 0.8961162386122397, "learning_rate": 3.4828627766959123e-06, "loss": 0.1491, "step": 4372 }, { "epoch": 0.40291150320173214, "grad_norm": 0.875416342667484, "learning_rate": 3.4821622896548795e-06, "loss": 0.1483, "step": 4373 }, { "epoch": 0.4030036393790022, "grad_norm": 0.8912047991694245, "learning_rate": 3.4814617114199722e-06, "loss": 0.1514, "step": 4374 }, { "epoch": 0.4030957755562722, "grad_norm": 0.9112833813015218, "learning_rate": 3.4807610420562406e-06, "loss": 0.1599, "step": 4375 }, { "epoch": 0.4031879117335422, "grad_norm": 0.8839605396294251, "learning_rate": 3.48006028162874e-06, "loss": 0.1532, "step": 4376 }, { "epoch": 0.4032800479108122, "grad_norm": 0.8948899894612451, "learning_rate": 3.4793594302025367e-06, "loss": 0.1465, "step": 4377 }, { "epoch": 0.4033721840880822, "grad_norm": 0.851447023415438, "learning_rate": 3.4786584878427056e-06, "loss": 0.134, "step": 4378 }, { "epoch": 0.40346432026535217, "grad_norm": 0.9114250139558066, "learning_rate": 3.4779574546143276e-06, "loss": 0.1539, "step": 4379 }, { "epoch": 0.40355645644262217, "grad_norm": 0.9361638989038511, "learning_rate": 3.4772563305824956e-06, "loss": 0.1612, "step": 4380 }, { "epoch": 0.4036485926198922, "grad_norm": 0.8195140088476734, "learning_rate": 3.4765551158123074e-06, "loss": 0.1333, "step": 4381 }, { "epoch": 0.4037407287971622, "grad_norm": 0.9008523426752787, "learning_rate": 3.4758538103688723e-06, "loss": 0.1525, "step": 4382 }, { "epoch": 0.4038328649744322, "grad_norm": 0.957477825346113, "learning_rate": 3.4751524143173055e-06, "loss": 0.1651, "step": 4383 }, { "epoch": 0.4039250011517022, "grad_norm": 0.9227344073131626, "learning_rate": 3.4744509277227316e-06, "loss": 0.1493, "step": 4384 }, { "epoch": 0.4040171373289722, "grad_norm": 0.9151830123149463, "learning_rate": 3.473749350650285e-06, "loss": 0.1554, "step": 4385 }, { "epoch": 0.4041092735062422, "grad_norm": 0.8878099053567197, "learning_rate": 3.473047683165106e-06, "loss": 0.1552, "step": 4386 }, { "epoch": 0.40420140968351226, "grad_norm": 0.9120949620840199, "learning_rate": 3.472345925332344e-06, "loss": 0.156, "step": 4387 }, { "epoch": 0.40429354586078226, "grad_norm": 0.9151073635872946, "learning_rate": 3.47164407721716e-06, "loss": 0.1601, "step": 4388 }, { "epoch": 0.40438568203805225, "grad_norm": 0.9144746721555486, "learning_rate": 3.4709421388847177e-06, "loss": 0.1442, "step": 4389 }, { "epoch": 0.40447781821532225, "grad_norm": 0.9910249337465405, "learning_rate": 3.4702401104001937e-06, "loss": 0.161, "step": 4390 }, { "epoch": 0.40456995439259225, "grad_norm": 0.905580616645535, "learning_rate": 3.4695379918287708e-06, "loss": 0.147, "step": 4391 }, { "epoch": 0.40466209056986224, "grad_norm": 0.9682851249530802, "learning_rate": 3.468835783235641e-06, "loss": 0.1664, "step": 4392 }, { "epoch": 0.40475422674713224, "grad_norm": 1.019950194661881, "learning_rate": 3.468133484686005e-06, "loss": 0.1581, "step": 4393 }, { "epoch": 0.4048463629244023, "grad_norm": 0.9054834581346294, "learning_rate": 3.467431096245071e-06, "loss": 0.1458, "step": 4394 }, { "epoch": 0.4049384991016723, "grad_norm": 0.9394518511371741, "learning_rate": 3.466728617978054e-06, "loss": 0.1465, "step": 4395 }, { "epoch": 0.4050306352789423, "grad_norm": 0.9276116238870109, "learning_rate": 3.466026049950182e-06, "loss": 0.1533, "step": 4396 }, { "epoch": 0.4051227714562123, "grad_norm": 0.8814615109668135, "learning_rate": 3.465323392226687e-06, "loss": 0.1474, "step": 4397 }, { "epoch": 0.4052149076334823, "grad_norm": 0.9276110676799734, "learning_rate": 3.4646206448728113e-06, "loss": 0.1471, "step": 4398 }, { "epoch": 0.4053070438107523, "grad_norm": 0.9740379558096653, "learning_rate": 3.463917807953805e-06, "loss": 0.1514, "step": 4399 }, { "epoch": 0.4053991799880223, "grad_norm": 0.8572241682747315, "learning_rate": 3.4632148815349265e-06, "loss": 0.1411, "step": 4400 }, { "epoch": 0.4054913161652923, "grad_norm": 0.8856261129316616, "learning_rate": 3.4625118656814414e-06, "loss": 0.1531, "step": 4401 }, { "epoch": 0.4055834523425623, "grad_norm": 0.8319574623205851, "learning_rate": 3.4618087604586277e-06, "loss": 0.1476, "step": 4402 }, { "epoch": 0.4056755885198323, "grad_norm": 0.851179009626978, "learning_rate": 3.4611055659317663e-06, "loss": 0.1403, "step": 4403 }, { "epoch": 0.4057677246971023, "grad_norm": 0.7983809793208019, "learning_rate": 3.4604022821661493e-06, "loss": 0.1217, "step": 4404 }, { "epoch": 0.4058598608743723, "grad_norm": 0.8594086174519592, "learning_rate": 3.459698909227078e-06, "loss": 0.15, "step": 4405 }, { "epoch": 0.4059519970516423, "grad_norm": 0.8403974045516667, "learning_rate": 3.458995447179858e-06, "loss": 0.1442, "step": 4406 }, { "epoch": 0.4060441332289123, "grad_norm": 0.9268270027862243, "learning_rate": 3.4582918960898094e-06, "loss": 0.1556, "step": 4407 }, { "epoch": 0.40613626940618236, "grad_norm": 0.8732579314464122, "learning_rate": 3.457588256022254e-06, "loss": 0.1425, "step": 4408 }, { "epoch": 0.40622840558345236, "grad_norm": 0.9393071006353455, "learning_rate": 3.4568845270425268e-06, "loss": 0.1402, "step": 4409 }, { "epoch": 0.40632054176072235, "grad_norm": 0.8936419289776788, "learning_rate": 3.456180709215968e-06, "loss": 0.1391, "step": 4410 }, { "epoch": 0.40641267793799235, "grad_norm": 0.9028553321289632, "learning_rate": 3.455476802607927e-06, "loss": 0.1455, "step": 4411 }, { "epoch": 0.40650481411526235, "grad_norm": 0.9629848751950774, "learning_rate": 3.454772807283763e-06, "loss": 0.1595, "step": 4412 }, { "epoch": 0.40659695029253234, "grad_norm": 0.8973627311061653, "learning_rate": 3.45406872330884e-06, "loss": 0.1451, "step": 4413 }, { "epoch": 0.40668908646980234, "grad_norm": 0.8543835552665785, "learning_rate": 3.453364550748533e-06, "loss": 0.1496, "step": 4414 }, { "epoch": 0.4067812226470724, "grad_norm": 0.8201019616645914, "learning_rate": 3.4526602896682267e-06, "loss": 0.1435, "step": 4415 }, { "epoch": 0.4068733588243424, "grad_norm": 0.8978747617563513, "learning_rate": 3.451955940133308e-06, "loss": 0.1436, "step": 4416 }, { "epoch": 0.4069654950016124, "grad_norm": 0.9475916174468934, "learning_rate": 3.451251502209179e-06, "loss": 0.1572, "step": 4417 }, { "epoch": 0.4070576311788824, "grad_norm": 0.9162427080905498, "learning_rate": 3.4505469759612453e-06, "loss": 0.1454, "step": 4418 }, { "epoch": 0.4071497673561524, "grad_norm": 0.8802358936759178, "learning_rate": 3.4498423614549226e-06, "loss": 0.1437, "step": 4419 }, { "epoch": 0.4072419035334224, "grad_norm": 0.9874591988178819, "learning_rate": 3.449137658755635e-06, "loss": 0.1558, "step": 4420 }, { "epoch": 0.40733403971069243, "grad_norm": 0.9103269014973717, "learning_rate": 3.4484328679288133e-06, "loss": 0.1512, "step": 4421 }, { "epoch": 0.4074261758879624, "grad_norm": 0.8962116901628482, "learning_rate": 3.4477279890398968e-06, "loss": 0.1472, "step": 4422 }, { "epoch": 0.4075183120652324, "grad_norm": 0.9979364839432481, "learning_rate": 3.4470230221543362e-06, "loss": 0.1431, "step": 4423 }, { "epoch": 0.4076104482425024, "grad_norm": 0.9124653449987579, "learning_rate": 3.4463179673375846e-06, "loss": 0.1426, "step": 4424 }, { "epoch": 0.4077025844197724, "grad_norm": 0.9408756063038355, "learning_rate": 3.445612824655108e-06, "loss": 0.1414, "step": 4425 }, { "epoch": 0.4077947205970424, "grad_norm": 0.9395683151940247, "learning_rate": 3.4449075941723797e-06, "loss": 0.1427, "step": 4426 }, { "epoch": 0.4078868567743124, "grad_norm": 0.9611998547934455, "learning_rate": 3.444202275954879e-06, "loss": 0.1412, "step": 4427 }, { "epoch": 0.40797899295158246, "grad_norm": 0.93129468781063, "learning_rate": 3.443496870068096e-06, "loss": 0.1483, "step": 4428 }, { "epoch": 0.40807112912885246, "grad_norm": 0.9047030358220689, "learning_rate": 3.442791376577527e-06, "loss": 0.1372, "step": 4429 }, { "epoch": 0.40816326530612246, "grad_norm": 0.9545577553610516, "learning_rate": 3.4420857955486756e-06, "loss": 0.1442, "step": 4430 }, { "epoch": 0.40825540148339245, "grad_norm": 0.8843025095837417, "learning_rate": 3.441380127047058e-06, "loss": 0.1345, "step": 4431 }, { "epoch": 0.40834753766066245, "grad_norm": 0.8466673145673613, "learning_rate": 3.4406743711381945e-06, "loss": 0.1435, "step": 4432 }, { "epoch": 0.40843967383793245, "grad_norm": 0.8921580068825665, "learning_rate": 3.439968527887614e-06, "loss": 0.1462, "step": 4433 }, { "epoch": 0.40853181001520245, "grad_norm": 0.8755904986298998, "learning_rate": 3.439262597360855e-06, "loss": 0.152, "step": 4434 }, { "epoch": 0.4086239461924725, "grad_norm": 0.9056162255854322, "learning_rate": 3.438556579623462e-06, "loss": 0.1412, "step": 4435 }, { "epoch": 0.4087160823697425, "grad_norm": 0.9685429925466471, "learning_rate": 3.43785047474099e-06, "loss": 0.1574, "step": 4436 }, { "epoch": 0.4088082185470125, "grad_norm": 0.850888377979816, "learning_rate": 3.437144282779e-06, "loss": 0.1449, "step": 4437 }, { "epoch": 0.4089003547242825, "grad_norm": 0.9337752457104304, "learning_rate": 3.4364380038030636e-06, "loss": 0.1647, "step": 4438 }, { "epoch": 0.4089924909015525, "grad_norm": 0.8772877594659604, "learning_rate": 3.435731637878757e-06, "loss": 0.146, "step": 4439 }, { "epoch": 0.4090846270788225, "grad_norm": 0.9383038547513723, "learning_rate": 3.435025185071668e-06, "loss": 0.1494, "step": 4440 }, { "epoch": 0.4091767632560925, "grad_norm": 0.8908104687401064, "learning_rate": 3.434318645447388e-06, "loss": 0.1344, "step": 4441 }, { "epoch": 0.40926889943336253, "grad_norm": 0.9462178584246064, "learning_rate": 3.433612019071523e-06, "loss": 0.1481, "step": 4442 }, { "epoch": 0.40936103561063253, "grad_norm": 1.0324931287459957, "learning_rate": 3.4329053060096805e-06, "loss": 0.1564, "step": 4443 }, { "epoch": 0.4094531717879025, "grad_norm": 0.9704275908210812, "learning_rate": 3.4321985063274805e-06, "loss": 0.1566, "step": 4444 }, { "epoch": 0.4095453079651725, "grad_norm": 0.9285469120115527, "learning_rate": 3.431491620090549e-06, "loss": 0.1529, "step": 4445 }, { "epoch": 0.4096374441424425, "grad_norm": 0.8357706208744242, "learning_rate": 3.43078464736452e-06, "loss": 0.1382, "step": 4446 }, { "epoch": 0.4097295803197125, "grad_norm": 0.979590666946963, "learning_rate": 3.4300775882150367e-06, "loss": 0.1642, "step": 4447 }, { "epoch": 0.4098217164969825, "grad_norm": 0.9152729280726183, "learning_rate": 3.429370442707749e-06, "loss": 0.1474, "step": 4448 }, { "epoch": 0.40991385267425257, "grad_norm": 0.9441617628306267, "learning_rate": 3.428663210908315e-06, "loss": 0.1565, "step": 4449 }, { "epoch": 0.41000598885152256, "grad_norm": 0.9176715113996435, "learning_rate": 3.427955892882403e-06, "loss": 0.1561, "step": 4450 }, { "epoch": 0.41009812502879256, "grad_norm": 0.9473640183674535, "learning_rate": 3.4272484886956856e-06, "loss": 0.1532, "step": 4451 }, { "epoch": 0.41019026120606256, "grad_norm": 0.9388563681988958, "learning_rate": 3.4265409984138463e-06, "loss": 0.1552, "step": 4452 }, { "epoch": 0.41028239738333255, "grad_norm": 0.8830180332465869, "learning_rate": 3.4258334221025763e-06, "loss": 0.1392, "step": 4453 }, { "epoch": 0.41037453356060255, "grad_norm": 0.8404506052238401, "learning_rate": 3.425125759827573e-06, "loss": 0.1408, "step": 4454 }, { "epoch": 0.4104666697378726, "grad_norm": 0.8937046949022973, "learning_rate": 3.4244180116545434e-06, "loss": 0.1493, "step": 4455 }, { "epoch": 0.4105588059151426, "grad_norm": 0.9179506656858187, "learning_rate": 3.423710177649202e-06, "loss": 0.1469, "step": 4456 }, { "epoch": 0.4106509420924126, "grad_norm": 0.9052018029466072, "learning_rate": 3.423002257877271e-06, "loss": 0.1524, "step": 4457 }, { "epoch": 0.4107430782696826, "grad_norm": 0.9531593496243919, "learning_rate": 3.4222942524044817e-06, "loss": 0.1635, "step": 4458 }, { "epoch": 0.4108352144469526, "grad_norm": 0.8613976632136475, "learning_rate": 3.4215861612965705e-06, "loss": 0.1364, "step": 4459 }, { "epoch": 0.4109273506242226, "grad_norm": 0.9203983911114743, "learning_rate": 3.4208779846192856e-06, "loss": 0.1471, "step": 4460 }, { "epoch": 0.4110194868014926, "grad_norm": 0.8893575267649316, "learning_rate": 3.420169722438381e-06, "loss": 0.1481, "step": 4461 }, { "epoch": 0.41111162297876264, "grad_norm": 0.9481408074859154, "learning_rate": 3.419461374819618e-06, "loss": 0.153, "step": 4462 }, { "epoch": 0.41120375915603263, "grad_norm": 0.9102576968282057, "learning_rate": 3.418752941828769e-06, "loss": 0.1479, "step": 4463 }, { "epoch": 0.41129589533330263, "grad_norm": 0.9170697810535563, "learning_rate": 3.418044423531609e-06, "loss": 0.1493, "step": 4464 }, { "epoch": 0.4113880315105726, "grad_norm": 0.9107203490066708, "learning_rate": 3.4173358199939253e-06, "loss": 0.1544, "step": 4465 }, { "epoch": 0.4114801676878426, "grad_norm": 0.8864919007877409, "learning_rate": 3.416627131281513e-06, "loss": 0.1501, "step": 4466 }, { "epoch": 0.4115723038651126, "grad_norm": 0.8923614673943928, "learning_rate": 3.415918357460173e-06, "loss": 0.1529, "step": 4467 }, { "epoch": 0.4116644400423826, "grad_norm": 0.8625947768008402, "learning_rate": 3.4152094985957135e-06, "loss": 0.1537, "step": 4468 }, { "epoch": 0.41175657621965267, "grad_norm": 0.8763271138448714, "learning_rate": 3.4145005547539552e-06, "loss": 0.1422, "step": 4469 }, { "epoch": 0.41184871239692267, "grad_norm": 0.845461883705554, "learning_rate": 3.413791526000721e-06, "loss": 0.137, "step": 4470 }, { "epoch": 0.41194084857419266, "grad_norm": 0.8670354139637978, "learning_rate": 3.4130824124018453e-06, "loss": 0.1487, "step": 4471 }, { "epoch": 0.41203298475146266, "grad_norm": 0.9156206700607552, "learning_rate": 3.4123732140231695e-06, "loss": 0.1505, "step": 4472 }, { "epoch": 0.41212512092873266, "grad_norm": 0.9246364937441267, "learning_rate": 3.411663930930543e-06, "loss": 0.1559, "step": 4473 }, { "epoch": 0.41221725710600265, "grad_norm": 0.9060481038174606, "learning_rate": 3.4109545631898223e-06, "loss": 0.1559, "step": 4474 }, { "epoch": 0.41230939328327265, "grad_norm": 0.9347223663635646, "learning_rate": 3.410245110866872e-06, "loss": 0.1598, "step": 4475 }, { "epoch": 0.4124015294605427, "grad_norm": 0.9055809419343559, "learning_rate": 3.409535574027565e-06, "loss": 0.1373, "step": 4476 }, { "epoch": 0.4124936656378127, "grad_norm": 0.8832161886506429, "learning_rate": 3.4088259527377826e-06, "loss": 0.1453, "step": 4477 }, { "epoch": 0.4125858018150827, "grad_norm": 0.9766041924808401, "learning_rate": 3.408116247063412e-06, "loss": 0.1624, "step": 4478 }, { "epoch": 0.4126779379923527, "grad_norm": 0.9355812719201331, "learning_rate": 3.407406457070351e-06, "loss": 0.1439, "step": 4479 }, { "epoch": 0.4127700741696227, "grad_norm": 0.9492137205341494, "learning_rate": 3.4066965828245023e-06, "loss": 0.1495, "step": 4480 }, { "epoch": 0.4128622103468927, "grad_norm": 0.9537352797092123, "learning_rate": 3.4059866243917784e-06, "loss": 0.1587, "step": 4481 }, { "epoch": 0.4129543465241627, "grad_norm": 0.9084473898134254, "learning_rate": 3.4052765818380988e-06, "loss": 0.1528, "step": 4482 }, { "epoch": 0.41304648270143274, "grad_norm": 0.8738894354219552, "learning_rate": 3.4045664552293913e-06, "loss": 0.1439, "step": 4483 }, { "epoch": 0.41313861887870273, "grad_norm": 0.955205886413484, "learning_rate": 3.4038562446315908e-06, "loss": 0.1621, "step": 4484 }, { "epoch": 0.41323075505597273, "grad_norm": 0.9384210771953125, "learning_rate": 3.4031459501106412e-06, "loss": 0.1402, "step": 4485 }, { "epoch": 0.41332289123324273, "grad_norm": 0.9346661196851742, "learning_rate": 3.4024355717324927e-06, "loss": 0.1599, "step": 4486 }, { "epoch": 0.4134150274105127, "grad_norm": 0.8793320839571395, "learning_rate": 3.4017251095631044e-06, "loss": 0.15, "step": 4487 }, { "epoch": 0.4135071635877827, "grad_norm": 0.976370275498756, "learning_rate": 3.401014563668442e-06, "loss": 0.1457, "step": 4488 }, { "epoch": 0.4135992997650528, "grad_norm": 0.8577892362812106, "learning_rate": 3.4003039341144807e-06, "loss": 0.1381, "step": 4489 }, { "epoch": 0.41369143594232277, "grad_norm": 0.8677820161364236, "learning_rate": 3.3995932209672028e-06, "loss": 0.1457, "step": 4490 }, { "epoch": 0.41378357211959277, "grad_norm": 0.9488229913826759, "learning_rate": 3.3988824242925965e-06, "loss": 0.1516, "step": 4491 }, { "epoch": 0.41387570829686277, "grad_norm": 0.8646831069607954, "learning_rate": 3.398171544156661e-06, "loss": 0.1294, "step": 4492 }, { "epoch": 0.41396784447413276, "grad_norm": 1.0109574950040325, "learning_rate": 3.3974605806254015e-06, "loss": 0.1455, "step": 4493 }, { "epoch": 0.41405998065140276, "grad_norm": 0.862225186168334, "learning_rate": 3.3967495337648297e-06, "loss": 0.1471, "step": 4494 }, { "epoch": 0.41415211682867276, "grad_norm": 0.9281967815655426, "learning_rate": 3.396038403640968e-06, "loss": 0.1588, "step": 4495 }, { "epoch": 0.4142442530059428, "grad_norm": 0.9404582510399649, "learning_rate": 3.395327190319843e-06, "loss": 0.1556, "step": 4496 }, { "epoch": 0.4143363891832128, "grad_norm": 0.871442207254464, "learning_rate": 3.394615893867492e-06, "loss": 0.1425, "step": 4497 }, { "epoch": 0.4144285253604828, "grad_norm": 0.9006939834301403, "learning_rate": 3.3939045143499604e-06, "loss": 0.1392, "step": 4498 }, { "epoch": 0.4145206615377528, "grad_norm": 0.8677132626890294, "learning_rate": 3.393193051833297e-06, "loss": 0.146, "step": 4499 }, { "epoch": 0.4146127977150228, "grad_norm": 0.8750036614339674, "learning_rate": 3.392481506383563e-06, "loss": 0.1557, "step": 4500 }, { "epoch": 0.4146127977150228, "eval_loss": 0.14849522709846497, "eval_runtime": 299.3682, "eval_samples_per_second": 23.439, "eval_steps_per_second": 2.933, "step": 4500 }, { "epoch": 0.4147049338922928, "grad_norm": 0.9083364571977722, "learning_rate": 3.391769878066825e-06, "loss": 0.1554, "step": 4501 }, { "epoch": 0.4147970700695628, "grad_norm": 0.9037418164648761, "learning_rate": 3.391058166949159e-06, "loss": 0.1483, "step": 4502 }, { "epoch": 0.41488920624683284, "grad_norm": 0.9011614056787858, "learning_rate": 3.390346373096645e-06, "loss": 0.1622, "step": 4503 }, { "epoch": 0.41498134242410284, "grad_norm": 0.935184827582457, "learning_rate": 3.3896344965753746e-06, "loss": 0.1542, "step": 4504 }, { "epoch": 0.41507347860137284, "grad_norm": 0.9172362705852074, "learning_rate": 3.3889225374514455e-06, "loss": 0.1522, "step": 4505 }, { "epoch": 0.41516561477864283, "grad_norm": 0.8595824614921564, "learning_rate": 3.388210495790964e-06, "loss": 0.1447, "step": 4506 }, { "epoch": 0.41525775095591283, "grad_norm": 0.8785044781998784, "learning_rate": 3.3874983716600414e-06, "loss": 0.1459, "step": 4507 }, { "epoch": 0.4153498871331828, "grad_norm": 0.9537125508997492, "learning_rate": 3.3867861651247997e-06, "loss": 0.1605, "step": 4508 }, { "epoch": 0.4154420233104528, "grad_norm": 0.89864935320374, "learning_rate": 3.3860738762513674e-06, "loss": 0.1539, "step": 4509 }, { "epoch": 0.4155341594877229, "grad_norm": 0.9531899881558427, "learning_rate": 3.3853615051058798e-06, "loss": 0.1476, "step": 4510 }, { "epoch": 0.4156262956649929, "grad_norm": 0.8988810611214069, "learning_rate": 3.384649051754481e-06, "loss": 0.1453, "step": 4511 }, { "epoch": 0.41571843184226287, "grad_norm": 0.9566810993912842, "learning_rate": 3.3839365162633237e-06, "loss": 0.1554, "step": 4512 }, { "epoch": 0.41581056801953287, "grad_norm": 0.9267067824355266, "learning_rate": 3.3832238986985643e-06, "loss": 0.1581, "step": 4513 }, { "epoch": 0.41590270419680286, "grad_norm": 0.9276426644752526, "learning_rate": 3.382511199126372e-06, "loss": 0.1353, "step": 4514 }, { "epoch": 0.41599484037407286, "grad_norm": 0.9891793471815831, "learning_rate": 3.3817984176129194e-06, "loss": 0.1456, "step": 4515 }, { "epoch": 0.41608697655134286, "grad_norm": 0.8778307632415944, "learning_rate": 3.3810855542243892e-06, "loss": 0.1417, "step": 4516 }, { "epoch": 0.4161791127286129, "grad_norm": 0.8922929124760213, "learning_rate": 3.38037260902697e-06, "loss": 0.146, "step": 4517 }, { "epoch": 0.4162712489058829, "grad_norm": 0.8832533852460722, "learning_rate": 3.3796595820868596e-06, "loss": 0.1403, "step": 4518 }, { "epoch": 0.4163633850831529, "grad_norm": 0.9314412639880251, "learning_rate": 3.378946473470262e-06, "loss": 0.1535, "step": 4519 }, { "epoch": 0.4164555212604229, "grad_norm": 0.8383119728511532, "learning_rate": 3.37823328324339e-06, "loss": 0.1357, "step": 4520 }, { "epoch": 0.4165476574376929, "grad_norm": 0.9162340271668836, "learning_rate": 3.3775200114724632e-06, "loss": 0.1479, "step": 4521 }, { "epoch": 0.4166397936149629, "grad_norm": 0.9381510485047666, "learning_rate": 3.3768066582237084e-06, "loss": 0.162, "step": 4522 }, { "epoch": 0.41673192979223295, "grad_norm": 0.903114085112653, "learning_rate": 3.3760932235633614e-06, "loss": 0.1555, "step": 4523 }, { "epoch": 0.41682406596950294, "grad_norm": 0.9275753356424354, "learning_rate": 3.3753797075576646e-06, "loss": 0.1515, "step": 4524 }, { "epoch": 0.41691620214677294, "grad_norm": 0.896898839228486, "learning_rate": 3.374666110272868e-06, "loss": 0.137, "step": 4525 }, { "epoch": 0.41700833832404294, "grad_norm": 0.8418394217228213, "learning_rate": 3.3739524317752276e-06, "loss": 0.1297, "step": 4526 }, { "epoch": 0.41710047450131293, "grad_norm": 0.8178077340704089, "learning_rate": 3.373238672131011e-06, "loss": 0.1383, "step": 4527 }, { "epoch": 0.41719261067858293, "grad_norm": 0.9015807010863438, "learning_rate": 3.372524831406489e-06, "loss": 0.1405, "step": 4528 }, { "epoch": 0.41728474685585293, "grad_norm": 0.9091807204242067, "learning_rate": 3.371810909667943e-06, "loss": 0.1425, "step": 4529 }, { "epoch": 0.417376883033123, "grad_norm": 0.8591056164974364, "learning_rate": 3.37109690698166e-06, "loss": 0.1298, "step": 4530 }, { "epoch": 0.417469019210393, "grad_norm": 0.8970907735401513, "learning_rate": 3.3703828234139357e-06, "loss": 0.1335, "step": 4531 }, { "epoch": 0.417561155387663, "grad_norm": 0.9376572782577746, "learning_rate": 3.369668659031072e-06, "loss": 0.1542, "step": 4532 }, { "epoch": 0.41765329156493297, "grad_norm": 0.9497750415555174, "learning_rate": 3.368954413899381e-06, "loss": 0.1557, "step": 4533 }, { "epoch": 0.41774542774220297, "grad_norm": 0.7958487365461181, "learning_rate": 3.368240088085177e-06, "loss": 0.1361, "step": 4534 }, { "epoch": 0.41783756391947297, "grad_norm": 0.9245976345181074, "learning_rate": 3.367525681654789e-06, "loss": 0.1636, "step": 4535 }, { "epoch": 0.41792970009674296, "grad_norm": 0.8430081265658356, "learning_rate": 3.366811194674548e-06, "loss": 0.1275, "step": 4536 }, { "epoch": 0.418021836274013, "grad_norm": 0.9432573855683106, "learning_rate": 3.3660966272107943e-06, "loss": 0.1401, "step": 4537 }, { "epoch": 0.418113972451283, "grad_norm": 0.878180759208677, "learning_rate": 3.365381979329875e-06, "loss": 0.1425, "step": 4538 }, { "epoch": 0.418206108628553, "grad_norm": 0.8842514952556141, "learning_rate": 3.3646672510981458e-06, "loss": 0.1518, "step": 4539 }, { "epoch": 0.418298244805823, "grad_norm": 0.890925588999716, "learning_rate": 3.363952442581969e-06, "loss": 0.1453, "step": 4540 }, { "epoch": 0.418390380983093, "grad_norm": 0.8994367794628658, "learning_rate": 3.3632375538477165e-06, "loss": 0.1379, "step": 4541 }, { "epoch": 0.418482517160363, "grad_norm": 1.0133084304207123, "learning_rate": 3.3625225849617625e-06, "loss": 0.1578, "step": 4542 }, { "epoch": 0.418574653337633, "grad_norm": 0.9206787724545995, "learning_rate": 3.3618075359904946e-06, "loss": 0.1376, "step": 4543 }, { "epoch": 0.41866678951490305, "grad_norm": 0.9561681753325468, "learning_rate": 3.361092407000304e-06, "loss": 0.1585, "step": 4544 }, { "epoch": 0.41875892569217305, "grad_norm": 0.8866968591572271, "learning_rate": 3.3603771980575907e-06, "loss": 0.1444, "step": 4545 }, { "epoch": 0.41885106186944304, "grad_norm": 0.9398479480714228, "learning_rate": 3.359661909228762e-06, "loss": 0.159, "step": 4546 }, { "epoch": 0.41894319804671304, "grad_norm": 0.9582293954402347, "learning_rate": 3.3589465405802324e-06, "loss": 0.1441, "step": 4547 }, { "epoch": 0.41903533422398304, "grad_norm": 0.9766029174637952, "learning_rate": 3.358231092178424e-06, "loss": 0.1474, "step": 4548 }, { "epoch": 0.41912747040125303, "grad_norm": 0.8689780855332151, "learning_rate": 3.3575155640897666e-06, "loss": 0.1414, "step": 4549 }, { "epoch": 0.41921960657852303, "grad_norm": 0.897722415255773, "learning_rate": 3.356799956380697e-06, "loss": 0.1402, "step": 4550 }, { "epoch": 0.4193117427557931, "grad_norm": 0.8923317845839843, "learning_rate": 3.3560842691176583e-06, "loss": 0.1449, "step": 4551 }, { "epoch": 0.4194038789330631, "grad_norm": 0.9314612085458115, "learning_rate": 3.355368502367104e-06, "loss": 0.1467, "step": 4552 }, { "epoch": 0.4194960151103331, "grad_norm": 0.8821129071299318, "learning_rate": 3.354652656195492e-06, "loss": 0.1522, "step": 4553 }, { "epoch": 0.4195881512876031, "grad_norm": 0.9219280388280229, "learning_rate": 3.3539367306692884e-06, "loss": 0.1479, "step": 4554 }, { "epoch": 0.41968028746487307, "grad_norm": 0.894711504158373, "learning_rate": 3.3532207258549676e-06, "loss": 0.1422, "step": 4555 }, { "epoch": 0.41977242364214307, "grad_norm": 0.8375395309893726, "learning_rate": 3.352504641819011e-06, "loss": 0.1351, "step": 4556 }, { "epoch": 0.4198645598194131, "grad_norm": 0.9230302453034883, "learning_rate": 3.3517884786279065e-06, "loss": 0.1455, "step": 4557 }, { "epoch": 0.4199566959966831, "grad_norm": 0.8746154702964645, "learning_rate": 3.3510722363481505e-06, "loss": 0.1318, "step": 4558 }, { "epoch": 0.4200488321739531, "grad_norm": 0.992793698546288, "learning_rate": 3.350355915046245e-06, "loss": 0.1579, "step": 4559 }, { "epoch": 0.4201409683512231, "grad_norm": 0.9214331623204591, "learning_rate": 3.3496395147887017e-06, "loss": 0.1434, "step": 4560 }, { "epoch": 0.4202331045284931, "grad_norm": 0.9457222587582186, "learning_rate": 3.348923035642038e-06, "loss": 0.1534, "step": 4561 }, { "epoch": 0.4203252407057631, "grad_norm": 0.956493474906948, "learning_rate": 3.3482064776727784e-06, "loss": 0.1621, "step": 4562 }, { "epoch": 0.4204173768830331, "grad_norm": 0.8780357825071199, "learning_rate": 3.3474898409474573e-06, "loss": 0.1429, "step": 4563 }, { "epoch": 0.42050951306030315, "grad_norm": 1.0473299857234213, "learning_rate": 3.3467731255326123e-06, "loss": 0.1698, "step": 4564 }, { "epoch": 0.42060164923757315, "grad_norm": 0.9885364033480389, "learning_rate": 3.346056331494792e-06, "loss": 0.1444, "step": 4565 }, { "epoch": 0.42069378541484315, "grad_norm": 0.8701414593375438, "learning_rate": 3.34533945890055e-06, "loss": 0.1327, "step": 4566 }, { "epoch": 0.42078592159211314, "grad_norm": 0.7992360530702873, "learning_rate": 3.344622507816448e-06, "loss": 0.1254, "step": 4567 }, { "epoch": 0.42087805776938314, "grad_norm": 0.8571525200902486, "learning_rate": 3.343905478309056e-06, "loss": 0.1392, "step": 4568 }, { "epoch": 0.42097019394665314, "grad_norm": 1.1164661745110709, "learning_rate": 3.3431883704449485e-06, "loss": 0.165, "step": 4569 }, { "epoch": 0.42106233012392313, "grad_norm": 1.0685661318077153, "learning_rate": 3.342471184290711e-06, "loss": 0.1468, "step": 4570 }, { "epoch": 0.4211544663011932, "grad_norm": 1.0295348039132994, "learning_rate": 3.3417539199129327e-06, "loss": 0.1755, "step": 4571 }, { "epoch": 0.4212466024784632, "grad_norm": 0.8852635534731436, "learning_rate": 3.341036577378213e-06, "loss": 0.141, "step": 4572 }, { "epoch": 0.4213387386557332, "grad_norm": 1.0333297539743211, "learning_rate": 3.3403191567531563e-06, "loss": 0.1612, "step": 4573 }, { "epoch": 0.4214308748330032, "grad_norm": 0.9589308822078715, "learning_rate": 3.3396016581043757e-06, "loss": 0.1416, "step": 4574 }, { "epoch": 0.4215230110102732, "grad_norm": 1.0447741475735821, "learning_rate": 3.3388840814984896e-06, "loss": 0.1674, "step": 4575 }, { "epoch": 0.42161514718754317, "grad_norm": 0.9598745446739234, "learning_rate": 3.3381664270021273e-06, "loss": 0.1526, "step": 4576 }, { "epoch": 0.42170728336481317, "grad_norm": 0.8744813212867445, "learning_rate": 3.337448694681922e-06, "loss": 0.1466, "step": 4577 }, { "epoch": 0.4217994195420832, "grad_norm": 0.8700235852673424, "learning_rate": 3.3367308846045155e-06, "loss": 0.1504, "step": 4578 }, { "epoch": 0.4218915557193532, "grad_norm": 0.9462197033773357, "learning_rate": 3.3360129968365556e-06, "loss": 0.1571, "step": 4579 }, { "epoch": 0.4219836918966232, "grad_norm": 1.016930154691944, "learning_rate": 3.335295031444699e-06, "loss": 0.1627, "step": 4580 }, { "epoch": 0.4220758280738932, "grad_norm": 1.0256579540504431, "learning_rate": 3.3345769884956097e-06, "loss": 0.1592, "step": 4581 }, { "epoch": 0.4221679642511632, "grad_norm": 0.8357570594016366, "learning_rate": 3.3338588680559565e-06, "loss": 0.1362, "step": 4582 }, { "epoch": 0.4222601004284332, "grad_norm": 0.8566130957892835, "learning_rate": 3.3331406701924173e-06, "loss": 0.1412, "step": 4583 }, { "epoch": 0.4223522366057032, "grad_norm": 1.0122900400853339, "learning_rate": 3.3324223949716783e-06, "loss": 0.1658, "step": 4584 }, { "epoch": 0.42244437278297325, "grad_norm": 1.026159322299466, "learning_rate": 3.3317040424604296e-06, "loss": 0.1389, "step": 4585 }, { "epoch": 0.42253650896024325, "grad_norm": 0.9442753705533472, "learning_rate": 3.330985612725371e-06, "loss": 0.1301, "step": 4586 }, { "epoch": 0.42262864513751325, "grad_norm": 0.9828708879208898, "learning_rate": 3.330267105833209e-06, "loss": 0.156, "step": 4587 }, { "epoch": 0.42272078131478324, "grad_norm": 0.9244268518849134, "learning_rate": 3.3295485218506568e-06, "loss": 0.1427, "step": 4588 }, { "epoch": 0.42281291749205324, "grad_norm": 0.9784915828014263, "learning_rate": 3.328829860844435e-06, "loss": 0.1543, "step": 4589 }, { "epoch": 0.42290505366932324, "grad_norm": 0.9435802145767377, "learning_rate": 3.328111122881272e-06, "loss": 0.1499, "step": 4590 }, { "epoch": 0.4229971898465933, "grad_norm": 0.9158661147876648, "learning_rate": 3.327392308027902e-06, "loss": 0.1486, "step": 4591 }, { "epoch": 0.4230893260238633, "grad_norm": 0.9137787014812229, "learning_rate": 3.3266734163510668e-06, "loss": 0.1423, "step": 4592 }, { "epoch": 0.4231814622011333, "grad_norm": 0.9229747858346737, "learning_rate": 3.325954447917516e-06, "loss": 0.145, "step": 4593 }, { "epoch": 0.4232735983784033, "grad_norm": 0.8730894128266261, "learning_rate": 3.3252354027940055e-06, "loss": 0.149, "step": 4594 }, { "epoch": 0.4233657345556733, "grad_norm": 0.9057946535086321, "learning_rate": 3.3245162810472998e-06, "loss": 0.155, "step": 4595 }, { "epoch": 0.4234578707329433, "grad_norm": 0.8590449088625499, "learning_rate": 3.323797082744168e-06, "loss": 0.1412, "step": 4596 }, { "epoch": 0.4235500069102133, "grad_norm": 0.8387324971373283, "learning_rate": 3.3230778079513883e-06, "loss": 0.1345, "step": 4597 }, { "epoch": 0.4236421430874833, "grad_norm": 0.7968624023810995, "learning_rate": 3.3223584567357458e-06, "loss": 0.1398, "step": 4598 }, { "epoch": 0.4237342792647533, "grad_norm": 0.9163589605700169, "learning_rate": 3.3216390291640327e-06, "loss": 0.1479, "step": 4599 }, { "epoch": 0.4238264154420233, "grad_norm": 0.9194471612561067, "learning_rate": 3.320919525303047e-06, "loss": 0.1504, "step": 4600 }, { "epoch": 0.4239185516192933, "grad_norm": 0.8551752072019035, "learning_rate": 3.3201999452195942e-06, "loss": 0.1497, "step": 4601 }, { "epoch": 0.4240106877965633, "grad_norm": 0.8814315296349494, "learning_rate": 3.3194802889804887e-06, "loss": 0.1511, "step": 4602 }, { "epoch": 0.4241028239738333, "grad_norm": 0.8960260768130908, "learning_rate": 3.318760556652551e-06, "loss": 0.1413, "step": 4603 }, { "epoch": 0.4241949601511033, "grad_norm": 0.9430328687544396, "learning_rate": 3.318040748302606e-06, "loss": 0.1566, "step": 4604 }, { "epoch": 0.42428709632837336, "grad_norm": 0.8505985044861099, "learning_rate": 3.317320863997491e-06, "loss": 0.1383, "step": 4605 }, { "epoch": 0.42437923250564336, "grad_norm": 0.9368280598249041, "learning_rate": 3.316600903804045e-06, "loss": 0.1509, "step": 4606 }, { "epoch": 0.42447136868291335, "grad_norm": 0.9237871325274064, "learning_rate": 3.3158808677891167e-06, "loss": 0.1571, "step": 4607 }, { "epoch": 0.42456350486018335, "grad_norm": 0.9214272756644586, "learning_rate": 3.315160756019563e-06, "loss": 0.1448, "step": 4608 }, { "epoch": 0.42465564103745335, "grad_norm": 0.9531573937406549, "learning_rate": 3.314440568562245e-06, "loss": 0.149, "step": 4609 }, { "epoch": 0.42474777721472334, "grad_norm": 0.8698615665529474, "learning_rate": 3.3137203054840323e-06, "loss": 0.1444, "step": 4610 }, { "epoch": 0.42483991339199334, "grad_norm": 0.8876415171072912, "learning_rate": 3.312999966851802e-06, "loss": 0.1431, "step": 4611 }, { "epoch": 0.4249320495692634, "grad_norm": 0.9031438952079939, "learning_rate": 3.3122795527324374e-06, "loss": 0.1436, "step": 4612 }, { "epoch": 0.4250241857465334, "grad_norm": 0.9283680531824011, "learning_rate": 3.3115590631928284e-06, "loss": 0.1478, "step": 4613 }, { "epoch": 0.4251163219238034, "grad_norm": 0.949901482292331, "learning_rate": 3.3108384982998736e-06, "loss": 0.1534, "step": 4614 }, { "epoch": 0.4252084581010734, "grad_norm": 0.8647134104622481, "learning_rate": 3.310117858120476e-06, "loss": 0.144, "step": 4615 }, { "epoch": 0.4253005942783434, "grad_norm": 0.9460010594500424, "learning_rate": 3.3093971427215497e-06, "loss": 0.1514, "step": 4616 }, { "epoch": 0.4253927304556134, "grad_norm": 0.8916111121637049, "learning_rate": 3.3086763521700105e-06, "loss": 0.1418, "step": 4617 }, { "epoch": 0.4254848666328834, "grad_norm": 0.9277173612977886, "learning_rate": 3.307955486532785e-06, "loss": 0.1511, "step": 4618 }, { "epoch": 0.4255770028101534, "grad_norm": 0.9048532366246484, "learning_rate": 3.3072345458768063e-06, "loss": 0.1489, "step": 4619 }, { "epoch": 0.4256691389874234, "grad_norm": 0.9137996003679597, "learning_rate": 3.306513530269012e-06, "loss": 0.1414, "step": 4620 }, { "epoch": 0.4257612751646934, "grad_norm": 0.890587578081525, "learning_rate": 3.30579243977635e-06, "loss": 0.147, "step": 4621 }, { "epoch": 0.4258534113419634, "grad_norm": 0.8890025500786466, "learning_rate": 3.305071274465774e-06, "loss": 0.1409, "step": 4622 }, { "epoch": 0.4259455475192334, "grad_norm": 0.9500786995375841, "learning_rate": 3.304350034404243e-06, "loss": 0.146, "step": 4623 }, { "epoch": 0.4260376836965034, "grad_norm": 0.8989819546643587, "learning_rate": 3.3036287196587245e-06, "loss": 0.1493, "step": 4624 }, { "epoch": 0.42612981987377346, "grad_norm": 0.8978782164819534, "learning_rate": 3.3029073302961933e-06, "loss": 0.1507, "step": 4625 }, { "epoch": 0.42622195605104346, "grad_norm": 0.8881037216614471, "learning_rate": 3.3021858663836302e-06, "loss": 0.1488, "step": 4626 }, { "epoch": 0.42631409222831346, "grad_norm": 0.9523384549504138, "learning_rate": 3.301464327988023e-06, "loss": 0.1527, "step": 4627 }, { "epoch": 0.42640622840558345, "grad_norm": 0.9354390642807312, "learning_rate": 3.300742715176366e-06, "loss": 0.1607, "step": 4628 }, { "epoch": 0.42649836458285345, "grad_norm": 0.9184053064450814, "learning_rate": 3.300021028015662e-06, "loss": 0.163, "step": 4629 }, { "epoch": 0.42659050076012345, "grad_norm": 1.0349141173183476, "learning_rate": 3.29929926657292e-06, "loss": 0.1473, "step": 4630 }, { "epoch": 0.42668263693739344, "grad_norm": 0.9429341556872202, "learning_rate": 3.298577430915155e-06, "loss": 0.1457, "step": 4631 }, { "epoch": 0.4267747731146635, "grad_norm": 0.9133407174487953, "learning_rate": 3.297855521109389e-06, "loss": 0.1428, "step": 4632 }, { "epoch": 0.4268669092919335, "grad_norm": 0.9300103407525209, "learning_rate": 3.297133537222652e-06, "loss": 0.1494, "step": 4633 }, { "epoch": 0.4269590454692035, "grad_norm": 1.0055639970528747, "learning_rate": 3.2964114793219802e-06, "loss": 0.1581, "step": 4634 }, { "epoch": 0.4270511816464735, "grad_norm": 0.923588698817885, "learning_rate": 3.2956893474744177e-06, "loss": 0.1634, "step": 4635 }, { "epoch": 0.4271433178237435, "grad_norm": 0.965170088385289, "learning_rate": 3.294967141747013e-06, "loss": 0.157, "step": 4636 }, { "epoch": 0.4272354540010135, "grad_norm": 0.9313997319447985, "learning_rate": 3.294244862206824e-06, "loss": 0.147, "step": 4637 }, { "epoch": 0.4273275901782835, "grad_norm": 0.9533289369081304, "learning_rate": 3.293522508920914e-06, "loss": 0.15, "step": 4638 }, { "epoch": 0.42741972635555353, "grad_norm": 0.9066220272117559, "learning_rate": 3.292800081956354e-06, "loss": 0.1534, "step": 4639 }, { "epoch": 0.42751186253282353, "grad_norm": 0.9284221041524333, "learning_rate": 3.29207758138022e-06, "loss": 0.1683, "step": 4640 }, { "epoch": 0.4276039987100935, "grad_norm": 0.9581504482495421, "learning_rate": 3.2913550072595986e-06, "loss": 0.152, "step": 4641 }, { "epoch": 0.4276961348873635, "grad_norm": 0.9272473768699457, "learning_rate": 3.290632359661578e-06, "loss": 0.1514, "step": 4642 }, { "epoch": 0.4277882710646335, "grad_norm": 0.91905411911038, "learning_rate": 3.289909638653259e-06, "loss": 0.155, "step": 4643 }, { "epoch": 0.4278804072419035, "grad_norm": 0.8779240052127671, "learning_rate": 3.289186844301745e-06, "loss": 0.1467, "step": 4644 }, { "epoch": 0.4279725434191735, "grad_norm": 0.92752988914161, "learning_rate": 3.2884639766741473e-06, "loss": 0.1503, "step": 4645 }, { "epoch": 0.42806467959644356, "grad_norm": 0.9044292622735791, "learning_rate": 3.2877410358375845e-06, "loss": 0.1484, "step": 4646 }, { "epoch": 0.42815681577371356, "grad_norm": 0.98783505304058, "learning_rate": 3.287018021859182e-06, "loss": 0.1379, "step": 4647 }, { "epoch": 0.42824895195098356, "grad_norm": 0.9153357355574386, "learning_rate": 3.2862949348060707e-06, "loss": 0.1485, "step": 4648 }, { "epoch": 0.42834108812825356, "grad_norm": 0.8891298445580663, "learning_rate": 3.285571774745391e-06, "loss": 0.145, "step": 4649 }, { "epoch": 0.42843322430552355, "grad_norm": 0.9539432888562989, "learning_rate": 3.2848485417442867e-06, "loss": 0.143, "step": 4650 }, { "epoch": 0.42852536048279355, "grad_norm": 0.9597047185512093, "learning_rate": 3.2841252358699115e-06, "loss": 0.1428, "step": 4651 }, { "epoch": 0.4286174966600636, "grad_norm": 0.9896270645503378, "learning_rate": 3.2834018571894233e-06, "loss": 0.1447, "step": 4652 }, { "epoch": 0.4287096328373336, "grad_norm": 1.0332260681499266, "learning_rate": 3.282678405769988e-06, "loss": 0.1542, "step": 4653 }, { "epoch": 0.4288017690146036, "grad_norm": 0.9539150898547636, "learning_rate": 3.2819548816787794e-06, "loss": 0.1526, "step": 4654 }, { "epoch": 0.4288939051918736, "grad_norm": 0.8810375273548426, "learning_rate": 3.2812312849829754e-06, "loss": 0.1399, "step": 4655 }, { "epoch": 0.4289860413691436, "grad_norm": 0.8775173004031491, "learning_rate": 3.280507615749763e-06, "loss": 0.1401, "step": 4656 }, { "epoch": 0.4290781775464136, "grad_norm": 0.9351979870510768, "learning_rate": 3.279783874046334e-06, "loss": 0.1516, "step": 4657 }, { "epoch": 0.4291703137236836, "grad_norm": 0.9308439149797066, "learning_rate": 3.2790600599398882e-06, "loss": 0.1416, "step": 4658 }, { "epoch": 0.42926244990095364, "grad_norm": 0.9622148973196127, "learning_rate": 3.2783361734976325e-06, "loss": 0.1585, "step": 4659 }, { "epoch": 0.42935458607822363, "grad_norm": 0.8675676981158611, "learning_rate": 3.2776122147867782e-06, "loss": 0.1388, "step": 4660 }, { "epoch": 0.42944672225549363, "grad_norm": 0.8296889413789598, "learning_rate": 3.276888183874547e-06, "loss": 0.1348, "step": 4661 }, { "epoch": 0.4295388584327636, "grad_norm": 0.8858689329829845, "learning_rate": 3.2761640808281647e-06, "loss": 0.1405, "step": 4662 }, { "epoch": 0.4296309946100336, "grad_norm": 0.9024337133238689, "learning_rate": 3.275439905714863e-06, "loss": 0.1478, "step": 4663 }, { "epoch": 0.4297231307873036, "grad_norm": 0.9342100871010554, "learning_rate": 3.274715658601883e-06, "loss": 0.1545, "step": 4664 }, { "epoch": 0.4298152669645736, "grad_norm": 0.906760939816061, "learning_rate": 3.273991339556471e-06, "loss": 0.144, "step": 4665 }, { "epoch": 0.42990740314184367, "grad_norm": 0.9287086900859022, "learning_rate": 3.2732669486458796e-06, "loss": 0.1617, "step": 4666 }, { "epoch": 0.42999953931911367, "grad_norm": 0.8811760608349966, "learning_rate": 3.272542485937369e-06, "loss": 0.1321, "step": 4667 }, { "epoch": 0.43009167549638366, "grad_norm": 0.904797164755038, "learning_rate": 3.271817951498205e-06, "loss": 0.1404, "step": 4668 }, { "epoch": 0.43018381167365366, "grad_norm": 0.8849305530273994, "learning_rate": 3.271093345395661e-06, "loss": 0.146, "step": 4669 }, { "epoch": 0.43027594785092366, "grad_norm": 0.8885015854103672, "learning_rate": 3.270368667697018e-06, "loss": 0.145, "step": 4670 }, { "epoch": 0.43036808402819365, "grad_norm": 0.8259412462687333, "learning_rate": 3.2696439184695606e-06, "loss": 0.124, "step": 4671 }, { "epoch": 0.43046022020546365, "grad_norm": 0.8918841353411899, "learning_rate": 3.2689190977805822e-06, "loss": 0.1293, "step": 4672 }, { "epoch": 0.4305523563827337, "grad_norm": 0.9323176495034798, "learning_rate": 3.2681942056973838e-06, "loss": 0.141, "step": 4673 }, { "epoch": 0.4306444925600037, "grad_norm": 0.9326715643680737, "learning_rate": 3.26746924228727e-06, "loss": 0.1574, "step": 4674 }, { "epoch": 0.4307366287372737, "grad_norm": 0.8857621090497365, "learning_rate": 3.2667442076175543e-06, "loss": 0.1416, "step": 4675 }, { "epoch": 0.4308287649145437, "grad_norm": 0.9038051971376051, "learning_rate": 3.2660191017555567e-06, "loss": 0.1414, "step": 4676 }, { "epoch": 0.4309209010918137, "grad_norm": 0.9216378597983858, "learning_rate": 3.2652939247686027e-06, "loss": 0.1366, "step": 4677 }, { "epoch": 0.4310130372690837, "grad_norm": 0.9440044237173141, "learning_rate": 3.2645686767240263e-06, "loss": 0.1478, "step": 4678 }, { "epoch": 0.4311051734463537, "grad_norm": 0.9933274011071618, "learning_rate": 3.2638433576891647e-06, "loss": 0.1602, "step": 4679 }, { "epoch": 0.43119730962362374, "grad_norm": 0.9203155714472351, "learning_rate": 3.263117967731366e-06, "loss": 0.1558, "step": 4680 }, { "epoch": 0.43128944580089373, "grad_norm": 0.8740835069784506, "learning_rate": 3.2623925069179817e-06, "loss": 0.1335, "step": 4681 }, { "epoch": 0.43138158197816373, "grad_norm": 1.0122592203806835, "learning_rate": 3.2616669753163717e-06, "loss": 0.1554, "step": 4682 }, { "epoch": 0.4314737181554337, "grad_norm": 0.9154881263246533, "learning_rate": 3.2609413729939005e-06, "loss": 0.1444, "step": 4683 }, { "epoch": 0.4315658543327037, "grad_norm": 0.892742980949353, "learning_rate": 3.260215700017941e-06, "loss": 0.1378, "step": 4684 }, { "epoch": 0.4316579905099737, "grad_norm": 0.9415049276203106, "learning_rate": 3.2594899564558713e-06, "loss": 0.1585, "step": 4685 }, { "epoch": 0.4317501266872438, "grad_norm": 0.8706508361164794, "learning_rate": 3.2587641423750782e-06, "loss": 0.1431, "step": 4686 }, { "epoch": 0.43184226286451377, "grad_norm": 0.8445511287505905, "learning_rate": 3.2580382578429525e-06, "loss": 0.1384, "step": 4687 }, { "epoch": 0.43193439904178377, "grad_norm": 0.9137170420709978, "learning_rate": 3.2573123029268926e-06, "loss": 0.1468, "step": 4688 }, { "epoch": 0.43202653521905376, "grad_norm": 0.9192552269512315, "learning_rate": 3.256586277694305e-06, "loss": 0.1407, "step": 4689 }, { "epoch": 0.43211867139632376, "grad_norm": 0.9422981334954762, "learning_rate": 3.255860182212599e-06, "loss": 0.159, "step": 4690 }, { "epoch": 0.43221080757359376, "grad_norm": 0.888256356837777, "learning_rate": 3.2551340165491947e-06, "loss": 0.148, "step": 4691 }, { "epoch": 0.43230294375086376, "grad_norm": 0.9066577043703513, "learning_rate": 3.254407780771515e-06, "loss": 0.1446, "step": 4692 }, { "epoch": 0.4323950799281338, "grad_norm": 0.8204021095990877, "learning_rate": 3.2536814749469915e-06, "loss": 0.1287, "step": 4693 }, { "epoch": 0.4324872161054038, "grad_norm": 0.8675428336875458, "learning_rate": 3.252955099143062e-06, "loss": 0.1402, "step": 4694 }, { "epoch": 0.4325793522826738, "grad_norm": 0.8491928019041444, "learning_rate": 3.2522286534271706e-06, "loss": 0.1403, "step": 4695 }, { "epoch": 0.4326714884599438, "grad_norm": 0.8968385179030194, "learning_rate": 3.2515021378667677e-06, "loss": 0.1456, "step": 4696 }, { "epoch": 0.4327636246372138, "grad_norm": 0.8221696054310007, "learning_rate": 3.250775552529312e-06, "loss": 0.1304, "step": 4697 }, { "epoch": 0.4328557608144838, "grad_norm": 0.924831124311071, "learning_rate": 3.250048897482263e-06, "loss": 0.1527, "step": 4698 }, { "epoch": 0.4329478969917538, "grad_norm": 0.9196754936163174, "learning_rate": 3.2493221727930947e-06, "loss": 0.1568, "step": 4699 }, { "epoch": 0.43304003316902384, "grad_norm": 0.9493069882745526, "learning_rate": 3.2485953785292813e-06, "loss": 0.1467, "step": 4700 }, { "epoch": 0.43313216934629384, "grad_norm": 0.8814778987108999, "learning_rate": 3.247868514758307e-06, "loss": 0.1507, "step": 4701 }, { "epoch": 0.43322430552356384, "grad_norm": 0.9240643672635029, "learning_rate": 3.2471415815476603e-06, "loss": 0.1526, "step": 4702 }, { "epoch": 0.43331644170083383, "grad_norm": 0.8567708852826385, "learning_rate": 3.246414578964837e-06, "loss": 0.1372, "step": 4703 }, { "epoch": 0.43340857787810383, "grad_norm": 0.8995443727689885, "learning_rate": 3.24568750707734e-06, "loss": 0.1518, "step": 4704 }, { "epoch": 0.4335007140553738, "grad_norm": 0.8848310081209736, "learning_rate": 3.2449603659526787e-06, "loss": 0.1414, "step": 4705 }, { "epoch": 0.4335928502326438, "grad_norm": 0.9253510253751343, "learning_rate": 3.244233155658365e-06, "loss": 0.1561, "step": 4706 }, { "epoch": 0.4336849864099139, "grad_norm": 0.8987080572532566, "learning_rate": 3.2435058762619243e-06, "loss": 0.1459, "step": 4707 }, { "epoch": 0.43377712258718387, "grad_norm": 0.8418375834879664, "learning_rate": 3.2427785278308832e-06, "loss": 0.1374, "step": 4708 }, { "epoch": 0.43386925876445387, "grad_norm": 0.8605526697968859, "learning_rate": 3.242051110432775e-06, "loss": 0.1493, "step": 4709 }, { "epoch": 0.43396139494172387, "grad_norm": 0.8897085366064001, "learning_rate": 3.241323624135142e-06, "loss": 0.1448, "step": 4710 }, { "epoch": 0.43405353111899386, "grad_norm": 0.8611729087577313, "learning_rate": 3.2405960690055307e-06, "loss": 0.137, "step": 4711 }, { "epoch": 0.43414566729626386, "grad_norm": 0.8688996045321737, "learning_rate": 3.2398684451114936e-06, "loss": 0.1367, "step": 4712 }, { "epoch": 0.43423780347353386, "grad_norm": 0.9014621771431871, "learning_rate": 3.2391407525205933e-06, "loss": 0.1479, "step": 4713 }, { "epoch": 0.4343299396508039, "grad_norm": 0.9021014682601557, "learning_rate": 3.2384129913003935e-06, "loss": 0.1499, "step": 4714 }, { "epoch": 0.4344220758280739, "grad_norm": 0.9191301374460689, "learning_rate": 3.237685161518468e-06, "loss": 0.1513, "step": 4715 }, { "epoch": 0.4345142120053439, "grad_norm": 0.9119658593871829, "learning_rate": 3.236957263242396e-06, "loss": 0.1439, "step": 4716 }, { "epoch": 0.4346063481826139, "grad_norm": 0.9280932838157593, "learning_rate": 3.2362292965397633e-06, "loss": 0.1528, "step": 4717 }, { "epoch": 0.4346984843598839, "grad_norm": 0.9312157213794583, "learning_rate": 3.235501261478161e-06, "loss": 0.146, "step": 4718 }, { "epoch": 0.4347906205371539, "grad_norm": 0.9302842434648845, "learning_rate": 3.2347731581251866e-06, "loss": 0.1393, "step": 4719 }, { "epoch": 0.43488275671442395, "grad_norm": 0.8610238509695106, "learning_rate": 3.2340449865484464e-06, "loss": 0.1412, "step": 4720 }, { "epoch": 0.43497489289169394, "grad_norm": 0.8708422464114209, "learning_rate": 3.23331674681555e-06, "loss": 0.127, "step": 4721 }, { "epoch": 0.43506702906896394, "grad_norm": 0.9170158188594641, "learning_rate": 3.2325884389941147e-06, "loss": 0.1388, "step": 4722 }, { "epoch": 0.43515916524623394, "grad_norm": 0.9055547940614805, "learning_rate": 3.2318600631517637e-06, "loss": 0.1438, "step": 4723 }, { "epoch": 0.43525130142350393, "grad_norm": 0.937925363757348, "learning_rate": 3.2311316193561277e-06, "loss": 0.155, "step": 4724 }, { "epoch": 0.43534343760077393, "grad_norm": 0.8980494734303124, "learning_rate": 3.230403107674841e-06, "loss": 0.1511, "step": 4725 }, { "epoch": 0.4354355737780439, "grad_norm": 0.9131532114439331, "learning_rate": 3.2296745281755485e-06, "loss": 0.146, "step": 4726 }, { "epoch": 0.435527709955314, "grad_norm": 0.9058295332114711, "learning_rate": 3.2289458809258965e-06, "loss": 0.1552, "step": 4727 }, { "epoch": 0.435619846132584, "grad_norm": 0.8947703410791472, "learning_rate": 3.2282171659935415e-06, "loss": 0.1388, "step": 4728 }, { "epoch": 0.435711982309854, "grad_norm": 0.8926286281907707, "learning_rate": 3.2274883834461444e-06, "loss": 0.1344, "step": 4729 }, { "epoch": 0.43580411848712397, "grad_norm": 0.9531048239256399, "learning_rate": 3.2267595333513724e-06, "loss": 0.1592, "step": 4730 }, { "epoch": 0.43589625466439397, "grad_norm": 0.9201501804582292, "learning_rate": 3.2260306157768994e-06, "loss": 0.148, "step": 4731 }, { "epoch": 0.43598839084166396, "grad_norm": 0.9174290109231636, "learning_rate": 3.2253016307904063e-06, "loss": 0.1385, "step": 4732 }, { "epoch": 0.43608052701893396, "grad_norm": 0.8375669138427223, "learning_rate": 3.224572578459577e-06, "loss": 0.1278, "step": 4733 }, { "epoch": 0.436172663196204, "grad_norm": 0.963997166950795, "learning_rate": 3.2238434588521078e-06, "loss": 0.1501, "step": 4734 }, { "epoch": 0.436264799373474, "grad_norm": 0.9336340407484712, "learning_rate": 3.2231142720356946e-06, "loss": 0.1528, "step": 4735 }, { "epoch": 0.436356935550744, "grad_norm": 0.9042393512372882, "learning_rate": 3.222385018078043e-06, "loss": 0.1368, "step": 4736 }, { "epoch": 0.436449071728014, "grad_norm": 0.9438201754765096, "learning_rate": 3.2216556970468656e-06, "loss": 0.1562, "step": 4737 }, { "epoch": 0.436541207905284, "grad_norm": 0.938414838398955, "learning_rate": 3.2209263090098785e-06, "loss": 0.1526, "step": 4738 }, { "epoch": 0.436633344082554, "grad_norm": 0.9018511969382467, "learning_rate": 3.220196854034806e-06, "loss": 0.138, "step": 4739 }, { "epoch": 0.436725480259824, "grad_norm": 0.8535484944038092, "learning_rate": 3.2194673321893787e-06, "loss": 0.1288, "step": 4740 }, { "epoch": 0.43681761643709405, "grad_norm": 0.8802381462413789, "learning_rate": 3.2187377435413316e-06, "loss": 0.1487, "step": 4741 }, { "epoch": 0.43690975261436404, "grad_norm": 0.9534514302434024, "learning_rate": 3.2180080881584075e-06, "loss": 0.1572, "step": 4742 }, { "epoch": 0.43700188879163404, "grad_norm": 0.8807997475524247, "learning_rate": 3.2172783661083556e-06, "loss": 0.1514, "step": 4743 }, { "epoch": 0.43709402496890404, "grad_norm": 0.9205359815568849, "learning_rate": 3.21654857745893e-06, "loss": 0.1507, "step": 4744 }, { "epoch": 0.43718616114617403, "grad_norm": 0.8949200471013621, "learning_rate": 3.2158187222778926e-06, "loss": 0.1548, "step": 4745 }, { "epoch": 0.43727829732344403, "grad_norm": 0.893054846565139, "learning_rate": 3.215088800633009e-06, "loss": 0.1368, "step": 4746 }, { "epoch": 0.43737043350071403, "grad_norm": 0.8943073971027454, "learning_rate": 3.214358812592053e-06, "loss": 0.135, "step": 4747 }, { "epoch": 0.4374625696779841, "grad_norm": 0.9181687265142016, "learning_rate": 3.2136287582228048e-06, "loss": 0.1328, "step": 4748 }, { "epoch": 0.4375547058552541, "grad_norm": 0.9251073922168981, "learning_rate": 3.2128986375930495e-06, "loss": 0.1437, "step": 4749 }, { "epoch": 0.4376468420325241, "grad_norm": 0.8926775990316347, "learning_rate": 3.212168450770579e-06, "loss": 0.1378, "step": 4750 }, { "epoch": 0.43773897820979407, "grad_norm": 0.853248400512252, "learning_rate": 3.2114381978231918e-06, "loss": 0.1443, "step": 4751 }, { "epoch": 0.43783111438706407, "grad_norm": 0.9583420162746287, "learning_rate": 3.21070787881869e-06, "loss": 0.1488, "step": 4752 }, { "epoch": 0.43792325056433407, "grad_norm": 0.8855991731771236, "learning_rate": 3.2099774938248866e-06, "loss": 0.1349, "step": 4753 }, { "epoch": 0.4380153867416041, "grad_norm": 0.8946899043733684, "learning_rate": 3.2092470429095955e-06, "loss": 0.1519, "step": 4754 }, { "epoch": 0.4381075229188741, "grad_norm": 0.8759680421106206, "learning_rate": 3.208516526140641e-06, "loss": 0.14, "step": 4755 }, { "epoch": 0.4381996590961441, "grad_norm": 0.9408448869394528, "learning_rate": 3.2077859435858503e-06, "loss": 0.1601, "step": 4756 }, { "epoch": 0.4382917952734141, "grad_norm": 0.9347952147345089, "learning_rate": 3.2070552953130586e-06, "loss": 0.1589, "step": 4757 }, { "epoch": 0.4383839314506841, "grad_norm": 0.9081980861793578, "learning_rate": 3.2063245813901068e-06, "loss": 0.1582, "step": 4758 }, { "epoch": 0.4384760676279541, "grad_norm": 0.8939823014855057, "learning_rate": 3.2055938018848417e-06, "loss": 0.154, "step": 4759 }, { "epoch": 0.4385682038052241, "grad_norm": 0.9056882583574686, "learning_rate": 3.2048629568651153e-06, "loss": 0.1539, "step": 4760 }, { "epoch": 0.43866033998249415, "grad_norm": 0.8749131675102494, "learning_rate": 3.2041320463987886e-06, "loss": 0.1444, "step": 4761 }, { "epoch": 0.43875247615976415, "grad_norm": 0.8808614298731526, "learning_rate": 3.2034010705537245e-06, "loss": 0.1415, "step": 4762 }, { "epoch": 0.43884461233703415, "grad_norm": 0.9542017201745561, "learning_rate": 3.202670029397796e-06, "loss": 0.1473, "step": 4763 }, { "epoch": 0.43893674851430414, "grad_norm": 0.9145181955112874, "learning_rate": 3.2019389229988794e-06, "loss": 0.1527, "step": 4764 }, { "epoch": 0.43902888469157414, "grad_norm": 0.9118897020011315, "learning_rate": 3.2012077514248592e-06, "loss": 0.1365, "step": 4765 }, { "epoch": 0.43912102086884414, "grad_norm": 0.8890964393279647, "learning_rate": 3.2004765147436228e-06, "loss": 0.1555, "step": 4766 }, { "epoch": 0.43921315704611413, "grad_norm": 0.9537052872925784, "learning_rate": 3.1997452130230664e-06, "loss": 0.1395, "step": 4767 }, { "epoch": 0.4393052932233842, "grad_norm": 0.924180348352055, "learning_rate": 3.1990138463310923e-06, "loss": 0.1405, "step": 4768 }, { "epoch": 0.4393974294006542, "grad_norm": 0.8637655086910015, "learning_rate": 3.1982824147356078e-06, "loss": 0.1447, "step": 4769 }, { "epoch": 0.4394895655779242, "grad_norm": 0.912924057763308, "learning_rate": 3.197550918304525e-06, "loss": 0.141, "step": 4770 }, { "epoch": 0.4395817017551942, "grad_norm": 0.9110661693707881, "learning_rate": 3.196819357105764e-06, "loss": 0.1416, "step": 4771 }, { "epoch": 0.4396738379324642, "grad_norm": 0.9416488247361345, "learning_rate": 3.196087731207252e-06, "loss": 0.1539, "step": 4772 }, { "epoch": 0.43976597410973417, "grad_norm": 0.9280205052202724, "learning_rate": 3.1953560406769184e-06, "loss": 0.15, "step": 4773 }, { "epoch": 0.43985811028700417, "grad_norm": 0.9515080388192296, "learning_rate": 3.194624285582702e-06, "loss": 0.1563, "step": 4774 }, { "epoch": 0.4399502464642742, "grad_norm": 0.8522352854395673, "learning_rate": 3.1938924659925457e-06, "loss": 0.1416, "step": 4775 }, { "epoch": 0.4400423826415442, "grad_norm": 0.8685099112900193, "learning_rate": 3.193160581974399e-06, "loss": 0.1275, "step": 4776 }, { "epoch": 0.4401345188188142, "grad_norm": 0.8909613526044037, "learning_rate": 3.1924286335962177e-06, "loss": 0.1475, "step": 4777 }, { "epoch": 0.4402266549960842, "grad_norm": 0.9496888743162736, "learning_rate": 3.1916966209259636e-06, "loss": 0.1506, "step": 4778 }, { "epoch": 0.4403187911733542, "grad_norm": 0.8581608533036216, "learning_rate": 3.1909645440316034e-06, "loss": 0.1322, "step": 4779 }, { "epoch": 0.4404109273506242, "grad_norm": 0.9290846371768269, "learning_rate": 3.1902324029811115e-06, "loss": 0.1433, "step": 4780 }, { "epoch": 0.4405030635278942, "grad_norm": 0.8837081497834979, "learning_rate": 3.1895001978424665e-06, "loss": 0.148, "step": 4781 }, { "epoch": 0.44059519970516425, "grad_norm": 0.8818053795949022, "learning_rate": 3.188767928683654e-06, "loss": 0.1572, "step": 4782 }, { "epoch": 0.44068733588243425, "grad_norm": 0.9566123724363703, "learning_rate": 3.188035595572665e-06, "loss": 0.1468, "step": 4783 }, { "epoch": 0.44077947205970425, "grad_norm": 0.9126945997475431, "learning_rate": 3.1873031985774972e-06, "loss": 0.1417, "step": 4784 }, { "epoch": 0.44087160823697424, "grad_norm": 0.9319946243034919, "learning_rate": 3.186570737766153e-06, "loss": 0.1524, "step": 4785 }, { "epoch": 0.44096374441424424, "grad_norm": 0.8594125708493436, "learning_rate": 3.1858382132066422e-06, "loss": 0.1428, "step": 4786 }, { "epoch": 0.44105588059151424, "grad_norm": 0.8475132117275923, "learning_rate": 3.1851056249669786e-06, "loss": 0.1396, "step": 4787 }, { "epoch": 0.4411480167687843, "grad_norm": 0.9114714437921265, "learning_rate": 3.1843729731151855e-06, "loss": 0.1506, "step": 4788 }, { "epoch": 0.4412401529460543, "grad_norm": 0.8478111369854033, "learning_rate": 3.183640257719287e-06, "loss": 0.1399, "step": 4789 }, { "epoch": 0.4413322891233243, "grad_norm": 0.9881708242449926, "learning_rate": 3.182907478847318e-06, "loss": 0.1628, "step": 4790 }, { "epoch": 0.4414244253005943, "grad_norm": 0.8973574240687511, "learning_rate": 3.1821746365673157e-06, "loss": 0.1507, "step": 4791 }, { "epoch": 0.4415165614778643, "grad_norm": 0.9318494415821815, "learning_rate": 3.1814417309473243e-06, "loss": 0.1401, "step": 4792 }, { "epoch": 0.4416086976551343, "grad_norm": 0.9456870027866897, "learning_rate": 3.1807087620553957e-06, "loss": 0.1535, "step": 4793 }, { "epoch": 0.44170083383240427, "grad_norm": 0.8681572085432423, "learning_rate": 3.179975729959585e-06, "loss": 0.1532, "step": 4794 }, { "epoch": 0.4417929700096743, "grad_norm": 0.931784470899894, "learning_rate": 3.1792426347279544e-06, "loss": 0.1557, "step": 4795 }, { "epoch": 0.4418851061869443, "grad_norm": 0.8708926614080035, "learning_rate": 3.178509476428573e-06, "loss": 0.1389, "step": 4796 }, { "epoch": 0.4419772423642143, "grad_norm": 0.9255219971385017, "learning_rate": 3.177776255129512e-06, "loss": 0.1564, "step": 4797 }, { "epoch": 0.4420693785414843, "grad_norm": 0.9397331600366731, "learning_rate": 3.1770429708988536e-06, "loss": 0.1665, "step": 4798 }, { "epoch": 0.4421615147187543, "grad_norm": 0.8823264473223045, "learning_rate": 3.1763096238046833e-06, "loss": 0.1448, "step": 4799 }, { "epoch": 0.4422536508960243, "grad_norm": 0.9225948201087502, "learning_rate": 3.1755762139150905e-06, "loss": 0.1496, "step": 4800 }, { "epoch": 0.4423457870732943, "grad_norm": 0.8726837869356154, "learning_rate": 3.1748427412981742e-06, "loss": 0.1418, "step": 4801 }, { "epoch": 0.44243792325056436, "grad_norm": 0.8557987601165721, "learning_rate": 3.1741092060220364e-06, "loss": 0.1345, "step": 4802 }, { "epoch": 0.44253005942783435, "grad_norm": 0.8580919943355668, "learning_rate": 3.1733756081547864e-06, "loss": 0.1394, "step": 4803 }, { "epoch": 0.44262219560510435, "grad_norm": 0.9176851413304004, "learning_rate": 3.172641947764539e-06, "loss": 0.1449, "step": 4804 }, { "epoch": 0.44271433178237435, "grad_norm": 0.9095773510548247, "learning_rate": 3.1719082249194134e-06, "loss": 0.1456, "step": 4805 }, { "epoch": 0.44280646795964435, "grad_norm": 0.8799083657238806, "learning_rate": 3.171174439687538e-06, "loss": 0.1377, "step": 4806 }, { "epoch": 0.44289860413691434, "grad_norm": 0.8851747834590045, "learning_rate": 3.1704405921370428e-06, "loss": 0.1313, "step": 4807 }, { "epoch": 0.44299074031418434, "grad_norm": 0.878962379862866, "learning_rate": 3.169706682336066e-06, "loss": 0.1395, "step": 4808 }, { "epoch": 0.4430828764914544, "grad_norm": 0.8508108772869166, "learning_rate": 3.1689727103527536e-06, "loss": 0.1408, "step": 4809 }, { "epoch": 0.4431750126687244, "grad_norm": 0.8878317199533106, "learning_rate": 3.168238676255251e-06, "loss": 0.1433, "step": 4810 }, { "epoch": 0.4432671488459944, "grad_norm": 0.9226903423145814, "learning_rate": 3.1675045801117167e-06, "loss": 0.1492, "step": 4811 }, { "epoch": 0.4433592850232644, "grad_norm": 0.9388424210883725, "learning_rate": 3.1667704219903095e-06, "loss": 0.1575, "step": 4812 }, { "epoch": 0.4434514212005344, "grad_norm": 0.8534203981677879, "learning_rate": 3.1660362019591972e-06, "loss": 0.1377, "step": 4813 }, { "epoch": 0.4435435573778044, "grad_norm": 0.9133209220134044, "learning_rate": 3.1653019200865513e-06, "loss": 0.1437, "step": 4814 }, { "epoch": 0.4436356935550744, "grad_norm": 0.9172019407225023, "learning_rate": 3.164567576440552e-06, "loss": 0.1507, "step": 4815 }, { "epoch": 0.4437278297323444, "grad_norm": 0.9953185567517315, "learning_rate": 3.1638331710893804e-06, "loss": 0.1562, "step": 4816 }, { "epoch": 0.4438199659096144, "grad_norm": 0.9378368946596357, "learning_rate": 3.163098704101228e-06, "loss": 0.1554, "step": 4817 }, { "epoch": 0.4439121020868844, "grad_norm": 0.9135001066867825, "learning_rate": 3.162364175544289e-06, "loss": 0.1472, "step": 4818 }, { "epoch": 0.4440042382641544, "grad_norm": 0.9971829365126571, "learning_rate": 3.161629585486766e-06, "loss": 0.1409, "step": 4819 }, { "epoch": 0.4440963744414244, "grad_norm": 0.9040841107610292, "learning_rate": 3.160894933996864e-06, "loss": 0.149, "step": 4820 }, { "epoch": 0.4441885106186944, "grad_norm": 0.9206094051297338, "learning_rate": 3.160160221142797e-06, "loss": 0.1364, "step": 4821 }, { "epoch": 0.44428064679596446, "grad_norm": 0.9379688154656526, "learning_rate": 3.159425446992781e-06, "loss": 0.143, "step": 4822 }, { "epoch": 0.44437278297323446, "grad_norm": 0.9567257382459619, "learning_rate": 3.1586906116150428e-06, "loss": 0.1482, "step": 4823 }, { "epoch": 0.44446491915050446, "grad_norm": 1.005451648614117, "learning_rate": 3.1579557150778094e-06, "loss": 0.1601, "step": 4824 }, { "epoch": 0.44455705532777445, "grad_norm": 0.8817243195267939, "learning_rate": 3.1572207574493174e-06, "loss": 0.1428, "step": 4825 }, { "epoch": 0.44464919150504445, "grad_norm": 0.849646786373053, "learning_rate": 3.1564857387978075e-06, "loss": 0.1456, "step": 4826 }, { "epoch": 0.44474132768231445, "grad_norm": 0.9519644437243897, "learning_rate": 3.155750659191526e-06, "loss": 0.1594, "step": 4827 }, { "epoch": 0.44483346385958444, "grad_norm": 0.8774384982653047, "learning_rate": 3.155015518698725e-06, "loss": 0.1443, "step": 4828 }, { "epoch": 0.4449256000368545, "grad_norm": 0.9294480749474386, "learning_rate": 3.154280317387663e-06, "loss": 0.152, "step": 4829 }, { "epoch": 0.4450177362141245, "grad_norm": 0.8812827080037829, "learning_rate": 3.1535450553266024e-06, "loss": 0.1467, "step": 4830 }, { "epoch": 0.4451098723913945, "grad_norm": 0.9149468689160323, "learning_rate": 3.1528097325838143e-06, "loss": 0.1496, "step": 4831 }, { "epoch": 0.4452020085686645, "grad_norm": 0.886095744307431, "learning_rate": 3.1520743492275714e-06, "loss": 0.1504, "step": 4832 }, { "epoch": 0.4452941447459345, "grad_norm": 0.9507047582370619, "learning_rate": 3.151338905326155e-06, "loss": 0.1536, "step": 4833 }, { "epoch": 0.4453862809232045, "grad_norm": 0.9475520516133387, "learning_rate": 3.1506034009478515e-06, "loss": 0.1513, "step": 4834 }, { "epoch": 0.4454784171004745, "grad_norm": 0.8963368746247009, "learning_rate": 3.1498678361609514e-06, "loss": 0.1379, "step": 4835 }, { "epoch": 0.44557055327774453, "grad_norm": 0.9466097663899271, "learning_rate": 3.149132211033754e-06, "loss": 0.1352, "step": 4836 }, { "epoch": 0.4456626894550145, "grad_norm": 0.9237287141376792, "learning_rate": 3.1483965256345596e-06, "loss": 0.1409, "step": 4837 }, { "epoch": 0.4457548256322845, "grad_norm": 1.0177207633781526, "learning_rate": 3.147660780031679e-06, "loss": 0.1584, "step": 4838 }, { "epoch": 0.4458469618095545, "grad_norm": 0.9370371601534141, "learning_rate": 3.146924974293425e-06, "loss": 0.1532, "step": 4839 }, { "epoch": 0.4459390979868245, "grad_norm": 0.9331012417513412, "learning_rate": 3.1461891084881175e-06, "loss": 0.1604, "step": 4840 }, { "epoch": 0.4460312341640945, "grad_norm": 0.9580514103078052, "learning_rate": 3.1454531826840816e-06, "loss": 0.1515, "step": 4841 }, { "epoch": 0.4461233703413645, "grad_norm": 0.931795200004456, "learning_rate": 3.1447171969496487e-06, "loss": 0.1515, "step": 4842 }, { "epoch": 0.44621550651863456, "grad_norm": 0.826891990289607, "learning_rate": 3.1439811513531537e-06, "loss": 0.1302, "step": 4843 }, { "epoch": 0.44630764269590456, "grad_norm": 0.8025586612837762, "learning_rate": 3.143245045962941e-06, "loss": 0.134, "step": 4844 }, { "epoch": 0.44639977887317456, "grad_norm": 0.9430651189859448, "learning_rate": 3.142508880847355e-06, "loss": 0.1572, "step": 4845 }, { "epoch": 0.44649191505044455, "grad_norm": 0.8750199948395706, "learning_rate": 3.1417726560747507e-06, "loss": 0.1414, "step": 4846 }, { "epoch": 0.44658405122771455, "grad_norm": 0.8970428891745201, "learning_rate": 3.1410363717134868e-06, "loss": 0.1362, "step": 4847 }, { "epoch": 0.44667618740498455, "grad_norm": 0.8970399368797006, "learning_rate": 3.140300027831927e-06, "loss": 0.156, "step": 4848 }, { "epoch": 0.44676832358225455, "grad_norm": 0.900467919696107, "learning_rate": 3.1395636244984397e-06, "loss": 0.1441, "step": 4849 }, { "epoch": 0.4468604597595246, "grad_norm": 0.8944755285685609, "learning_rate": 3.1388271617814015e-06, "loss": 0.1348, "step": 4850 }, { "epoch": 0.4469525959367946, "grad_norm": 0.83068628823159, "learning_rate": 3.1380906397491923e-06, "loss": 0.1314, "step": 4851 }, { "epoch": 0.4470447321140646, "grad_norm": 0.9164112054172598, "learning_rate": 3.1373540584701997e-06, "loss": 0.1537, "step": 4852 }, { "epoch": 0.4471368682913346, "grad_norm": 0.8992847662913391, "learning_rate": 3.1366174180128127e-06, "loss": 0.1364, "step": 4853 }, { "epoch": 0.4472290044686046, "grad_norm": 0.9212563327688689, "learning_rate": 3.1358807184454305e-06, "loss": 0.1493, "step": 4854 }, { "epoch": 0.4473211406458746, "grad_norm": 0.924510733072517, "learning_rate": 3.1351439598364554e-06, "loss": 0.1481, "step": 4855 }, { "epoch": 0.44741327682314463, "grad_norm": 0.8533748975648047, "learning_rate": 3.134407142254295e-06, "loss": 0.1257, "step": 4856 }, { "epoch": 0.44750541300041463, "grad_norm": 0.9120449629078842, "learning_rate": 3.1336702657673625e-06, "loss": 0.1446, "step": 4857 }, { "epoch": 0.44759754917768463, "grad_norm": 0.8955446860187469, "learning_rate": 3.132933330444079e-06, "loss": 0.1375, "step": 4858 }, { "epoch": 0.4476896853549546, "grad_norm": 0.9088059249145077, "learning_rate": 3.132196336352867e-06, "loss": 0.1317, "step": 4859 }, { "epoch": 0.4477818215322246, "grad_norm": 0.9870459351359203, "learning_rate": 3.131459283562157e-06, "loss": 0.1494, "step": 4860 }, { "epoch": 0.4478739577094946, "grad_norm": 0.8765641433359809, "learning_rate": 3.1307221721403846e-06, "loss": 0.1465, "step": 4861 }, { "epoch": 0.4479660938867646, "grad_norm": 0.8894333905339528, "learning_rate": 3.129985002155991e-06, "loss": 0.1375, "step": 4862 }, { "epoch": 0.44805823006403467, "grad_norm": 0.9046224882892272, "learning_rate": 3.129247773677422e-06, "loss": 0.1459, "step": 4863 }, { "epoch": 0.44815036624130467, "grad_norm": 0.8885653224314798, "learning_rate": 3.128510486773129e-06, "loss": 0.1435, "step": 4864 }, { "epoch": 0.44824250241857466, "grad_norm": 0.9047398877430426, "learning_rate": 3.1277731415115696e-06, "loss": 0.1521, "step": 4865 }, { "epoch": 0.44833463859584466, "grad_norm": 0.9101582197651139, "learning_rate": 3.127035737961207e-06, "loss": 0.1475, "step": 4866 }, { "epoch": 0.44842677477311466, "grad_norm": 0.9211512953806295, "learning_rate": 3.1262982761905084e-06, "loss": 0.1483, "step": 4867 }, { "epoch": 0.44851891095038465, "grad_norm": 0.9168309363122416, "learning_rate": 3.125560756267948e-06, "loss": 0.1431, "step": 4868 }, { "epoch": 0.44861104712765465, "grad_norm": 0.882017959352168, "learning_rate": 3.1248231782620035e-06, "loss": 0.1291, "step": 4869 }, { "epoch": 0.4487031833049247, "grad_norm": 0.9737260719423534, "learning_rate": 3.1240855422411593e-06, "loss": 0.1535, "step": 4870 }, { "epoch": 0.4487953194821947, "grad_norm": 0.8840045486699224, "learning_rate": 3.1233478482739065e-06, "loss": 0.1509, "step": 4871 }, { "epoch": 0.4488874556594647, "grad_norm": 0.9149749621641127, "learning_rate": 3.1226100964287378e-06, "loss": 0.1387, "step": 4872 }, { "epoch": 0.4489795918367347, "grad_norm": 0.9192452144221734, "learning_rate": 3.1218722867741553e-06, "loss": 0.1547, "step": 4873 }, { "epoch": 0.4490717280140047, "grad_norm": 0.8908377156409427, "learning_rate": 3.1211344193786636e-06, "loss": 0.1432, "step": 4874 }, { "epoch": 0.4491638641912747, "grad_norm": 0.8925399884941798, "learning_rate": 3.1203964943107747e-06, "loss": 0.1533, "step": 4875 }, { "epoch": 0.4492560003685447, "grad_norm": 0.861973775471057, "learning_rate": 3.1196585116390045e-06, "loss": 0.1282, "step": 4876 }, { "epoch": 0.44934813654581474, "grad_norm": 0.8880068164435665, "learning_rate": 3.1189204714318743e-06, "loss": 0.1355, "step": 4877 }, { "epoch": 0.44944027272308473, "grad_norm": 1.0015594851191234, "learning_rate": 3.1181823737579115e-06, "loss": 0.1649, "step": 4878 }, { "epoch": 0.44953240890035473, "grad_norm": 0.9213880892562584, "learning_rate": 3.11744421868565e-06, "loss": 0.1458, "step": 4879 }, { "epoch": 0.4496245450776247, "grad_norm": 0.865422722909716, "learning_rate": 3.1167060062836253e-06, "loss": 0.148, "step": 4880 }, { "epoch": 0.4497166812548947, "grad_norm": 0.877456464715817, "learning_rate": 3.1159677366203815e-06, "loss": 0.1443, "step": 4881 }, { "epoch": 0.4498088174321647, "grad_norm": 0.9847277495406819, "learning_rate": 3.1152294097644677e-06, "loss": 0.1631, "step": 4882 }, { "epoch": 0.4499009536094347, "grad_norm": 0.987382375783057, "learning_rate": 3.1144910257844367e-06, "loss": 0.1562, "step": 4883 }, { "epoch": 0.44999308978670477, "grad_norm": 0.8920639965187964, "learning_rate": 3.113752584748848e-06, "loss": 0.1413, "step": 4884 }, { "epoch": 0.45008522596397477, "grad_norm": 0.8848619030803502, "learning_rate": 3.1130140867262653e-06, "loss": 0.1435, "step": 4885 }, { "epoch": 0.45017736214124476, "grad_norm": 0.9631374264594577, "learning_rate": 3.112275531785259e-06, "loss": 0.1502, "step": 4886 }, { "epoch": 0.45026949831851476, "grad_norm": 0.8783560040106241, "learning_rate": 3.111536919994404e-06, "loss": 0.1384, "step": 4887 }, { "epoch": 0.45036163449578476, "grad_norm": 0.8626103552628747, "learning_rate": 3.110798251422279e-06, "loss": 0.1376, "step": 4888 }, { "epoch": 0.45045377067305475, "grad_norm": 0.9437049556206974, "learning_rate": 3.1100595261374718e-06, "loss": 0.1393, "step": 4889 }, { "epoch": 0.4505459068503248, "grad_norm": 0.9041755349011702, "learning_rate": 3.1093207442085716e-06, "loss": 0.1404, "step": 4890 }, { "epoch": 0.4506380430275948, "grad_norm": 0.8609050559264265, "learning_rate": 3.108581905704175e-06, "loss": 0.1401, "step": 4891 }, { "epoch": 0.4507301792048648, "grad_norm": 0.931689125980329, "learning_rate": 3.107843010692882e-06, "loss": 0.1522, "step": 4892 }, { "epoch": 0.4508223153821348, "grad_norm": 0.9401339014710594, "learning_rate": 3.1071040592433003e-06, "loss": 0.1625, "step": 4893 }, { "epoch": 0.4509144515594048, "grad_norm": 0.9329946448016365, "learning_rate": 3.1063650514240425e-06, "loss": 0.141, "step": 4894 }, { "epoch": 0.4510065877366748, "grad_norm": 0.8452855653517855, "learning_rate": 3.105625987303723e-06, "loss": 0.1403, "step": 4895 }, { "epoch": 0.4510987239139448, "grad_norm": 0.8710890044558494, "learning_rate": 3.104886866950966e-06, "loss": 0.1351, "step": 4896 }, { "epoch": 0.45119086009121484, "grad_norm": 0.9314641306578018, "learning_rate": 3.104147690434398e-06, "loss": 0.148, "step": 4897 }, { "epoch": 0.45128299626848484, "grad_norm": 0.972321592820565, "learning_rate": 3.103408457822653e-06, "loss": 0.1458, "step": 4898 }, { "epoch": 0.45137513244575483, "grad_norm": 0.9440018331535416, "learning_rate": 3.1026691691843667e-06, "loss": 0.1448, "step": 4899 }, { "epoch": 0.45146726862302483, "grad_norm": 0.8171835747175893, "learning_rate": 3.1019298245881836e-06, "loss": 0.1261, "step": 4900 }, { "epoch": 0.45155940480029483, "grad_norm": 0.9060060164646293, "learning_rate": 3.101190424102752e-06, "loss": 0.1444, "step": 4901 }, { "epoch": 0.4516515409775648, "grad_norm": 0.9156139073129704, "learning_rate": 3.100450967796724e-06, "loss": 0.1435, "step": 4902 }, { "epoch": 0.4517436771548348, "grad_norm": 0.9480421762707635, "learning_rate": 3.099711455738759e-06, "loss": 0.1498, "step": 4903 }, { "epoch": 0.4518358133321049, "grad_norm": 0.8572388070239273, "learning_rate": 3.0989718879975216e-06, "loss": 0.1261, "step": 4904 }, { "epoch": 0.45192794950937487, "grad_norm": 0.9440417055921769, "learning_rate": 3.098232264641679e-06, "loss": 0.1539, "step": 4905 }, { "epoch": 0.45202008568664487, "grad_norm": 0.9367708656728884, "learning_rate": 3.0974925857399067e-06, "loss": 0.1469, "step": 4906 }, { "epoch": 0.45211222186391486, "grad_norm": 0.9228480233084598, "learning_rate": 3.0967528513608834e-06, "loss": 0.1505, "step": 4907 }, { "epoch": 0.45220435804118486, "grad_norm": 0.9142943272446579, "learning_rate": 3.0960130615732934e-06, "loss": 0.1359, "step": 4908 }, { "epoch": 0.45229649421845486, "grad_norm": 0.9470937423096474, "learning_rate": 3.095273216445827e-06, "loss": 0.1545, "step": 4909 }, { "epoch": 0.45238863039572486, "grad_norm": 0.8542008317546738, "learning_rate": 3.0945333160471784e-06, "loss": 0.1419, "step": 4910 }, { "epoch": 0.4524807665729949, "grad_norm": 0.913915263073126, "learning_rate": 3.0937933604460475e-06, "loss": 0.1411, "step": 4911 }, { "epoch": 0.4525729027502649, "grad_norm": 0.8900641602854975, "learning_rate": 3.0930533497111385e-06, "loss": 0.1452, "step": 4912 }, { "epoch": 0.4526650389275349, "grad_norm": 0.8682569105387904, "learning_rate": 3.0923132839111623e-06, "loss": 0.1308, "step": 4913 }, { "epoch": 0.4527571751048049, "grad_norm": 0.9630318001076058, "learning_rate": 3.0915731631148347e-06, "loss": 0.1394, "step": 4914 }, { "epoch": 0.4528493112820749, "grad_norm": 0.9096845912213878, "learning_rate": 3.0908329873908744e-06, "loss": 0.1442, "step": 4915 }, { "epoch": 0.4529414474593449, "grad_norm": 0.898608735328125, "learning_rate": 3.0900927568080074e-06, "loss": 0.1393, "step": 4916 }, { "epoch": 0.4530335836366149, "grad_norm": 0.917045289108983, "learning_rate": 3.0893524714349655e-06, "loss": 0.1449, "step": 4917 }, { "epoch": 0.45312571981388494, "grad_norm": 0.9167944285700524, "learning_rate": 3.0886121313404827e-06, "loss": 0.1475, "step": 4918 }, { "epoch": 0.45321785599115494, "grad_norm": 0.9361875808177101, "learning_rate": 3.0878717365933005e-06, "loss": 0.1521, "step": 4919 }, { "epoch": 0.45330999216842494, "grad_norm": 0.872165934115353, "learning_rate": 3.087131287262163e-06, "loss": 0.1435, "step": 4920 }, { "epoch": 0.45340212834569493, "grad_norm": 0.9511119704159243, "learning_rate": 3.0863907834158236e-06, "loss": 0.1489, "step": 4921 }, { "epoch": 0.45349426452296493, "grad_norm": 0.878621950916722, "learning_rate": 3.0856502251230363e-06, "loss": 0.1406, "step": 4922 }, { "epoch": 0.4535864007002349, "grad_norm": 0.8888612621302028, "learning_rate": 3.084909612452563e-06, "loss": 0.14, "step": 4923 }, { "epoch": 0.453678536877505, "grad_norm": 0.9063848035951376, "learning_rate": 3.0841689454731686e-06, "loss": 0.1543, "step": 4924 }, { "epoch": 0.453770673054775, "grad_norm": 0.9055802766662286, "learning_rate": 3.0834282242536253e-06, "loss": 0.1433, "step": 4925 }, { "epoch": 0.453862809232045, "grad_norm": 0.8804890898499717, "learning_rate": 3.082687448862708e-06, "loss": 0.1415, "step": 4926 }, { "epoch": 0.45395494540931497, "grad_norm": 0.8951461422226128, "learning_rate": 3.0819466193691995e-06, "loss": 0.1366, "step": 4927 }, { "epoch": 0.45404708158658497, "grad_norm": 0.9641931901566596, "learning_rate": 3.0812057358418834e-06, "loss": 0.1516, "step": 4928 }, { "epoch": 0.45413921776385496, "grad_norm": 0.8913322240282533, "learning_rate": 3.0804647983495527e-06, "loss": 0.144, "step": 4929 }, { "epoch": 0.45423135394112496, "grad_norm": 0.8853943694518898, "learning_rate": 3.079723806961003e-06, "loss": 0.1415, "step": 4930 }, { "epoch": 0.454323490118395, "grad_norm": 0.892217812255629, "learning_rate": 3.078982761745036e-06, "loss": 0.1356, "step": 4931 }, { "epoch": 0.454415626295665, "grad_norm": 0.9055251034056351, "learning_rate": 3.078241662770456e-06, "loss": 0.1459, "step": 4932 }, { "epoch": 0.454507762472935, "grad_norm": 0.87958798551727, "learning_rate": 3.0775005101060766e-06, "loss": 0.1334, "step": 4933 }, { "epoch": 0.454599898650205, "grad_norm": 0.9294900159804047, "learning_rate": 3.076759303820712e-06, "loss": 0.1483, "step": 4934 }, { "epoch": 0.454692034827475, "grad_norm": 0.9137965516586513, "learning_rate": 3.0760180439831844e-06, "loss": 0.1461, "step": 4935 }, { "epoch": 0.454784171004745, "grad_norm": 0.9170055144005241, "learning_rate": 3.0752767306623193e-06, "loss": 0.1549, "step": 4936 }, { "epoch": 0.454876307182015, "grad_norm": 0.9215013771632808, "learning_rate": 3.0745353639269476e-06, "loss": 0.1468, "step": 4937 }, { "epoch": 0.45496844335928505, "grad_norm": 0.87370944363821, "learning_rate": 3.073793943845906e-06, "loss": 0.1312, "step": 4938 }, { "epoch": 0.45506057953655504, "grad_norm": 0.8529167887808932, "learning_rate": 3.073052470488035e-06, "loss": 0.1368, "step": 4939 }, { "epoch": 0.45515271571382504, "grad_norm": 0.8213407441730175, "learning_rate": 3.0723109439221794e-06, "loss": 0.1219, "step": 4940 }, { "epoch": 0.45524485189109504, "grad_norm": 0.9499098561290193, "learning_rate": 3.071569364217192e-06, "loss": 0.1488, "step": 4941 }, { "epoch": 0.45533698806836503, "grad_norm": 0.9412672079828817, "learning_rate": 3.070827731441927e-06, "loss": 0.145, "step": 4942 }, { "epoch": 0.45542912424563503, "grad_norm": 0.9449970015438249, "learning_rate": 3.0700860456652467e-06, "loss": 0.1527, "step": 4943 }, { "epoch": 0.455521260422905, "grad_norm": 1.023026610833486, "learning_rate": 3.0693443069560147e-06, "loss": 0.1645, "step": 4944 }, { "epoch": 0.4556133966001751, "grad_norm": 0.9322705358338801, "learning_rate": 3.0686025153831033e-06, "loss": 0.14, "step": 4945 }, { "epoch": 0.4557055327774451, "grad_norm": 0.9495432808167561, "learning_rate": 3.067860671015387e-06, "loss": 0.1393, "step": 4946 }, { "epoch": 0.4557976689547151, "grad_norm": 0.940822123502306, "learning_rate": 3.0671187739217455e-06, "loss": 0.1453, "step": 4947 }, { "epoch": 0.45588980513198507, "grad_norm": 0.9230469467919934, "learning_rate": 3.0663768241710653e-06, "loss": 0.1404, "step": 4948 }, { "epoch": 0.45598194130925507, "grad_norm": 0.8879484675632415, "learning_rate": 3.065634821832237e-06, "loss": 0.1453, "step": 4949 }, { "epoch": 0.45607407748652506, "grad_norm": 0.8834701175995728, "learning_rate": 3.064892766974153e-06, "loss": 0.1337, "step": 4950 }, { "epoch": 0.45616621366379506, "grad_norm": 0.9122572961159483, "learning_rate": 3.0641506596657155e-06, "loss": 0.1525, "step": 4951 }, { "epoch": 0.4562583498410651, "grad_norm": 0.9907773312470673, "learning_rate": 3.0634084999758283e-06, "loss": 0.1481, "step": 4952 }, { "epoch": 0.4563504860183351, "grad_norm": 1.0032234693838713, "learning_rate": 3.0626662879734015e-06, "loss": 0.1542, "step": 4953 }, { "epoch": 0.4564426221956051, "grad_norm": 0.9644592767333385, "learning_rate": 3.0619240237273496e-06, "loss": 0.1467, "step": 4954 }, { "epoch": 0.4565347583728751, "grad_norm": 0.9045737461881386, "learning_rate": 3.0611817073065906e-06, "loss": 0.1411, "step": 4955 }, { "epoch": 0.4566268945501451, "grad_norm": 0.8749374497112531, "learning_rate": 3.0604393387800506e-06, "loss": 0.133, "step": 4956 }, { "epoch": 0.4567190307274151, "grad_norm": 0.9062323098590219, "learning_rate": 3.059696918216658e-06, "loss": 0.1517, "step": 4957 }, { "epoch": 0.45681116690468515, "grad_norm": 0.8681354325932334, "learning_rate": 3.058954445685346e-06, "loss": 0.139, "step": 4958 }, { "epoch": 0.45690330308195515, "grad_norm": 0.9817471157907012, "learning_rate": 3.058211921255053e-06, "loss": 0.1503, "step": 4959 }, { "epoch": 0.45699543925922514, "grad_norm": 0.8802616285300711, "learning_rate": 3.0574693449947234e-06, "loss": 0.1369, "step": 4960 }, { "epoch": 0.45708757543649514, "grad_norm": 0.910891593640118, "learning_rate": 3.056726716973305e-06, "loss": 0.1402, "step": 4961 }, { "epoch": 0.45717971161376514, "grad_norm": 1.030761021092152, "learning_rate": 3.0559840372597516e-06, "loss": 0.158, "step": 4962 }, { "epoch": 0.45727184779103514, "grad_norm": 0.8832204519175471, "learning_rate": 3.0552413059230196e-06, "loss": 0.1497, "step": 4963 }, { "epoch": 0.45736398396830513, "grad_norm": 0.932715520563677, "learning_rate": 3.054498523032073e-06, "loss": 0.1606, "step": 4964 }, { "epoch": 0.4574561201455752, "grad_norm": 0.9295316822032308, "learning_rate": 3.053755688655879e-06, "loss": 0.1419, "step": 4965 }, { "epoch": 0.4575482563228452, "grad_norm": 0.9730620058415319, "learning_rate": 3.05301280286341e-06, "loss": 0.1379, "step": 4966 }, { "epoch": 0.4576403925001152, "grad_norm": 0.8698683882590288, "learning_rate": 3.0522698657236417e-06, "loss": 0.1364, "step": 4967 }, { "epoch": 0.4577325286773852, "grad_norm": 0.9725906398499707, "learning_rate": 3.0515268773055577e-06, "loss": 0.1574, "step": 4968 }, { "epoch": 0.4578246648546552, "grad_norm": 0.973396363810178, "learning_rate": 3.0507838376781433e-06, "loss": 0.1599, "step": 4969 }, { "epoch": 0.45791680103192517, "grad_norm": 1.007119621555577, "learning_rate": 3.050040746910391e-06, "loss": 0.1615, "step": 4970 }, { "epoch": 0.45800893720919517, "grad_norm": 0.9464800328407788, "learning_rate": 3.049297605071296e-06, "loss": 0.1472, "step": 4971 }, { "epoch": 0.4581010733864652, "grad_norm": 0.970285334538535, "learning_rate": 3.0485544122298586e-06, "loss": 0.1539, "step": 4972 }, { "epoch": 0.4581932095637352, "grad_norm": 0.8753266986448403, "learning_rate": 3.0478111684550855e-06, "loss": 0.1349, "step": 4973 }, { "epoch": 0.4582853457410052, "grad_norm": 0.8632450928210212, "learning_rate": 3.0470678738159865e-06, "loss": 0.1393, "step": 4974 }, { "epoch": 0.4583774819182752, "grad_norm": 0.9481887029950181, "learning_rate": 3.046324528381576e-06, "loss": 0.1544, "step": 4975 }, { "epoch": 0.4584696180955452, "grad_norm": 0.9528130537970406, "learning_rate": 3.045581132220875e-06, "loss": 0.144, "step": 4976 }, { "epoch": 0.4585617542728152, "grad_norm": 0.958356098741581, "learning_rate": 3.0448376854029067e-06, "loss": 0.158, "step": 4977 }, { "epoch": 0.4586538904500852, "grad_norm": 0.886430427120259, "learning_rate": 3.0440941879967007e-06, "loss": 0.1349, "step": 4978 }, { "epoch": 0.45874602662735525, "grad_norm": 0.9492295330617945, "learning_rate": 3.043350640071291e-06, "loss": 0.1498, "step": 4979 }, { "epoch": 0.45883816280462525, "grad_norm": 0.8711956527213836, "learning_rate": 3.0426070416957155e-06, "loss": 0.1367, "step": 4980 }, { "epoch": 0.45893029898189525, "grad_norm": 0.8805743996456511, "learning_rate": 3.0418633929390184e-06, "loss": 0.133, "step": 4981 }, { "epoch": 0.45902243515916524, "grad_norm": 0.939114838941981, "learning_rate": 3.0411196938702465e-06, "loss": 0.1491, "step": 4982 }, { "epoch": 0.45911457133643524, "grad_norm": 0.8942524010998183, "learning_rate": 3.040375944558453e-06, "loss": 0.1487, "step": 4983 }, { "epoch": 0.45920670751370524, "grad_norm": 0.9381437330304268, "learning_rate": 3.0396321450726946e-06, "loss": 0.1454, "step": 4984 }, { "epoch": 0.4592988436909753, "grad_norm": 0.8871246016080946, "learning_rate": 3.0388882954820336e-06, "loss": 0.1364, "step": 4985 }, { "epoch": 0.4593909798682453, "grad_norm": 0.8884750944032963, "learning_rate": 3.0381443958555367e-06, "loss": 0.139, "step": 4986 }, { "epoch": 0.4594831160455153, "grad_norm": 0.9264575790757549, "learning_rate": 3.037400446262274e-06, "loss": 0.1471, "step": 4987 }, { "epoch": 0.4595752522227853, "grad_norm": 0.9376702144868666, "learning_rate": 3.036656446771322e-06, "loss": 0.1486, "step": 4988 }, { "epoch": 0.4596673884000553, "grad_norm": 0.9416874908664606, "learning_rate": 3.035912397451763e-06, "loss": 0.1412, "step": 4989 }, { "epoch": 0.4597595245773253, "grad_norm": 0.918365816769307, "learning_rate": 3.035168298372678e-06, "loss": 0.1395, "step": 4990 }, { "epoch": 0.45985166075459527, "grad_norm": 0.9667651339344426, "learning_rate": 3.0344241496031602e-06, "loss": 0.1467, "step": 4991 }, { "epoch": 0.4599437969318653, "grad_norm": 0.9702613519979137, "learning_rate": 3.0336799512123017e-06, "loss": 0.15, "step": 4992 }, { "epoch": 0.4600359331091353, "grad_norm": 0.9617046493464246, "learning_rate": 3.032935703269203e-06, "loss": 0.145, "step": 4993 }, { "epoch": 0.4601280692864053, "grad_norm": 1.021218623960953, "learning_rate": 3.0321914058429668e-06, "loss": 0.1517, "step": 4994 }, { "epoch": 0.4602202054636753, "grad_norm": 0.9376645972483496, "learning_rate": 3.0314470590027012e-06, "loss": 0.1586, "step": 4995 }, { "epoch": 0.4603123416409453, "grad_norm": 0.9052458305949739, "learning_rate": 3.0307026628175183e-06, "loss": 0.1454, "step": 4996 }, { "epoch": 0.4604044778182153, "grad_norm": 0.8892358271557455, "learning_rate": 3.029958217356537e-06, "loss": 0.1423, "step": 4997 }, { "epoch": 0.4604966139954853, "grad_norm": 0.8721683270176317, "learning_rate": 3.029213722688878e-06, "loss": 0.1422, "step": 4998 }, { "epoch": 0.46058875017275536, "grad_norm": 0.9118092519515993, "learning_rate": 3.0284691788836672e-06, "loss": 0.1535, "step": 4999 }, { "epoch": 0.46068088635002535, "grad_norm": 0.9566666504443163, "learning_rate": 3.027724586010037e-06, "loss": 0.1536, "step": 5000 }, { "epoch": 0.46068088635002535, "eval_loss": 0.14408743381500244, "eval_runtime": 299.7274, "eval_samples_per_second": 23.411, "eval_steps_per_second": 2.929, "step": 5000 }, { "epoch": 0.46077302252729535, "grad_norm": 0.944085745803305, "learning_rate": 3.0269799441371224e-06, "loss": 0.1453, "step": 5001 }, { "epoch": 0.46086515870456535, "grad_norm": 0.8901400825031632, "learning_rate": 3.026235253334063e-06, "loss": 0.1445, "step": 5002 }, { "epoch": 0.46095729488183534, "grad_norm": 0.9088254286241351, "learning_rate": 3.0254905136700038e-06, "loss": 0.1415, "step": 5003 }, { "epoch": 0.46104943105910534, "grad_norm": 0.9018385475977528, "learning_rate": 3.024745725214093e-06, "loss": 0.1404, "step": 5004 }, { "epoch": 0.46114156723637534, "grad_norm": 0.8610078807583995, "learning_rate": 3.024000888035486e-06, "loss": 0.1327, "step": 5005 }, { "epoch": 0.4612337034136454, "grad_norm": 0.9052272195057318, "learning_rate": 3.0232560022033398e-06, "loss": 0.1389, "step": 5006 }, { "epoch": 0.4613258395909154, "grad_norm": 0.9070172834425693, "learning_rate": 3.022511067786817e-06, "loss": 0.1444, "step": 5007 }, { "epoch": 0.4614179757681854, "grad_norm": 1.0011454075311015, "learning_rate": 3.0217660848550863e-06, "loss": 0.1616, "step": 5008 }, { "epoch": 0.4615101119454554, "grad_norm": 0.8636917231521298, "learning_rate": 3.0210210534773175e-06, "loss": 0.1461, "step": 5009 }, { "epoch": 0.4616022481227254, "grad_norm": 0.887277083871904, "learning_rate": 3.020275973722688e-06, "loss": 0.1413, "step": 5010 }, { "epoch": 0.4616943842999954, "grad_norm": 0.904649471086909, "learning_rate": 3.0195308456603795e-06, "loss": 0.1444, "step": 5011 }, { "epoch": 0.46178652047726537, "grad_norm": 0.922525202971339, "learning_rate": 3.018785669359575e-06, "loss": 0.1349, "step": 5012 }, { "epoch": 0.4618786566545354, "grad_norm": 0.8871262781575864, "learning_rate": 3.018040444889466e-06, "loss": 0.1357, "step": 5013 }, { "epoch": 0.4619707928318054, "grad_norm": 0.8900463787551096, "learning_rate": 3.0172951723192456e-06, "loss": 0.1374, "step": 5014 }, { "epoch": 0.4620629290090754, "grad_norm": 0.9042316250161178, "learning_rate": 3.016549851718112e-06, "loss": 0.1456, "step": 5015 }, { "epoch": 0.4621550651863454, "grad_norm": 0.9786460909009905, "learning_rate": 3.0158044831552703e-06, "loss": 0.1508, "step": 5016 }, { "epoch": 0.4622472013636154, "grad_norm": 0.8686153084102173, "learning_rate": 3.015059066699926e-06, "loss": 0.1343, "step": 5017 }, { "epoch": 0.4623393375408854, "grad_norm": 0.8651676610958235, "learning_rate": 3.0143136024212923e-06, "loss": 0.1362, "step": 5018 }, { "epoch": 0.46243147371815546, "grad_norm": 0.9019007066819323, "learning_rate": 3.013568090388585e-06, "loss": 0.145, "step": 5019 }, { "epoch": 0.46252360989542546, "grad_norm": 0.9500449930037843, "learning_rate": 3.012822530671026e-06, "loss": 0.1437, "step": 5020 }, { "epoch": 0.46261574607269546, "grad_norm": 0.9361307249362852, "learning_rate": 3.012076923337839e-06, "loss": 0.1569, "step": 5021 }, { "epoch": 0.46270788224996545, "grad_norm": 0.8733335938562423, "learning_rate": 3.011331268458255e-06, "loss": 0.1424, "step": 5022 }, { "epoch": 0.46280001842723545, "grad_norm": 0.8812589568625355, "learning_rate": 3.010585566101507e-06, "loss": 0.1437, "step": 5023 }, { "epoch": 0.46289215460450545, "grad_norm": 0.8940904754428848, "learning_rate": 3.0098398163368353e-06, "loss": 0.1385, "step": 5024 }, { "epoch": 0.46298429078177544, "grad_norm": 0.8489133395575272, "learning_rate": 3.0090940192334805e-06, "loss": 0.1351, "step": 5025 }, { "epoch": 0.4630764269590455, "grad_norm": 0.905730134591134, "learning_rate": 3.0083481748606923e-06, "loss": 0.1499, "step": 5026 }, { "epoch": 0.4631685631363155, "grad_norm": 0.8927244938308796, "learning_rate": 3.007602283287721e-06, "loss": 0.1435, "step": 5027 }, { "epoch": 0.4632606993135855, "grad_norm": 0.8957615740961147, "learning_rate": 3.0068563445838234e-06, "loss": 0.1496, "step": 5028 }, { "epoch": 0.4633528354908555, "grad_norm": 0.9292440088049803, "learning_rate": 3.0061103588182592e-06, "loss": 0.1532, "step": 5029 }, { "epoch": 0.4634449716681255, "grad_norm": 0.8525916459383819, "learning_rate": 3.005364326060294e-06, "loss": 0.1267, "step": 5030 }, { "epoch": 0.4635371078453955, "grad_norm": 0.9596307408249468, "learning_rate": 3.0046182463791962e-06, "loss": 0.1395, "step": 5031 }, { "epoch": 0.4636292440226655, "grad_norm": 0.9635650608907829, "learning_rate": 3.0038721198442406e-06, "loss": 0.1495, "step": 5032 }, { "epoch": 0.46372138019993553, "grad_norm": 0.8783556628180413, "learning_rate": 3.003125946524704e-06, "loss": 0.1374, "step": 5033 }, { "epoch": 0.4638135163772055, "grad_norm": 0.942047255532235, "learning_rate": 3.002379726489869e-06, "loss": 0.1443, "step": 5034 }, { "epoch": 0.4639056525544755, "grad_norm": 1.029948109716625, "learning_rate": 3.001633459809023e-06, "loss": 0.1544, "step": 5035 }, { "epoch": 0.4639977887317455, "grad_norm": 0.9450058200355388, "learning_rate": 3.000887146551455e-06, "loss": 0.1447, "step": 5036 }, { "epoch": 0.4640899249090155, "grad_norm": 0.8932809463577716, "learning_rate": 3.000140786786463e-06, "loss": 0.1351, "step": 5037 }, { "epoch": 0.4641820610862855, "grad_norm": 0.9064007681388029, "learning_rate": 2.9993943805833444e-06, "loss": 0.1449, "step": 5038 }, { "epoch": 0.4642741972635555, "grad_norm": 0.8988360819697974, "learning_rate": 2.998647928011404e-06, "loss": 0.1423, "step": 5039 }, { "epoch": 0.46436633344082556, "grad_norm": 0.8884133863776579, "learning_rate": 2.9979014291399495e-06, "loss": 0.1381, "step": 5040 }, { "epoch": 0.46445846961809556, "grad_norm": 0.9578344478960423, "learning_rate": 2.997154884038294e-06, "loss": 0.1457, "step": 5041 }, { "epoch": 0.46455060579536556, "grad_norm": 0.8889333116943653, "learning_rate": 2.9964082927757537e-06, "loss": 0.1434, "step": 5042 }, { "epoch": 0.46464274197263555, "grad_norm": 0.8880790358302674, "learning_rate": 2.995661655421651e-06, "loss": 0.1422, "step": 5043 }, { "epoch": 0.46473487814990555, "grad_norm": 0.9310009728143278, "learning_rate": 2.994914972045309e-06, "loss": 0.1519, "step": 5044 }, { "epoch": 0.46482701432717555, "grad_norm": 0.9135511073129388, "learning_rate": 2.994168242716059e-06, "loss": 0.1439, "step": 5045 }, { "epoch": 0.46491915050444554, "grad_norm": 0.850859033376075, "learning_rate": 2.9934214675032346e-06, "loss": 0.1444, "step": 5046 }, { "epoch": 0.4650112866817156, "grad_norm": 0.9225464686632786, "learning_rate": 2.9926746464761743e-06, "loss": 0.1519, "step": 5047 }, { "epoch": 0.4651034228589856, "grad_norm": 0.9176808967329263, "learning_rate": 2.9919277797042196e-06, "loss": 0.1358, "step": 5048 }, { "epoch": 0.4651955590362556, "grad_norm": 0.8571010707868579, "learning_rate": 2.991180867256718e-06, "loss": 0.1421, "step": 5049 }, { "epoch": 0.4652876952135256, "grad_norm": 0.8862854880044667, "learning_rate": 2.990433909203019e-06, "loss": 0.1419, "step": 5050 }, { "epoch": 0.4653798313907956, "grad_norm": 0.8790782944491246, "learning_rate": 2.9896869056124795e-06, "loss": 0.1408, "step": 5051 }, { "epoch": 0.4654719675680656, "grad_norm": 0.9012749764460423, "learning_rate": 2.9889398565544576e-06, "loss": 0.1476, "step": 5052 }, { "epoch": 0.46556410374533563, "grad_norm": 0.8814257961671169, "learning_rate": 2.9881927620983175e-06, "loss": 0.1455, "step": 5053 }, { "epoch": 0.46565623992260563, "grad_norm": 0.8888919896330232, "learning_rate": 2.9874456223134273e-06, "loss": 0.1395, "step": 5054 }, { "epoch": 0.4657483760998756, "grad_norm": 0.8966843988002527, "learning_rate": 2.9866984372691586e-06, "loss": 0.1461, "step": 5055 }, { "epoch": 0.4658405122771456, "grad_norm": 0.9811961006056726, "learning_rate": 2.985951207034888e-06, "loss": 0.1566, "step": 5056 }, { "epoch": 0.4659326484544156, "grad_norm": 0.9175985358748722, "learning_rate": 2.985203931679995e-06, "loss": 0.1524, "step": 5057 }, { "epoch": 0.4660247846316856, "grad_norm": 0.9465765486071972, "learning_rate": 2.984456611273864e-06, "loss": 0.1522, "step": 5058 }, { "epoch": 0.4661169208089556, "grad_norm": 0.9159762833014369, "learning_rate": 2.9837092458858862e-06, "loss": 0.1425, "step": 5059 }, { "epoch": 0.46620905698622567, "grad_norm": 0.9303292659055917, "learning_rate": 2.982961835585451e-06, "loss": 0.1508, "step": 5060 }, { "epoch": 0.46630119316349566, "grad_norm": 0.9815182981934475, "learning_rate": 2.9822143804419586e-06, "loss": 0.1443, "step": 5061 }, { "epoch": 0.46639332934076566, "grad_norm": 0.9247239725037197, "learning_rate": 2.981466880524809e-06, "loss": 0.1619, "step": 5062 }, { "epoch": 0.46648546551803566, "grad_norm": 0.9203976339424482, "learning_rate": 2.9807193359034077e-06, "loss": 0.1437, "step": 5063 }, { "epoch": 0.46657760169530565, "grad_norm": 0.9511622917745709, "learning_rate": 2.979971746647164e-06, "loss": 0.1503, "step": 5064 }, { "epoch": 0.46666973787257565, "grad_norm": 0.9639638100811875, "learning_rate": 2.9792241128254916e-06, "loss": 0.1526, "step": 5065 }, { "epoch": 0.46676187404984565, "grad_norm": 0.8636214111011123, "learning_rate": 2.978476434507809e-06, "loss": 0.1406, "step": 5066 }, { "epoch": 0.4668540102271157, "grad_norm": 0.9786702258760724, "learning_rate": 2.9777287117635387e-06, "loss": 0.1501, "step": 5067 }, { "epoch": 0.4669461464043857, "grad_norm": 0.8728745748666031, "learning_rate": 2.9769809446621057e-06, "loss": 0.1398, "step": 5068 }, { "epoch": 0.4670382825816557, "grad_norm": 0.9069597536608185, "learning_rate": 2.9762331332729405e-06, "loss": 0.1374, "step": 5069 }, { "epoch": 0.4671304187589257, "grad_norm": 0.9322944650625257, "learning_rate": 2.975485277665478e-06, "loss": 0.1469, "step": 5070 }, { "epoch": 0.4672225549361957, "grad_norm": 0.9255137327705216, "learning_rate": 2.9747373779091552e-06, "loss": 0.1442, "step": 5071 }, { "epoch": 0.4673146911134657, "grad_norm": 0.9013228649521703, "learning_rate": 2.9739894340734177e-06, "loss": 0.1485, "step": 5072 }, { "epoch": 0.4674068272907357, "grad_norm": 0.9255808656859331, "learning_rate": 2.9732414462277083e-06, "loss": 0.1361, "step": 5073 }, { "epoch": 0.46749896346800573, "grad_norm": 0.8959121976406828, "learning_rate": 2.9724934144414807e-06, "loss": 0.1392, "step": 5074 }, { "epoch": 0.46759109964527573, "grad_norm": 0.9969189706936202, "learning_rate": 2.9717453387841884e-06, "loss": 0.1477, "step": 5075 }, { "epoch": 0.46768323582254573, "grad_norm": 0.8902239293901296, "learning_rate": 2.9709972193252905e-06, "loss": 0.1448, "step": 5076 }, { "epoch": 0.4677753719998157, "grad_norm": 0.8644170281111183, "learning_rate": 2.9702490561342505e-06, "loss": 0.1384, "step": 5077 }, { "epoch": 0.4678675081770857, "grad_norm": 0.8383712450753181, "learning_rate": 2.969500849280535e-06, "loss": 0.1306, "step": 5078 }, { "epoch": 0.4679596443543557, "grad_norm": 0.8679667354263388, "learning_rate": 2.9687525988336147e-06, "loss": 0.144, "step": 5079 }, { "epoch": 0.4680517805316257, "grad_norm": 0.9255121092467719, "learning_rate": 2.968004304862966e-06, "loss": 0.1363, "step": 5080 }, { "epoch": 0.46814391670889577, "grad_norm": 0.9135538111005072, "learning_rate": 2.9672559674380664e-06, "loss": 0.149, "step": 5081 }, { "epoch": 0.46823605288616577, "grad_norm": 0.8891994213746041, "learning_rate": 2.9665075866284e-06, "loss": 0.1455, "step": 5082 }, { "epoch": 0.46832818906343576, "grad_norm": 0.906236563353806, "learning_rate": 2.9657591625034543e-06, "loss": 0.1342, "step": 5083 }, { "epoch": 0.46842032524070576, "grad_norm": 0.8876771489888291, "learning_rate": 2.9650106951327202e-06, "loss": 0.1346, "step": 5084 }, { "epoch": 0.46851246141797576, "grad_norm": 0.9419083874001088, "learning_rate": 2.964262184585692e-06, "loss": 0.1434, "step": 5085 }, { "epoch": 0.46860459759524575, "grad_norm": 0.9289938357240787, "learning_rate": 2.963513630931872e-06, "loss": 0.1576, "step": 5086 }, { "epoch": 0.4686967337725158, "grad_norm": 0.9715748809553193, "learning_rate": 2.96276503424076e-06, "loss": 0.1542, "step": 5087 }, { "epoch": 0.4687888699497858, "grad_norm": 0.8970855643969083, "learning_rate": 2.9620163945818648e-06, "loss": 0.1441, "step": 5088 }, { "epoch": 0.4688810061270558, "grad_norm": 0.999806972740666, "learning_rate": 2.961267712024698e-06, "loss": 0.1608, "step": 5089 }, { "epoch": 0.4689731423043258, "grad_norm": 0.9615698363397197, "learning_rate": 2.9605189866387746e-06, "loss": 0.15, "step": 5090 }, { "epoch": 0.4690652784815958, "grad_norm": 0.9082739679420999, "learning_rate": 2.9597702184936137e-06, "loss": 0.1494, "step": 5091 }, { "epoch": 0.4691574146588658, "grad_norm": 0.8927931890323686, "learning_rate": 2.9590214076587386e-06, "loss": 0.1425, "step": 5092 }, { "epoch": 0.4692495508361358, "grad_norm": 0.8850697632312967, "learning_rate": 2.958272554203676e-06, "loss": 0.1422, "step": 5093 }, { "epoch": 0.46934168701340584, "grad_norm": 0.9147171490433019, "learning_rate": 2.9575236581979576e-06, "loss": 0.1467, "step": 5094 }, { "epoch": 0.46943382319067584, "grad_norm": 0.864702255400151, "learning_rate": 2.9567747197111186e-06, "loss": 0.1387, "step": 5095 }, { "epoch": 0.46952595936794583, "grad_norm": 0.8767863278849377, "learning_rate": 2.9560257388126973e-06, "loss": 0.1473, "step": 5096 }, { "epoch": 0.46961809554521583, "grad_norm": 0.9674640608974738, "learning_rate": 2.9552767155722375e-06, "loss": 0.132, "step": 5097 }, { "epoch": 0.4697102317224858, "grad_norm": 0.9065230519661844, "learning_rate": 2.954527650059285e-06, "loss": 0.1394, "step": 5098 }, { "epoch": 0.4698023678997558, "grad_norm": 0.8556130271531583, "learning_rate": 2.9537785423433925e-06, "loss": 0.1301, "step": 5099 }, { "epoch": 0.4698945040770258, "grad_norm": 0.9112839599118244, "learning_rate": 2.9530293924941123e-06, "loss": 0.1542, "step": 5100 }, { "epoch": 0.4699866402542959, "grad_norm": 0.9598181354372883, "learning_rate": 2.9522802005810043e-06, "loss": 0.1526, "step": 5101 }, { "epoch": 0.47007877643156587, "grad_norm": 0.8624365229094141, "learning_rate": 2.9515309666736312e-06, "loss": 0.1359, "step": 5102 }, { "epoch": 0.47017091260883587, "grad_norm": 0.8809972735025539, "learning_rate": 2.95078169084156e-06, "loss": 0.1363, "step": 5103 }, { "epoch": 0.47026304878610586, "grad_norm": 0.8561841482860453, "learning_rate": 2.9500323731543596e-06, "loss": 0.1252, "step": 5104 }, { "epoch": 0.47035518496337586, "grad_norm": 0.8809602128506461, "learning_rate": 2.9492830136816053e-06, "loss": 0.1407, "step": 5105 }, { "epoch": 0.47044732114064586, "grad_norm": 0.9372906064000932, "learning_rate": 2.948533612492874e-06, "loss": 0.1543, "step": 5106 }, { "epoch": 0.47053945731791585, "grad_norm": 0.8940500798202332, "learning_rate": 2.947784169657749e-06, "loss": 0.1342, "step": 5107 }, { "epoch": 0.4706315934951859, "grad_norm": 0.9499099523288487, "learning_rate": 2.947034685245816e-06, "loss": 0.1528, "step": 5108 }, { "epoch": 0.4707237296724559, "grad_norm": 0.9332993914331615, "learning_rate": 2.946285159326664e-06, "loss": 0.146, "step": 5109 }, { "epoch": 0.4708158658497259, "grad_norm": 0.9145503369594333, "learning_rate": 2.945535591969887e-06, "loss": 0.1366, "step": 5110 }, { "epoch": 0.4709080020269959, "grad_norm": 0.87163080765042, "learning_rate": 2.944785983245082e-06, "loss": 0.1327, "step": 5111 }, { "epoch": 0.4710001382042659, "grad_norm": 0.9165609316092446, "learning_rate": 2.944036333221851e-06, "loss": 0.1538, "step": 5112 }, { "epoch": 0.4710922743815359, "grad_norm": 0.8710335528196103, "learning_rate": 2.9432866419697993e-06, "loss": 0.1289, "step": 5113 }, { "epoch": 0.4711844105588059, "grad_norm": 0.8827968344332082, "learning_rate": 2.9425369095585337e-06, "loss": 0.133, "step": 5114 }, { "epoch": 0.47127654673607594, "grad_norm": 0.8897129402540811, "learning_rate": 2.94178713605767e-06, "loss": 0.1358, "step": 5115 }, { "epoch": 0.47136868291334594, "grad_norm": 0.9143135967958729, "learning_rate": 2.9410373215368216e-06, "loss": 0.1402, "step": 5116 }, { "epoch": 0.47146081909061593, "grad_norm": 0.8708685814829983, "learning_rate": 2.9402874660656113e-06, "loss": 0.141, "step": 5117 }, { "epoch": 0.47155295526788593, "grad_norm": 0.9041670120185827, "learning_rate": 2.9395375697136623e-06, "loss": 0.1388, "step": 5118 }, { "epoch": 0.47164509144515593, "grad_norm": 0.9052941120311353, "learning_rate": 2.9387876325506025e-06, "loss": 0.14, "step": 5119 }, { "epoch": 0.4717372276224259, "grad_norm": 0.8817598067857801, "learning_rate": 2.9380376546460643e-06, "loss": 0.1344, "step": 5120 }, { "epoch": 0.471829363799696, "grad_norm": 0.9163393295135425, "learning_rate": 2.9372876360696823e-06, "loss": 0.1398, "step": 5121 }, { "epoch": 0.471921499976966, "grad_norm": 0.8989709981272607, "learning_rate": 2.9365375768910957e-06, "loss": 0.1398, "step": 5122 }, { "epoch": 0.47201363615423597, "grad_norm": 0.8474850729012813, "learning_rate": 2.935787477179949e-06, "loss": 0.128, "step": 5123 }, { "epoch": 0.47210577233150597, "grad_norm": 0.9426786416617355, "learning_rate": 2.9350373370058882e-06, "loss": 0.1482, "step": 5124 }, { "epoch": 0.47219790850877597, "grad_norm": 0.9157568621182558, "learning_rate": 2.9342871564385627e-06, "loss": 0.1445, "step": 5125 }, { "epoch": 0.47229004468604596, "grad_norm": 0.9060909697019053, "learning_rate": 2.9335369355476295e-06, "loss": 0.14, "step": 5126 }, { "epoch": 0.47238218086331596, "grad_norm": 0.9206297306151924, "learning_rate": 2.932786674402744e-06, "loss": 0.1467, "step": 5127 }, { "epoch": 0.472474317040586, "grad_norm": 0.9762708271384426, "learning_rate": 2.9320363730735696e-06, "loss": 0.1528, "step": 5128 }, { "epoch": 0.472566453217856, "grad_norm": 0.9265016675178621, "learning_rate": 2.9312860316297716e-06, "loss": 0.1349, "step": 5129 }, { "epoch": 0.472658589395126, "grad_norm": 0.9251132679088837, "learning_rate": 2.930535650141019e-06, "loss": 0.147, "step": 5130 }, { "epoch": 0.472750725572396, "grad_norm": 0.9300808982858864, "learning_rate": 2.9297852286769852e-06, "loss": 0.1452, "step": 5131 }, { "epoch": 0.472842861749666, "grad_norm": 0.8853462094377297, "learning_rate": 2.9290347673073466e-06, "loss": 0.1353, "step": 5132 }, { "epoch": 0.472934997926936, "grad_norm": 0.8979270084102228, "learning_rate": 2.928284266101783e-06, "loss": 0.1379, "step": 5133 }, { "epoch": 0.473027134104206, "grad_norm": 0.858106147363866, "learning_rate": 2.9275337251299808e-06, "loss": 0.1373, "step": 5134 }, { "epoch": 0.47311927028147605, "grad_norm": 0.9349995659851857, "learning_rate": 2.9267831444616244e-06, "loss": 0.1358, "step": 5135 }, { "epoch": 0.47321140645874604, "grad_norm": 0.9034520136048325, "learning_rate": 2.926032524166408e-06, "loss": 0.1382, "step": 5136 }, { "epoch": 0.47330354263601604, "grad_norm": 0.9136257429700642, "learning_rate": 2.9252818643140256e-06, "loss": 0.143, "step": 5137 }, { "epoch": 0.47339567881328604, "grad_norm": 0.9468303486280631, "learning_rate": 2.9245311649741765e-06, "loss": 0.1354, "step": 5138 }, { "epoch": 0.47348781499055603, "grad_norm": 0.9333786953701937, "learning_rate": 2.9237804262165632e-06, "loss": 0.1494, "step": 5139 }, { "epoch": 0.47357995116782603, "grad_norm": 0.8913158148329773, "learning_rate": 2.9230296481108916e-06, "loss": 0.1491, "step": 5140 }, { "epoch": 0.473672087345096, "grad_norm": 0.9347408552372286, "learning_rate": 2.922278830726871e-06, "loss": 0.1469, "step": 5141 }, { "epoch": 0.4737642235223661, "grad_norm": 0.9046310230891329, "learning_rate": 2.9215279741342165e-06, "loss": 0.1248, "step": 5142 }, { "epoch": 0.4738563596996361, "grad_norm": 0.8910835401059762, "learning_rate": 2.9207770784026436e-06, "loss": 0.1375, "step": 5143 }, { "epoch": 0.4739484958769061, "grad_norm": 0.8892182784728998, "learning_rate": 2.920026143601874e-06, "loss": 0.1277, "step": 5144 }, { "epoch": 0.47404063205417607, "grad_norm": 0.8774140482608112, "learning_rate": 2.9192751698016317e-06, "loss": 0.1368, "step": 5145 }, { "epoch": 0.47413276823144607, "grad_norm": 0.9889873174330852, "learning_rate": 2.918524157071645e-06, "loss": 0.1509, "step": 5146 }, { "epoch": 0.47422490440871606, "grad_norm": 0.9436506649824882, "learning_rate": 2.917773105481645e-06, "loss": 0.1535, "step": 5147 }, { "epoch": 0.47431704058598606, "grad_norm": 0.8873798622275758, "learning_rate": 2.917022015101367e-06, "loss": 0.1312, "step": 5148 }, { "epoch": 0.4744091767632561, "grad_norm": 0.9843878112858804, "learning_rate": 2.91627088600055e-06, "loss": 0.1525, "step": 5149 }, { "epoch": 0.4745013129405261, "grad_norm": 0.9278713582496736, "learning_rate": 2.9155197182489375e-06, "loss": 0.133, "step": 5150 }, { "epoch": 0.4745934491177961, "grad_norm": 0.8901035694884752, "learning_rate": 2.9147685119162735e-06, "loss": 0.1194, "step": 5151 }, { "epoch": 0.4746855852950661, "grad_norm": 0.8786311039803401, "learning_rate": 2.9140172670723083e-06, "loss": 0.1364, "step": 5152 }, { "epoch": 0.4747777214723361, "grad_norm": 0.9353843520819218, "learning_rate": 2.913265983786796e-06, "loss": 0.1367, "step": 5153 }, { "epoch": 0.4748698576496061, "grad_norm": 0.9136948540094968, "learning_rate": 2.9125146621294915e-06, "loss": 0.1585, "step": 5154 }, { "epoch": 0.47496199382687615, "grad_norm": 0.9192694976349985, "learning_rate": 2.9117633021701574e-06, "loss": 0.1482, "step": 5155 }, { "epoch": 0.47505413000414615, "grad_norm": 0.9747913478550096, "learning_rate": 2.9110119039785554e-06, "loss": 0.1425, "step": 5156 }, { "epoch": 0.47514626618141614, "grad_norm": 0.9813043098360802, "learning_rate": 2.910260467624455e-06, "loss": 0.153, "step": 5157 }, { "epoch": 0.47523840235868614, "grad_norm": 0.8829325066846414, "learning_rate": 2.9095089931776255e-06, "loss": 0.1476, "step": 5158 }, { "epoch": 0.47533053853595614, "grad_norm": 0.9194266463326177, "learning_rate": 2.908757480707842e-06, "loss": 0.1553, "step": 5159 }, { "epoch": 0.47542267471322613, "grad_norm": 0.9373661605075402, "learning_rate": 2.908005930284882e-06, "loss": 0.1523, "step": 5160 }, { "epoch": 0.47551481089049613, "grad_norm": 0.9409623196439526, "learning_rate": 2.907254341978528e-06, "loss": 0.1479, "step": 5161 }, { "epoch": 0.4756069470677662, "grad_norm": 0.9489593698972468, "learning_rate": 2.906502715858564e-06, "loss": 0.1532, "step": 5162 }, { "epoch": 0.4756990832450362, "grad_norm": 0.8697876898890173, "learning_rate": 2.9057510519947794e-06, "loss": 0.1359, "step": 5163 }, { "epoch": 0.4757912194223062, "grad_norm": 0.9932677774158268, "learning_rate": 2.9049993504569663e-06, "loss": 0.1465, "step": 5164 }, { "epoch": 0.4758833555995762, "grad_norm": 0.8608562365126148, "learning_rate": 2.9042476113149193e-06, "loss": 0.136, "step": 5165 }, { "epoch": 0.47597549177684617, "grad_norm": 0.9028113269728798, "learning_rate": 2.9034958346384385e-06, "loss": 0.1507, "step": 5166 }, { "epoch": 0.47606762795411617, "grad_norm": 0.9736011569329539, "learning_rate": 2.9027440204973263e-06, "loss": 0.1503, "step": 5167 }, { "epoch": 0.47615976413138617, "grad_norm": 0.8603637223144222, "learning_rate": 2.9019921689613874e-06, "loss": 0.1339, "step": 5168 }, { "epoch": 0.4762519003086562, "grad_norm": 0.8692468134048886, "learning_rate": 2.9012402801004334e-06, "loss": 0.1389, "step": 5169 }, { "epoch": 0.4763440364859262, "grad_norm": 0.9180819372587773, "learning_rate": 2.9004883539842756e-06, "loss": 0.1456, "step": 5170 }, { "epoch": 0.4764361726631962, "grad_norm": 0.9080609642454912, "learning_rate": 2.8997363906827315e-06, "loss": 0.1365, "step": 5171 }, { "epoch": 0.4765283088404662, "grad_norm": 0.8854688032810417, "learning_rate": 2.8989843902656202e-06, "loss": 0.1291, "step": 5172 }, { "epoch": 0.4766204450177362, "grad_norm": 0.9528179450062821, "learning_rate": 2.898232352802765e-06, "loss": 0.1453, "step": 5173 }, { "epoch": 0.4767125811950062, "grad_norm": 0.8690123948765117, "learning_rate": 2.8974802783639934e-06, "loss": 0.133, "step": 5174 }, { "epoch": 0.4768047173722762, "grad_norm": 0.943895660424614, "learning_rate": 2.8967281670191357e-06, "loss": 0.146, "step": 5175 }, { "epoch": 0.47689685354954625, "grad_norm": 0.922222468563535, "learning_rate": 2.895976018838024e-06, "loss": 0.1397, "step": 5176 }, { "epoch": 0.47698898972681625, "grad_norm": 0.8783345231451642, "learning_rate": 2.895223833890497e-06, "loss": 0.1368, "step": 5177 }, { "epoch": 0.47708112590408625, "grad_norm": 0.8963037452817142, "learning_rate": 2.8944716122463933e-06, "loss": 0.127, "step": 5178 }, { "epoch": 0.47717326208135624, "grad_norm": 0.9540068837330717, "learning_rate": 2.8937193539755593e-06, "loss": 0.133, "step": 5179 }, { "epoch": 0.47726539825862624, "grad_norm": 0.926155655361845, "learning_rate": 2.8929670591478404e-06, "loss": 0.1597, "step": 5180 }, { "epoch": 0.47735753443589624, "grad_norm": 0.9621119246946411, "learning_rate": 2.8922147278330876e-06, "loss": 0.1544, "step": 5181 }, { "epoch": 0.47744967061316623, "grad_norm": 0.957134161209612, "learning_rate": 2.891462360101156e-06, "loss": 0.1517, "step": 5182 }, { "epoch": 0.4775418067904363, "grad_norm": 0.9236587620190673, "learning_rate": 2.890709956021901e-06, "loss": 0.14, "step": 5183 }, { "epoch": 0.4776339429677063, "grad_norm": 0.9000620955817382, "learning_rate": 2.8899575156651847e-06, "loss": 0.1374, "step": 5184 }, { "epoch": 0.4777260791449763, "grad_norm": 0.919864564287617, "learning_rate": 2.889205039100872e-06, "loss": 0.1415, "step": 5185 }, { "epoch": 0.4778182153222463, "grad_norm": 0.8402420054493916, "learning_rate": 2.8884525263988288e-06, "loss": 0.1265, "step": 5186 }, { "epoch": 0.4779103514995163, "grad_norm": 0.8852975503830686, "learning_rate": 2.887699977628927e-06, "loss": 0.1262, "step": 5187 }, { "epoch": 0.47800248767678627, "grad_norm": 0.8630089162205361, "learning_rate": 2.886947392861041e-06, "loss": 0.1349, "step": 5188 }, { "epoch": 0.4780946238540563, "grad_norm": 0.8956680318013008, "learning_rate": 2.886194772165046e-06, "loss": 0.1479, "step": 5189 }, { "epoch": 0.4781867600313263, "grad_norm": 0.9614757825575214, "learning_rate": 2.8854421156108276e-06, "loss": 0.15, "step": 5190 }, { "epoch": 0.4782788962085963, "grad_norm": 0.9044999706904249, "learning_rate": 2.8846894232682654e-06, "loss": 0.1476, "step": 5191 }, { "epoch": 0.4783710323858663, "grad_norm": 0.8821383917105962, "learning_rate": 2.883936695207249e-06, "loss": 0.147, "step": 5192 }, { "epoch": 0.4784631685631363, "grad_norm": 0.9243868354852021, "learning_rate": 2.8831839314976696e-06, "loss": 0.1506, "step": 5193 }, { "epoch": 0.4785553047404063, "grad_norm": 0.8698264824416307, "learning_rate": 2.8824311322094213e-06, "loss": 0.137, "step": 5194 }, { "epoch": 0.4786474409176763, "grad_norm": 0.9675043619811561, "learning_rate": 2.8816782974124007e-06, "loss": 0.1586, "step": 5195 }, { "epoch": 0.47873957709494636, "grad_norm": 0.8938915507442141, "learning_rate": 2.880925427176509e-06, "loss": 0.1361, "step": 5196 }, { "epoch": 0.47883171327221635, "grad_norm": 0.9334479510460674, "learning_rate": 2.8801725215716504e-06, "loss": 0.1474, "step": 5197 }, { "epoch": 0.47892384944948635, "grad_norm": 0.8647314780366868, "learning_rate": 2.879419580667733e-06, "loss": 0.1216, "step": 5198 }, { "epoch": 0.47901598562675635, "grad_norm": 0.8678424881431106, "learning_rate": 2.878666604534665e-06, "loss": 0.1295, "step": 5199 }, { "epoch": 0.47910812180402634, "grad_norm": 0.8752317443418055, "learning_rate": 2.8779135932423633e-06, "loss": 0.1377, "step": 5200 }, { "epoch": 0.47920025798129634, "grad_norm": 0.9457831979152571, "learning_rate": 2.877160546860744e-06, "loss": 0.1505, "step": 5201 }, { "epoch": 0.47929239415856634, "grad_norm": 0.870921425375068, "learning_rate": 2.8764074654597267e-06, "loss": 0.1312, "step": 5202 }, { "epoch": 0.4793845303358364, "grad_norm": 0.9157472068782744, "learning_rate": 2.8756543491092352e-06, "loss": 0.1472, "step": 5203 }, { "epoch": 0.4794766665131064, "grad_norm": 0.9300844720183196, "learning_rate": 2.8749011978791984e-06, "loss": 0.1528, "step": 5204 }, { "epoch": 0.4795688026903764, "grad_norm": 0.8245854441755501, "learning_rate": 2.8741480118395443e-06, "loss": 0.1167, "step": 5205 }, { "epoch": 0.4796609388676464, "grad_norm": 0.9984410331300712, "learning_rate": 2.873394791060207e-06, "loss": 0.1477, "step": 5206 }, { "epoch": 0.4797530750449164, "grad_norm": 0.9181133933506418, "learning_rate": 2.872641535611123e-06, "loss": 0.1526, "step": 5207 }, { "epoch": 0.4798452112221864, "grad_norm": 0.9508564827737102, "learning_rate": 2.8718882455622334e-06, "loss": 0.1596, "step": 5208 }, { "epoch": 0.47993734739945637, "grad_norm": 0.8843054175098252, "learning_rate": 2.871134920983479e-06, "loss": 0.1333, "step": 5209 }, { "epoch": 0.4800294835767264, "grad_norm": 0.9349077875143057, "learning_rate": 2.8703815619448072e-06, "loss": 0.1379, "step": 5210 }, { "epoch": 0.4801216197539964, "grad_norm": 0.8935820005729005, "learning_rate": 2.8696281685161676e-06, "loss": 0.1366, "step": 5211 }, { "epoch": 0.4802137559312664, "grad_norm": 0.889136554484447, "learning_rate": 2.868874740767513e-06, "loss": 0.1359, "step": 5212 }, { "epoch": 0.4803058921085364, "grad_norm": 0.9768791414763527, "learning_rate": 2.8681212787687997e-06, "loss": 0.1468, "step": 5213 }, { "epoch": 0.4803980282858064, "grad_norm": 0.9558517666869947, "learning_rate": 2.8673677825899852e-06, "loss": 0.156, "step": 5214 }, { "epoch": 0.4804901644630764, "grad_norm": 0.9018021401267595, "learning_rate": 2.866614252301033e-06, "loss": 0.1298, "step": 5215 }, { "epoch": 0.4805823006403464, "grad_norm": 0.9408168488926255, "learning_rate": 2.865860687971907e-06, "loss": 0.1504, "step": 5216 }, { "epoch": 0.48067443681761646, "grad_norm": 0.9242266840610662, "learning_rate": 2.8651070896725786e-06, "loss": 0.1438, "step": 5217 }, { "epoch": 0.48076657299488645, "grad_norm": 0.8848173378094824, "learning_rate": 2.864353457473016e-06, "loss": 0.1456, "step": 5218 }, { "epoch": 0.48085870917215645, "grad_norm": 0.9226387337834759, "learning_rate": 2.863599791443196e-06, "loss": 0.1452, "step": 5219 }, { "epoch": 0.48095084534942645, "grad_norm": 0.9215882804526133, "learning_rate": 2.8628460916530967e-06, "loss": 0.1484, "step": 5220 }, { "epoch": 0.48104298152669644, "grad_norm": 0.9025065525594986, "learning_rate": 2.8620923581726983e-06, "loss": 0.1481, "step": 5221 }, { "epoch": 0.48113511770396644, "grad_norm": 0.9097430267440212, "learning_rate": 2.861338591071986e-06, "loss": 0.1434, "step": 5222 }, { "epoch": 0.4812272538812365, "grad_norm": 0.8802740153350054, "learning_rate": 2.860584790420946e-06, "loss": 0.1362, "step": 5223 }, { "epoch": 0.4813193900585065, "grad_norm": 0.9819178496384163, "learning_rate": 2.85983095628957e-06, "loss": 0.1554, "step": 5224 }, { "epoch": 0.4814115262357765, "grad_norm": 0.8843967606324523, "learning_rate": 2.8590770887478507e-06, "loss": 0.1425, "step": 5225 }, { "epoch": 0.4815036624130465, "grad_norm": 0.8725658702095903, "learning_rate": 2.8583231878657847e-06, "loss": 0.1329, "step": 5226 }, { "epoch": 0.4815957985903165, "grad_norm": 0.8909883448928446, "learning_rate": 2.8575692537133726e-06, "loss": 0.1499, "step": 5227 }, { "epoch": 0.4816879347675865, "grad_norm": 0.9430742708330203, "learning_rate": 2.8568152863606167e-06, "loss": 0.1433, "step": 5228 }, { "epoch": 0.4817800709448565, "grad_norm": 0.8869863335741072, "learning_rate": 2.8560612858775233e-06, "loss": 0.1303, "step": 5229 }, { "epoch": 0.48187220712212653, "grad_norm": 0.9377415356064038, "learning_rate": 2.8553072523341008e-06, "loss": 0.1498, "step": 5230 }, { "epoch": 0.4819643432993965, "grad_norm": 0.8758553521749834, "learning_rate": 2.8545531858003623e-06, "loss": 0.1238, "step": 5231 }, { "epoch": 0.4820564794766665, "grad_norm": 0.8988921073626308, "learning_rate": 2.8537990863463212e-06, "loss": 0.1298, "step": 5232 }, { "epoch": 0.4821486156539365, "grad_norm": 0.8798732879738537, "learning_rate": 2.853044954041998e-06, "loss": 0.1454, "step": 5233 }, { "epoch": 0.4822407518312065, "grad_norm": 0.9246980033341549, "learning_rate": 2.8522907889574117e-06, "loss": 0.1524, "step": 5234 }, { "epoch": 0.4823328880084765, "grad_norm": 0.9174157745994747, "learning_rate": 2.851536591162589e-06, "loss": 0.1529, "step": 5235 }, { "epoch": 0.4824250241857465, "grad_norm": 0.9424499320791001, "learning_rate": 2.8507823607275554e-06, "loss": 0.1484, "step": 5236 }, { "epoch": 0.48251716036301656, "grad_norm": 0.8256974300101093, "learning_rate": 2.8500280977223416e-06, "loss": 0.1247, "step": 5237 }, { "epoch": 0.48260929654028656, "grad_norm": 0.8940530112241213, "learning_rate": 2.8492738022169815e-06, "loss": 0.1393, "step": 5238 }, { "epoch": 0.48270143271755656, "grad_norm": 0.8472711799346075, "learning_rate": 2.848519474281511e-06, "loss": 0.1298, "step": 5239 }, { "epoch": 0.48279356889482655, "grad_norm": 0.9181352849217045, "learning_rate": 2.84776511398597e-06, "loss": 0.1498, "step": 5240 }, { "epoch": 0.48288570507209655, "grad_norm": 0.9012991958361424, "learning_rate": 2.847010721400401e-06, "loss": 0.1477, "step": 5241 }, { "epoch": 0.48297784124936655, "grad_norm": 0.814508085647066, "learning_rate": 2.8462562965948493e-06, "loss": 0.1316, "step": 5242 }, { "epoch": 0.48306997742663654, "grad_norm": 0.8971639247190899, "learning_rate": 2.8455018396393618e-06, "loss": 0.1386, "step": 5243 }, { "epoch": 0.4831621136039066, "grad_norm": 0.8670252001320244, "learning_rate": 2.8447473506039934e-06, "loss": 0.1274, "step": 5244 }, { "epoch": 0.4832542497811766, "grad_norm": 0.9182328079782047, "learning_rate": 2.8439928295587948e-06, "loss": 0.1402, "step": 5245 }, { "epoch": 0.4833463859584466, "grad_norm": 0.8738254881249767, "learning_rate": 2.843238276573826e-06, "loss": 0.1395, "step": 5246 }, { "epoch": 0.4834385221357166, "grad_norm": 0.8751207383004573, "learning_rate": 2.8424836917191455e-06, "loss": 0.1397, "step": 5247 }, { "epoch": 0.4835306583129866, "grad_norm": 0.8559763699901859, "learning_rate": 2.841729075064818e-06, "loss": 0.1326, "step": 5248 }, { "epoch": 0.4836227944902566, "grad_norm": 0.9330748310081932, "learning_rate": 2.840974426680909e-06, "loss": 0.1308, "step": 5249 }, { "epoch": 0.4837149306675266, "grad_norm": 0.9467378848060349, "learning_rate": 2.840219746637487e-06, "loss": 0.1499, "step": 5250 }, { "epoch": 0.48380706684479663, "grad_norm": 0.9756656663187364, "learning_rate": 2.8394650350046256e-06, "loss": 0.1326, "step": 5251 }, { "epoch": 0.4838992030220666, "grad_norm": 0.9544148043520337, "learning_rate": 2.8387102918523995e-06, "loss": 0.1516, "step": 5252 }, { "epoch": 0.4839913391993366, "grad_norm": 0.8967080887994231, "learning_rate": 2.8379555172508853e-06, "loss": 0.125, "step": 5253 }, { "epoch": 0.4840834753766066, "grad_norm": 0.9096511642692993, "learning_rate": 2.8372007112701657e-06, "loss": 0.1415, "step": 5254 }, { "epoch": 0.4841756115538766, "grad_norm": 0.9212905745476281, "learning_rate": 2.8364458739803237e-06, "loss": 0.1394, "step": 5255 }, { "epoch": 0.4842677477311466, "grad_norm": 0.9188703117763569, "learning_rate": 2.835691005451446e-06, "loss": 0.1497, "step": 5256 }, { "epoch": 0.48435988390841667, "grad_norm": 0.9223024413019026, "learning_rate": 2.8349361057536223e-06, "loss": 0.1438, "step": 5257 }, { "epoch": 0.48445202008568666, "grad_norm": 0.9040847454967237, "learning_rate": 2.8341811749569452e-06, "loss": 0.1391, "step": 5258 }, { "epoch": 0.48454415626295666, "grad_norm": 0.9561157127435418, "learning_rate": 2.8334262131315094e-06, "loss": 0.1476, "step": 5259 }, { "epoch": 0.48463629244022666, "grad_norm": 0.8481512102632566, "learning_rate": 2.832671220347415e-06, "loss": 0.1265, "step": 5260 }, { "epoch": 0.48472842861749665, "grad_norm": 0.9162494132871907, "learning_rate": 2.831916196674761e-06, "loss": 0.1506, "step": 5261 }, { "epoch": 0.48482056479476665, "grad_norm": 0.9285013656763865, "learning_rate": 2.831161142183653e-06, "loss": 0.1442, "step": 5262 }, { "epoch": 0.48491270097203665, "grad_norm": 0.9376551897657546, "learning_rate": 2.830406056944197e-06, "loss": 0.1395, "step": 5263 }, { "epoch": 0.4850048371493067, "grad_norm": 0.9226971589582109, "learning_rate": 2.8296509410265032e-06, "loss": 0.1478, "step": 5264 }, { "epoch": 0.4850969733265767, "grad_norm": 0.9000083531197767, "learning_rate": 2.8288957945006845e-06, "loss": 0.1462, "step": 5265 }, { "epoch": 0.4851891095038467, "grad_norm": 0.89690674045041, "learning_rate": 2.8281406174368555e-06, "loss": 0.1419, "step": 5266 }, { "epoch": 0.4852812456811167, "grad_norm": 0.9345061065274617, "learning_rate": 2.827385409905134e-06, "loss": 0.1488, "step": 5267 }, { "epoch": 0.4853733818583867, "grad_norm": 0.9064937256577421, "learning_rate": 2.8266301719756427e-06, "loss": 0.1428, "step": 5268 }, { "epoch": 0.4854655180356567, "grad_norm": 0.9088525413468661, "learning_rate": 2.825874903718505e-06, "loss": 0.1478, "step": 5269 }, { "epoch": 0.4855576542129267, "grad_norm": 0.9271486145199646, "learning_rate": 2.8251196052038475e-06, "loss": 0.1551, "step": 5270 }, { "epoch": 0.48564979039019673, "grad_norm": 0.9522761407530571, "learning_rate": 2.8243642765017993e-06, "loss": 0.1507, "step": 5271 }, { "epoch": 0.48574192656746673, "grad_norm": 0.8485048602690964, "learning_rate": 2.8236089176824926e-06, "loss": 0.123, "step": 5272 }, { "epoch": 0.4858340627447367, "grad_norm": 0.8780735623699752, "learning_rate": 2.8228535288160647e-06, "loss": 0.1285, "step": 5273 }, { "epoch": 0.4859261989220067, "grad_norm": 0.9869769002349372, "learning_rate": 2.8220981099726503e-06, "loss": 0.1501, "step": 5274 }, { "epoch": 0.4860183350992767, "grad_norm": 0.9459927723090031, "learning_rate": 2.821342661222392e-06, "loss": 0.1459, "step": 5275 }, { "epoch": 0.4861104712765467, "grad_norm": 0.943149589932418, "learning_rate": 2.8205871826354336e-06, "loss": 0.1447, "step": 5276 }, { "epoch": 0.4862026074538167, "grad_norm": 0.8907007660895891, "learning_rate": 2.819831674281921e-06, "loss": 0.1372, "step": 5277 }, { "epoch": 0.48629474363108677, "grad_norm": 0.9140557800767286, "learning_rate": 2.819076136232002e-06, "loss": 0.1374, "step": 5278 }, { "epoch": 0.48638687980835676, "grad_norm": 0.963337859306478, "learning_rate": 2.81832056855583e-06, "loss": 0.1486, "step": 5279 }, { "epoch": 0.48647901598562676, "grad_norm": 0.9274489957087415, "learning_rate": 2.8175649713235586e-06, "loss": 0.1447, "step": 5280 }, { "epoch": 0.48657115216289676, "grad_norm": 0.8655898357592946, "learning_rate": 2.8168093446053455e-06, "loss": 0.1321, "step": 5281 }, { "epoch": 0.48666328834016676, "grad_norm": 0.9215423663303324, "learning_rate": 2.816053688471351e-06, "loss": 0.1351, "step": 5282 }, { "epoch": 0.48675542451743675, "grad_norm": 0.9621694893535245, "learning_rate": 2.815298002991738e-06, "loss": 0.1382, "step": 5283 }, { "epoch": 0.48684756069470675, "grad_norm": 0.8585957646539193, "learning_rate": 2.8145422882366707e-06, "loss": 0.1251, "step": 5284 }, { "epoch": 0.4869396968719768, "grad_norm": 0.9610427782236858, "learning_rate": 2.8137865442763186e-06, "loss": 0.155, "step": 5285 }, { "epoch": 0.4870318330492468, "grad_norm": 0.8803502360568106, "learning_rate": 2.813030771180851e-06, "loss": 0.1389, "step": 5286 }, { "epoch": 0.4871239692265168, "grad_norm": 0.8809740678094535, "learning_rate": 2.8122749690204443e-06, "loss": 0.1375, "step": 5287 }, { "epoch": 0.4872161054037868, "grad_norm": 0.8903435006680406, "learning_rate": 2.8115191378652716e-06, "loss": 0.1402, "step": 5288 }, { "epoch": 0.4873082415810568, "grad_norm": 0.8398996275510962, "learning_rate": 2.810763277785514e-06, "loss": 0.1345, "step": 5289 }, { "epoch": 0.4874003777583268, "grad_norm": 0.9572207221710832, "learning_rate": 2.810007388851353e-06, "loss": 0.1616, "step": 5290 }, { "epoch": 0.48749251393559684, "grad_norm": 0.913103201905886, "learning_rate": 2.809251471132972e-06, "loss": 0.1513, "step": 5291 }, { "epoch": 0.48758465011286684, "grad_norm": 0.8682324842644803, "learning_rate": 2.808495524700559e-06, "loss": 0.1291, "step": 5292 }, { "epoch": 0.48767678629013683, "grad_norm": 0.8997230016251552, "learning_rate": 2.807739549624303e-06, "loss": 0.1389, "step": 5293 }, { "epoch": 0.48776892246740683, "grad_norm": 0.8509844715151446, "learning_rate": 2.8069835459743965e-06, "loss": 0.1393, "step": 5294 }, { "epoch": 0.4878610586446768, "grad_norm": 0.865759399554568, "learning_rate": 2.8062275138210355e-06, "loss": 0.1351, "step": 5295 }, { "epoch": 0.4879531948219468, "grad_norm": 0.8590645407142179, "learning_rate": 2.805471453234416e-06, "loss": 0.1308, "step": 5296 }, { "epoch": 0.4880453309992168, "grad_norm": 0.9560127081452957, "learning_rate": 2.80471536428474e-06, "loss": 0.1431, "step": 5297 }, { "epoch": 0.48813746717648687, "grad_norm": 0.9540596366204239, "learning_rate": 2.8039592470422096e-06, "loss": 0.147, "step": 5298 }, { "epoch": 0.48822960335375687, "grad_norm": 0.9183606271614978, "learning_rate": 2.8032031015770296e-06, "loss": 0.1493, "step": 5299 }, { "epoch": 0.48832173953102687, "grad_norm": 0.9200811671231739, "learning_rate": 2.8024469279594102e-06, "loss": 0.1383, "step": 5300 }, { "epoch": 0.48841387570829686, "grad_norm": 0.8694060487879737, "learning_rate": 2.80169072625956e-06, "loss": 0.1333, "step": 5301 }, { "epoch": 0.48850601188556686, "grad_norm": 0.9202762624654115, "learning_rate": 2.8009344965476935e-06, "loss": 0.134, "step": 5302 }, { "epoch": 0.48859814806283686, "grad_norm": 0.9367392274537912, "learning_rate": 2.8001782388940267e-06, "loss": 0.1547, "step": 5303 }, { "epoch": 0.48869028424010685, "grad_norm": 0.9219003180099276, "learning_rate": 2.7994219533687784e-06, "loss": 0.1417, "step": 5304 }, { "epoch": 0.4887824204173769, "grad_norm": 0.8394096894153247, "learning_rate": 2.79866564004217e-06, "loss": 0.1372, "step": 5305 }, { "epoch": 0.4888745565946469, "grad_norm": 0.9632709174947548, "learning_rate": 2.797909298984424e-06, "loss": 0.1501, "step": 5306 }, { "epoch": 0.4889666927719169, "grad_norm": 0.8824075293556416, "learning_rate": 2.797152930265767e-06, "loss": 0.143, "step": 5307 }, { "epoch": 0.4890588289491869, "grad_norm": 0.8817934891698063, "learning_rate": 2.796396533956429e-06, "loss": 0.1418, "step": 5308 }, { "epoch": 0.4891509651264569, "grad_norm": 0.9544326229762716, "learning_rate": 2.7956401101266407e-06, "loss": 0.1504, "step": 5309 }, { "epoch": 0.4892431013037269, "grad_norm": 0.9322273063288705, "learning_rate": 2.7948836588466373e-06, "loss": 0.1456, "step": 5310 }, { "epoch": 0.4893352374809969, "grad_norm": 0.909857805157454, "learning_rate": 2.794127180186653e-06, "loss": 0.1401, "step": 5311 }, { "epoch": 0.48942737365826694, "grad_norm": 0.939370763815054, "learning_rate": 2.7933706742169297e-06, "loss": 0.1482, "step": 5312 }, { "epoch": 0.48951950983553694, "grad_norm": 0.8992145813911244, "learning_rate": 2.792614141007707e-06, "loss": 0.1437, "step": 5313 }, { "epoch": 0.48961164601280693, "grad_norm": 0.9533663660430065, "learning_rate": 2.7918575806292305e-06, "loss": 0.1432, "step": 5314 }, { "epoch": 0.48970378219007693, "grad_norm": 0.9175012649462685, "learning_rate": 2.791100993151745e-06, "loss": 0.1449, "step": 5315 }, { "epoch": 0.4897959183673469, "grad_norm": 0.9633249103576214, "learning_rate": 2.790344378645502e-06, "loss": 0.1503, "step": 5316 }, { "epoch": 0.4898880545446169, "grad_norm": 0.9468216343031558, "learning_rate": 2.7895877371807516e-06, "loss": 0.1546, "step": 5317 }, { "epoch": 0.4899801907218869, "grad_norm": 0.90993861662428, "learning_rate": 2.7888310688277493e-06, "loss": 0.1412, "step": 5318 }, { "epoch": 0.490072326899157, "grad_norm": 0.9040365876464947, "learning_rate": 2.7880743736567505e-06, "loss": 0.141, "step": 5319 }, { "epoch": 0.49016446307642697, "grad_norm": 0.9620046505695853, "learning_rate": 2.7873176517380157e-06, "loss": 0.1545, "step": 5320 }, { "epoch": 0.49025659925369697, "grad_norm": 0.9103509663251281, "learning_rate": 2.786560903141805e-06, "loss": 0.1348, "step": 5321 }, { "epoch": 0.49034873543096696, "grad_norm": 0.8917965010613744, "learning_rate": 2.7858041279383854e-06, "loss": 0.1457, "step": 5322 }, { "epoch": 0.49044087160823696, "grad_norm": 0.8894071380853192, "learning_rate": 2.7850473261980197e-06, "loss": 0.1396, "step": 5323 }, { "epoch": 0.49053300778550696, "grad_norm": 0.9274368507998124, "learning_rate": 2.78429049799098e-06, "loss": 0.1548, "step": 5324 }, { "epoch": 0.490625143962777, "grad_norm": 0.9052156307937325, "learning_rate": 2.783533643387537e-06, "loss": 0.1438, "step": 5325 }, { "epoch": 0.490717280140047, "grad_norm": 0.8956506738203168, "learning_rate": 2.7827767624579645e-06, "loss": 0.1307, "step": 5326 }, { "epoch": 0.490809416317317, "grad_norm": 0.9182979070301321, "learning_rate": 2.7820198552725404e-06, "loss": 0.1478, "step": 5327 }, { "epoch": 0.490901552494587, "grad_norm": 0.9067870769765347, "learning_rate": 2.781262921901541e-06, "loss": 0.1323, "step": 5328 }, { "epoch": 0.490993688671857, "grad_norm": 0.9321816561616926, "learning_rate": 2.780505962415249e-06, "loss": 0.1403, "step": 5329 }, { "epoch": 0.491085824849127, "grad_norm": 0.9245267858958562, "learning_rate": 2.779748976883949e-06, "loss": 0.1419, "step": 5330 }, { "epoch": 0.491177961026397, "grad_norm": 0.9301677422865269, "learning_rate": 2.7789919653779257e-06, "loss": 0.1374, "step": 5331 }, { "epoch": 0.49127009720366704, "grad_norm": 0.9786411388579338, "learning_rate": 2.7782349279674684e-06, "loss": 0.1587, "step": 5332 }, { "epoch": 0.49136223338093704, "grad_norm": 0.912963264042218, "learning_rate": 2.7774778647228688e-06, "loss": 0.1406, "step": 5333 }, { "epoch": 0.49145436955820704, "grad_norm": 0.9085498490897894, "learning_rate": 2.7767207757144186e-06, "loss": 0.1417, "step": 5334 }, { "epoch": 0.49154650573547704, "grad_norm": 0.9226144813877271, "learning_rate": 2.7759636610124158e-06, "loss": 0.142, "step": 5335 }, { "epoch": 0.49163864191274703, "grad_norm": 0.9713959796758398, "learning_rate": 2.7752065206871564e-06, "loss": 0.1375, "step": 5336 }, { "epoch": 0.49173077809001703, "grad_norm": 0.8991677667705583, "learning_rate": 2.7744493548089425e-06, "loss": 0.1304, "step": 5337 }, { "epoch": 0.491822914267287, "grad_norm": 0.8722949087858429, "learning_rate": 2.773692163448076e-06, "loss": 0.1346, "step": 5338 }, { "epoch": 0.4919150504445571, "grad_norm": 0.9623034124111783, "learning_rate": 2.7729349466748634e-06, "loss": 0.1347, "step": 5339 }, { "epoch": 0.4920071866218271, "grad_norm": 1.0356559269275247, "learning_rate": 2.772177704559611e-06, "loss": 0.1553, "step": 5340 }, { "epoch": 0.49209932279909707, "grad_norm": 0.9648096203571851, "learning_rate": 2.7714204371726293e-06, "loss": 0.148, "step": 5341 }, { "epoch": 0.49219145897636707, "grad_norm": 0.8771413421110204, "learning_rate": 2.770663144584231e-06, "loss": 0.1315, "step": 5342 }, { "epoch": 0.49228359515363707, "grad_norm": 0.9609389946251562, "learning_rate": 2.769905826864731e-06, "loss": 0.1541, "step": 5343 }, { "epoch": 0.49237573133090706, "grad_norm": 0.9202480450196757, "learning_rate": 2.769148484084445e-06, "loss": 0.1323, "step": 5344 }, { "epoch": 0.49246786750817706, "grad_norm": 0.9168884464799304, "learning_rate": 2.7683911163136944e-06, "loss": 0.1356, "step": 5345 }, { "epoch": 0.4925600036854471, "grad_norm": 0.8905102892909667, "learning_rate": 2.767633723622799e-06, "loss": 0.1327, "step": 5346 }, { "epoch": 0.4926521398627171, "grad_norm": 0.9283258835996472, "learning_rate": 2.7668763060820842e-06, "loss": 0.1397, "step": 5347 }, { "epoch": 0.4927442760399871, "grad_norm": 0.9031577778408963, "learning_rate": 2.7661188637618752e-06, "loss": 0.1432, "step": 5348 }, { "epoch": 0.4928364122172571, "grad_norm": 0.9704106362718894, "learning_rate": 2.7653613967325018e-06, "loss": 0.1452, "step": 5349 }, { "epoch": 0.4929285483945271, "grad_norm": 0.9047274748741216, "learning_rate": 2.7646039050642926e-06, "loss": 0.141, "step": 5350 }, { "epoch": 0.4930206845717971, "grad_norm": 0.9080009969739212, "learning_rate": 2.763846388827584e-06, "loss": 0.1323, "step": 5351 }, { "epoch": 0.49311282074906715, "grad_norm": 0.9190765010543278, "learning_rate": 2.7630888480927082e-06, "loss": 0.143, "step": 5352 }, { "epoch": 0.49320495692633715, "grad_norm": 0.836165641654129, "learning_rate": 2.7623312829300053e-06, "loss": 0.1317, "step": 5353 }, { "epoch": 0.49329709310360714, "grad_norm": 1.0026628826121744, "learning_rate": 2.7615736934098146e-06, "loss": 0.1598, "step": 5354 }, { "epoch": 0.49338922928087714, "grad_norm": 0.9572287593009317, "learning_rate": 2.760816079602478e-06, "loss": 0.146, "step": 5355 }, { "epoch": 0.49348136545814714, "grad_norm": 0.8912204631678248, "learning_rate": 2.760058441578341e-06, "loss": 0.1339, "step": 5356 }, { "epoch": 0.49357350163541713, "grad_norm": 0.914931483053279, "learning_rate": 2.7593007794077493e-06, "loss": 0.1279, "step": 5357 }, { "epoch": 0.49366563781268713, "grad_norm": 0.9112520500430483, "learning_rate": 2.7585430931610526e-06, "loss": 0.1336, "step": 5358 }, { "epoch": 0.4937577739899572, "grad_norm": 0.8758937123292749, "learning_rate": 2.7577853829086014e-06, "loss": 0.133, "step": 5359 }, { "epoch": 0.4938499101672272, "grad_norm": 0.9703690055488118, "learning_rate": 2.7570276487207504e-06, "loss": 0.1546, "step": 5360 }, { "epoch": 0.4939420463444972, "grad_norm": 0.9070344715122353, "learning_rate": 2.7562698906678537e-06, "loss": 0.1281, "step": 5361 }, { "epoch": 0.4940341825217672, "grad_norm": 1.0743999343067259, "learning_rate": 2.755512108820271e-06, "loss": 0.1499, "step": 5362 }, { "epoch": 0.49412631869903717, "grad_norm": 0.8768062588403824, "learning_rate": 2.7547543032483604e-06, "loss": 0.1268, "step": 5363 }, { "epoch": 0.49421845487630717, "grad_norm": 0.9074114494072715, "learning_rate": 2.753996474022486e-06, "loss": 0.1355, "step": 5364 }, { "epoch": 0.49431059105357716, "grad_norm": 0.9424744962467466, "learning_rate": 2.753238621213012e-06, "loss": 0.1412, "step": 5365 }, { "epoch": 0.4944027272308472, "grad_norm": 0.9539914855888854, "learning_rate": 2.752480744890304e-06, "loss": 0.1555, "step": 5366 }, { "epoch": 0.4944948634081172, "grad_norm": 0.8823231270895644, "learning_rate": 2.751722845124732e-06, "loss": 0.1392, "step": 5367 }, { "epoch": 0.4945869995853872, "grad_norm": 0.9403023365444029, "learning_rate": 2.750964921986667e-06, "loss": 0.1488, "step": 5368 }, { "epoch": 0.4946791357626572, "grad_norm": 0.8975289021275779, "learning_rate": 2.750206975546481e-06, "loss": 0.1361, "step": 5369 }, { "epoch": 0.4947712719399272, "grad_norm": 0.9253417573353605, "learning_rate": 2.7494490058745514e-06, "loss": 0.1425, "step": 5370 }, { "epoch": 0.4948634081171972, "grad_norm": 0.9328707319829574, "learning_rate": 2.7486910130412543e-06, "loss": 0.1505, "step": 5371 }, { "epoch": 0.4949555442944672, "grad_norm": 0.8941550491967376, "learning_rate": 2.74793299711697e-06, "loss": 0.1312, "step": 5372 }, { "epoch": 0.49504768047173725, "grad_norm": 0.9099114610645433, "learning_rate": 2.747174958172081e-06, "loss": 0.1349, "step": 5373 }, { "epoch": 0.49513981664900725, "grad_norm": 0.8779917259058921, "learning_rate": 2.7464168962769696e-06, "loss": 0.1314, "step": 5374 }, { "epoch": 0.49523195282627724, "grad_norm": 0.9125090915870446, "learning_rate": 2.745658811502023e-06, "loss": 0.1363, "step": 5375 }, { "epoch": 0.49532408900354724, "grad_norm": 0.9775013337024041, "learning_rate": 2.7449007039176296e-06, "loss": 0.1473, "step": 5376 }, { "epoch": 0.49541622518081724, "grad_norm": 0.8985672527780988, "learning_rate": 2.7441425735941787e-06, "loss": 0.1443, "step": 5377 }, { "epoch": 0.49550836135808723, "grad_norm": 0.9152198455379926, "learning_rate": 2.7433844206020643e-06, "loss": 0.1395, "step": 5378 }, { "epoch": 0.49560049753535723, "grad_norm": 0.8692587899638442, "learning_rate": 2.7426262450116798e-06, "loss": 0.1301, "step": 5379 }, { "epoch": 0.4956926337126273, "grad_norm": 0.968301271071014, "learning_rate": 2.7418680468934227e-06, "loss": 0.141, "step": 5380 }, { "epoch": 0.4957847698898973, "grad_norm": 0.9688367958386958, "learning_rate": 2.7411098263176917e-06, "loss": 0.1474, "step": 5381 }, { "epoch": 0.4958769060671673, "grad_norm": 0.9636259047294249, "learning_rate": 2.740351583354886e-06, "loss": 0.1466, "step": 5382 }, { "epoch": 0.4959690422444373, "grad_norm": 0.9710179385144946, "learning_rate": 2.739593318075412e-06, "loss": 0.1511, "step": 5383 }, { "epoch": 0.49606117842170727, "grad_norm": 0.9860533062959879, "learning_rate": 2.7388350305496708e-06, "loss": 0.1531, "step": 5384 }, { "epoch": 0.49615331459897727, "grad_norm": 0.8845136650001137, "learning_rate": 2.7380767208480726e-06, "loss": 0.1335, "step": 5385 }, { "epoch": 0.4962454507762473, "grad_norm": 0.857431418417288, "learning_rate": 2.7373183890410245e-06, "loss": 0.1266, "step": 5386 }, { "epoch": 0.4963375869535173, "grad_norm": 0.8725792617659032, "learning_rate": 2.7365600351989386e-06, "loss": 0.139, "step": 5387 }, { "epoch": 0.4964297231307873, "grad_norm": 0.9327533333797368, "learning_rate": 2.7358016593922283e-06, "loss": 0.147, "step": 5388 }, { "epoch": 0.4965218593080573, "grad_norm": 0.8981246518761035, "learning_rate": 2.7350432616913083e-06, "loss": 0.1345, "step": 5389 }, { "epoch": 0.4966139954853273, "grad_norm": 0.9240269830117442, "learning_rate": 2.734284842166596e-06, "loss": 0.1558, "step": 5390 }, { "epoch": 0.4967061316625973, "grad_norm": 0.9839833653231123, "learning_rate": 2.733526400888511e-06, "loss": 0.1562, "step": 5391 }, { "epoch": 0.4967982678398673, "grad_norm": 0.9071812673870605, "learning_rate": 2.732767937927474e-06, "loss": 0.1365, "step": 5392 }, { "epoch": 0.49689040401713735, "grad_norm": 0.869434467607522, "learning_rate": 2.73200945335391e-06, "loss": 0.1372, "step": 5393 }, { "epoch": 0.49698254019440735, "grad_norm": 0.9389875824273619, "learning_rate": 2.7312509472382425e-06, "loss": 0.151, "step": 5394 }, { "epoch": 0.49707467637167735, "grad_norm": 0.8639226483522556, "learning_rate": 2.7304924196509004e-06, "loss": 0.125, "step": 5395 }, { "epoch": 0.49716681254894735, "grad_norm": 0.9154900592463313, "learning_rate": 2.7297338706623113e-06, "loss": 0.1392, "step": 5396 }, { "epoch": 0.49725894872621734, "grad_norm": 0.9468057879828546, "learning_rate": 2.728975300342909e-06, "loss": 0.1349, "step": 5397 }, { "epoch": 0.49735108490348734, "grad_norm": 0.942572127556101, "learning_rate": 2.7282167087631234e-06, "loss": 0.1487, "step": 5398 }, { "epoch": 0.49744322108075734, "grad_norm": 0.9139656094622787, "learning_rate": 2.7274580959933933e-06, "loss": 0.1452, "step": 5399 }, { "epoch": 0.4975353572580274, "grad_norm": 0.8875985109012006, "learning_rate": 2.726699462104154e-06, "loss": 0.1327, "step": 5400 }, { "epoch": 0.4976274934352974, "grad_norm": 0.9556085304450759, "learning_rate": 2.7259408071658456e-06, "loss": 0.1439, "step": 5401 }, { "epoch": 0.4977196296125674, "grad_norm": 1.0220365803003981, "learning_rate": 2.725182131248909e-06, "loss": 0.1409, "step": 5402 }, { "epoch": 0.4978117657898374, "grad_norm": 0.9580616109925933, "learning_rate": 2.724423434423787e-06, "loss": 0.1477, "step": 5403 }, { "epoch": 0.4979039019671074, "grad_norm": 0.9150371096328926, "learning_rate": 2.7236647167609246e-06, "loss": 0.1345, "step": 5404 }, { "epoch": 0.4979960381443774, "grad_norm": 0.9365443118478644, "learning_rate": 2.7229059783307703e-06, "loss": 0.1501, "step": 5405 }, { "epoch": 0.49808817432164737, "grad_norm": 0.9815020710994572, "learning_rate": 2.7221472192037707e-06, "loss": 0.1408, "step": 5406 }, { "epoch": 0.4981803104989174, "grad_norm": 0.980382203078079, "learning_rate": 2.721388439450379e-06, "loss": 0.1447, "step": 5407 }, { "epoch": 0.4982724466761874, "grad_norm": 0.9089049802957376, "learning_rate": 2.7206296391410457e-06, "loss": 0.1448, "step": 5408 }, { "epoch": 0.4983645828534574, "grad_norm": 0.946696566658947, "learning_rate": 2.7198708183462275e-06, "loss": 0.1367, "step": 5409 }, { "epoch": 0.4984567190307274, "grad_norm": 0.9457376021726533, "learning_rate": 2.71911197713638e-06, "loss": 0.1422, "step": 5410 }, { "epoch": 0.4985488552079974, "grad_norm": 0.8824387290721871, "learning_rate": 2.7183531155819607e-06, "loss": 0.1421, "step": 5411 }, { "epoch": 0.4986409913852674, "grad_norm": 0.8761674752566213, "learning_rate": 2.7175942337534326e-06, "loss": 0.1242, "step": 5412 }, { "epoch": 0.4987331275625374, "grad_norm": 0.9330486674418532, "learning_rate": 2.7168353317212565e-06, "loss": 0.1435, "step": 5413 }, { "epoch": 0.49882526373980746, "grad_norm": 0.9235257314844412, "learning_rate": 2.7160764095558954e-06, "loss": 0.1385, "step": 5414 }, { "epoch": 0.49891739991707745, "grad_norm": 0.9117658297669242, "learning_rate": 2.7153174673278174e-06, "loss": 0.139, "step": 5415 }, { "epoch": 0.49900953609434745, "grad_norm": 0.898645968059442, "learning_rate": 2.7145585051074893e-06, "loss": 0.1388, "step": 5416 }, { "epoch": 0.49910167227161745, "grad_norm": 0.961300262470349, "learning_rate": 2.7137995229653803e-06, "loss": 0.1457, "step": 5417 }, { "epoch": 0.49919380844888744, "grad_norm": 0.9290136686056124, "learning_rate": 2.7130405209719637e-06, "loss": 0.1307, "step": 5418 }, { "epoch": 0.49928594462615744, "grad_norm": 0.9459474842848344, "learning_rate": 2.7122814991977104e-06, "loss": 0.1405, "step": 5419 }, { "epoch": 0.4993780808034275, "grad_norm": 0.9241431470133946, "learning_rate": 2.711522457713098e-06, "loss": 0.1334, "step": 5420 }, { "epoch": 0.4994702169806975, "grad_norm": 0.9390340034093618, "learning_rate": 2.710763396588602e-06, "loss": 0.1307, "step": 5421 }, { "epoch": 0.4995623531579675, "grad_norm": 0.9981828709590618, "learning_rate": 2.7100043158947027e-06, "loss": 0.1405, "step": 5422 }, { "epoch": 0.4996544893352375, "grad_norm": 1.0367977847526075, "learning_rate": 2.7092452157018795e-06, "loss": 0.1536, "step": 5423 }, { "epoch": 0.4997466255125075, "grad_norm": 0.9040763895456477, "learning_rate": 2.708486096080616e-06, "loss": 0.1362, "step": 5424 }, { "epoch": 0.4998387616897775, "grad_norm": 0.9262236757461823, "learning_rate": 2.7077269571013947e-06, "loss": 0.1311, "step": 5425 }, { "epoch": 0.4999308978670475, "grad_norm": 0.9351274103164497, "learning_rate": 2.7069677988347048e-06, "loss": 0.1466, "step": 5426 }, { "epoch": 0.5000230340443175, "grad_norm": 1.010791283106135, "learning_rate": 2.7062086213510315e-06, "loss": 0.1392, "step": 5427 }, { "epoch": 0.5001151702215875, "grad_norm": 0.9800680940213662, "learning_rate": 2.705449424720866e-06, "loss": 0.1393, "step": 5428 }, { "epoch": 0.5002073063988575, "grad_norm": 0.9090025369824177, "learning_rate": 2.7046902090146986e-06, "loss": 0.1332, "step": 5429 }, { "epoch": 0.5002994425761275, "grad_norm": 0.8947906760810447, "learning_rate": 2.703930974303024e-06, "loss": 0.1257, "step": 5430 }, { "epoch": 0.5003915787533976, "grad_norm": 0.918143860684268, "learning_rate": 2.703171720656336e-06, "loss": 0.1377, "step": 5431 }, { "epoch": 0.5004837149306676, "grad_norm": 0.8565108609127393, "learning_rate": 2.7024124481451323e-06, "loss": 0.1342, "step": 5432 }, { "epoch": 0.5005758511079376, "grad_norm": 0.9133626021685404, "learning_rate": 2.701653156839911e-06, "loss": 0.1407, "step": 5433 }, { "epoch": 0.5006679872852076, "grad_norm": 0.909655782017589, "learning_rate": 2.700893846811172e-06, "loss": 0.1474, "step": 5434 }, { "epoch": 0.5007601234624776, "grad_norm": 0.9613093139686264, "learning_rate": 2.700134518129418e-06, "loss": 0.1483, "step": 5435 }, { "epoch": 0.5008522596397476, "grad_norm": 0.922464421682086, "learning_rate": 2.699375170865152e-06, "loss": 0.1331, "step": 5436 }, { "epoch": 0.5009443958170176, "grad_norm": 0.8953135114057215, "learning_rate": 2.6986158050888804e-06, "loss": 0.1182, "step": 5437 }, { "epoch": 0.5010365319942875, "grad_norm": 0.9488693208071233, "learning_rate": 2.6978564208711098e-06, "loss": 0.138, "step": 5438 }, { "epoch": 0.5011286681715575, "grad_norm": 0.9225635631092897, "learning_rate": 2.697097018282349e-06, "loss": 0.1225, "step": 5439 }, { "epoch": 0.5012208043488275, "grad_norm": 0.9820006390170077, "learning_rate": 2.6963375973931095e-06, "loss": 0.1456, "step": 5440 }, { "epoch": 0.5013129405260975, "grad_norm": 0.9329638194227488, "learning_rate": 2.6955781582739028e-06, "loss": 0.1421, "step": 5441 }, { "epoch": 0.5014050767033675, "grad_norm": 0.9709775019424565, "learning_rate": 2.6948187009952426e-06, "loss": 0.1469, "step": 5442 }, { "epoch": 0.5014972128806375, "grad_norm": 0.8940728365805736, "learning_rate": 2.6940592256276455e-06, "loss": 0.1338, "step": 5443 }, { "epoch": 0.5015893490579076, "grad_norm": 0.9582589895606664, "learning_rate": 2.6932997322416276e-06, "loss": 0.1418, "step": 5444 }, { "epoch": 0.5016814852351776, "grad_norm": 0.9555495425316477, "learning_rate": 2.6925402209077096e-06, "loss": 0.1493, "step": 5445 }, { "epoch": 0.5017736214124476, "grad_norm": 0.8761008247398887, "learning_rate": 2.6917806916964107e-06, "loss": 0.133, "step": 5446 }, { "epoch": 0.5018657575897176, "grad_norm": 0.8917513046762765, "learning_rate": 2.691021144678254e-06, "loss": 0.139, "step": 5447 }, { "epoch": 0.5019578937669876, "grad_norm": 0.9523258198573107, "learning_rate": 2.690261579923764e-06, "loss": 0.1487, "step": 5448 }, { "epoch": 0.5020500299442576, "grad_norm": 0.8774773911026075, "learning_rate": 2.689501997503466e-06, "loss": 0.138, "step": 5449 }, { "epoch": 0.5021421661215276, "grad_norm": 0.9107629385972631, "learning_rate": 2.688742397487887e-06, "loss": 0.1475, "step": 5450 }, { "epoch": 0.5022343022987976, "grad_norm": 0.8954104101874873, "learning_rate": 2.6879827799475557e-06, "loss": 0.1432, "step": 5451 }, { "epoch": 0.5023264384760676, "grad_norm": 0.951129195820262, "learning_rate": 2.6872231449530027e-06, "loss": 0.1456, "step": 5452 }, { "epoch": 0.5024185746533376, "grad_norm": 0.9334140124428768, "learning_rate": 2.686463492574761e-06, "loss": 0.1537, "step": 5453 }, { "epoch": 0.5025107108306076, "grad_norm": 0.9532927507320598, "learning_rate": 2.6857038228833644e-06, "loss": 0.1485, "step": 5454 }, { "epoch": 0.5026028470078776, "grad_norm": 0.8330878352738564, "learning_rate": 2.6849441359493474e-06, "loss": 0.1158, "step": 5455 }, { "epoch": 0.5026949831851476, "grad_norm": 0.9220034819099294, "learning_rate": 2.6841844318432476e-06, "loss": 0.1396, "step": 5456 }, { "epoch": 0.5027871193624176, "grad_norm": 0.9118685665157755, "learning_rate": 2.683424710635603e-06, "loss": 0.1284, "step": 5457 }, { "epoch": 0.5028792555396877, "grad_norm": 0.8654389397910528, "learning_rate": 2.682664972396955e-06, "loss": 0.1283, "step": 5458 }, { "epoch": 0.5029713917169577, "grad_norm": 0.884857390795016, "learning_rate": 2.6819052171978443e-06, "loss": 0.1394, "step": 5459 }, { "epoch": 0.5030635278942277, "grad_norm": 0.894611570661682, "learning_rate": 2.681145445108814e-06, "loss": 0.1299, "step": 5460 }, { "epoch": 0.5031556640714977, "grad_norm": 0.9548521712233243, "learning_rate": 2.6803856562004112e-06, "loss": 0.1446, "step": 5461 }, { "epoch": 0.5032478002487677, "grad_norm": 0.9100534643805418, "learning_rate": 2.6796258505431786e-06, "loss": 0.1287, "step": 5462 }, { "epoch": 0.5033399364260377, "grad_norm": 0.9261093041169923, "learning_rate": 2.6788660282076682e-06, "loss": 0.1397, "step": 5463 }, { "epoch": 0.5034320726033077, "grad_norm": 0.9255465069936254, "learning_rate": 2.6781061892644273e-06, "loss": 0.1333, "step": 5464 }, { "epoch": 0.5035242087805777, "grad_norm": 0.8713776146436004, "learning_rate": 2.6773463337840078e-06, "loss": 0.1392, "step": 5465 }, { "epoch": 0.5036163449578477, "grad_norm": 0.9341165883169124, "learning_rate": 2.676586461836962e-06, "loss": 0.1554, "step": 5466 }, { "epoch": 0.5037084811351177, "grad_norm": 0.8604949495271219, "learning_rate": 2.6758265734938444e-06, "loss": 0.1443, "step": 5467 }, { "epoch": 0.5038006173123877, "grad_norm": 0.8940670948046049, "learning_rate": 2.67506666882521e-06, "loss": 0.1383, "step": 5468 }, { "epoch": 0.5038927534896577, "grad_norm": 0.9117729021006208, "learning_rate": 2.6743067479016166e-06, "loss": 0.147, "step": 5469 }, { "epoch": 0.5039848896669277, "grad_norm": 0.909075371011717, "learning_rate": 2.673546810793623e-06, "loss": 0.1298, "step": 5470 }, { "epoch": 0.5040770258441978, "grad_norm": 0.9179808077227184, "learning_rate": 2.6727868575717893e-06, "loss": 0.1472, "step": 5471 }, { "epoch": 0.5041691620214678, "grad_norm": 0.9233613284382322, "learning_rate": 2.6720268883066773e-06, "loss": 0.1291, "step": 5472 }, { "epoch": 0.5042612981987378, "grad_norm": 0.9615650138923706, "learning_rate": 2.6712669030688503e-06, "loss": 0.1502, "step": 5473 }, { "epoch": 0.5043534343760078, "grad_norm": 0.9397692153613146, "learning_rate": 2.6705069019288733e-06, "loss": 0.1463, "step": 5474 }, { "epoch": 0.5044455705532778, "grad_norm": 1.0194186132210752, "learning_rate": 2.6697468849573114e-06, "loss": 0.1598, "step": 5475 }, { "epoch": 0.5045377067305478, "grad_norm": 0.9924246077269206, "learning_rate": 2.6689868522247334e-06, "loss": 0.1521, "step": 5476 }, { "epoch": 0.5046298429078178, "grad_norm": 0.9255855941944566, "learning_rate": 2.668226803801708e-06, "loss": 0.1385, "step": 5477 }, { "epoch": 0.5047219790850878, "grad_norm": 0.8860193614954378, "learning_rate": 2.6674667397588056e-06, "loss": 0.1335, "step": 5478 }, { "epoch": 0.5048141152623578, "grad_norm": 0.954126434296226, "learning_rate": 2.666706660166598e-06, "loss": 0.1539, "step": 5479 }, { "epoch": 0.5049062514396278, "grad_norm": 0.9327823851336733, "learning_rate": 2.66594656509566e-06, "loss": 0.1436, "step": 5480 }, { "epoch": 0.5049983876168977, "grad_norm": 0.9487423920612162, "learning_rate": 2.665186454616565e-06, "loss": 0.131, "step": 5481 }, { "epoch": 0.5050905237941677, "grad_norm": 0.9131599113448318, "learning_rate": 2.66442632879989e-06, "loss": 0.1465, "step": 5482 }, { "epoch": 0.5051826599714377, "grad_norm": 0.8724986437513693, "learning_rate": 2.663666187716213e-06, "loss": 0.1257, "step": 5483 }, { "epoch": 0.5052747961487077, "grad_norm": 0.910959624134114, "learning_rate": 2.662906031436112e-06, "loss": 0.1379, "step": 5484 }, { "epoch": 0.5053669323259778, "grad_norm": 0.9769423030222774, "learning_rate": 2.662145860030169e-06, "loss": 0.1457, "step": 5485 }, { "epoch": 0.5054590685032478, "grad_norm": 0.9195321853537671, "learning_rate": 2.6613856735689656e-06, "loss": 0.1315, "step": 5486 }, { "epoch": 0.5055512046805178, "grad_norm": 0.8813823710242042, "learning_rate": 2.6606254721230845e-06, "loss": 0.1325, "step": 5487 }, { "epoch": 0.5056433408577878, "grad_norm": 0.9379783302984157, "learning_rate": 2.6598652557631123e-06, "loss": 0.1525, "step": 5488 }, { "epoch": 0.5057354770350578, "grad_norm": 0.9048060270273836, "learning_rate": 2.6591050245596322e-06, "loss": 0.1298, "step": 5489 }, { "epoch": 0.5058276132123278, "grad_norm": 0.9650276864334607, "learning_rate": 2.6583447785832343e-06, "loss": 0.1389, "step": 5490 }, { "epoch": 0.5059197493895978, "grad_norm": 0.8938180533740254, "learning_rate": 2.657584517904507e-06, "loss": 0.13, "step": 5491 }, { "epoch": 0.5060118855668678, "grad_norm": 0.9550953969484592, "learning_rate": 2.65682424259404e-06, "loss": 0.1443, "step": 5492 }, { "epoch": 0.5061040217441378, "grad_norm": 0.9048846266087659, "learning_rate": 2.6560639527224246e-06, "loss": 0.1368, "step": 5493 }, { "epoch": 0.5061961579214078, "grad_norm": 0.9388284521232664, "learning_rate": 2.6553036483602553e-06, "loss": 0.1528, "step": 5494 }, { "epoch": 0.5062882940986778, "grad_norm": 0.9094867358427684, "learning_rate": 2.6545433295781242e-06, "loss": 0.1417, "step": 5495 }, { "epoch": 0.5063804302759478, "grad_norm": 0.917149402691518, "learning_rate": 2.65378299644663e-06, "loss": 0.1406, "step": 5496 }, { "epoch": 0.5064725664532178, "grad_norm": 0.8829207817590017, "learning_rate": 2.653022649036367e-06, "loss": 0.1236, "step": 5497 }, { "epoch": 0.5065647026304878, "grad_norm": 0.8893997459270351, "learning_rate": 2.652262287417935e-06, "loss": 0.1372, "step": 5498 }, { "epoch": 0.5066568388077579, "grad_norm": 0.9035837066275488, "learning_rate": 2.6515019116619327e-06, "loss": 0.1374, "step": 5499 }, { "epoch": 0.5067489749850279, "grad_norm": 0.8749623890705591, "learning_rate": 2.6507415218389616e-06, "loss": 0.1344, "step": 5500 }, { "epoch": 0.5067489749850279, "eval_loss": 0.13985012471675873, "eval_runtime": 299.8724, "eval_samples_per_second": 23.4, "eval_steps_per_second": 2.928, "step": 5500 }, { "epoch": 0.5068411111622979, "grad_norm": 0.8972053443275787, "learning_rate": 2.649981118019625e-06, "loss": 0.1325, "step": 5501 }, { "epoch": 0.5069332473395679, "grad_norm": 0.9610764579191868, "learning_rate": 2.649220700274524e-06, "loss": 0.1421, "step": 5502 }, { "epoch": 0.5070253835168379, "grad_norm": 0.9421384788983665, "learning_rate": 2.648460268674266e-06, "loss": 0.1502, "step": 5503 }, { "epoch": 0.5071175196941079, "grad_norm": 0.8800435296498718, "learning_rate": 2.6476998232894557e-06, "loss": 0.1352, "step": 5504 }, { "epoch": 0.5072096558713779, "grad_norm": 0.917670590492245, "learning_rate": 2.646939364190701e-06, "loss": 0.1425, "step": 5505 }, { "epoch": 0.5073017920486479, "grad_norm": 0.9647345651378058, "learning_rate": 2.6461788914486098e-06, "loss": 0.1458, "step": 5506 }, { "epoch": 0.5073939282259179, "grad_norm": 0.9404490001911155, "learning_rate": 2.6454184051337946e-06, "loss": 0.1581, "step": 5507 }, { "epoch": 0.5074860644031879, "grad_norm": 0.8676278066012653, "learning_rate": 2.644657905316863e-06, "loss": 0.1316, "step": 5508 }, { "epoch": 0.5075782005804579, "grad_norm": 0.9111223393555133, "learning_rate": 2.64389739206843e-06, "loss": 0.1482, "step": 5509 }, { "epoch": 0.5076703367577279, "grad_norm": 0.9475807706333695, "learning_rate": 2.6431368654591087e-06, "loss": 0.1519, "step": 5510 }, { "epoch": 0.5077624729349979, "grad_norm": 0.9054563411368267, "learning_rate": 2.6423763255595143e-06, "loss": 0.146, "step": 5511 }, { "epoch": 0.507854609112268, "grad_norm": 0.9368488337401683, "learning_rate": 2.6416157724402626e-06, "loss": 0.1381, "step": 5512 }, { "epoch": 0.507946745289538, "grad_norm": 0.9040531100606465, "learning_rate": 2.640855206171971e-06, "loss": 0.1371, "step": 5513 }, { "epoch": 0.508038881466808, "grad_norm": 0.9249124192962886, "learning_rate": 2.640094626825258e-06, "loss": 0.1355, "step": 5514 }, { "epoch": 0.508131017644078, "grad_norm": 0.9403553647015074, "learning_rate": 2.6393340344707448e-06, "loss": 0.1459, "step": 5515 }, { "epoch": 0.508223153821348, "grad_norm": 0.9221283008538266, "learning_rate": 2.6385734291790506e-06, "loss": 0.1307, "step": 5516 }, { "epoch": 0.508315289998618, "grad_norm": 0.9181336545301192, "learning_rate": 2.637812811020799e-06, "loss": 0.1428, "step": 5517 }, { "epoch": 0.508407426175888, "grad_norm": 0.9060067197014477, "learning_rate": 2.6370521800666127e-06, "loss": 0.1323, "step": 5518 }, { "epoch": 0.508499562353158, "grad_norm": 0.8997448706666332, "learning_rate": 2.636291536387117e-06, "loss": 0.136, "step": 5519 }, { "epoch": 0.508591698530428, "grad_norm": 0.905826181470098, "learning_rate": 2.6355308800529377e-06, "loss": 0.1356, "step": 5520 }, { "epoch": 0.508683834707698, "grad_norm": 0.9173879086665507, "learning_rate": 2.6347702111347017e-06, "loss": 0.144, "step": 5521 }, { "epoch": 0.508775970884968, "grad_norm": 0.9378280218523944, "learning_rate": 2.634009529703036e-06, "loss": 0.1344, "step": 5522 }, { "epoch": 0.508868107062238, "grad_norm": 0.8970030378074605, "learning_rate": 2.633248835828573e-06, "loss": 0.1177, "step": 5523 }, { "epoch": 0.508960243239508, "grad_norm": 0.9873589104671638, "learning_rate": 2.6324881295819394e-06, "loss": 0.1525, "step": 5524 }, { "epoch": 0.509052379416778, "grad_norm": 0.8669878479759109, "learning_rate": 2.6317274110337692e-06, "loss": 0.1277, "step": 5525 }, { "epoch": 0.509144515594048, "grad_norm": 0.9081281065004965, "learning_rate": 2.6309666802546953e-06, "loss": 0.1294, "step": 5526 }, { "epoch": 0.509236651771318, "grad_norm": 0.9014846717124748, "learning_rate": 2.63020593731535e-06, "loss": 0.1291, "step": 5527 }, { "epoch": 0.509328787948588, "grad_norm": 0.9802281228815567, "learning_rate": 2.629445182286371e-06, "loss": 0.145, "step": 5528 }, { "epoch": 0.509420924125858, "grad_norm": 0.9134326612787356, "learning_rate": 2.6286844152383913e-06, "loss": 0.1384, "step": 5529 }, { "epoch": 0.509513060303128, "grad_norm": 0.9077152752905923, "learning_rate": 2.6279236362420503e-06, "loss": 0.1403, "step": 5530 }, { "epoch": 0.509605196480398, "grad_norm": 0.8849225997974401, "learning_rate": 2.6271628453679865e-06, "loss": 0.1391, "step": 5531 }, { "epoch": 0.509697332657668, "grad_norm": 0.9200720102028345, "learning_rate": 2.6264020426868382e-06, "loss": 0.1477, "step": 5532 }, { "epoch": 0.509789468834938, "grad_norm": 0.8476479866854897, "learning_rate": 2.6256412282692467e-06, "loss": 0.1334, "step": 5533 }, { "epoch": 0.509881605012208, "grad_norm": 0.8497457959986047, "learning_rate": 2.6248804021858538e-06, "loss": 0.1312, "step": 5534 }, { "epoch": 0.509973741189478, "grad_norm": 0.9003692531477517, "learning_rate": 2.624119564507302e-06, "loss": 0.139, "step": 5535 }, { "epoch": 0.510065877366748, "grad_norm": 0.8630292481575238, "learning_rate": 2.6233587153042356e-06, "loss": 0.1279, "step": 5536 }, { "epoch": 0.510158013544018, "grad_norm": 0.9052917352915406, "learning_rate": 2.6225978546472985e-06, "loss": 0.1267, "step": 5537 }, { "epoch": 0.510250149721288, "grad_norm": 0.8627747624985834, "learning_rate": 2.621836982607138e-06, "loss": 0.133, "step": 5538 }, { "epoch": 0.5103422858985581, "grad_norm": 0.9549906006503575, "learning_rate": 2.6210760992544004e-06, "loss": 0.1357, "step": 5539 }, { "epoch": 0.5104344220758281, "grad_norm": 0.9526451014635038, "learning_rate": 2.6203152046597343e-06, "loss": 0.1423, "step": 5540 }, { "epoch": 0.5105265582530981, "grad_norm": 0.861004252901416, "learning_rate": 2.6195542988937882e-06, "loss": 0.1395, "step": 5541 }, { "epoch": 0.5106186944303681, "grad_norm": 0.9027357450630178, "learning_rate": 2.6187933820272128e-06, "loss": 0.142, "step": 5542 }, { "epoch": 0.5107108306076381, "grad_norm": 0.891785231864971, "learning_rate": 2.6180324541306577e-06, "loss": 0.1385, "step": 5543 }, { "epoch": 0.5108029667849081, "grad_norm": 0.8647280432780181, "learning_rate": 2.6172715152747784e-06, "loss": 0.129, "step": 5544 }, { "epoch": 0.5108951029621781, "grad_norm": 0.8875513460819612, "learning_rate": 2.6165105655302252e-06, "loss": 0.1359, "step": 5545 }, { "epoch": 0.5109872391394481, "grad_norm": 0.9032613573115922, "learning_rate": 2.615749604967654e-06, "loss": 0.1406, "step": 5546 }, { "epoch": 0.5110793753167181, "grad_norm": 0.8882411960902156, "learning_rate": 2.614988633657719e-06, "loss": 0.1361, "step": 5547 }, { "epoch": 0.5111715114939881, "grad_norm": 0.867215069051396, "learning_rate": 2.614227651671078e-06, "loss": 0.1335, "step": 5548 }, { "epoch": 0.5112636476712581, "grad_norm": 0.8963000008178789, "learning_rate": 2.6134666590783856e-06, "loss": 0.1453, "step": 5549 }, { "epoch": 0.5113557838485281, "grad_norm": 0.9627517202967072, "learning_rate": 2.6127056559503035e-06, "loss": 0.1392, "step": 5550 }, { "epoch": 0.5114479200257981, "grad_norm": 0.898582511766749, "learning_rate": 2.611944642357488e-06, "loss": 0.1307, "step": 5551 }, { "epoch": 0.5115400562030681, "grad_norm": 0.8994720606560878, "learning_rate": 2.611183618370601e-06, "loss": 0.1424, "step": 5552 }, { "epoch": 0.5116321923803382, "grad_norm": 0.9189882349593465, "learning_rate": 2.6104225840603026e-06, "loss": 0.1447, "step": 5553 }, { "epoch": 0.5117243285576082, "grad_norm": 0.8508772095700861, "learning_rate": 2.609661539497255e-06, "loss": 0.1294, "step": 5554 }, { "epoch": 0.5118164647348782, "grad_norm": 0.9372166101365486, "learning_rate": 2.6089004847521227e-06, "loss": 0.1384, "step": 5555 }, { "epoch": 0.5119086009121482, "grad_norm": 0.9239599944745616, "learning_rate": 2.608139419895568e-06, "loss": 0.1349, "step": 5556 }, { "epoch": 0.5120007370894182, "grad_norm": 0.9349790401903082, "learning_rate": 2.6073783449982563e-06, "loss": 0.138, "step": 5557 }, { "epoch": 0.5120928732666882, "grad_norm": 0.8800466157716698, "learning_rate": 2.6066172601308544e-06, "loss": 0.1302, "step": 5558 }, { "epoch": 0.5121850094439582, "grad_norm": 0.876448123482639, "learning_rate": 2.605856165364028e-06, "loss": 0.1362, "step": 5559 }, { "epoch": 0.5122771456212282, "grad_norm": 0.8427611964166464, "learning_rate": 2.6050950607684454e-06, "loss": 0.1289, "step": 5560 }, { "epoch": 0.5123692817984982, "grad_norm": 0.9047573143154314, "learning_rate": 2.6043339464147754e-06, "loss": 0.1369, "step": 5561 }, { "epoch": 0.5124614179757682, "grad_norm": 0.9495280124543655, "learning_rate": 2.603572822373686e-06, "loss": 0.1531, "step": 5562 }, { "epoch": 0.5125535541530382, "grad_norm": 0.9881092237552044, "learning_rate": 2.6028116887158503e-06, "loss": 0.1504, "step": 5563 }, { "epoch": 0.5126456903303082, "grad_norm": 0.8214010445612134, "learning_rate": 2.6020505455119375e-06, "loss": 0.1276, "step": 5564 }, { "epoch": 0.5127378265075782, "grad_norm": 0.8555404360624906, "learning_rate": 2.601289392832621e-06, "loss": 0.1335, "step": 5565 }, { "epoch": 0.5128299626848482, "grad_norm": 0.8853452441226398, "learning_rate": 2.6005282307485735e-06, "loss": 0.1249, "step": 5566 }, { "epoch": 0.5129220988621183, "grad_norm": 0.8881684098601418, "learning_rate": 2.5997670593304688e-06, "loss": 0.1383, "step": 5567 }, { "epoch": 0.5130142350393883, "grad_norm": 0.9486217169656922, "learning_rate": 2.5990058786489818e-06, "loss": 0.1339, "step": 5568 }, { "epoch": 0.5131063712166583, "grad_norm": 0.8598186957614298, "learning_rate": 2.5982446887747885e-06, "loss": 0.125, "step": 5569 }, { "epoch": 0.5131985073939282, "grad_norm": 0.9382037989178497, "learning_rate": 2.5974834897785646e-06, "loss": 0.141, "step": 5570 }, { "epoch": 0.5132906435711982, "grad_norm": 0.8555476200376073, "learning_rate": 2.5967222817309893e-06, "loss": 0.1276, "step": 5571 }, { "epoch": 0.5133827797484682, "grad_norm": 0.928756828249199, "learning_rate": 2.5959610647027388e-06, "loss": 0.1456, "step": 5572 }, { "epoch": 0.5134749159257382, "grad_norm": 0.9146043332944963, "learning_rate": 2.595199838764493e-06, "loss": 0.1424, "step": 5573 }, { "epoch": 0.5135670521030082, "grad_norm": 0.8719255281170171, "learning_rate": 2.5944386039869328e-06, "loss": 0.127, "step": 5574 }, { "epoch": 0.5136591882802782, "grad_norm": 0.9891327414194487, "learning_rate": 2.593677360440738e-06, "loss": 0.1535, "step": 5575 }, { "epoch": 0.5137513244575482, "grad_norm": 0.8941008360456992, "learning_rate": 2.5929161081965898e-06, "loss": 0.1252, "step": 5576 }, { "epoch": 0.5138434606348182, "grad_norm": 0.9390926757674907, "learning_rate": 2.592154847325171e-06, "loss": 0.1411, "step": 5577 }, { "epoch": 0.5139355968120882, "grad_norm": 0.8699348061966624, "learning_rate": 2.5913935778971644e-06, "loss": 0.1363, "step": 5578 }, { "epoch": 0.5140277329893582, "grad_norm": 0.9583723014793004, "learning_rate": 2.590632299983255e-06, "loss": 0.1401, "step": 5579 }, { "epoch": 0.5141198691666283, "grad_norm": 0.9182617054191736, "learning_rate": 2.589871013654126e-06, "loss": 0.134, "step": 5580 }, { "epoch": 0.5142120053438983, "grad_norm": 0.9611058899311491, "learning_rate": 2.589109718980464e-06, "loss": 0.1469, "step": 5581 }, { "epoch": 0.5143041415211683, "grad_norm": 0.8705141047969384, "learning_rate": 2.5883484160329552e-06, "loss": 0.1255, "step": 5582 }, { "epoch": 0.5143962776984383, "grad_norm": 0.8586967447912096, "learning_rate": 2.587587104882286e-06, "loss": 0.14, "step": 5583 }, { "epoch": 0.5144884138757083, "grad_norm": 0.9372494805713448, "learning_rate": 2.586825785599145e-06, "loss": 0.1277, "step": 5584 }, { "epoch": 0.5145805500529783, "grad_norm": 0.9599321610073678, "learning_rate": 2.5860644582542206e-06, "loss": 0.1567, "step": 5585 }, { "epoch": 0.5146726862302483, "grad_norm": 0.9015636760266527, "learning_rate": 2.5853031229182017e-06, "loss": 0.1218, "step": 5586 }, { "epoch": 0.5147648224075183, "grad_norm": 0.8916433289597714, "learning_rate": 2.584541779661779e-06, "loss": 0.1325, "step": 5587 }, { "epoch": 0.5148569585847883, "grad_norm": 0.8773087815609789, "learning_rate": 2.583780428555643e-06, "loss": 0.1309, "step": 5588 }, { "epoch": 0.5149490947620583, "grad_norm": 0.8559951677078586, "learning_rate": 2.5830190696704843e-06, "loss": 0.1328, "step": 5589 }, { "epoch": 0.5150412309393283, "grad_norm": 0.919260554053468, "learning_rate": 2.5822577030769972e-06, "loss": 0.1353, "step": 5590 }, { "epoch": 0.5151333671165983, "grad_norm": 0.9129424240959548, "learning_rate": 2.581496328845873e-06, "loss": 0.1382, "step": 5591 }, { "epoch": 0.5152255032938683, "grad_norm": 0.9485496218586936, "learning_rate": 2.580734947047806e-06, "loss": 0.1294, "step": 5592 }, { "epoch": 0.5153176394711383, "grad_norm": 1.0463560758101833, "learning_rate": 2.57997355775349e-06, "loss": 0.1582, "step": 5593 }, { "epoch": 0.5154097756484084, "grad_norm": 0.9828298858039358, "learning_rate": 2.5792121610336215e-06, "loss": 0.138, "step": 5594 }, { "epoch": 0.5155019118256784, "grad_norm": 0.8865690987054157, "learning_rate": 2.5784507569588947e-06, "loss": 0.133, "step": 5595 }, { "epoch": 0.5155940480029484, "grad_norm": 0.948753054611248, "learning_rate": 2.577689345600007e-06, "loss": 0.1398, "step": 5596 }, { "epoch": 0.5156861841802184, "grad_norm": 0.8963248656506673, "learning_rate": 2.5769279270276544e-06, "loss": 0.1359, "step": 5597 }, { "epoch": 0.5157783203574884, "grad_norm": 0.8633984849791445, "learning_rate": 2.5761665013125364e-06, "loss": 0.1266, "step": 5598 }, { "epoch": 0.5158704565347584, "grad_norm": 0.9111635283757924, "learning_rate": 2.5754050685253503e-06, "loss": 0.1495, "step": 5599 }, { "epoch": 0.5159625927120284, "grad_norm": 0.8888987005242416, "learning_rate": 2.5746436287367956e-06, "loss": 0.1411, "step": 5600 }, { "epoch": 0.5160547288892984, "grad_norm": 0.9027874762523549, "learning_rate": 2.5738821820175713e-06, "loss": 0.1462, "step": 5601 }, { "epoch": 0.5161468650665684, "grad_norm": 0.8936351680734343, "learning_rate": 2.573120728438379e-06, "loss": 0.1567, "step": 5602 }, { "epoch": 0.5162390012438384, "grad_norm": 0.823163896670656, "learning_rate": 2.5723592680699194e-06, "loss": 0.1141, "step": 5603 }, { "epoch": 0.5163311374211084, "grad_norm": 0.8578357763989864, "learning_rate": 2.5715978009828934e-06, "loss": 0.1252, "step": 5604 }, { "epoch": 0.5164232735983784, "grad_norm": 0.9347199730746312, "learning_rate": 2.5708363272480034e-06, "loss": 0.1471, "step": 5605 }, { "epoch": 0.5165154097756484, "grad_norm": 0.9230560472785323, "learning_rate": 2.5700748469359542e-06, "loss": 0.1462, "step": 5606 }, { "epoch": 0.5166075459529185, "grad_norm": 0.9146428934847072, "learning_rate": 2.569313360117447e-06, "loss": 0.1435, "step": 5607 }, { "epoch": 0.5166996821301885, "grad_norm": 0.8883033669285219, "learning_rate": 2.568551866863187e-06, "loss": 0.1342, "step": 5608 }, { "epoch": 0.5167918183074585, "grad_norm": 0.8902566289498707, "learning_rate": 2.567790367243879e-06, "loss": 0.1358, "step": 5609 }, { "epoch": 0.5168839544847285, "grad_norm": 0.884923794423157, "learning_rate": 2.5670288613302278e-06, "loss": 0.1363, "step": 5610 }, { "epoch": 0.5169760906619985, "grad_norm": 0.8595484693799914, "learning_rate": 2.56626734919294e-06, "loss": 0.131, "step": 5611 }, { "epoch": 0.5170682268392685, "grad_norm": 0.8881898408502089, "learning_rate": 2.5655058309027216e-06, "loss": 0.133, "step": 5612 }, { "epoch": 0.5171603630165384, "grad_norm": 0.9055171174486542, "learning_rate": 2.5647443065302797e-06, "loss": 0.1337, "step": 5613 }, { "epoch": 0.5172524991938084, "grad_norm": 0.8982698558938885, "learning_rate": 2.5639827761463217e-06, "loss": 0.1385, "step": 5614 }, { "epoch": 0.5173446353710784, "grad_norm": 0.8439364034167672, "learning_rate": 2.5632212398215563e-06, "loss": 0.1277, "step": 5615 }, { "epoch": 0.5174367715483484, "grad_norm": 0.9053369863573947, "learning_rate": 2.562459697626692e-06, "loss": 0.1322, "step": 5616 }, { "epoch": 0.5175289077256184, "grad_norm": 0.9414286983524403, "learning_rate": 2.561698149632438e-06, "loss": 0.1425, "step": 5617 }, { "epoch": 0.5176210439028884, "grad_norm": 0.8600323617972152, "learning_rate": 2.560936595909504e-06, "loss": 0.1314, "step": 5618 }, { "epoch": 0.5177131800801584, "grad_norm": 0.9022890230620475, "learning_rate": 2.560175036528601e-06, "loss": 0.13, "step": 5619 }, { "epoch": 0.5178053162574284, "grad_norm": 0.8822823466002423, "learning_rate": 2.5594134715604384e-06, "loss": 0.136, "step": 5620 }, { "epoch": 0.5178974524346985, "grad_norm": 0.8770079594534396, "learning_rate": 2.5586519010757295e-06, "loss": 0.1416, "step": 5621 }, { "epoch": 0.5179895886119685, "grad_norm": 0.9227396154729606, "learning_rate": 2.557890325145185e-06, "loss": 0.1449, "step": 5622 }, { "epoch": 0.5180817247892385, "grad_norm": 0.8965913173018901, "learning_rate": 2.5571287438395175e-06, "loss": 0.1402, "step": 5623 }, { "epoch": 0.5181738609665085, "grad_norm": 0.8834294157700694, "learning_rate": 2.5563671572294396e-06, "loss": 0.1331, "step": 5624 }, { "epoch": 0.5182659971437785, "grad_norm": 0.898106091348643, "learning_rate": 2.5556055653856667e-06, "loss": 0.1401, "step": 5625 }, { "epoch": 0.5183581333210485, "grad_norm": 0.8886607871965505, "learning_rate": 2.554843968378909e-06, "loss": 0.1356, "step": 5626 }, { "epoch": 0.5184502694983185, "grad_norm": 0.87036466401323, "learning_rate": 2.5540823662798843e-06, "loss": 0.1291, "step": 5627 }, { "epoch": 0.5185424056755885, "grad_norm": 0.996280569292839, "learning_rate": 2.553320759159305e-06, "loss": 0.1457, "step": 5628 }, { "epoch": 0.5186345418528585, "grad_norm": 0.8837610420412856, "learning_rate": 2.5525591470878886e-06, "loss": 0.1322, "step": 5629 }, { "epoch": 0.5187266780301285, "grad_norm": 0.8816532288664364, "learning_rate": 2.551797530136349e-06, "loss": 0.1291, "step": 5630 }, { "epoch": 0.5188188142073985, "grad_norm": 1.01580389465096, "learning_rate": 2.5510359083754038e-06, "loss": 0.138, "step": 5631 }, { "epoch": 0.5189109503846685, "grad_norm": 0.9453562490091288, "learning_rate": 2.5502742818757683e-06, "loss": 0.1348, "step": 5632 }, { "epoch": 0.5190030865619385, "grad_norm": 0.9264318284476458, "learning_rate": 2.549512650708161e-06, "loss": 0.1373, "step": 5633 }, { "epoch": 0.5190952227392085, "grad_norm": 0.9397210232263981, "learning_rate": 2.5487510149432974e-06, "loss": 0.1372, "step": 5634 }, { "epoch": 0.5191873589164786, "grad_norm": 0.917269244252231, "learning_rate": 2.547989374651898e-06, "loss": 0.1362, "step": 5635 }, { "epoch": 0.5192794950937486, "grad_norm": 0.9624687169062707, "learning_rate": 2.54722772990468e-06, "loss": 0.1312, "step": 5636 }, { "epoch": 0.5193716312710186, "grad_norm": 0.8989884615305049, "learning_rate": 2.546466080772362e-06, "loss": 0.1393, "step": 5637 }, { "epoch": 0.5194637674482886, "grad_norm": 0.8504854986989404, "learning_rate": 2.5457044273256635e-06, "loss": 0.1419, "step": 5638 }, { "epoch": 0.5195559036255586, "grad_norm": 0.9181894072822718, "learning_rate": 2.544942769635304e-06, "loss": 0.1371, "step": 5639 }, { "epoch": 0.5196480398028286, "grad_norm": 0.9676061229686893, "learning_rate": 2.544181107772003e-06, "loss": 0.1416, "step": 5640 }, { "epoch": 0.5197401759800986, "grad_norm": 0.8791670761476063, "learning_rate": 2.543419441806482e-06, "loss": 0.1418, "step": 5641 }, { "epoch": 0.5198323121573686, "grad_norm": 0.8491594494899254, "learning_rate": 2.5426577718094607e-06, "loss": 0.1266, "step": 5642 }, { "epoch": 0.5199244483346386, "grad_norm": 1.0354163929555338, "learning_rate": 2.541896097851661e-06, "loss": 0.1409, "step": 5643 }, { "epoch": 0.5200165845119086, "grad_norm": 0.9635321671316454, "learning_rate": 2.541134420003804e-06, "loss": 0.1365, "step": 5644 }, { "epoch": 0.5201087206891786, "grad_norm": 1.031766808189202, "learning_rate": 2.5403727383366116e-06, "loss": 0.1383, "step": 5645 }, { "epoch": 0.5202008568664486, "grad_norm": 0.8389665943046382, "learning_rate": 2.5396110529208066e-06, "loss": 0.1141, "step": 5646 }, { "epoch": 0.5202929930437186, "grad_norm": 0.9395740207321802, "learning_rate": 2.53884936382711e-06, "loss": 0.1369, "step": 5647 }, { "epoch": 0.5203851292209887, "grad_norm": 0.9084013073081592, "learning_rate": 2.538087671126247e-06, "loss": 0.1406, "step": 5648 }, { "epoch": 0.5204772653982587, "grad_norm": 0.9266640118622809, "learning_rate": 2.537325974888939e-06, "loss": 0.1386, "step": 5649 }, { "epoch": 0.5205694015755287, "grad_norm": 1.0315770750585445, "learning_rate": 2.5365642751859103e-06, "loss": 0.1348, "step": 5650 }, { "epoch": 0.5206615377527987, "grad_norm": 0.9054680007028475, "learning_rate": 2.5358025720878847e-06, "loss": 0.135, "step": 5651 }, { "epoch": 0.5207536739300687, "grad_norm": 0.9740550288932809, "learning_rate": 2.535040865665587e-06, "loss": 0.1449, "step": 5652 }, { "epoch": 0.5208458101073387, "grad_norm": 0.8951997050564433, "learning_rate": 2.53427915598974e-06, "loss": 0.1393, "step": 5653 }, { "epoch": 0.5209379462846087, "grad_norm": 0.8907261864130697, "learning_rate": 2.5335174431310705e-06, "loss": 0.1392, "step": 5654 }, { "epoch": 0.5210300824618787, "grad_norm": 0.9414523754070911, "learning_rate": 2.532755727160302e-06, "loss": 0.1455, "step": 5655 }, { "epoch": 0.5211222186391486, "grad_norm": 0.9540517476228871, "learning_rate": 2.5319940081481612e-06, "loss": 0.1235, "step": 5656 }, { "epoch": 0.5212143548164186, "grad_norm": 0.9954834340666702, "learning_rate": 2.531232286165374e-06, "loss": 0.1438, "step": 5657 }, { "epoch": 0.5213064909936886, "grad_norm": 0.8970096073321792, "learning_rate": 2.530470561282665e-06, "loss": 0.1308, "step": 5658 }, { "epoch": 0.5213986271709586, "grad_norm": 0.9058477790180682, "learning_rate": 2.5297088335707607e-06, "loss": 0.1461, "step": 5659 }, { "epoch": 0.5214907633482286, "grad_norm": 0.9711778108276775, "learning_rate": 2.5289471031003894e-06, "loss": 0.1396, "step": 5660 }, { "epoch": 0.5215828995254986, "grad_norm": 0.8543618205002567, "learning_rate": 2.528185369942275e-06, "loss": 0.1323, "step": 5661 }, { "epoch": 0.5216750357027687, "grad_norm": 0.9505400443417231, "learning_rate": 2.5274236341671464e-06, "loss": 0.1437, "step": 5662 }, { "epoch": 0.5217671718800387, "grad_norm": 0.9705759551536418, "learning_rate": 2.5266618958457305e-06, "loss": 0.1501, "step": 5663 }, { "epoch": 0.5218593080573087, "grad_norm": 0.9286699162670864, "learning_rate": 2.525900155048755e-06, "loss": 0.1351, "step": 5664 }, { "epoch": 0.5219514442345787, "grad_norm": 0.9460951212975731, "learning_rate": 2.525138411846947e-06, "loss": 0.155, "step": 5665 }, { "epoch": 0.5220435804118487, "grad_norm": 0.9521125076330914, "learning_rate": 2.524376666311035e-06, "loss": 0.1552, "step": 5666 }, { "epoch": 0.5221357165891187, "grad_norm": 0.9052726980091685, "learning_rate": 2.523614918511746e-06, "loss": 0.1253, "step": 5667 }, { "epoch": 0.5222278527663887, "grad_norm": 0.9223530508604543, "learning_rate": 2.5228531685198105e-06, "loss": 0.1265, "step": 5668 }, { "epoch": 0.5223199889436587, "grad_norm": 0.9534027425880114, "learning_rate": 2.522091416405955e-06, "loss": 0.1339, "step": 5669 }, { "epoch": 0.5224121251209287, "grad_norm": 0.9757261885463333, "learning_rate": 2.5213296622409094e-06, "loss": 0.1418, "step": 5670 }, { "epoch": 0.5225042612981987, "grad_norm": 0.948753953135261, "learning_rate": 2.5205679060954025e-06, "loss": 0.1439, "step": 5671 }, { "epoch": 0.5225963974754687, "grad_norm": 0.9326818741941446, "learning_rate": 2.5198061480401623e-06, "loss": 0.1449, "step": 5672 }, { "epoch": 0.5226885336527387, "grad_norm": 0.9363994286992702, "learning_rate": 2.519044388145921e-06, "loss": 0.1326, "step": 5673 }, { "epoch": 0.5227806698300087, "grad_norm": 0.9856542056580295, "learning_rate": 2.5182826264834046e-06, "loss": 0.1359, "step": 5674 }, { "epoch": 0.5228728060072788, "grad_norm": 1.034901023201578, "learning_rate": 2.5175208631233454e-06, "loss": 0.145, "step": 5675 }, { "epoch": 0.5229649421845488, "grad_norm": 0.9040002234718671, "learning_rate": 2.516759098136472e-06, "loss": 0.1362, "step": 5676 }, { "epoch": 0.5230570783618188, "grad_norm": 0.9295191125870893, "learning_rate": 2.515997331593514e-06, "loss": 0.1458, "step": 5677 }, { "epoch": 0.5231492145390888, "grad_norm": 0.9049169701368106, "learning_rate": 2.5152355635652027e-06, "loss": 0.137, "step": 5678 }, { "epoch": 0.5232413507163588, "grad_norm": 0.9557392345266481, "learning_rate": 2.5144737941222673e-06, "loss": 0.1471, "step": 5679 }, { "epoch": 0.5233334868936288, "grad_norm": 0.929502828589841, "learning_rate": 2.513712023335438e-06, "loss": 0.1413, "step": 5680 }, { "epoch": 0.5234256230708988, "grad_norm": 0.9097049347259758, "learning_rate": 2.512950251275447e-06, "loss": 0.1347, "step": 5681 }, { "epoch": 0.5235177592481688, "grad_norm": 0.9202841127962849, "learning_rate": 2.512188478013023e-06, "loss": 0.1366, "step": 5682 }, { "epoch": 0.5236098954254388, "grad_norm": 0.8929983406529515, "learning_rate": 2.5114267036188975e-06, "loss": 0.13, "step": 5683 }, { "epoch": 0.5237020316027088, "grad_norm": 0.9686069240868677, "learning_rate": 2.510664928163802e-06, "loss": 0.1517, "step": 5684 }, { "epoch": 0.5237941677799788, "grad_norm": 0.8428080692138286, "learning_rate": 2.5099031517184665e-06, "loss": 0.1235, "step": 5685 }, { "epoch": 0.5238863039572488, "grad_norm": 0.8988271287258455, "learning_rate": 2.509141374353622e-06, "loss": 0.1339, "step": 5686 }, { "epoch": 0.5239784401345188, "grad_norm": 0.9611827066606157, "learning_rate": 2.50837959614e-06, "loss": 0.1489, "step": 5687 }, { "epoch": 0.5240705763117888, "grad_norm": 0.9468591677252746, "learning_rate": 2.5076178171483312e-06, "loss": 0.1415, "step": 5688 }, { "epoch": 0.5241627124890589, "grad_norm": 0.9603892814379588, "learning_rate": 2.506856037449348e-06, "loss": 0.14, "step": 5689 }, { "epoch": 0.5242548486663289, "grad_norm": 0.9746388238990209, "learning_rate": 2.50609425711378e-06, "loss": 0.1549, "step": 5690 }, { "epoch": 0.5243469848435989, "grad_norm": 0.9676253072065032, "learning_rate": 2.505332476212361e-06, "loss": 0.1547, "step": 5691 }, { "epoch": 0.5244391210208689, "grad_norm": 0.9193704129647952, "learning_rate": 2.50457069481582e-06, "loss": 0.1412, "step": 5692 }, { "epoch": 0.5245312571981389, "grad_norm": 0.8835165046226858, "learning_rate": 2.50380891299489e-06, "loss": 0.14, "step": 5693 }, { "epoch": 0.5246233933754089, "grad_norm": 0.8897949239988316, "learning_rate": 2.503047130820302e-06, "loss": 0.1305, "step": 5694 }, { "epoch": 0.5247155295526789, "grad_norm": 0.8582482232047172, "learning_rate": 2.5022853483627876e-06, "loss": 0.1291, "step": 5695 }, { "epoch": 0.5248076657299489, "grad_norm": 0.9377699199181725, "learning_rate": 2.5015235656930774e-06, "loss": 0.1468, "step": 5696 }, { "epoch": 0.5248998019072189, "grad_norm": 0.9298891282148648, "learning_rate": 2.500761782881905e-06, "loss": 0.1402, "step": 5697 }, { "epoch": 0.5249919380844889, "grad_norm": 0.8586736938601531, "learning_rate": 2.5e-06, "loss": 0.1319, "step": 5698 }, { "epoch": 0.5250840742617588, "grad_norm": 0.856179840256379, "learning_rate": 2.499238217118095e-06, "loss": 0.1341, "step": 5699 }, { "epoch": 0.5251762104390288, "grad_norm": 0.918400983382606, "learning_rate": 2.498476434306923e-06, "loss": 0.1414, "step": 5700 }, { "epoch": 0.5252683466162988, "grad_norm": 0.8617014174835537, "learning_rate": 2.4977146516372137e-06, "loss": 0.135, "step": 5701 }, { "epoch": 0.525360482793569, "grad_norm": 0.9095148426558132, "learning_rate": 2.496952869179699e-06, "loss": 0.1414, "step": 5702 }, { "epoch": 0.525452618970839, "grad_norm": 0.9003434006030184, "learning_rate": 2.4961910870051105e-06, "loss": 0.1304, "step": 5703 }, { "epoch": 0.5255447551481089, "grad_norm": 0.9417961533337451, "learning_rate": 2.49542930518418e-06, "loss": 0.1421, "step": 5704 }, { "epoch": 0.5256368913253789, "grad_norm": 0.9417914777947627, "learning_rate": 2.49466752378764e-06, "loss": 0.1425, "step": 5705 }, { "epoch": 0.5257290275026489, "grad_norm": 0.8897487906201332, "learning_rate": 2.4939057428862203e-06, "loss": 0.1284, "step": 5706 }, { "epoch": 0.5258211636799189, "grad_norm": 0.8457401884584623, "learning_rate": 2.4931439625506522e-06, "loss": 0.1276, "step": 5707 }, { "epoch": 0.5259132998571889, "grad_norm": 0.9013953147336925, "learning_rate": 2.4923821828516688e-06, "loss": 0.1426, "step": 5708 }, { "epoch": 0.5260054360344589, "grad_norm": 0.8852735984465513, "learning_rate": 2.491620403860001e-06, "loss": 0.1377, "step": 5709 }, { "epoch": 0.5260975722117289, "grad_norm": 0.9107949064957632, "learning_rate": 2.4908586256463788e-06, "loss": 0.1482, "step": 5710 }, { "epoch": 0.5261897083889989, "grad_norm": 0.8745259986185256, "learning_rate": 2.4900968482815344e-06, "loss": 0.1391, "step": 5711 }, { "epoch": 0.5262818445662689, "grad_norm": 0.8978816508895493, "learning_rate": 2.4893350718361984e-06, "loss": 0.1323, "step": 5712 }, { "epoch": 0.5263739807435389, "grad_norm": 0.9371818854858208, "learning_rate": 2.488573296381103e-06, "loss": 0.1392, "step": 5713 }, { "epoch": 0.5264661169208089, "grad_norm": 0.8768828375830369, "learning_rate": 2.487811521986978e-06, "loss": 0.1366, "step": 5714 }, { "epoch": 0.5265582530980789, "grad_norm": 0.9975475708282623, "learning_rate": 2.4870497487245534e-06, "loss": 0.1469, "step": 5715 }, { "epoch": 0.526650389275349, "grad_norm": 0.9416496571228281, "learning_rate": 2.486287976664562e-06, "loss": 0.1516, "step": 5716 }, { "epoch": 0.526742525452619, "grad_norm": 0.8976130137898136, "learning_rate": 2.485526205877734e-06, "loss": 0.1329, "step": 5717 }, { "epoch": 0.526834661629889, "grad_norm": 0.9229030102050523, "learning_rate": 2.484764436434798e-06, "loss": 0.1435, "step": 5718 }, { "epoch": 0.526926797807159, "grad_norm": 0.848994540276747, "learning_rate": 2.4840026684064867e-06, "loss": 0.1249, "step": 5719 }, { "epoch": 0.527018933984429, "grad_norm": 0.9202350631334453, "learning_rate": 2.4832409018635283e-06, "loss": 0.1428, "step": 5720 }, { "epoch": 0.527111070161699, "grad_norm": 0.938200837146183, "learning_rate": 2.4824791368766555e-06, "loss": 0.1412, "step": 5721 }, { "epoch": 0.527203206338969, "grad_norm": 0.9183623201693331, "learning_rate": 2.4817173735165958e-06, "loss": 0.1406, "step": 5722 }, { "epoch": 0.527295342516239, "grad_norm": 0.9091326879662819, "learning_rate": 2.4809556118540795e-06, "loss": 0.1333, "step": 5723 }, { "epoch": 0.527387478693509, "grad_norm": 0.8675851743483034, "learning_rate": 2.4801938519598372e-06, "loss": 0.1201, "step": 5724 }, { "epoch": 0.527479614870779, "grad_norm": 0.9000877408453869, "learning_rate": 2.4794320939045988e-06, "loss": 0.1418, "step": 5725 }, { "epoch": 0.527571751048049, "grad_norm": 0.8474851554049467, "learning_rate": 2.4786703377590914e-06, "loss": 0.1314, "step": 5726 }, { "epoch": 0.527663887225319, "grad_norm": 0.9530774213615841, "learning_rate": 2.4779085835940457e-06, "loss": 0.1501, "step": 5727 }, { "epoch": 0.527756023402589, "grad_norm": 0.8946042244381394, "learning_rate": 2.47714683148019e-06, "loss": 0.1368, "step": 5728 }, { "epoch": 0.527848159579859, "grad_norm": 0.9488184152086155, "learning_rate": 2.476385081488254e-06, "loss": 0.1416, "step": 5729 }, { "epoch": 0.5279402957571291, "grad_norm": 0.8769562906327312, "learning_rate": 2.4756233336889663e-06, "loss": 0.1331, "step": 5730 }, { "epoch": 0.5280324319343991, "grad_norm": 0.960488041104247, "learning_rate": 2.474861588153054e-06, "loss": 0.1434, "step": 5731 }, { "epoch": 0.5281245681116691, "grad_norm": 0.9442539249540435, "learning_rate": 2.4740998449512456e-06, "loss": 0.1405, "step": 5732 }, { "epoch": 0.5282167042889391, "grad_norm": 1.017322220614333, "learning_rate": 2.4733381041542695e-06, "loss": 0.1442, "step": 5733 }, { "epoch": 0.5283088404662091, "grad_norm": 0.9558560440845575, "learning_rate": 2.4725763658328544e-06, "loss": 0.128, "step": 5734 }, { "epoch": 0.5284009766434791, "grad_norm": 0.9552439366518181, "learning_rate": 2.471814630057726e-06, "loss": 0.1368, "step": 5735 }, { "epoch": 0.5284931128207491, "grad_norm": 0.9475504293116053, "learning_rate": 2.4710528968996114e-06, "loss": 0.149, "step": 5736 }, { "epoch": 0.5285852489980191, "grad_norm": 0.9358738909776354, "learning_rate": 2.4702911664292397e-06, "loss": 0.1365, "step": 5737 }, { "epoch": 0.5286773851752891, "grad_norm": 0.9334724145108257, "learning_rate": 2.469529438717336e-06, "loss": 0.1373, "step": 5738 }, { "epoch": 0.5287695213525591, "grad_norm": 0.8697872053181739, "learning_rate": 2.4687677138346265e-06, "loss": 0.137, "step": 5739 }, { "epoch": 0.528861657529829, "grad_norm": 1.0230475394744452, "learning_rate": 2.468005991851839e-06, "loss": 0.1547, "step": 5740 }, { "epoch": 0.528953793707099, "grad_norm": 0.9182793634858509, "learning_rate": 2.467244272839698e-06, "loss": 0.1402, "step": 5741 }, { "epoch": 0.529045929884369, "grad_norm": 0.8635610908929247, "learning_rate": 2.4664825568689303e-06, "loss": 0.1316, "step": 5742 }, { "epoch": 0.5291380660616392, "grad_norm": 0.9238816088041331, "learning_rate": 2.4657208440102607e-06, "loss": 0.1346, "step": 5743 }, { "epoch": 0.5292302022389092, "grad_norm": 0.9603841203290647, "learning_rate": 2.464959134334414e-06, "loss": 0.1398, "step": 5744 }, { "epoch": 0.5293223384161792, "grad_norm": 0.9691785358952262, "learning_rate": 2.4641974279121157e-06, "loss": 0.1338, "step": 5745 }, { "epoch": 0.5294144745934491, "grad_norm": 0.9811825017453772, "learning_rate": 2.463435724814091e-06, "loss": 0.1363, "step": 5746 }, { "epoch": 0.5295066107707191, "grad_norm": 0.9089320567911033, "learning_rate": 2.4626740251110615e-06, "loss": 0.1451, "step": 5747 }, { "epoch": 0.5295987469479891, "grad_norm": 0.9108691062317888, "learning_rate": 2.461912328873754e-06, "loss": 0.1357, "step": 5748 }, { "epoch": 0.5296908831252591, "grad_norm": 0.9259190835186841, "learning_rate": 2.46115063617289e-06, "loss": 0.1492, "step": 5749 }, { "epoch": 0.5297830193025291, "grad_norm": 0.9615169324748283, "learning_rate": 2.4603889470791946e-06, "loss": 0.128, "step": 5750 }, { "epoch": 0.5298751554797991, "grad_norm": 0.9546828640622951, "learning_rate": 2.4596272616633892e-06, "loss": 0.1339, "step": 5751 }, { "epoch": 0.5299672916570691, "grad_norm": 0.9448443537367963, "learning_rate": 2.4588655799961968e-06, "loss": 0.1365, "step": 5752 }, { "epoch": 0.5300594278343391, "grad_norm": 0.9490782504336478, "learning_rate": 2.45810390214834e-06, "loss": 0.1388, "step": 5753 }, { "epoch": 0.5301515640116091, "grad_norm": 0.9427353748495464, "learning_rate": 2.4573422281905405e-06, "loss": 0.1451, "step": 5754 }, { "epoch": 0.5302437001888791, "grad_norm": 0.9010459704299345, "learning_rate": 2.456580558193519e-06, "loss": 0.1378, "step": 5755 }, { "epoch": 0.5303358363661491, "grad_norm": 0.9586692997882068, "learning_rate": 2.4558188922279977e-06, "loss": 0.147, "step": 5756 }, { "epoch": 0.5304279725434192, "grad_norm": 1.0038167732871943, "learning_rate": 2.4550572303646965e-06, "loss": 0.155, "step": 5757 }, { "epoch": 0.5305201087206892, "grad_norm": 0.9211631756539981, "learning_rate": 2.454295572674337e-06, "loss": 0.14, "step": 5758 }, { "epoch": 0.5306122448979592, "grad_norm": 1.0000059466636613, "learning_rate": 2.453533919227639e-06, "loss": 0.1378, "step": 5759 }, { "epoch": 0.5307043810752292, "grad_norm": 0.8958358231178939, "learning_rate": 2.4527722700953205e-06, "loss": 0.1348, "step": 5760 }, { "epoch": 0.5307965172524992, "grad_norm": 0.9202617314939519, "learning_rate": 2.4520106253481025e-06, "loss": 0.1408, "step": 5761 }, { "epoch": 0.5308886534297692, "grad_norm": 0.9861823309901614, "learning_rate": 2.451248985056702e-06, "loss": 0.1464, "step": 5762 }, { "epoch": 0.5309807896070392, "grad_norm": 0.9958770704868112, "learning_rate": 2.4504873492918404e-06, "loss": 0.1252, "step": 5763 }, { "epoch": 0.5310729257843092, "grad_norm": 0.9492656613133602, "learning_rate": 2.449725718124233e-06, "loss": 0.1401, "step": 5764 }, { "epoch": 0.5311650619615792, "grad_norm": 0.9305337760930398, "learning_rate": 2.448964091624597e-06, "loss": 0.1383, "step": 5765 }, { "epoch": 0.5312571981388492, "grad_norm": 0.9418538276706124, "learning_rate": 2.4482024698636514e-06, "loss": 0.1334, "step": 5766 }, { "epoch": 0.5313493343161192, "grad_norm": 0.9056941479460245, "learning_rate": 2.4474408529121126e-06, "loss": 0.1379, "step": 5767 }, { "epoch": 0.5314414704933892, "grad_norm": 0.823476911768923, "learning_rate": 2.4466792408406953e-06, "loss": 0.1158, "step": 5768 }, { "epoch": 0.5315336066706592, "grad_norm": 0.8974601975603193, "learning_rate": 2.445917633720117e-06, "loss": 0.1366, "step": 5769 }, { "epoch": 0.5316257428479293, "grad_norm": 0.8852553284312088, "learning_rate": 2.4451560316210913e-06, "loss": 0.1298, "step": 5770 }, { "epoch": 0.5317178790251993, "grad_norm": 0.9577532072576866, "learning_rate": 2.444394434614335e-06, "loss": 0.1448, "step": 5771 }, { "epoch": 0.5318100152024693, "grad_norm": 0.9122078585444239, "learning_rate": 2.4436328427705612e-06, "loss": 0.1484, "step": 5772 }, { "epoch": 0.5319021513797393, "grad_norm": 0.9341072336908377, "learning_rate": 2.442871256160483e-06, "loss": 0.1463, "step": 5773 }, { "epoch": 0.5319942875570093, "grad_norm": 0.8852758499988086, "learning_rate": 2.442109674854815e-06, "loss": 0.1435, "step": 5774 }, { "epoch": 0.5320864237342793, "grad_norm": 0.8580463168445764, "learning_rate": 2.4413480989242718e-06, "loss": 0.1247, "step": 5775 }, { "epoch": 0.5321785599115493, "grad_norm": 0.9335327520464461, "learning_rate": 2.440586528439562e-06, "loss": 0.1404, "step": 5776 }, { "epoch": 0.5322706960888193, "grad_norm": 0.8745349837111808, "learning_rate": 2.4398249634713996e-06, "loss": 0.1332, "step": 5777 }, { "epoch": 0.5323628322660893, "grad_norm": 0.8252889666092439, "learning_rate": 2.4390634040904965e-06, "loss": 0.1182, "step": 5778 }, { "epoch": 0.5324549684433593, "grad_norm": 0.9157329631624775, "learning_rate": 2.4383018503675633e-06, "loss": 0.1345, "step": 5779 }, { "epoch": 0.5325471046206293, "grad_norm": 0.8603655816644842, "learning_rate": 2.437540302373309e-06, "loss": 0.125, "step": 5780 }, { "epoch": 0.5326392407978993, "grad_norm": 0.9038892694652924, "learning_rate": 2.4367787601784446e-06, "loss": 0.1371, "step": 5781 }, { "epoch": 0.5327313769751693, "grad_norm": 0.92831194618531, "learning_rate": 2.4360172238536787e-06, "loss": 0.1364, "step": 5782 }, { "epoch": 0.5328235131524393, "grad_norm": 0.9846067383086416, "learning_rate": 2.435255693469721e-06, "loss": 0.1423, "step": 5783 }, { "epoch": 0.5329156493297094, "grad_norm": 0.990724526005928, "learning_rate": 2.4344941690972797e-06, "loss": 0.1498, "step": 5784 }, { "epoch": 0.5330077855069794, "grad_norm": 0.9493770573022826, "learning_rate": 2.4337326508070604e-06, "loss": 0.139, "step": 5785 }, { "epoch": 0.5330999216842494, "grad_norm": 0.9201553217830986, "learning_rate": 2.4329711386697726e-06, "loss": 0.1298, "step": 5786 }, { "epoch": 0.5331920578615194, "grad_norm": 0.8880673033056966, "learning_rate": 2.432209632756121e-06, "loss": 0.1489, "step": 5787 }, { "epoch": 0.5332841940387893, "grad_norm": 0.9510070687868549, "learning_rate": 2.4314481331368133e-06, "loss": 0.135, "step": 5788 }, { "epoch": 0.5333763302160593, "grad_norm": 0.8694595494903921, "learning_rate": 2.430686639882554e-06, "loss": 0.132, "step": 5789 }, { "epoch": 0.5334684663933293, "grad_norm": 0.881862571605303, "learning_rate": 2.429925153064046e-06, "loss": 0.1237, "step": 5790 }, { "epoch": 0.5335606025705993, "grad_norm": 0.9202894290372889, "learning_rate": 2.4291636727519966e-06, "loss": 0.1287, "step": 5791 }, { "epoch": 0.5336527387478693, "grad_norm": 0.8795110496314976, "learning_rate": 2.428402199017108e-06, "loss": 0.1265, "step": 5792 }, { "epoch": 0.5337448749251393, "grad_norm": 0.973557854750666, "learning_rate": 2.4276407319300815e-06, "loss": 0.1515, "step": 5793 }, { "epoch": 0.5338370111024093, "grad_norm": 0.9241824971544353, "learning_rate": 2.4268792715616217e-06, "loss": 0.1386, "step": 5794 }, { "epoch": 0.5339291472796793, "grad_norm": 0.9019945526996254, "learning_rate": 2.4261178179824287e-06, "loss": 0.1312, "step": 5795 }, { "epoch": 0.5340212834569493, "grad_norm": 0.8982120470146486, "learning_rate": 2.4253563712632057e-06, "loss": 0.1329, "step": 5796 }, { "epoch": 0.5341134196342193, "grad_norm": 0.8751533441546493, "learning_rate": 2.4245949314746506e-06, "loss": 0.1367, "step": 5797 }, { "epoch": 0.5342055558114894, "grad_norm": 0.9530832980103735, "learning_rate": 2.423833498687464e-06, "loss": 0.1374, "step": 5798 }, { "epoch": 0.5342976919887594, "grad_norm": 0.9553902470626439, "learning_rate": 2.423072072972346e-06, "loss": 0.1346, "step": 5799 }, { "epoch": 0.5343898281660294, "grad_norm": 0.9146688205607726, "learning_rate": 2.4223106543999943e-06, "loss": 0.1378, "step": 5800 }, { "epoch": 0.5344819643432994, "grad_norm": 0.8880858853753428, "learning_rate": 2.4215492430411057e-06, "loss": 0.1275, "step": 5801 }, { "epoch": 0.5345741005205694, "grad_norm": 0.9299530782474574, "learning_rate": 2.4207878389663794e-06, "loss": 0.1372, "step": 5802 }, { "epoch": 0.5346662366978394, "grad_norm": 1.0145172996220377, "learning_rate": 2.4200264422465096e-06, "loss": 0.1415, "step": 5803 }, { "epoch": 0.5347583728751094, "grad_norm": 0.9577851127728939, "learning_rate": 2.4192650529521948e-06, "loss": 0.131, "step": 5804 }, { "epoch": 0.5348505090523794, "grad_norm": 0.8902962613744783, "learning_rate": 2.418503671154128e-06, "loss": 0.1416, "step": 5805 }, { "epoch": 0.5349426452296494, "grad_norm": 0.8831433355746883, "learning_rate": 2.417742296923003e-06, "loss": 0.1298, "step": 5806 }, { "epoch": 0.5350347814069194, "grad_norm": 0.9091113886860835, "learning_rate": 2.4169809303295157e-06, "loss": 0.1372, "step": 5807 }, { "epoch": 0.5351269175841894, "grad_norm": 0.920230718279897, "learning_rate": 2.4162195714443584e-06, "loss": 0.1336, "step": 5808 }, { "epoch": 0.5352190537614594, "grad_norm": 0.9225664409925763, "learning_rate": 2.4154582203382216e-06, "loss": 0.1377, "step": 5809 }, { "epoch": 0.5353111899387294, "grad_norm": 0.8214010998632829, "learning_rate": 2.4146968770817988e-06, "loss": 0.1252, "step": 5810 }, { "epoch": 0.5354033261159995, "grad_norm": 0.9037345408786586, "learning_rate": 2.41393554174578e-06, "loss": 0.1499, "step": 5811 }, { "epoch": 0.5354954622932695, "grad_norm": 0.8879279953804324, "learning_rate": 2.4131742144008557e-06, "loss": 0.1366, "step": 5812 }, { "epoch": 0.5355875984705395, "grad_norm": 0.8643469307323431, "learning_rate": 2.4124128951177146e-06, "loss": 0.119, "step": 5813 }, { "epoch": 0.5356797346478095, "grad_norm": 0.9000012611078515, "learning_rate": 2.4116515839670456e-06, "loss": 0.1358, "step": 5814 }, { "epoch": 0.5357718708250795, "grad_norm": 0.9397044907955544, "learning_rate": 2.4108902810195367e-06, "loss": 0.1468, "step": 5815 }, { "epoch": 0.5358640070023495, "grad_norm": 0.915825164357773, "learning_rate": 2.4101289863458744e-06, "loss": 0.1255, "step": 5816 }, { "epoch": 0.5359561431796195, "grad_norm": 0.8662962491574562, "learning_rate": 2.409367700016746e-06, "loss": 0.1344, "step": 5817 }, { "epoch": 0.5360482793568895, "grad_norm": 0.9247751195485957, "learning_rate": 2.4086064221028365e-06, "loss": 0.1302, "step": 5818 }, { "epoch": 0.5361404155341595, "grad_norm": 0.9472673515439451, "learning_rate": 2.40784515267483e-06, "loss": 0.1407, "step": 5819 }, { "epoch": 0.5362325517114295, "grad_norm": 0.9411408748162816, "learning_rate": 2.407083891803411e-06, "loss": 0.1394, "step": 5820 }, { "epoch": 0.5363246878886995, "grad_norm": 0.9255154027937263, "learning_rate": 2.4063226395592635e-06, "loss": 0.1358, "step": 5821 }, { "epoch": 0.5364168240659695, "grad_norm": 0.9180421838847347, "learning_rate": 2.4055613960130676e-06, "loss": 0.1376, "step": 5822 }, { "epoch": 0.5365089602432395, "grad_norm": 0.8784816828161529, "learning_rate": 2.4048001612355072e-06, "loss": 0.1375, "step": 5823 }, { "epoch": 0.5366010964205095, "grad_norm": 0.854986124528438, "learning_rate": 2.4040389352972616e-06, "loss": 0.1345, "step": 5824 }, { "epoch": 0.5366932325977796, "grad_norm": 0.8877816767972053, "learning_rate": 2.403277718269012e-06, "loss": 0.1287, "step": 5825 }, { "epoch": 0.5367853687750496, "grad_norm": 0.8756257399903816, "learning_rate": 2.4025165102214363e-06, "loss": 0.1274, "step": 5826 }, { "epoch": 0.5368775049523196, "grad_norm": 0.9513688328099061, "learning_rate": 2.4017553112252123e-06, "loss": 0.1427, "step": 5827 }, { "epoch": 0.5369696411295896, "grad_norm": 0.9443193310474314, "learning_rate": 2.400994121351019e-06, "loss": 0.1422, "step": 5828 }, { "epoch": 0.5370617773068596, "grad_norm": 0.8714929349419318, "learning_rate": 2.4002329406695325e-06, "loss": 0.135, "step": 5829 }, { "epoch": 0.5371539134841296, "grad_norm": 0.9829188571248488, "learning_rate": 2.3994717692514274e-06, "loss": 0.1479, "step": 5830 }, { "epoch": 0.5372460496613995, "grad_norm": 0.8778193951704442, "learning_rate": 2.3987106071673797e-06, "loss": 0.1339, "step": 5831 }, { "epoch": 0.5373381858386695, "grad_norm": 0.8295131686996187, "learning_rate": 2.3979494544880625e-06, "loss": 0.117, "step": 5832 }, { "epoch": 0.5374303220159395, "grad_norm": 0.9603954748107639, "learning_rate": 2.3971883112841505e-06, "loss": 0.1481, "step": 5833 }, { "epoch": 0.5375224581932095, "grad_norm": 0.9293362890021563, "learning_rate": 2.3964271776263146e-06, "loss": 0.1448, "step": 5834 }, { "epoch": 0.5376145943704795, "grad_norm": 0.9414302795244884, "learning_rate": 2.3956660535852254e-06, "loss": 0.1403, "step": 5835 }, { "epoch": 0.5377067305477495, "grad_norm": 0.9002162943398508, "learning_rate": 2.3949049392315555e-06, "loss": 0.1313, "step": 5836 }, { "epoch": 0.5377988667250195, "grad_norm": 0.8896376778803587, "learning_rate": 2.394143834635973e-06, "loss": 0.132, "step": 5837 }, { "epoch": 0.5378910029022896, "grad_norm": 0.9191022468212096, "learning_rate": 2.3933827398691464e-06, "loss": 0.1291, "step": 5838 }, { "epoch": 0.5379831390795596, "grad_norm": 0.9652932862586016, "learning_rate": 2.3926216550017445e-06, "loss": 0.1402, "step": 5839 }, { "epoch": 0.5380752752568296, "grad_norm": 0.8928366891316858, "learning_rate": 2.3918605801044325e-06, "loss": 0.1403, "step": 5840 }, { "epoch": 0.5381674114340996, "grad_norm": 0.9072163582373372, "learning_rate": 2.3910995152478786e-06, "loss": 0.1399, "step": 5841 }, { "epoch": 0.5382595476113696, "grad_norm": 1.5408775770789918, "learning_rate": 2.3903384605027462e-06, "loss": 0.1339, "step": 5842 }, { "epoch": 0.5383516837886396, "grad_norm": 0.9626327273310918, "learning_rate": 2.3895774159396982e-06, "loss": 0.1425, "step": 5843 }, { "epoch": 0.5384438199659096, "grad_norm": 0.8853401330370692, "learning_rate": 2.3888163816294e-06, "loss": 0.1345, "step": 5844 }, { "epoch": 0.5385359561431796, "grad_norm": 0.8572199289738917, "learning_rate": 2.3880553576425124e-06, "loss": 0.1313, "step": 5845 }, { "epoch": 0.5386280923204496, "grad_norm": 0.9431335769657753, "learning_rate": 2.3872943440496978e-06, "loss": 0.1472, "step": 5846 }, { "epoch": 0.5387202284977196, "grad_norm": 0.8765177735234764, "learning_rate": 2.386533340921615e-06, "loss": 0.1413, "step": 5847 }, { "epoch": 0.5388123646749896, "grad_norm": 1.0028906351410283, "learning_rate": 2.385772348328923e-06, "loss": 0.1584, "step": 5848 }, { "epoch": 0.5389045008522596, "grad_norm": 0.9002792229676244, "learning_rate": 2.385011366342281e-06, "loss": 0.1407, "step": 5849 }, { "epoch": 0.5389966370295296, "grad_norm": 0.8770503850906126, "learning_rate": 2.3842503950323473e-06, "loss": 0.136, "step": 5850 }, { "epoch": 0.5390887732067996, "grad_norm": 0.9336646528001852, "learning_rate": 2.383489434469775e-06, "loss": 0.1426, "step": 5851 }, { "epoch": 0.5391809093840697, "grad_norm": 0.9010134456089555, "learning_rate": 2.382728484725222e-06, "loss": 0.1341, "step": 5852 }, { "epoch": 0.5392730455613397, "grad_norm": 0.8942998501108559, "learning_rate": 2.3819675458693422e-06, "loss": 0.1409, "step": 5853 }, { "epoch": 0.5393651817386097, "grad_norm": 0.9340316091553998, "learning_rate": 2.381206617972789e-06, "loss": 0.1444, "step": 5854 }, { "epoch": 0.5394573179158797, "grad_norm": 0.8876144334710793, "learning_rate": 2.3804457011062126e-06, "loss": 0.1398, "step": 5855 }, { "epoch": 0.5395494540931497, "grad_norm": 0.9016840240138915, "learning_rate": 2.3796847953402665e-06, "loss": 0.1225, "step": 5856 }, { "epoch": 0.5396415902704197, "grad_norm": 0.9181794340809388, "learning_rate": 2.3789239007455996e-06, "loss": 0.1405, "step": 5857 }, { "epoch": 0.5397337264476897, "grad_norm": 0.8881973608279808, "learning_rate": 2.3781630173928627e-06, "loss": 0.1347, "step": 5858 }, { "epoch": 0.5398258626249597, "grad_norm": 0.856762225089732, "learning_rate": 2.3774021453527023e-06, "loss": 0.1193, "step": 5859 }, { "epoch": 0.5399179988022297, "grad_norm": 0.9519632501690944, "learning_rate": 2.3766412846957652e-06, "loss": 0.1485, "step": 5860 }, { "epoch": 0.5400101349794997, "grad_norm": 0.9612225735542497, "learning_rate": 2.3758804354926986e-06, "loss": 0.1391, "step": 5861 }, { "epoch": 0.5401022711567697, "grad_norm": 0.934863489073019, "learning_rate": 2.375119597814147e-06, "loss": 0.1455, "step": 5862 }, { "epoch": 0.5401944073340397, "grad_norm": 0.8937423146747298, "learning_rate": 2.374358771730754e-06, "loss": 0.1285, "step": 5863 }, { "epoch": 0.5402865435113097, "grad_norm": 0.9940701675808745, "learning_rate": 2.3735979573131626e-06, "loss": 0.147, "step": 5864 }, { "epoch": 0.5403786796885797, "grad_norm": 1.0060354786289987, "learning_rate": 2.372837154632014e-06, "loss": 0.1289, "step": 5865 }, { "epoch": 0.5404708158658498, "grad_norm": 0.9563270691350768, "learning_rate": 2.37207636375795e-06, "loss": 0.132, "step": 5866 }, { "epoch": 0.5405629520431198, "grad_norm": 0.9225113435435278, "learning_rate": 2.3713155847616095e-06, "loss": 0.1444, "step": 5867 }, { "epoch": 0.5406550882203898, "grad_norm": 0.9051134560491042, "learning_rate": 2.37055481771363e-06, "loss": 0.1257, "step": 5868 }, { "epoch": 0.5407472243976598, "grad_norm": 0.9125435043547829, "learning_rate": 2.3697940626846504e-06, "loss": 0.1366, "step": 5869 }, { "epoch": 0.5408393605749298, "grad_norm": 0.8827037590314208, "learning_rate": 2.369033319745306e-06, "loss": 0.1278, "step": 5870 }, { "epoch": 0.5409314967521998, "grad_norm": 0.9542353104247997, "learning_rate": 2.3682725889662316e-06, "loss": 0.1316, "step": 5871 }, { "epoch": 0.5410236329294698, "grad_norm": 0.9436897926471467, "learning_rate": 2.3675118704180614e-06, "loss": 0.1375, "step": 5872 }, { "epoch": 0.5411157691067398, "grad_norm": 0.9053198809341689, "learning_rate": 2.366751164171428e-06, "loss": 0.1433, "step": 5873 }, { "epoch": 0.5412079052840097, "grad_norm": 0.8602873340218734, "learning_rate": 2.3659904702969636e-06, "loss": 0.1339, "step": 5874 }, { "epoch": 0.5413000414612797, "grad_norm": 0.8998889070188997, "learning_rate": 2.3652297888653e-06, "loss": 0.1409, "step": 5875 }, { "epoch": 0.5413921776385497, "grad_norm": 0.9105525894251323, "learning_rate": 2.3644691199470628e-06, "loss": 0.1369, "step": 5876 }, { "epoch": 0.5414843138158197, "grad_norm": 0.9362291238439949, "learning_rate": 2.3637084636128836e-06, "loss": 0.1356, "step": 5877 }, { "epoch": 0.5415764499930897, "grad_norm": 0.8885859902523415, "learning_rate": 2.3629478199333873e-06, "loss": 0.1328, "step": 5878 }, { "epoch": 0.5416685861703598, "grad_norm": 0.9125123891199016, "learning_rate": 2.362187188979202e-06, "loss": 0.1368, "step": 5879 }, { "epoch": 0.5417607223476298, "grad_norm": 0.8937998412152254, "learning_rate": 2.3614265708209503e-06, "loss": 0.1324, "step": 5880 }, { "epoch": 0.5418528585248998, "grad_norm": 0.8888623370293548, "learning_rate": 2.360665965529256e-06, "loss": 0.1336, "step": 5881 }, { "epoch": 0.5419449947021698, "grad_norm": 0.9001269534353986, "learning_rate": 2.3599053731747424e-06, "loss": 0.1396, "step": 5882 }, { "epoch": 0.5420371308794398, "grad_norm": 0.9073225423258994, "learning_rate": 2.3591447938280304e-06, "loss": 0.1349, "step": 5883 }, { "epoch": 0.5421292670567098, "grad_norm": 0.9664393080376217, "learning_rate": 2.3583842275597382e-06, "loss": 0.1362, "step": 5884 }, { "epoch": 0.5422214032339798, "grad_norm": 0.9234357650375972, "learning_rate": 2.3576236744404866e-06, "loss": 0.1383, "step": 5885 }, { "epoch": 0.5423135394112498, "grad_norm": 0.8729256712212028, "learning_rate": 2.3568631345408912e-06, "loss": 0.1326, "step": 5886 }, { "epoch": 0.5424056755885198, "grad_norm": 0.9081475409204117, "learning_rate": 2.3561026079315707e-06, "loss": 0.1271, "step": 5887 }, { "epoch": 0.5424978117657898, "grad_norm": 0.9919399955510334, "learning_rate": 2.3553420946831377e-06, "loss": 0.159, "step": 5888 }, { "epoch": 0.5425899479430598, "grad_norm": 0.8465349151729697, "learning_rate": 2.3545815948662066e-06, "loss": 0.1246, "step": 5889 }, { "epoch": 0.5426820841203298, "grad_norm": 0.8814514667720759, "learning_rate": 2.3538211085513902e-06, "loss": 0.1258, "step": 5890 }, { "epoch": 0.5427742202975998, "grad_norm": 0.9320930510441189, "learning_rate": 2.3530606358093e-06, "loss": 0.1336, "step": 5891 }, { "epoch": 0.5428663564748698, "grad_norm": 0.8891625231779033, "learning_rate": 2.352300176710545e-06, "loss": 0.1367, "step": 5892 }, { "epoch": 0.5429584926521399, "grad_norm": 0.9387961274368829, "learning_rate": 2.351539731325735e-06, "loss": 0.1397, "step": 5893 }, { "epoch": 0.5430506288294099, "grad_norm": 0.8696939578101172, "learning_rate": 2.350779299725476e-06, "loss": 0.1364, "step": 5894 }, { "epoch": 0.5431427650066799, "grad_norm": 0.9477061229980223, "learning_rate": 2.3500188819803764e-06, "loss": 0.1488, "step": 5895 }, { "epoch": 0.5432349011839499, "grad_norm": 0.8300016678907538, "learning_rate": 2.3492584781610392e-06, "loss": 0.1199, "step": 5896 }, { "epoch": 0.5433270373612199, "grad_norm": 0.9676273385906484, "learning_rate": 2.3484980883380677e-06, "loss": 0.1389, "step": 5897 }, { "epoch": 0.5434191735384899, "grad_norm": 0.9146011666455297, "learning_rate": 2.347737712582066e-06, "loss": 0.1288, "step": 5898 }, { "epoch": 0.5435113097157599, "grad_norm": 0.8746026606193679, "learning_rate": 2.3469773509636346e-06, "loss": 0.1253, "step": 5899 }, { "epoch": 0.5436034458930299, "grad_norm": 0.9163120972409077, "learning_rate": 2.3462170035533713e-06, "loss": 0.1383, "step": 5900 }, { "epoch": 0.5436955820702999, "grad_norm": 0.9377392397159249, "learning_rate": 2.345456670421876e-06, "loss": 0.1444, "step": 5901 }, { "epoch": 0.5437877182475699, "grad_norm": 0.9564357005506579, "learning_rate": 2.3446963516397455e-06, "loss": 0.1432, "step": 5902 }, { "epoch": 0.5438798544248399, "grad_norm": 0.9273350082364941, "learning_rate": 2.3439360472775758e-06, "loss": 0.1398, "step": 5903 }, { "epoch": 0.5439719906021099, "grad_norm": 0.8985150989161702, "learning_rate": 2.3431757574059616e-06, "loss": 0.1324, "step": 5904 }, { "epoch": 0.5440641267793799, "grad_norm": 0.9057262190585165, "learning_rate": 2.342415482095494e-06, "loss": 0.1322, "step": 5905 }, { "epoch": 0.54415626295665, "grad_norm": 0.9487898862954096, "learning_rate": 2.341655221416766e-06, "loss": 0.1367, "step": 5906 }, { "epoch": 0.54424839913392, "grad_norm": 0.8907811006669535, "learning_rate": 2.3408949754403678e-06, "loss": 0.1257, "step": 5907 }, { "epoch": 0.54434053531119, "grad_norm": 0.8690356403332792, "learning_rate": 2.340134744236889e-06, "loss": 0.1194, "step": 5908 }, { "epoch": 0.54443267148846, "grad_norm": 0.9402466917106319, "learning_rate": 2.3393745278769163e-06, "loss": 0.1393, "step": 5909 }, { "epoch": 0.54452480766573, "grad_norm": 0.9079624440722254, "learning_rate": 2.3386143264310348e-06, "loss": 0.1338, "step": 5910 }, { "epoch": 0.544616943843, "grad_norm": 0.9249329243264749, "learning_rate": 2.3378541399698314e-06, "loss": 0.1367, "step": 5911 }, { "epoch": 0.54470908002027, "grad_norm": 0.9118627413475253, "learning_rate": 2.337093968563889e-06, "loss": 0.1267, "step": 5912 }, { "epoch": 0.54480121619754, "grad_norm": 0.8645824091337139, "learning_rate": 2.336333812283788e-06, "loss": 0.1214, "step": 5913 }, { "epoch": 0.54489335237481, "grad_norm": 0.8851023831702883, "learning_rate": 2.3355736712001107e-06, "loss": 0.1425, "step": 5914 }, { "epoch": 0.54498548855208, "grad_norm": 0.9789065324109237, "learning_rate": 2.3348135453834353e-06, "loss": 0.1477, "step": 5915 }, { "epoch": 0.54507762472935, "grad_norm": 0.8929699059776705, "learning_rate": 2.3340534349043407e-06, "loss": 0.1324, "step": 5916 }, { "epoch": 0.54516976090662, "grad_norm": 0.8525535061935612, "learning_rate": 2.3332933398334028e-06, "loss": 0.1348, "step": 5917 }, { "epoch": 0.54526189708389, "grad_norm": 0.9265971942479982, "learning_rate": 2.332533260241195e-06, "loss": 0.146, "step": 5918 }, { "epoch": 0.5453540332611599, "grad_norm": 0.8893191415358593, "learning_rate": 2.3317731961982926e-06, "loss": 0.1422, "step": 5919 }, { "epoch": 0.54544616943843, "grad_norm": 0.8715215907778809, "learning_rate": 2.331013147775268e-06, "loss": 0.1322, "step": 5920 }, { "epoch": 0.5455383056157, "grad_norm": 0.9232093592061419, "learning_rate": 2.3302531150426894e-06, "loss": 0.1514, "step": 5921 }, { "epoch": 0.54563044179297, "grad_norm": 0.9221326425086337, "learning_rate": 2.329493098071128e-06, "loss": 0.1377, "step": 5922 }, { "epoch": 0.54572257797024, "grad_norm": 0.8629100101924353, "learning_rate": 2.32873309693115e-06, "loss": 0.1292, "step": 5923 }, { "epoch": 0.54581471414751, "grad_norm": 0.8764601317643181, "learning_rate": 2.3279731116933235e-06, "loss": 0.1382, "step": 5924 }, { "epoch": 0.54590685032478, "grad_norm": 0.9527189795795429, "learning_rate": 2.327213142428212e-06, "loss": 0.1412, "step": 5925 }, { "epoch": 0.54599898650205, "grad_norm": 0.8514752428752124, "learning_rate": 2.326453189206378e-06, "loss": 0.137, "step": 5926 }, { "epoch": 0.54609112267932, "grad_norm": 0.8631535978765967, "learning_rate": 2.325693252098384e-06, "loss": 0.1302, "step": 5927 }, { "epoch": 0.54618325885659, "grad_norm": 0.9256537186435602, "learning_rate": 2.324933331174792e-06, "loss": 0.1399, "step": 5928 }, { "epoch": 0.54627539503386, "grad_norm": 0.9165258915845617, "learning_rate": 2.3241734265061573e-06, "loss": 0.134, "step": 5929 }, { "epoch": 0.54636753121113, "grad_norm": 0.9783289918459979, "learning_rate": 2.323413538163039e-06, "loss": 0.137, "step": 5930 }, { "epoch": 0.5464596673884, "grad_norm": 0.952969188227047, "learning_rate": 2.322653666215993e-06, "loss": 0.1499, "step": 5931 }, { "epoch": 0.54655180356567, "grad_norm": 0.8964130572884526, "learning_rate": 2.3218938107355727e-06, "loss": 0.1367, "step": 5932 }, { "epoch": 0.54664393974294, "grad_norm": 0.8954517651328167, "learning_rate": 2.3211339717923326e-06, "loss": 0.1226, "step": 5933 }, { "epoch": 0.5467360759202101, "grad_norm": 0.9005748837624358, "learning_rate": 2.320374149456822e-06, "loss": 0.1346, "step": 5934 }, { "epoch": 0.5468282120974801, "grad_norm": 0.9435439221997367, "learning_rate": 2.31961434379959e-06, "loss": 0.1442, "step": 5935 }, { "epoch": 0.5469203482747501, "grad_norm": 0.8486317533267757, "learning_rate": 2.3188545548911863e-06, "loss": 0.1176, "step": 5936 }, { "epoch": 0.5470124844520201, "grad_norm": 0.8973418783092466, "learning_rate": 2.3180947828021574e-06, "loss": 0.1255, "step": 5937 }, { "epoch": 0.5471046206292901, "grad_norm": 0.975856203440726, "learning_rate": 2.317335027603046e-06, "loss": 0.1382, "step": 5938 }, { "epoch": 0.5471967568065601, "grad_norm": 0.8448417831997888, "learning_rate": 2.3165752893643974e-06, "loss": 0.1218, "step": 5939 }, { "epoch": 0.5472888929838301, "grad_norm": 0.8631379001410238, "learning_rate": 2.315815568156753e-06, "loss": 0.1274, "step": 5940 }, { "epoch": 0.5473810291611001, "grad_norm": 0.8739460925822028, "learning_rate": 2.315055864050654e-06, "loss": 0.1343, "step": 5941 }, { "epoch": 0.5474731653383701, "grad_norm": 0.9056750460446128, "learning_rate": 2.314296177116637e-06, "loss": 0.1456, "step": 5942 }, { "epoch": 0.5475653015156401, "grad_norm": 0.8597999521946855, "learning_rate": 2.3135365074252393e-06, "loss": 0.1229, "step": 5943 }, { "epoch": 0.5476574376929101, "grad_norm": 0.9429380689039017, "learning_rate": 2.3127768550469977e-06, "loss": 0.1517, "step": 5944 }, { "epoch": 0.5477495738701801, "grad_norm": 0.9326983964366256, "learning_rate": 2.3120172200524456e-06, "loss": 0.1409, "step": 5945 }, { "epoch": 0.5478417100474501, "grad_norm": 0.9029183579785335, "learning_rate": 2.311257602512114e-06, "loss": 0.1375, "step": 5946 }, { "epoch": 0.5479338462247202, "grad_norm": 0.8405914319952673, "learning_rate": 2.310498002496535e-06, "loss": 0.125, "step": 5947 }, { "epoch": 0.5480259824019902, "grad_norm": 0.8963143530428416, "learning_rate": 2.309738420076236e-06, "loss": 0.1362, "step": 5948 }, { "epoch": 0.5481181185792602, "grad_norm": 0.9373725224516829, "learning_rate": 2.308978855321746e-06, "loss": 0.1381, "step": 5949 }, { "epoch": 0.5482102547565302, "grad_norm": 0.904479495310587, "learning_rate": 2.30821930830359e-06, "loss": 0.1344, "step": 5950 }, { "epoch": 0.5483023909338002, "grad_norm": 0.947211874283797, "learning_rate": 2.307459779092291e-06, "loss": 0.1391, "step": 5951 }, { "epoch": 0.5483945271110702, "grad_norm": 0.9137280527447117, "learning_rate": 2.306700267758373e-06, "loss": 0.1244, "step": 5952 }, { "epoch": 0.5484866632883402, "grad_norm": 0.887794582900808, "learning_rate": 2.3059407743723562e-06, "loss": 0.1236, "step": 5953 }, { "epoch": 0.5485787994656102, "grad_norm": 0.8957336178387945, "learning_rate": 2.305181299004758e-06, "loss": 0.1319, "step": 5954 }, { "epoch": 0.5486709356428802, "grad_norm": 0.9285491033008205, "learning_rate": 2.304421841726098e-06, "loss": 0.1375, "step": 5955 }, { "epoch": 0.5487630718201502, "grad_norm": 0.9000401889585357, "learning_rate": 2.303662402606891e-06, "loss": 0.1394, "step": 5956 }, { "epoch": 0.5488552079974202, "grad_norm": 0.8887392546394776, "learning_rate": 2.3029029817176513e-06, "loss": 0.1241, "step": 5957 }, { "epoch": 0.5489473441746902, "grad_norm": 0.9063498591456991, "learning_rate": 2.302143579128891e-06, "loss": 0.1452, "step": 5958 }, { "epoch": 0.5490394803519602, "grad_norm": 0.9179438604712449, "learning_rate": 2.30138419491112e-06, "loss": 0.1393, "step": 5959 }, { "epoch": 0.5491316165292301, "grad_norm": 0.9440096748055181, "learning_rate": 2.3006248291348483e-06, "loss": 0.1458, "step": 5960 }, { "epoch": 0.5492237527065003, "grad_norm": 0.8571329043484666, "learning_rate": 2.2998654818705824e-06, "loss": 0.1258, "step": 5961 }, { "epoch": 0.5493158888837703, "grad_norm": 0.974799513318454, "learning_rate": 2.2991061531888285e-06, "loss": 0.146, "step": 5962 }, { "epoch": 0.5494080250610403, "grad_norm": 0.9535479404809656, "learning_rate": 2.29834684316009e-06, "loss": 0.1387, "step": 5963 }, { "epoch": 0.5495001612383102, "grad_norm": 0.9528230339650521, "learning_rate": 2.297587551854868e-06, "loss": 0.1313, "step": 5964 }, { "epoch": 0.5495922974155802, "grad_norm": 0.9235402560277223, "learning_rate": 2.296828279343664e-06, "loss": 0.1291, "step": 5965 }, { "epoch": 0.5496844335928502, "grad_norm": 0.858766032670027, "learning_rate": 2.2960690256969774e-06, "loss": 0.1261, "step": 5966 }, { "epoch": 0.5497765697701202, "grad_norm": 1.0036102049390694, "learning_rate": 2.2953097909853018e-06, "loss": 0.15, "step": 5967 }, { "epoch": 0.5498687059473902, "grad_norm": 0.9197290368123798, "learning_rate": 2.294550575279135e-06, "loss": 0.1263, "step": 5968 }, { "epoch": 0.5499608421246602, "grad_norm": 0.9410944923888054, "learning_rate": 2.293791378648969e-06, "loss": 0.1267, "step": 5969 }, { "epoch": 0.5500529783019302, "grad_norm": 0.9187152074045628, "learning_rate": 2.2930322011652965e-06, "loss": 0.1332, "step": 5970 }, { "epoch": 0.5501451144792002, "grad_norm": 0.877662807921679, "learning_rate": 2.2922730428986057e-06, "loss": 0.1323, "step": 5971 }, { "epoch": 0.5502372506564702, "grad_norm": 0.9595621542293774, "learning_rate": 2.291513903919385e-06, "loss": 0.1587, "step": 5972 }, { "epoch": 0.5503293868337402, "grad_norm": 0.9391446872559864, "learning_rate": 2.2907547842981213e-06, "loss": 0.1322, "step": 5973 }, { "epoch": 0.5504215230110103, "grad_norm": 0.9539499349532112, "learning_rate": 2.289995684105299e-06, "loss": 0.1416, "step": 5974 }, { "epoch": 0.5505136591882803, "grad_norm": 0.9231926460585064, "learning_rate": 2.2892366034113988e-06, "loss": 0.1305, "step": 5975 }, { "epoch": 0.5506057953655503, "grad_norm": 0.9016548074632982, "learning_rate": 2.288477542286903e-06, "loss": 0.1392, "step": 5976 }, { "epoch": 0.5506979315428203, "grad_norm": 0.942515622309317, "learning_rate": 2.2877185008022896e-06, "loss": 0.143, "step": 5977 }, { "epoch": 0.5507900677200903, "grad_norm": 0.9349815447766366, "learning_rate": 2.2869594790280376e-06, "loss": 0.1456, "step": 5978 }, { "epoch": 0.5508822038973603, "grad_norm": 0.9109243804576193, "learning_rate": 2.2862004770346205e-06, "loss": 0.1407, "step": 5979 }, { "epoch": 0.5509743400746303, "grad_norm": 0.8404738991971081, "learning_rate": 2.285441494892511e-06, "loss": 0.1245, "step": 5980 }, { "epoch": 0.5510664762519003, "grad_norm": 0.9338626947780788, "learning_rate": 2.284682532672183e-06, "loss": 0.1445, "step": 5981 }, { "epoch": 0.5511586124291703, "grad_norm": 0.9020789497422819, "learning_rate": 2.2839235904441054e-06, "loss": 0.1344, "step": 5982 }, { "epoch": 0.5512507486064403, "grad_norm": 0.8908428591725923, "learning_rate": 2.2831646682787443e-06, "loss": 0.1343, "step": 5983 }, { "epoch": 0.5513428847837103, "grad_norm": 0.9079304182626624, "learning_rate": 2.282405766246568e-06, "loss": 0.1287, "step": 5984 }, { "epoch": 0.5514350209609803, "grad_norm": 0.8329410231373364, "learning_rate": 2.281646884418039e-06, "loss": 0.1135, "step": 5985 }, { "epoch": 0.5515271571382503, "grad_norm": 0.9381099953259999, "learning_rate": 2.280888022863621e-06, "loss": 0.1405, "step": 5986 }, { "epoch": 0.5516192933155203, "grad_norm": 0.8513722262685598, "learning_rate": 2.2801291816537738e-06, "loss": 0.1265, "step": 5987 }, { "epoch": 0.5517114294927904, "grad_norm": 0.8820089761459324, "learning_rate": 2.2793703608589547e-06, "loss": 0.1432, "step": 5988 }, { "epoch": 0.5518035656700604, "grad_norm": 0.9175146678877896, "learning_rate": 2.2786115605496224e-06, "loss": 0.1372, "step": 5989 }, { "epoch": 0.5518957018473304, "grad_norm": 0.8941107121696663, "learning_rate": 2.2778527807962297e-06, "loss": 0.1289, "step": 5990 }, { "epoch": 0.5519878380246004, "grad_norm": 0.9554274388492524, "learning_rate": 2.277094021669231e-06, "loss": 0.1451, "step": 5991 }, { "epoch": 0.5520799742018704, "grad_norm": 0.8202855336317009, "learning_rate": 2.2763352832390762e-06, "loss": 0.1209, "step": 5992 }, { "epoch": 0.5521721103791404, "grad_norm": 0.8890984039392089, "learning_rate": 2.2755765655762135e-06, "loss": 0.1273, "step": 5993 }, { "epoch": 0.5522642465564104, "grad_norm": 0.9178667132404417, "learning_rate": 2.2748178687510915e-06, "loss": 0.1426, "step": 5994 }, { "epoch": 0.5523563827336804, "grad_norm": 0.8546557060383881, "learning_rate": 2.2740591928341552e-06, "loss": 0.1335, "step": 5995 }, { "epoch": 0.5524485189109504, "grad_norm": 0.9204965701548999, "learning_rate": 2.2733005378958462e-06, "loss": 0.1343, "step": 5996 }, { "epoch": 0.5525406550882204, "grad_norm": 0.9234067877974934, "learning_rate": 2.2725419040066075e-06, "loss": 0.1346, "step": 5997 }, { "epoch": 0.5526327912654904, "grad_norm": 0.9161899474482855, "learning_rate": 2.2717832912368766e-06, "loss": 0.133, "step": 5998 }, { "epoch": 0.5527249274427604, "grad_norm": 0.8984198063815197, "learning_rate": 2.271024699657093e-06, "loss": 0.1397, "step": 5999 }, { "epoch": 0.5528170636200304, "grad_norm": 0.8464187626712723, "learning_rate": 2.2702661293376895e-06, "loss": 0.1195, "step": 6000 }, { "epoch": 0.5528170636200304, "eval_loss": 0.13545145094394684, "eval_runtime": 299.9711, "eval_samples_per_second": 23.392, "eval_steps_per_second": 2.927, "step": 6000 }, { "epoch": 0.5529091997973004, "grad_norm": 0.8673690915958489, "learning_rate": 2.269507580349101e-06, "loss": 0.1426, "step": 6001 }, { "epoch": 0.5530013359745705, "grad_norm": 0.8595959230817573, "learning_rate": 2.2687490527617575e-06, "loss": 0.1308, "step": 6002 }, { "epoch": 0.5530934721518405, "grad_norm": 0.8980906183910887, "learning_rate": 2.2679905466460917e-06, "loss": 0.1319, "step": 6003 }, { "epoch": 0.5531856083291105, "grad_norm": 0.9618260702341198, "learning_rate": 2.2672320620725265e-06, "loss": 0.143, "step": 6004 }, { "epoch": 0.5532777445063805, "grad_norm": 0.8827191753043445, "learning_rate": 2.2664735991114893e-06, "loss": 0.1325, "step": 6005 }, { "epoch": 0.5533698806836505, "grad_norm": 0.9068695564847284, "learning_rate": 2.2657151578334046e-06, "loss": 0.131, "step": 6006 }, { "epoch": 0.5534620168609204, "grad_norm": 0.895570377791969, "learning_rate": 2.264956738308693e-06, "loss": 0.1286, "step": 6007 }, { "epoch": 0.5535541530381904, "grad_norm": 0.8658652374176646, "learning_rate": 2.2641983406077726e-06, "loss": 0.1287, "step": 6008 }, { "epoch": 0.5536462892154604, "grad_norm": 0.9105693225657342, "learning_rate": 2.2634399648010623e-06, "loss": 0.1358, "step": 6009 }, { "epoch": 0.5537384253927304, "grad_norm": 0.9705522735493408, "learning_rate": 2.262681610958976e-06, "loss": 0.1461, "step": 6010 }, { "epoch": 0.5538305615700004, "grad_norm": 0.9386721602193085, "learning_rate": 2.2619232791519287e-06, "loss": 0.1358, "step": 6011 }, { "epoch": 0.5539226977472704, "grad_norm": 0.8948899091915336, "learning_rate": 2.26116496945033e-06, "loss": 0.1298, "step": 6012 }, { "epoch": 0.5540148339245404, "grad_norm": 0.9849000608057639, "learning_rate": 2.260406681924589e-06, "loss": 0.1398, "step": 6013 }, { "epoch": 0.5541069701018104, "grad_norm": 0.9402527148448475, "learning_rate": 2.2596484166451136e-06, "loss": 0.1393, "step": 6014 }, { "epoch": 0.5541991062790805, "grad_norm": 0.8512564356361474, "learning_rate": 2.2588901736823087e-06, "loss": 0.1195, "step": 6015 }, { "epoch": 0.5542912424563505, "grad_norm": 0.9600645116664145, "learning_rate": 2.2581319531065777e-06, "loss": 0.1411, "step": 6016 }, { "epoch": 0.5543833786336205, "grad_norm": 0.9064131823119506, "learning_rate": 2.257373754988321e-06, "loss": 0.1265, "step": 6017 }, { "epoch": 0.5544755148108905, "grad_norm": 0.9335295798432109, "learning_rate": 2.256615579397936e-06, "loss": 0.1339, "step": 6018 }, { "epoch": 0.5545676509881605, "grad_norm": 0.9618056064055769, "learning_rate": 2.2558574264058218e-06, "loss": 0.145, "step": 6019 }, { "epoch": 0.5546597871654305, "grad_norm": 0.8809660515422949, "learning_rate": 2.255099296082372e-06, "loss": 0.1253, "step": 6020 }, { "epoch": 0.5547519233427005, "grad_norm": 0.8893338573120851, "learning_rate": 2.2543411884979775e-06, "loss": 0.131, "step": 6021 }, { "epoch": 0.5548440595199705, "grad_norm": 0.8911158292533096, "learning_rate": 2.2535831037230313e-06, "loss": 0.129, "step": 6022 }, { "epoch": 0.5549361956972405, "grad_norm": 0.931882441099175, "learning_rate": 2.2528250418279196e-06, "loss": 0.1421, "step": 6023 }, { "epoch": 0.5550283318745105, "grad_norm": 0.9395929443411103, "learning_rate": 2.2520670028830305e-06, "loss": 0.1337, "step": 6024 }, { "epoch": 0.5551204680517805, "grad_norm": 0.9529079984593413, "learning_rate": 2.251308986958746e-06, "loss": 0.1342, "step": 6025 }, { "epoch": 0.5552126042290505, "grad_norm": 0.9295137492491802, "learning_rate": 2.250550994125449e-06, "loss": 0.1377, "step": 6026 }, { "epoch": 0.5553047404063205, "grad_norm": 0.9283395008536116, "learning_rate": 2.249793024453519e-06, "loss": 0.1243, "step": 6027 }, { "epoch": 0.5553968765835905, "grad_norm": 0.9643312312736612, "learning_rate": 2.2490350780133344e-06, "loss": 0.1447, "step": 6028 }, { "epoch": 0.5554890127608606, "grad_norm": 0.9532369619786419, "learning_rate": 2.2482771548752684e-06, "loss": 0.137, "step": 6029 }, { "epoch": 0.5555811489381306, "grad_norm": 0.9746110125718069, "learning_rate": 2.247519255109697e-06, "loss": 0.1369, "step": 6030 }, { "epoch": 0.5556732851154006, "grad_norm": 0.9429182322625936, "learning_rate": 2.2467613787869886e-06, "loss": 0.1387, "step": 6031 }, { "epoch": 0.5557654212926706, "grad_norm": 0.8952924660189465, "learning_rate": 2.2460035259775147e-06, "loss": 0.1373, "step": 6032 }, { "epoch": 0.5558575574699406, "grad_norm": 0.8812945184571047, "learning_rate": 2.2452456967516404e-06, "loss": 0.1349, "step": 6033 }, { "epoch": 0.5559496936472106, "grad_norm": 0.9285111333188203, "learning_rate": 2.2444878911797295e-06, "loss": 0.1378, "step": 6034 }, { "epoch": 0.5560418298244806, "grad_norm": 0.9269768243193539, "learning_rate": 2.2437301093321467e-06, "loss": 0.149, "step": 6035 }, { "epoch": 0.5561339660017506, "grad_norm": 0.9081816960352027, "learning_rate": 2.242972351279251e-06, "loss": 0.133, "step": 6036 }, { "epoch": 0.5562261021790206, "grad_norm": 0.9203965936383334, "learning_rate": 2.242214617091399e-06, "loss": 0.1269, "step": 6037 }, { "epoch": 0.5563182383562906, "grad_norm": 0.8587985077915162, "learning_rate": 2.241456906838948e-06, "loss": 0.1248, "step": 6038 }, { "epoch": 0.5564103745335606, "grad_norm": 0.9433740268698593, "learning_rate": 2.2406992205922506e-06, "loss": 0.1332, "step": 6039 }, { "epoch": 0.5565025107108306, "grad_norm": 0.8494531923033956, "learning_rate": 2.2399415584216595e-06, "loss": 0.1199, "step": 6040 }, { "epoch": 0.5565946468881006, "grad_norm": 0.8689474784825143, "learning_rate": 2.2391839203975225e-06, "loss": 0.1291, "step": 6041 }, { "epoch": 0.5566867830653707, "grad_norm": 0.9208796138126546, "learning_rate": 2.238426306590186e-06, "loss": 0.1504, "step": 6042 }, { "epoch": 0.5567789192426407, "grad_norm": 0.9175005106338083, "learning_rate": 2.237668717069995e-06, "loss": 0.1338, "step": 6043 }, { "epoch": 0.5568710554199107, "grad_norm": 0.9590343105192461, "learning_rate": 2.2369111519072917e-06, "loss": 0.1402, "step": 6044 }, { "epoch": 0.5569631915971807, "grad_norm": 0.9352154416418151, "learning_rate": 2.2361536111724176e-06, "loss": 0.1285, "step": 6045 }, { "epoch": 0.5570553277744507, "grad_norm": 0.9108642890336781, "learning_rate": 2.2353960949357082e-06, "loss": 0.1327, "step": 6046 }, { "epoch": 0.5571474639517207, "grad_norm": 0.9125190233288996, "learning_rate": 2.2346386032674995e-06, "loss": 0.137, "step": 6047 }, { "epoch": 0.5572396001289907, "grad_norm": 0.9423488359590013, "learning_rate": 2.2338811362381256e-06, "loss": 0.1324, "step": 6048 }, { "epoch": 0.5573317363062606, "grad_norm": 0.9392634101242259, "learning_rate": 2.233123693917917e-06, "loss": 0.1322, "step": 6049 }, { "epoch": 0.5574238724835306, "grad_norm": 0.908005657369545, "learning_rate": 2.232366276377201e-06, "loss": 0.1258, "step": 6050 }, { "epoch": 0.5575160086608006, "grad_norm": 0.8968615049336631, "learning_rate": 2.2316088836863064e-06, "loss": 0.1243, "step": 6051 }, { "epoch": 0.5576081448380706, "grad_norm": 0.9926119163177404, "learning_rate": 2.2308515159155546e-06, "loss": 0.1417, "step": 6052 }, { "epoch": 0.5577002810153406, "grad_norm": 0.9075124724568028, "learning_rate": 2.23009417313527e-06, "loss": 0.1259, "step": 6053 }, { "epoch": 0.5577924171926106, "grad_norm": 0.9618556843783079, "learning_rate": 2.2293368554157695e-06, "loss": 0.1311, "step": 6054 }, { "epoch": 0.5578845533698806, "grad_norm": 1.0245024369165074, "learning_rate": 2.228579562827371e-06, "loss": 0.1452, "step": 6055 }, { "epoch": 0.5579766895471507, "grad_norm": 0.9542187116021007, "learning_rate": 2.2278222954403895e-06, "loss": 0.1345, "step": 6056 }, { "epoch": 0.5580688257244207, "grad_norm": 1.026635884315615, "learning_rate": 2.2270650533251383e-06, "loss": 0.1531, "step": 6057 }, { "epoch": 0.5581609619016907, "grad_norm": 0.9103746701579992, "learning_rate": 2.2263078365519244e-06, "loss": 0.1315, "step": 6058 }, { "epoch": 0.5582530980789607, "grad_norm": 0.8750502768735856, "learning_rate": 2.2255506451910584e-06, "loss": 0.1389, "step": 6059 }, { "epoch": 0.5583452342562307, "grad_norm": 1.0348797840452786, "learning_rate": 2.2247934793128436e-06, "loss": 0.1507, "step": 6060 }, { "epoch": 0.5584373704335007, "grad_norm": 0.9387319109433172, "learning_rate": 2.224036338987585e-06, "loss": 0.1416, "step": 6061 }, { "epoch": 0.5585295066107707, "grad_norm": 0.9648857566576942, "learning_rate": 2.223279224285582e-06, "loss": 0.1465, "step": 6062 }, { "epoch": 0.5586216427880407, "grad_norm": 0.9512060091262989, "learning_rate": 2.2225221352771316e-06, "loss": 0.1333, "step": 6063 }, { "epoch": 0.5587137789653107, "grad_norm": 0.8818452555644942, "learning_rate": 2.221765072032532e-06, "loss": 0.1354, "step": 6064 }, { "epoch": 0.5588059151425807, "grad_norm": 0.9595230313609375, "learning_rate": 2.2210080346220755e-06, "loss": 0.1495, "step": 6065 }, { "epoch": 0.5588980513198507, "grad_norm": 0.9406934954772517, "learning_rate": 2.220251023116052e-06, "loss": 0.1395, "step": 6066 }, { "epoch": 0.5589901874971207, "grad_norm": 1.0184402402535877, "learning_rate": 2.2194940375847517e-06, "loss": 0.1491, "step": 6067 }, { "epoch": 0.5590823236743907, "grad_norm": 0.9912445760663547, "learning_rate": 2.2187370780984596e-06, "loss": 0.1489, "step": 6068 }, { "epoch": 0.5591744598516608, "grad_norm": 0.94668807549237, "learning_rate": 2.2179801447274613e-06, "loss": 0.1385, "step": 6069 }, { "epoch": 0.5592665960289308, "grad_norm": 0.9082934245577071, "learning_rate": 2.217223237542036e-06, "loss": 0.1235, "step": 6070 }, { "epoch": 0.5593587322062008, "grad_norm": 0.9420787600329253, "learning_rate": 2.2164663566124635e-06, "loss": 0.1376, "step": 6071 }, { "epoch": 0.5594508683834708, "grad_norm": 0.8741240273373798, "learning_rate": 2.2157095020090207e-06, "loss": 0.1316, "step": 6072 }, { "epoch": 0.5595430045607408, "grad_norm": 0.8657043268109199, "learning_rate": 2.2149526738019802e-06, "loss": 0.1233, "step": 6073 }, { "epoch": 0.5596351407380108, "grad_norm": 0.8732124884259539, "learning_rate": 2.2141958720616163e-06, "loss": 0.1279, "step": 6074 }, { "epoch": 0.5597272769152808, "grad_norm": 0.8525074108717983, "learning_rate": 2.2134390968581958e-06, "loss": 0.1243, "step": 6075 }, { "epoch": 0.5598194130925508, "grad_norm": 0.8487356309861768, "learning_rate": 2.212682348261985e-06, "loss": 0.1242, "step": 6076 }, { "epoch": 0.5599115492698208, "grad_norm": 0.9289207090322201, "learning_rate": 2.21192562634325e-06, "loss": 0.1336, "step": 6077 }, { "epoch": 0.5600036854470908, "grad_norm": 0.8841161908632112, "learning_rate": 2.2111689311722524e-06, "loss": 0.1327, "step": 6078 }, { "epoch": 0.5600958216243608, "grad_norm": 0.9007258520902498, "learning_rate": 2.210412262819249e-06, "loss": 0.1314, "step": 6079 }, { "epoch": 0.5601879578016308, "grad_norm": 0.9034049217075238, "learning_rate": 2.209655621354499e-06, "loss": 0.1288, "step": 6080 }, { "epoch": 0.5602800939789008, "grad_norm": 1.0561927888352458, "learning_rate": 2.2088990068482554e-06, "loss": 0.1638, "step": 6081 }, { "epoch": 0.5603722301561708, "grad_norm": 0.9263479169980393, "learning_rate": 2.208142419370771e-06, "loss": 0.1286, "step": 6082 }, { "epoch": 0.5604643663334409, "grad_norm": 0.876129774276463, "learning_rate": 2.207385858992294e-06, "loss": 0.1303, "step": 6083 }, { "epoch": 0.5605565025107109, "grad_norm": 0.8577719677238493, "learning_rate": 2.206629325783071e-06, "loss": 0.1278, "step": 6084 }, { "epoch": 0.5606486386879809, "grad_norm": 0.9145085290177857, "learning_rate": 2.2058728198133466e-06, "loss": 0.1449, "step": 6085 }, { "epoch": 0.5607407748652509, "grad_norm": 0.9125478368838313, "learning_rate": 2.2051163411533644e-06, "loss": 0.1308, "step": 6086 }, { "epoch": 0.5608329110425209, "grad_norm": 0.8282140216339626, "learning_rate": 2.2043598898733597e-06, "loss": 0.1243, "step": 6087 }, { "epoch": 0.5609250472197909, "grad_norm": 0.8843568534788926, "learning_rate": 2.2036034660435714e-06, "loss": 0.1404, "step": 6088 }, { "epoch": 0.5610171833970609, "grad_norm": 0.9683614071935065, "learning_rate": 2.2028470697342334e-06, "loss": 0.1459, "step": 6089 }, { "epoch": 0.5611093195743309, "grad_norm": 0.9027082869152945, "learning_rate": 2.2020907010155775e-06, "loss": 0.136, "step": 6090 }, { "epoch": 0.5612014557516009, "grad_norm": 0.9025715692301286, "learning_rate": 2.2013343599578314e-06, "loss": 0.1334, "step": 6091 }, { "epoch": 0.5612935919288708, "grad_norm": 1.0019084022146867, "learning_rate": 2.2005780466312224e-06, "loss": 0.1453, "step": 6092 }, { "epoch": 0.5613857281061408, "grad_norm": 0.8716930577679224, "learning_rate": 2.1998217611059733e-06, "loss": 0.118, "step": 6093 }, { "epoch": 0.5614778642834108, "grad_norm": 1.0009481851360602, "learning_rate": 2.1990655034523073e-06, "loss": 0.1483, "step": 6094 }, { "epoch": 0.5615700004606808, "grad_norm": 0.900153344517744, "learning_rate": 2.198309273740441e-06, "loss": 0.1184, "step": 6095 }, { "epoch": 0.5616621366379508, "grad_norm": 0.9337923240361191, "learning_rate": 2.1975530720405906e-06, "loss": 0.1316, "step": 6096 }, { "epoch": 0.561754272815221, "grad_norm": 0.8799886004445121, "learning_rate": 2.1967968984229704e-06, "loss": 0.131, "step": 6097 }, { "epoch": 0.5618464089924909, "grad_norm": 0.9528519591932912, "learning_rate": 2.1960407529577917e-06, "loss": 0.1323, "step": 6098 }, { "epoch": 0.5619385451697609, "grad_norm": 0.9216207218465383, "learning_rate": 2.1952846357152603e-06, "loss": 0.1332, "step": 6099 }, { "epoch": 0.5620306813470309, "grad_norm": 0.902665622003663, "learning_rate": 2.1945285467655843e-06, "loss": 0.1312, "step": 6100 }, { "epoch": 0.5621228175243009, "grad_norm": 0.9401051874207889, "learning_rate": 2.1937724861789645e-06, "loss": 0.1443, "step": 6101 }, { "epoch": 0.5622149537015709, "grad_norm": 0.9345134562655586, "learning_rate": 2.1930164540256035e-06, "loss": 0.1343, "step": 6102 }, { "epoch": 0.5623070898788409, "grad_norm": 0.8892763251463518, "learning_rate": 2.1922604503756977e-06, "loss": 0.1312, "step": 6103 }, { "epoch": 0.5623992260561109, "grad_norm": 0.8633253656360871, "learning_rate": 2.1915044752994417e-06, "loss": 0.1254, "step": 6104 }, { "epoch": 0.5624913622333809, "grad_norm": 0.9320587345393514, "learning_rate": 2.1907485288670288e-06, "loss": 0.1367, "step": 6105 }, { "epoch": 0.5625834984106509, "grad_norm": 0.889817249819364, "learning_rate": 2.1899926111486473e-06, "loss": 0.1374, "step": 6106 }, { "epoch": 0.5626756345879209, "grad_norm": 0.9262828568720447, "learning_rate": 2.1892367222144863e-06, "loss": 0.1449, "step": 6107 }, { "epoch": 0.5627677707651909, "grad_norm": 0.8659566791382182, "learning_rate": 2.1884808621347288e-06, "loss": 0.1249, "step": 6108 }, { "epoch": 0.5628599069424609, "grad_norm": 0.9488609398698363, "learning_rate": 2.1877250309795565e-06, "loss": 0.1458, "step": 6109 }, { "epoch": 0.562952043119731, "grad_norm": 0.9129507239437383, "learning_rate": 2.186969228819149e-06, "loss": 0.1356, "step": 6110 }, { "epoch": 0.563044179297001, "grad_norm": 0.9234955497308438, "learning_rate": 2.1862134557236826e-06, "loss": 0.1358, "step": 6111 }, { "epoch": 0.563136315474271, "grad_norm": 0.9315935822477422, "learning_rate": 2.1854577117633297e-06, "loss": 0.1459, "step": 6112 }, { "epoch": 0.563228451651541, "grad_norm": 0.961404001994605, "learning_rate": 2.1847019970082628e-06, "loss": 0.1366, "step": 6113 }, { "epoch": 0.563320587828811, "grad_norm": 0.8541689771391625, "learning_rate": 2.1839463115286484e-06, "loss": 0.1188, "step": 6114 }, { "epoch": 0.563412724006081, "grad_norm": 0.9470634734872366, "learning_rate": 2.183190655394655e-06, "loss": 0.1404, "step": 6115 }, { "epoch": 0.563504860183351, "grad_norm": 0.9911048928912876, "learning_rate": 2.182435028676442e-06, "loss": 0.1413, "step": 6116 }, { "epoch": 0.563596996360621, "grad_norm": 0.9182342845541454, "learning_rate": 2.1816794314441704e-06, "loss": 0.1234, "step": 6117 }, { "epoch": 0.563689132537891, "grad_norm": 0.9403718290802439, "learning_rate": 2.1809238637679984e-06, "loss": 0.1295, "step": 6118 }, { "epoch": 0.563781268715161, "grad_norm": 0.9045271472960527, "learning_rate": 2.1801683257180807e-06, "loss": 0.1291, "step": 6119 }, { "epoch": 0.563873404892431, "grad_norm": 0.9327840459682971, "learning_rate": 2.179412817364567e-06, "loss": 0.1305, "step": 6120 }, { "epoch": 0.563965541069701, "grad_norm": 0.9333718027134292, "learning_rate": 2.1786573387776085e-06, "loss": 0.1385, "step": 6121 }, { "epoch": 0.564057677246971, "grad_norm": 0.9100700904648433, "learning_rate": 2.17790189002735e-06, "loss": 0.1284, "step": 6122 }, { "epoch": 0.564149813424241, "grad_norm": 0.9593641910292168, "learning_rate": 2.177146471183937e-06, "loss": 0.1342, "step": 6123 }, { "epoch": 0.5642419496015111, "grad_norm": 0.968082559377105, "learning_rate": 2.176391082317508e-06, "loss": 0.141, "step": 6124 }, { "epoch": 0.5643340857787811, "grad_norm": 0.9149483271324426, "learning_rate": 2.175635723498201e-06, "loss": 0.1378, "step": 6125 }, { "epoch": 0.5644262219560511, "grad_norm": 0.8897063217512015, "learning_rate": 2.1748803947961533e-06, "loss": 0.1264, "step": 6126 }, { "epoch": 0.5645183581333211, "grad_norm": 0.8974960547750435, "learning_rate": 2.174125096281496e-06, "loss": 0.1409, "step": 6127 }, { "epoch": 0.5646104943105911, "grad_norm": 0.9182853643197407, "learning_rate": 2.1733698280243578e-06, "loss": 0.1307, "step": 6128 }, { "epoch": 0.5647026304878611, "grad_norm": 0.8913059045600129, "learning_rate": 2.1726145900948664e-06, "loss": 0.1326, "step": 6129 }, { "epoch": 0.5647947666651311, "grad_norm": 0.9202971875767432, "learning_rate": 2.1718593825631454e-06, "loss": 0.1407, "step": 6130 }, { "epoch": 0.5648869028424011, "grad_norm": 0.9154564708205672, "learning_rate": 2.1711042054993164e-06, "loss": 0.144, "step": 6131 }, { "epoch": 0.5649790390196711, "grad_norm": 0.9188105344844977, "learning_rate": 2.1703490589734976e-06, "loss": 0.1406, "step": 6132 }, { "epoch": 0.5650711751969411, "grad_norm": 0.9213698353945419, "learning_rate": 2.1695939430558035e-06, "loss": 0.1337, "step": 6133 }, { "epoch": 0.565163311374211, "grad_norm": 0.9162759583166316, "learning_rate": 2.1688388578163476e-06, "loss": 0.1334, "step": 6134 }, { "epoch": 0.565255447551481, "grad_norm": 0.941951590936026, "learning_rate": 2.168083803325239e-06, "loss": 0.1333, "step": 6135 }, { "epoch": 0.565347583728751, "grad_norm": 0.9398691081517228, "learning_rate": 2.167328779652586e-06, "loss": 0.134, "step": 6136 }, { "epoch": 0.5654397199060212, "grad_norm": 0.9567818118407774, "learning_rate": 2.166573786868491e-06, "loss": 0.1385, "step": 6137 }, { "epoch": 0.5655318560832912, "grad_norm": 0.9417036150379874, "learning_rate": 2.1658188250430556e-06, "loss": 0.1428, "step": 6138 }, { "epoch": 0.5656239922605611, "grad_norm": 0.8769247131311982, "learning_rate": 2.1650638942463785e-06, "loss": 0.131, "step": 6139 }, { "epoch": 0.5657161284378311, "grad_norm": 0.9078529735616421, "learning_rate": 2.1643089945485555e-06, "loss": 0.1335, "step": 6140 }, { "epoch": 0.5658082646151011, "grad_norm": 0.9324124879098438, "learning_rate": 2.163554126019677e-06, "loss": 0.143, "step": 6141 }, { "epoch": 0.5659004007923711, "grad_norm": 0.9048706928934882, "learning_rate": 2.162799288729835e-06, "loss": 0.1359, "step": 6142 }, { "epoch": 0.5659925369696411, "grad_norm": 0.9352246348847684, "learning_rate": 2.162044482749115e-06, "loss": 0.142, "step": 6143 }, { "epoch": 0.5660846731469111, "grad_norm": 0.911385629437897, "learning_rate": 2.161289708147602e-06, "loss": 0.1347, "step": 6144 }, { "epoch": 0.5661768093241811, "grad_norm": 1.011126666761643, "learning_rate": 2.1605349649953756e-06, "loss": 0.1568, "step": 6145 }, { "epoch": 0.5662689455014511, "grad_norm": 0.8963212952587003, "learning_rate": 2.1597802533625135e-06, "loss": 0.126, "step": 6146 }, { "epoch": 0.5663610816787211, "grad_norm": 0.8806494895413713, "learning_rate": 2.159025573319092e-06, "loss": 0.129, "step": 6147 }, { "epoch": 0.5664532178559911, "grad_norm": 0.9027048432779949, "learning_rate": 2.1582709249351834e-06, "loss": 0.1256, "step": 6148 }, { "epoch": 0.5665453540332611, "grad_norm": 0.926098276509541, "learning_rate": 2.157516308280855e-06, "loss": 0.1377, "step": 6149 }, { "epoch": 0.5666374902105311, "grad_norm": 0.8869498269399431, "learning_rate": 2.156761723426175e-06, "loss": 0.1191, "step": 6150 }, { "epoch": 0.5667296263878012, "grad_norm": 0.9599949611293014, "learning_rate": 2.1560071704412052e-06, "loss": 0.1386, "step": 6151 }, { "epoch": 0.5668217625650712, "grad_norm": 0.9464471559188455, "learning_rate": 2.155252649396008e-06, "loss": 0.124, "step": 6152 }, { "epoch": 0.5669138987423412, "grad_norm": 0.9102320016506573, "learning_rate": 2.1544981603606386e-06, "loss": 0.131, "step": 6153 }, { "epoch": 0.5670060349196112, "grad_norm": 0.90157281915478, "learning_rate": 2.1537437034051516e-06, "loss": 0.1221, "step": 6154 }, { "epoch": 0.5670981710968812, "grad_norm": 0.9285510101392009, "learning_rate": 2.1529892785995996e-06, "loss": 0.1279, "step": 6155 }, { "epoch": 0.5671903072741512, "grad_norm": 0.9514959766388984, "learning_rate": 2.152234886014031e-06, "loss": 0.1314, "step": 6156 }, { "epoch": 0.5672824434514212, "grad_norm": 0.9429262054723736, "learning_rate": 2.1514805257184894e-06, "loss": 0.1375, "step": 6157 }, { "epoch": 0.5673745796286912, "grad_norm": 0.9573607033483201, "learning_rate": 2.1507261977830198e-06, "loss": 0.1345, "step": 6158 }, { "epoch": 0.5674667158059612, "grad_norm": 0.9221981689933934, "learning_rate": 2.1499719022776588e-06, "loss": 0.1385, "step": 6159 }, { "epoch": 0.5675588519832312, "grad_norm": 0.9011655547573784, "learning_rate": 2.149217639272445e-06, "loss": 0.1306, "step": 6160 }, { "epoch": 0.5676509881605012, "grad_norm": 0.9171495889253741, "learning_rate": 2.1484634088374124e-06, "loss": 0.1327, "step": 6161 }, { "epoch": 0.5677431243377712, "grad_norm": 1.0023115963468265, "learning_rate": 2.1477092110425887e-06, "loss": 0.1435, "step": 6162 }, { "epoch": 0.5678352605150412, "grad_norm": 0.9694155127507335, "learning_rate": 2.1469550459580025e-06, "loss": 0.1493, "step": 6163 }, { "epoch": 0.5679273966923112, "grad_norm": 0.8953821703774014, "learning_rate": 2.1462009136536787e-06, "loss": 0.1302, "step": 6164 }, { "epoch": 0.5680195328695813, "grad_norm": 0.9109471052698274, "learning_rate": 2.145446814199639e-06, "loss": 0.1317, "step": 6165 }, { "epoch": 0.5681116690468513, "grad_norm": 0.9592739524875322, "learning_rate": 2.1446927476658996e-06, "loss": 0.1398, "step": 6166 }, { "epoch": 0.5682038052241213, "grad_norm": 0.8768594453644337, "learning_rate": 2.1439387141224775e-06, "loss": 0.1232, "step": 6167 }, { "epoch": 0.5682959414013913, "grad_norm": 0.894972250282082, "learning_rate": 2.1431847136393832e-06, "loss": 0.1311, "step": 6168 }, { "epoch": 0.5683880775786613, "grad_norm": 0.8918939489151908, "learning_rate": 2.1424307462866283e-06, "loss": 0.1292, "step": 6169 }, { "epoch": 0.5684802137559313, "grad_norm": 0.902243814029342, "learning_rate": 2.141676812134216e-06, "loss": 0.1217, "step": 6170 }, { "epoch": 0.5685723499332013, "grad_norm": 0.971252940336675, "learning_rate": 2.1409229112521498e-06, "loss": 0.1394, "step": 6171 }, { "epoch": 0.5686644861104713, "grad_norm": 0.9021673999338276, "learning_rate": 2.1401690437104306e-06, "loss": 0.1227, "step": 6172 }, { "epoch": 0.5687566222877413, "grad_norm": 0.9182448929575698, "learning_rate": 2.139415209579055e-06, "loss": 0.1361, "step": 6173 }, { "epoch": 0.5688487584650113, "grad_norm": 0.9163926727615889, "learning_rate": 2.1386614089280145e-06, "loss": 0.1446, "step": 6174 }, { "epoch": 0.5689408946422813, "grad_norm": 0.9611220324654529, "learning_rate": 2.137907641827302e-06, "loss": 0.1437, "step": 6175 }, { "epoch": 0.5690330308195513, "grad_norm": 0.918237967741333, "learning_rate": 2.1371539083469033e-06, "loss": 0.1397, "step": 6176 }, { "epoch": 0.5691251669968213, "grad_norm": 0.9040728280725138, "learning_rate": 2.1364002085568046e-06, "loss": 0.1355, "step": 6177 }, { "epoch": 0.5692173031740914, "grad_norm": 0.9140522866500976, "learning_rate": 2.135646542526985e-06, "loss": 0.1288, "step": 6178 }, { "epoch": 0.5693094393513614, "grad_norm": 0.9138310043305649, "learning_rate": 2.1348929103274223e-06, "loss": 0.1296, "step": 6179 }, { "epoch": 0.5694015755286314, "grad_norm": 0.8987271086082552, "learning_rate": 2.134139312028093e-06, "loss": 0.1366, "step": 6180 }, { "epoch": 0.5694937117059014, "grad_norm": 0.8915096965436777, "learning_rate": 2.1333857476989685e-06, "loss": 0.1348, "step": 6181 }, { "epoch": 0.5695858478831713, "grad_norm": 0.9142618530436438, "learning_rate": 2.1326322174100156e-06, "loss": 0.1265, "step": 6182 }, { "epoch": 0.5696779840604413, "grad_norm": 1.0020211952626925, "learning_rate": 2.1318787212312015e-06, "loss": 0.14, "step": 6183 }, { "epoch": 0.5697701202377113, "grad_norm": 0.9206368798131661, "learning_rate": 2.131125259232487e-06, "loss": 0.1319, "step": 6184 }, { "epoch": 0.5698622564149813, "grad_norm": 0.867245016991194, "learning_rate": 2.130371831483833e-06, "loss": 0.1226, "step": 6185 }, { "epoch": 0.5699543925922513, "grad_norm": 0.8823565460940279, "learning_rate": 2.1296184380551936e-06, "loss": 0.128, "step": 6186 }, { "epoch": 0.5700465287695213, "grad_norm": 0.9646875119052738, "learning_rate": 2.128865079016522e-06, "loss": 0.1402, "step": 6187 }, { "epoch": 0.5701386649467913, "grad_norm": 0.9309522537125285, "learning_rate": 2.128111754437768e-06, "loss": 0.1391, "step": 6188 }, { "epoch": 0.5702308011240613, "grad_norm": 0.895035105648232, "learning_rate": 2.127358464388877e-06, "loss": 0.1366, "step": 6189 }, { "epoch": 0.5703229373013313, "grad_norm": 0.9393248599902015, "learning_rate": 2.1266052089397936e-06, "loss": 0.1467, "step": 6190 }, { "epoch": 0.5704150734786013, "grad_norm": 0.9666709102936543, "learning_rate": 2.1258519881604566e-06, "loss": 0.1287, "step": 6191 }, { "epoch": 0.5705072096558714, "grad_norm": 0.8937479680491346, "learning_rate": 2.125098802120802e-06, "loss": 0.1304, "step": 6192 }, { "epoch": 0.5705993458331414, "grad_norm": 0.9700220829306405, "learning_rate": 2.1243456508907643e-06, "loss": 0.1394, "step": 6193 }, { "epoch": 0.5706914820104114, "grad_norm": 0.9037958372928121, "learning_rate": 2.1235925345402746e-06, "loss": 0.1232, "step": 6194 }, { "epoch": 0.5707836181876814, "grad_norm": 0.8889183496728102, "learning_rate": 2.122839453139257e-06, "loss": 0.1274, "step": 6195 }, { "epoch": 0.5708757543649514, "grad_norm": 0.8926523541631516, "learning_rate": 2.122086406757637e-06, "loss": 0.1307, "step": 6196 }, { "epoch": 0.5709678905422214, "grad_norm": 0.9152895066179517, "learning_rate": 2.121333395465335e-06, "loss": 0.1343, "step": 6197 }, { "epoch": 0.5710600267194914, "grad_norm": 0.9557095594049324, "learning_rate": 2.1205804193322685e-06, "loss": 0.138, "step": 6198 }, { "epoch": 0.5711521628967614, "grad_norm": 0.8443583063364482, "learning_rate": 2.119827478428351e-06, "loss": 0.1314, "step": 6199 }, { "epoch": 0.5712442990740314, "grad_norm": 0.9528291650337537, "learning_rate": 2.1190745728234916e-06, "loss": 0.1361, "step": 6200 }, { "epoch": 0.5713364352513014, "grad_norm": 0.895194785522947, "learning_rate": 2.1183217025876e-06, "loss": 0.1381, "step": 6201 }, { "epoch": 0.5714285714285714, "grad_norm": 0.9073430870330993, "learning_rate": 2.1175688677905804e-06, "loss": 0.1211, "step": 6202 }, { "epoch": 0.5715207076058414, "grad_norm": 0.9412625238043502, "learning_rate": 2.116816068502331e-06, "loss": 0.1392, "step": 6203 }, { "epoch": 0.5716128437831114, "grad_norm": 0.8922180197402308, "learning_rate": 2.1160633047927515e-06, "loss": 0.1346, "step": 6204 }, { "epoch": 0.5717049799603815, "grad_norm": 0.9184043552563133, "learning_rate": 2.115310576731735e-06, "loss": 0.132, "step": 6205 }, { "epoch": 0.5717971161376515, "grad_norm": 0.8905117266588061, "learning_rate": 2.114557884389174e-06, "loss": 0.1338, "step": 6206 }, { "epoch": 0.5718892523149215, "grad_norm": 1.0157498803550593, "learning_rate": 2.1138052278349543e-06, "loss": 0.1359, "step": 6207 }, { "epoch": 0.5719813884921915, "grad_norm": 0.91678227280953, "learning_rate": 2.1130526071389603e-06, "loss": 0.1155, "step": 6208 }, { "epoch": 0.5720735246694615, "grad_norm": 0.9086101316942187, "learning_rate": 2.1123000223710737e-06, "loss": 0.1303, "step": 6209 }, { "epoch": 0.5721656608467315, "grad_norm": 0.9277009997023078, "learning_rate": 2.1115474736011725e-06, "loss": 0.1321, "step": 6210 }, { "epoch": 0.5722577970240015, "grad_norm": 0.9127183367904651, "learning_rate": 2.110794960899129e-06, "loss": 0.1383, "step": 6211 }, { "epoch": 0.5723499332012715, "grad_norm": 0.9983831214605733, "learning_rate": 2.1100424843348157e-06, "loss": 0.1328, "step": 6212 }, { "epoch": 0.5724420693785415, "grad_norm": 0.9561191380206018, "learning_rate": 2.1092900439780993e-06, "loss": 0.1393, "step": 6213 }, { "epoch": 0.5725342055558115, "grad_norm": 0.9076953689928847, "learning_rate": 2.108537639898845e-06, "loss": 0.1343, "step": 6214 }, { "epoch": 0.5726263417330815, "grad_norm": 0.9230865489132, "learning_rate": 2.1077852721669132e-06, "loss": 0.125, "step": 6215 }, { "epoch": 0.5727184779103515, "grad_norm": 0.8781994139944165, "learning_rate": 2.10703294085216e-06, "loss": 0.1222, "step": 6216 }, { "epoch": 0.5728106140876215, "grad_norm": 0.9418352709870952, "learning_rate": 2.1062806460244415e-06, "loss": 0.1351, "step": 6217 }, { "epoch": 0.5729027502648915, "grad_norm": 0.9333115918970857, "learning_rate": 2.1055283877536066e-06, "loss": 0.1376, "step": 6218 }, { "epoch": 0.5729948864421616, "grad_norm": 0.8909079069488656, "learning_rate": 2.1047761661095043e-06, "loss": 0.134, "step": 6219 }, { "epoch": 0.5730870226194316, "grad_norm": 0.923389929285855, "learning_rate": 2.1040239811619774e-06, "loss": 0.1382, "step": 6220 }, { "epoch": 0.5731791587967016, "grad_norm": 0.900590742643775, "learning_rate": 2.1032718329808656e-06, "loss": 0.135, "step": 6221 }, { "epoch": 0.5732712949739716, "grad_norm": 0.8999258835799233, "learning_rate": 2.102519721636007e-06, "loss": 0.1269, "step": 6222 }, { "epoch": 0.5733634311512416, "grad_norm": 0.9115861566104987, "learning_rate": 2.1017676471972363e-06, "loss": 0.1194, "step": 6223 }, { "epoch": 0.5734555673285116, "grad_norm": 0.9547979195731437, "learning_rate": 2.101015609734381e-06, "loss": 0.1358, "step": 6224 }, { "epoch": 0.5735477035057815, "grad_norm": 0.8987983423045988, "learning_rate": 2.1002636093172694e-06, "loss": 0.1404, "step": 6225 }, { "epoch": 0.5736398396830515, "grad_norm": 0.9396026936959258, "learning_rate": 2.099511646015725e-06, "loss": 0.1358, "step": 6226 }, { "epoch": 0.5737319758603215, "grad_norm": 0.9380954721223328, "learning_rate": 2.098759719899568e-06, "loss": 0.1365, "step": 6227 }, { "epoch": 0.5738241120375915, "grad_norm": 0.9456480775172186, "learning_rate": 2.0980078310386135e-06, "loss": 0.1368, "step": 6228 }, { "epoch": 0.5739162482148615, "grad_norm": 0.9404251140274369, "learning_rate": 2.097255979502675e-06, "loss": 0.1374, "step": 6229 }, { "epoch": 0.5740083843921315, "grad_norm": 0.9329114486300538, "learning_rate": 2.096504165361562e-06, "loss": 0.1503, "step": 6230 }, { "epoch": 0.5741005205694015, "grad_norm": 0.9589004596645393, "learning_rate": 2.0957523886850815e-06, "loss": 0.142, "step": 6231 }, { "epoch": 0.5741926567466715, "grad_norm": 0.9469984435269726, "learning_rate": 2.095000649543035e-06, "loss": 0.1287, "step": 6232 }, { "epoch": 0.5742847929239416, "grad_norm": 0.8447650874717392, "learning_rate": 2.0942489480052214e-06, "loss": 0.1242, "step": 6233 }, { "epoch": 0.5743769291012116, "grad_norm": 0.8887316814688837, "learning_rate": 2.093497284141436e-06, "loss": 0.1393, "step": 6234 }, { "epoch": 0.5744690652784816, "grad_norm": 0.9034629564353456, "learning_rate": 2.0927456580214733e-06, "loss": 0.1421, "step": 6235 }, { "epoch": 0.5745612014557516, "grad_norm": 0.8562329900422688, "learning_rate": 2.091994069715119e-06, "loss": 0.1242, "step": 6236 }, { "epoch": 0.5746533376330216, "grad_norm": 0.9299994710734435, "learning_rate": 2.0912425192921588e-06, "loss": 0.1312, "step": 6237 }, { "epoch": 0.5747454738102916, "grad_norm": 0.9446203251305053, "learning_rate": 2.0904910068223745e-06, "loss": 0.147, "step": 6238 }, { "epoch": 0.5748376099875616, "grad_norm": 0.8745483561720717, "learning_rate": 2.0897395323755464e-06, "loss": 0.115, "step": 6239 }, { "epoch": 0.5749297461648316, "grad_norm": 0.863217780958388, "learning_rate": 2.088988096021445e-06, "loss": 0.1237, "step": 6240 }, { "epoch": 0.5750218823421016, "grad_norm": 0.9183400050395388, "learning_rate": 2.088236697829843e-06, "loss": 0.1366, "step": 6241 }, { "epoch": 0.5751140185193716, "grad_norm": 0.8436149595259076, "learning_rate": 2.0874853378705085e-06, "loss": 0.1229, "step": 6242 }, { "epoch": 0.5752061546966416, "grad_norm": 0.9275652931055757, "learning_rate": 2.0867340162132054e-06, "loss": 0.1303, "step": 6243 }, { "epoch": 0.5752982908739116, "grad_norm": 0.9302318599693131, "learning_rate": 2.0859827329276926e-06, "loss": 0.1418, "step": 6244 }, { "epoch": 0.5753904270511816, "grad_norm": 0.95815229518161, "learning_rate": 2.0852314880837278e-06, "loss": 0.1397, "step": 6245 }, { "epoch": 0.5754825632284517, "grad_norm": 0.8904343622778238, "learning_rate": 2.0844802817510633e-06, "loss": 0.1341, "step": 6246 }, { "epoch": 0.5755746994057217, "grad_norm": 0.862398919434429, "learning_rate": 2.08372911399945e-06, "loss": 0.1185, "step": 6247 }, { "epoch": 0.5756668355829917, "grad_norm": 0.9395104140482388, "learning_rate": 2.0829779848986337e-06, "loss": 0.1445, "step": 6248 }, { "epoch": 0.5757589717602617, "grad_norm": 0.8877225576451748, "learning_rate": 2.0822268945183555e-06, "loss": 0.1301, "step": 6249 }, { "epoch": 0.5758511079375317, "grad_norm": 0.8670131634340984, "learning_rate": 2.081475842928356e-06, "loss": 0.1242, "step": 6250 }, { "epoch": 0.5759432441148017, "grad_norm": 0.965400954698164, "learning_rate": 2.0807248301983682e-06, "loss": 0.1479, "step": 6251 }, { "epoch": 0.5760353802920717, "grad_norm": 0.8849794715504932, "learning_rate": 2.0799738563981263e-06, "loss": 0.1287, "step": 6252 }, { "epoch": 0.5761275164693417, "grad_norm": 0.905858445926939, "learning_rate": 2.079222921597357e-06, "loss": 0.1386, "step": 6253 }, { "epoch": 0.5762196526466117, "grad_norm": 0.9180158841915589, "learning_rate": 2.078472025865784e-06, "loss": 0.1375, "step": 6254 }, { "epoch": 0.5763117888238817, "grad_norm": 0.8919740094962243, "learning_rate": 2.077721169273129e-06, "loss": 0.1344, "step": 6255 }, { "epoch": 0.5764039250011517, "grad_norm": 0.9292425036753654, "learning_rate": 2.0769703518891096e-06, "loss": 0.1339, "step": 6256 }, { "epoch": 0.5764960611784217, "grad_norm": 0.9122724926415321, "learning_rate": 2.076219573783437e-06, "loss": 0.1295, "step": 6257 }, { "epoch": 0.5765881973556917, "grad_norm": 0.9458670285472852, "learning_rate": 2.075468835025824e-06, "loss": 0.1332, "step": 6258 }, { "epoch": 0.5766803335329617, "grad_norm": 1.0076418370512317, "learning_rate": 2.0747181356859743e-06, "loss": 0.1383, "step": 6259 }, { "epoch": 0.5767724697102318, "grad_norm": 0.8925007436761428, "learning_rate": 2.073967475833593e-06, "loss": 0.1301, "step": 6260 }, { "epoch": 0.5768646058875018, "grad_norm": 0.9879169780568002, "learning_rate": 2.0732168555383764e-06, "loss": 0.1358, "step": 6261 }, { "epoch": 0.5769567420647718, "grad_norm": 0.942015182599193, "learning_rate": 2.0724662748700205e-06, "loss": 0.1328, "step": 6262 }, { "epoch": 0.5770488782420418, "grad_norm": 0.910961493062362, "learning_rate": 2.0717157338982172e-06, "loss": 0.1328, "step": 6263 }, { "epoch": 0.5771410144193118, "grad_norm": 0.9031369142798781, "learning_rate": 2.0709652326926547e-06, "loss": 0.1309, "step": 6264 }, { "epoch": 0.5772331505965818, "grad_norm": 1.0202082284494964, "learning_rate": 2.070214771323015e-06, "loss": 0.1547, "step": 6265 }, { "epoch": 0.5773252867738518, "grad_norm": 0.8730129929135148, "learning_rate": 2.0694643498589816e-06, "loss": 0.1291, "step": 6266 }, { "epoch": 0.5774174229511218, "grad_norm": 0.8652049603519832, "learning_rate": 2.0687139683702284e-06, "loss": 0.1235, "step": 6267 }, { "epoch": 0.5775095591283917, "grad_norm": 0.9304371949873086, "learning_rate": 2.067963626926431e-06, "loss": 0.1375, "step": 6268 }, { "epoch": 0.5776016953056617, "grad_norm": 0.932762906253494, "learning_rate": 2.0672133255972567e-06, "loss": 0.1354, "step": 6269 }, { "epoch": 0.5776938314829317, "grad_norm": 0.9551092367759105, "learning_rate": 2.066463064452371e-06, "loss": 0.1378, "step": 6270 }, { "epoch": 0.5777859676602017, "grad_norm": 0.9374294366319679, "learning_rate": 2.0657128435614372e-06, "loss": 0.1426, "step": 6271 }, { "epoch": 0.5778781038374717, "grad_norm": 0.9424685529587546, "learning_rate": 2.0649626629941134e-06, "loss": 0.1383, "step": 6272 }, { "epoch": 0.5779702400147418, "grad_norm": 0.8798221517310623, "learning_rate": 2.0642125228200515e-06, "loss": 0.1231, "step": 6273 }, { "epoch": 0.5780623761920118, "grad_norm": 0.8718920010933575, "learning_rate": 2.0634624231089047e-06, "loss": 0.1317, "step": 6274 }, { "epoch": 0.5781545123692818, "grad_norm": 0.8900509963420861, "learning_rate": 2.062712363930318e-06, "loss": 0.1306, "step": 6275 }, { "epoch": 0.5782466485465518, "grad_norm": 0.9602083244915917, "learning_rate": 2.0619623453539365e-06, "loss": 0.1311, "step": 6276 }, { "epoch": 0.5783387847238218, "grad_norm": 0.9409875054246267, "learning_rate": 2.0612123674493983e-06, "loss": 0.1349, "step": 6277 }, { "epoch": 0.5784309209010918, "grad_norm": 0.954883571528099, "learning_rate": 2.060462430286338e-06, "loss": 0.1488, "step": 6278 }, { "epoch": 0.5785230570783618, "grad_norm": 0.924233430850071, "learning_rate": 2.059712533934389e-06, "loss": 0.1353, "step": 6279 }, { "epoch": 0.5786151932556318, "grad_norm": 0.9041734680736925, "learning_rate": 2.0589626784631784e-06, "loss": 0.1323, "step": 6280 }, { "epoch": 0.5787073294329018, "grad_norm": 0.9837493412140981, "learning_rate": 2.0582128639423316e-06, "loss": 0.1536, "step": 6281 }, { "epoch": 0.5787994656101718, "grad_norm": 0.974634864030492, "learning_rate": 2.057463090441467e-06, "loss": 0.1436, "step": 6282 }, { "epoch": 0.5788916017874418, "grad_norm": 0.9432227471990523, "learning_rate": 2.056713358030202e-06, "loss": 0.1441, "step": 6283 }, { "epoch": 0.5789837379647118, "grad_norm": 0.8964335561210266, "learning_rate": 2.0559636667781493e-06, "loss": 0.1318, "step": 6284 }, { "epoch": 0.5790758741419818, "grad_norm": 0.8913206385209099, "learning_rate": 2.055214016754919e-06, "loss": 0.1303, "step": 6285 }, { "epoch": 0.5791680103192518, "grad_norm": 0.9267354488882299, "learning_rate": 2.0544644080301138e-06, "loss": 0.1401, "step": 6286 }, { "epoch": 0.5792601464965219, "grad_norm": 0.9305789259939854, "learning_rate": 2.053714840673337e-06, "loss": 0.1358, "step": 6287 }, { "epoch": 0.5793522826737919, "grad_norm": 0.9084611989875776, "learning_rate": 2.0529653147541844e-06, "loss": 0.1357, "step": 6288 }, { "epoch": 0.5794444188510619, "grad_norm": 0.9851294864819644, "learning_rate": 2.0522158303422518e-06, "loss": 0.1441, "step": 6289 }, { "epoch": 0.5795365550283319, "grad_norm": 0.9148519072416933, "learning_rate": 2.051466387507127e-06, "loss": 0.1353, "step": 6290 }, { "epoch": 0.5796286912056019, "grad_norm": 0.8849124093932106, "learning_rate": 2.0507169863183956e-06, "loss": 0.1207, "step": 6291 }, { "epoch": 0.5797208273828719, "grad_norm": 0.9565706832013744, "learning_rate": 2.0499676268456412e-06, "loss": 0.1329, "step": 6292 }, { "epoch": 0.5798129635601419, "grad_norm": 0.9551972700761554, "learning_rate": 2.0492183091584414e-06, "loss": 0.1381, "step": 6293 }, { "epoch": 0.5799050997374119, "grad_norm": 0.8916816492853256, "learning_rate": 2.048469033326369e-06, "loss": 0.139, "step": 6294 }, { "epoch": 0.5799972359146819, "grad_norm": 0.9837000019154031, "learning_rate": 2.047719799418996e-06, "loss": 0.1408, "step": 6295 }, { "epoch": 0.5800893720919519, "grad_norm": 0.9296646554316655, "learning_rate": 2.046970607505888e-06, "loss": 0.1238, "step": 6296 }, { "epoch": 0.5801815082692219, "grad_norm": 0.9916491510040498, "learning_rate": 2.046221457656609e-06, "loss": 0.1467, "step": 6297 }, { "epoch": 0.5802736444464919, "grad_norm": 1.0222186289839617, "learning_rate": 2.0454723499407158e-06, "loss": 0.142, "step": 6298 }, { "epoch": 0.5803657806237619, "grad_norm": 0.9199317685695529, "learning_rate": 2.044723284427763e-06, "loss": 0.1337, "step": 6299 }, { "epoch": 0.5804579168010319, "grad_norm": 0.9431538247478978, "learning_rate": 2.043974261187303e-06, "loss": 0.1311, "step": 6300 }, { "epoch": 0.580550052978302, "grad_norm": 0.935052064731194, "learning_rate": 2.0432252802888827e-06, "loss": 0.1319, "step": 6301 }, { "epoch": 0.580642189155572, "grad_norm": 0.9305806123779443, "learning_rate": 2.042476341802043e-06, "loss": 0.1321, "step": 6302 }, { "epoch": 0.580734325332842, "grad_norm": 0.9340840745588856, "learning_rate": 2.0417274457963247e-06, "loss": 0.1351, "step": 6303 }, { "epoch": 0.580826461510112, "grad_norm": 0.8279176393267009, "learning_rate": 2.040978592341262e-06, "loss": 0.1174, "step": 6304 }, { "epoch": 0.580918597687382, "grad_norm": 0.8983819696246862, "learning_rate": 2.0402297815063867e-06, "loss": 0.1301, "step": 6305 }, { "epoch": 0.581010733864652, "grad_norm": 0.9312038343939995, "learning_rate": 2.0394810133612263e-06, "loss": 0.1365, "step": 6306 }, { "epoch": 0.581102870041922, "grad_norm": 0.8673030702969664, "learning_rate": 2.0387322879753025e-06, "loss": 0.1295, "step": 6307 }, { "epoch": 0.581195006219192, "grad_norm": 0.9257544184093895, "learning_rate": 2.0379836054181356e-06, "loss": 0.1348, "step": 6308 }, { "epoch": 0.581287142396462, "grad_norm": 0.9720242295003335, "learning_rate": 2.0372349657592404e-06, "loss": 0.1422, "step": 6309 }, { "epoch": 0.581379278573732, "grad_norm": 0.9263012800097226, "learning_rate": 2.0364863690681293e-06, "loss": 0.1291, "step": 6310 }, { "epoch": 0.581471414751002, "grad_norm": 0.8925339009739691, "learning_rate": 2.0357378154143083e-06, "loss": 0.1305, "step": 6311 }, { "epoch": 0.5815635509282719, "grad_norm": 0.8770195000598113, "learning_rate": 2.0349893048672806e-06, "loss": 0.1249, "step": 6312 }, { "epoch": 0.5816556871055419, "grad_norm": 0.9175165472111774, "learning_rate": 2.0342408374965457e-06, "loss": 0.136, "step": 6313 }, { "epoch": 0.581747823282812, "grad_norm": 0.9407699994116291, "learning_rate": 2.033492413371601e-06, "loss": 0.137, "step": 6314 }, { "epoch": 0.581839959460082, "grad_norm": 0.9116296301855412, "learning_rate": 2.0327440325619345e-06, "loss": 0.1299, "step": 6315 }, { "epoch": 0.581932095637352, "grad_norm": 0.9369925685666635, "learning_rate": 2.0319956951370346e-06, "loss": 0.1427, "step": 6316 }, { "epoch": 0.582024231814622, "grad_norm": 0.9385348869940181, "learning_rate": 2.0312474011663857e-06, "loss": 0.1479, "step": 6317 }, { "epoch": 0.582116367991892, "grad_norm": 0.9268995831678168, "learning_rate": 2.030499150719466e-06, "loss": 0.1337, "step": 6318 }, { "epoch": 0.582208504169162, "grad_norm": 0.9028226590767937, "learning_rate": 2.02975094386575e-06, "loss": 0.1309, "step": 6319 }, { "epoch": 0.582300640346432, "grad_norm": 0.9370832163666957, "learning_rate": 2.02900278067471e-06, "loss": 0.1258, "step": 6320 }, { "epoch": 0.582392776523702, "grad_norm": 0.9655358616877272, "learning_rate": 2.0282546612158116e-06, "loss": 0.1372, "step": 6321 }, { "epoch": 0.582484912700972, "grad_norm": 0.9148607737633211, "learning_rate": 2.02750658555852e-06, "loss": 0.1211, "step": 6322 }, { "epoch": 0.582577048878242, "grad_norm": 0.8864078738002699, "learning_rate": 2.026758553772292e-06, "loss": 0.1305, "step": 6323 }, { "epoch": 0.582669185055512, "grad_norm": 1.002989551156702, "learning_rate": 2.026010565926583e-06, "loss": 0.152, "step": 6324 }, { "epoch": 0.582761321232782, "grad_norm": 0.9183628328083221, "learning_rate": 2.0252626220908448e-06, "loss": 0.1376, "step": 6325 }, { "epoch": 0.582853457410052, "grad_norm": 0.8937942177656565, "learning_rate": 2.0245147223345235e-06, "loss": 0.1316, "step": 6326 }, { "epoch": 0.582945593587322, "grad_norm": 0.8875351589120594, "learning_rate": 2.0237668667270603e-06, "loss": 0.1303, "step": 6327 }, { "epoch": 0.5830377297645921, "grad_norm": 0.9263707122808156, "learning_rate": 2.023019055337895e-06, "loss": 0.1328, "step": 6328 }, { "epoch": 0.5831298659418621, "grad_norm": 0.915973919313679, "learning_rate": 2.0222712882364617e-06, "loss": 0.1386, "step": 6329 }, { "epoch": 0.5832220021191321, "grad_norm": 0.9156431422133637, "learning_rate": 2.0215235654921912e-06, "loss": 0.1362, "step": 6330 }, { "epoch": 0.5833141382964021, "grad_norm": 0.8978505787324376, "learning_rate": 2.0207758871745088e-06, "loss": 0.1229, "step": 6331 }, { "epoch": 0.5834062744736721, "grad_norm": 0.8691338078911657, "learning_rate": 2.0200282533528367e-06, "loss": 0.1373, "step": 6332 }, { "epoch": 0.5834984106509421, "grad_norm": 0.938257513392887, "learning_rate": 2.019280664096593e-06, "loss": 0.1378, "step": 6333 }, { "epoch": 0.5835905468282121, "grad_norm": 0.8559661977591175, "learning_rate": 2.018533119475191e-06, "loss": 0.1215, "step": 6334 }, { "epoch": 0.5836826830054821, "grad_norm": 0.9573360476871194, "learning_rate": 2.017785619558042e-06, "loss": 0.1355, "step": 6335 }, { "epoch": 0.5837748191827521, "grad_norm": 0.8571583060040944, "learning_rate": 2.0170381644145492e-06, "loss": 0.1224, "step": 6336 }, { "epoch": 0.5838669553600221, "grad_norm": 0.9268463083225386, "learning_rate": 2.0162907541141146e-06, "loss": 0.1348, "step": 6337 }, { "epoch": 0.5839590915372921, "grad_norm": 0.8793654369992728, "learning_rate": 2.0155433887261362e-06, "loss": 0.1289, "step": 6338 }, { "epoch": 0.5840512277145621, "grad_norm": 0.9334765040012325, "learning_rate": 2.0147960683200064e-06, "loss": 0.1351, "step": 6339 }, { "epoch": 0.5841433638918321, "grad_norm": 0.913189689611009, "learning_rate": 2.014048792965113e-06, "loss": 0.1306, "step": 6340 }, { "epoch": 0.5842355000691022, "grad_norm": 0.9441334337706997, "learning_rate": 2.013301562730842e-06, "loss": 0.1384, "step": 6341 }, { "epoch": 0.5843276362463722, "grad_norm": 0.8629368354740582, "learning_rate": 2.0125543776865723e-06, "loss": 0.1213, "step": 6342 }, { "epoch": 0.5844197724236422, "grad_norm": 0.8799947858308416, "learning_rate": 2.011807237901683e-06, "loss": 0.1168, "step": 6343 }, { "epoch": 0.5845119086009122, "grad_norm": 0.9652486866537766, "learning_rate": 2.011060143445543e-06, "loss": 0.141, "step": 6344 }, { "epoch": 0.5846040447781822, "grad_norm": 0.8773569096523852, "learning_rate": 2.010313094387521e-06, "loss": 0.1324, "step": 6345 }, { "epoch": 0.5846961809554522, "grad_norm": 0.9465727325543921, "learning_rate": 2.0095660907969816e-06, "loss": 0.1331, "step": 6346 }, { "epoch": 0.5847883171327222, "grad_norm": 0.8830422552735072, "learning_rate": 2.0088191327432838e-06, "loss": 0.1205, "step": 6347 }, { "epoch": 0.5848804533099922, "grad_norm": 0.8911291650874078, "learning_rate": 2.0080722202957813e-06, "loss": 0.1269, "step": 6348 }, { "epoch": 0.5849725894872622, "grad_norm": 0.919726354522966, "learning_rate": 2.0073253535238266e-06, "loss": 0.1348, "step": 6349 }, { "epoch": 0.5850647256645322, "grad_norm": 0.9153772381047109, "learning_rate": 2.0065785324967654e-06, "loss": 0.1231, "step": 6350 }, { "epoch": 0.5851568618418022, "grad_norm": 0.9585102905267247, "learning_rate": 2.0058317572839418e-06, "loss": 0.1361, "step": 6351 }, { "epoch": 0.5852489980190722, "grad_norm": 0.944155411460431, "learning_rate": 2.0050850279546918e-06, "loss": 0.1421, "step": 6352 }, { "epoch": 0.5853411341963421, "grad_norm": 0.9449499207179856, "learning_rate": 2.00433834457835e-06, "loss": 0.1311, "step": 6353 }, { "epoch": 0.5854332703736121, "grad_norm": 0.8963039607572094, "learning_rate": 2.0035917072242463e-06, "loss": 0.1365, "step": 6354 }, { "epoch": 0.5855254065508823, "grad_norm": 0.9372716167180863, "learning_rate": 2.002845115961707e-06, "loss": 0.1304, "step": 6355 }, { "epoch": 0.5856175427281523, "grad_norm": 0.9593342332592468, "learning_rate": 2.002098570860051e-06, "loss": 0.1341, "step": 6356 }, { "epoch": 0.5857096789054222, "grad_norm": 0.9355281745635248, "learning_rate": 2.001352071988597e-06, "loss": 0.1395, "step": 6357 }, { "epoch": 0.5858018150826922, "grad_norm": 0.9088372778534395, "learning_rate": 2.000605619416656e-06, "loss": 0.133, "step": 6358 }, { "epoch": 0.5858939512599622, "grad_norm": 0.9209424144820311, "learning_rate": 1.999859213213538e-06, "loss": 0.1437, "step": 6359 }, { "epoch": 0.5859860874372322, "grad_norm": 0.9475953953719402, "learning_rate": 1.9991128534485454e-06, "loss": 0.1315, "step": 6360 }, { "epoch": 0.5860782236145022, "grad_norm": 0.9238882849548906, "learning_rate": 1.998366540190978e-06, "loss": 0.1228, "step": 6361 }, { "epoch": 0.5861703597917722, "grad_norm": 0.937878020643912, "learning_rate": 1.9976202735101314e-06, "loss": 0.1385, "step": 6362 }, { "epoch": 0.5862624959690422, "grad_norm": 0.8840553821623729, "learning_rate": 1.9968740534752965e-06, "loss": 0.1201, "step": 6363 }, { "epoch": 0.5863546321463122, "grad_norm": 0.9642337662716879, "learning_rate": 1.9961278801557606e-06, "loss": 0.1402, "step": 6364 }, { "epoch": 0.5864467683235822, "grad_norm": 0.9253050908043012, "learning_rate": 1.9953817536208046e-06, "loss": 0.1289, "step": 6365 }, { "epoch": 0.5865389045008522, "grad_norm": 0.9555889041598452, "learning_rate": 1.994635673939707e-06, "loss": 0.1401, "step": 6366 }, { "epoch": 0.5866310406781222, "grad_norm": 0.9409992712837372, "learning_rate": 1.9938896411817416e-06, "loss": 0.1389, "step": 6367 }, { "epoch": 0.5867231768553922, "grad_norm": 0.8298665944219692, "learning_rate": 1.9931436554161783e-06, "loss": 0.1194, "step": 6368 }, { "epoch": 0.5868153130326623, "grad_norm": 0.9843872433472307, "learning_rate": 1.9923977167122797e-06, "loss": 0.1489, "step": 6369 }, { "epoch": 0.5869074492099323, "grad_norm": 0.9426206584555298, "learning_rate": 1.9916518251393085e-06, "loss": 0.1354, "step": 6370 }, { "epoch": 0.5869995853872023, "grad_norm": 0.9291998492432275, "learning_rate": 1.9909059807665195e-06, "loss": 0.1257, "step": 6371 }, { "epoch": 0.5870917215644723, "grad_norm": 0.9164376882762449, "learning_rate": 1.990160183663166e-06, "loss": 0.1348, "step": 6372 }, { "epoch": 0.5871838577417423, "grad_norm": 0.916904820338903, "learning_rate": 1.9894144338984937e-06, "loss": 0.132, "step": 6373 }, { "epoch": 0.5872759939190123, "grad_norm": 0.9084949228829207, "learning_rate": 1.9886687315417456e-06, "loss": 0.1344, "step": 6374 }, { "epoch": 0.5873681300962823, "grad_norm": 0.9304152484544277, "learning_rate": 1.9879230766621616e-06, "loss": 0.1354, "step": 6375 }, { "epoch": 0.5874602662735523, "grad_norm": 0.9262616287041675, "learning_rate": 1.9871774693289754e-06, "loss": 0.1279, "step": 6376 }, { "epoch": 0.5875524024508223, "grad_norm": 0.8867273085909185, "learning_rate": 1.9864319096114152e-06, "loss": 0.1301, "step": 6377 }, { "epoch": 0.5876445386280923, "grad_norm": 1.0611872055913358, "learning_rate": 1.985686397578708e-06, "loss": 0.1477, "step": 6378 }, { "epoch": 0.5877366748053623, "grad_norm": 0.8468401803460077, "learning_rate": 1.984940933300074e-06, "loss": 0.1174, "step": 6379 }, { "epoch": 0.5878288109826323, "grad_norm": 0.8830701936617826, "learning_rate": 1.984195516844731e-06, "loss": 0.1329, "step": 6380 }, { "epoch": 0.5879209471599023, "grad_norm": 0.9153760956552899, "learning_rate": 1.9834501482818885e-06, "loss": 0.1394, "step": 6381 }, { "epoch": 0.5880130833371724, "grad_norm": 0.884290099600892, "learning_rate": 1.9827048276807552e-06, "loss": 0.1361, "step": 6382 }, { "epoch": 0.5881052195144424, "grad_norm": 0.9630089565926064, "learning_rate": 1.9819595551105346e-06, "loss": 0.142, "step": 6383 }, { "epoch": 0.5881973556917124, "grad_norm": 0.9348061290004007, "learning_rate": 1.9812143306404262e-06, "loss": 0.1325, "step": 6384 }, { "epoch": 0.5882894918689824, "grad_norm": 0.8751611132400253, "learning_rate": 1.9804691543396213e-06, "loss": 0.1255, "step": 6385 }, { "epoch": 0.5883816280462524, "grad_norm": 0.9355019028595838, "learning_rate": 1.9797240262773122e-06, "loss": 0.1325, "step": 6386 }, { "epoch": 0.5884737642235224, "grad_norm": 0.8332639547487759, "learning_rate": 1.9789789465226825e-06, "loss": 0.1253, "step": 6387 }, { "epoch": 0.5885659004007924, "grad_norm": 0.9403565805795623, "learning_rate": 1.978233915144915e-06, "loss": 0.1401, "step": 6388 }, { "epoch": 0.5886580365780624, "grad_norm": 0.8968199420083651, "learning_rate": 1.977488932213184e-06, "loss": 0.1397, "step": 6389 }, { "epoch": 0.5887501727553324, "grad_norm": 0.9429483455249357, "learning_rate": 1.976743997796661e-06, "loss": 0.1431, "step": 6390 }, { "epoch": 0.5888423089326024, "grad_norm": 0.886059394188964, "learning_rate": 1.975999111964515e-06, "loss": 0.1311, "step": 6391 }, { "epoch": 0.5889344451098724, "grad_norm": 0.9131107533925749, "learning_rate": 1.9752542747859076e-06, "loss": 0.1332, "step": 6392 }, { "epoch": 0.5890265812871424, "grad_norm": 0.8724833844713735, "learning_rate": 1.974509486329998e-06, "loss": 0.1185, "step": 6393 }, { "epoch": 0.5891187174644124, "grad_norm": 0.8517216431460966, "learning_rate": 1.973764746665938e-06, "loss": 0.1244, "step": 6394 }, { "epoch": 0.5892108536416824, "grad_norm": 0.8853967206799523, "learning_rate": 1.9730200558628784e-06, "loss": 0.1361, "step": 6395 }, { "epoch": 0.5893029898189525, "grad_norm": 0.933921719916813, "learning_rate": 1.972275413989963e-06, "loss": 0.1276, "step": 6396 }, { "epoch": 0.5893951259962225, "grad_norm": 0.9412040556603793, "learning_rate": 1.971530821116333e-06, "loss": 0.1351, "step": 6397 }, { "epoch": 0.5894872621734925, "grad_norm": 0.9419111745680039, "learning_rate": 1.970786277311123e-06, "loss": 0.135, "step": 6398 }, { "epoch": 0.5895793983507625, "grad_norm": 0.9215559284783924, "learning_rate": 1.9700417826434633e-06, "loss": 0.1351, "step": 6399 }, { "epoch": 0.5896715345280324, "grad_norm": 0.996475566243809, "learning_rate": 1.969297337182482e-06, "loss": 0.1521, "step": 6400 }, { "epoch": 0.5897636707053024, "grad_norm": 0.9291448076226406, "learning_rate": 1.9685529409973e-06, "loss": 0.1278, "step": 6401 }, { "epoch": 0.5898558068825724, "grad_norm": 0.8924108059641798, "learning_rate": 1.967808594157034e-06, "loss": 0.1267, "step": 6402 }, { "epoch": 0.5899479430598424, "grad_norm": 0.9645722547839062, "learning_rate": 1.9670642967307974e-06, "loss": 0.1399, "step": 6403 }, { "epoch": 0.5900400792371124, "grad_norm": 0.8925650886648214, "learning_rate": 1.9663200487876983e-06, "loss": 0.1282, "step": 6404 }, { "epoch": 0.5901322154143824, "grad_norm": 0.8834690207967658, "learning_rate": 1.965575850396841e-06, "loss": 0.1313, "step": 6405 }, { "epoch": 0.5902243515916524, "grad_norm": 0.882154265370182, "learning_rate": 1.9648317016273227e-06, "loss": 0.1169, "step": 6406 }, { "epoch": 0.5903164877689224, "grad_norm": 0.9405001358124874, "learning_rate": 1.964087602548238e-06, "loss": 0.134, "step": 6407 }, { "epoch": 0.5904086239461924, "grad_norm": 0.9444373778196875, "learning_rate": 1.9633435532286775e-06, "loss": 0.1397, "step": 6408 }, { "epoch": 0.5905007601234625, "grad_norm": 0.8989319983039749, "learning_rate": 1.9625995537377268e-06, "loss": 0.1155, "step": 6409 }, { "epoch": 0.5905928963007325, "grad_norm": 0.9366810824860379, "learning_rate": 1.961855604144464e-06, "loss": 0.1409, "step": 6410 }, { "epoch": 0.5906850324780025, "grad_norm": 0.9492399799170057, "learning_rate": 1.961111704517967e-06, "loss": 0.1441, "step": 6411 }, { "epoch": 0.5907771686552725, "grad_norm": 0.9306708910687921, "learning_rate": 1.9603678549273054e-06, "loss": 0.1392, "step": 6412 }, { "epoch": 0.5908693048325425, "grad_norm": 0.9310169038231709, "learning_rate": 1.959624055441548e-06, "loss": 0.1309, "step": 6413 }, { "epoch": 0.5909614410098125, "grad_norm": 0.9509199776543236, "learning_rate": 1.9588803061297544e-06, "loss": 0.1469, "step": 6414 }, { "epoch": 0.5910535771870825, "grad_norm": 0.8560938591568295, "learning_rate": 1.9581366070609824e-06, "loss": 0.1137, "step": 6415 }, { "epoch": 0.5911457133643525, "grad_norm": 0.935421361088011, "learning_rate": 1.957392958304285e-06, "loss": 0.1302, "step": 6416 }, { "epoch": 0.5912378495416225, "grad_norm": 0.9578568430913035, "learning_rate": 1.9566493599287103e-06, "loss": 0.1378, "step": 6417 }, { "epoch": 0.5913299857188925, "grad_norm": 0.8990248820513408, "learning_rate": 1.9559058120032997e-06, "loss": 0.1172, "step": 6418 }, { "epoch": 0.5914221218961625, "grad_norm": 0.9364115670604503, "learning_rate": 1.955162314597094e-06, "loss": 0.1382, "step": 6419 }, { "epoch": 0.5915142580734325, "grad_norm": 0.9513877830712206, "learning_rate": 1.9544188677791253e-06, "loss": 0.1395, "step": 6420 }, { "epoch": 0.5916063942507025, "grad_norm": 0.9556704700985831, "learning_rate": 1.9536754716184244e-06, "loss": 0.1324, "step": 6421 }, { "epoch": 0.5916985304279725, "grad_norm": 0.9350415543063032, "learning_rate": 1.9529321261840148e-06, "loss": 0.1286, "step": 6422 }, { "epoch": 0.5917906666052426, "grad_norm": 0.9438832953394088, "learning_rate": 1.952188831544915e-06, "loss": 0.1425, "step": 6423 }, { "epoch": 0.5918828027825126, "grad_norm": 0.9351384678963927, "learning_rate": 1.951445587770142e-06, "loss": 0.1236, "step": 6424 }, { "epoch": 0.5919749389597826, "grad_norm": 0.9389889457570394, "learning_rate": 1.9507023949287045e-06, "loss": 0.135, "step": 6425 }, { "epoch": 0.5920670751370526, "grad_norm": 0.8928792043876652, "learning_rate": 1.94995925308961e-06, "loss": 0.1191, "step": 6426 }, { "epoch": 0.5921592113143226, "grad_norm": 0.907511539500844, "learning_rate": 1.9492161623218576e-06, "loss": 0.1347, "step": 6427 }, { "epoch": 0.5922513474915926, "grad_norm": 0.9123849674907245, "learning_rate": 1.9484731226944427e-06, "loss": 0.1301, "step": 6428 }, { "epoch": 0.5923434836688626, "grad_norm": 0.8943152512986418, "learning_rate": 1.9477301342763587e-06, "loss": 0.1356, "step": 6429 }, { "epoch": 0.5924356198461326, "grad_norm": 0.968239050353456, "learning_rate": 1.946987197136592e-06, "loss": 0.1356, "step": 6430 }, { "epoch": 0.5925277560234026, "grad_norm": 0.9555806245021822, "learning_rate": 1.946244311344122e-06, "loss": 0.1442, "step": 6431 }, { "epoch": 0.5926198922006726, "grad_norm": 0.9023064613208227, "learning_rate": 1.945501476967928e-06, "loss": 0.1334, "step": 6432 }, { "epoch": 0.5927120283779426, "grad_norm": 0.8772285953239736, "learning_rate": 1.9447586940769808e-06, "loss": 0.1251, "step": 6433 }, { "epoch": 0.5928041645552126, "grad_norm": 0.8673699715919908, "learning_rate": 1.9440159627402497e-06, "loss": 0.1224, "step": 6434 }, { "epoch": 0.5928963007324826, "grad_norm": 0.8963161155771975, "learning_rate": 1.9432732830266958e-06, "loss": 0.1314, "step": 6435 }, { "epoch": 0.5929884369097527, "grad_norm": 0.9320802532189435, "learning_rate": 1.9425306550052774e-06, "loss": 0.1438, "step": 6436 }, { "epoch": 0.5930805730870227, "grad_norm": 0.9223967293003285, "learning_rate": 1.9417880787449476e-06, "loss": 0.1359, "step": 6437 }, { "epoch": 0.5931727092642927, "grad_norm": 0.8711183973077686, "learning_rate": 1.9410455543146554e-06, "loss": 0.1305, "step": 6438 }, { "epoch": 0.5932648454415627, "grad_norm": 0.9242491659975414, "learning_rate": 1.9403030817833428e-06, "loss": 0.134, "step": 6439 }, { "epoch": 0.5933569816188327, "grad_norm": 0.91941387536559, "learning_rate": 1.93956066121995e-06, "loss": 0.1367, "step": 6440 }, { "epoch": 0.5934491177961027, "grad_norm": 0.948051439287453, "learning_rate": 1.938818292693409e-06, "loss": 0.1379, "step": 6441 }, { "epoch": 0.5935412539733727, "grad_norm": 0.8998731076445318, "learning_rate": 1.9380759762726512e-06, "loss": 0.1439, "step": 6442 }, { "epoch": 0.5936333901506426, "grad_norm": 0.8641438471342882, "learning_rate": 1.9373337120265993e-06, "loss": 0.1262, "step": 6443 }, { "epoch": 0.5937255263279126, "grad_norm": 0.9469896305094141, "learning_rate": 1.936591500024172e-06, "loss": 0.1448, "step": 6444 }, { "epoch": 0.5938176625051826, "grad_norm": 0.9327091962545253, "learning_rate": 1.935849340334285e-06, "loss": 0.1316, "step": 6445 }, { "epoch": 0.5939097986824526, "grad_norm": 0.9333644067409109, "learning_rate": 1.9351072330258483e-06, "loss": 0.1345, "step": 6446 }, { "epoch": 0.5940019348597226, "grad_norm": 0.9562785312219442, "learning_rate": 1.9343651781677648e-06, "loss": 0.142, "step": 6447 }, { "epoch": 0.5940940710369926, "grad_norm": 0.9451689710704071, "learning_rate": 1.933623175828935e-06, "loss": 0.1385, "step": 6448 }, { "epoch": 0.5941862072142626, "grad_norm": 0.9044973124651259, "learning_rate": 1.932881226078255e-06, "loss": 0.1287, "step": 6449 }, { "epoch": 0.5942783433915327, "grad_norm": 0.9583553094337743, "learning_rate": 1.932139328984614e-06, "loss": 0.1265, "step": 6450 }, { "epoch": 0.5943704795688027, "grad_norm": 0.9073283509605476, "learning_rate": 1.931397484616898e-06, "loss": 0.1322, "step": 6451 }, { "epoch": 0.5944626157460727, "grad_norm": 0.918641355087958, "learning_rate": 1.9306556930439857e-06, "loss": 0.1389, "step": 6452 }, { "epoch": 0.5945547519233427, "grad_norm": 0.9440290388840534, "learning_rate": 1.929913954334754e-06, "loss": 0.1404, "step": 6453 }, { "epoch": 0.5946468881006127, "grad_norm": 0.9119563904232301, "learning_rate": 1.929172268558073e-06, "loss": 0.1386, "step": 6454 }, { "epoch": 0.5947390242778827, "grad_norm": 0.8866487591480595, "learning_rate": 1.928430635782809e-06, "loss": 0.1194, "step": 6455 }, { "epoch": 0.5948311604551527, "grad_norm": 0.8549168890829434, "learning_rate": 1.9276890560778215e-06, "loss": 0.1148, "step": 6456 }, { "epoch": 0.5949232966324227, "grad_norm": 0.9034668553279024, "learning_rate": 1.9269475295119663e-06, "loss": 0.1354, "step": 6457 }, { "epoch": 0.5950154328096927, "grad_norm": 0.8658038474446961, "learning_rate": 1.9262060561540946e-06, "loss": 0.1336, "step": 6458 }, { "epoch": 0.5951075689869627, "grad_norm": 0.8666140071243187, "learning_rate": 1.9254646360730533e-06, "loss": 0.1215, "step": 6459 }, { "epoch": 0.5951997051642327, "grad_norm": 0.8869437410154752, "learning_rate": 1.9247232693376815e-06, "loss": 0.1241, "step": 6460 }, { "epoch": 0.5952918413415027, "grad_norm": 0.9344054957254118, "learning_rate": 1.9239819560168165e-06, "loss": 0.1386, "step": 6461 }, { "epoch": 0.5953839775187727, "grad_norm": 0.9095798537916769, "learning_rate": 1.9232406961792884e-06, "loss": 0.1361, "step": 6462 }, { "epoch": 0.5954761136960427, "grad_norm": 0.8624089349049318, "learning_rate": 1.9224994898939247e-06, "loss": 0.1223, "step": 6463 }, { "epoch": 0.5955682498733128, "grad_norm": 0.861278948017207, "learning_rate": 1.9217583372295446e-06, "loss": 0.1257, "step": 6464 }, { "epoch": 0.5956603860505828, "grad_norm": 0.9454217068596863, "learning_rate": 1.921017238254965e-06, "loss": 0.145, "step": 6465 }, { "epoch": 0.5957525222278528, "grad_norm": 0.9079234970512026, "learning_rate": 1.920276193038997e-06, "loss": 0.1299, "step": 6466 }, { "epoch": 0.5958446584051228, "grad_norm": 0.9043646425298468, "learning_rate": 1.9195352016504486e-06, "loss": 0.1288, "step": 6467 }, { "epoch": 0.5959367945823928, "grad_norm": 0.9208332337385433, "learning_rate": 1.9187942641581174e-06, "loss": 0.1367, "step": 6468 }, { "epoch": 0.5960289307596628, "grad_norm": 0.8611028175581442, "learning_rate": 1.9180533806308017e-06, "loss": 0.1256, "step": 6469 }, { "epoch": 0.5961210669369328, "grad_norm": 0.9491166063148969, "learning_rate": 1.9173125511372923e-06, "loss": 0.1349, "step": 6470 }, { "epoch": 0.5962132031142028, "grad_norm": 0.9318622192071003, "learning_rate": 1.916571775746376e-06, "loss": 0.1375, "step": 6471 }, { "epoch": 0.5963053392914728, "grad_norm": 0.9168563890482114, "learning_rate": 1.915831054526832e-06, "loss": 0.1412, "step": 6472 }, { "epoch": 0.5963974754687428, "grad_norm": 0.883224161497809, "learning_rate": 1.915090387547438e-06, "loss": 0.1223, "step": 6473 }, { "epoch": 0.5964896116460128, "grad_norm": 0.8794085943148341, "learning_rate": 1.914349774876964e-06, "loss": 0.1307, "step": 6474 }, { "epoch": 0.5965817478232828, "grad_norm": 0.9169286039253907, "learning_rate": 1.9136092165841776e-06, "loss": 0.1385, "step": 6475 }, { "epoch": 0.5966738840005528, "grad_norm": 0.8934819174982507, "learning_rate": 1.9128687127378376e-06, "loss": 0.1324, "step": 6476 }, { "epoch": 0.5967660201778229, "grad_norm": 0.9454788969073651, "learning_rate": 1.9121282634067008e-06, "loss": 0.1299, "step": 6477 }, { "epoch": 0.5968581563550929, "grad_norm": 0.9455512022162309, "learning_rate": 1.911387868659518e-06, "loss": 0.1314, "step": 6478 }, { "epoch": 0.5969502925323629, "grad_norm": 0.8730096720038547, "learning_rate": 1.9106475285650345e-06, "loss": 0.1144, "step": 6479 }, { "epoch": 0.5970424287096329, "grad_norm": 0.9349386390282505, "learning_rate": 1.909907243191993e-06, "loss": 0.1352, "step": 6480 }, { "epoch": 0.5971345648869029, "grad_norm": 0.9006142107363241, "learning_rate": 1.9091670126091264e-06, "loss": 0.1368, "step": 6481 }, { "epoch": 0.5972267010641729, "grad_norm": 0.9440688968299398, "learning_rate": 1.908426836885166e-06, "loss": 0.1277, "step": 6482 }, { "epoch": 0.5973188372414429, "grad_norm": 0.9115356328181138, "learning_rate": 1.907686716088838e-06, "loss": 0.13, "step": 6483 }, { "epoch": 0.5974109734187129, "grad_norm": 0.9107915908099802, "learning_rate": 1.9069466502888625e-06, "loss": 0.123, "step": 6484 }, { "epoch": 0.5975031095959829, "grad_norm": 0.9531637489514408, "learning_rate": 1.9062066395539535e-06, "loss": 0.1436, "step": 6485 }, { "epoch": 0.5975952457732528, "grad_norm": 0.9428193773075232, "learning_rate": 1.9054666839528225e-06, "loss": 0.1232, "step": 6486 }, { "epoch": 0.5976873819505228, "grad_norm": 0.9627495716809685, "learning_rate": 1.904726783554173e-06, "loss": 0.1331, "step": 6487 }, { "epoch": 0.5977795181277928, "grad_norm": 0.9517407487393909, "learning_rate": 1.903986938426707e-06, "loss": 0.1305, "step": 6488 }, { "epoch": 0.5978716543050628, "grad_norm": 0.9264042232053695, "learning_rate": 1.9032471486391175e-06, "loss": 0.1359, "step": 6489 }, { "epoch": 0.5979637904823328, "grad_norm": 0.904126855844614, "learning_rate": 1.9025074142600935e-06, "loss": 0.1244, "step": 6490 }, { "epoch": 0.5980559266596029, "grad_norm": 0.9136459862220759, "learning_rate": 1.9017677353583213e-06, "loss": 0.1312, "step": 6491 }, { "epoch": 0.5981480628368729, "grad_norm": 0.9007666304246773, "learning_rate": 1.90102811200248e-06, "loss": 0.1354, "step": 6492 }, { "epoch": 0.5982401990141429, "grad_norm": 0.9122491086990855, "learning_rate": 1.9002885442612413e-06, "loss": 0.1296, "step": 6493 }, { "epoch": 0.5983323351914129, "grad_norm": 0.8637309458864265, "learning_rate": 1.8995490322032767e-06, "loss": 0.1177, "step": 6494 }, { "epoch": 0.5984244713686829, "grad_norm": 0.9140446335579833, "learning_rate": 1.8988095758972485e-06, "loss": 0.126, "step": 6495 }, { "epoch": 0.5985166075459529, "grad_norm": 0.891049772866993, "learning_rate": 1.8980701754118168e-06, "loss": 0.1407, "step": 6496 }, { "epoch": 0.5986087437232229, "grad_norm": 0.9308146796900931, "learning_rate": 1.8973308308156337e-06, "loss": 0.146, "step": 6497 }, { "epoch": 0.5987008799004929, "grad_norm": 0.9217024801030724, "learning_rate": 1.8965915421773473e-06, "loss": 0.1385, "step": 6498 }, { "epoch": 0.5987930160777629, "grad_norm": 0.9446524923822498, "learning_rate": 1.8958523095656016e-06, "loss": 0.1264, "step": 6499 }, { "epoch": 0.5988851522550329, "grad_norm": 0.87348994025732, "learning_rate": 1.8951131330490347e-06, "loss": 0.1209, "step": 6500 }, { "epoch": 0.5988851522550329, "eval_loss": 0.1315893828868866, "eval_runtime": 299.1326, "eval_samples_per_second": 23.458, "eval_steps_per_second": 2.935, "step": 6500 }, { "epoch": 0.5989772884323029, "grad_norm": 0.890060441599928, "learning_rate": 1.8943740126962774e-06, "loss": 0.1333, "step": 6501 }, { "epoch": 0.5990694246095729, "grad_norm": 0.8535593592292823, "learning_rate": 1.8936349485759586e-06, "loss": 0.1216, "step": 6502 }, { "epoch": 0.5991615607868429, "grad_norm": 0.8947532542781077, "learning_rate": 1.8928959407566994e-06, "loss": 0.1378, "step": 6503 }, { "epoch": 0.599253696964113, "grad_norm": 0.8657797376728128, "learning_rate": 1.8921569893071187e-06, "loss": 0.1238, "step": 6504 }, { "epoch": 0.599345833141383, "grad_norm": 0.8597590797050176, "learning_rate": 1.8914180942958265e-06, "loss": 0.1328, "step": 6505 }, { "epoch": 0.599437969318653, "grad_norm": 0.9403836820908409, "learning_rate": 1.890679255791429e-06, "loss": 0.1387, "step": 6506 }, { "epoch": 0.599530105495923, "grad_norm": 0.92284449719563, "learning_rate": 1.8899404738625288e-06, "loss": 0.1357, "step": 6507 }, { "epoch": 0.599622241673193, "grad_norm": 0.8763065217252709, "learning_rate": 1.8892017485777208e-06, "loss": 0.1314, "step": 6508 }, { "epoch": 0.599714377850463, "grad_norm": 0.9386674388659384, "learning_rate": 1.8884630800055973e-06, "loss": 0.1223, "step": 6509 }, { "epoch": 0.599806514027733, "grad_norm": 0.9215754757081813, "learning_rate": 1.8877244682147419e-06, "loss": 0.1371, "step": 6510 }, { "epoch": 0.599898650205003, "grad_norm": 0.8830249289667409, "learning_rate": 1.886985913273735e-06, "loss": 0.1346, "step": 6511 }, { "epoch": 0.599990786382273, "grad_norm": 1.0090984121003168, "learning_rate": 1.8862474152511529e-06, "loss": 0.1506, "step": 6512 }, { "epoch": 0.600082922559543, "grad_norm": 0.8890967804641455, "learning_rate": 1.8855089742155647e-06, "loss": 0.1314, "step": 6513 }, { "epoch": 0.600175058736813, "grad_norm": 0.9269506146509996, "learning_rate": 1.8847705902355332e-06, "loss": 0.1349, "step": 6514 }, { "epoch": 0.600267194914083, "grad_norm": 0.889886502404544, "learning_rate": 1.8840322633796191e-06, "loss": 0.126, "step": 6515 }, { "epoch": 0.600359331091353, "grad_norm": 0.9002338360481987, "learning_rate": 1.8832939937163753e-06, "loss": 0.1241, "step": 6516 }, { "epoch": 0.600451467268623, "grad_norm": 0.9666422702216696, "learning_rate": 1.8825557813143513e-06, "loss": 0.1313, "step": 6517 }, { "epoch": 0.6005436034458931, "grad_norm": 0.8794516400849002, "learning_rate": 1.8818176262420893e-06, "loss": 0.1217, "step": 6518 }, { "epoch": 0.6006357396231631, "grad_norm": 0.8712755680659606, "learning_rate": 1.8810795285681263e-06, "loss": 0.1133, "step": 6519 }, { "epoch": 0.6007278758004331, "grad_norm": 0.9428832972839667, "learning_rate": 1.8803414883609967e-06, "loss": 0.1384, "step": 6520 }, { "epoch": 0.6008200119777031, "grad_norm": 0.8541956118384897, "learning_rate": 1.8796035056892268e-06, "loss": 0.1218, "step": 6521 }, { "epoch": 0.6009121481549731, "grad_norm": 0.9310338659977105, "learning_rate": 1.8788655806213372e-06, "loss": 0.1251, "step": 6522 }, { "epoch": 0.6010042843322431, "grad_norm": 0.9325252258734855, "learning_rate": 1.8781277132258458e-06, "loss": 0.1349, "step": 6523 }, { "epoch": 0.6010964205095131, "grad_norm": 0.95634726705644, "learning_rate": 1.8773899035712622e-06, "loss": 0.1349, "step": 6524 }, { "epoch": 0.6011885566867831, "grad_norm": 0.904487386830463, "learning_rate": 1.8766521517260946e-06, "loss": 0.1216, "step": 6525 }, { "epoch": 0.6012806928640531, "grad_norm": 0.9604402323364885, "learning_rate": 1.875914457758841e-06, "loss": 0.1414, "step": 6526 }, { "epoch": 0.601372829041323, "grad_norm": 0.8887907178428888, "learning_rate": 1.8751768217379973e-06, "loss": 0.1283, "step": 6527 }, { "epoch": 0.601464965218593, "grad_norm": 0.9066603462086329, "learning_rate": 1.874439243732053e-06, "loss": 0.1317, "step": 6528 }, { "epoch": 0.601557101395863, "grad_norm": 0.9088621832752597, "learning_rate": 1.8737017238094926e-06, "loss": 0.1323, "step": 6529 }, { "epoch": 0.601649237573133, "grad_norm": 0.8955444159164336, "learning_rate": 1.8729642620387935e-06, "loss": 0.1264, "step": 6530 }, { "epoch": 0.601741373750403, "grad_norm": 0.9723213160380102, "learning_rate": 1.8722268584884312e-06, "loss": 0.1385, "step": 6531 }, { "epoch": 0.6018335099276731, "grad_norm": 0.9220388614910245, "learning_rate": 1.8714895132268718e-06, "loss": 0.1387, "step": 6532 }, { "epoch": 0.6019256461049431, "grad_norm": 0.9805865380748094, "learning_rate": 1.8707522263225797e-06, "loss": 0.1388, "step": 6533 }, { "epoch": 0.6020177822822131, "grad_norm": 0.9279629753238866, "learning_rate": 1.8700149978440105e-06, "loss": 0.1287, "step": 6534 }, { "epoch": 0.6021099184594831, "grad_norm": 0.9464551237736833, "learning_rate": 1.8692778278596162e-06, "loss": 0.1321, "step": 6535 }, { "epoch": 0.6022020546367531, "grad_norm": 0.9800861251562089, "learning_rate": 1.868540716437844e-06, "loss": 0.1338, "step": 6536 }, { "epoch": 0.6022941908140231, "grad_norm": 0.9347287257761518, "learning_rate": 1.8678036636471336e-06, "loss": 0.1269, "step": 6537 }, { "epoch": 0.6023863269912931, "grad_norm": 0.9972581832144547, "learning_rate": 1.867066669555922e-06, "loss": 0.1431, "step": 6538 }, { "epoch": 0.6024784631685631, "grad_norm": 0.9908423627612108, "learning_rate": 1.8663297342326381e-06, "loss": 0.1363, "step": 6539 }, { "epoch": 0.6025705993458331, "grad_norm": 0.897036984294902, "learning_rate": 1.8655928577457058e-06, "loss": 0.1231, "step": 6540 }, { "epoch": 0.6026627355231031, "grad_norm": 0.9047862414880307, "learning_rate": 1.8648560401635448e-06, "loss": 0.134, "step": 6541 }, { "epoch": 0.6027548717003731, "grad_norm": 0.9144810403633027, "learning_rate": 1.8641192815545705e-06, "loss": 0.1301, "step": 6542 }, { "epoch": 0.6028470078776431, "grad_norm": 0.9365388153587314, "learning_rate": 1.8633825819871881e-06, "loss": 0.1347, "step": 6543 }, { "epoch": 0.6029391440549131, "grad_norm": 0.8949914004485408, "learning_rate": 1.8626459415298012e-06, "loss": 0.122, "step": 6544 }, { "epoch": 0.6030312802321832, "grad_norm": 0.8895585861402004, "learning_rate": 1.8619093602508075e-06, "loss": 0.1173, "step": 6545 }, { "epoch": 0.6031234164094532, "grad_norm": 0.9070371139758455, "learning_rate": 1.8611728382185995e-06, "loss": 0.1338, "step": 6546 }, { "epoch": 0.6032155525867232, "grad_norm": 0.9223856940001541, "learning_rate": 1.860436375501561e-06, "loss": 0.1228, "step": 6547 }, { "epoch": 0.6033076887639932, "grad_norm": 0.9695663122827732, "learning_rate": 1.8596999721680743e-06, "loss": 0.138, "step": 6548 }, { "epoch": 0.6033998249412632, "grad_norm": 0.9665305664799301, "learning_rate": 1.858963628286513e-06, "loss": 0.136, "step": 6549 }, { "epoch": 0.6034919611185332, "grad_norm": 0.8799821475459378, "learning_rate": 1.8582273439252497e-06, "loss": 0.1295, "step": 6550 }, { "epoch": 0.6035840972958032, "grad_norm": 0.9107048517759243, "learning_rate": 1.8574911191526456e-06, "loss": 0.1199, "step": 6551 }, { "epoch": 0.6036762334730732, "grad_norm": 0.9273771857397388, "learning_rate": 1.85675495403706e-06, "loss": 0.1419, "step": 6552 }, { "epoch": 0.6037683696503432, "grad_norm": 0.879572291727904, "learning_rate": 1.8560188486468463e-06, "loss": 0.1257, "step": 6553 }, { "epoch": 0.6038605058276132, "grad_norm": 0.917819635353749, "learning_rate": 1.8552828030503528e-06, "loss": 0.1362, "step": 6554 }, { "epoch": 0.6039526420048832, "grad_norm": 0.854519148192027, "learning_rate": 1.854546817315919e-06, "loss": 0.1247, "step": 6555 }, { "epoch": 0.6040447781821532, "grad_norm": 0.9358624951200376, "learning_rate": 1.8538108915118833e-06, "loss": 0.1385, "step": 6556 }, { "epoch": 0.6041369143594232, "grad_norm": 0.8987979977343791, "learning_rate": 1.8530750257065752e-06, "loss": 0.1299, "step": 6557 }, { "epoch": 0.6042290505366932, "grad_norm": 0.8655912576757261, "learning_rate": 1.8523392199683218e-06, "loss": 0.1232, "step": 6558 }, { "epoch": 0.6043211867139633, "grad_norm": 0.8844496081591054, "learning_rate": 1.851603474365441e-06, "loss": 0.1381, "step": 6559 }, { "epoch": 0.6044133228912333, "grad_norm": 0.9974044514544727, "learning_rate": 1.8508677889662469e-06, "loss": 0.1425, "step": 6560 }, { "epoch": 0.6045054590685033, "grad_norm": 0.8670146001504918, "learning_rate": 1.850132163839049e-06, "loss": 0.1305, "step": 6561 }, { "epoch": 0.6045975952457733, "grad_norm": 0.8994301785848731, "learning_rate": 1.849396599052149e-06, "loss": 0.1161, "step": 6562 }, { "epoch": 0.6046897314230433, "grad_norm": 0.9049796994871221, "learning_rate": 1.848661094673846e-06, "loss": 0.126, "step": 6563 }, { "epoch": 0.6047818676003133, "grad_norm": 0.9022404281815567, "learning_rate": 1.8479256507724297e-06, "loss": 0.1187, "step": 6564 }, { "epoch": 0.6048740037775833, "grad_norm": 0.9916709809090737, "learning_rate": 1.8471902674161863e-06, "loss": 0.1475, "step": 6565 }, { "epoch": 0.6049661399548533, "grad_norm": 0.9205985919823508, "learning_rate": 1.8464549446733976e-06, "loss": 0.1355, "step": 6566 }, { "epoch": 0.6050582761321233, "grad_norm": 0.8901635257949637, "learning_rate": 1.8457196826123381e-06, "loss": 0.1254, "step": 6567 }, { "epoch": 0.6051504123093933, "grad_norm": 0.8943163998141479, "learning_rate": 1.8449844813012755e-06, "loss": 0.1238, "step": 6568 }, { "epoch": 0.6052425484866633, "grad_norm": 0.9384468453458241, "learning_rate": 1.8442493408084746e-06, "loss": 0.135, "step": 6569 }, { "epoch": 0.6053346846639333, "grad_norm": 0.8784833757143954, "learning_rate": 1.8435142612021929e-06, "loss": 0.1356, "step": 6570 }, { "epoch": 0.6054268208412033, "grad_norm": 0.9014973212837387, "learning_rate": 1.8427792425506833e-06, "loss": 0.1255, "step": 6571 }, { "epoch": 0.6055189570184734, "grad_norm": 0.9243350184650585, "learning_rate": 1.8420442849221915e-06, "loss": 0.136, "step": 6572 }, { "epoch": 0.6056110931957434, "grad_norm": 0.8652026511605855, "learning_rate": 1.8413093883849579e-06, "loss": 0.1181, "step": 6573 }, { "epoch": 0.6057032293730134, "grad_norm": 0.9563469319601445, "learning_rate": 1.840574553007219e-06, "loss": 0.1281, "step": 6574 }, { "epoch": 0.6057953655502833, "grad_norm": 0.8706380604782731, "learning_rate": 1.8398397788572046e-06, "loss": 0.1345, "step": 6575 }, { "epoch": 0.6058875017275533, "grad_norm": 0.9457792347195052, "learning_rate": 1.8391050660031364e-06, "loss": 0.142, "step": 6576 }, { "epoch": 0.6059796379048233, "grad_norm": 0.9546506661285495, "learning_rate": 1.8383704145132347e-06, "loss": 0.1328, "step": 6577 }, { "epoch": 0.6060717740820933, "grad_norm": 0.9360510999701172, "learning_rate": 1.8376358244557108e-06, "loss": 0.1405, "step": 6578 }, { "epoch": 0.6061639102593633, "grad_norm": 0.953871600525164, "learning_rate": 1.8369012958987728e-06, "loss": 0.1294, "step": 6579 }, { "epoch": 0.6062560464366333, "grad_norm": 0.9827266384723413, "learning_rate": 1.8361668289106204e-06, "loss": 0.1404, "step": 6580 }, { "epoch": 0.6063481826139033, "grad_norm": 0.9683587334877363, "learning_rate": 1.8354324235594488e-06, "loss": 0.1409, "step": 6581 }, { "epoch": 0.6064403187911733, "grad_norm": 0.8877532566082904, "learning_rate": 1.8346980799134485e-06, "loss": 0.1232, "step": 6582 }, { "epoch": 0.6065324549684433, "grad_norm": 0.9025550006506345, "learning_rate": 1.8339637980408038e-06, "loss": 0.1153, "step": 6583 }, { "epoch": 0.6066245911457133, "grad_norm": 0.9428207407116446, "learning_rate": 1.833229578009691e-06, "loss": 0.1303, "step": 6584 }, { "epoch": 0.6067167273229833, "grad_norm": 0.9556142056967836, "learning_rate": 1.8324954198882843e-06, "loss": 0.1438, "step": 6585 }, { "epoch": 0.6068088635002534, "grad_norm": 0.8780366492422997, "learning_rate": 1.831761323744749e-06, "loss": 0.1214, "step": 6586 }, { "epoch": 0.6069009996775234, "grad_norm": 0.9234513915076087, "learning_rate": 1.831027289647248e-06, "loss": 0.1257, "step": 6587 }, { "epoch": 0.6069931358547934, "grad_norm": 0.9163158474665248, "learning_rate": 1.8302933176639346e-06, "loss": 0.1326, "step": 6588 }, { "epoch": 0.6070852720320634, "grad_norm": 0.9169583283659786, "learning_rate": 1.829559407862958e-06, "loss": 0.1313, "step": 6589 }, { "epoch": 0.6071774082093334, "grad_norm": 0.9362982236980304, "learning_rate": 1.8288255603124632e-06, "loss": 0.1338, "step": 6590 }, { "epoch": 0.6072695443866034, "grad_norm": 0.9062618567697522, "learning_rate": 1.8280917750805865e-06, "loss": 0.1259, "step": 6591 }, { "epoch": 0.6073616805638734, "grad_norm": 0.8924801799490278, "learning_rate": 1.8273580522354622e-06, "loss": 0.1189, "step": 6592 }, { "epoch": 0.6074538167411434, "grad_norm": 0.925574310988602, "learning_rate": 1.8266243918452149e-06, "loss": 0.128, "step": 6593 }, { "epoch": 0.6075459529184134, "grad_norm": 0.9140715166358384, "learning_rate": 1.825890793977964e-06, "loss": 0.128, "step": 6594 }, { "epoch": 0.6076380890956834, "grad_norm": 0.9680327082562757, "learning_rate": 1.8251572587018268e-06, "loss": 0.1515, "step": 6595 }, { "epoch": 0.6077302252729534, "grad_norm": 0.9601890925112531, "learning_rate": 1.8244237860849108e-06, "loss": 0.1322, "step": 6596 }, { "epoch": 0.6078223614502234, "grad_norm": 0.8684119145542256, "learning_rate": 1.8236903761953178e-06, "loss": 0.1176, "step": 6597 }, { "epoch": 0.6079144976274934, "grad_norm": 0.8648843664557521, "learning_rate": 1.8229570291011468e-06, "loss": 0.1168, "step": 6598 }, { "epoch": 0.6080066338047634, "grad_norm": 0.9193053247872316, "learning_rate": 1.8222237448704882e-06, "loss": 0.1301, "step": 6599 }, { "epoch": 0.6080987699820335, "grad_norm": 0.92509832524853, "learning_rate": 1.8214905235714286e-06, "loss": 0.128, "step": 6600 }, { "epoch": 0.6081909061593035, "grad_norm": 0.8970623170235373, "learning_rate": 1.8207573652720467e-06, "loss": 0.1161, "step": 6601 }, { "epoch": 0.6082830423365735, "grad_norm": 0.8532064017979026, "learning_rate": 1.8200242700404159e-06, "loss": 0.1201, "step": 6602 }, { "epoch": 0.6083751785138435, "grad_norm": 0.9100390031276187, "learning_rate": 1.8192912379446048e-06, "loss": 0.1226, "step": 6603 }, { "epoch": 0.6084673146911135, "grad_norm": 0.9301923891417945, "learning_rate": 1.8185582690526765e-06, "loss": 0.1332, "step": 6604 }, { "epoch": 0.6085594508683835, "grad_norm": 0.9588130238961836, "learning_rate": 1.8178253634326854e-06, "loss": 0.1323, "step": 6605 }, { "epoch": 0.6086515870456535, "grad_norm": 0.9464922450102702, "learning_rate": 1.817092521152683e-06, "loss": 0.1243, "step": 6606 }, { "epoch": 0.6087437232229235, "grad_norm": 0.9661556183879925, "learning_rate": 1.816359742280713e-06, "loss": 0.1309, "step": 6607 }, { "epoch": 0.6088358594001935, "grad_norm": 0.8915377822004282, "learning_rate": 1.8156270268848155e-06, "loss": 0.1183, "step": 6608 }, { "epoch": 0.6089279955774635, "grad_norm": 0.9370935343576614, "learning_rate": 1.8148943750330216e-06, "loss": 0.1296, "step": 6609 }, { "epoch": 0.6090201317547335, "grad_norm": 0.9175329703834323, "learning_rate": 1.8141617867933586e-06, "loss": 0.1215, "step": 6610 }, { "epoch": 0.6091122679320035, "grad_norm": 0.939823774501705, "learning_rate": 1.813429262233848e-06, "loss": 0.1316, "step": 6611 }, { "epoch": 0.6092044041092735, "grad_norm": 0.9438374697151484, "learning_rate": 1.8126968014225044e-06, "loss": 0.1381, "step": 6612 }, { "epoch": 0.6092965402865436, "grad_norm": 0.9026520719916683, "learning_rate": 1.811964404427336e-06, "loss": 0.1357, "step": 6613 }, { "epoch": 0.6093886764638136, "grad_norm": 0.8733218006894725, "learning_rate": 1.811232071316347e-06, "loss": 0.1118, "step": 6614 }, { "epoch": 0.6094808126410836, "grad_norm": 0.8841490921267199, "learning_rate": 1.8104998021575337e-06, "loss": 0.1295, "step": 6615 }, { "epoch": 0.6095729488183536, "grad_norm": 0.9359678194340786, "learning_rate": 1.8097675970188894e-06, "loss": 0.1327, "step": 6616 }, { "epoch": 0.6096650849956236, "grad_norm": 0.9605463356795557, "learning_rate": 1.8090354559683972e-06, "loss": 0.1378, "step": 6617 }, { "epoch": 0.6097572211728935, "grad_norm": 0.9054243649757274, "learning_rate": 1.8083033790740368e-06, "loss": 0.1289, "step": 6618 }, { "epoch": 0.6098493573501635, "grad_norm": 0.8842290378463926, "learning_rate": 1.8075713664037823e-06, "loss": 0.1289, "step": 6619 }, { "epoch": 0.6099414935274335, "grad_norm": 0.8947640414559052, "learning_rate": 1.806839418025601e-06, "loss": 0.1275, "step": 6620 }, { "epoch": 0.6100336297047035, "grad_norm": 0.921951402811935, "learning_rate": 1.8061075340074556e-06, "loss": 0.1297, "step": 6621 }, { "epoch": 0.6101257658819735, "grad_norm": 0.9294620374457182, "learning_rate": 1.8053757144172987e-06, "loss": 0.1237, "step": 6622 }, { "epoch": 0.6102179020592435, "grad_norm": 0.8834759916422078, "learning_rate": 1.8046439593230822e-06, "loss": 0.1147, "step": 6623 }, { "epoch": 0.6103100382365135, "grad_norm": 0.9385060723312659, "learning_rate": 1.8039122687927485e-06, "loss": 0.1386, "step": 6624 }, { "epoch": 0.6104021744137835, "grad_norm": 0.9310030772186891, "learning_rate": 1.803180642894236e-06, "loss": 0.1389, "step": 6625 }, { "epoch": 0.6104943105910535, "grad_norm": 0.8966613217875679, "learning_rate": 1.802449081695476e-06, "loss": 0.1269, "step": 6626 }, { "epoch": 0.6105864467683236, "grad_norm": 0.8572229985519388, "learning_rate": 1.801717585264393e-06, "loss": 0.1282, "step": 6627 }, { "epoch": 0.6106785829455936, "grad_norm": 0.930161237084897, "learning_rate": 1.800986153668908e-06, "loss": 0.125, "step": 6628 }, { "epoch": 0.6107707191228636, "grad_norm": 0.9925047377703466, "learning_rate": 1.8002547869769344e-06, "loss": 0.1297, "step": 6629 }, { "epoch": 0.6108628553001336, "grad_norm": 0.8971787402961252, "learning_rate": 1.7995234852563779e-06, "loss": 0.1228, "step": 6630 }, { "epoch": 0.6109549914774036, "grad_norm": 0.9592466914464529, "learning_rate": 1.7987922485751418e-06, "loss": 0.1383, "step": 6631 }, { "epoch": 0.6110471276546736, "grad_norm": 0.9101582801987562, "learning_rate": 1.7980610770011203e-06, "loss": 0.1217, "step": 6632 }, { "epoch": 0.6111392638319436, "grad_norm": 0.9388719801247791, "learning_rate": 1.7973299706022046e-06, "loss": 0.1289, "step": 6633 }, { "epoch": 0.6112314000092136, "grad_norm": 0.8983598124593635, "learning_rate": 1.796598929446276e-06, "loss": 0.1196, "step": 6634 }, { "epoch": 0.6113235361864836, "grad_norm": 0.9182147997516233, "learning_rate": 1.7958679536012118e-06, "loss": 0.1207, "step": 6635 }, { "epoch": 0.6114156723637536, "grad_norm": 0.8619585830126839, "learning_rate": 1.795137043134885e-06, "loss": 0.1232, "step": 6636 }, { "epoch": 0.6115078085410236, "grad_norm": 1.0263119069785722, "learning_rate": 1.7944061981151598e-06, "loss": 0.1374, "step": 6637 }, { "epoch": 0.6115999447182936, "grad_norm": 0.9085278130780867, "learning_rate": 1.793675418609894e-06, "loss": 0.124, "step": 6638 }, { "epoch": 0.6116920808955636, "grad_norm": 0.890303263816946, "learning_rate": 1.792944704686942e-06, "loss": 0.1253, "step": 6639 }, { "epoch": 0.6117842170728337, "grad_norm": 0.8922482946265187, "learning_rate": 1.79221405641415e-06, "loss": 0.1204, "step": 6640 }, { "epoch": 0.6118763532501037, "grad_norm": 0.9219285468355103, "learning_rate": 1.79148347385936e-06, "loss": 0.1339, "step": 6641 }, { "epoch": 0.6119684894273737, "grad_norm": 0.904392406416535, "learning_rate": 1.790752957090405e-06, "loss": 0.1285, "step": 6642 }, { "epoch": 0.6120606256046437, "grad_norm": 0.9040831409584187, "learning_rate": 1.7900225061751136e-06, "loss": 0.1305, "step": 6643 }, { "epoch": 0.6121527617819137, "grad_norm": 0.8813255919074837, "learning_rate": 1.78929212118131e-06, "loss": 0.1205, "step": 6644 }, { "epoch": 0.6122448979591837, "grad_norm": 0.8850407998747193, "learning_rate": 1.7885618021768097e-06, "loss": 0.1253, "step": 6645 }, { "epoch": 0.6123370341364537, "grad_norm": 0.9142336900388941, "learning_rate": 1.7878315492294213e-06, "loss": 0.1248, "step": 6646 }, { "epoch": 0.6124291703137237, "grad_norm": 0.9290446184856952, "learning_rate": 1.7871013624069511e-06, "loss": 0.1391, "step": 6647 }, { "epoch": 0.6125213064909937, "grad_norm": 0.9968257121332454, "learning_rate": 1.7863712417771956e-06, "loss": 0.1348, "step": 6648 }, { "epoch": 0.6126134426682637, "grad_norm": 0.9863639721347642, "learning_rate": 1.7856411874079475e-06, "loss": 0.1402, "step": 6649 }, { "epoch": 0.6127055788455337, "grad_norm": 0.9698492821648672, "learning_rate": 1.7849111993669924e-06, "loss": 0.1345, "step": 6650 }, { "epoch": 0.6127977150228037, "grad_norm": 0.9558952945055265, "learning_rate": 1.7841812777221085e-06, "loss": 0.1246, "step": 6651 }, { "epoch": 0.6128898512000737, "grad_norm": 0.9316749933029452, "learning_rate": 1.7834514225410704e-06, "loss": 0.1283, "step": 6652 }, { "epoch": 0.6129819873773437, "grad_norm": 0.9578132894362446, "learning_rate": 1.7827216338916444e-06, "loss": 0.1362, "step": 6653 }, { "epoch": 0.6130741235546138, "grad_norm": 0.9942396128253892, "learning_rate": 1.781991911841593e-06, "loss": 0.1384, "step": 6654 }, { "epoch": 0.6131662597318838, "grad_norm": 0.8876883802815898, "learning_rate": 1.7812622564586695e-06, "loss": 0.1197, "step": 6655 }, { "epoch": 0.6132583959091538, "grad_norm": 0.9207165138441854, "learning_rate": 1.7805326678106221e-06, "loss": 0.1306, "step": 6656 }, { "epoch": 0.6133505320864238, "grad_norm": 0.8709068748396972, "learning_rate": 1.7798031459651942e-06, "loss": 0.1205, "step": 6657 }, { "epoch": 0.6134426682636938, "grad_norm": 0.8609800545742414, "learning_rate": 1.7790736909901228e-06, "loss": 0.1251, "step": 6658 }, { "epoch": 0.6135348044409638, "grad_norm": 0.8481561432664178, "learning_rate": 1.7783443029531352e-06, "loss": 0.1252, "step": 6659 }, { "epoch": 0.6136269406182338, "grad_norm": 0.9299442690236165, "learning_rate": 1.7776149819219574e-06, "loss": 0.1355, "step": 6660 }, { "epoch": 0.6137190767955037, "grad_norm": 0.9289067001977687, "learning_rate": 1.776885727964306e-06, "loss": 0.1263, "step": 6661 }, { "epoch": 0.6138112129727737, "grad_norm": 0.8770126865790537, "learning_rate": 1.7761565411478935e-06, "loss": 0.1341, "step": 6662 }, { "epoch": 0.6139033491500437, "grad_norm": 0.8953296834576114, "learning_rate": 1.7754274215404234e-06, "loss": 0.139, "step": 6663 }, { "epoch": 0.6139954853273137, "grad_norm": 0.9011900760471201, "learning_rate": 1.7746983692095947e-06, "loss": 0.1273, "step": 6664 }, { "epoch": 0.6140876215045837, "grad_norm": 0.9071509181765444, "learning_rate": 1.7739693842231008e-06, "loss": 0.1192, "step": 6665 }, { "epoch": 0.6141797576818537, "grad_norm": 0.953660969909198, "learning_rate": 1.7732404666486289e-06, "loss": 0.1402, "step": 6666 }, { "epoch": 0.6142718938591237, "grad_norm": 0.9647631217108921, "learning_rate": 1.7725116165538564e-06, "loss": 0.1391, "step": 6667 }, { "epoch": 0.6143640300363938, "grad_norm": 0.8881822165996579, "learning_rate": 1.7717828340064592e-06, "loss": 0.1262, "step": 6668 }, { "epoch": 0.6144561662136638, "grad_norm": 0.9178819920338384, "learning_rate": 1.7710541190741037e-06, "loss": 0.1242, "step": 6669 }, { "epoch": 0.6145483023909338, "grad_norm": 0.9312720344544685, "learning_rate": 1.7703254718244525e-06, "loss": 0.1339, "step": 6670 }, { "epoch": 0.6146404385682038, "grad_norm": 0.9442221013026411, "learning_rate": 1.7695968923251593e-06, "loss": 0.1309, "step": 6671 }, { "epoch": 0.6147325747454738, "grad_norm": 0.9321429789620098, "learning_rate": 1.7688683806438731e-06, "loss": 0.1339, "step": 6672 }, { "epoch": 0.6148247109227438, "grad_norm": 0.9000161388757578, "learning_rate": 1.7681399368482367e-06, "loss": 0.1306, "step": 6673 }, { "epoch": 0.6149168471000138, "grad_norm": 0.9606303166573105, "learning_rate": 1.7674115610058864e-06, "loss": 0.133, "step": 6674 }, { "epoch": 0.6150089832772838, "grad_norm": 0.9437709741521769, "learning_rate": 1.7666832531844508e-06, "loss": 0.1258, "step": 6675 }, { "epoch": 0.6151011194545538, "grad_norm": 0.9434879871809514, "learning_rate": 1.765955013451554e-06, "loss": 0.1313, "step": 6676 }, { "epoch": 0.6151932556318238, "grad_norm": 0.9881526594306695, "learning_rate": 1.765226841874813e-06, "loss": 0.1447, "step": 6677 }, { "epoch": 0.6152853918090938, "grad_norm": 0.9328846527215926, "learning_rate": 1.7644987385218395e-06, "loss": 0.1345, "step": 6678 }, { "epoch": 0.6153775279863638, "grad_norm": 0.9727818346921414, "learning_rate": 1.7637707034602378e-06, "loss": 0.1464, "step": 6679 }, { "epoch": 0.6154696641636338, "grad_norm": 0.9213397997962777, "learning_rate": 1.7630427367576042e-06, "loss": 0.131, "step": 6680 }, { "epoch": 0.6155618003409039, "grad_norm": 0.9437472003416133, "learning_rate": 1.7623148384815326e-06, "loss": 0.1251, "step": 6681 }, { "epoch": 0.6156539365181739, "grad_norm": 0.9166929533684961, "learning_rate": 1.7615870086996067e-06, "loss": 0.1384, "step": 6682 }, { "epoch": 0.6157460726954439, "grad_norm": 0.9249383528462048, "learning_rate": 1.7608592474794078e-06, "loss": 0.1295, "step": 6683 }, { "epoch": 0.6158382088727139, "grad_norm": 0.9679979891312327, "learning_rate": 1.760131554888507e-06, "loss": 0.1295, "step": 6684 }, { "epoch": 0.6159303450499839, "grad_norm": 0.9300305618407569, "learning_rate": 1.7594039309944702e-06, "loss": 0.1392, "step": 6685 }, { "epoch": 0.6160224812272539, "grad_norm": 0.9864960844070007, "learning_rate": 1.7586763758648587e-06, "loss": 0.1485, "step": 6686 }, { "epoch": 0.6161146174045239, "grad_norm": 0.91785390219516, "learning_rate": 1.757948889567226e-06, "loss": 0.1287, "step": 6687 }, { "epoch": 0.6162067535817939, "grad_norm": 0.8841484437270067, "learning_rate": 1.7572214721691178e-06, "loss": 0.1213, "step": 6688 }, { "epoch": 0.6162988897590639, "grad_norm": 0.9395321082526199, "learning_rate": 1.7564941237380761e-06, "loss": 0.1293, "step": 6689 }, { "epoch": 0.6163910259363339, "grad_norm": 0.8857358471622105, "learning_rate": 1.7557668443416348e-06, "loss": 0.124, "step": 6690 }, { "epoch": 0.6164831621136039, "grad_norm": 0.903927286093644, "learning_rate": 1.755039634047323e-06, "loss": 0.1228, "step": 6691 }, { "epoch": 0.6165752982908739, "grad_norm": 0.9233988812330909, "learning_rate": 1.7543124929226608e-06, "loss": 0.1355, "step": 6692 }, { "epoch": 0.6166674344681439, "grad_norm": 0.9142573934713351, "learning_rate": 1.7535854210351635e-06, "loss": 0.1368, "step": 6693 }, { "epoch": 0.6167595706454139, "grad_norm": 0.8897398795942182, "learning_rate": 1.7528584184523407e-06, "loss": 0.1232, "step": 6694 }, { "epoch": 0.616851706822684, "grad_norm": 0.9684599266466104, "learning_rate": 1.7521314852416946e-06, "loss": 0.1412, "step": 6695 }, { "epoch": 0.616943842999954, "grad_norm": 0.8633355016137957, "learning_rate": 1.7514046214707195e-06, "loss": 0.1163, "step": 6696 }, { "epoch": 0.617035979177224, "grad_norm": 0.9556729220047715, "learning_rate": 1.7506778272069064e-06, "loss": 0.1381, "step": 6697 }, { "epoch": 0.617128115354494, "grad_norm": 0.8849642829869895, "learning_rate": 1.7499511025177373e-06, "loss": 0.1264, "step": 6698 }, { "epoch": 0.617220251531764, "grad_norm": 0.9186363005241283, "learning_rate": 1.7492244474706898e-06, "loss": 0.1423, "step": 6699 }, { "epoch": 0.617312387709034, "grad_norm": 0.8928578767583446, "learning_rate": 1.748497862133233e-06, "loss": 0.1265, "step": 6700 }, { "epoch": 0.617404523886304, "grad_norm": 0.8535895801436509, "learning_rate": 1.7477713465728296e-06, "loss": 0.1246, "step": 6701 }, { "epoch": 0.617496660063574, "grad_norm": 0.9062694992292313, "learning_rate": 1.747044900856938e-06, "loss": 0.1244, "step": 6702 }, { "epoch": 0.617588796240844, "grad_norm": 1.0026481306418769, "learning_rate": 1.7463185250530102e-06, "loss": 0.1309, "step": 6703 }, { "epoch": 0.617680932418114, "grad_norm": 0.9401301173996884, "learning_rate": 1.7455922192284864e-06, "loss": 0.1313, "step": 6704 }, { "epoch": 0.617773068595384, "grad_norm": 0.8910759203702324, "learning_rate": 1.7448659834508064e-06, "loss": 0.1298, "step": 6705 }, { "epoch": 0.6178652047726539, "grad_norm": 0.9252349150341446, "learning_rate": 1.7441398177874015e-06, "loss": 0.1308, "step": 6706 }, { "epoch": 0.6179573409499239, "grad_norm": 0.8962704413424406, "learning_rate": 1.7434137223056954e-06, "loss": 0.1253, "step": 6707 }, { "epoch": 0.618049477127194, "grad_norm": 0.9382417961474702, "learning_rate": 1.7426876970731076e-06, "loss": 0.1267, "step": 6708 }, { "epoch": 0.618141613304464, "grad_norm": 0.8782716906829233, "learning_rate": 1.7419617421570483e-06, "loss": 0.1298, "step": 6709 }, { "epoch": 0.618233749481734, "grad_norm": 0.8877730559324124, "learning_rate": 1.7412358576249222e-06, "loss": 0.1242, "step": 6710 }, { "epoch": 0.618325885659004, "grad_norm": 1.031141967978923, "learning_rate": 1.740510043544129e-06, "loss": 0.1433, "step": 6711 }, { "epoch": 0.618418021836274, "grad_norm": 0.8924542958205756, "learning_rate": 1.7397842999820605e-06, "loss": 0.1228, "step": 6712 }, { "epoch": 0.618510158013544, "grad_norm": 0.951354922791097, "learning_rate": 1.7390586270061005e-06, "loss": 0.1304, "step": 6713 }, { "epoch": 0.618602294190814, "grad_norm": 0.963162432927016, "learning_rate": 1.7383330246836294e-06, "loss": 0.1266, "step": 6714 }, { "epoch": 0.618694430368084, "grad_norm": 0.8984043647800838, "learning_rate": 1.737607493082018e-06, "loss": 0.1252, "step": 6715 }, { "epoch": 0.618786566545354, "grad_norm": 0.9279719290802259, "learning_rate": 1.7368820322686345e-06, "loss": 0.1294, "step": 6716 }, { "epoch": 0.618878702722624, "grad_norm": 0.952211601716075, "learning_rate": 1.7361566423108355e-06, "loss": 0.1312, "step": 6717 }, { "epoch": 0.618970838899894, "grad_norm": 0.8749680465755658, "learning_rate": 1.7354313232759745e-06, "loss": 0.1179, "step": 6718 }, { "epoch": 0.619062975077164, "grad_norm": 0.8577709338059307, "learning_rate": 1.7347060752313978e-06, "loss": 0.1235, "step": 6719 }, { "epoch": 0.619155111254434, "grad_norm": 0.955540440918245, "learning_rate": 1.7339808982444444e-06, "loss": 0.1394, "step": 6720 }, { "epoch": 0.619247247431704, "grad_norm": 0.9616325266472617, "learning_rate": 1.7332557923824463e-06, "loss": 0.1355, "step": 6721 }, { "epoch": 0.6193393836089741, "grad_norm": 0.9302492116295052, "learning_rate": 1.732530757712731e-06, "loss": 0.121, "step": 6722 }, { "epoch": 0.6194315197862441, "grad_norm": 0.8914909942723336, "learning_rate": 1.7318057943026169e-06, "loss": 0.1194, "step": 6723 }, { "epoch": 0.6195236559635141, "grad_norm": 0.8862393627090467, "learning_rate": 1.7310809022194184e-06, "loss": 0.1279, "step": 6724 }, { "epoch": 0.6196157921407841, "grad_norm": 0.8809958029695578, "learning_rate": 1.7303560815304404e-06, "loss": 0.1274, "step": 6725 }, { "epoch": 0.6197079283180541, "grad_norm": 0.9235688604500591, "learning_rate": 1.7296313323029825e-06, "loss": 0.129, "step": 6726 }, { "epoch": 0.6198000644953241, "grad_norm": 0.9150737133599645, "learning_rate": 1.7289066546043386e-06, "loss": 0.1189, "step": 6727 }, { "epoch": 0.6198922006725941, "grad_norm": 0.9212605410966181, "learning_rate": 1.7281820485017958e-06, "loss": 0.1359, "step": 6728 }, { "epoch": 0.6199843368498641, "grad_norm": 0.8655563428055735, "learning_rate": 1.7274575140626318e-06, "loss": 0.1287, "step": 6729 }, { "epoch": 0.6200764730271341, "grad_norm": 0.9011894322767224, "learning_rate": 1.726733051354121e-06, "loss": 0.1325, "step": 6730 }, { "epoch": 0.6201686092044041, "grad_norm": 0.8678920213692736, "learning_rate": 1.7260086604435295e-06, "loss": 0.1222, "step": 6731 }, { "epoch": 0.6202607453816741, "grad_norm": 0.9225506544414821, "learning_rate": 1.7252843413981176e-06, "loss": 0.1291, "step": 6732 }, { "epoch": 0.6203528815589441, "grad_norm": 0.8741405868411134, "learning_rate": 1.7245600942851378e-06, "loss": 0.1168, "step": 6733 }, { "epoch": 0.6204450177362141, "grad_norm": 0.88964218818895, "learning_rate": 1.7238359191718362e-06, "loss": 0.1158, "step": 6734 }, { "epoch": 0.6205371539134841, "grad_norm": 0.8860723797025962, "learning_rate": 1.7231118161254534e-06, "loss": 0.1201, "step": 6735 }, { "epoch": 0.6206292900907542, "grad_norm": 0.9866557709375795, "learning_rate": 1.7223877852132218e-06, "loss": 0.1349, "step": 6736 }, { "epoch": 0.6207214262680242, "grad_norm": 0.8892445642733556, "learning_rate": 1.721663826502369e-06, "loss": 0.1218, "step": 6737 }, { "epoch": 0.6208135624452942, "grad_norm": 0.9688094575884019, "learning_rate": 1.7209399400601128e-06, "loss": 0.1346, "step": 6738 }, { "epoch": 0.6209056986225642, "grad_norm": 0.926838705061562, "learning_rate": 1.720216125953667e-06, "loss": 0.1334, "step": 6739 }, { "epoch": 0.6209978347998342, "grad_norm": 0.9396059515548089, "learning_rate": 1.7194923842502382e-06, "loss": 0.1324, "step": 6740 }, { "epoch": 0.6210899709771042, "grad_norm": 0.9830328224533756, "learning_rate": 1.7187687150170257e-06, "loss": 0.1345, "step": 6741 }, { "epoch": 0.6211821071543742, "grad_norm": 0.9430353714861338, "learning_rate": 1.7180451183212217e-06, "loss": 0.1211, "step": 6742 }, { "epoch": 0.6212742433316442, "grad_norm": 0.9121922967727294, "learning_rate": 1.7173215942300125e-06, "loss": 0.1428, "step": 6743 }, { "epoch": 0.6213663795089142, "grad_norm": 0.8985273162486016, "learning_rate": 1.7165981428105771e-06, "loss": 0.14, "step": 6744 }, { "epoch": 0.6214585156861842, "grad_norm": 0.9074990241015248, "learning_rate": 1.71587476413009e-06, "loss": 0.1354, "step": 6745 }, { "epoch": 0.6215506518634542, "grad_norm": 0.8835713991487871, "learning_rate": 1.7151514582557144e-06, "loss": 0.1295, "step": 6746 }, { "epoch": 0.6216427880407241, "grad_norm": 0.9563138522145045, "learning_rate": 1.71442822525461e-06, "loss": 0.1472, "step": 6747 }, { "epoch": 0.6217349242179941, "grad_norm": 0.9397063902023289, "learning_rate": 1.71370506519393e-06, "loss": 0.1251, "step": 6748 }, { "epoch": 0.6218270603952643, "grad_norm": 0.8707621777233324, "learning_rate": 1.7129819781408197e-06, "loss": 0.1278, "step": 6749 }, { "epoch": 0.6219191965725342, "grad_norm": 0.8388729362903343, "learning_rate": 1.7122589641624166e-06, "loss": 0.1181, "step": 6750 }, { "epoch": 0.6220113327498042, "grad_norm": 0.9139806446176466, "learning_rate": 1.7115360233258537e-06, "loss": 0.1202, "step": 6751 }, { "epoch": 0.6221034689270742, "grad_norm": 0.9311401798162033, "learning_rate": 1.7108131556982554e-06, "loss": 0.1372, "step": 6752 }, { "epoch": 0.6221956051043442, "grad_norm": 0.9511568955126489, "learning_rate": 1.7100903613467419e-06, "loss": 0.1214, "step": 6753 }, { "epoch": 0.6222877412816142, "grad_norm": 0.9239186752009969, "learning_rate": 1.7093676403384223e-06, "loss": 0.1301, "step": 6754 }, { "epoch": 0.6223798774588842, "grad_norm": 0.913588956394364, "learning_rate": 1.7086449927404025e-06, "loss": 0.1243, "step": 6755 }, { "epoch": 0.6224720136361542, "grad_norm": 0.905711339806985, "learning_rate": 1.7079224186197804e-06, "loss": 0.1294, "step": 6756 }, { "epoch": 0.6225641498134242, "grad_norm": 0.9681244319244915, "learning_rate": 1.7071999180436477e-06, "loss": 0.133, "step": 6757 }, { "epoch": 0.6226562859906942, "grad_norm": 0.9155233722559435, "learning_rate": 1.7064774910790865e-06, "loss": 0.1234, "step": 6758 }, { "epoch": 0.6227484221679642, "grad_norm": 0.9435640978336151, "learning_rate": 1.7057551377931767e-06, "loss": 0.1307, "step": 6759 }, { "epoch": 0.6228405583452342, "grad_norm": 0.9545102508423661, "learning_rate": 1.705032858252987e-06, "loss": 0.1422, "step": 6760 }, { "epoch": 0.6229326945225042, "grad_norm": 0.9415499869138108, "learning_rate": 1.7043106525255831e-06, "loss": 0.1395, "step": 6761 }, { "epoch": 0.6230248306997742, "grad_norm": 0.905530428956616, "learning_rate": 1.70358852067802e-06, "loss": 0.1226, "step": 6762 }, { "epoch": 0.6231169668770443, "grad_norm": 0.9482156251625352, "learning_rate": 1.7028664627773483e-06, "loss": 0.138, "step": 6763 }, { "epoch": 0.6232091030543143, "grad_norm": 0.8947426819048082, "learning_rate": 1.7021444788906117e-06, "loss": 0.1271, "step": 6764 }, { "epoch": 0.6233012392315843, "grad_norm": 0.9270692925867124, "learning_rate": 1.7014225690848458e-06, "loss": 0.1285, "step": 6765 }, { "epoch": 0.6233933754088543, "grad_norm": 0.8786188858935942, "learning_rate": 1.7007007334270809e-06, "loss": 0.1205, "step": 6766 }, { "epoch": 0.6234855115861243, "grad_norm": 0.9163708159636691, "learning_rate": 1.6999789719843388e-06, "loss": 0.1272, "step": 6767 }, { "epoch": 0.6235776477633943, "grad_norm": 0.8979496261233568, "learning_rate": 1.6992572848236343e-06, "loss": 0.1181, "step": 6768 }, { "epoch": 0.6236697839406643, "grad_norm": 0.9156614779990734, "learning_rate": 1.698535672011978e-06, "loss": 0.1341, "step": 6769 }, { "epoch": 0.6237619201179343, "grad_norm": 0.9391056956844751, "learning_rate": 1.6978141336163713e-06, "loss": 0.1263, "step": 6770 }, { "epoch": 0.6238540562952043, "grad_norm": 0.8858195058935551, "learning_rate": 1.6970926697038073e-06, "loss": 0.1247, "step": 6771 }, { "epoch": 0.6239461924724743, "grad_norm": 0.8735010962850701, "learning_rate": 1.6963712803412761e-06, "loss": 0.126, "step": 6772 }, { "epoch": 0.6240383286497443, "grad_norm": 0.9453628701261408, "learning_rate": 1.6956499655957577e-06, "loss": 0.1434, "step": 6773 }, { "epoch": 0.6241304648270143, "grad_norm": 0.9657158049888763, "learning_rate": 1.694928725534227e-06, "loss": 0.1342, "step": 6774 }, { "epoch": 0.6242226010042843, "grad_norm": 0.9759493120509639, "learning_rate": 1.6942075602236507e-06, "loss": 0.125, "step": 6775 }, { "epoch": 0.6243147371815544, "grad_norm": 0.9688092020766974, "learning_rate": 1.6934864697309883e-06, "loss": 0.1469, "step": 6776 }, { "epoch": 0.6244068733588244, "grad_norm": 0.9382127371598054, "learning_rate": 1.6927654541231941e-06, "loss": 0.1304, "step": 6777 }, { "epoch": 0.6244990095360944, "grad_norm": 0.9522838200564424, "learning_rate": 1.6920445134672162e-06, "loss": 0.126, "step": 6778 }, { "epoch": 0.6245911457133644, "grad_norm": 0.94904956281575, "learning_rate": 1.6913236478299906e-06, "loss": 0.1314, "step": 6779 }, { "epoch": 0.6246832818906344, "grad_norm": 0.9386781279946481, "learning_rate": 1.6906028572784511e-06, "loss": 0.1298, "step": 6780 }, { "epoch": 0.6247754180679044, "grad_norm": 0.9081504529487514, "learning_rate": 1.6898821418795237e-06, "loss": 0.1349, "step": 6781 }, { "epoch": 0.6248675542451744, "grad_norm": 0.8914673156367624, "learning_rate": 1.6891615017001272e-06, "loss": 0.1137, "step": 6782 }, { "epoch": 0.6249596904224444, "grad_norm": 0.891940546485863, "learning_rate": 1.6884409368071718e-06, "loss": 0.1231, "step": 6783 }, { "epoch": 0.6250518265997144, "grad_norm": 0.914512509293828, "learning_rate": 1.6877204472675634e-06, "loss": 0.1291, "step": 6784 }, { "epoch": 0.6251439627769844, "grad_norm": 0.8841333746458065, "learning_rate": 1.687000033148198e-06, "loss": 0.1289, "step": 6785 }, { "epoch": 0.6252360989542544, "grad_norm": 0.9418903021503365, "learning_rate": 1.686279694515968e-06, "loss": 0.1313, "step": 6786 }, { "epoch": 0.6253282351315244, "grad_norm": 0.8870373175179938, "learning_rate": 1.685559431437756e-06, "loss": 0.1176, "step": 6787 }, { "epoch": 0.6254203713087944, "grad_norm": 0.8973213859270821, "learning_rate": 1.6848392439804374e-06, "loss": 0.1247, "step": 6788 }, { "epoch": 0.6255125074860644, "grad_norm": 0.9056535481430396, "learning_rate": 1.6841191322108835e-06, "loss": 0.1316, "step": 6789 }, { "epoch": 0.6256046436633345, "grad_norm": 0.8611185014978975, "learning_rate": 1.6833990961959562e-06, "loss": 0.1105, "step": 6790 }, { "epoch": 0.6256967798406045, "grad_norm": 0.8714595196924181, "learning_rate": 1.6826791360025103e-06, "loss": 0.118, "step": 6791 }, { "epoch": 0.6257889160178745, "grad_norm": 0.939216708887262, "learning_rate": 1.6819592516973942e-06, "loss": 0.1294, "step": 6792 }, { "epoch": 0.6258810521951444, "grad_norm": 0.8805849507628835, "learning_rate": 1.6812394433474497e-06, "loss": 0.1303, "step": 6793 }, { "epoch": 0.6259731883724144, "grad_norm": 0.9904622658783916, "learning_rate": 1.6805197110195115e-06, "loss": 0.1366, "step": 6794 }, { "epoch": 0.6260653245496844, "grad_norm": 0.8452414433909419, "learning_rate": 1.6798000547804066e-06, "loss": 0.1138, "step": 6795 }, { "epoch": 0.6261574607269544, "grad_norm": 0.9647793184636386, "learning_rate": 1.6790804746969542e-06, "loss": 0.1318, "step": 6796 }, { "epoch": 0.6262495969042244, "grad_norm": 0.9190751074302138, "learning_rate": 1.6783609708359683e-06, "loss": 0.1272, "step": 6797 }, { "epoch": 0.6263417330814944, "grad_norm": 1.0135874834100627, "learning_rate": 1.677641543264254e-06, "loss": 0.1356, "step": 6798 }, { "epoch": 0.6264338692587644, "grad_norm": 0.9578041665275557, "learning_rate": 1.6769221920486123e-06, "loss": 0.1333, "step": 6799 }, { "epoch": 0.6265260054360344, "grad_norm": 0.9120272599831393, "learning_rate": 1.676202917255833e-06, "loss": 0.1236, "step": 6800 }, { "epoch": 0.6266181416133044, "grad_norm": 0.9545336241142, "learning_rate": 1.675483718952701e-06, "loss": 0.1505, "step": 6801 }, { "epoch": 0.6267102777905744, "grad_norm": 0.8996118102214581, "learning_rate": 1.6747645972059949e-06, "loss": 0.1156, "step": 6802 }, { "epoch": 0.6268024139678445, "grad_norm": 0.9780721556358306, "learning_rate": 1.6740455520824852e-06, "loss": 0.1311, "step": 6803 }, { "epoch": 0.6268945501451145, "grad_norm": 0.8756953274991542, "learning_rate": 1.673326583648934e-06, "loss": 0.1223, "step": 6804 }, { "epoch": 0.6269866863223845, "grad_norm": 0.9590846378967668, "learning_rate": 1.672607691972099e-06, "loss": 0.1327, "step": 6805 }, { "epoch": 0.6270788224996545, "grad_norm": 0.9725555167066449, "learning_rate": 1.671888877118728e-06, "loss": 0.1394, "step": 6806 }, { "epoch": 0.6271709586769245, "grad_norm": 0.9680149550710828, "learning_rate": 1.6711701391555654e-06, "loss": 0.1458, "step": 6807 }, { "epoch": 0.6272630948541945, "grad_norm": 0.9266144277449202, "learning_rate": 1.6704514781493439e-06, "loss": 0.1318, "step": 6808 }, { "epoch": 0.6273552310314645, "grad_norm": 0.9044064187078539, "learning_rate": 1.6697328941667911e-06, "loss": 0.1309, "step": 6809 }, { "epoch": 0.6274473672087345, "grad_norm": 0.9161445150421038, "learning_rate": 1.6690143872746295e-06, "loss": 0.1359, "step": 6810 }, { "epoch": 0.6275395033860045, "grad_norm": 0.9039846306537077, "learning_rate": 1.6682959575395717e-06, "loss": 0.13, "step": 6811 }, { "epoch": 0.6276316395632745, "grad_norm": 0.8368274819032405, "learning_rate": 1.6675776050283228e-06, "loss": 0.1117, "step": 6812 }, { "epoch": 0.6277237757405445, "grad_norm": 0.9653527218946011, "learning_rate": 1.666859329807583e-06, "loss": 0.1362, "step": 6813 }, { "epoch": 0.6278159119178145, "grad_norm": 0.9458104946736843, "learning_rate": 1.666141131944044e-06, "loss": 0.1323, "step": 6814 }, { "epoch": 0.6279080480950845, "grad_norm": 0.8872800410857472, "learning_rate": 1.6654230115043915e-06, "loss": 0.1275, "step": 6815 }, { "epoch": 0.6280001842723545, "grad_norm": 0.915733456696928, "learning_rate": 1.6647049685553018e-06, "loss": 0.1295, "step": 6816 }, { "epoch": 0.6280923204496246, "grad_norm": 0.9299225804478112, "learning_rate": 1.663987003163445e-06, "loss": 0.1289, "step": 6817 }, { "epoch": 0.6281844566268946, "grad_norm": 0.893925699523802, "learning_rate": 1.6632691153954855e-06, "loss": 0.1281, "step": 6818 }, { "epoch": 0.6282765928041646, "grad_norm": 0.9610798463300602, "learning_rate": 1.6625513053180791e-06, "loss": 0.1255, "step": 6819 }, { "epoch": 0.6283687289814346, "grad_norm": 0.9394658550305951, "learning_rate": 1.6618335729978736e-06, "loss": 0.1263, "step": 6820 }, { "epoch": 0.6284608651587046, "grad_norm": 0.941465892891746, "learning_rate": 1.661115918501511e-06, "loss": 0.1254, "step": 6821 }, { "epoch": 0.6285530013359746, "grad_norm": 0.9393058701052077, "learning_rate": 1.6603983418956254e-06, "loss": 0.1328, "step": 6822 }, { "epoch": 0.6286451375132446, "grad_norm": 0.9625490206776947, "learning_rate": 1.6596808432468445e-06, "loss": 0.1394, "step": 6823 }, { "epoch": 0.6287372736905146, "grad_norm": 0.934085002587979, "learning_rate": 1.6589634226217883e-06, "loss": 0.1328, "step": 6824 }, { "epoch": 0.6288294098677846, "grad_norm": 0.9148072326121705, "learning_rate": 1.6582460800870675e-06, "loss": 0.1284, "step": 6825 }, { "epoch": 0.6289215460450546, "grad_norm": 0.9431341023115458, "learning_rate": 1.6575288157092898e-06, "loss": 0.1236, "step": 6826 }, { "epoch": 0.6290136822223246, "grad_norm": 1.0046117659117793, "learning_rate": 1.6568116295550515e-06, "loss": 0.1313, "step": 6827 }, { "epoch": 0.6291058183995946, "grad_norm": 0.8757042152992928, "learning_rate": 1.6560945216909451e-06, "loss": 0.1144, "step": 6828 }, { "epoch": 0.6291979545768646, "grad_norm": 0.8499443288587102, "learning_rate": 1.6553774921835528e-06, "loss": 0.1107, "step": 6829 }, { "epoch": 0.6292900907541346, "grad_norm": 0.8959817137425333, "learning_rate": 1.6546605410994507e-06, "loss": 0.1255, "step": 6830 }, { "epoch": 0.6293822269314047, "grad_norm": 0.8993665748741935, "learning_rate": 1.6539436685052087e-06, "loss": 0.1263, "step": 6831 }, { "epoch": 0.6294743631086747, "grad_norm": 0.9006040587776355, "learning_rate": 1.6532268744673887e-06, "loss": 0.1154, "step": 6832 }, { "epoch": 0.6295664992859447, "grad_norm": 0.8915285615887515, "learning_rate": 1.6525101590525435e-06, "loss": 0.1241, "step": 6833 }, { "epoch": 0.6296586354632147, "grad_norm": 0.8626544961619003, "learning_rate": 1.651793522327222e-06, "loss": 0.1275, "step": 6834 }, { "epoch": 0.6297507716404847, "grad_norm": 0.9064252376215327, "learning_rate": 1.6510769643579625e-06, "loss": 0.133, "step": 6835 }, { "epoch": 0.6298429078177546, "grad_norm": 0.893570508838155, "learning_rate": 1.6503604852112992e-06, "loss": 0.1174, "step": 6836 }, { "epoch": 0.6299350439950246, "grad_norm": 0.9209261279569575, "learning_rate": 1.649644084953756e-06, "loss": 0.1335, "step": 6837 }, { "epoch": 0.6300271801722946, "grad_norm": 0.9612542183985056, "learning_rate": 1.6489277636518503e-06, "loss": 0.1364, "step": 6838 }, { "epoch": 0.6301193163495646, "grad_norm": 0.9030891613008248, "learning_rate": 1.6482115213720939e-06, "loss": 0.128, "step": 6839 }, { "epoch": 0.6302114525268346, "grad_norm": 0.892199661038204, "learning_rate": 1.64749535818099e-06, "loss": 0.1234, "step": 6840 }, { "epoch": 0.6303035887041046, "grad_norm": 0.892341611186558, "learning_rate": 1.6467792741450328e-06, "loss": 0.1262, "step": 6841 }, { "epoch": 0.6303957248813746, "grad_norm": 0.9303003796224806, "learning_rate": 1.6460632693307122e-06, "loss": 0.1332, "step": 6842 }, { "epoch": 0.6304878610586446, "grad_norm": 0.971311474946169, "learning_rate": 1.6453473438045088e-06, "loss": 0.1282, "step": 6843 }, { "epoch": 0.6305799972359147, "grad_norm": 0.9128566574816395, "learning_rate": 1.644631497632897e-06, "loss": 0.1268, "step": 6844 }, { "epoch": 0.6306721334131847, "grad_norm": 0.9329959880146634, "learning_rate": 1.6439157308823425e-06, "loss": 0.1281, "step": 6845 }, { "epoch": 0.6307642695904547, "grad_norm": 0.9643591086782031, "learning_rate": 1.6432000436193042e-06, "loss": 0.14, "step": 6846 }, { "epoch": 0.6308564057677247, "grad_norm": 0.8949048912604123, "learning_rate": 1.642484435910234e-06, "loss": 0.1275, "step": 6847 }, { "epoch": 0.6309485419449947, "grad_norm": 0.9294314454625718, "learning_rate": 1.6417689078215771e-06, "loss": 0.1317, "step": 6848 }, { "epoch": 0.6310406781222647, "grad_norm": 0.9092264943448212, "learning_rate": 1.6410534594197687e-06, "loss": 0.1243, "step": 6849 }, { "epoch": 0.6311328142995347, "grad_norm": 0.8915342261422562, "learning_rate": 1.640338090771239e-06, "loss": 0.126, "step": 6850 }, { "epoch": 0.6312249504768047, "grad_norm": 0.9631374154156046, "learning_rate": 1.6396228019424099e-06, "loss": 0.1193, "step": 6851 }, { "epoch": 0.6313170866540747, "grad_norm": 1.028510708487749, "learning_rate": 1.6389075929996961e-06, "loss": 0.1425, "step": 6852 }, { "epoch": 0.6314092228313447, "grad_norm": 0.9457079527488705, "learning_rate": 1.6381924640095065e-06, "loss": 0.1232, "step": 6853 }, { "epoch": 0.6315013590086147, "grad_norm": 0.8672064169139518, "learning_rate": 1.6374774150382377e-06, "loss": 0.1191, "step": 6854 }, { "epoch": 0.6315934951858847, "grad_norm": 0.9792257903977444, "learning_rate": 1.6367624461522841e-06, "loss": 0.1303, "step": 6855 }, { "epoch": 0.6316856313631547, "grad_norm": 0.9661371788971164, "learning_rate": 1.6360475574180306e-06, "loss": 0.136, "step": 6856 }, { "epoch": 0.6317777675404247, "grad_norm": 0.9543594834857416, "learning_rate": 1.635332748901855e-06, "loss": 0.1294, "step": 6857 }, { "epoch": 0.6318699037176948, "grad_norm": 0.8929324029831897, "learning_rate": 1.6346180206701256e-06, "loss": 0.1237, "step": 6858 }, { "epoch": 0.6319620398949648, "grad_norm": 0.9010697000995774, "learning_rate": 1.6339033727892067e-06, "loss": 0.1271, "step": 6859 }, { "epoch": 0.6320541760722348, "grad_norm": 0.8981085366716448, "learning_rate": 1.6331888053254521e-06, "loss": 0.1287, "step": 6860 }, { "epoch": 0.6321463122495048, "grad_norm": 0.9238622764409322, "learning_rate": 1.6324743183452113e-06, "loss": 0.1316, "step": 6861 }, { "epoch": 0.6322384484267748, "grad_norm": 0.9109979518487612, "learning_rate": 1.631759911914823e-06, "loss": 0.1232, "step": 6862 }, { "epoch": 0.6323305846040448, "grad_norm": 0.9189914997104379, "learning_rate": 1.63104558610062e-06, "loss": 0.1279, "step": 6863 }, { "epoch": 0.6324227207813148, "grad_norm": 0.9492831449731975, "learning_rate": 1.630331340968928e-06, "loss": 0.139, "step": 6864 }, { "epoch": 0.6325148569585848, "grad_norm": 0.9602943007686003, "learning_rate": 1.6296171765860651e-06, "loss": 0.1392, "step": 6865 }, { "epoch": 0.6326069931358548, "grad_norm": 0.96860825635109, "learning_rate": 1.6289030930183403e-06, "loss": 0.1261, "step": 6866 }, { "epoch": 0.6326991293131248, "grad_norm": 0.9355225957084158, "learning_rate": 1.6281890903320574e-06, "loss": 0.1349, "step": 6867 }, { "epoch": 0.6327912654903948, "grad_norm": 0.8994626792406648, "learning_rate": 1.627475168593511e-06, "loss": 0.1232, "step": 6868 }, { "epoch": 0.6328834016676648, "grad_norm": 0.8585321066061194, "learning_rate": 1.6267613278689898e-06, "loss": 0.1172, "step": 6869 }, { "epoch": 0.6329755378449348, "grad_norm": 1.0178195384314648, "learning_rate": 1.626047568224773e-06, "loss": 0.1346, "step": 6870 }, { "epoch": 0.6330676740222049, "grad_norm": 0.9180606266375119, "learning_rate": 1.625333889727133e-06, "loss": 0.1305, "step": 6871 }, { "epoch": 0.6331598101994749, "grad_norm": 0.86008781184332, "learning_rate": 1.624620292442336e-06, "loss": 0.1128, "step": 6872 }, { "epoch": 0.6332519463767449, "grad_norm": 0.8953508403267575, "learning_rate": 1.6239067764366396e-06, "loss": 0.1295, "step": 6873 }, { "epoch": 0.6333440825540149, "grad_norm": 0.9701241179780662, "learning_rate": 1.6231933417762918e-06, "loss": 0.132, "step": 6874 }, { "epoch": 0.6334362187312849, "grad_norm": 0.8964266930393143, "learning_rate": 1.6224799885275378e-06, "loss": 0.131, "step": 6875 }, { "epoch": 0.6335283549085549, "grad_norm": 0.8920990399552023, "learning_rate": 1.6217667167566103e-06, "loss": 0.1214, "step": 6876 }, { "epoch": 0.6336204910858249, "grad_norm": 0.9034818516436899, "learning_rate": 1.6210535265297389e-06, "loss": 0.1195, "step": 6877 }, { "epoch": 0.6337126272630949, "grad_norm": 0.9099483703963683, "learning_rate": 1.6203404179131415e-06, "loss": 0.1269, "step": 6878 }, { "epoch": 0.6338047634403648, "grad_norm": 0.919892064573786, "learning_rate": 1.6196273909730303e-06, "loss": 0.1291, "step": 6879 }, { "epoch": 0.6338968996176348, "grad_norm": 0.9388586022519625, "learning_rate": 1.6189144457756118e-06, "loss": 0.1282, "step": 6880 }, { "epoch": 0.6339890357949048, "grad_norm": 0.9493861638877241, "learning_rate": 1.6182015823870805e-06, "loss": 0.129, "step": 6881 }, { "epoch": 0.6340811719721748, "grad_norm": 0.9149259117449804, "learning_rate": 1.617488800873629e-06, "loss": 0.1364, "step": 6882 }, { "epoch": 0.6341733081494448, "grad_norm": 0.9344518751317833, "learning_rate": 1.616776101301436e-06, "loss": 0.1343, "step": 6883 }, { "epoch": 0.6342654443267148, "grad_norm": 0.8832340238190585, "learning_rate": 1.6160634837366771e-06, "loss": 0.1162, "step": 6884 }, { "epoch": 0.6343575805039849, "grad_norm": 0.8869408664575485, "learning_rate": 1.615350948245519e-06, "loss": 0.1304, "step": 6885 }, { "epoch": 0.6344497166812549, "grad_norm": 0.9268014816586394, "learning_rate": 1.6146384948941213e-06, "loss": 0.1257, "step": 6886 }, { "epoch": 0.6345418528585249, "grad_norm": 0.9930422635091828, "learning_rate": 1.6139261237486337e-06, "loss": 0.1316, "step": 6887 }, { "epoch": 0.6346339890357949, "grad_norm": 0.9828861548408764, "learning_rate": 1.6132138348752013e-06, "loss": 0.1396, "step": 6888 }, { "epoch": 0.6347261252130649, "grad_norm": 0.9392508922508789, "learning_rate": 1.6125016283399592e-06, "loss": 0.1238, "step": 6889 }, { "epoch": 0.6348182613903349, "grad_norm": 0.9367414950444616, "learning_rate": 1.6117895042090374e-06, "loss": 0.1369, "step": 6890 }, { "epoch": 0.6349103975676049, "grad_norm": 0.8913932424175949, "learning_rate": 1.6110774625485554e-06, "loss": 0.1277, "step": 6891 }, { "epoch": 0.6350025337448749, "grad_norm": 0.9487507579836935, "learning_rate": 1.6103655034246256e-06, "loss": 0.1293, "step": 6892 }, { "epoch": 0.6350946699221449, "grad_norm": 0.9078867583893813, "learning_rate": 1.6096536269033557e-06, "loss": 0.1253, "step": 6893 }, { "epoch": 0.6351868060994149, "grad_norm": 0.9657934294617364, "learning_rate": 1.6089418330508427e-06, "loss": 0.1303, "step": 6894 }, { "epoch": 0.6352789422766849, "grad_norm": 0.9369656368147398, "learning_rate": 1.6082301219331754e-06, "loss": 0.1361, "step": 6895 }, { "epoch": 0.6353710784539549, "grad_norm": 0.9221568322665612, "learning_rate": 1.6075184936164377e-06, "loss": 0.1186, "step": 6896 }, { "epoch": 0.6354632146312249, "grad_norm": 0.9646737106447447, "learning_rate": 1.606806948166703e-06, "loss": 0.1283, "step": 6897 }, { "epoch": 0.6355553508084949, "grad_norm": 0.985452183450984, "learning_rate": 1.606095485650041e-06, "loss": 0.1385, "step": 6898 }, { "epoch": 0.635647486985765, "grad_norm": 0.9455020939527603, "learning_rate": 1.6053841061325086e-06, "loss": 0.1298, "step": 6899 }, { "epoch": 0.635739623163035, "grad_norm": 0.9890402174988893, "learning_rate": 1.6046728096801575e-06, "loss": 0.1357, "step": 6900 }, { "epoch": 0.635831759340305, "grad_norm": 0.9609982352039083, "learning_rate": 1.6039615963590332e-06, "loss": 0.126, "step": 6901 }, { "epoch": 0.635923895517575, "grad_norm": 0.956556821723389, "learning_rate": 1.6032504662351713e-06, "loss": 0.1325, "step": 6902 }, { "epoch": 0.636016031694845, "grad_norm": 0.9432174119765748, "learning_rate": 1.6025394193745993e-06, "loss": 0.1276, "step": 6903 }, { "epoch": 0.636108167872115, "grad_norm": 0.9059817128700657, "learning_rate": 1.6018284558433395e-06, "loss": 0.1302, "step": 6904 }, { "epoch": 0.636200304049385, "grad_norm": 0.9010824136241979, "learning_rate": 1.6011175757074035e-06, "loss": 0.1273, "step": 6905 }, { "epoch": 0.636292440226655, "grad_norm": 0.9038667529472869, "learning_rate": 1.6004067790327983e-06, "loss": 0.1332, "step": 6906 }, { "epoch": 0.636384576403925, "grad_norm": 0.9335549013514323, "learning_rate": 1.5996960658855201e-06, "loss": 0.1453, "step": 6907 }, { "epoch": 0.636476712581195, "grad_norm": 0.9466004752685642, "learning_rate": 1.5989854363315585e-06, "loss": 0.136, "step": 6908 }, { "epoch": 0.636568848758465, "grad_norm": 0.9078843220602426, "learning_rate": 1.5982748904368966e-06, "loss": 0.132, "step": 6909 }, { "epoch": 0.636660984935735, "grad_norm": 0.9085035676803845, "learning_rate": 1.5975644282675077e-06, "loss": 0.1229, "step": 6910 }, { "epoch": 0.636753121113005, "grad_norm": 0.9707588244468391, "learning_rate": 1.5968540498893598e-06, "loss": 0.1399, "step": 6911 }, { "epoch": 0.6368452572902751, "grad_norm": 0.903772261250293, "learning_rate": 1.59614375536841e-06, "loss": 0.1325, "step": 6912 }, { "epoch": 0.6369373934675451, "grad_norm": 0.877488888929862, "learning_rate": 1.5954335447706093e-06, "loss": 0.1202, "step": 6913 }, { "epoch": 0.6370295296448151, "grad_norm": 0.9048261590261063, "learning_rate": 1.5947234181619017e-06, "loss": 0.126, "step": 6914 }, { "epoch": 0.6371216658220851, "grad_norm": 0.8992318434307321, "learning_rate": 1.5940133756082226e-06, "loss": 0.1235, "step": 6915 }, { "epoch": 0.6372138019993551, "grad_norm": 0.8816222891695241, "learning_rate": 1.5933034171754985e-06, "loss": 0.1233, "step": 6916 }, { "epoch": 0.6373059381766251, "grad_norm": 0.9163390848974939, "learning_rate": 1.5925935429296499e-06, "loss": 0.1227, "step": 6917 }, { "epoch": 0.6373980743538951, "grad_norm": 0.9121612444035625, "learning_rate": 1.5918837529365884e-06, "loss": 0.1197, "step": 6918 }, { "epoch": 0.6374902105311651, "grad_norm": 0.9739517316471556, "learning_rate": 1.5911740472622184e-06, "loss": 0.1325, "step": 6919 }, { "epoch": 0.637582346708435, "grad_norm": 0.9782508014301649, "learning_rate": 1.590464425972436e-06, "loss": 0.1369, "step": 6920 }, { "epoch": 0.637674482885705, "grad_norm": 0.9666734281729935, "learning_rate": 1.5897548891331288e-06, "loss": 0.1424, "step": 6921 }, { "epoch": 0.637766619062975, "grad_norm": 0.928424842338119, "learning_rate": 1.5890454368101788e-06, "loss": 0.1319, "step": 6922 }, { "epoch": 0.637858755240245, "grad_norm": 0.9400505264698045, "learning_rate": 1.5883360690694582e-06, "loss": 0.1248, "step": 6923 }, { "epoch": 0.637950891417515, "grad_norm": 0.8670288793128172, "learning_rate": 1.587626785976831e-06, "loss": 0.1156, "step": 6924 }, { "epoch": 0.638043027594785, "grad_norm": 0.8702635503997724, "learning_rate": 1.5869175875981551e-06, "loss": 0.1228, "step": 6925 }, { "epoch": 0.6381351637720551, "grad_norm": 0.9992073872034697, "learning_rate": 1.5862084739992794e-06, "loss": 0.1425, "step": 6926 }, { "epoch": 0.6382272999493251, "grad_norm": 0.9676724696571829, "learning_rate": 1.585499445246046e-06, "loss": 0.1381, "step": 6927 }, { "epoch": 0.6383194361265951, "grad_norm": 0.9479667525779824, "learning_rate": 1.584790501404287e-06, "loss": 0.1414, "step": 6928 }, { "epoch": 0.6384115723038651, "grad_norm": 0.9202645800521777, "learning_rate": 1.5840816425398282e-06, "loss": 0.1329, "step": 6929 }, { "epoch": 0.6385037084811351, "grad_norm": 0.9281144590684807, "learning_rate": 1.5833728687184868e-06, "loss": 0.1288, "step": 6930 }, { "epoch": 0.6385958446584051, "grad_norm": 0.9212398432492384, "learning_rate": 1.5826641800060755e-06, "loss": 0.1235, "step": 6931 }, { "epoch": 0.6386879808356751, "grad_norm": 0.907492438307411, "learning_rate": 1.581955576468392e-06, "loss": 0.1297, "step": 6932 }, { "epoch": 0.6387801170129451, "grad_norm": 0.9343330686647542, "learning_rate": 1.581247058171232e-06, "loss": 0.1308, "step": 6933 }, { "epoch": 0.6388722531902151, "grad_norm": 0.8988949949443719, "learning_rate": 1.5805386251803818e-06, "loss": 0.1183, "step": 6934 }, { "epoch": 0.6389643893674851, "grad_norm": 0.9210865319052437, "learning_rate": 1.5798302775616198e-06, "loss": 0.1257, "step": 6935 }, { "epoch": 0.6390565255447551, "grad_norm": 0.8669787078566003, "learning_rate": 1.5791220153807146e-06, "loss": 0.1099, "step": 6936 }, { "epoch": 0.6391486617220251, "grad_norm": 0.9472952498117674, "learning_rate": 1.5784138387034302e-06, "loss": 0.1283, "step": 6937 }, { "epoch": 0.6392407978992951, "grad_norm": 0.9301216379397955, "learning_rate": 1.5777057475955194e-06, "loss": 0.1332, "step": 6938 }, { "epoch": 0.6393329340765652, "grad_norm": 0.9069566491323627, "learning_rate": 1.5769977421227295e-06, "loss": 0.1201, "step": 6939 }, { "epoch": 0.6394250702538352, "grad_norm": 0.9762702867137394, "learning_rate": 1.5762898223507989e-06, "loss": 0.1368, "step": 6940 }, { "epoch": 0.6395172064311052, "grad_norm": 0.9753252251298636, "learning_rate": 1.575581988345457e-06, "loss": 0.1377, "step": 6941 }, { "epoch": 0.6396093426083752, "grad_norm": 0.9245488069439479, "learning_rate": 1.5748742401724276e-06, "loss": 0.1277, "step": 6942 }, { "epoch": 0.6397014787856452, "grad_norm": 0.9222913829706925, "learning_rate": 1.5741665778974239e-06, "loss": 0.1288, "step": 6943 }, { "epoch": 0.6397936149629152, "grad_norm": 0.9101223878977888, "learning_rate": 1.5734590015861539e-06, "loss": 0.1309, "step": 6944 }, { "epoch": 0.6398857511401852, "grad_norm": 0.960200928451764, "learning_rate": 1.5727515113043152e-06, "loss": 0.1398, "step": 6945 }, { "epoch": 0.6399778873174552, "grad_norm": 0.8788028856828145, "learning_rate": 1.5720441071175976e-06, "loss": 0.1145, "step": 6946 }, { "epoch": 0.6400700234947252, "grad_norm": 0.8708461112250071, "learning_rate": 1.5713367890916852e-06, "loss": 0.1225, "step": 6947 }, { "epoch": 0.6401621596719952, "grad_norm": 0.8772041016771617, "learning_rate": 1.5706295572922524e-06, "loss": 0.1173, "step": 6948 }, { "epoch": 0.6402542958492652, "grad_norm": 0.897848427339447, "learning_rate": 1.5699224117849644e-06, "loss": 0.1382, "step": 6949 }, { "epoch": 0.6403464320265352, "grad_norm": 0.8624040978043598, "learning_rate": 1.569215352635481e-06, "loss": 0.1172, "step": 6950 }, { "epoch": 0.6404385682038052, "grad_norm": 0.9391168244715019, "learning_rate": 1.5685083799094513e-06, "loss": 0.1246, "step": 6951 }, { "epoch": 0.6405307043810752, "grad_norm": 0.9483088170825726, "learning_rate": 1.56780149367252e-06, "loss": 0.1306, "step": 6952 }, { "epoch": 0.6406228405583453, "grad_norm": 0.9378810721645574, "learning_rate": 1.5670946939903201e-06, "loss": 0.1317, "step": 6953 }, { "epoch": 0.6407149767356153, "grad_norm": 0.8918097080165212, "learning_rate": 1.5663879809284777e-06, "loss": 0.1223, "step": 6954 }, { "epoch": 0.6408071129128853, "grad_norm": 0.9183976546797075, "learning_rate": 1.565681354552612e-06, "loss": 0.1322, "step": 6955 }, { "epoch": 0.6408992490901553, "grad_norm": 0.8927835519177011, "learning_rate": 1.5649748149283339e-06, "loss": 0.1156, "step": 6956 }, { "epoch": 0.6409913852674253, "grad_norm": 0.9527061176749169, "learning_rate": 1.5642683621212435e-06, "loss": 0.1264, "step": 6957 }, { "epoch": 0.6410835214446953, "grad_norm": 0.9276007981920011, "learning_rate": 1.5635619961969372e-06, "loss": 0.1224, "step": 6958 }, { "epoch": 0.6411756576219653, "grad_norm": 0.9341219872288229, "learning_rate": 1.5628557172209997e-06, "loss": 0.1242, "step": 6959 }, { "epoch": 0.6412677937992353, "grad_norm": 0.9087165767479295, "learning_rate": 1.5621495252590108e-06, "loss": 0.1232, "step": 6960 }, { "epoch": 0.6413599299765053, "grad_norm": 0.9845108733921047, "learning_rate": 1.561443420376539e-06, "loss": 0.1318, "step": 6961 }, { "epoch": 0.6414520661537753, "grad_norm": 0.8579120613839665, "learning_rate": 1.560737402639146e-06, "loss": 0.1142, "step": 6962 }, { "epoch": 0.6415442023310453, "grad_norm": 0.9145911360522224, "learning_rate": 1.5600314721123866e-06, "loss": 0.1305, "step": 6963 }, { "epoch": 0.6416363385083153, "grad_norm": 1.0023486798357177, "learning_rate": 1.5593256288618067e-06, "loss": 0.1388, "step": 6964 }, { "epoch": 0.6417284746855852, "grad_norm": 0.897665746625067, "learning_rate": 1.5586198729529422e-06, "loss": 0.1224, "step": 6965 }, { "epoch": 0.6418206108628552, "grad_norm": 0.9742657750149276, "learning_rate": 1.5579142044513248e-06, "loss": 0.1296, "step": 6966 }, { "epoch": 0.6419127470401254, "grad_norm": 0.9864819104891229, "learning_rate": 1.5572086234224743e-06, "loss": 0.127, "step": 6967 }, { "epoch": 0.6420048832173954, "grad_norm": 0.9440948450795339, "learning_rate": 1.556503129931905e-06, "loss": 0.1329, "step": 6968 }, { "epoch": 0.6420970193946653, "grad_norm": 0.8435423134543755, "learning_rate": 1.5557977240451223e-06, "loss": 0.1153, "step": 6969 }, { "epoch": 0.6421891555719353, "grad_norm": 0.9570481012038368, "learning_rate": 1.5550924058276213e-06, "loss": 0.127, "step": 6970 }, { "epoch": 0.6422812917492053, "grad_norm": 0.9603904086633094, "learning_rate": 1.5543871753448924e-06, "loss": 0.1355, "step": 6971 }, { "epoch": 0.6423734279264753, "grad_norm": 0.8370803839751185, "learning_rate": 1.5536820326624159e-06, "loss": 0.1134, "step": 6972 }, { "epoch": 0.6424655641037453, "grad_norm": 0.878586123110724, "learning_rate": 1.5529769778456654e-06, "loss": 0.1255, "step": 6973 }, { "epoch": 0.6425577002810153, "grad_norm": 0.934161796637777, "learning_rate": 1.5522720109601039e-06, "loss": 0.1338, "step": 6974 }, { "epoch": 0.6426498364582853, "grad_norm": 0.9010882629753968, "learning_rate": 1.5515671320711877e-06, "loss": 0.1239, "step": 6975 }, { "epoch": 0.6427419726355553, "grad_norm": 0.8943824566421207, "learning_rate": 1.5508623412443657e-06, "loss": 0.1302, "step": 6976 }, { "epoch": 0.6428341088128253, "grad_norm": 0.9430422757119458, "learning_rate": 1.5501576385450785e-06, "loss": 0.138, "step": 6977 }, { "epoch": 0.6429262449900953, "grad_norm": 0.8608802032031488, "learning_rate": 1.5494530240387552e-06, "loss": 0.1157, "step": 6978 }, { "epoch": 0.6430183811673653, "grad_norm": 0.8931155875083723, "learning_rate": 1.5487484977908219e-06, "loss": 0.1209, "step": 6979 }, { "epoch": 0.6431105173446354, "grad_norm": 0.934058212786175, "learning_rate": 1.5480440598666918e-06, "loss": 0.1286, "step": 6980 }, { "epoch": 0.6432026535219054, "grad_norm": 0.9499195487628329, "learning_rate": 1.5473397103317748e-06, "loss": 0.1289, "step": 6981 }, { "epoch": 0.6432947896991754, "grad_norm": 0.9172407650858186, "learning_rate": 1.5466354492514675e-06, "loss": 0.1234, "step": 6982 }, { "epoch": 0.6433869258764454, "grad_norm": 0.9040807218912611, "learning_rate": 1.5459312766911607e-06, "loss": 0.1147, "step": 6983 }, { "epoch": 0.6434790620537154, "grad_norm": 0.9821552436596821, "learning_rate": 1.5452271927162381e-06, "loss": 0.1332, "step": 6984 }, { "epoch": 0.6435711982309854, "grad_norm": 0.9677205513939832, "learning_rate": 1.5445231973920744e-06, "loss": 0.1383, "step": 6985 }, { "epoch": 0.6436633344082554, "grad_norm": 0.9278700960504868, "learning_rate": 1.543819290784033e-06, "loss": 0.1203, "step": 6986 }, { "epoch": 0.6437554705855254, "grad_norm": 1.0164771249246947, "learning_rate": 1.5431154729574743e-06, "loss": 0.1382, "step": 6987 }, { "epoch": 0.6438476067627954, "grad_norm": 0.9576436472546979, "learning_rate": 1.5424117439777458e-06, "loss": 0.1241, "step": 6988 }, { "epoch": 0.6439397429400654, "grad_norm": 0.8990740819754796, "learning_rate": 1.5417081039101916e-06, "loss": 0.1356, "step": 6989 }, { "epoch": 0.6440318791173354, "grad_norm": 0.9008021527591362, "learning_rate": 1.5410045528201423e-06, "loss": 0.1152, "step": 6990 }, { "epoch": 0.6441240152946054, "grad_norm": 0.8895201725561562, "learning_rate": 1.5403010907729233e-06, "loss": 0.1225, "step": 6991 }, { "epoch": 0.6442161514718754, "grad_norm": 0.9060414956049287, "learning_rate": 1.5395977178338511e-06, "loss": 0.1231, "step": 6992 }, { "epoch": 0.6443082876491454, "grad_norm": 0.9858058843844217, "learning_rate": 1.5388944340682352e-06, "loss": 0.1293, "step": 6993 }, { "epoch": 0.6444004238264155, "grad_norm": 0.9830503873904406, "learning_rate": 1.5381912395413733e-06, "loss": 0.1328, "step": 6994 }, { "epoch": 0.6444925600036855, "grad_norm": 0.8953062308564167, "learning_rate": 1.5374881343185592e-06, "loss": 0.1175, "step": 6995 }, { "epoch": 0.6445846961809555, "grad_norm": 0.932027313269444, "learning_rate": 1.5367851184650745e-06, "loss": 0.1366, "step": 6996 }, { "epoch": 0.6446768323582255, "grad_norm": 0.9017210709791099, "learning_rate": 1.536082192046196e-06, "loss": 0.1241, "step": 6997 }, { "epoch": 0.6447689685354955, "grad_norm": 0.8736522849637017, "learning_rate": 1.53537935512719e-06, "loss": 0.124, "step": 6998 }, { "epoch": 0.6448611047127655, "grad_norm": 0.9243176302835587, "learning_rate": 1.5346766077733138e-06, "loss": 0.1266, "step": 6999 }, { "epoch": 0.6449532408900355, "grad_norm": 0.9500637805596197, "learning_rate": 1.5339739500498189e-06, "loss": 0.137, "step": 7000 }, { "epoch": 0.6449532408900355, "eval_loss": 0.1284143477678299, "eval_runtime": 299.5444, "eval_samples_per_second": 23.426, "eval_steps_per_second": 2.931, "step": 7000 }, { "epoch": 0.6450453770673055, "grad_norm": 0.9176312502247079, "learning_rate": 1.5332713820219461e-06, "loss": 0.124, "step": 7001 }, { "epoch": 0.6451375132445755, "grad_norm": 0.9517224833402965, "learning_rate": 1.5325689037549307e-06, "loss": 0.1278, "step": 7002 }, { "epoch": 0.6452296494218455, "grad_norm": 0.9012201702409783, "learning_rate": 1.531866515313996e-06, "loss": 0.1294, "step": 7003 }, { "epoch": 0.6453217855991155, "grad_norm": 0.870380747830478, "learning_rate": 1.5311642167643592e-06, "loss": 0.1127, "step": 7004 }, { "epoch": 0.6454139217763855, "grad_norm": 0.9019711406867783, "learning_rate": 1.530462008171229e-06, "loss": 0.1284, "step": 7005 }, { "epoch": 0.6455060579536555, "grad_norm": 0.9153502803076784, "learning_rate": 1.5297598895998076e-06, "loss": 0.128, "step": 7006 }, { "epoch": 0.6455981941309256, "grad_norm": 0.9397863736754628, "learning_rate": 1.529057861115283e-06, "loss": 0.1225, "step": 7007 }, { "epoch": 0.6456903303081956, "grad_norm": 0.9821388898224823, "learning_rate": 1.5283559227828404e-06, "loss": 0.1292, "step": 7008 }, { "epoch": 0.6457824664854656, "grad_norm": 0.9461970827854304, "learning_rate": 1.5276540746676558e-06, "loss": 0.1184, "step": 7009 }, { "epoch": 0.6458746026627356, "grad_norm": 0.9689676881269678, "learning_rate": 1.5269523168348954e-06, "loss": 0.1319, "step": 7010 }, { "epoch": 0.6459667388400055, "grad_norm": 0.9410910844995799, "learning_rate": 1.5262506493497159e-06, "loss": 0.1259, "step": 7011 }, { "epoch": 0.6460588750172755, "grad_norm": 0.983799739077306, "learning_rate": 1.525549072277269e-06, "loss": 0.1288, "step": 7012 }, { "epoch": 0.6461510111945455, "grad_norm": 0.8946262414629372, "learning_rate": 1.524847585682695e-06, "loss": 0.1265, "step": 7013 }, { "epoch": 0.6462431473718155, "grad_norm": 0.925928189408186, "learning_rate": 1.5241461896311288e-06, "loss": 0.1333, "step": 7014 }, { "epoch": 0.6463352835490855, "grad_norm": 0.9306429155305737, "learning_rate": 1.5234448841876935e-06, "loss": 0.1275, "step": 7015 }, { "epoch": 0.6464274197263555, "grad_norm": 0.9580787768584496, "learning_rate": 1.5227436694175052e-06, "loss": 0.1344, "step": 7016 }, { "epoch": 0.6465195559036255, "grad_norm": 0.917554371682307, "learning_rate": 1.5220425453856728e-06, "loss": 0.1173, "step": 7017 }, { "epoch": 0.6466116920808955, "grad_norm": 0.9025472883792388, "learning_rate": 1.5213415121572959e-06, "loss": 0.1194, "step": 7018 }, { "epoch": 0.6467038282581655, "grad_norm": 0.9103463103642951, "learning_rate": 1.5206405697974635e-06, "loss": 0.1353, "step": 7019 }, { "epoch": 0.6467959644354355, "grad_norm": 0.9179849344012468, "learning_rate": 1.5199397183712606e-06, "loss": 0.1301, "step": 7020 }, { "epoch": 0.6468881006127056, "grad_norm": 0.9046015817619245, "learning_rate": 1.5192389579437596e-06, "loss": 0.1208, "step": 7021 }, { "epoch": 0.6469802367899756, "grad_norm": 0.9338287299267279, "learning_rate": 1.5185382885800282e-06, "loss": 0.13, "step": 7022 }, { "epoch": 0.6470723729672456, "grad_norm": 0.8913972315365095, "learning_rate": 1.5178377103451213e-06, "loss": 0.1284, "step": 7023 }, { "epoch": 0.6471645091445156, "grad_norm": 0.9767424424928457, "learning_rate": 1.5171372233040887e-06, "loss": 0.1427, "step": 7024 }, { "epoch": 0.6472566453217856, "grad_norm": 0.9782173682999218, "learning_rate": 1.516436827521971e-06, "loss": 0.1308, "step": 7025 }, { "epoch": 0.6473487814990556, "grad_norm": 0.8844971219928716, "learning_rate": 1.5157365230637993e-06, "loss": 0.1233, "step": 7026 }, { "epoch": 0.6474409176763256, "grad_norm": 0.9548655466581969, "learning_rate": 1.5150363099945984e-06, "loss": 0.133, "step": 7027 }, { "epoch": 0.6475330538535956, "grad_norm": 0.9232492722707007, "learning_rate": 1.5143361883793814e-06, "loss": 0.1379, "step": 7028 }, { "epoch": 0.6476251900308656, "grad_norm": 0.9127807484244733, "learning_rate": 1.513636158283155e-06, "loss": 0.1252, "step": 7029 }, { "epoch": 0.6477173262081356, "grad_norm": 0.9288643059106027, "learning_rate": 1.512936219770918e-06, "loss": 0.1258, "step": 7030 }, { "epoch": 0.6478094623854056, "grad_norm": 0.9159865631279025, "learning_rate": 1.5122363729076595e-06, "loss": 0.1152, "step": 7031 }, { "epoch": 0.6479015985626756, "grad_norm": 0.9582038384688031, "learning_rate": 1.5115366177583596e-06, "loss": 0.1245, "step": 7032 }, { "epoch": 0.6479937347399456, "grad_norm": 0.9135516336834137, "learning_rate": 1.510836954387991e-06, "loss": 0.1258, "step": 7033 }, { "epoch": 0.6480858709172156, "grad_norm": 0.9392462355403146, "learning_rate": 1.5101373828615172e-06, "loss": 0.1286, "step": 7034 }, { "epoch": 0.6481780070944857, "grad_norm": 0.8760360479867124, "learning_rate": 1.5094379032438956e-06, "loss": 0.1309, "step": 7035 }, { "epoch": 0.6482701432717557, "grad_norm": 0.8521056439065474, "learning_rate": 1.50873851560007e-06, "loss": 0.1213, "step": 7036 }, { "epoch": 0.6483622794490257, "grad_norm": 0.9119974004512107, "learning_rate": 1.50803921999498e-06, "loss": 0.1277, "step": 7037 }, { "epoch": 0.6484544156262957, "grad_norm": 0.864820230481895, "learning_rate": 1.5073400164935554e-06, "loss": 0.1098, "step": 7038 }, { "epoch": 0.6485465518035657, "grad_norm": 0.9371074072350675, "learning_rate": 1.5066409051607175e-06, "loss": 0.1283, "step": 7039 }, { "epoch": 0.6486386879808357, "grad_norm": 0.9440464798003158, "learning_rate": 1.5059418860613779e-06, "loss": 0.1239, "step": 7040 }, { "epoch": 0.6487308241581057, "grad_norm": 1.0084876710261992, "learning_rate": 1.5052429592604411e-06, "loss": 0.1346, "step": 7041 }, { "epoch": 0.6488229603353757, "grad_norm": 0.9344953956197914, "learning_rate": 1.5045441248228024e-06, "loss": 0.1346, "step": 7042 }, { "epoch": 0.6489150965126457, "grad_norm": 0.9987049159397682, "learning_rate": 1.5038453828133498e-06, "loss": 0.1408, "step": 7043 }, { "epoch": 0.6490072326899157, "grad_norm": 0.9838768617974539, "learning_rate": 1.50314673329696e-06, "loss": 0.1397, "step": 7044 }, { "epoch": 0.6490993688671857, "grad_norm": 0.8951590164756219, "learning_rate": 1.502448176338503e-06, "loss": 0.1207, "step": 7045 }, { "epoch": 0.6491915050444557, "grad_norm": 0.8974579872474433, "learning_rate": 1.5017497120028404e-06, "loss": 0.1296, "step": 7046 }, { "epoch": 0.6492836412217257, "grad_norm": 0.8768387129540236, "learning_rate": 1.5010513403548253e-06, "loss": 0.1255, "step": 7047 }, { "epoch": 0.6493757773989958, "grad_norm": 0.8970459648430776, "learning_rate": 1.5003530614592995e-06, "loss": 0.1323, "step": 7048 }, { "epoch": 0.6494679135762658, "grad_norm": 0.9665064017272036, "learning_rate": 1.4996548753811001e-06, "loss": 0.1355, "step": 7049 }, { "epoch": 0.6495600497535358, "grad_norm": 0.8812970511013521, "learning_rate": 1.4989567821850527e-06, "loss": 0.1211, "step": 7050 }, { "epoch": 0.6496521859308058, "grad_norm": 0.9746647652708732, "learning_rate": 1.4982587819359767e-06, "loss": 0.1253, "step": 7051 }, { "epoch": 0.6497443221080758, "grad_norm": 0.932466168486249, "learning_rate": 1.4975608746986802e-06, "loss": 0.1296, "step": 7052 }, { "epoch": 0.6498364582853458, "grad_norm": 0.9063329649266557, "learning_rate": 1.4968630605379642e-06, "loss": 0.1188, "step": 7053 }, { "epoch": 0.6499285944626157, "grad_norm": 0.9583750002128922, "learning_rate": 1.496165339518621e-06, "loss": 0.1356, "step": 7054 }, { "epoch": 0.6500207306398857, "grad_norm": 0.9622381338587453, "learning_rate": 1.495467711705434e-06, "loss": 0.1427, "step": 7055 }, { "epoch": 0.6501128668171557, "grad_norm": 0.8843393452517173, "learning_rate": 1.4947701771631788e-06, "loss": 0.1214, "step": 7056 }, { "epoch": 0.6502050029944257, "grad_norm": 0.9571823770880513, "learning_rate": 1.4940727359566205e-06, "loss": 0.14, "step": 7057 }, { "epoch": 0.6502971391716957, "grad_norm": 0.8934023203022077, "learning_rate": 1.493375388150516e-06, "loss": 0.1211, "step": 7058 }, { "epoch": 0.6503892753489657, "grad_norm": 0.9452011328243307, "learning_rate": 1.4926781338096158e-06, "loss": 0.135, "step": 7059 }, { "epoch": 0.6504814115262357, "grad_norm": 0.9565891549170992, "learning_rate": 1.4919809729986598e-06, "loss": 0.1308, "step": 7060 }, { "epoch": 0.6505735477035057, "grad_norm": 0.927163730231958, "learning_rate": 1.491283905782378e-06, "loss": 0.116, "step": 7061 }, { "epoch": 0.6506656838807758, "grad_norm": 0.9764884266369946, "learning_rate": 1.4905869322254946e-06, "loss": 0.1253, "step": 7062 }, { "epoch": 0.6507578200580458, "grad_norm": 0.9093361220955308, "learning_rate": 1.4898900523927224e-06, "loss": 0.1217, "step": 7063 }, { "epoch": 0.6508499562353158, "grad_norm": 1.00659867942203, "learning_rate": 1.489193266348769e-06, "loss": 0.1323, "step": 7064 }, { "epoch": 0.6509420924125858, "grad_norm": 0.9328286355155382, "learning_rate": 1.4884965741583288e-06, "loss": 0.1242, "step": 7065 }, { "epoch": 0.6510342285898558, "grad_norm": 0.9492231265782749, "learning_rate": 1.48779997588609e-06, "loss": 0.1352, "step": 7066 }, { "epoch": 0.6511263647671258, "grad_norm": 0.9336815769927269, "learning_rate": 1.4871034715967331e-06, "loss": 0.1321, "step": 7067 }, { "epoch": 0.6512185009443958, "grad_norm": 0.8837768145065813, "learning_rate": 1.4864070613549284e-06, "loss": 0.1234, "step": 7068 }, { "epoch": 0.6513106371216658, "grad_norm": 0.8757121977193029, "learning_rate": 1.485710745225336e-06, "loss": 0.1177, "step": 7069 }, { "epoch": 0.6514027732989358, "grad_norm": 0.925605697107438, "learning_rate": 1.4850145232726104e-06, "loss": 0.1207, "step": 7070 }, { "epoch": 0.6514949094762058, "grad_norm": 0.9039591896508201, "learning_rate": 1.4843183955613955e-06, "loss": 0.1262, "step": 7071 }, { "epoch": 0.6515870456534758, "grad_norm": 0.9261306989585284, "learning_rate": 1.4836223621563272e-06, "loss": 0.1188, "step": 7072 }, { "epoch": 0.6516791818307458, "grad_norm": 0.9079997328951442, "learning_rate": 1.4829264231220319e-06, "loss": 0.1319, "step": 7073 }, { "epoch": 0.6517713180080158, "grad_norm": 0.9149117830703877, "learning_rate": 1.4822305785231273e-06, "loss": 0.1248, "step": 7074 }, { "epoch": 0.6518634541852859, "grad_norm": 0.9574311441669855, "learning_rate": 1.4815348284242234e-06, "loss": 0.1353, "step": 7075 }, { "epoch": 0.6519555903625559, "grad_norm": 0.9621595470781569, "learning_rate": 1.4808391728899206e-06, "loss": 0.1353, "step": 7076 }, { "epoch": 0.6520477265398259, "grad_norm": 0.912082741700463, "learning_rate": 1.4801436119848096e-06, "loss": 0.1279, "step": 7077 }, { "epoch": 0.6521398627170959, "grad_norm": 0.8963015605594415, "learning_rate": 1.4794481457734743e-06, "loss": 0.1232, "step": 7078 }, { "epoch": 0.6522319988943659, "grad_norm": 0.9239801719949823, "learning_rate": 1.478752774320488e-06, "loss": 0.1279, "step": 7079 }, { "epoch": 0.6523241350716359, "grad_norm": 0.8899030279037768, "learning_rate": 1.4780574976904174e-06, "loss": 0.1159, "step": 7080 }, { "epoch": 0.6524162712489059, "grad_norm": 0.8542351029355911, "learning_rate": 1.4773623159478178e-06, "loss": 0.1102, "step": 7081 }, { "epoch": 0.6525084074261759, "grad_norm": 0.844299544578612, "learning_rate": 1.4766672291572364e-06, "loss": 0.114, "step": 7082 }, { "epoch": 0.6526005436034459, "grad_norm": 0.9211202958944511, "learning_rate": 1.4759722373832135e-06, "loss": 0.1159, "step": 7083 }, { "epoch": 0.6526926797807159, "grad_norm": 0.9334540781839048, "learning_rate": 1.4752773406902788e-06, "loss": 0.1275, "step": 7084 }, { "epoch": 0.6527848159579859, "grad_norm": 0.8839225399468466, "learning_rate": 1.4745825391429537e-06, "loss": 0.1225, "step": 7085 }, { "epoch": 0.6528769521352559, "grad_norm": 0.9135405613448878, "learning_rate": 1.4738878328057493e-06, "loss": 0.1272, "step": 7086 }, { "epoch": 0.6529690883125259, "grad_norm": 0.9291388808423455, "learning_rate": 1.4731932217431704e-06, "loss": 0.1304, "step": 7087 }, { "epoch": 0.6530612244897959, "grad_norm": 0.9333671336031034, "learning_rate": 1.472498706019711e-06, "loss": 0.1282, "step": 7088 }, { "epoch": 0.653153360667066, "grad_norm": 0.9549475483412628, "learning_rate": 1.4718042856998582e-06, "loss": 0.1323, "step": 7089 }, { "epoch": 0.653245496844336, "grad_norm": 0.9327030362108195, "learning_rate": 1.4711099608480878e-06, "loss": 0.126, "step": 7090 }, { "epoch": 0.653337633021606, "grad_norm": 0.90652969378768, "learning_rate": 1.4704157315288676e-06, "loss": 0.1287, "step": 7091 }, { "epoch": 0.653429769198876, "grad_norm": 0.9976244196720754, "learning_rate": 1.469721597806658e-06, "loss": 0.1358, "step": 7092 }, { "epoch": 0.653521905376146, "grad_norm": 0.9056095269673432, "learning_rate": 1.4690275597459097e-06, "loss": 0.126, "step": 7093 }, { "epoch": 0.653614041553416, "grad_norm": 0.9103089182932546, "learning_rate": 1.4683336174110622e-06, "loss": 0.1302, "step": 7094 }, { "epoch": 0.653706177730686, "grad_norm": 0.8777755066961466, "learning_rate": 1.4676397708665496e-06, "loss": 0.1223, "step": 7095 }, { "epoch": 0.653798313907956, "grad_norm": 0.9541944228782124, "learning_rate": 1.4669460201767954e-06, "loss": 0.1439, "step": 7096 }, { "epoch": 0.653890450085226, "grad_norm": 0.8908592177465279, "learning_rate": 1.4662523654062153e-06, "loss": 0.1268, "step": 7097 }, { "epoch": 0.653982586262496, "grad_norm": 0.934638689368783, "learning_rate": 1.4655588066192135e-06, "loss": 0.132, "step": 7098 }, { "epoch": 0.6540747224397659, "grad_norm": 0.9183179883275167, "learning_rate": 1.4648653438801876e-06, "loss": 0.1213, "step": 7099 }, { "epoch": 0.6541668586170359, "grad_norm": 0.899781104969002, "learning_rate": 1.4641719772535265e-06, "loss": 0.1157, "step": 7100 }, { "epoch": 0.6542589947943059, "grad_norm": 0.9516515431961823, "learning_rate": 1.463478706803609e-06, "loss": 0.1344, "step": 7101 }, { "epoch": 0.6543511309715759, "grad_norm": 0.8622076564585328, "learning_rate": 1.4627855325948044e-06, "loss": 0.1218, "step": 7102 }, { "epoch": 0.654443267148846, "grad_norm": 0.9622965357298842, "learning_rate": 1.462092454691475e-06, "loss": 0.1363, "step": 7103 }, { "epoch": 0.654535403326116, "grad_norm": 0.9411786160752286, "learning_rate": 1.461399473157973e-06, "loss": 0.1293, "step": 7104 }, { "epoch": 0.654627539503386, "grad_norm": 0.9203989734120116, "learning_rate": 1.4607065880586418e-06, "loss": 0.1234, "step": 7105 }, { "epoch": 0.654719675680656, "grad_norm": 0.9211111609012533, "learning_rate": 1.4600137994578156e-06, "loss": 0.1248, "step": 7106 }, { "epoch": 0.654811811857926, "grad_norm": 0.9447252888204418, "learning_rate": 1.4593211074198202e-06, "loss": 0.1305, "step": 7107 }, { "epoch": 0.654903948035196, "grad_norm": 0.9322211454899155, "learning_rate": 1.4586285120089713e-06, "loss": 0.1321, "step": 7108 }, { "epoch": 0.654996084212466, "grad_norm": 0.870988539601446, "learning_rate": 1.457936013289578e-06, "loss": 0.1242, "step": 7109 }, { "epoch": 0.655088220389736, "grad_norm": 0.9532198461498228, "learning_rate": 1.4572436113259376e-06, "loss": 0.138, "step": 7110 }, { "epoch": 0.655180356567006, "grad_norm": 0.9982937398063886, "learning_rate": 1.4565513061823394e-06, "loss": 0.1395, "step": 7111 }, { "epoch": 0.655272492744276, "grad_norm": 0.9169145577965592, "learning_rate": 1.4558590979230663e-06, "loss": 0.1288, "step": 7112 }, { "epoch": 0.655364628921546, "grad_norm": 0.9951663238796098, "learning_rate": 1.4551669866123868e-06, "loss": 0.1485, "step": 7113 }, { "epoch": 0.655456765098816, "grad_norm": 0.9797069270513918, "learning_rate": 1.4544749723145665e-06, "loss": 0.1385, "step": 7114 }, { "epoch": 0.655548901276086, "grad_norm": 0.9252067413656129, "learning_rate": 1.4537830550938563e-06, "loss": 0.1301, "step": 7115 }, { "epoch": 0.6556410374533561, "grad_norm": 0.8954911117919563, "learning_rate": 1.453091235014502e-06, "loss": 0.1304, "step": 7116 }, { "epoch": 0.6557331736306261, "grad_norm": 0.8981284612320173, "learning_rate": 1.4523995121407402e-06, "loss": 0.1213, "step": 7117 }, { "epoch": 0.6558253098078961, "grad_norm": 0.9051040761470195, "learning_rate": 1.4517078865367968e-06, "loss": 0.1301, "step": 7118 }, { "epoch": 0.6559174459851661, "grad_norm": 0.9465935974796041, "learning_rate": 1.4510163582668876e-06, "loss": 0.1374, "step": 7119 }, { "epoch": 0.6560095821624361, "grad_norm": 0.9314265941965006, "learning_rate": 1.4503249273952224e-06, "loss": 0.1384, "step": 7120 }, { "epoch": 0.6561017183397061, "grad_norm": 0.9425029363283532, "learning_rate": 1.449633593986001e-06, "loss": 0.1316, "step": 7121 }, { "epoch": 0.6561938545169761, "grad_norm": 0.90970340809109, "learning_rate": 1.448942358103414e-06, "loss": 0.1249, "step": 7122 }, { "epoch": 0.6562859906942461, "grad_norm": 0.9009364371908557, "learning_rate": 1.4482512198116424e-06, "loss": 0.1281, "step": 7123 }, { "epoch": 0.6563781268715161, "grad_norm": 0.9697235851276125, "learning_rate": 1.4475601791748572e-06, "loss": 0.1255, "step": 7124 }, { "epoch": 0.6564702630487861, "grad_norm": 0.9906107648348492, "learning_rate": 1.4468692362572228e-06, "loss": 0.1391, "step": 7125 }, { "epoch": 0.6565623992260561, "grad_norm": 0.9367678289236904, "learning_rate": 1.4461783911228938e-06, "loss": 0.1134, "step": 7126 }, { "epoch": 0.6566545354033261, "grad_norm": 0.960257839404813, "learning_rate": 1.4454876438360138e-06, "loss": 0.1315, "step": 7127 }, { "epoch": 0.6567466715805961, "grad_norm": 1.0250542021519866, "learning_rate": 1.4447969944607207e-06, "loss": 0.143, "step": 7128 }, { "epoch": 0.6568388077578661, "grad_norm": 0.9299221218118151, "learning_rate": 1.444106443061139e-06, "loss": 0.1223, "step": 7129 }, { "epoch": 0.6569309439351362, "grad_norm": 0.9862847658068205, "learning_rate": 1.443415989701389e-06, "loss": 0.1296, "step": 7130 }, { "epoch": 0.6570230801124062, "grad_norm": 0.9101288069296816, "learning_rate": 1.4427256344455764e-06, "loss": 0.1304, "step": 7131 }, { "epoch": 0.6571152162896762, "grad_norm": 0.843608595661738, "learning_rate": 1.442035377357803e-06, "loss": 0.1175, "step": 7132 }, { "epoch": 0.6572073524669462, "grad_norm": 0.9150555649451192, "learning_rate": 1.4413452185021594e-06, "loss": 0.1304, "step": 7133 }, { "epoch": 0.6572994886442162, "grad_norm": 0.9201461415510122, "learning_rate": 1.4406551579427264e-06, "loss": 0.1314, "step": 7134 }, { "epoch": 0.6573916248214862, "grad_norm": 0.8949364453594446, "learning_rate": 1.4399651957435751e-06, "loss": 0.1136, "step": 7135 }, { "epoch": 0.6574837609987562, "grad_norm": 0.8541001653937904, "learning_rate": 1.439275331968769e-06, "loss": 0.1188, "step": 7136 }, { "epoch": 0.6575758971760262, "grad_norm": 0.951903553111977, "learning_rate": 1.4385855666823628e-06, "loss": 0.1298, "step": 7137 }, { "epoch": 0.6576680333532962, "grad_norm": 0.9391325848473654, "learning_rate": 1.4378958999484021e-06, "loss": 0.125, "step": 7138 }, { "epoch": 0.6577601695305662, "grad_norm": 0.9498586655518755, "learning_rate": 1.4372063318309213e-06, "loss": 0.1338, "step": 7139 }, { "epoch": 0.6578523057078361, "grad_norm": 0.9439890757247625, "learning_rate": 1.4365168623939458e-06, "loss": 0.1247, "step": 7140 }, { "epoch": 0.6579444418851061, "grad_norm": 0.9715129796688949, "learning_rate": 1.4358274917014942e-06, "loss": 0.1319, "step": 7141 }, { "epoch": 0.6580365780623761, "grad_norm": 0.952778753231662, "learning_rate": 1.4351382198175745e-06, "loss": 0.1251, "step": 7142 }, { "epoch": 0.6581287142396463, "grad_norm": 0.8580832806372259, "learning_rate": 1.4344490468061867e-06, "loss": 0.115, "step": 7143 }, { "epoch": 0.6582208504169162, "grad_norm": 0.9595668294661949, "learning_rate": 1.4337599727313196e-06, "loss": 0.1347, "step": 7144 }, { "epoch": 0.6583129865941862, "grad_norm": 0.9272250033635803, "learning_rate": 1.4330709976569526e-06, "loss": 0.1285, "step": 7145 }, { "epoch": 0.6584051227714562, "grad_norm": 0.9198354354531479, "learning_rate": 1.4323821216470585e-06, "loss": 0.1313, "step": 7146 }, { "epoch": 0.6584972589487262, "grad_norm": 0.940128449050614, "learning_rate": 1.4316933447656e-06, "loss": 0.1316, "step": 7147 }, { "epoch": 0.6585893951259962, "grad_norm": 0.9651435947834156, "learning_rate": 1.4310046670765288e-06, "loss": 0.1331, "step": 7148 }, { "epoch": 0.6586815313032662, "grad_norm": 0.926751113334902, "learning_rate": 1.43031608864379e-06, "loss": 0.1335, "step": 7149 }, { "epoch": 0.6587736674805362, "grad_norm": 0.9034975813817566, "learning_rate": 1.4296276095313168e-06, "loss": 0.1274, "step": 7150 }, { "epoch": 0.6588658036578062, "grad_norm": 0.9131412602543207, "learning_rate": 1.4289392298030362e-06, "loss": 0.119, "step": 7151 }, { "epoch": 0.6589579398350762, "grad_norm": 0.9423968798876814, "learning_rate": 1.4282509495228622e-06, "loss": 0.1281, "step": 7152 }, { "epoch": 0.6590500760123462, "grad_norm": 0.939229125385478, "learning_rate": 1.4275627687547027e-06, "loss": 0.122, "step": 7153 }, { "epoch": 0.6591422121896162, "grad_norm": 1.0008297804235626, "learning_rate": 1.4268746875624572e-06, "loss": 0.1361, "step": 7154 }, { "epoch": 0.6592343483668862, "grad_norm": 0.975070832200004, "learning_rate": 1.426186706010012e-06, "loss": 0.1279, "step": 7155 }, { "epoch": 0.6593264845441562, "grad_norm": 0.952944740044869, "learning_rate": 1.4254988241612456e-06, "loss": 0.1216, "step": 7156 }, { "epoch": 0.6594186207214263, "grad_norm": 1.0120820938152526, "learning_rate": 1.4248110420800293e-06, "loss": 0.1241, "step": 7157 }, { "epoch": 0.6595107568986963, "grad_norm": 0.9205810165081968, "learning_rate": 1.4241233598302233e-06, "loss": 0.1232, "step": 7158 }, { "epoch": 0.6596028930759663, "grad_norm": 1.0440449077545666, "learning_rate": 1.4234357774756802e-06, "loss": 0.1349, "step": 7159 }, { "epoch": 0.6596950292532363, "grad_norm": 0.9193202768365726, "learning_rate": 1.422748295080241e-06, "loss": 0.1246, "step": 7160 }, { "epoch": 0.6597871654305063, "grad_norm": 0.8943555491454667, "learning_rate": 1.4220609127077373e-06, "loss": 0.1195, "step": 7161 }, { "epoch": 0.6598793016077763, "grad_norm": 0.9391692237037247, "learning_rate": 1.4213736304219945e-06, "loss": 0.1328, "step": 7162 }, { "epoch": 0.6599714377850463, "grad_norm": 0.9963129927890189, "learning_rate": 1.4206864482868265e-06, "loss": 0.1263, "step": 7163 }, { "epoch": 0.6600635739623163, "grad_norm": 0.933541761483965, "learning_rate": 1.4199993663660372e-06, "loss": 0.1315, "step": 7164 }, { "epoch": 0.6601557101395863, "grad_norm": 0.9339266259054608, "learning_rate": 1.419312384723423e-06, "loss": 0.1255, "step": 7165 }, { "epoch": 0.6602478463168563, "grad_norm": 0.9000373393357917, "learning_rate": 1.4186255034227714e-06, "loss": 0.1114, "step": 7166 }, { "epoch": 0.6603399824941263, "grad_norm": 0.8703795566468919, "learning_rate": 1.4179387225278568e-06, "loss": 0.1116, "step": 7167 }, { "epoch": 0.6604321186713963, "grad_norm": 0.955579337973247, "learning_rate": 1.4172520421024493e-06, "loss": 0.1269, "step": 7168 }, { "epoch": 0.6605242548486663, "grad_norm": 0.9639282983172128, "learning_rate": 1.4165654622103054e-06, "loss": 0.1206, "step": 7169 }, { "epoch": 0.6606163910259364, "grad_norm": 0.8729449392995738, "learning_rate": 1.4158789829151747e-06, "loss": 0.1203, "step": 7170 }, { "epoch": 0.6607085272032064, "grad_norm": 0.9116857232463509, "learning_rate": 1.4151926042807985e-06, "loss": 0.1147, "step": 7171 }, { "epoch": 0.6608006633804764, "grad_norm": 0.9490013290219388, "learning_rate": 1.4145063263709056e-06, "loss": 0.1421, "step": 7172 }, { "epoch": 0.6608927995577464, "grad_norm": 0.9488839253969639, "learning_rate": 1.413820149249216e-06, "loss": 0.1311, "step": 7173 }, { "epoch": 0.6609849357350164, "grad_norm": 1.0096436437834344, "learning_rate": 1.4131340729794424e-06, "loss": 0.1288, "step": 7174 }, { "epoch": 0.6610770719122864, "grad_norm": 0.9178413047426945, "learning_rate": 1.4124480976252872e-06, "loss": 0.1257, "step": 7175 }, { "epoch": 0.6611692080895564, "grad_norm": 0.8598420566610736, "learning_rate": 1.4117622232504442e-06, "loss": 0.1186, "step": 7176 }, { "epoch": 0.6612613442668264, "grad_norm": 0.9438112564377011, "learning_rate": 1.4110764499185957e-06, "loss": 0.1368, "step": 7177 }, { "epoch": 0.6613534804440964, "grad_norm": 0.9398113472022501, "learning_rate": 1.410390777693415e-06, "loss": 0.1287, "step": 7178 }, { "epoch": 0.6614456166213664, "grad_norm": 0.8882446435476032, "learning_rate": 1.409705206638568e-06, "loss": 0.1223, "step": 7179 }, { "epoch": 0.6615377527986364, "grad_norm": 0.9108996799402447, "learning_rate": 1.409019736817711e-06, "loss": 0.1313, "step": 7180 }, { "epoch": 0.6616298889759064, "grad_norm": 0.9425541395961481, "learning_rate": 1.4083343682944878e-06, "loss": 0.14, "step": 7181 }, { "epoch": 0.6617220251531764, "grad_norm": 0.9257644599811672, "learning_rate": 1.4076491011325372e-06, "loss": 0.1366, "step": 7182 }, { "epoch": 0.6618141613304463, "grad_norm": 0.9321219321891313, "learning_rate": 1.4069639353954837e-06, "loss": 0.1335, "step": 7183 }, { "epoch": 0.6619062975077165, "grad_norm": 0.9094495757127697, "learning_rate": 1.4062788711469478e-06, "loss": 0.1259, "step": 7184 }, { "epoch": 0.6619984336849865, "grad_norm": 0.9848326389326956, "learning_rate": 1.405593908450535e-06, "loss": 0.1272, "step": 7185 }, { "epoch": 0.6620905698622565, "grad_norm": 0.9063900632096158, "learning_rate": 1.4049090473698457e-06, "loss": 0.1238, "step": 7186 }, { "epoch": 0.6621827060395264, "grad_norm": 0.9357963658738954, "learning_rate": 1.4042242879684703e-06, "loss": 0.1282, "step": 7187 }, { "epoch": 0.6622748422167964, "grad_norm": 0.8979794771571415, "learning_rate": 1.403539630309988e-06, "loss": 0.121, "step": 7188 }, { "epoch": 0.6623669783940664, "grad_norm": 0.892611802740368, "learning_rate": 1.4028550744579677e-06, "loss": 0.1236, "step": 7189 }, { "epoch": 0.6624591145713364, "grad_norm": 0.8850509948496229, "learning_rate": 1.4021706204759716e-06, "loss": 0.1263, "step": 7190 }, { "epoch": 0.6625512507486064, "grad_norm": 0.9465343552302885, "learning_rate": 1.4014862684275522e-06, "loss": 0.1319, "step": 7191 }, { "epoch": 0.6626433869258764, "grad_norm": 0.9608608477037041, "learning_rate": 1.4008020183762513e-06, "loss": 0.1287, "step": 7192 }, { "epoch": 0.6627355231031464, "grad_norm": 0.9568016008936874, "learning_rate": 1.4001178703856016e-06, "loss": 0.1249, "step": 7193 }, { "epoch": 0.6628276592804164, "grad_norm": 0.9681371499803646, "learning_rate": 1.3994338245191249e-06, "loss": 0.1298, "step": 7194 }, { "epoch": 0.6629197954576864, "grad_norm": 0.9707806670367282, "learning_rate": 1.398749880840336e-06, "loss": 0.1291, "step": 7195 }, { "epoch": 0.6630119316349564, "grad_norm": 1.0292775882377738, "learning_rate": 1.3980660394127394e-06, "loss": 0.128, "step": 7196 }, { "epoch": 0.6631040678122264, "grad_norm": 0.9334590025297743, "learning_rate": 1.3973823002998305e-06, "loss": 0.1248, "step": 7197 }, { "epoch": 0.6631962039894965, "grad_norm": 0.8671221737926933, "learning_rate": 1.3966986635650936e-06, "loss": 0.1019, "step": 7198 }, { "epoch": 0.6632883401667665, "grad_norm": 0.906986379921877, "learning_rate": 1.3960151292720039e-06, "loss": 0.1225, "step": 7199 }, { "epoch": 0.6633804763440365, "grad_norm": 0.9726558373988553, "learning_rate": 1.395331697484028e-06, "loss": 0.1252, "step": 7200 }, { "epoch": 0.6634726125213065, "grad_norm": 0.8928249593897785, "learning_rate": 1.394648368264624e-06, "loss": 0.1179, "step": 7201 }, { "epoch": 0.6635647486985765, "grad_norm": 0.9246855574964215, "learning_rate": 1.3939651416772365e-06, "loss": 0.1227, "step": 7202 }, { "epoch": 0.6636568848758465, "grad_norm": 0.9321724830410227, "learning_rate": 1.3932820177853062e-06, "loss": 0.1238, "step": 7203 }, { "epoch": 0.6637490210531165, "grad_norm": 0.9507285348694358, "learning_rate": 1.3925989966522585e-06, "loss": 0.1268, "step": 7204 }, { "epoch": 0.6638411572303865, "grad_norm": 0.9361993103413431, "learning_rate": 1.391916078341514e-06, "loss": 0.1215, "step": 7205 }, { "epoch": 0.6639332934076565, "grad_norm": 0.8919295341094984, "learning_rate": 1.3912332629164798e-06, "loss": 0.1179, "step": 7206 }, { "epoch": 0.6640254295849265, "grad_norm": 0.8853783172570733, "learning_rate": 1.3905505504405567e-06, "loss": 0.124, "step": 7207 }, { "epoch": 0.6641175657621965, "grad_norm": 0.9334584777070276, "learning_rate": 1.3898679409771355e-06, "loss": 0.1283, "step": 7208 }, { "epoch": 0.6642097019394665, "grad_norm": 0.9539829238214229, "learning_rate": 1.389185434589595e-06, "loss": 0.1266, "step": 7209 }, { "epoch": 0.6643018381167365, "grad_norm": 0.9241016852887277, "learning_rate": 1.3885030313413056e-06, "loss": 0.1196, "step": 7210 }, { "epoch": 0.6643939742940066, "grad_norm": 0.9586669345335255, "learning_rate": 1.3878207312956295e-06, "loss": 0.1351, "step": 7211 }, { "epoch": 0.6644861104712766, "grad_norm": 0.9450592859824014, "learning_rate": 1.3871385345159183e-06, "loss": 0.1323, "step": 7212 }, { "epoch": 0.6645782466485466, "grad_norm": 0.9612734215640861, "learning_rate": 1.3864564410655149e-06, "loss": 0.1225, "step": 7213 }, { "epoch": 0.6646703828258166, "grad_norm": 0.9395580497180398, "learning_rate": 1.3857744510077507e-06, "loss": 0.125, "step": 7214 }, { "epoch": 0.6647625190030866, "grad_norm": 0.9515079076163937, "learning_rate": 1.3850925644059475e-06, "loss": 0.1413, "step": 7215 }, { "epoch": 0.6648546551803566, "grad_norm": 0.9818483068556666, "learning_rate": 1.3844107813234197e-06, "loss": 0.1317, "step": 7216 }, { "epoch": 0.6649467913576266, "grad_norm": 0.9217060441337399, "learning_rate": 1.3837291018234723e-06, "loss": 0.1327, "step": 7217 }, { "epoch": 0.6650389275348966, "grad_norm": 0.9614026258662707, "learning_rate": 1.3830475259693964e-06, "loss": 0.1324, "step": 7218 }, { "epoch": 0.6651310637121666, "grad_norm": 0.8997301921202329, "learning_rate": 1.3823660538244793e-06, "loss": 0.114, "step": 7219 }, { "epoch": 0.6652231998894366, "grad_norm": 0.9264679973250439, "learning_rate": 1.3816846854519934e-06, "loss": 0.1366, "step": 7220 }, { "epoch": 0.6653153360667066, "grad_norm": 0.9397209333426656, "learning_rate": 1.3810034209152057e-06, "loss": 0.1224, "step": 7221 }, { "epoch": 0.6654074722439766, "grad_norm": 0.9140976327735287, "learning_rate": 1.3803222602773696e-06, "loss": 0.1209, "step": 7222 }, { "epoch": 0.6654996084212466, "grad_norm": 0.9275928232405652, "learning_rate": 1.379641203601732e-06, "loss": 0.1295, "step": 7223 }, { "epoch": 0.6655917445985166, "grad_norm": 0.8812472686313237, "learning_rate": 1.3789602509515306e-06, "loss": 0.126, "step": 7224 }, { "epoch": 0.6656838807757867, "grad_norm": 0.9507714938881616, "learning_rate": 1.3782794023899899e-06, "loss": 0.1271, "step": 7225 }, { "epoch": 0.6657760169530567, "grad_norm": 0.8546349341742145, "learning_rate": 1.3775986579803276e-06, "loss": 0.111, "step": 7226 }, { "epoch": 0.6658681531303267, "grad_norm": 0.9735538532092373, "learning_rate": 1.37691801778575e-06, "loss": 0.1267, "step": 7227 }, { "epoch": 0.6659602893075967, "grad_norm": 0.9649175198066844, "learning_rate": 1.3762374818694558e-06, "loss": 0.1273, "step": 7228 }, { "epoch": 0.6660524254848667, "grad_norm": 0.9308364929610364, "learning_rate": 1.3755570502946324e-06, "loss": 0.135, "step": 7229 }, { "epoch": 0.6661445616621366, "grad_norm": 0.9482080604533994, "learning_rate": 1.3748767231244587e-06, "loss": 0.1295, "step": 7230 }, { "epoch": 0.6662366978394066, "grad_norm": 0.9044964317809467, "learning_rate": 1.3741965004221012e-06, "loss": 0.1201, "step": 7231 }, { "epoch": 0.6663288340166766, "grad_norm": 0.9573803087194334, "learning_rate": 1.3735163822507196e-06, "loss": 0.1425, "step": 7232 }, { "epoch": 0.6664209701939466, "grad_norm": 0.879465266637758, "learning_rate": 1.372836368673463e-06, "loss": 0.1097, "step": 7233 }, { "epoch": 0.6665131063712166, "grad_norm": 1.007476252708849, "learning_rate": 1.3721564597534723e-06, "loss": 0.1303, "step": 7234 }, { "epoch": 0.6666052425484866, "grad_norm": 0.930209822547841, "learning_rate": 1.3714766555538755e-06, "loss": 0.1259, "step": 7235 }, { "epoch": 0.6666973787257566, "grad_norm": 0.9989347154200431, "learning_rate": 1.3707969561377915e-06, "loss": 0.1465, "step": 7236 }, { "epoch": 0.6667895149030266, "grad_norm": 0.854671371178768, "learning_rate": 1.370117361568332e-06, "loss": 0.1137, "step": 7237 }, { "epoch": 0.6668816510802967, "grad_norm": 0.9819684863020709, "learning_rate": 1.3694378719085976e-06, "loss": 0.1388, "step": 7238 }, { "epoch": 0.6669737872575667, "grad_norm": 0.9252396488730227, "learning_rate": 1.368758487221678e-06, "loss": 0.1285, "step": 7239 }, { "epoch": 0.6670659234348367, "grad_norm": 0.9453506499299281, "learning_rate": 1.3680792075706545e-06, "loss": 0.1397, "step": 7240 }, { "epoch": 0.6671580596121067, "grad_norm": 0.8709913858856868, "learning_rate": 1.367400033018599e-06, "loss": 0.1148, "step": 7241 }, { "epoch": 0.6672501957893767, "grad_norm": 0.9137545587799056, "learning_rate": 1.3667209636285727e-06, "loss": 0.131, "step": 7242 }, { "epoch": 0.6673423319666467, "grad_norm": 0.9159588879600266, "learning_rate": 1.366041999463626e-06, "loss": 0.13, "step": 7243 }, { "epoch": 0.6674344681439167, "grad_norm": 0.9230621320209568, "learning_rate": 1.3653631405868011e-06, "loss": 0.1232, "step": 7244 }, { "epoch": 0.6675266043211867, "grad_norm": 0.8953404755783205, "learning_rate": 1.3646843870611313e-06, "loss": 0.1277, "step": 7245 }, { "epoch": 0.6676187404984567, "grad_norm": 0.9414937946712962, "learning_rate": 1.3640057389496392e-06, "loss": 0.1319, "step": 7246 }, { "epoch": 0.6677108766757267, "grad_norm": 0.9230244067093492, "learning_rate": 1.3633271963153363e-06, "loss": 0.1258, "step": 7247 }, { "epoch": 0.6678030128529967, "grad_norm": 0.9482742954005706, "learning_rate": 1.3626487592212245e-06, "loss": 0.1285, "step": 7248 }, { "epoch": 0.6678951490302667, "grad_norm": 0.9053301065986715, "learning_rate": 1.361970427730298e-06, "loss": 0.1221, "step": 7249 }, { "epoch": 0.6679872852075367, "grad_norm": 0.9269917273591195, "learning_rate": 1.3612922019055409e-06, "loss": 0.1278, "step": 7250 }, { "epoch": 0.6680794213848067, "grad_norm": 0.9945437288140861, "learning_rate": 1.3606140818099243e-06, "loss": 0.1283, "step": 7251 }, { "epoch": 0.6681715575620768, "grad_norm": 0.8834575369543373, "learning_rate": 1.3599360675064139e-06, "loss": 0.1173, "step": 7252 }, { "epoch": 0.6682636937393468, "grad_norm": 0.9297478423868443, "learning_rate": 1.359258159057961e-06, "loss": 0.1243, "step": 7253 }, { "epoch": 0.6683558299166168, "grad_norm": 0.8803633005549261, "learning_rate": 1.358580356527511e-06, "loss": 0.1271, "step": 7254 }, { "epoch": 0.6684479660938868, "grad_norm": 0.9436775723358185, "learning_rate": 1.3579026599779988e-06, "loss": 0.1261, "step": 7255 }, { "epoch": 0.6685401022711568, "grad_norm": 0.8952694224685467, "learning_rate": 1.3572250694723465e-06, "loss": 0.1245, "step": 7256 }, { "epoch": 0.6686322384484268, "grad_norm": 0.8551919104756415, "learning_rate": 1.3565475850734706e-06, "loss": 0.1139, "step": 7257 }, { "epoch": 0.6687243746256968, "grad_norm": 0.9032991002081001, "learning_rate": 1.355870206844273e-06, "loss": 0.111, "step": 7258 }, { "epoch": 0.6688165108029668, "grad_norm": 0.9402261271430783, "learning_rate": 1.3551929348476512e-06, "loss": 0.125, "step": 7259 }, { "epoch": 0.6689086469802368, "grad_norm": 0.8590361852348019, "learning_rate": 1.3545157691464878e-06, "loss": 0.1212, "step": 7260 }, { "epoch": 0.6690007831575068, "grad_norm": 0.953889062291394, "learning_rate": 1.353838709803658e-06, "loss": 0.1246, "step": 7261 }, { "epoch": 0.6690929193347768, "grad_norm": 0.9024711962523686, "learning_rate": 1.3531617568820287e-06, "loss": 0.1256, "step": 7262 }, { "epoch": 0.6691850555120468, "grad_norm": 0.9029203771714646, "learning_rate": 1.3524849104444537e-06, "loss": 0.1259, "step": 7263 }, { "epoch": 0.6692771916893168, "grad_norm": 0.9757431844463745, "learning_rate": 1.3518081705537771e-06, "loss": 0.1329, "step": 7264 }, { "epoch": 0.6693693278665868, "grad_norm": 0.9514092658235749, "learning_rate": 1.3511315372728357e-06, "loss": 0.1181, "step": 7265 }, { "epoch": 0.6694614640438569, "grad_norm": 0.9673560639479292, "learning_rate": 1.3504550106644542e-06, "loss": 0.1253, "step": 7266 }, { "epoch": 0.6695536002211269, "grad_norm": 0.9354307139797914, "learning_rate": 1.34977859079145e-06, "loss": 0.1139, "step": 7267 }, { "epoch": 0.6696457363983969, "grad_norm": 0.8914156946294755, "learning_rate": 1.3491022777166276e-06, "loss": 0.1156, "step": 7268 }, { "epoch": 0.6697378725756669, "grad_norm": 0.9649736445005475, "learning_rate": 1.3484260715027813e-06, "loss": 0.1436, "step": 7269 }, { "epoch": 0.6698300087529369, "grad_norm": 0.9483659023489992, "learning_rate": 1.3477499722126985e-06, "loss": 0.1323, "step": 7270 }, { "epoch": 0.6699221449302069, "grad_norm": 0.9200966092532619, "learning_rate": 1.3470739799091555e-06, "loss": 0.1322, "step": 7271 }, { "epoch": 0.6700142811074769, "grad_norm": 0.9403584085916542, "learning_rate": 1.3463980946549166e-06, "loss": 0.1382, "step": 7272 }, { "epoch": 0.6701064172847468, "grad_norm": 0.8378406184781029, "learning_rate": 1.3457223165127397e-06, "loss": 0.1088, "step": 7273 }, { "epoch": 0.6701985534620168, "grad_norm": 0.9551913632098953, "learning_rate": 1.3450466455453693e-06, "loss": 0.1342, "step": 7274 }, { "epoch": 0.6702906896392868, "grad_norm": 0.8597495274518914, "learning_rate": 1.3443710818155428e-06, "loss": 0.1191, "step": 7275 }, { "epoch": 0.6703828258165568, "grad_norm": 0.9307987876921514, "learning_rate": 1.3436956253859851e-06, "loss": 0.1119, "step": 7276 }, { "epoch": 0.6704749619938268, "grad_norm": 0.913451889373323, "learning_rate": 1.3430202763194125e-06, "loss": 0.1242, "step": 7277 }, { "epoch": 0.6705670981710968, "grad_norm": 0.9174767959610509, "learning_rate": 1.342345034678533e-06, "loss": 0.1207, "step": 7278 }, { "epoch": 0.6706592343483669, "grad_norm": 0.9625576179556493, "learning_rate": 1.3416699005260416e-06, "loss": 0.131, "step": 7279 }, { "epoch": 0.6707513705256369, "grad_norm": 0.9322744623558886, "learning_rate": 1.3409948739246236e-06, "loss": 0.1193, "step": 7280 }, { "epoch": 0.6708435067029069, "grad_norm": 0.9144503231058142, "learning_rate": 1.3403199549369564e-06, "loss": 0.1288, "step": 7281 }, { "epoch": 0.6709356428801769, "grad_norm": 0.9216913139875762, "learning_rate": 1.3396451436257062e-06, "loss": 0.1307, "step": 7282 }, { "epoch": 0.6710277790574469, "grad_norm": 0.9487383042584467, "learning_rate": 1.3389704400535303e-06, "loss": 0.1294, "step": 7283 }, { "epoch": 0.6711199152347169, "grad_norm": 0.8999456824995248, "learning_rate": 1.3382958442830737e-06, "loss": 0.1239, "step": 7284 }, { "epoch": 0.6712120514119869, "grad_norm": 0.9535840771969801, "learning_rate": 1.337621356376972e-06, "loss": 0.1264, "step": 7285 }, { "epoch": 0.6713041875892569, "grad_norm": 0.9497062608622805, "learning_rate": 1.3369469763978527e-06, "loss": 0.1316, "step": 7286 }, { "epoch": 0.6713963237665269, "grad_norm": 0.9452724159743188, "learning_rate": 1.3362727044083318e-06, "loss": 0.1418, "step": 7287 }, { "epoch": 0.6714884599437969, "grad_norm": 0.91445117456331, "learning_rate": 1.3355985404710164e-06, "loss": 0.1174, "step": 7288 }, { "epoch": 0.6715805961210669, "grad_norm": 0.9449257270380875, "learning_rate": 1.3349244846485022e-06, "loss": 0.1301, "step": 7289 }, { "epoch": 0.6716727322983369, "grad_norm": 0.964496022253499, "learning_rate": 1.3342505370033736e-06, "loss": 0.1125, "step": 7290 }, { "epoch": 0.6717648684756069, "grad_norm": 0.936912152887504, "learning_rate": 1.3335766975982082e-06, "loss": 0.1243, "step": 7291 }, { "epoch": 0.6718570046528769, "grad_norm": 0.8577965724245058, "learning_rate": 1.3329029664955729e-06, "loss": 0.1002, "step": 7292 }, { "epoch": 0.671949140830147, "grad_norm": 0.990204302102415, "learning_rate": 1.332229343758022e-06, "loss": 0.1265, "step": 7293 }, { "epoch": 0.672041277007417, "grad_norm": 1.0281368081432407, "learning_rate": 1.331555829448103e-06, "loss": 0.1257, "step": 7294 }, { "epoch": 0.672133413184687, "grad_norm": 0.9288074008539412, "learning_rate": 1.33088242362835e-06, "loss": 0.1209, "step": 7295 }, { "epoch": 0.672225549361957, "grad_norm": 0.9119209022595895, "learning_rate": 1.3302091263612907e-06, "loss": 0.1174, "step": 7296 }, { "epoch": 0.672317685539227, "grad_norm": 0.9143317228925907, "learning_rate": 1.3295359377094392e-06, "loss": 0.1295, "step": 7297 }, { "epoch": 0.672409821716497, "grad_norm": 0.8944269171000773, "learning_rate": 1.3288628577353014e-06, "loss": 0.1251, "step": 7298 }, { "epoch": 0.672501957893767, "grad_norm": 0.952822769694342, "learning_rate": 1.3281898865013749e-06, "loss": 0.1422, "step": 7299 }, { "epoch": 0.672594094071037, "grad_norm": 0.9731154772916867, "learning_rate": 1.327517024070143e-06, "loss": 0.129, "step": 7300 }, { "epoch": 0.672686230248307, "grad_norm": 0.9820492852941306, "learning_rate": 1.3268442705040808e-06, "loss": 0.1319, "step": 7301 }, { "epoch": 0.672778366425577, "grad_norm": 0.8859435352688277, "learning_rate": 1.3261716258656543e-06, "loss": 0.127, "step": 7302 }, { "epoch": 0.672870502602847, "grad_norm": 0.9183943013213689, "learning_rate": 1.3254990902173187e-06, "loss": 0.1272, "step": 7303 }, { "epoch": 0.672962638780117, "grad_norm": 0.9189656727211152, "learning_rate": 1.3248266636215202e-06, "loss": 0.125, "step": 7304 }, { "epoch": 0.673054774957387, "grad_norm": 0.867060049690445, "learning_rate": 1.324154346140692e-06, "loss": 0.1094, "step": 7305 }, { "epoch": 0.6731469111346571, "grad_norm": 1.0020192646596813, "learning_rate": 1.3234821378372586e-06, "loss": 0.1299, "step": 7306 }, { "epoch": 0.6732390473119271, "grad_norm": 0.9670118862538364, "learning_rate": 1.3228100387736353e-06, "loss": 0.1377, "step": 7307 }, { "epoch": 0.6733311834891971, "grad_norm": 0.9237445429388595, "learning_rate": 1.3221380490122276e-06, "loss": 0.1286, "step": 7308 }, { "epoch": 0.6734233196664671, "grad_norm": 0.9893606523013816, "learning_rate": 1.321466168615428e-06, "loss": 0.1281, "step": 7309 }, { "epoch": 0.6735154558437371, "grad_norm": 0.9510171646860528, "learning_rate": 1.3207943976456223e-06, "loss": 0.1281, "step": 7310 }, { "epoch": 0.6736075920210071, "grad_norm": 0.9048525408219783, "learning_rate": 1.3201227361651824e-06, "loss": 0.1258, "step": 7311 }, { "epoch": 0.6736997281982771, "grad_norm": 0.9484261385740433, "learning_rate": 1.3194511842364738e-06, "loss": 0.1188, "step": 7312 }, { "epoch": 0.6737918643755471, "grad_norm": 0.9452879348278467, "learning_rate": 1.3187797419218506e-06, "loss": 0.1274, "step": 7313 }, { "epoch": 0.673884000552817, "grad_norm": 1.006014751364486, "learning_rate": 1.3181084092836544e-06, "loss": 0.1361, "step": 7314 }, { "epoch": 0.673976136730087, "grad_norm": 0.9742076329629352, "learning_rate": 1.31743718638422e-06, "loss": 0.133, "step": 7315 }, { "epoch": 0.674068272907357, "grad_norm": 0.99308011921525, "learning_rate": 1.3167660732858705e-06, "loss": 0.145, "step": 7316 }, { "epoch": 0.674160409084627, "grad_norm": 0.9007162632362339, "learning_rate": 1.316095070050919e-06, "loss": 0.1169, "step": 7317 }, { "epoch": 0.674252545261897, "grad_norm": 0.9403181481368799, "learning_rate": 1.3154241767416665e-06, "loss": 0.1266, "step": 7318 }, { "epoch": 0.674344681439167, "grad_norm": 0.8985720780848173, "learning_rate": 1.3147533934204065e-06, "loss": 0.1148, "step": 7319 }, { "epoch": 0.6744368176164371, "grad_norm": 0.8679435821138797, "learning_rate": 1.3140827201494215e-06, "loss": 0.1242, "step": 7320 }, { "epoch": 0.6745289537937071, "grad_norm": 0.9096030324469376, "learning_rate": 1.313412156990985e-06, "loss": 0.1295, "step": 7321 }, { "epoch": 0.6746210899709771, "grad_norm": 0.9524739379153031, "learning_rate": 1.312741704007357e-06, "loss": 0.136, "step": 7322 }, { "epoch": 0.6747132261482471, "grad_norm": 0.9364638633954382, "learning_rate": 1.3120713612607888e-06, "loss": 0.1259, "step": 7323 }, { "epoch": 0.6748053623255171, "grad_norm": 0.9004953190539895, "learning_rate": 1.3114011288135225e-06, "loss": 0.1264, "step": 7324 }, { "epoch": 0.6748974985027871, "grad_norm": 0.9161284739862543, "learning_rate": 1.31073100672779e-06, "loss": 0.1181, "step": 7325 }, { "epoch": 0.6749896346800571, "grad_norm": 0.9414480947386061, "learning_rate": 1.3100609950658109e-06, "loss": 0.1223, "step": 7326 }, { "epoch": 0.6750817708573271, "grad_norm": 0.904014912340006, "learning_rate": 1.3093910938897972e-06, "loss": 0.1266, "step": 7327 }, { "epoch": 0.6751739070345971, "grad_norm": 0.8955658720631855, "learning_rate": 1.3087213032619478e-06, "loss": 0.117, "step": 7328 }, { "epoch": 0.6752660432118671, "grad_norm": 0.9207589123517635, "learning_rate": 1.3080516232444545e-06, "loss": 0.1212, "step": 7329 }, { "epoch": 0.6753581793891371, "grad_norm": 0.9392613606296041, "learning_rate": 1.3073820538994952e-06, "loss": 0.1307, "step": 7330 }, { "epoch": 0.6754503155664071, "grad_norm": 0.8560776006565872, "learning_rate": 1.3067125952892408e-06, "loss": 0.1074, "step": 7331 }, { "epoch": 0.6755424517436771, "grad_norm": 0.8545937841158776, "learning_rate": 1.3060432474758508e-06, "loss": 0.1121, "step": 7332 }, { "epoch": 0.6756345879209471, "grad_norm": 0.9466866706239028, "learning_rate": 1.3053740105214741e-06, "loss": 0.1374, "step": 7333 }, { "epoch": 0.6757267240982172, "grad_norm": 0.8949652527185565, "learning_rate": 1.3047048844882481e-06, "loss": 0.1158, "step": 7334 }, { "epoch": 0.6758188602754872, "grad_norm": 0.8689164231602294, "learning_rate": 1.304035869438302e-06, "loss": 0.111, "step": 7335 }, { "epoch": 0.6759109964527572, "grad_norm": 0.9635373718004209, "learning_rate": 1.3033669654337544e-06, "loss": 0.1406, "step": 7336 }, { "epoch": 0.6760031326300272, "grad_norm": 0.9337974075065598, "learning_rate": 1.302698172536714e-06, "loss": 0.1214, "step": 7337 }, { "epoch": 0.6760952688072972, "grad_norm": 0.8946904614413712, "learning_rate": 1.3020294908092767e-06, "loss": 0.1255, "step": 7338 }, { "epoch": 0.6761874049845672, "grad_norm": 0.9691555259612942, "learning_rate": 1.3013609203135297e-06, "loss": 0.1268, "step": 7339 }, { "epoch": 0.6762795411618372, "grad_norm": 0.9120333872404491, "learning_rate": 1.3006924611115495e-06, "loss": 0.1238, "step": 7340 }, { "epoch": 0.6763716773391072, "grad_norm": 0.9405321337635649, "learning_rate": 1.300024113265404e-06, "loss": 0.1232, "step": 7341 }, { "epoch": 0.6764638135163772, "grad_norm": 0.9354071231407695, "learning_rate": 1.2993558768371494e-06, "loss": 0.1262, "step": 7342 }, { "epoch": 0.6765559496936472, "grad_norm": 0.8924751034495181, "learning_rate": 1.2986877518888307e-06, "loss": 0.1272, "step": 7343 }, { "epoch": 0.6766480858709172, "grad_norm": 0.9034305897586791, "learning_rate": 1.2980197384824828e-06, "loss": 0.1189, "step": 7344 }, { "epoch": 0.6767402220481872, "grad_norm": 0.8854680747678314, "learning_rate": 1.2973518366801315e-06, "loss": 0.1124, "step": 7345 }, { "epoch": 0.6768323582254572, "grad_norm": 0.9314473818384239, "learning_rate": 1.2966840465437923e-06, "loss": 0.1333, "step": 7346 }, { "epoch": 0.6769244944027273, "grad_norm": 0.9275619556132121, "learning_rate": 1.2960163681354683e-06, "loss": 0.1216, "step": 7347 }, { "epoch": 0.6770166305799973, "grad_norm": 0.8869121761739572, "learning_rate": 1.2953488015171551e-06, "loss": 0.1186, "step": 7348 }, { "epoch": 0.6771087667572673, "grad_norm": 0.818807364000548, "learning_rate": 1.294681346750834e-06, "loss": 0.1117, "step": 7349 }, { "epoch": 0.6772009029345373, "grad_norm": 0.9352558114757723, "learning_rate": 1.294014003898481e-06, "loss": 0.1212, "step": 7350 }, { "epoch": 0.6772930391118073, "grad_norm": 1.041324186976536, "learning_rate": 1.2933467730220562e-06, "loss": 0.1429, "step": 7351 }, { "epoch": 0.6773851752890773, "grad_norm": 0.9600592474772713, "learning_rate": 1.2926796541835135e-06, "loss": 0.1338, "step": 7352 }, { "epoch": 0.6774773114663473, "grad_norm": 0.9034424041374622, "learning_rate": 1.2920126474447957e-06, "loss": 0.1153, "step": 7353 }, { "epoch": 0.6775694476436173, "grad_norm": 1.0130396180529198, "learning_rate": 1.2913457528678335e-06, "loss": 0.1418, "step": 7354 }, { "epoch": 0.6776615838208873, "grad_norm": 0.9177987594541854, "learning_rate": 1.2906789705145475e-06, "loss": 0.1257, "step": 7355 }, { "epoch": 0.6777537199981573, "grad_norm": 0.943278223201491, "learning_rate": 1.2900123004468493e-06, "loss": 0.1156, "step": 7356 }, { "epoch": 0.6778458561754273, "grad_norm": 0.9010057141578023, "learning_rate": 1.289345742726639e-06, "loss": 0.1326, "step": 7357 }, { "epoch": 0.6779379923526972, "grad_norm": 0.9178511003467293, "learning_rate": 1.288679297415808e-06, "loss": 0.1248, "step": 7358 }, { "epoch": 0.6780301285299672, "grad_norm": 0.9031359655111605, "learning_rate": 1.2880129645762344e-06, "loss": 0.1176, "step": 7359 }, { "epoch": 0.6781222647072372, "grad_norm": 0.9436874791851032, "learning_rate": 1.2873467442697862e-06, "loss": 0.1354, "step": 7360 }, { "epoch": 0.6782144008845074, "grad_norm": 0.9241098072391299, "learning_rate": 1.286680636558324e-06, "loss": 0.1179, "step": 7361 }, { "epoch": 0.6783065370617773, "grad_norm": 0.9397598292849974, "learning_rate": 1.2860146415036957e-06, "loss": 0.1177, "step": 7362 }, { "epoch": 0.6783986732390473, "grad_norm": 0.9488656040076715, "learning_rate": 1.2853487591677377e-06, "loss": 0.1226, "step": 7363 }, { "epoch": 0.6784908094163173, "grad_norm": 0.8690902550103791, "learning_rate": 1.2846829896122792e-06, "loss": 0.1116, "step": 7364 }, { "epoch": 0.6785829455935873, "grad_norm": 0.9312076330382117, "learning_rate": 1.284017332899135e-06, "loss": 0.1259, "step": 7365 }, { "epoch": 0.6786750817708573, "grad_norm": 0.923409407118127, "learning_rate": 1.283351789090113e-06, "loss": 0.1182, "step": 7366 }, { "epoch": 0.6787672179481273, "grad_norm": 0.9441374226173443, "learning_rate": 1.2826863582470078e-06, "loss": 0.1254, "step": 7367 }, { "epoch": 0.6788593541253973, "grad_norm": 0.9020458710503054, "learning_rate": 1.2820210404316053e-06, "loss": 0.1124, "step": 7368 }, { "epoch": 0.6789514903026673, "grad_norm": 0.9364867537207302, "learning_rate": 1.2813558357056806e-06, "loss": 0.1288, "step": 7369 }, { "epoch": 0.6790436264799373, "grad_norm": 1.0236374395047134, "learning_rate": 1.2806907441309974e-06, "loss": 0.1373, "step": 7370 }, { "epoch": 0.6791357626572073, "grad_norm": 0.8698015409414299, "learning_rate": 1.2800257657693105e-06, "loss": 0.1099, "step": 7371 }, { "epoch": 0.6792278988344773, "grad_norm": 0.9205891854687096, "learning_rate": 1.2793609006823615e-06, "loss": 0.1208, "step": 7372 }, { "epoch": 0.6793200350117473, "grad_norm": 1.0175982193294322, "learning_rate": 1.2786961489318842e-06, "loss": 0.1436, "step": 7373 }, { "epoch": 0.6794121711890174, "grad_norm": 0.9473057310918341, "learning_rate": 1.278031510579602e-06, "loss": 0.1197, "step": 7374 }, { "epoch": 0.6795043073662874, "grad_norm": 0.9997006968274136, "learning_rate": 1.2773669856872256e-06, "loss": 0.1398, "step": 7375 }, { "epoch": 0.6795964435435574, "grad_norm": 0.879881394116734, "learning_rate": 1.2767025743164551e-06, "loss": 0.1183, "step": 7376 }, { "epoch": 0.6796885797208274, "grad_norm": 0.8972780003606752, "learning_rate": 1.2760382765289821e-06, "loss": 0.1192, "step": 7377 }, { "epoch": 0.6797807158980974, "grad_norm": 0.9385686665101676, "learning_rate": 1.275374092386487e-06, "loss": 0.1281, "step": 7378 }, { "epoch": 0.6798728520753674, "grad_norm": 0.8774908119041022, "learning_rate": 1.2747100219506404e-06, "loss": 0.1199, "step": 7379 }, { "epoch": 0.6799649882526374, "grad_norm": 0.9192489819603069, "learning_rate": 1.2740460652831e-06, "loss": 0.1258, "step": 7380 }, { "epoch": 0.6800571244299074, "grad_norm": 0.9323643542426164, "learning_rate": 1.2733822224455133e-06, "loss": 0.1309, "step": 7381 }, { "epoch": 0.6801492606071774, "grad_norm": 1.0334964401625448, "learning_rate": 1.272718493499519e-06, "loss": 0.1378, "step": 7382 }, { "epoch": 0.6802413967844474, "grad_norm": 0.9062491939851149, "learning_rate": 1.272054878506746e-06, "loss": 0.126, "step": 7383 }, { "epoch": 0.6803335329617174, "grad_norm": 0.8902882054217731, "learning_rate": 1.2713913775288086e-06, "loss": 0.1158, "step": 7384 }, { "epoch": 0.6804256691389874, "grad_norm": 0.8903105818667769, "learning_rate": 1.2707279906273152e-06, "loss": 0.1278, "step": 7385 }, { "epoch": 0.6805178053162574, "grad_norm": 0.9324559481614326, "learning_rate": 1.270064717863859e-06, "loss": 0.1305, "step": 7386 }, { "epoch": 0.6806099414935274, "grad_norm": 0.8959648512122598, "learning_rate": 1.269401559300027e-06, "loss": 0.1273, "step": 7387 }, { "epoch": 0.6807020776707975, "grad_norm": 0.9152200447009375, "learning_rate": 1.2687385149973919e-06, "loss": 0.1353, "step": 7388 }, { "epoch": 0.6807942138480675, "grad_norm": 0.8724993170234326, "learning_rate": 1.268075585017518e-06, "loss": 0.1169, "step": 7389 }, { "epoch": 0.6808863500253375, "grad_norm": 0.9065934723664036, "learning_rate": 1.2674127694219588e-06, "loss": 0.1258, "step": 7390 }, { "epoch": 0.6809784862026075, "grad_norm": 1.0023250137596291, "learning_rate": 1.2667500682722584e-06, "loss": 0.1295, "step": 7391 }, { "epoch": 0.6810706223798775, "grad_norm": 0.9139762424853833, "learning_rate": 1.266087481629945e-06, "loss": 0.1222, "step": 7392 }, { "epoch": 0.6811627585571475, "grad_norm": 0.986025500116549, "learning_rate": 1.2654250095565417e-06, "loss": 0.1304, "step": 7393 }, { "epoch": 0.6812548947344175, "grad_norm": 0.9687913893761001, "learning_rate": 1.2647626521135592e-06, "loss": 0.1219, "step": 7394 }, { "epoch": 0.6813470309116875, "grad_norm": 1.0229554853329992, "learning_rate": 1.2641004093624981e-06, "loss": 0.131, "step": 7395 }, { "epoch": 0.6814391670889575, "grad_norm": 0.9457520902813298, "learning_rate": 1.2634382813648462e-06, "loss": 0.13, "step": 7396 }, { "epoch": 0.6815313032662275, "grad_norm": 0.8981736708544468, "learning_rate": 1.262776268182084e-06, "loss": 0.1344, "step": 7397 }, { "epoch": 0.6816234394434975, "grad_norm": 0.9310753904940697, "learning_rate": 1.2621143698756778e-06, "loss": 0.1256, "step": 7398 }, { "epoch": 0.6817155756207675, "grad_norm": 0.9271288321729084, "learning_rate": 1.2614525865070848e-06, "loss": 0.1283, "step": 7399 }, { "epoch": 0.6818077117980375, "grad_norm": 0.9112315579438799, "learning_rate": 1.260790918137754e-06, "loss": 0.1319, "step": 7400 }, { "epoch": 0.6818998479753074, "grad_norm": 0.9539092052600017, "learning_rate": 1.2601293648291184e-06, "loss": 0.1239, "step": 7401 }, { "epoch": 0.6819919841525776, "grad_norm": 0.9703369630185199, "learning_rate": 1.2594679266426063e-06, "loss": 0.1301, "step": 7402 }, { "epoch": 0.6820841203298476, "grad_norm": 0.9137480063164818, "learning_rate": 1.2588066036396294e-06, "loss": 0.1171, "step": 7403 }, { "epoch": 0.6821762565071176, "grad_norm": 0.9058815845580901, "learning_rate": 1.2581453958815937e-06, "loss": 0.1195, "step": 7404 }, { "epoch": 0.6822683926843875, "grad_norm": 0.9045402132109076, "learning_rate": 1.2574843034298912e-06, "loss": 0.1265, "step": 7405 }, { "epoch": 0.6823605288616575, "grad_norm": 0.9698781357496576, "learning_rate": 1.2568233263459042e-06, "loss": 0.1312, "step": 7406 }, { "epoch": 0.6824526650389275, "grad_norm": 1.0278967298327988, "learning_rate": 1.2561624646910064e-06, "loss": 0.1346, "step": 7407 }, { "epoch": 0.6825448012161975, "grad_norm": 0.9620339931384371, "learning_rate": 1.2555017185265578e-06, "loss": 0.1376, "step": 7408 }, { "epoch": 0.6826369373934675, "grad_norm": 0.8746590584378339, "learning_rate": 1.2548410879139072e-06, "loss": 0.105, "step": 7409 }, { "epoch": 0.6827290735707375, "grad_norm": 0.9537799769999069, "learning_rate": 1.254180572914396e-06, "loss": 0.1309, "step": 7410 }, { "epoch": 0.6828212097480075, "grad_norm": 0.8835486213792817, "learning_rate": 1.2535201735893526e-06, "loss": 0.1168, "step": 7411 }, { "epoch": 0.6829133459252775, "grad_norm": 0.9677867977994593, "learning_rate": 1.252859890000096e-06, "loss": 0.1247, "step": 7412 }, { "epoch": 0.6830054821025475, "grad_norm": 0.9173729144330235, "learning_rate": 1.252199722207933e-06, "loss": 0.1182, "step": 7413 }, { "epoch": 0.6830976182798175, "grad_norm": 0.9489197558146778, "learning_rate": 1.2515396702741593e-06, "loss": 0.1275, "step": 7414 }, { "epoch": 0.6831897544570876, "grad_norm": 1.0231784534441966, "learning_rate": 1.2508797342600613e-06, "loss": 0.137, "step": 7415 }, { "epoch": 0.6832818906343576, "grad_norm": 0.9137554998444966, "learning_rate": 1.2502199142269154e-06, "loss": 0.1262, "step": 7416 }, { "epoch": 0.6833740268116276, "grad_norm": 0.959616464056135, "learning_rate": 1.2495602102359837e-06, "loss": 0.1159, "step": 7417 }, { "epoch": 0.6834661629888976, "grad_norm": 0.9443226724457511, "learning_rate": 1.2489006223485225e-06, "loss": 0.1318, "step": 7418 }, { "epoch": 0.6835582991661676, "grad_norm": 0.9055951065867156, "learning_rate": 1.2482411506257722e-06, "loss": 0.1217, "step": 7419 }, { "epoch": 0.6836504353434376, "grad_norm": 0.8964410503789555, "learning_rate": 1.2475817951289665e-06, "loss": 0.1219, "step": 7420 }, { "epoch": 0.6837425715207076, "grad_norm": 0.9692218760916794, "learning_rate": 1.2469225559193251e-06, "loss": 0.1351, "step": 7421 }, { "epoch": 0.6838347076979776, "grad_norm": 0.913552989477623, "learning_rate": 1.2462634330580593e-06, "loss": 0.1179, "step": 7422 }, { "epoch": 0.6839268438752476, "grad_norm": 0.9013278373038858, "learning_rate": 1.2456044266063694e-06, "loss": 0.1231, "step": 7423 }, { "epoch": 0.6840189800525176, "grad_norm": 0.9833694142508832, "learning_rate": 1.2449455366254434e-06, "loss": 0.1342, "step": 7424 }, { "epoch": 0.6841111162297876, "grad_norm": 0.8884726425002472, "learning_rate": 1.2442867631764588e-06, "loss": 0.1214, "step": 7425 }, { "epoch": 0.6842032524070576, "grad_norm": 0.9212373080375857, "learning_rate": 1.2436281063205833e-06, "loss": 0.1264, "step": 7426 }, { "epoch": 0.6842953885843276, "grad_norm": 0.961439810407205, "learning_rate": 1.2429695661189731e-06, "loss": 0.1297, "step": 7427 }, { "epoch": 0.6843875247615976, "grad_norm": 0.9165466783704851, "learning_rate": 1.242311142632775e-06, "loss": 0.133, "step": 7428 }, { "epoch": 0.6844796609388677, "grad_norm": 0.9031234366618096, "learning_rate": 1.2416528359231228e-06, "loss": 0.1201, "step": 7429 }, { "epoch": 0.6845717971161377, "grad_norm": 0.923906786213281, "learning_rate": 1.240994646051139e-06, "loss": 0.1336, "step": 7430 }, { "epoch": 0.6846639332934077, "grad_norm": 0.959066739495178, "learning_rate": 1.2403365730779383e-06, "loss": 0.1339, "step": 7431 }, { "epoch": 0.6847560694706777, "grad_norm": 0.9688337673057995, "learning_rate": 1.2396786170646218e-06, "loss": 0.128, "step": 7432 }, { "epoch": 0.6848482056479477, "grad_norm": 0.9374828794417941, "learning_rate": 1.2390207780722827e-06, "loss": 0.1191, "step": 7433 }, { "epoch": 0.6849403418252177, "grad_norm": 0.9645857123533416, "learning_rate": 1.238363056162e-06, "loss": 0.1268, "step": 7434 }, { "epoch": 0.6850324780024877, "grad_norm": 0.917977034506336, "learning_rate": 1.2377054513948423e-06, "loss": 0.1317, "step": 7435 }, { "epoch": 0.6851246141797577, "grad_norm": 0.903871262985241, "learning_rate": 1.2370479638318692e-06, "loss": 0.1263, "step": 7436 }, { "epoch": 0.6852167503570277, "grad_norm": 0.9181670238680173, "learning_rate": 1.2363905935341295e-06, "loss": 0.1237, "step": 7437 }, { "epoch": 0.6853088865342977, "grad_norm": 0.9205604948512467, "learning_rate": 1.235733340562658e-06, "loss": 0.1302, "step": 7438 }, { "epoch": 0.6854010227115677, "grad_norm": 0.8849849610348361, "learning_rate": 1.2350762049784835e-06, "loss": 0.1192, "step": 7439 }, { "epoch": 0.6854931588888377, "grad_norm": 0.9159893977573488, "learning_rate": 1.2344191868426181e-06, "loss": 0.1278, "step": 7440 }, { "epoch": 0.6855852950661077, "grad_norm": 0.9363760794106957, "learning_rate": 1.2337622862160687e-06, "loss": 0.1238, "step": 7441 }, { "epoch": 0.6856774312433778, "grad_norm": 0.8938566236978432, "learning_rate": 1.233105503159826e-06, "loss": 0.1244, "step": 7442 }, { "epoch": 0.6857695674206478, "grad_norm": 0.8527983351894046, "learning_rate": 1.2324488377348736e-06, "loss": 0.1087, "step": 7443 }, { "epoch": 0.6858617035979178, "grad_norm": 0.907390478133244, "learning_rate": 1.2317922900021843e-06, "loss": 0.1223, "step": 7444 }, { "epoch": 0.6859538397751878, "grad_norm": 0.9151103834295761, "learning_rate": 1.2311358600227172e-06, "loss": 0.118, "step": 7445 }, { "epoch": 0.6860459759524578, "grad_norm": 0.9384525478458614, "learning_rate": 1.2304795478574211e-06, "loss": 0.1201, "step": 7446 }, { "epoch": 0.6861381121297278, "grad_norm": 0.8967253804927608, "learning_rate": 1.2298233535672357e-06, "loss": 0.1254, "step": 7447 }, { "epoch": 0.6862302483069977, "grad_norm": 0.9329650086140051, "learning_rate": 1.2291672772130885e-06, "loss": 0.1379, "step": 7448 }, { "epoch": 0.6863223844842677, "grad_norm": 0.9485151638142946, "learning_rate": 1.2285113188558975e-06, "loss": 0.1324, "step": 7449 }, { "epoch": 0.6864145206615377, "grad_norm": 0.9190267891724095, "learning_rate": 1.2278554785565671e-06, "loss": 0.1328, "step": 7450 }, { "epoch": 0.6865066568388077, "grad_norm": 0.929404692376141, "learning_rate": 1.2271997563759918e-06, "loss": 0.1326, "step": 7451 }, { "epoch": 0.6865987930160777, "grad_norm": 0.9353304920500429, "learning_rate": 1.226544152375056e-06, "loss": 0.1309, "step": 7452 }, { "epoch": 0.6866909291933477, "grad_norm": 0.9166655846212474, "learning_rate": 1.2258886666146335e-06, "loss": 0.1204, "step": 7453 }, { "epoch": 0.6867830653706177, "grad_norm": 0.8790079523079889, "learning_rate": 1.2252332991555846e-06, "loss": 0.1168, "step": 7454 }, { "epoch": 0.6868752015478877, "grad_norm": 0.9637556673833563, "learning_rate": 1.224578050058762e-06, "loss": 0.1382, "step": 7455 }, { "epoch": 0.6869673377251578, "grad_norm": 0.8295194190943669, "learning_rate": 1.2239229193850039e-06, "loss": 0.1025, "step": 7456 }, { "epoch": 0.6870594739024278, "grad_norm": 0.9419495896513839, "learning_rate": 1.2232679071951398e-06, "loss": 0.129, "step": 7457 }, { "epoch": 0.6871516100796978, "grad_norm": 0.90361666710837, "learning_rate": 1.2226130135499891e-06, "loss": 0.1215, "step": 7458 }, { "epoch": 0.6872437462569678, "grad_norm": 0.8778510526809243, "learning_rate": 1.2219582385103564e-06, "loss": 0.1186, "step": 7459 }, { "epoch": 0.6873358824342378, "grad_norm": 0.9003346616886775, "learning_rate": 1.2213035821370401e-06, "loss": 0.1193, "step": 7460 }, { "epoch": 0.6874280186115078, "grad_norm": 0.9341321475259016, "learning_rate": 1.2206490444908226e-06, "loss": 0.1184, "step": 7461 }, { "epoch": 0.6875201547887778, "grad_norm": 1.029120204312684, "learning_rate": 1.21999462563248e-06, "loss": 0.139, "step": 7462 }, { "epoch": 0.6876122909660478, "grad_norm": 0.991534769651168, "learning_rate": 1.2193403256227731e-06, "loss": 0.1412, "step": 7463 }, { "epoch": 0.6877044271433178, "grad_norm": 0.9770012066753403, "learning_rate": 1.2186861445224548e-06, "loss": 0.1279, "step": 7464 }, { "epoch": 0.6877965633205878, "grad_norm": 0.9755692964671077, "learning_rate": 1.2180320823922662e-06, "loss": 0.1307, "step": 7465 }, { "epoch": 0.6878886994978578, "grad_norm": 0.9479539739269719, "learning_rate": 1.2173781392929383e-06, "loss": 0.1119, "step": 7466 }, { "epoch": 0.6879808356751278, "grad_norm": 0.9075991770684234, "learning_rate": 1.2167243152851862e-06, "loss": 0.1236, "step": 7467 }, { "epoch": 0.6880729718523978, "grad_norm": 0.9412169301247907, "learning_rate": 1.21607061042972e-06, "loss": 0.1231, "step": 7468 }, { "epoch": 0.6881651080296678, "grad_norm": 0.9310202024859918, "learning_rate": 1.2154170247872354e-06, "loss": 0.1295, "step": 7469 }, { "epoch": 0.6882572442069379, "grad_norm": 0.8808125116214799, "learning_rate": 1.2147635584184194e-06, "loss": 0.1219, "step": 7470 }, { "epoch": 0.6883493803842079, "grad_norm": 0.9574965439429187, "learning_rate": 1.2141102113839442e-06, "loss": 0.126, "step": 7471 }, { "epoch": 0.6884415165614779, "grad_norm": 0.9585891755121796, "learning_rate": 1.2134569837444755e-06, "loss": 0.1192, "step": 7472 }, { "epoch": 0.6885336527387479, "grad_norm": 0.9390435430840817, "learning_rate": 1.2128038755606632e-06, "loss": 0.1312, "step": 7473 }, { "epoch": 0.6886257889160179, "grad_norm": 0.9404765976585037, "learning_rate": 1.2121508868931507e-06, "loss": 0.1272, "step": 7474 }, { "epoch": 0.6887179250932879, "grad_norm": 0.9462706939442835, "learning_rate": 1.2114980178025657e-06, "loss": 0.1238, "step": 7475 }, { "epoch": 0.6888100612705579, "grad_norm": 0.9467478685012891, "learning_rate": 1.2108452683495286e-06, "loss": 0.1275, "step": 7476 }, { "epoch": 0.6889021974478279, "grad_norm": 0.9634104501865915, "learning_rate": 1.210192638594648e-06, "loss": 0.135, "step": 7477 }, { "epoch": 0.6889943336250979, "grad_norm": 0.9728865732091142, "learning_rate": 1.2095401285985197e-06, "loss": 0.1333, "step": 7478 }, { "epoch": 0.6890864698023679, "grad_norm": 0.933124824968402, "learning_rate": 1.2088877384217286e-06, "loss": 0.1293, "step": 7479 }, { "epoch": 0.6891786059796379, "grad_norm": 0.9059485628010226, "learning_rate": 1.2082354681248495e-06, "loss": 0.1212, "step": 7480 }, { "epoch": 0.6892707421569079, "grad_norm": 0.8854842763292889, "learning_rate": 1.2075833177684465e-06, "loss": 0.1144, "step": 7481 }, { "epoch": 0.6893628783341779, "grad_norm": 0.922192406975205, "learning_rate": 1.2069312874130725e-06, "loss": 0.1121, "step": 7482 }, { "epoch": 0.689455014511448, "grad_norm": 0.9320451445217117, "learning_rate": 1.2062793771192676e-06, "loss": 0.1246, "step": 7483 }, { "epoch": 0.689547150688718, "grad_norm": 0.8946586316246357, "learning_rate": 1.2056275869475606e-06, "loss": 0.1246, "step": 7484 }, { "epoch": 0.689639286865988, "grad_norm": 0.8953871059867827, "learning_rate": 1.2049759169584718e-06, "loss": 0.12, "step": 7485 }, { "epoch": 0.689731423043258, "grad_norm": 0.9397138554660357, "learning_rate": 1.2043243672125083e-06, "loss": 0.1231, "step": 7486 }, { "epoch": 0.689823559220528, "grad_norm": 0.870590564525269, "learning_rate": 1.2036729377701679e-06, "loss": 0.1123, "step": 7487 }, { "epoch": 0.689915695397798, "grad_norm": 0.9123543737092503, "learning_rate": 1.2030216286919343e-06, "loss": 0.1241, "step": 7488 }, { "epoch": 0.690007831575068, "grad_norm": 0.9315347547200894, "learning_rate": 1.2023704400382813e-06, "loss": 0.1266, "step": 7489 }, { "epoch": 0.690099967752338, "grad_norm": 0.9419355207806347, "learning_rate": 1.201719371869673e-06, "loss": 0.1259, "step": 7490 }, { "epoch": 0.690192103929608, "grad_norm": 1.0714501807054846, "learning_rate": 1.2010684242465612e-06, "loss": 0.1364, "step": 7491 }, { "epoch": 0.6902842401068779, "grad_norm": 0.9120663194555401, "learning_rate": 1.2004175972293852e-06, "loss": 0.1118, "step": 7492 }, { "epoch": 0.6903763762841479, "grad_norm": 0.9301664178778738, "learning_rate": 1.1997668908785761e-06, "loss": 0.1289, "step": 7493 }, { "epoch": 0.6904685124614179, "grad_norm": 0.9135039431916775, "learning_rate": 1.1991163052545502e-06, "loss": 0.1254, "step": 7494 }, { "epoch": 0.6905606486386879, "grad_norm": 0.9372952523507923, "learning_rate": 1.1984658404177162e-06, "loss": 0.1101, "step": 7495 }, { "epoch": 0.6906527848159579, "grad_norm": 0.9081435909843156, "learning_rate": 1.1978154964284683e-06, "loss": 0.1182, "step": 7496 }, { "epoch": 0.690744920993228, "grad_norm": 0.9207021186709088, "learning_rate": 1.1971652733471915e-06, "loss": 0.1251, "step": 7497 }, { "epoch": 0.690837057170498, "grad_norm": 0.9124595681631252, "learning_rate": 1.19651517123426e-06, "loss": 0.126, "step": 7498 }, { "epoch": 0.690929193347768, "grad_norm": 0.9578194252866348, "learning_rate": 1.1958651901500356e-06, "loss": 0.1233, "step": 7499 }, { "epoch": 0.691021329525038, "grad_norm": 0.8642829438776286, "learning_rate": 1.1952153301548674e-06, "loss": 0.117, "step": 7500 }, { "epoch": 0.691021329525038, "eval_loss": 0.12531189620494843, "eval_runtime": 299.4056, "eval_samples_per_second": 23.436, "eval_steps_per_second": 2.932, "step": 7500 }, { "epoch": 0.691113465702308, "grad_norm": 0.9056786919297217, "learning_rate": 1.1945655913090965e-06, "loss": 0.1273, "step": 7501 }, { "epoch": 0.691205601879578, "grad_norm": 0.9366350244304404, "learning_rate": 1.1939159736730508e-06, "loss": 0.1304, "step": 7502 }, { "epoch": 0.691297738056848, "grad_norm": 0.9873293906173173, "learning_rate": 1.1932664773070483e-06, "loss": 0.1305, "step": 7503 }, { "epoch": 0.691389874234118, "grad_norm": 0.879909629424238, "learning_rate": 1.192617102271394e-06, "loss": 0.1246, "step": 7504 }, { "epoch": 0.691482010411388, "grad_norm": 0.879155311662118, "learning_rate": 1.1919678486263817e-06, "loss": 0.1142, "step": 7505 }, { "epoch": 0.691574146588658, "grad_norm": 0.9510831799893671, "learning_rate": 1.1913187164322954e-06, "loss": 0.1268, "step": 7506 }, { "epoch": 0.691666282765928, "grad_norm": 0.8829687120011289, "learning_rate": 1.190669705749408e-06, "loss": 0.1266, "step": 7507 }, { "epoch": 0.691758418943198, "grad_norm": 0.8564115074734555, "learning_rate": 1.1900208166379784e-06, "loss": 0.1147, "step": 7508 }, { "epoch": 0.691850555120468, "grad_norm": 0.9176006723966627, "learning_rate": 1.1893720491582579e-06, "loss": 0.1236, "step": 7509 }, { "epoch": 0.6919426912977381, "grad_norm": 0.9154865319735849, "learning_rate": 1.1887234033704827e-06, "loss": 0.1153, "step": 7510 }, { "epoch": 0.6920348274750081, "grad_norm": 0.8846755111687101, "learning_rate": 1.1880748793348818e-06, "loss": 0.1264, "step": 7511 }, { "epoch": 0.6921269636522781, "grad_norm": 0.9063995376950635, "learning_rate": 1.1874264771116684e-06, "loss": 0.1233, "step": 7512 }, { "epoch": 0.6922190998295481, "grad_norm": 0.9119144408492244, "learning_rate": 1.1867781967610478e-06, "loss": 0.1286, "step": 7513 }, { "epoch": 0.6923112360068181, "grad_norm": 0.9227075633333177, "learning_rate": 1.186130038343214e-06, "loss": 0.1237, "step": 7514 }, { "epoch": 0.6924033721840881, "grad_norm": 0.9447847319047454, "learning_rate": 1.1854820019183467e-06, "loss": 0.1213, "step": 7515 }, { "epoch": 0.6924955083613581, "grad_norm": 0.9557400253206907, "learning_rate": 1.1848340875466176e-06, "loss": 0.1241, "step": 7516 }, { "epoch": 0.6925876445386281, "grad_norm": 0.9213338639911549, "learning_rate": 1.1841862952881845e-06, "loss": 0.1216, "step": 7517 }, { "epoch": 0.6926797807158981, "grad_norm": 0.929761471437544, "learning_rate": 1.183538625203195e-06, "loss": 0.1161, "step": 7518 }, { "epoch": 0.6927719168931681, "grad_norm": 0.956777668991843, "learning_rate": 1.182891077351787e-06, "loss": 0.1286, "step": 7519 }, { "epoch": 0.6928640530704381, "grad_norm": 0.9351053697619062, "learning_rate": 1.1822436517940844e-06, "loss": 0.1376, "step": 7520 }, { "epoch": 0.6929561892477081, "grad_norm": 0.9307576923675359, "learning_rate": 1.1815963485901994e-06, "loss": 0.1233, "step": 7521 }, { "epoch": 0.6930483254249781, "grad_norm": 0.9209797831879457, "learning_rate": 1.1809491678002356e-06, "loss": 0.1235, "step": 7522 }, { "epoch": 0.6931404616022481, "grad_norm": 0.8927925400652459, "learning_rate": 1.1803021094842831e-06, "loss": 0.1257, "step": 7523 }, { "epoch": 0.6932325977795182, "grad_norm": 0.8842257147429041, "learning_rate": 1.179655173702423e-06, "loss": 0.1131, "step": 7524 }, { "epoch": 0.6933247339567882, "grad_norm": 0.960345088928631, "learning_rate": 1.1790083605147221e-06, "loss": 0.1219, "step": 7525 }, { "epoch": 0.6934168701340582, "grad_norm": 0.9421499013450262, "learning_rate": 1.1783616699812362e-06, "loss": 0.1343, "step": 7526 }, { "epoch": 0.6935090063113282, "grad_norm": 0.9502278428418999, "learning_rate": 1.1777151021620113e-06, "loss": 0.1312, "step": 7527 }, { "epoch": 0.6936011424885982, "grad_norm": 1.001310947592207, "learning_rate": 1.1770686571170824e-06, "loss": 0.1333, "step": 7528 }, { "epoch": 0.6936932786658682, "grad_norm": 0.9471759067515962, "learning_rate": 1.17642233490647e-06, "loss": 0.1271, "step": 7529 }, { "epoch": 0.6937854148431382, "grad_norm": 0.8974159275769846, "learning_rate": 1.1757761355901875e-06, "loss": 0.1104, "step": 7530 }, { "epoch": 0.6938775510204082, "grad_norm": 0.9724327109166822, "learning_rate": 1.1751300592282325e-06, "loss": 0.1385, "step": 7531 }, { "epoch": 0.6939696871976782, "grad_norm": 0.9465696548880492, "learning_rate": 1.1744841058805947e-06, "loss": 0.1143, "step": 7532 }, { "epoch": 0.6940618233749482, "grad_norm": 0.979571877068151, "learning_rate": 1.1738382756072495e-06, "loss": 0.1469, "step": 7533 }, { "epoch": 0.6941539595522181, "grad_norm": 0.9206628125733626, "learning_rate": 1.1731925684681631e-06, "loss": 0.1203, "step": 7534 }, { "epoch": 0.6942460957294881, "grad_norm": 0.9375005379901026, "learning_rate": 1.1725469845232906e-06, "loss": 0.1331, "step": 7535 }, { "epoch": 0.6943382319067581, "grad_norm": 0.9411215714914509, "learning_rate": 1.1719015238325731e-06, "loss": 0.1205, "step": 7536 }, { "epoch": 0.6944303680840282, "grad_norm": 0.8819105340485545, "learning_rate": 1.1712561864559415e-06, "loss": 0.1173, "step": 7537 }, { "epoch": 0.6945225042612982, "grad_norm": 0.9772820854322495, "learning_rate": 1.1706109724533158e-06, "loss": 0.1277, "step": 7538 }, { "epoch": 0.6946146404385682, "grad_norm": 0.8949011843616254, "learning_rate": 1.1699658818846044e-06, "loss": 0.1225, "step": 7539 }, { "epoch": 0.6947067766158382, "grad_norm": 0.8911294765958074, "learning_rate": 1.1693209148097049e-06, "loss": 0.1134, "step": 7540 }, { "epoch": 0.6947989127931082, "grad_norm": 0.9990537451251741, "learning_rate": 1.1686760712885018e-06, "loss": 0.1421, "step": 7541 }, { "epoch": 0.6948910489703782, "grad_norm": 0.929425147367063, "learning_rate": 1.1680313513808677e-06, "loss": 0.1273, "step": 7542 }, { "epoch": 0.6949831851476482, "grad_norm": 0.9202734532577793, "learning_rate": 1.1673867551466658e-06, "loss": 0.1231, "step": 7543 }, { "epoch": 0.6950753213249182, "grad_norm": 0.8998933513621242, "learning_rate": 1.1667422826457475e-06, "loss": 0.1212, "step": 7544 }, { "epoch": 0.6951674575021882, "grad_norm": 0.9106168799928811, "learning_rate": 1.1660979339379524e-06, "loss": 0.13, "step": 7545 }, { "epoch": 0.6952595936794582, "grad_norm": 0.9458638903120887, "learning_rate": 1.1654537090831069e-06, "loss": 0.1245, "step": 7546 }, { "epoch": 0.6953517298567282, "grad_norm": 0.9451228064476396, "learning_rate": 1.164809608141029e-06, "loss": 0.1291, "step": 7547 }, { "epoch": 0.6954438660339982, "grad_norm": 0.9613647716535906, "learning_rate": 1.1641656311715218e-06, "loss": 0.1357, "step": 7548 }, { "epoch": 0.6955360022112682, "grad_norm": 0.890685272110586, "learning_rate": 1.1635217782343801e-06, "loss": 0.1119, "step": 7549 }, { "epoch": 0.6956281383885382, "grad_norm": 0.9401787383359983, "learning_rate": 1.1628780493893849e-06, "loss": 0.1146, "step": 7550 }, { "epoch": 0.6957202745658083, "grad_norm": 0.9264539393150472, "learning_rate": 1.162234444696306e-06, "loss": 0.1108, "step": 7551 }, { "epoch": 0.6958124107430783, "grad_norm": 0.966080326479011, "learning_rate": 1.1615909642149042e-06, "loss": 0.1218, "step": 7552 }, { "epoch": 0.6959045469203483, "grad_norm": 0.9639128496683719, "learning_rate": 1.1609476080049254e-06, "loss": 0.1365, "step": 7553 }, { "epoch": 0.6959966830976183, "grad_norm": 0.9204230071876955, "learning_rate": 1.1603043761261043e-06, "loss": 0.1189, "step": 7554 }, { "epoch": 0.6960888192748883, "grad_norm": 0.9068194418459751, "learning_rate": 1.159661268638166e-06, "loss": 0.119, "step": 7555 }, { "epoch": 0.6961809554521583, "grad_norm": 0.9406348953180761, "learning_rate": 1.1590182856008233e-06, "loss": 0.1251, "step": 7556 }, { "epoch": 0.6962730916294283, "grad_norm": 1.0019374287174063, "learning_rate": 1.158375427073778e-06, "loss": 0.1549, "step": 7557 }, { "epoch": 0.6963652278066983, "grad_norm": 0.9614692218032151, "learning_rate": 1.1577326931167184e-06, "loss": 0.1194, "step": 7558 }, { "epoch": 0.6964573639839683, "grad_norm": 0.9627814151373388, "learning_rate": 1.1570900837893223e-06, "loss": 0.1276, "step": 7559 }, { "epoch": 0.6965495001612383, "grad_norm": 0.9401742982964019, "learning_rate": 1.1564475991512562e-06, "loss": 0.1195, "step": 7560 }, { "epoch": 0.6966416363385083, "grad_norm": 0.9606928144175957, "learning_rate": 1.1558052392621758e-06, "loss": 0.1386, "step": 7561 }, { "epoch": 0.6967337725157783, "grad_norm": 0.8242620094607082, "learning_rate": 1.155163004181723e-06, "loss": 0.1104, "step": 7562 }, { "epoch": 0.6968259086930483, "grad_norm": 0.9661538812059097, "learning_rate": 1.1545208939695306e-06, "loss": 0.1233, "step": 7563 }, { "epoch": 0.6969180448703183, "grad_norm": 0.9185722917327637, "learning_rate": 1.1538789086852173e-06, "loss": 0.1172, "step": 7564 }, { "epoch": 0.6970101810475884, "grad_norm": 0.9559839661690173, "learning_rate": 1.1532370483883931e-06, "loss": 0.1308, "step": 7565 }, { "epoch": 0.6971023172248584, "grad_norm": 0.9926826971490361, "learning_rate": 1.152595313138653e-06, "loss": 0.1378, "step": 7566 }, { "epoch": 0.6971944534021284, "grad_norm": 0.8957768567257417, "learning_rate": 1.151953702995583e-06, "loss": 0.1221, "step": 7567 }, { "epoch": 0.6972865895793984, "grad_norm": 0.919822250860159, "learning_rate": 1.1513122180187577e-06, "loss": 0.127, "step": 7568 }, { "epoch": 0.6973787257566684, "grad_norm": 0.9537334807702881, "learning_rate": 1.150670858267738e-06, "loss": 0.1254, "step": 7569 }, { "epoch": 0.6974708619339384, "grad_norm": 0.9311165498976608, "learning_rate": 1.150029623802074e-06, "loss": 0.1345, "step": 7570 }, { "epoch": 0.6975629981112084, "grad_norm": 0.8904827529578065, "learning_rate": 1.1493885146813042e-06, "loss": 0.12, "step": 7571 }, { "epoch": 0.6976551342884784, "grad_norm": 0.9123497944185116, "learning_rate": 1.148747530964956e-06, "loss": 0.1288, "step": 7572 }, { "epoch": 0.6977472704657484, "grad_norm": 0.9531719885240827, "learning_rate": 1.1481066727125463e-06, "loss": 0.1291, "step": 7573 }, { "epoch": 0.6978394066430184, "grad_norm": 0.9819613269245462, "learning_rate": 1.1474659399835772e-06, "loss": 0.1309, "step": 7574 }, { "epoch": 0.6979315428202884, "grad_norm": 0.9657481867646279, "learning_rate": 1.1468253328375404e-06, "loss": 0.1156, "step": 7575 }, { "epoch": 0.6980236789975584, "grad_norm": 0.9068680219647643, "learning_rate": 1.1461848513339168e-06, "loss": 0.1199, "step": 7576 }, { "epoch": 0.6981158151748283, "grad_norm": 0.9583994048161718, "learning_rate": 1.145544495532176e-06, "loss": 0.1276, "step": 7577 }, { "epoch": 0.6982079513520985, "grad_norm": 0.9183051486498401, "learning_rate": 1.144904265491775e-06, "loss": 0.1166, "step": 7578 }, { "epoch": 0.6983000875293685, "grad_norm": 0.9558218358159374, "learning_rate": 1.1442641612721588e-06, "loss": 0.1264, "step": 7579 }, { "epoch": 0.6983922237066384, "grad_norm": 0.9610830469191616, "learning_rate": 1.1436241829327605e-06, "loss": 0.1265, "step": 7580 }, { "epoch": 0.6984843598839084, "grad_norm": 0.925704991359198, "learning_rate": 1.1429843305330027e-06, "loss": 0.1258, "step": 7581 }, { "epoch": 0.6985764960611784, "grad_norm": 0.9619080391790512, "learning_rate": 1.1423446041322967e-06, "loss": 0.1166, "step": 7582 }, { "epoch": 0.6986686322384484, "grad_norm": 0.9200858544359843, "learning_rate": 1.1417050037900393e-06, "loss": 0.1187, "step": 7583 }, { "epoch": 0.6987607684157184, "grad_norm": 0.9165477155141011, "learning_rate": 1.1410655295656196e-06, "loss": 0.1289, "step": 7584 }, { "epoch": 0.6988529045929884, "grad_norm": 0.9558792693314704, "learning_rate": 1.1404261815184105e-06, "loss": 0.1297, "step": 7585 }, { "epoch": 0.6989450407702584, "grad_norm": 0.9447699533790705, "learning_rate": 1.1397869597077783e-06, "loss": 0.1296, "step": 7586 }, { "epoch": 0.6990371769475284, "grad_norm": 0.8962723565665709, "learning_rate": 1.1391478641930716e-06, "loss": 0.1199, "step": 7587 }, { "epoch": 0.6991293131247984, "grad_norm": 0.9362449149914991, "learning_rate": 1.1385088950336329e-06, "loss": 0.1236, "step": 7588 }, { "epoch": 0.6992214493020684, "grad_norm": 0.9195942463818534, "learning_rate": 1.1378700522887903e-06, "loss": 0.1101, "step": 7589 }, { "epoch": 0.6993135854793384, "grad_norm": 0.9756437558705977, "learning_rate": 1.13723133601786e-06, "loss": 0.1366, "step": 7590 }, { "epoch": 0.6994057216566084, "grad_norm": 0.8944513848537959, "learning_rate": 1.136592746280146e-06, "loss": 0.1117, "step": 7591 }, { "epoch": 0.6994978578338785, "grad_norm": 0.9330843733670557, "learning_rate": 1.1359542831349422e-06, "loss": 0.1278, "step": 7592 }, { "epoch": 0.6995899940111485, "grad_norm": 0.8788149107087988, "learning_rate": 1.1353159466415298e-06, "loss": 0.1139, "step": 7593 }, { "epoch": 0.6996821301884185, "grad_norm": 0.9187660811222983, "learning_rate": 1.1346777368591797e-06, "loss": 0.1227, "step": 7594 }, { "epoch": 0.6997742663656885, "grad_norm": 0.9293501590683788, "learning_rate": 1.1340396538471488e-06, "loss": 0.127, "step": 7595 }, { "epoch": 0.6998664025429585, "grad_norm": 0.9122089651037086, "learning_rate": 1.133401697664682e-06, "loss": 0.1247, "step": 7596 }, { "epoch": 0.6999585387202285, "grad_norm": 0.9514888912155925, "learning_rate": 1.1327638683710146e-06, "loss": 0.1195, "step": 7597 }, { "epoch": 0.7000506748974985, "grad_norm": 0.9093223008533924, "learning_rate": 1.13212616602537e-06, "loss": 0.1128, "step": 7598 }, { "epoch": 0.7001428110747685, "grad_norm": 0.8769014325228602, "learning_rate": 1.1314885906869575e-06, "loss": 0.1092, "step": 7599 }, { "epoch": 0.7002349472520385, "grad_norm": 0.8726147241906409, "learning_rate": 1.1308511424149774e-06, "loss": 0.1093, "step": 7600 }, { "epoch": 0.7003270834293085, "grad_norm": 0.8949518559323022, "learning_rate": 1.1302138212686152e-06, "loss": 0.1192, "step": 7601 }, { "epoch": 0.7004192196065785, "grad_norm": 0.9241564642082613, "learning_rate": 1.1295766273070469e-06, "loss": 0.1218, "step": 7602 }, { "epoch": 0.7005113557838485, "grad_norm": 0.9788950224549492, "learning_rate": 1.1289395605894374e-06, "loss": 0.1283, "step": 7603 }, { "epoch": 0.7006034919611185, "grad_norm": 1.0243458990881766, "learning_rate": 1.1283026211749362e-06, "loss": 0.1368, "step": 7604 }, { "epoch": 0.7006956281383886, "grad_norm": 0.9374500966330062, "learning_rate": 1.127665809122685e-06, "loss": 0.1191, "step": 7605 }, { "epoch": 0.7007877643156586, "grad_norm": 0.9205880293432904, "learning_rate": 1.1270291244918106e-06, "loss": 0.1316, "step": 7606 }, { "epoch": 0.7008799004929286, "grad_norm": 0.9307359895923712, "learning_rate": 1.1263925673414303e-06, "loss": 0.1207, "step": 7607 }, { "epoch": 0.7009720366701986, "grad_norm": 0.9457635843783687, "learning_rate": 1.1257561377306471e-06, "loss": 0.1286, "step": 7608 }, { "epoch": 0.7010641728474686, "grad_norm": 0.9370338619779593, "learning_rate": 1.1251198357185547e-06, "loss": 0.1295, "step": 7609 }, { "epoch": 0.7011563090247386, "grad_norm": 0.9616310297957011, "learning_rate": 1.1244836613642342e-06, "loss": 0.1313, "step": 7610 }, { "epoch": 0.7012484452020086, "grad_norm": 0.8917892618492652, "learning_rate": 1.1238476147267537e-06, "loss": 0.1225, "step": 7611 }, { "epoch": 0.7013405813792786, "grad_norm": 0.9314012246164496, "learning_rate": 1.1232116958651695e-06, "loss": 0.1284, "step": 7612 }, { "epoch": 0.7014327175565486, "grad_norm": 0.9447613390064742, "learning_rate": 1.1225759048385276e-06, "loss": 0.124, "step": 7613 }, { "epoch": 0.7015248537338186, "grad_norm": 0.9139852735566912, "learning_rate": 1.1219402417058611e-06, "loss": 0.1252, "step": 7614 }, { "epoch": 0.7016169899110886, "grad_norm": 0.8944899755971756, "learning_rate": 1.1213047065261922e-06, "loss": 0.1209, "step": 7615 }, { "epoch": 0.7017091260883586, "grad_norm": 0.9037644191735968, "learning_rate": 1.12066929935853e-06, "loss": 0.1197, "step": 7616 }, { "epoch": 0.7018012622656286, "grad_norm": 0.9280483040196563, "learning_rate": 1.1200340202618706e-06, "loss": 0.1194, "step": 7617 }, { "epoch": 0.7018933984428986, "grad_norm": 0.9171715200445577, "learning_rate": 1.1193988692952012e-06, "loss": 0.1111, "step": 7618 }, { "epoch": 0.7019855346201687, "grad_norm": 0.9146803179316838, "learning_rate": 1.118763846517496e-06, "loss": 0.1183, "step": 7619 }, { "epoch": 0.7020776707974387, "grad_norm": 0.923647975551502, "learning_rate": 1.1181289519877156e-06, "loss": 0.1249, "step": 7620 }, { "epoch": 0.7021698069747087, "grad_norm": 0.9747801502638038, "learning_rate": 1.1174941857648105e-06, "loss": 0.126, "step": 7621 }, { "epoch": 0.7022619431519787, "grad_norm": 0.9391905801381869, "learning_rate": 1.11685954790772e-06, "loss": 0.1226, "step": 7622 }, { "epoch": 0.7023540793292486, "grad_norm": 0.9048641404195729, "learning_rate": 1.1162250384753697e-06, "loss": 0.1193, "step": 7623 }, { "epoch": 0.7024462155065186, "grad_norm": 0.960076295126353, "learning_rate": 1.1155906575266722e-06, "loss": 0.129, "step": 7624 }, { "epoch": 0.7025383516837886, "grad_norm": 0.9925166288127805, "learning_rate": 1.1149564051205314e-06, "loss": 0.1347, "step": 7625 }, { "epoch": 0.7026304878610586, "grad_norm": 0.9426529785588942, "learning_rate": 1.114322281315837e-06, "loss": 0.1283, "step": 7626 }, { "epoch": 0.7027226240383286, "grad_norm": 0.9194194860925674, "learning_rate": 1.1136882861714692e-06, "loss": 0.1162, "step": 7627 }, { "epoch": 0.7028147602155986, "grad_norm": 0.937081948310993, "learning_rate": 1.1130544197462933e-06, "loss": 0.1226, "step": 7628 }, { "epoch": 0.7029068963928686, "grad_norm": 0.9750960465487545, "learning_rate": 1.1124206820991628e-06, "loss": 0.1211, "step": 7629 }, { "epoch": 0.7029990325701386, "grad_norm": 0.9309418428024832, "learning_rate": 1.1117870732889214e-06, "loss": 0.1249, "step": 7630 }, { "epoch": 0.7030911687474086, "grad_norm": 0.9511119877832231, "learning_rate": 1.111153593374399e-06, "loss": 0.129, "step": 7631 }, { "epoch": 0.7031833049246786, "grad_norm": 0.9528627594483212, "learning_rate": 1.1105202424144165e-06, "loss": 0.1297, "step": 7632 }, { "epoch": 0.7032754411019487, "grad_norm": 0.9814987830163073, "learning_rate": 1.109887020467779e-06, "loss": 0.1245, "step": 7633 }, { "epoch": 0.7033675772792187, "grad_norm": 0.9242159264865467, "learning_rate": 1.10925392759328e-06, "loss": 0.1288, "step": 7634 }, { "epoch": 0.7034597134564887, "grad_norm": 0.9120110671753703, "learning_rate": 1.1086209638497038e-06, "loss": 0.1262, "step": 7635 }, { "epoch": 0.7035518496337587, "grad_norm": 0.9969698964166246, "learning_rate": 1.1079881292958217e-06, "loss": 0.1417, "step": 7636 }, { "epoch": 0.7036439858110287, "grad_norm": 0.9703225929392982, "learning_rate": 1.1073554239903905e-06, "loss": 0.1427, "step": 7637 }, { "epoch": 0.7037361219882987, "grad_norm": 0.880006758354232, "learning_rate": 1.106722847992159e-06, "loss": 0.1191, "step": 7638 }, { "epoch": 0.7038282581655687, "grad_norm": 0.9694607975674124, "learning_rate": 1.1060904013598604e-06, "loss": 0.1183, "step": 7639 }, { "epoch": 0.7039203943428387, "grad_norm": 0.9711437397169369, "learning_rate": 1.1054580841522188e-06, "loss": 0.1263, "step": 7640 }, { "epoch": 0.7040125305201087, "grad_norm": 0.8726884936753815, "learning_rate": 1.1048258964279432e-06, "loss": 0.1206, "step": 7641 }, { "epoch": 0.7041046666973787, "grad_norm": 0.9384163356051172, "learning_rate": 1.1041938382457332e-06, "loss": 0.1274, "step": 7642 }, { "epoch": 0.7041968028746487, "grad_norm": 0.8797955140060947, "learning_rate": 1.1035619096642766e-06, "loss": 0.125, "step": 7643 }, { "epoch": 0.7042889390519187, "grad_norm": 0.9592333120315895, "learning_rate": 1.102930110742247e-06, "loss": 0.1305, "step": 7644 }, { "epoch": 0.7043810752291887, "grad_norm": 0.9883696119753728, "learning_rate": 1.102298441538306e-06, "loss": 0.1262, "step": 7645 }, { "epoch": 0.7044732114064588, "grad_norm": 0.9316484289506399, "learning_rate": 1.101666902111105e-06, "loss": 0.1188, "step": 7646 }, { "epoch": 0.7045653475837288, "grad_norm": 0.9159885584557372, "learning_rate": 1.1010354925192826e-06, "loss": 0.1153, "step": 7647 }, { "epoch": 0.7046574837609988, "grad_norm": 0.971625755678242, "learning_rate": 1.1004042128214664e-06, "loss": 0.1325, "step": 7648 }, { "epoch": 0.7047496199382688, "grad_norm": 0.9208399755488353, "learning_rate": 1.0997730630762697e-06, "loss": 0.1183, "step": 7649 }, { "epoch": 0.7048417561155388, "grad_norm": 0.9003396206133496, "learning_rate": 1.0991420433422936e-06, "loss": 0.1078, "step": 7650 }, { "epoch": 0.7049338922928088, "grad_norm": 0.9159612364008332, "learning_rate": 1.0985111536781298e-06, "loss": 0.1178, "step": 7651 }, { "epoch": 0.7050260284700788, "grad_norm": 0.9660724779459582, "learning_rate": 1.0978803941423572e-06, "loss": 0.129, "step": 7652 }, { "epoch": 0.7051181646473488, "grad_norm": 0.9196254695327626, "learning_rate": 1.0972497647935396e-06, "loss": 0.122, "step": 7653 }, { "epoch": 0.7052103008246188, "grad_norm": 0.9473601084641282, "learning_rate": 1.0966192656902335e-06, "loss": 0.1081, "step": 7654 }, { "epoch": 0.7053024370018888, "grad_norm": 1.0074305480369767, "learning_rate": 1.0959888968909784e-06, "loss": 0.1314, "step": 7655 }, { "epoch": 0.7053945731791588, "grad_norm": 0.9923751289787686, "learning_rate": 1.0953586584543066e-06, "loss": 0.1221, "step": 7656 }, { "epoch": 0.7054867093564288, "grad_norm": 0.8843998573581785, "learning_rate": 1.0947285504387337e-06, "loss": 0.1217, "step": 7657 }, { "epoch": 0.7055788455336988, "grad_norm": 0.8849026382738153, "learning_rate": 1.094098572902766e-06, "loss": 0.114, "step": 7658 }, { "epoch": 0.7056709817109688, "grad_norm": 0.9500578248983311, "learning_rate": 1.0934687259048975e-06, "loss": 0.1178, "step": 7659 }, { "epoch": 0.7057631178882389, "grad_norm": 0.9258739494949468, "learning_rate": 1.092839009503609e-06, "loss": 0.1229, "step": 7660 }, { "epoch": 0.7058552540655089, "grad_norm": 0.9621025980170279, "learning_rate": 1.0922094237573706e-06, "loss": 0.1261, "step": 7661 }, { "epoch": 0.7059473902427789, "grad_norm": 0.9641510931839957, "learning_rate": 1.0915799687246376e-06, "loss": 0.1221, "step": 7662 }, { "epoch": 0.7060395264200489, "grad_norm": 0.9571666499830939, "learning_rate": 1.0909506444638563e-06, "loss": 0.1247, "step": 7663 }, { "epoch": 0.7061316625973189, "grad_norm": 0.9417941506505101, "learning_rate": 1.09032145103346e-06, "loss": 0.1145, "step": 7664 }, { "epoch": 0.7062237987745889, "grad_norm": 0.8922995138858749, "learning_rate": 1.0896923884918687e-06, "loss": 0.1172, "step": 7665 }, { "epoch": 0.7063159349518588, "grad_norm": 0.9455943222219495, "learning_rate": 1.0890634568974901e-06, "loss": 0.1307, "step": 7666 }, { "epoch": 0.7064080711291288, "grad_norm": 0.8963449786751144, "learning_rate": 1.0884346563087214e-06, "loss": 0.1148, "step": 7667 }, { "epoch": 0.7065002073063988, "grad_norm": 0.9238178351632784, "learning_rate": 1.0878059867839469e-06, "loss": 0.1219, "step": 7668 }, { "epoch": 0.7065923434836688, "grad_norm": 0.9717273503839575, "learning_rate": 1.0871774483815393e-06, "loss": 0.124, "step": 7669 }, { "epoch": 0.7066844796609388, "grad_norm": 0.9020615180773476, "learning_rate": 1.0865490411598576e-06, "loss": 0.1226, "step": 7670 }, { "epoch": 0.7067766158382088, "grad_norm": 0.9052631909123191, "learning_rate": 1.0859207651772485e-06, "loss": 0.1221, "step": 7671 }, { "epoch": 0.7068687520154788, "grad_norm": 0.9363740585233611, "learning_rate": 1.0852926204920488e-06, "loss": 0.1254, "step": 7672 }, { "epoch": 0.7069608881927489, "grad_norm": 0.942073473131626, "learning_rate": 1.084664607162582e-06, "loss": 0.1197, "step": 7673 }, { "epoch": 0.7070530243700189, "grad_norm": 0.9533912073107669, "learning_rate": 1.0840367252471583e-06, "loss": 0.123, "step": 7674 }, { "epoch": 0.7071451605472889, "grad_norm": 1.0371830317376796, "learning_rate": 1.083408974804078e-06, "loss": 0.1408, "step": 7675 }, { "epoch": 0.7072372967245589, "grad_norm": 0.9234521408562314, "learning_rate": 1.082781355891626e-06, "loss": 0.1181, "step": 7676 }, { "epoch": 0.7073294329018289, "grad_norm": 0.904399098255252, "learning_rate": 1.0821538685680783e-06, "loss": 0.1183, "step": 7677 }, { "epoch": 0.7074215690790989, "grad_norm": 0.9279812511551865, "learning_rate": 1.0815265128916955e-06, "loss": 0.127, "step": 7678 }, { "epoch": 0.7075137052563689, "grad_norm": 0.8684164999023688, "learning_rate": 1.0808992889207287e-06, "loss": 0.1134, "step": 7679 }, { "epoch": 0.7076058414336389, "grad_norm": 0.9084082832703506, "learning_rate": 1.0802721967134167e-06, "loss": 0.1234, "step": 7680 }, { "epoch": 0.7076979776109089, "grad_norm": 0.9542334556040545, "learning_rate": 1.0796452363279838e-06, "loss": 0.125, "step": 7681 }, { "epoch": 0.7077901137881789, "grad_norm": 0.9552003098411618, "learning_rate": 1.079018407822643e-06, "loss": 0.1178, "step": 7682 }, { "epoch": 0.7078822499654489, "grad_norm": 0.9786307431398847, "learning_rate": 1.0783917112555956e-06, "loss": 0.1213, "step": 7683 }, { "epoch": 0.7079743861427189, "grad_norm": 0.9443212053638332, "learning_rate": 1.0777651466850308e-06, "loss": 0.123, "step": 7684 }, { "epoch": 0.7080665223199889, "grad_norm": 0.906375176394638, "learning_rate": 1.0771387141691265e-06, "loss": 0.1239, "step": 7685 }, { "epoch": 0.7081586584972589, "grad_norm": 0.9974511076859406, "learning_rate": 1.0765124137660454e-06, "loss": 0.1199, "step": 7686 }, { "epoch": 0.708250794674529, "grad_norm": 0.9252319719364072, "learning_rate": 1.075886245533939e-06, "loss": 0.1322, "step": 7687 }, { "epoch": 0.708342930851799, "grad_norm": 0.9953283487894312, "learning_rate": 1.075260209530948e-06, "loss": 0.1346, "step": 7688 }, { "epoch": 0.708435067029069, "grad_norm": 0.9675526374551723, "learning_rate": 1.0746343058151998e-06, "loss": 0.1359, "step": 7689 }, { "epoch": 0.708527203206339, "grad_norm": 0.9600303445058818, "learning_rate": 1.074008534444811e-06, "loss": 0.1341, "step": 7690 }, { "epoch": 0.708619339383609, "grad_norm": 0.8930072632299815, "learning_rate": 1.0733828954778827e-06, "loss": 0.1127, "step": 7691 }, { "epoch": 0.708711475560879, "grad_norm": 0.9679509417395482, "learning_rate": 1.0727573889725053e-06, "loss": 0.125, "step": 7692 }, { "epoch": 0.708803611738149, "grad_norm": 0.8541878797863817, "learning_rate": 1.0721320149867582e-06, "loss": 0.1176, "step": 7693 }, { "epoch": 0.708895747915419, "grad_norm": 0.9332963996090269, "learning_rate": 1.0715067735787079e-06, "loss": 0.1304, "step": 7694 }, { "epoch": 0.708987884092689, "grad_norm": 0.9552468087190065, "learning_rate": 1.0708816648064067e-06, "loss": 0.1362, "step": 7695 }, { "epoch": 0.709080020269959, "grad_norm": 0.9699586270892624, "learning_rate": 1.0702566887278975e-06, "loss": 0.1363, "step": 7696 }, { "epoch": 0.709172156447229, "grad_norm": 0.9025744016802671, "learning_rate": 1.0696318454012074e-06, "loss": 0.1194, "step": 7697 }, { "epoch": 0.709264292624499, "grad_norm": 0.9032683279099893, "learning_rate": 1.0690071348843559e-06, "loss": 0.1263, "step": 7698 }, { "epoch": 0.709356428801769, "grad_norm": 0.9876043723097531, "learning_rate": 1.0683825572353447e-06, "loss": 0.1268, "step": 7699 }, { "epoch": 0.709448564979039, "grad_norm": 0.9741498985940272, "learning_rate": 1.0677581125121672e-06, "loss": 0.1292, "step": 7700 }, { "epoch": 0.7095407011563091, "grad_norm": 0.9413448494069492, "learning_rate": 1.067133800772803e-06, "loss": 0.1328, "step": 7701 }, { "epoch": 0.7096328373335791, "grad_norm": 0.9214270401858093, "learning_rate": 1.0665096220752214e-06, "loss": 0.1256, "step": 7702 }, { "epoch": 0.7097249735108491, "grad_norm": 0.9671108647251513, "learning_rate": 1.065885576477374e-06, "loss": 0.125, "step": 7703 }, { "epoch": 0.7098171096881191, "grad_norm": 0.8983338985867325, "learning_rate": 1.0652616640372051e-06, "loss": 0.1227, "step": 7704 }, { "epoch": 0.7099092458653891, "grad_norm": 0.9167380926561612, "learning_rate": 1.064637884812645e-06, "loss": 0.1225, "step": 7705 }, { "epoch": 0.7100013820426591, "grad_norm": 0.9416855433977324, "learning_rate": 1.0640142388616128e-06, "loss": 0.1216, "step": 7706 }, { "epoch": 0.710093518219929, "grad_norm": 0.9558540627602284, "learning_rate": 1.063390726242012e-06, "loss": 0.1171, "step": 7707 }, { "epoch": 0.710185654397199, "grad_norm": 0.9049250133502065, "learning_rate": 1.062767347011738e-06, "loss": 0.1179, "step": 7708 }, { "epoch": 0.710277790574469, "grad_norm": 0.9117542944386543, "learning_rate": 1.0621441012286696e-06, "loss": 0.1249, "step": 7709 }, { "epoch": 0.710369926751739, "grad_norm": 0.9522183662298654, "learning_rate": 1.061520988950677e-06, "loss": 0.1297, "step": 7710 }, { "epoch": 0.710462062929009, "grad_norm": 0.9050768681888326, "learning_rate": 1.0608980102356146e-06, "loss": 0.1155, "step": 7711 }, { "epoch": 0.710554199106279, "grad_norm": 0.9802646477753757, "learning_rate": 1.0602751651413264e-06, "loss": 0.1335, "step": 7712 }, { "epoch": 0.710646335283549, "grad_norm": 0.9303028811692908, "learning_rate": 1.0596524537256453e-06, "loss": 0.1095, "step": 7713 }, { "epoch": 0.7107384714608191, "grad_norm": 0.9190098625800486, "learning_rate": 1.0590298760463879e-06, "loss": 0.1199, "step": 7714 }, { "epoch": 0.7108306076380891, "grad_norm": 0.94415621243913, "learning_rate": 1.0584074321613625e-06, "loss": 0.1242, "step": 7715 }, { "epoch": 0.7109227438153591, "grad_norm": 0.9319898998438171, "learning_rate": 1.0577851221283614e-06, "loss": 0.1165, "step": 7716 }, { "epoch": 0.7110148799926291, "grad_norm": 0.958963020372258, "learning_rate": 1.0571629460051665e-06, "loss": 0.1329, "step": 7717 }, { "epoch": 0.7111070161698991, "grad_norm": 0.992606267592599, "learning_rate": 1.0565409038495486e-06, "loss": 0.1371, "step": 7718 }, { "epoch": 0.7111991523471691, "grad_norm": 0.9600979422255448, "learning_rate": 1.055918995719263e-06, "loss": 0.1222, "step": 7719 }, { "epoch": 0.7112912885244391, "grad_norm": 0.922753779521535, "learning_rate": 1.0552972216720534e-06, "loss": 0.121, "step": 7720 }, { "epoch": 0.7113834247017091, "grad_norm": 0.9470964491509773, "learning_rate": 1.054675581765652e-06, "loss": 0.1318, "step": 7721 }, { "epoch": 0.7114755608789791, "grad_norm": 0.9243871817173555, "learning_rate": 1.0540540760577785e-06, "loss": 0.1285, "step": 7722 }, { "epoch": 0.7115676970562491, "grad_norm": 0.9789267036795136, "learning_rate": 1.0534327046061404e-06, "loss": 0.1306, "step": 7723 }, { "epoch": 0.7116598332335191, "grad_norm": 0.9684236064786368, "learning_rate": 1.0528114674684318e-06, "loss": 0.1198, "step": 7724 }, { "epoch": 0.7117519694107891, "grad_norm": 0.9082055129941168, "learning_rate": 1.0521903647023327e-06, "loss": 0.126, "step": 7725 }, { "epoch": 0.7118441055880591, "grad_norm": 0.8603222142268356, "learning_rate": 1.0515693963655144e-06, "loss": 0.1087, "step": 7726 }, { "epoch": 0.7119362417653291, "grad_norm": 0.907707522654929, "learning_rate": 1.0509485625156342e-06, "loss": 0.1198, "step": 7727 }, { "epoch": 0.7120283779425992, "grad_norm": 0.9058331237453139, "learning_rate": 1.0503278632103353e-06, "loss": 0.1204, "step": 7728 }, { "epoch": 0.7121205141198692, "grad_norm": 0.9647120715897719, "learning_rate": 1.0497072985072509e-06, "loss": 0.138, "step": 7729 }, { "epoch": 0.7122126502971392, "grad_norm": 0.9429718054470461, "learning_rate": 1.0490868684639994e-06, "loss": 0.1377, "step": 7730 }, { "epoch": 0.7123047864744092, "grad_norm": 1.019549767972903, "learning_rate": 1.0484665731381892e-06, "loss": 0.1275, "step": 7731 }, { "epoch": 0.7123969226516792, "grad_norm": 0.8914456137526924, "learning_rate": 1.0478464125874126e-06, "loss": 0.1211, "step": 7732 }, { "epoch": 0.7124890588289492, "grad_norm": 0.8732468987409082, "learning_rate": 1.047226386869253e-06, "loss": 0.1144, "step": 7733 }, { "epoch": 0.7125811950062192, "grad_norm": 0.9361439907322878, "learning_rate": 1.046606496041281e-06, "loss": 0.1236, "step": 7734 }, { "epoch": 0.7126733311834892, "grad_norm": 0.8808363617293428, "learning_rate": 1.0459867401610519e-06, "loss": 0.1174, "step": 7735 }, { "epoch": 0.7127654673607592, "grad_norm": 0.9359884244207574, "learning_rate": 1.0453671192861095e-06, "loss": 0.1251, "step": 7736 }, { "epoch": 0.7128576035380292, "grad_norm": 0.9779404856607452, "learning_rate": 1.0447476334739867e-06, "loss": 0.1321, "step": 7737 }, { "epoch": 0.7129497397152992, "grad_norm": 0.9287380972756585, "learning_rate": 1.0441282827822027e-06, "loss": 0.1257, "step": 7738 }, { "epoch": 0.7130418758925692, "grad_norm": 0.9621747448313562, "learning_rate": 1.0435090672682655e-06, "loss": 0.1311, "step": 7739 }, { "epoch": 0.7131340120698392, "grad_norm": 0.9258715491762729, "learning_rate": 1.042889986989668e-06, "loss": 0.1309, "step": 7740 }, { "epoch": 0.7132261482471093, "grad_norm": 0.9039074847686032, "learning_rate": 1.0422710420038912e-06, "loss": 0.1259, "step": 7741 }, { "epoch": 0.7133182844243793, "grad_norm": 0.8822465315547272, "learning_rate": 1.0416522323684048e-06, "loss": 0.1116, "step": 7742 }, { "epoch": 0.7134104206016493, "grad_norm": 0.952450362988351, "learning_rate": 1.0410335581406657e-06, "loss": 0.1275, "step": 7743 }, { "epoch": 0.7135025567789193, "grad_norm": 0.9446338466948969, "learning_rate": 1.0404150193781187e-06, "loss": 0.1285, "step": 7744 }, { "epoch": 0.7135946929561893, "grad_norm": 0.886851570227021, "learning_rate": 1.0397966161381943e-06, "loss": 0.1163, "step": 7745 }, { "epoch": 0.7136868291334593, "grad_norm": 0.9493597158807928, "learning_rate": 1.03917834847831e-06, "loss": 0.1261, "step": 7746 }, { "epoch": 0.7137789653107293, "grad_norm": 0.872603891886761, "learning_rate": 1.0385602164558735e-06, "loss": 0.109, "step": 7747 }, { "epoch": 0.7138711014879993, "grad_norm": 0.9153516353588165, "learning_rate": 1.037942220128279e-06, "loss": 0.1214, "step": 7748 }, { "epoch": 0.7139632376652693, "grad_norm": 0.918045785934361, "learning_rate": 1.0373243595529058e-06, "loss": 0.1218, "step": 7749 }, { "epoch": 0.7140553738425393, "grad_norm": 0.9248276605451051, "learning_rate": 1.0367066347871243e-06, "loss": 0.118, "step": 7750 }, { "epoch": 0.7141475100198093, "grad_norm": 0.9388758721031797, "learning_rate": 1.0360890458882882e-06, "loss": 0.1277, "step": 7751 }, { "epoch": 0.7142396461970792, "grad_norm": 0.8690529484848437, "learning_rate": 1.0354715929137429e-06, "loss": 0.1118, "step": 7752 }, { "epoch": 0.7143317823743492, "grad_norm": 0.8640578234777775, "learning_rate": 1.0348542759208166e-06, "loss": 0.1122, "step": 7753 }, { "epoch": 0.7144239185516192, "grad_norm": 0.9843435962019589, "learning_rate": 1.0342370949668287e-06, "loss": 0.1282, "step": 7754 }, { "epoch": 0.7145160547288893, "grad_norm": 0.8969391758762647, "learning_rate": 1.0336200501090848e-06, "loss": 0.118, "step": 7755 }, { "epoch": 0.7146081909061593, "grad_norm": 0.9532247647644668, "learning_rate": 1.0330031414048775e-06, "loss": 0.1266, "step": 7756 }, { "epoch": 0.7147003270834293, "grad_norm": 0.9088908017504251, "learning_rate": 1.0323863689114851e-06, "loss": 0.1212, "step": 7757 }, { "epoch": 0.7147924632606993, "grad_norm": 0.906425226852019, "learning_rate": 1.0317697326861766e-06, "loss": 0.1239, "step": 7758 }, { "epoch": 0.7148845994379693, "grad_norm": 0.9227457737134785, "learning_rate": 1.0311532327862064e-06, "loss": 0.1219, "step": 7759 }, { "epoch": 0.7149767356152393, "grad_norm": 0.9987736939867745, "learning_rate": 1.0305368692688175e-06, "loss": 0.1372, "step": 7760 }, { "epoch": 0.7150688717925093, "grad_norm": 0.9714466510612396, "learning_rate": 1.0299206421912382e-06, "loss": 0.1201, "step": 7761 }, { "epoch": 0.7151610079697793, "grad_norm": 0.8732980566662378, "learning_rate": 1.0293045516106848e-06, "loss": 0.1085, "step": 7762 }, { "epoch": 0.7152531441470493, "grad_norm": 0.9074801485331541, "learning_rate": 1.0286885975843621e-06, "loss": 0.1145, "step": 7763 }, { "epoch": 0.7153452803243193, "grad_norm": 0.9109621722743166, "learning_rate": 1.0280727801694624e-06, "loss": 0.1216, "step": 7764 }, { "epoch": 0.7154374165015893, "grad_norm": 0.922807759731232, "learning_rate": 1.0274570994231622e-06, "loss": 0.1254, "step": 7765 }, { "epoch": 0.7155295526788593, "grad_norm": 0.9349066676402771, "learning_rate": 1.02684155540263e-06, "loss": 0.1273, "step": 7766 }, { "epoch": 0.7156216888561293, "grad_norm": 0.8751564876537657, "learning_rate": 1.026226148165017e-06, "loss": 0.1203, "step": 7767 }, { "epoch": 0.7157138250333993, "grad_norm": 0.917719645814074, "learning_rate": 1.0256108777674656e-06, "loss": 0.1198, "step": 7768 }, { "epoch": 0.7158059612106694, "grad_norm": 0.8858931449744197, "learning_rate": 1.024995744267102e-06, "loss": 0.1157, "step": 7769 }, { "epoch": 0.7158980973879394, "grad_norm": 0.8912225251790697, "learning_rate": 1.0243807477210423e-06, "loss": 0.13, "step": 7770 }, { "epoch": 0.7159902335652094, "grad_norm": 0.8764687308266974, "learning_rate": 1.0237658881863898e-06, "loss": 0.1114, "step": 7771 }, { "epoch": 0.7160823697424794, "grad_norm": 0.9088978751333338, "learning_rate": 1.0231511657202327e-06, "loss": 0.1203, "step": 7772 }, { "epoch": 0.7161745059197494, "grad_norm": 0.9229428912327712, "learning_rate": 1.0225365803796498e-06, "loss": 0.1236, "step": 7773 }, { "epoch": 0.7162666420970194, "grad_norm": 0.8660219223718931, "learning_rate": 1.0219221322217032e-06, "loss": 0.1101, "step": 7774 }, { "epoch": 0.7163587782742894, "grad_norm": 0.9744313899778617, "learning_rate": 1.0213078213034457e-06, "loss": 0.1358, "step": 7775 }, { "epoch": 0.7164509144515594, "grad_norm": 0.9000387318873999, "learning_rate": 1.0206936476819165e-06, "loss": 0.1147, "step": 7776 }, { "epoch": 0.7165430506288294, "grad_norm": 0.9151921247508992, "learning_rate": 1.0200796114141428e-06, "loss": 0.1173, "step": 7777 }, { "epoch": 0.7166351868060994, "grad_norm": 0.9873702603847861, "learning_rate": 1.0194657125571347e-06, "loss": 0.1267, "step": 7778 }, { "epoch": 0.7167273229833694, "grad_norm": 0.8744889522529582, "learning_rate": 1.0188519511678946e-06, "loss": 0.1134, "step": 7779 }, { "epoch": 0.7168194591606394, "grad_norm": 0.9000947869027155, "learning_rate": 1.0182383273034102e-06, "loss": 0.1138, "step": 7780 }, { "epoch": 0.7169115953379094, "grad_norm": 0.9534692999739216, "learning_rate": 1.0176248410206577e-06, "loss": 0.131, "step": 7781 }, { "epoch": 0.7170037315151795, "grad_norm": 0.9343304549611972, "learning_rate": 1.017011492376597e-06, "loss": 0.1226, "step": 7782 }, { "epoch": 0.7170958676924495, "grad_norm": 0.9079633458736804, "learning_rate": 1.0163982814281797e-06, "loss": 0.1081, "step": 7783 }, { "epoch": 0.7171880038697195, "grad_norm": 0.945362100863493, "learning_rate": 1.0157852082323411e-06, "loss": 0.117, "step": 7784 }, { "epoch": 0.7172801400469895, "grad_norm": 0.9305854058984073, "learning_rate": 1.0151722728460064e-06, "loss": 0.1232, "step": 7785 }, { "epoch": 0.7173722762242595, "grad_norm": 0.9193933839229599, "learning_rate": 1.0145594753260849e-06, "loss": 0.1162, "step": 7786 }, { "epoch": 0.7174644124015295, "grad_norm": 0.9311548944071399, "learning_rate": 1.0139468157294762e-06, "loss": 0.1218, "step": 7787 }, { "epoch": 0.7175565485787995, "grad_norm": 0.9143005687332517, "learning_rate": 1.0133342941130664e-06, "loss": 0.1112, "step": 7788 }, { "epoch": 0.7176486847560695, "grad_norm": 0.9207452007285798, "learning_rate": 1.0127219105337274e-06, "loss": 0.1259, "step": 7789 }, { "epoch": 0.7177408209333395, "grad_norm": 0.9215304391889503, "learning_rate": 1.0121096650483182e-06, "loss": 0.1169, "step": 7790 }, { "epoch": 0.7178329571106095, "grad_norm": 0.8909392126110938, "learning_rate": 1.0114975577136866e-06, "loss": 0.1194, "step": 7791 }, { "epoch": 0.7179250932878795, "grad_norm": 0.9693442968946673, "learning_rate": 1.010885588586667e-06, "loss": 0.1277, "step": 7792 }, { "epoch": 0.7180172294651495, "grad_norm": 0.9316443174949289, "learning_rate": 1.0102737577240818e-06, "loss": 0.1202, "step": 7793 }, { "epoch": 0.7181093656424195, "grad_norm": 0.9187961844110014, "learning_rate": 1.0096620651827382e-06, "loss": 0.1214, "step": 7794 }, { "epoch": 0.7182015018196894, "grad_norm": 0.8998126418946526, "learning_rate": 1.0090505110194315e-06, "loss": 0.1128, "step": 7795 }, { "epoch": 0.7182936379969596, "grad_norm": 0.9585336588680275, "learning_rate": 1.0084390952909456e-06, "loss": 0.1269, "step": 7796 }, { "epoch": 0.7183857741742296, "grad_norm": 0.9211680803712629, "learning_rate": 1.0078278180540507e-06, "loss": 0.1154, "step": 7797 }, { "epoch": 0.7184779103514995, "grad_norm": 0.8997122910133363, "learning_rate": 1.0072166793655027e-06, "loss": 0.1106, "step": 7798 }, { "epoch": 0.7185700465287695, "grad_norm": 0.9414360029244899, "learning_rate": 1.0066056792820478e-06, "loss": 0.1301, "step": 7799 }, { "epoch": 0.7186621827060395, "grad_norm": 0.8439284973205966, "learning_rate": 1.0059948178604154e-06, "loss": 0.1053, "step": 7800 }, { "epoch": 0.7187543188833095, "grad_norm": 0.8981596749183159, "learning_rate": 1.0053840951573247e-06, "loss": 0.1349, "step": 7801 }, { "epoch": 0.7188464550605795, "grad_norm": 0.8912489563990994, "learning_rate": 1.0047735112294827e-06, "loss": 0.1157, "step": 7802 }, { "epoch": 0.7189385912378495, "grad_norm": 0.9025423474052598, "learning_rate": 1.00416306613358e-06, "loss": 0.1197, "step": 7803 }, { "epoch": 0.7190307274151195, "grad_norm": 0.9318053328867276, "learning_rate": 1.0035527599262988e-06, "loss": 0.1341, "step": 7804 }, { "epoch": 0.7191228635923895, "grad_norm": 0.9344718743643173, "learning_rate": 1.0029425926643035e-06, "loss": 0.1309, "step": 7805 }, { "epoch": 0.7192149997696595, "grad_norm": 0.9345501597826967, "learning_rate": 1.0023325644042508e-06, "loss": 0.1228, "step": 7806 }, { "epoch": 0.7193071359469295, "grad_norm": 0.8698344847523616, "learning_rate": 1.0017226752027798e-06, "loss": 0.1173, "step": 7807 }, { "epoch": 0.7193992721241995, "grad_norm": 0.8908033217302351, "learning_rate": 1.0011129251165198e-06, "loss": 0.1264, "step": 7808 }, { "epoch": 0.7194914083014696, "grad_norm": 0.8889868667942962, "learning_rate": 1.0005033142020868e-06, "loss": 0.1195, "step": 7809 }, { "epoch": 0.7195835444787396, "grad_norm": 0.8608771688254551, "learning_rate": 9.998938425160822e-07, "loss": 0.1127, "step": 7810 }, { "epoch": 0.7196756806560096, "grad_norm": 0.8824505031638588, "learning_rate": 9.992845101150949e-07, "loss": 0.1177, "step": 7811 }, { "epoch": 0.7197678168332796, "grad_norm": 0.9015088142832688, "learning_rate": 9.986753170557026e-07, "loss": 0.1272, "step": 7812 }, { "epoch": 0.7198599530105496, "grad_norm": 0.9486135831290708, "learning_rate": 9.980662633944687e-07, "loss": 0.1306, "step": 7813 }, { "epoch": 0.7199520891878196, "grad_norm": 0.9331244271896649, "learning_rate": 9.974573491879447e-07, "loss": 0.1187, "step": 7814 }, { "epoch": 0.7200442253650896, "grad_norm": 1.0721474757596579, "learning_rate": 9.968485744926673e-07, "loss": 0.1422, "step": 7815 }, { "epoch": 0.7201363615423596, "grad_norm": 0.9312764151210932, "learning_rate": 9.962399393651608e-07, "loss": 0.1283, "step": 7816 }, { "epoch": 0.7202284977196296, "grad_norm": 0.9758367075704081, "learning_rate": 9.95631443861938e-07, "loss": 0.1113, "step": 7817 }, { "epoch": 0.7203206338968996, "grad_norm": 0.9924718172406113, "learning_rate": 9.95023088039498e-07, "loss": 0.1239, "step": 7818 }, { "epoch": 0.7204127700741696, "grad_norm": 0.8883754010136571, "learning_rate": 9.94414871954326e-07, "loss": 0.1211, "step": 7819 }, { "epoch": 0.7205049062514396, "grad_norm": 0.9535928744680094, "learning_rate": 9.938067956628955e-07, "loss": 0.1275, "step": 7820 }, { "epoch": 0.7205970424287096, "grad_norm": 0.9243576442784894, "learning_rate": 9.931988592216654e-07, "loss": 0.1234, "step": 7821 }, { "epoch": 0.7206891786059796, "grad_norm": 0.8921816864655784, "learning_rate": 9.925910626870841e-07, "loss": 0.1247, "step": 7822 }, { "epoch": 0.7207813147832497, "grad_norm": 1.0087296503855505, "learning_rate": 9.919834061155841e-07, "loss": 0.1403, "step": 7823 }, { "epoch": 0.7208734509605197, "grad_norm": 0.9161566983649667, "learning_rate": 9.913758895635872e-07, "loss": 0.1207, "step": 7824 }, { "epoch": 0.7209655871377897, "grad_norm": 0.9466811448920842, "learning_rate": 9.907685130875022e-07, "loss": 0.1305, "step": 7825 }, { "epoch": 0.7210577233150597, "grad_norm": 0.9427068503028089, "learning_rate": 9.901612767437233e-07, "loss": 0.1322, "step": 7826 }, { "epoch": 0.7211498594923297, "grad_norm": 0.8997398397326478, "learning_rate": 9.89554180588631e-07, "loss": 0.126, "step": 7827 }, { "epoch": 0.7212419956695997, "grad_norm": 0.9158172595074155, "learning_rate": 9.889472246785962e-07, "loss": 0.1259, "step": 7828 }, { "epoch": 0.7213341318468697, "grad_norm": 0.9590521031356755, "learning_rate": 9.883404090699739e-07, "loss": 0.123, "step": 7829 }, { "epoch": 0.7214262680241397, "grad_norm": 0.9197380638874784, "learning_rate": 9.877337338191081e-07, "loss": 0.1255, "step": 7830 }, { "epoch": 0.7215184042014097, "grad_norm": 0.9495198865360079, "learning_rate": 9.871271989823279e-07, "loss": 0.1257, "step": 7831 }, { "epoch": 0.7216105403786797, "grad_norm": 0.9618388100017592, "learning_rate": 9.865208046159493e-07, "loss": 0.1222, "step": 7832 }, { "epoch": 0.7217026765559497, "grad_norm": 0.9187912885939795, "learning_rate": 9.85914550776277e-07, "loss": 0.1315, "step": 7833 }, { "epoch": 0.7217948127332197, "grad_norm": 0.9463336421029137, "learning_rate": 9.853084375196013e-07, "loss": 0.1376, "step": 7834 }, { "epoch": 0.7218869489104897, "grad_norm": 0.8998408177725145, "learning_rate": 9.847024649022014e-07, "loss": 0.1134, "step": 7835 }, { "epoch": 0.7219790850877598, "grad_norm": 0.9016223772783057, "learning_rate": 9.840966329803404e-07, "loss": 0.1229, "step": 7836 }, { "epoch": 0.7220712212650298, "grad_norm": 0.9254109877176989, "learning_rate": 9.834909418102694e-07, "loss": 0.1241, "step": 7837 }, { "epoch": 0.7221633574422998, "grad_norm": 0.9144500299198667, "learning_rate": 9.828853914482276e-07, "loss": 0.1148, "step": 7838 }, { "epoch": 0.7222554936195698, "grad_norm": 0.8920230252999829, "learning_rate": 9.822799819504413e-07, "loss": 0.1161, "step": 7839 }, { "epoch": 0.7223476297968398, "grad_norm": 0.865650754828013, "learning_rate": 9.816747133731213e-07, "loss": 0.1116, "step": 7840 }, { "epoch": 0.7224397659741097, "grad_norm": 0.9425761369193745, "learning_rate": 9.810695857724685e-07, "loss": 0.1177, "step": 7841 }, { "epoch": 0.7225319021513797, "grad_norm": 0.9392678290824935, "learning_rate": 9.80464599204667e-07, "loss": 0.1274, "step": 7842 }, { "epoch": 0.7226240383286497, "grad_norm": 0.8952324686337915, "learning_rate": 9.798597537258921e-07, "loss": 0.118, "step": 7843 }, { "epoch": 0.7227161745059197, "grad_norm": 0.9485924421467364, "learning_rate": 9.79255049392302e-07, "loss": 0.1253, "step": 7844 }, { "epoch": 0.7228083106831897, "grad_norm": 0.9258101069064801, "learning_rate": 9.78650486260044e-07, "loss": 0.1185, "step": 7845 }, { "epoch": 0.7229004468604597, "grad_norm": 0.9892771454693859, "learning_rate": 9.78046064385253e-07, "loss": 0.1191, "step": 7846 }, { "epoch": 0.7229925830377297, "grad_norm": 0.9281111956495237, "learning_rate": 9.774417838240485e-07, "loss": 0.1203, "step": 7847 }, { "epoch": 0.7230847192149997, "grad_norm": 0.9200675350911685, "learning_rate": 9.768376446325376e-07, "loss": 0.1163, "step": 7848 }, { "epoch": 0.7231768553922697, "grad_norm": 0.8588371158345436, "learning_rate": 9.762336468668151e-07, "loss": 0.1151, "step": 7849 }, { "epoch": 0.7232689915695398, "grad_norm": 0.848370777709939, "learning_rate": 9.756297905829627e-07, "loss": 0.111, "step": 7850 }, { "epoch": 0.7233611277468098, "grad_norm": 0.8984725801587228, "learning_rate": 9.75026075837049e-07, "loss": 0.1085, "step": 7851 }, { "epoch": 0.7234532639240798, "grad_norm": 1.0168488186269962, "learning_rate": 9.744225026851284e-07, "loss": 0.125, "step": 7852 }, { "epoch": 0.7235454001013498, "grad_norm": 0.8882011781278885, "learning_rate": 9.738190711832415e-07, "loss": 0.1143, "step": 7853 }, { "epoch": 0.7236375362786198, "grad_norm": 0.9282850267501597, "learning_rate": 9.732157813874185e-07, "loss": 0.1284, "step": 7854 }, { "epoch": 0.7237296724558898, "grad_norm": 0.9204717778431273, "learning_rate": 9.72612633353675e-07, "loss": 0.1117, "step": 7855 }, { "epoch": 0.7238218086331598, "grad_norm": 0.8853711615838081, "learning_rate": 9.720096271380122e-07, "loss": 0.1122, "step": 7856 }, { "epoch": 0.7239139448104298, "grad_norm": 0.9593573703467069, "learning_rate": 9.714067627964199e-07, "loss": 0.1265, "step": 7857 }, { "epoch": 0.7240060809876998, "grad_norm": 0.8933130363258317, "learning_rate": 9.708040403848752e-07, "loss": 0.1265, "step": 7858 }, { "epoch": 0.7240982171649698, "grad_norm": 0.9035103404027475, "learning_rate": 9.70201459959339e-07, "loss": 0.1134, "step": 7859 }, { "epoch": 0.7241903533422398, "grad_norm": 0.9124284467326477, "learning_rate": 9.695990215757625e-07, "loss": 0.1183, "step": 7860 }, { "epoch": 0.7242824895195098, "grad_norm": 0.9554134809640706, "learning_rate": 9.689967252900809e-07, "loss": 0.1205, "step": 7861 }, { "epoch": 0.7243746256967798, "grad_norm": 0.947696514674594, "learning_rate": 9.683945711582181e-07, "loss": 0.1195, "step": 7862 }, { "epoch": 0.7244667618740498, "grad_norm": 0.9489888944270076, "learning_rate": 9.677925592360851e-07, "loss": 0.1353, "step": 7863 }, { "epoch": 0.7245588980513199, "grad_norm": 0.9408998478748832, "learning_rate": 9.671906895795779e-07, "loss": 0.1206, "step": 7864 }, { "epoch": 0.7246510342285899, "grad_norm": 0.9700947586152011, "learning_rate": 9.665889622445792e-07, "loss": 0.1356, "step": 7865 }, { "epoch": 0.7247431704058599, "grad_norm": 0.961635334352596, "learning_rate": 9.659873772869601e-07, "loss": 0.121, "step": 7866 }, { "epoch": 0.7248353065831299, "grad_norm": 0.9745334075339324, "learning_rate": 9.653859347625786e-07, "loss": 0.1267, "step": 7867 }, { "epoch": 0.7249274427603999, "grad_norm": 0.8762381292464243, "learning_rate": 9.647846347272788e-07, "loss": 0.1128, "step": 7868 }, { "epoch": 0.7250195789376699, "grad_norm": 0.9462395663888918, "learning_rate": 9.64183477236891e-07, "loss": 0.1276, "step": 7869 }, { "epoch": 0.7251117151149399, "grad_norm": 0.9656221813969156, "learning_rate": 9.635824623472317e-07, "loss": 0.1354, "step": 7870 }, { "epoch": 0.7252038512922099, "grad_norm": 0.9520423537311674, "learning_rate": 9.629815901141062e-07, "loss": 0.1242, "step": 7871 }, { "epoch": 0.7252959874694799, "grad_norm": 0.9210581281415772, "learning_rate": 9.623808605933063e-07, "loss": 0.1182, "step": 7872 }, { "epoch": 0.7253881236467499, "grad_norm": 0.8821609441885799, "learning_rate": 9.617802738406082e-07, "loss": 0.1125, "step": 7873 }, { "epoch": 0.7254802598240199, "grad_norm": 0.8783025714845024, "learning_rate": 9.611798299117778e-07, "loss": 0.1193, "step": 7874 }, { "epoch": 0.7255723960012899, "grad_norm": 0.9102543279448607, "learning_rate": 9.605795288625652e-07, "loss": 0.118, "step": 7875 }, { "epoch": 0.7256645321785599, "grad_norm": 0.923045665646691, "learning_rate": 9.599793707487098e-07, "loss": 0.1302, "step": 7876 }, { "epoch": 0.72575666835583, "grad_norm": 0.9074231261058356, "learning_rate": 9.593793556259347e-07, "loss": 0.1216, "step": 7877 }, { "epoch": 0.7258488045331, "grad_norm": 0.9150292738479361, "learning_rate": 9.587794835499523e-07, "loss": 0.1231, "step": 7878 }, { "epoch": 0.72594094071037, "grad_norm": 0.8998385147836466, "learning_rate": 9.581797545764614e-07, "loss": 0.1155, "step": 7879 }, { "epoch": 0.72603307688764, "grad_norm": 0.9142946009765255, "learning_rate": 9.575801687611464e-07, "loss": 0.1185, "step": 7880 }, { "epoch": 0.72612521306491, "grad_norm": 1.0152010957770305, "learning_rate": 9.569807261596779e-07, "loss": 0.1298, "step": 7881 }, { "epoch": 0.72621734924218, "grad_norm": 0.9170396942900356, "learning_rate": 9.56381426827715e-07, "loss": 0.1248, "step": 7882 }, { "epoch": 0.72630948541945, "grad_norm": 0.9811977448714191, "learning_rate": 9.557822708209025e-07, "loss": 0.1295, "step": 7883 }, { "epoch": 0.72640162159672, "grad_norm": 0.9161193726605645, "learning_rate": 9.551832581948733e-07, "loss": 0.1181, "step": 7884 }, { "epoch": 0.72649375777399, "grad_norm": 0.9462600455397618, "learning_rate": 9.54584389005245e-07, "loss": 0.1184, "step": 7885 }, { "epoch": 0.7265858939512599, "grad_norm": 0.9143213594519252, "learning_rate": 9.539856633076217e-07, "loss": 0.1144, "step": 7886 }, { "epoch": 0.7266780301285299, "grad_norm": 0.8958649242270569, "learning_rate": 9.533870811575957e-07, "loss": 0.1206, "step": 7887 }, { "epoch": 0.7267701663057999, "grad_norm": 1.0084483158787285, "learning_rate": 9.527886426107458e-07, "loss": 0.132, "step": 7888 }, { "epoch": 0.7268623024830699, "grad_norm": 0.8899448954141135, "learning_rate": 9.52190347722638e-07, "loss": 0.1217, "step": 7889 }, { "epoch": 0.7269544386603399, "grad_norm": 0.9445700892184191, "learning_rate": 9.515921965488226e-07, "loss": 0.1214, "step": 7890 }, { "epoch": 0.72704657483761, "grad_norm": 0.9425465602318627, "learning_rate": 9.509941891448376e-07, "loss": 0.113, "step": 7891 }, { "epoch": 0.72713871101488, "grad_norm": 0.9188640731856463, "learning_rate": 9.503963255662091e-07, "loss": 0.1235, "step": 7892 }, { "epoch": 0.72723084719215, "grad_norm": 0.9183542312222722, "learning_rate": 9.497986058684491e-07, "loss": 0.124, "step": 7893 }, { "epoch": 0.72732298336942, "grad_norm": 0.9609361929729627, "learning_rate": 9.492010301070548e-07, "loss": 0.123, "step": 7894 }, { "epoch": 0.72741511954669, "grad_norm": 0.920395848689612, "learning_rate": 9.486035983375125e-07, "loss": 0.1203, "step": 7895 }, { "epoch": 0.72750725572396, "grad_norm": 0.8742457567724701, "learning_rate": 9.48006310615292e-07, "loss": 0.1061, "step": 7896 }, { "epoch": 0.72759939190123, "grad_norm": 0.9205268230772412, "learning_rate": 9.474091669958538e-07, "loss": 0.1166, "step": 7897 }, { "epoch": 0.7276915280785, "grad_norm": 0.9528723866899338, "learning_rate": 9.468121675346406e-07, "loss": 0.1117, "step": 7898 }, { "epoch": 0.72778366425577, "grad_norm": 0.8977194231147801, "learning_rate": 9.462153122870846e-07, "loss": 0.1227, "step": 7899 }, { "epoch": 0.72787580043304, "grad_norm": 0.9545650384877833, "learning_rate": 9.456186013086049e-07, "loss": 0.1275, "step": 7900 }, { "epoch": 0.72796793661031, "grad_norm": 0.9970229892888035, "learning_rate": 9.450220346546057e-07, "loss": 0.1252, "step": 7901 }, { "epoch": 0.72806007278758, "grad_norm": 0.9069698030197497, "learning_rate": 9.444256123804768e-07, "loss": 0.1342, "step": 7902 }, { "epoch": 0.72815220896485, "grad_norm": 0.917366478706477, "learning_rate": 9.438293345415972e-07, "loss": 0.1233, "step": 7903 }, { "epoch": 0.7282443451421201, "grad_norm": 0.9816361473567158, "learning_rate": 9.432332011933315e-07, "loss": 0.119, "step": 7904 }, { "epoch": 0.7283364813193901, "grad_norm": 0.912167553796657, "learning_rate": 9.426372123910313e-07, "loss": 0.1209, "step": 7905 }, { "epoch": 0.7284286174966601, "grad_norm": 0.9049561632030253, "learning_rate": 9.420413681900337e-07, "loss": 0.1126, "step": 7906 }, { "epoch": 0.7285207536739301, "grad_norm": 0.9459876967547667, "learning_rate": 9.414456686456619e-07, "loss": 0.1184, "step": 7907 }, { "epoch": 0.7286128898512001, "grad_norm": 0.9084561231139721, "learning_rate": 9.408501138132273e-07, "loss": 0.1154, "step": 7908 }, { "epoch": 0.7287050260284701, "grad_norm": 0.9798235306309518, "learning_rate": 9.402547037480284e-07, "loss": 0.1241, "step": 7909 }, { "epoch": 0.7287971622057401, "grad_norm": 0.9169403190088212, "learning_rate": 9.396594385053473e-07, "loss": 0.1188, "step": 7910 }, { "epoch": 0.7288892983830101, "grad_norm": 0.8842824376641447, "learning_rate": 9.39064318140456e-07, "loss": 0.1157, "step": 7911 }, { "epoch": 0.7289814345602801, "grad_norm": 0.8815960009445793, "learning_rate": 9.3846934270861e-07, "loss": 0.1129, "step": 7912 }, { "epoch": 0.7290735707375501, "grad_norm": 0.8735058660585312, "learning_rate": 9.378745122650545e-07, "loss": 0.0989, "step": 7913 }, { "epoch": 0.7291657069148201, "grad_norm": 0.9964407796113233, "learning_rate": 9.372798268650177e-07, "loss": 0.128, "step": 7914 }, { "epoch": 0.7292578430920901, "grad_norm": 0.9077958451134759, "learning_rate": 9.366852865637171e-07, "loss": 0.1119, "step": 7915 }, { "epoch": 0.7293499792693601, "grad_norm": 0.9191567318461464, "learning_rate": 9.360908914163569e-07, "loss": 0.1165, "step": 7916 }, { "epoch": 0.7294421154466301, "grad_norm": 0.9137556085419295, "learning_rate": 9.354966414781247e-07, "loss": 0.1151, "step": 7917 }, { "epoch": 0.7295342516239002, "grad_norm": 0.9377389661455451, "learning_rate": 9.349025368041989e-07, "loss": 0.1299, "step": 7918 }, { "epoch": 0.7296263878011702, "grad_norm": 0.9697573167162219, "learning_rate": 9.343085774497399e-07, "loss": 0.1303, "step": 7919 }, { "epoch": 0.7297185239784402, "grad_norm": 0.9276544404006211, "learning_rate": 9.337147634698979e-07, "loss": 0.1114, "step": 7920 }, { "epoch": 0.7298106601557102, "grad_norm": 0.9316178448679642, "learning_rate": 9.331210949198097e-07, "loss": 0.1186, "step": 7921 }, { "epoch": 0.7299027963329802, "grad_norm": 0.9297104126646819, "learning_rate": 9.325275718545962e-07, "loss": 0.1175, "step": 7922 }, { "epoch": 0.7299949325102502, "grad_norm": 0.9459703058813975, "learning_rate": 9.319341943293659e-07, "loss": 0.1143, "step": 7923 }, { "epoch": 0.7300870686875202, "grad_norm": 0.903636093967031, "learning_rate": 9.31340962399214e-07, "loss": 0.1131, "step": 7924 }, { "epoch": 0.7301792048647902, "grad_norm": 0.9130854765962986, "learning_rate": 9.307478761192229e-07, "loss": 0.1215, "step": 7925 }, { "epoch": 0.7302713410420602, "grad_norm": 0.9250366455134187, "learning_rate": 9.301549355444611e-07, "loss": 0.1148, "step": 7926 }, { "epoch": 0.7303634772193301, "grad_norm": 0.9753728160621229, "learning_rate": 9.295621407299824e-07, "loss": 0.1234, "step": 7927 }, { "epoch": 0.7304556133966001, "grad_norm": 0.901388541959924, "learning_rate": 9.289694917308273e-07, "loss": 0.1241, "step": 7928 }, { "epoch": 0.7305477495738701, "grad_norm": 0.9071666280203143, "learning_rate": 9.283769886020238e-07, "loss": 0.1211, "step": 7929 }, { "epoch": 0.7306398857511401, "grad_norm": 0.9197162146244954, "learning_rate": 9.277846313985869e-07, "loss": 0.1112, "step": 7930 }, { "epoch": 0.7307320219284101, "grad_norm": 0.9720399464274628, "learning_rate": 9.271924201755153e-07, "loss": 0.1291, "step": 7931 }, { "epoch": 0.7308241581056802, "grad_norm": 0.9462594696606331, "learning_rate": 9.26600354987797e-07, "loss": 0.1146, "step": 7932 }, { "epoch": 0.7309162942829502, "grad_norm": 0.9549579771749659, "learning_rate": 9.260084358904056e-07, "loss": 0.1253, "step": 7933 }, { "epoch": 0.7310084304602202, "grad_norm": 0.9346643983496812, "learning_rate": 9.254166629383005e-07, "loss": 0.118, "step": 7934 }, { "epoch": 0.7311005666374902, "grad_norm": 0.9197443173940072, "learning_rate": 9.24825036186427e-07, "loss": 0.1155, "step": 7935 }, { "epoch": 0.7311927028147602, "grad_norm": 0.9111826786059956, "learning_rate": 9.242335556897181e-07, "loss": 0.125, "step": 7936 }, { "epoch": 0.7312848389920302, "grad_norm": 0.8963685992216108, "learning_rate": 9.236422215030932e-07, "loss": 0.1071, "step": 7937 }, { "epoch": 0.7313769751693002, "grad_norm": 0.9996120834457425, "learning_rate": 9.230510336814586e-07, "loss": 0.1308, "step": 7938 }, { "epoch": 0.7314691113465702, "grad_norm": 0.9100280131314815, "learning_rate": 9.224599922797053e-07, "loss": 0.1254, "step": 7939 }, { "epoch": 0.7315612475238402, "grad_norm": 0.877434720202932, "learning_rate": 9.218690973527106e-07, "loss": 0.1145, "step": 7940 }, { "epoch": 0.7316533837011102, "grad_norm": 1.0262118485516254, "learning_rate": 9.212783489553401e-07, "loss": 0.1336, "step": 7941 }, { "epoch": 0.7317455198783802, "grad_norm": 0.9555725570664751, "learning_rate": 9.206877471424455e-07, "loss": 0.1341, "step": 7942 }, { "epoch": 0.7318376560556502, "grad_norm": 0.9392503740018264, "learning_rate": 9.200972919688628e-07, "loss": 0.1216, "step": 7943 }, { "epoch": 0.7319297922329202, "grad_norm": 0.964214487072524, "learning_rate": 9.195069834894174e-07, "loss": 0.1232, "step": 7944 }, { "epoch": 0.7320219284101903, "grad_norm": 0.9505944973653115, "learning_rate": 9.189168217589178e-07, "loss": 0.1208, "step": 7945 }, { "epoch": 0.7321140645874603, "grad_norm": 0.9211807278405854, "learning_rate": 9.183268068321616e-07, "loss": 0.1156, "step": 7946 }, { "epoch": 0.7322062007647303, "grad_norm": 0.8903747447623426, "learning_rate": 9.177369387639323e-07, "loss": 0.1082, "step": 7947 }, { "epoch": 0.7322983369420003, "grad_norm": 0.8724081988043988, "learning_rate": 9.171472176089977e-07, "loss": 0.1116, "step": 7948 }, { "epoch": 0.7323904731192703, "grad_norm": 0.9842222839255699, "learning_rate": 9.165576434221152e-07, "loss": 0.1304, "step": 7949 }, { "epoch": 0.7324826092965403, "grad_norm": 0.8799032026581489, "learning_rate": 9.15968216258025e-07, "loss": 0.11, "step": 7950 }, { "epoch": 0.7325747454738103, "grad_norm": 0.9429682413677968, "learning_rate": 9.153789361714573e-07, "loss": 0.1214, "step": 7951 }, { "epoch": 0.7326668816510803, "grad_norm": 0.9521147026715193, "learning_rate": 9.147898032171251e-07, "loss": 0.1265, "step": 7952 }, { "epoch": 0.7327590178283503, "grad_norm": 0.9118057426636171, "learning_rate": 9.142008174497302e-07, "loss": 0.1164, "step": 7953 }, { "epoch": 0.7328511540056203, "grad_norm": 0.9177961028689359, "learning_rate": 9.136119789239612e-07, "loss": 0.1167, "step": 7954 }, { "epoch": 0.7329432901828903, "grad_norm": 0.9368447428001684, "learning_rate": 9.130232876944903e-07, "loss": 0.1242, "step": 7955 }, { "epoch": 0.7330354263601603, "grad_norm": 0.9623873942205943, "learning_rate": 9.124347438159772e-07, "loss": 0.1272, "step": 7956 }, { "epoch": 0.7331275625374303, "grad_norm": 0.9679388638840875, "learning_rate": 9.118463473430689e-07, "loss": 0.1247, "step": 7957 }, { "epoch": 0.7332196987147003, "grad_norm": 0.9003206232231331, "learning_rate": 9.112580983303984e-07, "loss": 0.1175, "step": 7958 }, { "epoch": 0.7333118348919704, "grad_norm": 0.9656950894298632, "learning_rate": 9.106699968325849e-07, "loss": 0.121, "step": 7959 }, { "epoch": 0.7334039710692404, "grad_norm": 0.9890202665502145, "learning_rate": 9.100820429042337e-07, "loss": 0.1317, "step": 7960 }, { "epoch": 0.7334961072465104, "grad_norm": 0.9233138280132024, "learning_rate": 9.094942365999349e-07, "loss": 0.1193, "step": 7961 }, { "epoch": 0.7335882434237804, "grad_norm": 0.9742207172348182, "learning_rate": 9.089065779742673e-07, "loss": 0.1299, "step": 7962 }, { "epoch": 0.7336803796010504, "grad_norm": 0.8990113391460285, "learning_rate": 9.083190670817963e-07, "loss": 0.1145, "step": 7963 }, { "epoch": 0.7337725157783204, "grad_norm": 0.861187500883117, "learning_rate": 9.0773170397707e-07, "loss": 0.1143, "step": 7964 }, { "epoch": 0.7338646519555904, "grad_norm": 0.9339588576002366, "learning_rate": 9.071444887146275e-07, "loss": 0.125, "step": 7965 }, { "epoch": 0.7339567881328604, "grad_norm": 0.9228986922230902, "learning_rate": 9.065574213489897e-07, "loss": 0.1177, "step": 7966 }, { "epoch": 0.7340489243101304, "grad_norm": 0.8987184447458356, "learning_rate": 9.059705019346676e-07, "loss": 0.1147, "step": 7967 }, { "epoch": 0.7341410604874004, "grad_norm": 0.9718902501296317, "learning_rate": 9.053837305261551e-07, "loss": 0.117, "step": 7968 }, { "epoch": 0.7342331966646704, "grad_norm": 0.882640269301587, "learning_rate": 9.047971071779349e-07, "loss": 0.1014, "step": 7969 }, { "epoch": 0.7343253328419403, "grad_norm": 0.9295900436726557, "learning_rate": 9.042106319444757e-07, "loss": 0.1282, "step": 7970 }, { "epoch": 0.7344174690192103, "grad_norm": 0.9431206523555368, "learning_rate": 9.036243048802312e-07, "loss": 0.1172, "step": 7971 }, { "epoch": 0.7345096051964805, "grad_norm": 0.9400908273680944, "learning_rate": 9.030381260396409e-07, "loss": 0.1143, "step": 7972 }, { "epoch": 0.7346017413737504, "grad_norm": 0.9688951073944904, "learning_rate": 9.024520954771326e-07, "loss": 0.131, "step": 7973 }, { "epoch": 0.7346938775510204, "grad_norm": 0.9851741676237068, "learning_rate": 9.018662132471189e-07, "loss": 0.1241, "step": 7974 }, { "epoch": 0.7347860137282904, "grad_norm": 0.9057714871865711, "learning_rate": 9.012804794040003e-07, "loss": 0.1179, "step": 7975 }, { "epoch": 0.7348781499055604, "grad_norm": 0.9750342427400404, "learning_rate": 9.006948940021612e-07, "loss": 0.121, "step": 7976 }, { "epoch": 0.7349702860828304, "grad_norm": 0.9246612470094258, "learning_rate": 9.001094570959726e-07, "loss": 0.1145, "step": 7977 }, { "epoch": 0.7350624222601004, "grad_norm": 0.9570937459286335, "learning_rate": 8.995241687397929e-07, "loss": 0.1219, "step": 7978 }, { "epoch": 0.7351545584373704, "grad_norm": 0.9961421625364182, "learning_rate": 8.989390289879665e-07, "loss": 0.1294, "step": 7979 }, { "epoch": 0.7352466946146404, "grad_norm": 0.9600761953924268, "learning_rate": 8.983540378948244e-07, "loss": 0.1242, "step": 7980 }, { "epoch": 0.7353388307919104, "grad_norm": 0.9304715372533625, "learning_rate": 8.977691955146823e-07, "loss": 0.1137, "step": 7981 }, { "epoch": 0.7354309669691804, "grad_norm": 0.9413815198495417, "learning_rate": 8.971845019018419e-07, "loss": 0.1279, "step": 7982 }, { "epoch": 0.7355231031464504, "grad_norm": 0.9586301278205637, "learning_rate": 8.965999571105929e-07, "loss": 0.1302, "step": 7983 }, { "epoch": 0.7356152393237204, "grad_norm": 1.0239076800420843, "learning_rate": 8.960155611952115e-07, "loss": 0.139, "step": 7984 }, { "epoch": 0.7357073755009904, "grad_norm": 0.8874691209556264, "learning_rate": 8.954313142099568e-07, "loss": 0.1162, "step": 7985 }, { "epoch": 0.7357995116782605, "grad_norm": 0.8906043874184616, "learning_rate": 8.948472162090782e-07, "loss": 0.1218, "step": 7986 }, { "epoch": 0.7358916478555305, "grad_norm": 0.8774243675542508, "learning_rate": 8.942632672468077e-07, "loss": 0.1146, "step": 7987 }, { "epoch": 0.7359837840328005, "grad_norm": 0.9447564057822249, "learning_rate": 8.936794673773661e-07, "loss": 0.1266, "step": 7988 }, { "epoch": 0.7360759202100705, "grad_norm": 0.9399500136330979, "learning_rate": 8.930958166549583e-07, "loss": 0.1266, "step": 7989 }, { "epoch": 0.7361680563873405, "grad_norm": 0.9102773778780244, "learning_rate": 8.925123151337767e-07, "loss": 0.1176, "step": 7990 }, { "epoch": 0.7362601925646105, "grad_norm": 0.9128385627600489, "learning_rate": 8.919289628680005e-07, "loss": 0.1205, "step": 7991 }, { "epoch": 0.7363523287418805, "grad_norm": 0.9342386016200168, "learning_rate": 8.913457599117933e-07, "loss": 0.1156, "step": 7992 }, { "epoch": 0.7364444649191505, "grad_norm": 0.9199680452398934, "learning_rate": 8.907627063193045e-07, "loss": 0.1165, "step": 7993 }, { "epoch": 0.7365366010964205, "grad_norm": 0.9437755517472011, "learning_rate": 8.901798021446714e-07, "loss": 0.1275, "step": 7994 }, { "epoch": 0.7366287372736905, "grad_norm": 0.8953777925202131, "learning_rate": 8.895970474420171e-07, "loss": 0.1101, "step": 7995 }, { "epoch": 0.7367208734509605, "grad_norm": 0.9170465764857951, "learning_rate": 8.890144422654512e-07, "loss": 0.1194, "step": 7996 }, { "epoch": 0.7368130096282305, "grad_norm": 0.9822108915381218, "learning_rate": 8.884319866690674e-07, "loss": 0.1174, "step": 7997 }, { "epoch": 0.7369051458055005, "grad_norm": 0.933804644233933, "learning_rate": 8.878496807069464e-07, "loss": 0.1168, "step": 7998 }, { "epoch": 0.7369972819827705, "grad_norm": 0.9261693186397323, "learning_rate": 8.87267524433156e-07, "loss": 0.1199, "step": 7999 }, { "epoch": 0.7370894181600406, "grad_norm": 0.9344138753722313, "learning_rate": 8.866855179017505e-07, "loss": 0.116, "step": 8000 }, { "epoch": 0.7370894181600406, "eval_loss": 0.12275012582540512, "eval_runtime": 299.0394, "eval_samples_per_second": 23.465, "eval_steps_per_second": 2.936, "step": 8000 }, { "epoch": 0.7371815543373106, "grad_norm": 0.9069037559452209, "learning_rate": 8.861036611667676e-07, "loss": 0.1074, "step": 8001 }, { "epoch": 0.7372736905145806, "grad_norm": 0.916023672944426, "learning_rate": 8.855219542822341e-07, "loss": 0.1059, "step": 8002 }, { "epoch": 0.7373658266918506, "grad_norm": 0.9141703814175853, "learning_rate": 8.8494039730216e-07, "loss": 0.1236, "step": 8003 }, { "epoch": 0.7374579628691206, "grad_norm": 0.961606399248731, "learning_rate": 8.843589902805438e-07, "loss": 0.1317, "step": 8004 }, { "epoch": 0.7375500990463906, "grad_norm": 0.934755066682781, "learning_rate": 8.837777332713701e-07, "loss": 0.1203, "step": 8005 }, { "epoch": 0.7376422352236606, "grad_norm": 0.9025835586979728, "learning_rate": 8.831966263286071e-07, "loss": 0.1187, "step": 8006 }, { "epoch": 0.7377343714009306, "grad_norm": 0.9078088364126208, "learning_rate": 8.826156695062113e-07, "loss": 0.1241, "step": 8007 }, { "epoch": 0.7378265075782006, "grad_norm": 0.9342661255740071, "learning_rate": 8.820348628581254e-07, "loss": 0.1228, "step": 8008 }, { "epoch": 0.7379186437554706, "grad_norm": 0.9935337510975628, "learning_rate": 8.814542064382767e-07, "loss": 0.1244, "step": 8009 }, { "epoch": 0.7380107799327406, "grad_norm": 0.9048942427175215, "learning_rate": 8.808737003005782e-07, "loss": 0.1214, "step": 8010 }, { "epoch": 0.7381029161100106, "grad_norm": 0.9564069497268002, "learning_rate": 8.802933444989308e-07, "loss": 0.1257, "step": 8011 }, { "epoch": 0.7381950522872806, "grad_norm": 0.8980904699977105, "learning_rate": 8.797131390872207e-07, "loss": 0.1104, "step": 8012 }, { "epoch": 0.7382871884645507, "grad_norm": 0.8601604398887479, "learning_rate": 8.79133084119321e-07, "loss": 0.1216, "step": 8013 }, { "epoch": 0.7383793246418207, "grad_norm": 0.9231462426012692, "learning_rate": 8.78553179649089e-07, "loss": 0.1287, "step": 8014 }, { "epoch": 0.7384714608190907, "grad_norm": 0.9342174944000972, "learning_rate": 8.779734257303677e-07, "loss": 0.1248, "step": 8015 }, { "epoch": 0.7385635969963606, "grad_norm": 0.9247205293120123, "learning_rate": 8.773938224169884e-07, "loss": 0.119, "step": 8016 }, { "epoch": 0.7386557331736306, "grad_norm": 0.9345834504752919, "learning_rate": 8.768143697627681e-07, "loss": 0.1301, "step": 8017 }, { "epoch": 0.7387478693509006, "grad_norm": 0.8466531869104624, "learning_rate": 8.762350678215076e-07, "loss": 0.1085, "step": 8018 }, { "epoch": 0.7388400055281706, "grad_norm": 0.948753230207356, "learning_rate": 8.756559166469966e-07, "loss": 0.1299, "step": 8019 }, { "epoch": 0.7389321417054406, "grad_norm": 0.9596076569583665, "learning_rate": 8.750769162930076e-07, "loss": 0.1221, "step": 8020 }, { "epoch": 0.7390242778827106, "grad_norm": 0.9550525329866566, "learning_rate": 8.744980668133026e-07, "loss": 0.1208, "step": 8021 }, { "epoch": 0.7391164140599806, "grad_norm": 0.9490302288135481, "learning_rate": 8.739193682616265e-07, "loss": 0.1197, "step": 8022 }, { "epoch": 0.7392085502372506, "grad_norm": 0.9200381002548552, "learning_rate": 8.733408206917118e-07, "loss": 0.1226, "step": 8023 }, { "epoch": 0.7393006864145206, "grad_norm": 0.9650781421693304, "learning_rate": 8.727624241572779e-07, "loss": 0.1292, "step": 8024 }, { "epoch": 0.7393928225917906, "grad_norm": 0.904301714517166, "learning_rate": 8.72184178712028e-07, "loss": 0.1146, "step": 8025 }, { "epoch": 0.7394849587690606, "grad_norm": 0.8895832457122699, "learning_rate": 8.716060844096514e-07, "loss": 0.1159, "step": 8026 }, { "epoch": 0.7395770949463307, "grad_norm": 0.8901681509477561, "learning_rate": 8.710281413038252e-07, "loss": 0.1127, "step": 8027 }, { "epoch": 0.7396692311236007, "grad_norm": 0.973436941777358, "learning_rate": 8.704503494482114e-07, "loss": 0.1323, "step": 8028 }, { "epoch": 0.7397613673008707, "grad_norm": 0.8806928065451151, "learning_rate": 8.698727088964587e-07, "loss": 0.1144, "step": 8029 }, { "epoch": 0.7398535034781407, "grad_norm": 0.9283017152565503, "learning_rate": 8.692952197022006e-07, "loss": 0.1126, "step": 8030 }, { "epoch": 0.7399456396554107, "grad_norm": 0.9541252081277901, "learning_rate": 8.687178819190558e-07, "loss": 0.1252, "step": 8031 }, { "epoch": 0.7400377758326807, "grad_norm": 0.9693080055760379, "learning_rate": 8.681406956006316e-07, "loss": 0.1327, "step": 8032 }, { "epoch": 0.7401299120099507, "grad_norm": 0.9120977367696028, "learning_rate": 8.675636608005191e-07, "loss": 0.1218, "step": 8033 }, { "epoch": 0.7402220481872207, "grad_norm": 0.9133310755374804, "learning_rate": 8.669867775722973e-07, "loss": 0.1308, "step": 8034 }, { "epoch": 0.7403141843644907, "grad_norm": 0.8719559549311144, "learning_rate": 8.66410045969529e-07, "loss": 0.1094, "step": 8035 }, { "epoch": 0.7404063205417607, "grad_norm": 0.9203157162386649, "learning_rate": 8.658334660457629e-07, "loss": 0.1215, "step": 8036 }, { "epoch": 0.7404984567190307, "grad_norm": 0.8475985566773456, "learning_rate": 8.652570378545355e-07, "loss": 0.1024, "step": 8037 }, { "epoch": 0.7405905928963007, "grad_norm": 0.8714394463711818, "learning_rate": 8.646807614493685e-07, "loss": 0.1074, "step": 8038 }, { "epoch": 0.7406827290735707, "grad_norm": 0.8951025111622029, "learning_rate": 8.641046368837682e-07, "loss": 0.119, "step": 8039 }, { "epoch": 0.7407748652508408, "grad_norm": 0.8940983168209601, "learning_rate": 8.635286642112295e-07, "loss": 0.1068, "step": 8040 }, { "epoch": 0.7408670014281108, "grad_norm": 0.9700185466019189, "learning_rate": 8.629528434852294e-07, "loss": 0.1261, "step": 8041 }, { "epoch": 0.7409591376053808, "grad_norm": 0.9605648103993476, "learning_rate": 8.623771747592347e-07, "loss": 0.1303, "step": 8042 }, { "epoch": 0.7410512737826508, "grad_norm": 0.8647742808321739, "learning_rate": 8.618016580866947e-07, "loss": 0.113, "step": 8043 }, { "epoch": 0.7411434099599208, "grad_norm": 0.874872810061018, "learning_rate": 8.612262935210472e-07, "loss": 0.1088, "step": 8044 }, { "epoch": 0.7412355461371908, "grad_norm": 0.959941985103914, "learning_rate": 8.606510811157154e-07, "loss": 0.1181, "step": 8045 }, { "epoch": 0.7413276823144608, "grad_norm": 0.9386067809347824, "learning_rate": 8.600760209241074e-07, "loss": 0.1277, "step": 8046 }, { "epoch": 0.7414198184917308, "grad_norm": 0.9597688620808285, "learning_rate": 8.595011129996164e-07, "loss": 0.1219, "step": 8047 }, { "epoch": 0.7415119546690008, "grad_norm": 0.9520032460057815, "learning_rate": 8.589263573956236e-07, "loss": 0.1226, "step": 8048 }, { "epoch": 0.7416040908462708, "grad_norm": 0.9018956330326757, "learning_rate": 8.583517541654951e-07, "loss": 0.1117, "step": 8049 }, { "epoch": 0.7416962270235408, "grad_norm": 0.9368276434977748, "learning_rate": 8.577773033625836e-07, "loss": 0.1226, "step": 8050 }, { "epoch": 0.7417883632008108, "grad_norm": 0.8922899110331376, "learning_rate": 8.572030050402264e-07, "loss": 0.1094, "step": 8051 }, { "epoch": 0.7418804993780808, "grad_norm": 0.9630269672440016, "learning_rate": 8.566288592517461e-07, "loss": 0.1222, "step": 8052 }, { "epoch": 0.7419726355553508, "grad_norm": 0.9077601187164204, "learning_rate": 8.560548660504531e-07, "loss": 0.1109, "step": 8053 }, { "epoch": 0.7420647717326209, "grad_norm": 0.9147053432114655, "learning_rate": 8.554810254896434e-07, "loss": 0.1222, "step": 8054 }, { "epoch": 0.7421569079098909, "grad_norm": 0.8670005809699466, "learning_rate": 8.54907337622597e-07, "loss": 0.1051, "step": 8055 }, { "epoch": 0.7422490440871609, "grad_norm": 0.9849815202966365, "learning_rate": 8.543338025025818e-07, "loss": 0.1295, "step": 8056 }, { "epoch": 0.7423411802644309, "grad_norm": 0.9317265536420508, "learning_rate": 8.537604201828495e-07, "loss": 0.1194, "step": 8057 }, { "epoch": 0.7424333164417009, "grad_norm": 0.9531648343620684, "learning_rate": 8.5318719071664e-07, "loss": 0.1215, "step": 8058 }, { "epoch": 0.7425254526189708, "grad_norm": 0.9383372475678683, "learning_rate": 8.526141141571764e-07, "loss": 0.1263, "step": 8059 }, { "epoch": 0.7426175887962408, "grad_norm": 0.8973356402044012, "learning_rate": 8.520411905576697e-07, "loss": 0.1238, "step": 8060 }, { "epoch": 0.7427097249735108, "grad_norm": 0.8774857932653227, "learning_rate": 8.514684199713166e-07, "loss": 0.1126, "step": 8061 }, { "epoch": 0.7428018611507808, "grad_norm": 0.9196376921082318, "learning_rate": 8.508958024512972e-07, "loss": 0.1186, "step": 8062 }, { "epoch": 0.7428939973280508, "grad_norm": 0.9235244197036236, "learning_rate": 8.503233380507808e-07, "loss": 0.1221, "step": 8063 }, { "epoch": 0.7429861335053208, "grad_norm": 0.8945240160141257, "learning_rate": 8.497510268229192e-07, "loss": 0.125, "step": 8064 }, { "epoch": 0.7430782696825908, "grad_norm": 0.9899562846771053, "learning_rate": 8.491788688208524e-07, "loss": 0.1272, "step": 8065 }, { "epoch": 0.7431704058598608, "grad_norm": 0.9773907890691923, "learning_rate": 8.486068640977063e-07, "loss": 0.1312, "step": 8066 }, { "epoch": 0.7432625420371308, "grad_norm": 0.9148469368667612, "learning_rate": 8.480350127065904e-07, "loss": 0.1144, "step": 8067 }, { "epoch": 0.7433546782144009, "grad_norm": 0.8556460111218671, "learning_rate": 8.474633147006006e-07, "loss": 0.116, "step": 8068 }, { "epoch": 0.7434468143916709, "grad_norm": 0.9538636763158576, "learning_rate": 8.468917701328197e-07, "loss": 0.1281, "step": 8069 }, { "epoch": 0.7435389505689409, "grad_norm": 0.9288207869960571, "learning_rate": 8.46320379056316e-07, "loss": 0.1262, "step": 8070 }, { "epoch": 0.7436310867462109, "grad_norm": 0.9360281891768948, "learning_rate": 8.45749141524144e-07, "loss": 0.1239, "step": 8071 }, { "epoch": 0.7437232229234809, "grad_norm": 0.8474422015239383, "learning_rate": 8.45178057589342e-07, "loss": 0.1068, "step": 8072 }, { "epoch": 0.7438153591007509, "grad_norm": 0.9805074107349353, "learning_rate": 8.446071273049347e-07, "loss": 0.1309, "step": 8073 }, { "epoch": 0.7439074952780209, "grad_norm": 0.9428578922557328, "learning_rate": 8.440363507239338e-07, "loss": 0.122, "step": 8074 }, { "epoch": 0.7439996314552909, "grad_norm": 0.8616051880037617, "learning_rate": 8.434657278993369e-07, "loss": 0.1103, "step": 8075 }, { "epoch": 0.7440917676325609, "grad_norm": 0.8825856553344199, "learning_rate": 8.428952588841247e-07, "loss": 0.1141, "step": 8076 }, { "epoch": 0.7441839038098309, "grad_norm": 0.945774709888264, "learning_rate": 8.423249437312667e-07, "loss": 0.1267, "step": 8077 }, { "epoch": 0.7442760399871009, "grad_norm": 0.9036109907141343, "learning_rate": 8.41754782493715e-07, "loss": 0.1132, "step": 8078 }, { "epoch": 0.7443681761643709, "grad_norm": 0.9153835514619679, "learning_rate": 8.411847752244115e-07, "loss": 0.1165, "step": 8079 }, { "epoch": 0.7444603123416409, "grad_norm": 1.0480110548918762, "learning_rate": 8.406149219762791e-07, "loss": 0.1479, "step": 8080 }, { "epoch": 0.744552448518911, "grad_norm": 0.8801512229098181, "learning_rate": 8.400452228022296e-07, "loss": 0.1087, "step": 8081 }, { "epoch": 0.744644584696181, "grad_norm": 0.9245546425567027, "learning_rate": 8.394756777551602e-07, "loss": 0.125, "step": 8082 }, { "epoch": 0.744736720873451, "grad_norm": 0.9070405580998772, "learning_rate": 8.389062868879541e-07, "loss": 0.1173, "step": 8083 }, { "epoch": 0.744828857050721, "grad_norm": 0.8722507342722666, "learning_rate": 8.383370502534765e-07, "loss": 0.1102, "step": 8084 }, { "epoch": 0.744920993227991, "grad_norm": 0.9207796493390706, "learning_rate": 8.377679679045828e-07, "loss": 0.1245, "step": 8085 }, { "epoch": 0.745013129405261, "grad_norm": 0.9025661348734192, "learning_rate": 8.371990398941121e-07, "loss": 0.1255, "step": 8086 }, { "epoch": 0.745105265582531, "grad_norm": 0.9436641765010778, "learning_rate": 8.366302662748901e-07, "loss": 0.1198, "step": 8087 }, { "epoch": 0.745197401759801, "grad_norm": 0.9440084983200447, "learning_rate": 8.360616470997263e-07, "loss": 0.1132, "step": 8088 }, { "epoch": 0.745289537937071, "grad_norm": 0.9353058639345495, "learning_rate": 8.354931824214185e-07, "loss": 0.1203, "step": 8089 }, { "epoch": 0.745381674114341, "grad_norm": 0.9620361208227816, "learning_rate": 8.349248722927469e-07, "loss": 0.1263, "step": 8090 }, { "epoch": 0.745473810291611, "grad_norm": 0.9390265581383841, "learning_rate": 8.343567167664801e-07, "loss": 0.123, "step": 8091 }, { "epoch": 0.745565946468881, "grad_norm": 0.9554238810465895, "learning_rate": 8.337887158953723e-07, "loss": 0.1203, "step": 8092 }, { "epoch": 0.745658082646151, "grad_norm": 0.9129449457442721, "learning_rate": 8.332208697321606e-07, "loss": 0.1192, "step": 8093 }, { "epoch": 0.745750218823421, "grad_norm": 0.9137176785711236, "learning_rate": 8.326531783295716e-07, "loss": 0.115, "step": 8094 }, { "epoch": 0.7458423550006911, "grad_norm": 0.9545661166830167, "learning_rate": 8.320856417403134e-07, "loss": 0.1287, "step": 8095 }, { "epoch": 0.7459344911779611, "grad_norm": 0.9471937440524988, "learning_rate": 8.315182600170838e-07, "loss": 0.1241, "step": 8096 }, { "epoch": 0.7460266273552311, "grad_norm": 0.9942778081193968, "learning_rate": 8.309510332125623e-07, "loss": 0.134, "step": 8097 }, { "epoch": 0.7461187635325011, "grad_norm": 0.8990687326017974, "learning_rate": 8.30383961379417e-07, "loss": 0.1176, "step": 8098 }, { "epoch": 0.7462108997097711, "grad_norm": 0.9198246571850542, "learning_rate": 8.298170445703016e-07, "loss": 0.1205, "step": 8099 }, { "epoch": 0.746303035887041, "grad_norm": 0.8982667907638325, "learning_rate": 8.292502828378534e-07, "loss": 0.1242, "step": 8100 }, { "epoch": 0.746395172064311, "grad_norm": 0.9183599913087285, "learning_rate": 8.286836762346953e-07, "loss": 0.1261, "step": 8101 }, { "epoch": 0.746487308241581, "grad_norm": 0.9390779289689044, "learning_rate": 8.281172248134376e-07, "loss": 0.1253, "step": 8102 }, { "epoch": 0.746579444418851, "grad_norm": 0.915484652855498, "learning_rate": 8.275509286266755e-07, "loss": 0.1213, "step": 8103 }, { "epoch": 0.746671580596121, "grad_norm": 0.9337864533144221, "learning_rate": 8.26984787726991e-07, "loss": 0.124, "step": 8104 }, { "epoch": 0.746763716773391, "grad_norm": 0.9738610570783551, "learning_rate": 8.264188021669483e-07, "loss": 0.1287, "step": 8105 }, { "epoch": 0.746855852950661, "grad_norm": 0.8942501970326633, "learning_rate": 8.258529719990996e-07, "loss": 0.1213, "step": 8106 }, { "epoch": 0.746947989127931, "grad_norm": 1.0538439434415687, "learning_rate": 8.252872972759826e-07, "loss": 0.1349, "step": 8107 }, { "epoch": 0.7470401253052011, "grad_norm": 0.9313257811750428, "learning_rate": 8.24721778050121e-07, "loss": 0.1182, "step": 8108 }, { "epoch": 0.7471322614824711, "grad_norm": 0.9095931652756736, "learning_rate": 8.241564143740216e-07, "loss": 0.1245, "step": 8109 }, { "epoch": 0.7472243976597411, "grad_norm": 0.9127789000547557, "learning_rate": 8.235912063001805e-07, "loss": 0.1196, "step": 8110 }, { "epoch": 0.7473165338370111, "grad_norm": 0.9008185338290264, "learning_rate": 8.230261538810755e-07, "loss": 0.1161, "step": 8111 }, { "epoch": 0.7474086700142811, "grad_norm": 0.9802778555969498, "learning_rate": 8.224612571691734e-07, "loss": 0.1207, "step": 8112 }, { "epoch": 0.7475008061915511, "grad_norm": 0.9268639919324714, "learning_rate": 8.218965162169232e-07, "loss": 0.1223, "step": 8113 }, { "epoch": 0.7475929423688211, "grad_norm": 0.9181135011446372, "learning_rate": 8.21331931076762e-07, "loss": 0.1238, "step": 8114 }, { "epoch": 0.7476850785460911, "grad_norm": 0.9651031360768163, "learning_rate": 8.207675018011127e-07, "loss": 0.1277, "step": 8115 }, { "epoch": 0.7477772147233611, "grad_norm": 0.9007347893819367, "learning_rate": 8.202032284423817e-07, "loss": 0.1207, "step": 8116 }, { "epoch": 0.7478693509006311, "grad_norm": 0.9716022381465899, "learning_rate": 8.196391110529606e-07, "loss": 0.1241, "step": 8117 }, { "epoch": 0.7479614870779011, "grad_norm": 0.8951591662409739, "learning_rate": 8.19075149685229e-07, "loss": 0.1097, "step": 8118 }, { "epoch": 0.7480536232551711, "grad_norm": 0.9058294291306436, "learning_rate": 8.185113443915504e-07, "loss": 0.1178, "step": 8119 }, { "epoch": 0.7481457594324411, "grad_norm": 0.9752842361448212, "learning_rate": 8.179476952242757e-07, "loss": 0.1184, "step": 8120 }, { "epoch": 0.7482378956097111, "grad_norm": 0.9086052019177948, "learning_rate": 8.173842022357381e-07, "loss": 0.1134, "step": 8121 }, { "epoch": 0.7483300317869812, "grad_norm": 0.9355291015960355, "learning_rate": 8.168208654782578e-07, "loss": 0.1188, "step": 8122 }, { "epoch": 0.7484221679642512, "grad_norm": 0.9463130206534003, "learning_rate": 8.162576850041415e-07, "loss": 0.1269, "step": 8123 }, { "epoch": 0.7485143041415212, "grad_norm": 0.9097943913503486, "learning_rate": 8.156946608656799e-07, "loss": 0.1197, "step": 8124 }, { "epoch": 0.7486064403187912, "grad_norm": 0.9261635841305669, "learning_rate": 8.151317931151514e-07, "loss": 0.1065, "step": 8125 }, { "epoch": 0.7486985764960612, "grad_norm": 0.9384266586122866, "learning_rate": 8.145690818048171e-07, "loss": 0.1379, "step": 8126 }, { "epoch": 0.7487907126733312, "grad_norm": 0.996044063579827, "learning_rate": 8.140065269869244e-07, "loss": 0.1245, "step": 8127 }, { "epoch": 0.7488828488506012, "grad_norm": 0.9224711715286494, "learning_rate": 8.134441287137068e-07, "loss": 0.1114, "step": 8128 }, { "epoch": 0.7489749850278712, "grad_norm": 0.9220396484994093, "learning_rate": 8.128818870373845e-07, "loss": 0.1084, "step": 8129 }, { "epoch": 0.7490671212051412, "grad_norm": 0.9545910800584627, "learning_rate": 8.123198020101594e-07, "loss": 0.1278, "step": 8130 }, { "epoch": 0.7491592573824112, "grad_norm": 0.9261706948520899, "learning_rate": 8.117578736842232e-07, "loss": 0.1276, "step": 8131 }, { "epoch": 0.7492513935596812, "grad_norm": 0.9033268659393892, "learning_rate": 8.111961021117496e-07, "loss": 0.12, "step": 8132 }, { "epoch": 0.7493435297369512, "grad_norm": 0.9488735125394763, "learning_rate": 8.106344873449001e-07, "loss": 0.1216, "step": 8133 }, { "epoch": 0.7494356659142212, "grad_norm": 0.9861922120436211, "learning_rate": 8.100730294358197e-07, "loss": 0.1268, "step": 8134 }, { "epoch": 0.7495278020914912, "grad_norm": 0.9421639498933942, "learning_rate": 8.095117284366405e-07, "loss": 0.1239, "step": 8135 }, { "epoch": 0.7496199382687613, "grad_norm": 0.9059836146120238, "learning_rate": 8.089505843994797e-07, "loss": 0.1165, "step": 8136 }, { "epoch": 0.7497120744460313, "grad_norm": 0.8729933498186071, "learning_rate": 8.083895973764394e-07, "loss": 0.1101, "step": 8137 }, { "epoch": 0.7498042106233013, "grad_norm": 0.9354667026261895, "learning_rate": 8.078287674196061e-07, "loss": 0.1259, "step": 8138 }, { "epoch": 0.7498963468005713, "grad_norm": 0.9393954330086781, "learning_rate": 8.072680945810538e-07, "loss": 0.1264, "step": 8139 }, { "epoch": 0.7499884829778413, "grad_norm": 0.982062774545163, "learning_rate": 8.067075789128412e-07, "loss": 0.1351, "step": 8140 }, { "epoch": 0.7500806191551113, "grad_norm": 0.8870732838171413, "learning_rate": 8.061472204670129e-07, "loss": 0.1151, "step": 8141 }, { "epoch": 0.7501727553323813, "grad_norm": 0.9705648123376437, "learning_rate": 8.055870192955975e-07, "loss": 0.1183, "step": 8142 }, { "epoch": 0.7502648915096513, "grad_norm": 0.891933701352055, "learning_rate": 8.050269754506091e-07, "loss": 0.1179, "step": 8143 }, { "epoch": 0.7503570276869213, "grad_norm": 0.974472806167114, "learning_rate": 8.044670889840481e-07, "loss": 0.1281, "step": 8144 }, { "epoch": 0.7504491638641912, "grad_norm": 0.9393965328736213, "learning_rate": 8.039073599479014e-07, "loss": 0.1243, "step": 8145 }, { "epoch": 0.7505413000414612, "grad_norm": 0.9261251014677941, "learning_rate": 8.033477883941379e-07, "loss": 0.1229, "step": 8146 }, { "epoch": 0.7506334362187312, "grad_norm": 0.9257164254893021, "learning_rate": 8.027883743747156e-07, "loss": 0.1275, "step": 8147 }, { "epoch": 0.7507255723960012, "grad_norm": 0.8801199783379451, "learning_rate": 8.022291179415747e-07, "loss": 0.1158, "step": 8148 }, { "epoch": 0.7508177085732713, "grad_norm": 0.9106903812355431, "learning_rate": 8.016700191466431e-07, "loss": 0.1265, "step": 8149 }, { "epoch": 0.7509098447505413, "grad_norm": 0.9141324302012125, "learning_rate": 8.011110780418335e-07, "loss": 0.1172, "step": 8150 }, { "epoch": 0.7510019809278113, "grad_norm": 0.9587514921406114, "learning_rate": 8.005522946790426e-07, "loss": 0.1294, "step": 8151 }, { "epoch": 0.7510941171050813, "grad_norm": 0.9030493525414592, "learning_rate": 7.999936691101545e-07, "loss": 0.1256, "step": 8152 }, { "epoch": 0.7511862532823513, "grad_norm": 0.9972586634465126, "learning_rate": 7.994352013870366e-07, "loss": 0.1276, "step": 8153 }, { "epoch": 0.7512783894596213, "grad_norm": 0.8537576474310897, "learning_rate": 7.988768915615441e-07, "loss": 0.1093, "step": 8154 }, { "epoch": 0.7513705256368913, "grad_norm": 0.9041807136029697, "learning_rate": 7.983187396855144e-07, "loss": 0.1131, "step": 8155 }, { "epoch": 0.7514626618141613, "grad_norm": 0.8912359097150397, "learning_rate": 7.977607458107731e-07, "loss": 0.1218, "step": 8156 }, { "epoch": 0.7515547979914313, "grad_norm": 0.921588521542897, "learning_rate": 7.972029099891293e-07, "loss": 0.1189, "step": 8157 }, { "epoch": 0.7516469341687013, "grad_norm": 0.931955611648895, "learning_rate": 7.966452322723806e-07, "loss": 0.1142, "step": 8158 }, { "epoch": 0.7517390703459713, "grad_norm": 0.9486463061139534, "learning_rate": 7.960877127123038e-07, "loss": 0.1199, "step": 8159 }, { "epoch": 0.7518312065232413, "grad_norm": 0.930525832367596, "learning_rate": 7.955303513606657e-07, "loss": 0.1117, "step": 8160 }, { "epoch": 0.7519233427005113, "grad_norm": 0.9429285315072875, "learning_rate": 7.949731482692185e-07, "loss": 0.1276, "step": 8161 }, { "epoch": 0.7520154788777813, "grad_norm": 0.9014224398876224, "learning_rate": 7.944161034896986e-07, "loss": 0.1145, "step": 8162 }, { "epoch": 0.7521076150550514, "grad_norm": 0.9373335528043448, "learning_rate": 7.93859217073826e-07, "loss": 0.1261, "step": 8163 }, { "epoch": 0.7521997512323214, "grad_norm": 0.9031086836900423, "learning_rate": 7.933024890733099e-07, "loss": 0.1186, "step": 8164 }, { "epoch": 0.7522918874095914, "grad_norm": 0.9747283781206498, "learning_rate": 7.927459195398404e-07, "loss": 0.1238, "step": 8165 }, { "epoch": 0.7523840235868614, "grad_norm": 1.0216403307734223, "learning_rate": 7.921895085250967e-07, "loss": 0.1281, "step": 8166 }, { "epoch": 0.7524761597641314, "grad_norm": 0.9079435690483035, "learning_rate": 7.916332560807402e-07, "loss": 0.1115, "step": 8167 }, { "epoch": 0.7525682959414014, "grad_norm": 0.9732939639285801, "learning_rate": 7.910771622584199e-07, "loss": 0.1172, "step": 8168 }, { "epoch": 0.7526604321186714, "grad_norm": 0.9380322737134049, "learning_rate": 7.905212271097696e-07, "loss": 0.1144, "step": 8169 }, { "epoch": 0.7527525682959414, "grad_norm": 0.902154398830762, "learning_rate": 7.899654506864074e-07, "loss": 0.1203, "step": 8170 }, { "epoch": 0.7528447044732114, "grad_norm": 0.9161360802401297, "learning_rate": 7.894098330399363e-07, "loss": 0.1143, "step": 8171 }, { "epoch": 0.7529368406504814, "grad_norm": 0.9832148574859536, "learning_rate": 7.888543742219462e-07, "loss": 0.1297, "step": 8172 }, { "epoch": 0.7530289768277514, "grad_norm": 0.9578861811755155, "learning_rate": 7.882990742840119e-07, "loss": 0.1205, "step": 8173 }, { "epoch": 0.7531211130050214, "grad_norm": 0.9560100009959844, "learning_rate": 7.877439332776934e-07, "loss": 0.1246, "step": 8174 }, { "epoch": 0.7532132491822914, "grad_norm": 0.8931628999965731, "learning_rate": 7.87188951254535e-07, "loss": 0.1078, "step": 8175 }, { "epoch": 0.7533053853595615, "grad_norm": 0.9894713605728191, "learning_rate": 7.866341282660661e-07, "loss": 0.1246, "step": 8176 }, { "epoch": 0.7533975215368315, "grad_norm": 0.940458859644715, "learning_rate": 7.860794643638026e-07, "loss": 0.1194, "step": 8177 }, { "epoch": 0.7534896577141015, "grad_norm": 0.919068557096056, "learning_rate": 7.855249595992454e-07, "loss": 0.1274, "step": 8178 }, { "epoch": 0.7535817938913715, "grad_norm": 0.8730537807997693, "learning_rate": 7.849706140238808e-07, "loss": 0.1141, "step": 8179 }, { "epoch": 0.7536739300686415, "grad_norm": 0.8816936498803757, "learning_rate": 7.844164276891794e-07, "loss": 0.1171, "step": 8180 }, { "epoch": 0.7537660662459115, "grad_norm": 0.9362278283930622, "learning_rate": 7.838624006465967e-07, "loss": 0.124, "step": 8181 }, { "epoch": 0.7538582024231815, "grad_norm": 0.9231753539436256, "learning_rate": 7.833085329475748e-07, "loss": 0.1174, "step": 8182 }, { "epoch": 0.7539503386004515, "grad_norm": 0.9753567791774588, "learning_rate": 7.82754824643541e-07, "loss": 0.1298, "step": 8183 }, { "epoch": 0.7540424747777215, "grad_norm": 0.934180096557911, "learning_rate": 7.822012757859057e-07, "loss": 0.1291, "step": 8184 }, { "epoch": 0.7541346109549915, "grad_norm": 0.9257184287763613, "learning_rate": 7.816478864260677e-07, "loss": 0.1147, "step": 8185 }, { "epoch": 0.7542267471322615, "grad_norm": 0.9686431756661043, "learning_rate": 7.810946566154076e-07, "loss": 0.1255, "step": 8186 }, { "epoch": 0.7543188833095315, "grad_norm": 0.9168752484252765, "learning_rate": 7.805415864052942e-07, "loss": 0.1261, "step": 8187 }, { "epoch": 0.7544110194868014, "grad_norm": 0.9408848273284718, "learning_rate": 7.79988675847079e-07, "loss": 0.123, "step": 8188 }, { "epoch": 0.7545031556640714, "grad_norm": 0.9802139431624383, "learning_rate": 7.794359249921004e-07, "loss": 0.1225, "step": 8189 }, { "epoch": 0.7545952918413416, "grad_norm": 0.888342210491898, "learning_rate": 7.78883333891682e-07, "loss": 0.1172, "step": 8190 }, { "epoch": 0.7546874280186116, "grad_norm": 0.9059491622012312, "learning_rate": 7.783309025971314e-07, "loss": 0.1158, "step": 8191 }, { "epoch": 0.7547795641958815, "grad_norm": 0.9274618100083608, "learning_rate": 7.777786311597408e-07, "loss": 0.1343, "step": 8192 }, { "epoch": 0.7548717003731515, "grad_norm": 0.9096886113886077, "learning_rate": 7.772265196307896e-07, "loss": 0.1216, "step": 8193 }, { "epoch": 0.7549638365504215, "grad_norm": 0.9364608277777419, "learning_rate": 7.766745680615417e-07, "loss": 0.1193, "step": 8194 }, { "epoch": 0.7550559727276915, "grad_norm": 0.8856154937055789, "learning_rate": 7.761227765032464e-07, "loss": 0.1125, "step": 8195 }, { "epoch": 0.7551481089049615, "grad_norm": 0.9611275417568343, "learning_rate": 7.755711450071365e-07, "loss": 0.1198, "step": 8196 }, { "epoch": 0.7552402450822315, "grad_norm": 0.9812449711132765, "learning_rate": 7.750196736244309e-07, "loss": 0.1275, "step": 8197 }, { "epoch": 0.7553323812595015, "grad_norm": 0.963089860945346, "learning_rate": 7.744683624063343e-07, "loss": 0.1175, "step": 8198 }, { "epoch": 0.7554245174367715, "grad_norm": 0.9683624391402031, "learning_rate": 7.739172114040366e-07, "loss": 0.1212, "step": 8199 }, { "epoch": 0.7555166536140415, "grad_norm": 0.9774515231953995, "learning_rate": 7.733662206687106e-07, "loss": 0.1191, "step": 8200 }, { "epoch": 0.7556087897913115, "grad_norm": 0.9521027886273846, "learning_rate": 7.728153902515181e-07, "loss": 0.13, "step": 8201 }, { "epoch": 0.7557009259685815, "grad_norm": 1.0001992095848415, "learning_rate": 7.722647202036012e-07, "loss": 0.1409, "step": 8202 }, { "epoch": 0.7557930621458516, "grad_norm": 0.9519675971196492, "learning_rate": 7.717142105760922e-07, "loss": 0.1164, "step": 8203 }, { "epoch": 0.7558851983231216, "grad_norm": 0.9638373797322568, "learning_rate": 7.711638614201037e-07, "loss": 0.132, "step": 8204 }, { "epoch": 0.7559773345003916, "grad_norm": 0.9227872124153469, "learning_rate": 7.706136727867366e-07, "loss": 0.1185, "step": 8205 }, { "epoch": 0.7560694706776616, "grad_norm": 0.9195582592993863, "learning_rate": 7.700636447270773e-07, "loss": 0.1142, "step": 8206 }, { "epoch": 0.7561616068549316, "grad_norm": 0.9565199463656906, "learning_rate": 7.695137772921938e-07, "loss": 0.125, "step": 8207 }, { "epoch": 0.7562537430322016, "grad_norm": 0.9254905477252218, "learning_rate": 7.68964070533143e-07, "loss": 0.1211, "step": 8208 }, { "epoch": 0.7563458792094716, "grad_norm": 1.000115280608466, "learning_rate": 7.684145245009639e-07, "loss": 0.1332, "step": 8209 }, { "epoch": 0.7564380153867416, "grad_norm": 0.9401494439330383, "learning_rate": 7.678651392466824e-07, "loss": 0.1177, "step": 8210 }, { "epoch": 0.7565301515640116, "grad_norm": 0.9091567374279103, "learning_rate": 7.6731591482131e-07, "loss": 0.1197, "step": 8211 }, { "epoch": 0.7566222877412816, "grad_norm": 0.9063596577785816, "learning_rate": 7.667668512758414e-07, "loss": 0.1129, "step": 8212 }, { "epoch": 0.7567144239185516, "grad_norm": 0.9020032849310663, "learning_rate": 7.662179486612561e-07, "loss": 0.1158, "step": 8213 }, { "epoch": 0.7568065600958216, "grad_norm": 0.9373934933001632, "learning_rate": 7.656692070285212e-07, "loss": 0.127, "step": 8214 }, { "epoch": 0.7568986962730916, "grad_norm": 0.9687990581181442, "learning_rate": 7.651206264285871e-07, "loss": 0.1314, "step": 8215 }, { "epoch": 0.7569908324503616, "grad_norm": 0.9003439445100029, "learning_rate": 7.645722069123904e-07, "loss": 0.118, "step": 8216 }, { "epoch": 0.7570829686276317, "grad_norm": 0.9023461782986097, "learning_rate": 7.640239485308506e-07, "loss": 0.1108, "step": 8217 }, { "epoch": 0.7571751048049017, "grad_norm": 0.9496536924462029, "learning_rate": 7.634758513348737e-07, "loss": 0.1242, "step": 8218 }, { "epoch": 0.7572672409821717, "grad_norm": 0.9003447933629831, "learning_rate": 7.629279153753508e-07, "loss": 0.1134, "step": 8219 }, { "epoch": 0.7573593771594417, "grad_norm": 0.9717119656589931, "learning_rate": 7.623801407031586e-07, "loss": 0.1257, "step": 8220 }, { "epoch": 0.7574515133367117, "grad_norm": 0.9319051148074415, "learning_rate": 7.618325273691565e-07, "loss": 0.1285, "step": 8221 }, { "epoch": 0.7575436495139817, "grad_norm": 0.898820556761226, "learning_rate": 7.612850754241921e-07, "loss": 0.1191, "step": 8222 }, { "epoch": 0.7576357856912517, "grad_norm": 0.9545287362385483, "learning_rate": 7.607377849190947e-07, "loss": 0.129, "step": 8223 }, { "epoch": 0.7577279218685217, "grad_norm": 0.9509213799133734, "learning_rate": 7.601906559046824e-07, "loss": 0.1252, "step": 8224 }, { "epoch": 0.7578200580457917, "grad_norm": 0.99532066898558, "learning_rate": 7.596436884317537e-07, "loss": 0.1418, "step": 8225 }, { "epoch": 0.7579121942230617, "grad_norm": 0.9568231647521277, "learning_rate": 7.590968825510958e-07, "loss": 0.1316, "step": 8226 }, { "epoch": 0.7580043304003317, "grad_norm": 0.9676381668975514, "learning_rate": 7.585502383134807e-07, "loss": 0.1246, "step": 8227 }, { "epoch": 0.7580964665776017, "grad_norm": 0.9686951983539297, "learning_rate": 7.580037557696634e-07, "loss": 0.1281, "step": 8228 }, { "epoch": 0.7581886027548717, "grad_norm": 0.9575475261171695, "learning_rate": 7.574574349703839e-07, "loss": 0.1277, "step": 8229 }, { "epoch": 0.7582807389321417, "grad_norm": 0.966501487465626, "learning_rate": 7.569112759663693e-07, "loss": 0.1169, "step": 8230 }, { "epoch": 0.7583728751094118, "grad_norm": 0.9650356114616608, "learning_rate": 7.5636527880833e-07, "loss": 0.1259, "step": 8231 }, { "epoch": 0.7584650112866818, "grad_norm": 0.9896181093613594, "learning_rate": 7.558194435469634e-07, "loss": 0.1325, "step": 8232 }, { "epoch": 0.7585571474639518, "grad_norm": 0.9215878238578664, "learning_rate": 7.55273770232949e-07, "loss": 0.1194, "step": 8233 }, { "epoch": 0.7586492836412217, "grad_norm": 0.8898782537530946, "learning_rate": 7.547282589169519e-07, "loss": 0.1212, "step": 8234 }, { "epoch": 0.7587414198184917, "grad_norm": 0.961252642668289, "learning_rate": 7.541829096496239e-07, "loss": 0.1148, "step": 8235 }, { "epoch": 0.7588335559957617, "grad_norm": 0.978911061379541, "learning_rate": 7.536377224816008e-07, "loss": 0.1273, "step": 8236 }, { "epoch": 0.7589256921730317, "grad_norm": 0.9326585979443505, "learning_rate": 7.530926974635036e-07, "loss": 0.1308, "step": 8237 }, { "epoch": 0.7590178283503017, "grad_norm": 0.999088474363938, "learning_rate": 7.525478346459369e-07, "loss": 0.1287, "step": 8238 }, { "epoch": 0.7591099645275717, "grad_norm": 0.9697618917331947, "learning_rate": 7.520031340794926e-07, "loss": 0.131, "step": 8239 }, { "epoch": 0.7592021007048417, "grad_norm": 0.9103230425606077, "learning_rate": 7.514585958147444e-07, "loss": 0.1231, "step": 8240 }, { "epoch": 0.7592942368821117, "grad_norm": 0.9963197904910523, "learning_rate": 7.509142199022545e-07, "loss": 0.1251, "step": 8241 }, { "epoch": 0.7593863730593817, "grad_norm": 0.9044203026817673, "learning_rate": 7.50370006392567e-07, "loss": 0.1167, "step": 8242 }, { "epoch": 0.7594785092366517, "grad_norm": 0.9102232962101179, "learning_rate": 7.498259553362128e-07, "loss": 0.1197, "step": 8243 }, { "epoch": 0.7595706454139218, "grad_norm": 0.9331151108211787, "learning_rate": 7.492820667837075e-07, "loss": 0.1209, "step": 8244 }, { "epoch": 0.7596627815911918, "grad_norm": 0.9581217043945808, "learning_rate": 7.487383407855508e-07, "loss": 0.1313, "step": 8245 }, { "epoch": 0.7597549177684618, "grad_norm": 0.859473471190459, "learning_rate": 7.481947773922269e-07, "loss": 0.1056, "step": 8246 }, { "epoch": 0.7598470539457318, "grad_norm": 0.906385166241462, "learning_rate": 7.476513766542065e-07, "loss": 0.1188, "step": 8247 }, { "epoch": 0.7599391901230018, "grad_norm": 0.9363292229597777, "learning_rate": 7.471081386219442e-07, "loss": 0.1308, "step": 8248 }, { "epoch": 0.7600313263002718, "grad_norm": 0.9867804550389615, "learning_rate": 7.465650633458807e-07, "loss": 0.139, "step": 8249 }, { "epoch": 0.7601234624775418, "grad_norm": 0.9925921139578151, "learning_rate": 7.460221508764398e-07, "loss": 0.124, "step": 8250 }, { "epoch": 0.7602155986548118, "grad_norm": 0.9089636015164811, "learning_rate": 7.454794012640301e-07, "loss": 0.1086, "step": 8251 }, { "epoch": 0.7603077348320818, "grad_norm": 0.8845980297368551, "learning_rate": 7.449368145590469e-07, "loss": 0.1212, "step": 8252 }, { "epoch": 0.7603998710093518, "grad_norm": 0.9422456456596634, "learning_rate": 7.443943908118703e-07, "loss": 0.1256, "step": 8253 }, { "epoch": 0.7604920071866218, "grad_norm": 0.9566109791919529, "learning_rate": 7.438521300728624e-07, "loss": 0.1354, "step": 8254 }, { "epoch": 0.7605841433638918, "grad_norm": 0.9351359065136909, "learning_rate": 7.433100323923742e-07, "loss": 0.1264, "step": 8255 }, { "epoch": 0.7606762795411618, "grad_norm": 0.9693764918733778, "learning_rate": 7.427680978207378e-07, "loss": 0.1195, "step": 8256 }, { "epoch": 0.7607684157184318, "grad_norm": 0.903810912945681, "learning_rate": 7.422263264082732e-07, "loss": 0.1026, "step": 8257 }, { "epoch": 0.7608605518957019, "grad_norm": 0.9003919306304144, "learning_rate": 7.416847182052825e-07, "loss": 0.1048, "step": 8258 }, { "epoch": 0.7609526880729719, "grad_norm": 0.9647571730317374, "learning_rate": 7.411432732620552e-07, "loss": 0.121, "step": 8259 }, { "epoch": 0.7610448242502419, "grad_norm": 0.9315295549134404, "learning_rate": 7.406019916288651e-07, "loss": 0.1267, "step": 8260 }, { "epoch": 0.7611369604275119, "grad_norm": 0.8841359303961115, "learning_rate": 7.400608733559692e-07, "loss": 0.1151, "step": 8261 }, { "epoch": 0.7612290966047819, "grad_norm": 0.9333795268076007, "learning_rate": 7.395199184936099e-07, "loss": 0.1235, "step": 8262 }, { "epoch": 0.7613212327820519, "grad_norm": 1.0032582232476528, "learning_rate": 7.389791270920158e-07, "loss": 0.1182, "step": 8263 }, { "epoch": 0.7614133689593219, "grad_norm": 1.008498522600006, "learning_rate": 7.38438499201399e-07, "loss": 0.1416, "step": 8264 }, { "epoch": 0.7615055051365919, "grad_norm": 0.9695756514176418, "learning_rate": 7.378980348719581e-07, "loss": 0.1188, "step": 8265 }, { "epoch": 0.7615976413138619, "grad_norm": 0.9189796715543543, "learning_rate": 7.373577341538742e-07, "loss": 0.1153, "step": 8266 }, { "epoch": 0.7616897774911319, "grad_norm": 0.9377850605107226, "learning_rate": 7.368175970973138e-07, "loss": 0.11, "step": 8267 }, { "epoch": 0.7617819136684019, "grad_norm": 0.9201398168024537, "learning_rate": 7.362776237524291e-07, "loss": 0.1197, "step": 8268 }, { "epoch": 0.7618740498456719, "grad_norm": 1.072128024202621, "learning_rate": 7.357378141693569e-07, "loss": 0.1197, "step": 8269 }, { "epoch": 0.7619661860229419, "grad_norm": 0.9460766863322817, "learning_rate": 7.351981683982193e-07, "loss": 0.1239, "step": 8270 }, { "epoch": 0.762058322200212, "grad_norm": 0.9692369089181001, "learning_rate": 7.346586864891217e-07, "loss": 0.127, "step": 8271 }, { "epoch": 0.762150458377482, "grad_norm": 0.8880843044237137, "learning_rate": 7.341193684921541e-07, "loss": 0.1118, "step": 8272 }, { "epoch": 0.762242594554752, "grad_norm": 0.926515471093357, "learning_rate": 7.335802144573933e-07, "loss": 0.1177, "step": 8273 }, { "epoch": 0.762334730732022, "grad_norm": 0.9482733992199582, "learning_rate": 7.330412244349005e-07, "loss": 0.1163, "step": 8274 }, { "epoch": 0.762426866909292, "grad_norm": 0.9582116484025304, "learning_rate": 7.325023984747195e-07, "loss": 0.1334, "step": 8275 }, { "epoch": 0.762519003086562, "grad_norm": 0.9336639018459388, "learning_rate": 7.319637366268817e-07, "loss": 0.1225, "step": 8276 }, { "epoch": 0.762611139263832, "grad_norm": 0.9118007724591288, "learning_rate": 7.314252389414003e-07, "loss": 0.1131, "step": 8277 }, { "epoch": 0.762703275441102, "grad_norm": 0.9695491078378007, "learning_rate": 7.308869054682769e-07, "loss": 0.1247, "step": 8278 }, { "epoch": 0.7627954116183719, "grad_norm": 0.9668242101632564, "learning_rate": 7.303487362574938e-07, "loss": 0.1261, "step": 8279 }, { "epoch": 0.7628875477956419, "grad_norm": 1.0067394909831462, "learning_rate": 7.298107313590208e-07, "loss": 0.1337, "step": 8280 }, { "epoch": 0.7629796839729119, "grad_norm": 0.9437521576960384, "learning_rate": 7.292728908228127e-07, "loss": 0.1247, "step": 8281 }, { "epoch": 0.7630718201501819, "grad_norm": 0.945932213840081, "learning_rate": 7.287352146988075e-07, "loss": 0.1222, "step": 8282 }, { "epoch": 0.7631639563274519, "grad_norm": 0.8918765975327134, "learning_rate": 7.281977030369275e-07, "loss": 0.1031, "step": 8283 }, { "epoch": 0.7632560925047219, "grad_norm": 0.9158648763429635, "learning_rate": 7.276603558870812e-07, "loss": 0.1208, "step": 8284 }, { "epoch": 0.763348228681992, "grad_norm": 0.9292601320925606, "learning_rate": 7.271231732991619e-07, "loss": 0.1164, "step": 8285 }, { "epoch": 0.763440364859262, "grad_norm": 0.8994955400839384, "learning_rate": 7.265861553230472e-07, "loss": 0.1222, "step": 8286 }, { "epoch": 0.763532501036532, "grad_norm": 0.951619859318042, "learning_rate": 7.26049302008599e-07, "loss": 0.1282, "step": 8287 }, { "epoch": 0.763624637213802, "grad_norm": 0.988325789476459, "learning_rate": 7.255126134056631e-07, "loss": 0.1399, "step": 8288 }, { "epoch": 0.763716773391072, "grad_norm": 1.055346800786715, "learning_rate": 7.249760895640723e-07, "loss": 0.1243, "step": 8289 }, { "epoch": 0.763808909568342, "grad_norm": 0.9044247369547711, "learning_rate": 7.244397305336423e-07, "loss": 0.1155, "step": 8290 }, { "epoch": 0.763901045745612, "grad_norm": 0.9472862151962139, "learning_rate": 7.239035363641752e-07, "loss": 0.1219, "step": 8291 }, { "epoch": 0.763993181922882, "grad_norm": 0.9502049407566469, "learning_rate": 7.233675071054564e-07, "loss": 0.1287, "step": 8292 }, { "epoch": 0.764085318100152, "grad_norm": 0.9285316003020824, "learning_rate": 7.228316428072546e-07, "loss": 0.1236, "step": 8293 }, { "epoch": 0.764177454277422, "grad_norm": 0.9390689107258569, "learning_rate": 7.222959435193258e-07, "loss": 0.1158, "step": 8294 }, { "epoch": 0.764269590454692, "grad_norm": 0.95789176362226, "learning_rate": 7.21760409291411e-07, "loss": 0.1343, "step": 8295 }, { "epoch": 0.764361726631962, "grad_norm": 0.9941924628392527, "learning_rate": 7.212250401732329e-07, "loss": 0.127, "step": 8296 }, { "epoch": 0.764453862809232, "grad_norm": 0.9252169197213751, "learning_rate": 7.206898362145021e-07, "loss": 0.1196, "step": 8297 }, { "epoch": 0.764545998986502, "grad_norm": 0.8762698700489192, "learning_rate": 7.201547974649104e-07, "loss": 0.1122, "step": 8298 }, { "epoch": 0.7646381351637721, "grad_norm": 0.9739201895395896, "learning_rate": 7.196199239741383e-07, "loss": 0.1236, "step": 8299 }, { "epoch": 0.7647302713410421, "grad_norm": 0.9127234805633828, "learning_rate": 7.190852157918468e-07, "loss": 0.1167, "step": 8300 }, { "epoch": 0.7648224075183121, "grad_norm": 0.9358931953978479, "learning_rate": 7.185506729676849e-07, "loss": 0.1136, "step": 8301 }, { "epoch": 0.7649145436955821, "grad_norm": 0.9726783433144469, "learning_rate": 7.180162955512856e-07, "loss": 0.1223, "step": 8302 }, { "epoch": 0.7650066798728521, "grad_norm": 0.9370684221290889, "learning_rate": 7.174820835922649e-07, "loss": 0.1136, "step": 8303 }, { "epoch": 0.7650988160501221, "grad_norm": 0.8790475206195711, "learning_rate": 7.16948037140224e-07, "loss": 0.1164, "step": 8304 }, { "epoch": 0.7651909522273921, "grad_norm": 1.0491204399969105, "learning_rate": 7.164141562447497e-07, "loss": 0.1411, "step": 8305 }, { "epoch": 0.7652830884046621, "grad_norm": 0.9571333133358144, "learning_rate": 7.158804409554126e-07, "loss": 0.1118, "step": 8306 }, { "epoch": 0.7653752245819321, "grad_norm": 0.964770395513596, "learning_rate": 7.153468913217695e-07, "loss": 0.1277, "step": 8307 }, { "epoch": 0.7654673607592021, "grad_norm": 0.9070299902224167, "learning_rate": 7.148135073933599e-07, "loss": 0.1164, "step": 8308 }, { "epoch": 0.7655594969364721, "grad_norm": 0.941404523811673, "learning_rate": 7.142802892197071e-07, "loss": 0.1253, "step": 8309 }, { "epoch": 0.7656516331137421, "grad_norm": 0.9066252176601356, "learning_rate": 7.137472368503217e-07, "loss": 0.1269, "step": 8310 }, { "epoch": 0.7657437692910121, "grad_norm": 0.9096259039649669, "learning_rate": 7.132143503346986e-07, "loss": 0.1159, "step": 8311 }, { "epoch": 0.7658359054682822, "grad_norm": 0.9385880965115956, "learning_rate": 7.126816297223147e-07, "loss": 0.1301, "step": 8312 }, { "epoch": 0.7659280416455522, "grad_norm": 0.9567068424142435, "learning_rate": 7.121490750626342e-07, "loss": 0.1231, "step": 8313 }, { "epoch": 0.7660201778228222, "grad_norm": 0.9173460135425626, "learning_rate": 7.116166864051038e-07, "loss": 0.1137, "step": 8314 }, { "epoch": 0.7661123140000922, "grad_norm": 0.9294880243189472, "learning_rate": 7.110844637991574e-07, "loss": 0.1232, "step": 8315 }, { "epoch": 0.7662044501773622, "grad_norm": 0.9152657770262788, "learning_rate": 7.105524072942105e-07, "loss": 0.1164, "step": 8316 }, { "epoch": 0.7662965863546322, "grad_norm": 0.9396243490752815, "learning_rate": 7.100205169396649e-07, "loss": 0.1227, "step": 8317 }, { "epoch": 0.7663887225319022, "grad_norm": 0.9206804869509456, "learning_rate": 7.09488792784907e-07, "loss": 0.1218, "step": 8318 }, { "epoch": 0.7664808587091722, "grad_norm": 0.935610637811038, "learning_rate": 7.089572348793081e-07, "loss": 0.1202, "step": 8319 }, { "epoch": 0.7665729948864421, "grad_norm": 0.9489679146040931, "learning_rate": 7.084258432722227e-07, "loss": 0.1177, "step": 8320 }, { "epoch": 0.7666651310637121, "grad_norm": 0.9163551987157174, "learning_rate": 7.078946180129898e-07, "loss": 0.1164, "step": 8321 }, { "epoch": 0.7667572672409821, "grad_norm": 0.8913692547879215, "learning_rate": 7.073635591509345e-07, "loss": 0.1064, "step": 8322 }, { "epoch": 0.7668494034182521, "grad_norm": 0.9434948927728509, "learning_rate": 7.068326667353659e-07, "loss": 0.1205, "step": 8323 }, { "epoch": 0.7669415395955221, "grad_norm": 0.9222390272621265, "learning_rate": 7.063019408155777e-07, "loss": 0.1213, "step": 8324 }, { "epoch": 0.7670336757727921, "grad_norm": 0.9288456312089602, "learning_rate": 7.057713814408473e-07, "loss": 0.1217, "step": 8325 }, { "epoch": 0.7671258119500622, "grad_norm": 0.9694355795017486, "learning_rate": 7.052409886604364e-07, "loss": 0.1292, "step": 8326 }, { "epoch": 0.7672179481273322, "grad_norm": 1.0037949780961066, "learning_rate": 7.04710762523593e-07, "loss": 0.1229, "step": 8327 }, { "epoch": 0.7673100843046022, "grad_norm": 1.0245295525799747, "learning_rate": 7.041807030795495e-07, "loss": 0.1302, "step": 8328 }, { "epoch": 0.7674022204818722, "grad_norm": 0.9915072745998507, "learning_rate": 7.036508103775199e-07, "loss": 0.1288, "step": 8329 }, { "epoch": 0.7674943566591422, "grad_norm": 0.9209649117333428, "learning_rate": 7.031210844667066e-07, "loss": 0.1177, "step": 8330 }, { "epoch": 0.7675864928364122, "grad_norm": 0.9777283812460724, "learning_rate": 7.025915253962934e-07, "loss": 0.1194, "step": 8331 }, { "epoch": 0.7676786290136822, "grad_norm": 0.9028823303836367, "learning_rate": 7.020621332154512e-07, "loss": 0.123, "step": 8332 }, { "epoch": 0.7677707651909522, "grad_norm": 0.8798941197971895, "learning_rate": 7.015329079733327e-07, "loss": 0.104, "step": 8333 }, { "epoch": 0.7678629013682222, "grad_norm": 0.9322675197448133, "learning_rate": 7.010038497190774e-07, "loss": 0.1159, "step": 8334 }, { "epoch": 0.7679550375454922, "grad_norm": 1.0339398169724536, "learning_rate": 7.004749585018089e-07, "loss": 0.1274, "step": 8335 }, { "epoch": 0.7680471737227622, "grad_norm": 0.9907312337014706, "learning_rate": 6.999462343706339e-07, "loss": 0.1261, "step": 8336 }, { "epoch": 0.7681393099000322, "grad_norm": 0.9265113005572841, "learning_rate": 6.994176773746445e-07, "loss": 0.1127, "step": 8337 }, { "epoch": 0.7682314460773022, "grad_norm": 0.8763629187072877, "learning_rate": 6.988892875629172e-07, "loss": 0.1123, "step": 8338 }, { "epoch": 0.7683235822545723, "grad_norm": 0.9473048227249817, "learning_rate": 6.983610649845136e-07, "loss": 0.1266, "step": 8339 }, { "epoch": 0.7684157184318423, "grad_norm": 0.9376358861988066, "learning_rate": 6.978330096884794e-07, "loss": 0.118, "step": 8340 }, { "epoch": 0.7685078546091123, "grad_norm": 0.9698111754881962, "learning_rate": 6.973051217238444e-07, "loss": 0.1259, "step": 8341 }, { "epoch": 0.7685999907863823, "grad_norm": 0.912876262065055, "learning_rate": 6.967774011396222e-07, "loss": 0.1077, "step": 8342 }, { "epoch": 0.7686921269636523, "grad_norm": 0.9663238447952306, "learning_rate": 6.962498479848124e-07, "loss": 0.1203, "step": 8343 }, { "epoch": 0.7687842631409223, "grad_norm": 0.8994434460789928, "learning_rate": 6.957224623083989e-07, "loss": 0.1223, "step": 8344 }, { "epoch": 0.7688763993181923, "grad_norm": 0.8717941859314399, "learning_rate": 6.951952441593482e-07, "loss": 0.1125, "step": 8345 }, { "epoch": 0.7689685354954623, "grad_norm": 0.8812348398963182, "learning_rate": 6.946681935866143e-07, "loss": 0.1082, "step": 8346 }, { "epoch": 0.7690606716727323, "grad_norm": 1.0019169701459543, "learning_rate": 6.941413106391321e-07, "loss": 0.128, "step": 8347 }, { "epoch": 0.7691528078500023, "grad_norm": 0.9169717950582553, "learning_rate": 6.936145953658233e-07, "loss": 0.1194, "step": 8348 }, { "epoch": 0.7692449440272723, "grad_norm": 0.9331417687311467, "learning_rate": 6.930880478155946e-07, "loss": 0.1198, "step": 8349 }, { "epoch": 0.7693370802045423, "grad_norm": 0.8913529397204782, "learning_rate": 6.925616680373346e-07, "loss": 0.1165, "step": 8350 }, { "epoch": 0.7694292163818123, "grad_norm": 0.889454311489956, "learning_rate": 6.920354560799189e-07, "loss": 0.1176, "step": 8351 }, { "epoch": 0.7695213525590823, "grad_norm": 0.9482997599159537, "learning_rate": 6.915094119922048e-07, "loss": 0.1333, "step": 8352 }, { "epoch": 0.7696134887363524, "grad_norm": 0.9522924476603231, "learning_rate": 6.909835358230372e-07, "loss": 0.1208, "step": 8353 }, { "epoch": 0.7697056249136224, "grad_norm": 0.9205945084224799, "learning_rate": 6.904578276212423e-07, "loss": 0.1298, "step": 8354 }, { "epoch": 0.7697977610908924, "grad_norm": 0.9486688534035198, "learning_rate": 6.899322874356329e-07, "loss": 0.1303, "step": 8355 }, { "epoch": 0.7698898972681624, "grad_norm": 0.9265740419588309, "learning_rate": 6.89406915315006e-07, "loss": 0.1253, "step": 8356 }, { "epoch": 0.7699820334454324, "grad_norm": 0.9274708447215646, "learning_rate": 6.888817113081419e-07, "loss": 0.1262, "step": 8357 }, { "epoch": 0.7700741696227024, "grad_norm": 0.9727274919868594, "learning_rate": 6.883566754638052e-07, "loss": 0.1318, "step": 8358 }, { "epoch": 0.7701663057999724, "grad_norm": 0.8822267025867091, "learning_rate": 6.878318078307461e-07, "loss": 0.1122, "step": 8359 }, { "epoch": 0.7702584419772424, "grad_norm": 0.9559628727055969, "learning_rate": 6.873071084576985e-07, "loss": 0.1169, "step": 8360 }, { "epoch": 0.7703505781545124, "grad_norm": 0.9606378148147342, "learning_rate": 6.86782577393382e-07, "loss": 0.1277, "step": 8361 }, { "epoch": 0.7704427143317824, "grad_norm": 0.9531820149960578, "learning_rate": 6.862582146864982e-07, "loss": 0.1244, "step": 8362 }, { "epoch": 0.7705348505090523, "grad_norm": 0.90167924888066, "learning_rate": 6.857340203857335e-07, "loss": 0.1117, "step": 8363 }, { "epoch": 0.7706269866863223, "grad_norm": 0.947891508516917, "learning_rate": 6.852099945397603e-07, "loss": 0.1195, "step": 8364 }, { "epoch": 0.7707191228635923, "grad_norm": 0.9378585554874798, "learning_rate": 6.846861371972355e-07, "loss": 0.1244, "step": 8365 }, { "epoch": 0.7708112590408623, "grad_norm": 0.9705047499227291, "learning_rate": 6.841624484067971e-07, "loss": 0.1309, "step": 8366 }, { "epoch": 0.7709033952181324, "grad_norm": 0.8928153670107007, "learning_rate": 6.836389282170716e-07, "loss": 0.1127, "step": 8367 }, { "epoch": 0.7709955313954024, "grad_norm": 0.9662324933740548, "learning_rate": 6.831155766766665e-07, "loss": 0.1336, "step": 8368 }, { "epoch": 0.7710876675726724, "grad_norm": 0.8815268225850014, "learning_rate": 6.825923938341761e-07, "loss": 0.108, "step": 8369 }, { "epoch": 0.7711798037499424, "grad_norm": 0.9841686059201685, "learning_rate": 6.820693797381769e-07, "loss": 0.1247, "step": 8370 }, { "epoch": 0.7712719399272124, "grad_norm": 0.9536490084886785, "learning_rate": 6.815465344372316e-07, "loss": 0.1251, "step": 8371 }, { "epoch": 0.7713640761044824, "grad_norm": 0.9289453206959042, "learning_rate": 6.81023857979887e-07, "loss": 0.1124, "step": 8372 }, { "epoch": 0.7714562122817524, "grad_norm": 0.9312998796681076, "learning_rate": 6.805013504146729e-07, "loss": 0.1187, "step": 8373 }, { "epoch": 0.7715483484590224, "grad_norm": 0.9395698669875502, "learning_rate": 6.799790117901034e-07, "loss": 0.1231, "step": 8374 }, { "epoch": 0.7716404846362924, "grad_norm": 0.9166696590685488, "learning_rate": 6.794568421546785e-07, "loss": 0.1228, "step": 8375 }, { "epoch": 0.7717326208135624, "grad_norm": 0.9550481131258569, "learning_rate": 6.78934841556882e-07, "loss": 0.1279, "step": 8376 }, { "epoch": 0.7718247569908324, "grad_norm": 0.913707817793788, "learning_rate": 6.784130100451819e-07, "loss": 0.1173, "step": 8377 }, { "epoch": 0.7719168931681024, "grad_norm": 0.9996482206708629, "learning_rate": 6.778913476680302e-07, "loss": 0.1227, "step": 8378 }, { "epoch": 0.7720090293453724, "grad_norm": 0.9288326450663615, "learning_rate": 6.773698544738619e-07, "loss": 0.1274, "step": 8379 }, { "epoch": 0.7721011655226425, "grad_norm": 0.9094294673119427, "learning_rate": 6.768485305110989e-07, "loss": 0.1158, "step": 8380 }, { "epoch": 0.7721933016999125, "grad_norm": 0.9455830954897821, "learning_rate": 6.763273758281458e-07, "loss": 0.1176, "step": 8381 }, { "epoch": 0.7722854378771825, "grad_norm": 0.9650385136922474, "learning_rate": 6.758063904733933e-07, "loss": 0.1245, "step": 8382 }, { "epoch": 0.7723775740544525, "grad_norm": 0.9541341797982005, "learning_rate": 6.752855744952136e-07, "loss": 0.1195, "step": 8383 }, { "epoch": 0.7724697102317225, "grad_norm": 0.9791283036740815, "learning_rate": 6.747649279419638e-07, "loss": 0.1189, "step": 8384 }, { "epoch": 0.7725618464089925, "grad_norm": 0.864555357722854, "learning_rate": 6.742444508619869e-07, "loss": 0.1061, "step": 8385 }, { "epoch": 0.7726539825862625, "grad_norm": 0.9755164798582352, "learning_rate": 6.737241433036101e-07, "loss": 0.1258, "step": 8386 }, { "epoch": 0.7727461187635325, "grad_norm": 0.9382934330514728, "learning_rate": 6.732040053151423e-07, "loss": 0.1226, "step": 8387 }, { "epoch": 0.7728382549408025, "grad_norm": 0.9150629750841084, "learning_rate": 6.7268403694488e-07, "loss": 0.1173, "step": 8388 }, { "epoch": 0.7729303911180725, "grad_norm": 0.9115809997858356, "learning_rate": 6.721642382411006e-07, "loss": 0.1146, "step": 8389 }, { "epoch": 0.7730225272953425, "grad_norm": 0.9670908852073896, "learning_rate": 6.716446092520696e-07, "loss": 0.1306, "step": 8390 }, { "epoch": 0.7731146634726125, "grad_norm": 0.9394456052321907, "learning_rate": 6.711251500260322e-07, "loss": 0.1175, "step": 8391 }, { "epoch": 0.7732067996498825, "grad_norm": 1.0224125514598037, "learning_rate": 6.706058606112217e-07, "loss": 0.1264, "step": 8392 }, { "epoch": 0.7732989358271525, "grad_norm": 0.9136807246539035, "learning_rate": 6.700867410558537e-07, "loss": 0.1133, "step": 8393 }, { "epoch": 0.7733910720044226, "grad_norm": 0.9579652653107388, "learning_rate": 6.695677914081303e-07, "loss": 0.1211, "step": 8394 }, { "epoch": 0.7734832081816926, "grad_norm": 0.8953201784587433, "learning_rate": 6.690490117162333e-07, "loss": 0.1177, "step": 8395 }, { "epoch": 0.7735753443589626, "grad_norm": 0.8947390913684228, "learning_rate": 6.68530402028332e-07, "loss": 0.1175, "step": 8396 }, { "epoch": 0.7736674805362326, "grad_norm": 0.930339743335267, "learning_rate": 6.680119623925804e-07, "loss": 0.1119, "step": 8397 }, { "epoch": 0.7737596167135026, "grad_norm": 0.9691742182397383, "learning_rate": 6.67493692857116e-07, "loss": 0.1232, "step": 8398 }, { "epoch": 0.7738517528907726, "grad_norm": 0.9782640805198125, "learning_rate": 6.669755934700586e-07, "loss": 0.1178, "step": 8399 }, { "epoch": 0.7739438890680426, "grad_norm": 0.946658092447457, "learning_rate": 6.664576642795153e-07, "loss": 0.1211, "step": 8400 }, { "epoch": 0.7740360252453126, "grad_norm": 0.9171463357835422, "learning_rate": 6.659399053335747e-07, "loss": 0.1158, "step": 8401 }, { "epoch": 0.7741281614225826, "grad_norm": 0.9237405035504593, "learning_rate": 6.654223166803117e-07, "loss": 0.118, "step": 8402 }, { "epoch": 0.7742202975998526, "grad_norm": 0.8654426344561063, "learning_rate": 6.649048983677834e-07, "loss": 0.1139, "step": 8403 }, { "epoch": 0.7743124337771226, "grad_norm": 0.9287901506367029, "learning_rate": 6.643876504440327e-07, "loss": 0.1165, "step": 8404 }, { "epoch": 0.7744045699543926, "grad_norm": 0.936571591755619, "learning_rate": 6.638705729570871e-07, "loss": 0.1256, "step": 8405 }, { "epoch": 0.7744967061316625, "grad_norm": 0.9037966720069851, "learning_rate": 6.633536659549558e-07, "loss": 0.1117, "step": 8406 }, { "epoch": 0.7745888423089327, "grad_norm": 0.930053506926587, "learning_rate": 6.628369294856349e-07, "loss": 0.1152, "step": 8407 }, { "epoch": 0.7746809784862027, "grad_norm": 1.0105164231758588, "learning_rate": 6.623203635971018e-07, "loss": 0.1324, "step": 8408 }, { "epoch": 0.7747731146634727, "grad_norm": 0.9280072407677732, "learning_rate": 6.618039683373209e-07, "loss": 0.1121, "step": 8409 }, { "epoch": 0.7748652508407426, "grad_norm": 0.931988772652393, "learning_rate": 6.612877437542403e-07, "loss": 0.1175, "step": 8410 }, { "epoch": 0.7749573870180126, "grad_norm": 0.9055453680389911, "learning_rate": 6.607716898957903e-07, "loss": 0.1134, "step": 8411 }, { "epoch": 0.7750495231952826, "grad_norm": 0.9570405732563926, "learning_rate": 6.602558068098864e-07, "loss": 0.126, "step": 8412 }, { "epoch": 0.7751416593725526, "grad_norm": 0.8901939988992377, "learning_rate": 6.597400945444285e-07, "loss": 0.1098, "step": 8413 }, { "epoch": 0.7752337955498226, "grad_norm": 0.9457230744765637, "learning_rate": 6.59224553147301e-07, "loss": 0.1268, "step": 8414 }, { "epoch": 0.7753259317270926, "grad_norm": 0.9090096740087552, "learning_rate": 6.587091826663728e-07, "loss": 0.1165, "step": 8415 }, { "epoch": 0.7754180679043626, "grad_norm": 0.9268957635480991, "learning_rate": 6.581939831494948e-07, "loss": 0.1165, "step": 8416 }, { "epoch": 0.7755102040816326, "grad_norm": 0.8867965628336227, "learning_rate": 6.576789546445031e-07, "loss": 0.1091, "step": 8417 }, { "epoch": 0.7756023402589026, "grad_norm": 0.9892905172706016, "learning_rate": 6.571640971992188e-07, "loss": 0.1157, "step": 8418 }, { "epoch": 0.7756944764361726, "grad_norm": 0.9380259789018441, "learning_rate": 6.566494108614471e-07, "loss": 0.1152, "step": 8419 }, { "epoch": 0.7757866126134426, "grad_norm": 0.9206879628963276, "learning_rate": 6.561348956789751e-07, "loss": 0.1111, "step": 8420 }, { "epoch": 0.7758787487907127, "grad_norm": 0.9614768658559342, "learning_rate": 6.556205516995772e-07, "loss": 0.1194, "step": 8421 }, { "epoch": 0.7759708849679827, "grad_norm": 1.0010135251033254, "learning_rate": 6.551063789710091e-07, "loss": 0.1287, "step": 8422 }, { "epoch": 0.7760630211452527, "grad_norm": 0.9381981477040692, "learning_rate": 6.545923775410129e-07, "loss": 0.1111, "step": 8423 }, { "epoch": 0.7761551573225227, "grad_norm": 0.991357248153485, "learning_rate": 6.540785474573121e-07, "loss": 0.1266, "step": 8424 }, { "epoch": 0.7762472934997927, "grad_norm": 0.9387943094150902, "learning_rate": 6.535648887676171e-07, "loss": 0.1191, "step": 8425 }, { "epoch": 0.7763394296770627, "grad_norm": 0.9860413451407051, "learning_rate": 6.530514015196218e-07, "loss": 0.1304, "step": 8426 }, { "epoch": 0.7764315658543327, "grad_norm": 0.985186213663649, "learning_rate": 6.525380857610022e-07, "loss": 0.1286, "step": 8427 }, { "epoch": 0.7765237020316027, "grad_norm": 0.9875065104492615, "learning_rate": 6.520249415394197e-07, "loss": 0.1194, "step": 8428 }, { "epoch": 0.7766158382088727, "grad_norm": 0.926501626127916, "learning_rate": 6.515119689025201e-07, "loss": 0.1156, "step": 8429 }, { "epoch": 0.7767079743861427, "grad_norm": 1.0045405628938215, "learning_rate": 6.509991678979333e-07, "loss": 0.1312, "step": 8430 }, { "epoch": 0.7768001105634127, "grad_norm": 0.9431212698157658, "learning_rate": 6.504865385732734e-07, "loss": 0.1197, "step": 8431 }, { "epoch": 0.7768922467406827, "grad_norm": 0.9141056582506759, "learning_rate": 6.499740809761373e-07, "loss": 0.1198, "step": 8432 }, { "epoch": 0.7769843829179527, "grad_norm": 0.9006244298850004, "learning_rate": 6.494617951541063e-07, "loss": 0.1112, "step": 8433 }, { "epoch": 0.7770765190952227, "grad_norm": 1.0513186465071094, "learning_rate": 6.489496811547468e-07, "loss": 0.1287, "step": 8434 }, { "epoch": 0.7771686552724928, "grad_norm": 0.9398083516416804, "learning_rate": 6.484377390256086e-07, "loss": 0.1223, "step": 8435 }, { "epoch": 0.7772607914497628, "grad_norm": 0.8917848864473544, "learning_rate": 6.479259688142261e-07, "loss": 0.1034, "step": 8436 }, { "epoch": 0.7773529276270328, "grad_norm": 0.9237163314143305, "learning_rate": 6.474143705681171e-07, "loss": 0.113, "step": 8437 }, { "epoch": 0.7774450638043028, "grad_norm": 0.8793476209899715, "learning_rate": 6.469029443347821e-07, "loss": 0.1095, "step": 8438 }, { "epoch": 0.7775371999815728, "grad_norm": 0.9637901601407782, "learning_rate": 6.463916901617084e-07, "loss": 0.1255, "step": 8439 }, { "epoch": 0.7776293361588428, "grad_norm": 0.9257448847903722, "learning_rate": 6.458806080963664e-07, "loss": 0.1217, "step": 8440 }, { "epoch": 0.7777214723361128, "grad_norm": 0.973025415024205, "learning_rate": 6.453696981862087e-07, "loss": 0.119, "step": 8441 }, { "epoch": 0.7778136085133828, "grad_norm": 0.9278105107397764, "learning_rate": 6.448589604786748e-07, "loss": 0.1176, "step": 8442 }, { "epoch": 0.7779057446906528, "grad_norm": 0.9748580678259127, "learning_rate": 6.443483950211854e-07, "loss": 0.1338, "step": 8443 }, { "epoch": 0.7779978808679228, "grad_norm": 0.9426796776933819, "learning_rate": 6.438380018611481e-07, "loss": 0.1161, "step": 8444 }, { "epoch": 0.7780900170451928, "grad_norm": 0.9374426050170451, "learning_rate": 6.433277810459512e-07, "loss": 0.126, "step": 8445 }, { "epoch": 0.7781821532224628, "grad_norm": 0.9156606080642972, "learning_rate": 6.428177326229698e-07, "loss": 0.1196, "step": 8446 }, { "epoch": 0.7782742893997328, "grad_norm": 0.9785305397305804, "learning_rate": 6.423078566395624e-07, "loss": 0.1278, "step": 8447 }, { "epoch": 0.7783664255770029, "grad_norm": 0.9472568149749723, "learning_rate": 6.417981531430705e-07, "loss": 0.1195, "step": 8448 }, { "epoch": 0.7784585617542729, "grad_norm": 0.9267730257758037, "learning_rate": 6.412886221808193e-07, "loss": 0.1217, "step": 8449 }, { "epoch": 0.7785506979315429, "grad_norm": 0.9245783333379712, "learning_rate": 6.407792638001195e-07, "loss": 0.118, "step": 8450 }, { "epoch": 0.7786428341088129, "grad_norm": 0.8951265439753134, "learning_rate": 6.402700780482651e-07, "loss": 0.1143, "step": 8451 }, { "epoch": 0.7787349702860829, "grad_norm": 0.9733184947776784, "learning_rate": 6.39761064972535e-07, "loss": 0.1192, "step": 8452 }, { "epoch": 0.7788271064633528, "grad_norm": 0.9653018548665765, "learning_rate": 6.392522246201902e-07, "loss": 0.1194, "step": 8453 }, { "epoch": 0.7789192426406228, "grad_norm": 0.9366809581149949, "learning_rate": 6.387435570384759e-07, "loss": 0.1185, "step": 8454 }, { "epoch": 0.7790113788178928, "grad_norm": 0.9412472670526044, "learning_rate": 6.382350622746225e-07, "loss": 0.1257, "step": 8455 }, { "epoch": 0.7791035149951628, "grad_norm": 0.9377271262047409, "learning_rate": 6.377267403758447e-07, "loss": 0.1122, "step": 8456 }, { "epoch": 0.7791956511724328, "grad_norm": 0.9410613069969594, "learning_rate": 6.372185913893389e-07, "loss": 0.1252, "step": 8457 }, { "epoch": 0.7792877873497028, "grad_norm": 1.0330182142744149, "learning_rate": 6.367106153622879e-07, "loss": 0.1228, "step": 8458 }, { "epoch": 0.7793799235269728, "grad_norm": 0.9656816711177176, "learning_rate": 6.362028123418562e-07, "loss": 0.1174, "step": 8459 }, { "epoch": 0.7794720597042428, "grad_norm": 0.9552540658961965, "learning_rate": 6.356951823751947e-07, "loss": 0.1094, "step": 8460 }, { "epoch": 0.7795641958815128, "grad_norm": 0.8951826862510767, "learning_rate": 6.351877255094352e-07, "loss": 0.1064, "step": 8461 }, { "epoch": 0.7796563320587829, "grad_norm": 0.8842323676196631, "learning_rate": 6.346804417916963e-07, "loss": 0.1091, "step": 8462 }, { "epoch": 0.7797484682360529, "grad_norm": 1.021858302504246, "learning_rate": 6.341733312690798e-07, "loss": 0.1224, "step": 8463 }, { "epoch": 0.7798406044133229, "grad_norm": 0.9373733957984577, "learning_rate": 6.336663939886695e-07, "loss": 0.1152, "step": 8464 }, { "epoch": 0.7799327405905929, "grad_norm": 0.9418487894872865, "learning_rate": 6.331596299975362e-07, "loss": 0.1287, "step": 8465 }, { "epoch": 0.7800248767678629, "grad_norm": 0.9485846360794888, "learning_rate": 6.326530393427316e-07, "loss": 0.1197, "step": 8466 }, { "epoch": 0.7801170129451329, "grad_norm": 0.9556435873882474, "learning_rate": 6.321466220712929e-07, "loss": 0.1156, "step": 8467 }, { "epoch": 0.7802091491224029, "grad_norm": 0.8926722100933544, "learning_rate": 6.316403782302416e-07, "loss": 0.113, "step": 8468 }, { "epoch": 0.7803012852996729, "grad_norm": 0.9739250990079734, "learning_rate": 6.311343078665835e-07, "loss": 0.1289, "step": 8469 }, { "epoch": 0.7803934214769429, "grad_norm": 0.988398143690067, "learning_rate": 6.306284110273047e-07, "loss": 0.1248, "step": 8470 }, { "epoch": 0.7804855576542129, "grad_norm": 0.9728431078034094, "learning_rate": 6.301226877593794e-07, "loss": 0.1162, "step": 8471 }, { "epoch": 0.7805776938314829, "grad_norm": 0.9367164880906897, "learning_rate": 6.296171381097635e-07, "loss": 0.1259, "step": 8472 }, { "epoch": 0.7806698300087529, "grad_norm": 0.914892332720901, "learning_rate": 6.291117621253984e-07, "loss": 0.1188, "step": 8473 }, { "epoch": 0.7807619661860229, "grad_norm": 0.9445604136684088, "learning_rate": 6.286065598532065e-07, "loss": 0.1215, "step": 8474 }, { "epoch": 0.780854102363293, "grad_norm": 0.971002437022487, "learning_rate": 6.281015313400981e-07, "loss": 0.127, "step": 8475 }, { "epoch": 0.780946238540563, "grad_norm": 0.9458980424013934, "learning_rate": 6.27596676632963e-07, "loss": 0.1233, "step": 8476 }, { "epoch": 0.781038374717833, "grad_norm": 0.9575720735157595, "learning_rate": 6.270919957786789e-07, "loss": 0.1276, "step": 8477 }, { "epoch": 0.781130510895103, "grad_norm": 0.9043773140494776, "learning_rate": 6.265874888241035e-07, "loss": 0.1148, "step": 8478 }, { "epoch": 0.781222647072373, "grad_norm": 0.9233761176859706, "learning_rate": 6.260831558160818e-07, "loss": 0.1167, "step": 8479 }, { "epoch": 0.781314783249643, "grad_norm": 0.98013498969769, "learning_rate": 6.255789968014411e-07, "loss": 0.1319, "step": 8480 }, { "epoch": 0.781406919426913, "grad_norm": 0.9404213995261698, "learning_rate": 6.250750118269927e-07, "loss": 0.1244, "step": 8481 }, { "epoch": 0.781499055604183, "grad_norm": 0.9519949447236915, "learning_rate": 6.245712009395303e-07, "loss": 0.1134, "step": 8482 }, { "epoch": 0.781591191781453, "grad_norm": 0.9317461820724098, "learning_rate": 6.240675641858335e-07, "loss": 0.1192, "step": 8483 }, { "epoch": 0.781683327958723, "grad_norm": 0.9129871386527313, "learning_rate": 6.235641016126653e-07, "loss": 0.1166, "step": 8484 }, { "epoch": 0.781775464135993, "grad_norm": 1.048428062985597, "learning_rate": 6.230608132667732e-07, "loss": 0.1334, "step": 8485 }, { "epoch": 0.781867600313263, "grad_norm": 0.9451185634087925, "learning_rate": 6.225576991948865e-07, "loss": 0.1149, "step": 8486 }, { "epoch": 0.781959736490533, "grad_norm": 0.99581861616513, "learning_rate": 6.220547594437188e-07, "loss": 0.1187, "step": 8487 }, { "epoch": 0.782051872667803, "grad_norm": 1.0278512933151998, "learning_rate": 6.215519940599687e-07, "loss": 0.1317, "step": 8488 }, { "epoch": 0.7821440088450731, "grad_norm": 0.9460848053133615, "learning_rate": 6.210494030903188e-07, "loss": 0.1257, "step": 8489 }, { "epoch": 0.7822361450223431, "grad_norm": 0.9789169783690399, "learning_rate": 6.205469865814334e-07, "loss": 0.1198, "step": 8490 }, { "epoch": 0.7823282811996131, "grad_norm": 0.9355875645829241, "learning_rate": 6.200447445799631e-07, "loss": 0.1239, "step": 8491 }, { "epoch": 0.7824204173768831, "grad_norm": 0.931025680256216, "learning_rate": 6.195426771325402e-07, "loss": 0.1197, "step": 8492 }, { "epoch": 0.7825125535541531, "grad_norm": 0.9038272461792805, "learning_rate": 6.190407842857818e-07, "loss": 0.109, "step": 8493 }, { "epoch": 0.782604689731423, "grad_norm": 0.9291283890197121, "learning_rate": 6.1853906608629e-07, "loss": 0.1144, "step": 8494 }, { "epoch": 0.782696825908693, "grad_norm": 0.9313673945332007, "learning_rate": 6.180375225806475e-07, "loss": 0.1174, "step": 8495 }, { "epoch": 0.782788962085963, "grad_norm": 0.8837655167803492, "learning_rate": 6.175361538154243e-07, "loss": 0.1078, "step": 8496 }, { "epoch": 0.782881098263233, "grad_norm": 0.9261810944905562, "learning_rate": 6.170349598371711e-07, "loss": 0.1135, "step": 8497 }, { "epoch": 0.782973234440503, "grad_norm": 0.9705748971066371, "learning_rate": 6.165339406924253e-07, "loss": 0.1284, "step": 8498 }, { "epoch": 0.783065370617773, "grad_norm": 0.931338790555389, "learning_rate": 6.16033096427705e-07, "loss": 0.1156, "step": 8499 }, { "epoch": 0.783157506795043, "grad_norm": 0.9412160383814583, "learning_rate": 6.155324270895144e-07, "loss": 0.1259, "step": 8500 }, { "epoch": 0.783157506795043, "eval_loss": 0.12055304646492004, "eval_runtime": 299.0742, "eval_samples_per_second": 23.462, "eval_steps_per_second": 2.936, "step": 8500 }, { "epoch": 0.783249642972313, "grad_norm": 0.9786974177274947, "learning_rate": 6.150319327243417e-07, "loss": 0.1266, "step": 8501 }, { "epoch": 0.783341779149583, "grad_norm": 0.9663666635302712, "learning_rate": 6.145316133786569e-07, "loss": 0.1236, "step": 8502 }, { "epoch": 0.7834339153268531, "grad_norm": 1.0066102428653365, "learning_rate": 6.140314690989138e-07, "loss": 0.1201, "step": 8503 }, { "epoch": 0.7835260515041231, "grad_norm": 0.9032566231861803, "learning_rate": 6.135314999315517e-07, "loss": 0.1188, "step": 8504 }, { "epoch": 0.7836181876813931, "grad_norm": 0.8936724534841473, "learning_rate": 6.130317059229932e-07, "loss": 0.1112, "step": 8505 }, { "epoch": 0.7837103238586631, "grad_norm": 0.8765532770050644, "learning_rate": 6.125320871196445e-07, "loss": 0.1215, "step": 8506 }, { "epoch": 0.7838024600359331, "grad_norm": 0.9812082328646534, "learning_rate": 6.120326435678945e-07, "loss": 0.1204, "step": 8507 }, { "epoch": 0.7838945962132031, "grad_norm": 0.9941206603675594, "learning_rate": 6.115333753141159e-07, "loss": 0.1267, "step": 8508 }, { "epoch": 0.7839867323904731, "grad_norm": 0.9400949014093748, "learning_rate": 6.110342824046667e-07, "loss": 0.1191, "step": 8509 }, { "epoch": 0.7840788685677431, "grad_norm": 0.9885242657468389, "learning_rate": 6.105353648858887e-07, "loss": 0.134, "step": 8510 }, { "epoch": 0.7841710047450131, "grad_norm": 0.9742890666878563, "learning_rate": 6.100366228041043e-07, "loss": 0.1279, "step": 8511 }, { "epoch": 0.7842631409222831, "grad_norm": 0.9855302153041672, "learning_rate": 6.095380562056238e-07, "loss": 0.128, "step": 8512 }, { "epoch": 0.7843552770995531, "grad_norm": 0.9553368441789891, "learning_rate": 6.090396651367375e-07, "loss": 0.1217, "step": 8513 }, { "epoch": 0.7844474132768231, "grad_norm": 0.9679957894036133, "learning_rate": 6.085414496437226e-07, "loss": 0.1314, "step": 8514 }, { "epoch": 0.7845395494540931, "grad_norm": 0.9478895842952598, "learning_rate": 6.080434097728368e-07, "loss": 0.1304, "step": 8515 }, { "epoch": 0.7846316856313632, "grad_norm": 0.8701797009684351, "learning_rate": 6.075455455703242e-07, "loss": 0.1086, "step": 8516 }, { "epoch": 0.7847238218086332, "grad_norm": 0.9906259373704634, "learning_rate": 6.070478570824118e-07, "loss": 0.1289, "step": 8517 }, { "epoch": 0.7848159579859032, "grad_norm": 0.8989965341827344, "learning_rate": 6.065503443553097e-07, "loss": 0.1077, "step": 8518 }, { "epoch": 0.7849080941631732, "grad_norm": 0.959007954608302, "learning_rate": 6.060530074352114e-07, "loss": 0.1263, "step": 8519 }, { "epoch": 0.7850002303404432, "grad_norm": 0.9246171708824706, "learning_rate": 6.055558463682948e-07, "loss": 0.1178, "step": 8520 }, { "epoch": 0.7850923665177132, "grad_norm": 0.9477951948238026, "learning_rate": 6.050588612007221e-07, "loss": 0.1191, "step": 8521 }, { "epoch": 0.7851845026949832, "grad_norm": 0.9658569636554907, "learning_rate": 6.045620519786386e-07, "loss": 0.1155, "step": 8522 }, { "epoch": 0.7852766388722532, "grad_norm": 0.957644890519468, "learning_rate": 6.040654187481726e-07, "loss": 0.1172, "step": 8523 }, { "epoch": 0.7853687750495232, "grad_norm": 0.9855829358535408, "learning_rate": 6.035689615554358e-07, "loss": 0.1212, "step": 8524 }, { "epoch": 0.7854609112267932, "grad_norm": 0.8942975631899651, "learning_rate": 6.030726804465251e-07, "loss": 0.1116, "step": 8525 }, { "epoch": 0.7855530474040632, "grad_norm": 0.9451557449490459, "learning_rate": 6.0257657546752e-07, "loss": 0.12, "step": 8526 }, { "epoch": 0.7856451835813332, "grad_norm": 0.9301666727799931, "learning_rate": 6.020806466644849e-07, "loss": 0.1243, "step": 8527 }, { "epoch": 0.7857373197586032, "grad_norm": 0.9620046325714793, "learning_rate": 6.015848940834662e-07, "loss": 0.1187, "step": 8528 }, { "epoch": 0.7858294559358732, "grad_norm": 0.933481477737301, "learning_rate": 6.010893177704935e-07, "loss": 0.1183, "step": 8529 }, { "epoch": 0.7859215921131433, "grad_norm": 0.9539924108362, "learning_rate": 6.00593917771582e-07, "loss": 0.128, "step": 8530 }, { "epoch": 0.7860137282904133, "grad_norm": 0.9199764287416644, "learning_rate": 6.000986941327303e-07, "loss": 0.114, "step": 8531 }, { "epoch": 0.7861058644676833, "grad_norm": 0.9631603455245744, "learning_rate": 5.996036468999187e-07, "loss": 0.1209, "step": 8532 }, { "epoch": 0.7861980006449533, "grad_norm": 0.946933611017911, "learning_rate": 5.991087761191136e-07, "loss": 0.1266, "step": 8533 }, { "epoch": 0.7862901368222233, "grad_norm": 0.9249986114077867, "learning_rate": 5.986140818362626e-07, "loss": 0.1131, "step": 8534 }, { "epoch": 0.7863822729994933, "grad_norm": 0.9484468777362128, "learning_rate": 5.981195640972995e-07, "loss": 0.1282, "step": 8535 }, { "epoch": 0.7864744091767633, "grad_norm": 0.9371057825811394, "learning_rate": 5.976252229481385e-07, "loss": 0.124, "step": 8536 }, { "epoch": 0.7865665453540333, "grad_norm": 0.9188426598072358, "learning_rate": 5.971310584346807e-07, "loss": 0.1148, "step": 8537 }, { "epoch": 0.7866586815313032, "grad_norm": 0.9073770852124188, "learning_rate": 5.966370706028094e-07, "loss": 0.1145, "step": 8538 }, { "epoch": 0.7867508177085732, "grad_norm": 0.9547670801763313, "learning_rate": 5.96143259498391e-07, "loss": 0.1266, "step": 8539 }, { "epoch": 0.7868429538858432, "grad_norm": 0.9827821775876324, "learning_rate": 5.956496251672752e-07, "loss": 0.128, "step": 8540 }, { "epoch": 0.7869350900631132, "grad_norm": 0.9456692696948865, "learning_rate": 5.951561676552966e-07, "loss": 0.1215, "step": 8541 }, { "epoch": 0.7870272262403832, "grad_norm": 0.9267593713920448, "learning_rate": 5.946628870082729e-07, "loss": 0.123, "step": 8542 }, { "epoch": 0.7871193624176533, "grad_norm": 0.9393627351534874, "learning_rate": 5.941697832720058e-07, "loss": 0.1325, "step": 8543 }, { "epoch": 0.7872114985949233, "grad_norm": 0.9550911434469457, "learning_rate": 5.936768564922796e-07, "loss": 0.1203, "step": 8544 }, { "epoch": 0.7873036347721933, "grad_norm": 0.9004499570914317, "learning_rate": 5.931841067148616e-07, "loss": 0.1176, "step": 8545 }, { "epoch": 0.7873957709494633, "grad_norm": 0.8932941203298196, "learning_rate": 5.926915339855044e-07, "loss": 0.1213, "step": 8546 }, { "epoch": 0.7874879071267333, "grad_norm": 0.8922074120043654, "learning_rate": 5.921991383499445e-07, "loss": 0.1156, "step": 8547 }, { "epoch": 0.7875800433040033, "grad_norm": 0.9050717612224399, "learning_rate": 5.917069198538991e-07, "loss": 0.1103, "step": 8548 }, { "epoch": 0.7876721794812733, "grad_norm": 0.9122458952803807, "learning_rate": 5.912148785430713e-07, "loss": 0.1023, "step": 8549 }, { "epoch": 0.7877643156585433, "grad_norm": 0.9437602990583515, "learning_rate": 5.907230144631485e-07, "loss": 0.1245, "step": 8550 }, { "epoch": 0.7878564518358133, "grad_norm": 0.8797876753243765, "learning_rate": 5.902313276597984e-07, "loss": 0.1112, "step": 8551 }, { "epoch": 0.7879485880130833, "grad_norm": 0.9023470141440695, "learning_rate": 5.89739818178676e-07, "loss": 0.1145, "step": 8552 }, { "epoch": 0.7880407241903533, "grad_norm": 1.0168757196023077, "learning_rate": 5.892484860654163e-07, "loss": 0.1374, "step": 8553 }, { "epoch": 0.7881328603676233, "grad_norm": 0.9206918918167758, "learning_rate": 5.8875733136564e-07, "loss": 0.1309, "step": 8554 }, { "epoch": 0.7882249965448933, "grad_norm": 0.9902963814438155, "learning_rate": 5.882663541249523e-07, "loss": 0.128, "step": 8555 }, { "epoch": 0.7883171327221633, "grad_norm": 0.9183091186455521, "learning_rate": 5.877755543889391e-07, "loss": 0.1202, "step": 8556 }, { "epoch": 0.7884092688994334, "grad_norm": 0.9533822683824176, "learning_rate": 5.872849322031706e-07, "loss": 0.1213, "step": 8557 }, { "epoch": 0.7885014050767034, "grad_norm": 0.9373591429059158, "learning_rate": 5.867944876132022e-07, "loss": 0.1227, "step": 8558 }, { "epoch": 0.7885935412539734, "grad_norm": 0.9103034475496797, "learning_rate": 5.863042206645716e-07, "loss": 0.1034, "step": 8559 }, { "epoch": 0.7886856774312434, "grad_norm": 0.985633430923528, "learning_rate": 5.858141314028007e-07, "loss": 0.1258, "step": 8560 }, { "epoch": 0.7887778136085134, "grad_norm": 0.8959172713941098, "learning_rate": 5.853242198733938e-07, "loss": 0.1123, "step": 8561 }, { "epoch": 0.7888699497857834, "grad_norm": 0.9396584454256341, "learning_rate": 5.848344861218383e-07, "loss": 0.1252, "step": 8562 }, { "epoch": 0.7889620859630534, "grad_norm": 0.9604002869692225, "learning_rate": 5.843449301936068e-07, "loss": 0.1263, "step": 8563 }, { "epoch": 0.7890542221403234, "grad_norm": 0.9298803788187124, "learning_rate": 5.838555521341558e-07, "loss": 0.1278, "step": 8564 }, { "epoch": 0.7891463583175934, "grad_norm": 0.9559648352234692, "learning_rate": 5.833663519889218e-07, "loss": 0.1244, "step": 8565 }, { "epoch": 0.7892384944948634, "grad_norm": 0.8911343469758822, "learning_rate": 5.828773298033294e-07, "loss": 0.1109, "step": 8566 }, { "epoch": 0.7893306306721334, "grad_norm": 0.9454641222726273, "learning_rate": 5.823884856227824e-07, "loss": 0.1232, "step": 8567 }, { "epoch": 0.7894227668494034, "grad_norm": 0.9859866096920007, "learning_rate": 5.818998194926714e-07, "loss": 0.13, "step": 8568 }, { "epoch": 0.7895149030266734, "grad_norm": 0.947179149432619, "learning_rate": 5.81411331458368e-07, "loss": 0.1206, "step": 8569 }, { "epoch": 0.7896070392039435, "grad_norm": 0.9052920558861215, "learning_rate": 5.809230215652292e-07, "loss": 0.1077, "step": 8570 }, { "epoch": 0.7896991753812135, "grad_norm": 0.9195765167110864, "learning_rate": 5.804348898585949e-07, "loss": 0.1171, "step": 8571 }, { "epoch": 0.7897913115584835, "grad_norm": 0.9482984019981444, "learning_rate": 5.799469363837876e-07, "loss": 0.1248, "step": 8572 }, { "epoch": 0.7898834477357535, "grad_norm": 0.9463055742926426, "learning_rate": 5.794591611861134e-07, "loss": 0.1208, "step": 8573 }, { "epoch": 0.7899755839130235, "grad_norm": 0.9494961706911567, "learning_rate": 5.789715643108623e-07, "loss": 0.1139, "step": 8574 }, { "epoch": 0.7900677200902935, "grad_norm": 0.919851610503083, "learning_rate": 5.784841458033086e-07, "loss": 0.1215, "step": 8575 }, { "epoch": 0.7901598562675635, "grad_norm": 0.9245264796890834, "learning_rate": 5.779969057087095e-07, "loss": 0.1176, "step": 8576 }, { "epoch": 0.7902519924448335, "grad_norm": 0.9287192440639785, "learning_rate": 5.775098440723042e-07, "loss": 0.1222, "step": 8577 }, { "epoch": 0.7903441286221035, "grad_norm": 0.9450777250855027, "learning_rate": 5.770229609393166e-07, "loss": 0.1129, "step": 8578 }, { "epoch": 0.7904362647993735, "grad_norm": 0.9492026706943572, "learning_rate": 5.765362563549537e-07, "loss": 0.1278, "step": 8579 }, { "epoch": 0.7905284009766435, "grad_norm": 0.9202309222182878, "learning_rate": 5.760497303644063e-07, "loss": 0.1103, "step": 8580 }, { "epoch": 0.7906205371539134, "grad_norm": 0.9245925883002144, "learning_rate": 5.755633830128493e-07, "loss": 0.1257, "step": 8581 }, { "epoch": 0.7907126733311834, "grad_norm": 0.9517509945193647, "learning_rate": 5.750772143454395e-07, "loss": 0.1261, "step": 8582 }, { "epoch": 0.7908048095084534, "grad_norm": 0.9080767566545325, "learning_rate": 5.745912244073166e-07, "loss": 0.1067, "step": 8583 }, { "epoch": 0.7908969456857236, "grad_norm": 0.9902988389524588, "learning_rate": 5.741054132436058e-07, "loss": 0.1311, "step": 8584 }, { "epoch": 0.7909890818629935, "grad_norm": 0.9161014687025334, "learning_rate": 5.736197808994151e-07, "loss": 0.1168, "step": 8585 }, { "epoch": 0.7910812180402635, "grad_norm": 0.9407576129491564, "learning_rate": 5.731343274198348e-07, "loss": 0.1181, "step": 8586 }, { "epoch": 0.7911733542175335, "grad_norm": 0.9222839395438274, "learning_rate": 5.726490528499398e-07, "loss": 0.1241, "step": 8587 }, { "epoch": 0.7912654903948035, "grad_norm": 0.989642538165895, "learning_rate": 5.721639572347873e-07, "loss": 0.1298, "step": 8588 }, { "epoch": 0.7913576265720735, "grad_norm": 0.910192558868216, "learning_rate": 5.716790406194195e-07, "loss": 0.1155, "step": 8589 }, { "epoch": 0.7914497627493435, "grad_norm": 0.9427897115154177, "learning_rate": 5.711943030488595e-07, "loss": 0.1208, "step": 8590 }, { "epoch": 0.7915418989266135, "grad_norm": 0.9230754902994186, "learning_rate": 5.707097445681162e-07, "loss": 0.1156, "step": 8591 }, { "epoch": 0.7916340351038835, "grad_norm": 0.9736347600748048, "learning_rate": 5.702253652221815e-07, "loss": 0.1209, "step": 8592 }, { "epoch": 0.7917261712811535, "grad_norm": 0.8885640767094255, "learning_rate": 5.697411650560292e-07, "loss": 0.1093, "step": 8593 }, { "epoch": 0.7918183074584235, "grad_norm": 0.9367737786103164, "learning_rate": 5.692571441146167e-07, "loss": 0.1137, "step": 8594 }, { "epoch": 0.7919104436356935, "grad_norm": 0.9616450487450995, "learning_rate": 5.68773302442886e-07, "loss": 0.1251, "step": 8595 }, { "epoch": 0.7920025798129635, "grad_norm": 0.994312643078694, "learning_rate": 5.682896400857623e-07, "loss": 0.1328, "step": 8596 }, { "epoch": 0.7920947159902335, "grad_norm": 0.9133848572401454, "learning_rate": 5.678061570881541e-07, "loss": 0.1175, "step": 8597 }, { "epoch": 0.7921868521675036, "grad_norm": 0.9092316720540026, "learning_rate": 5.673228534949521e-07, "loss": 0.1126, "step": 8598 }, { "epoch": 0.7922789883447736, "grad_norm": 0.9170357165667093, "learning_rate": 5.668397293510303e-07, "loss": 0.1095, "step": 8599 }, { "epoch": 0.7923711245220436, "grad_norm": 0.9195323389718666, "learning_rate": 5.66356784701248e-07, "loss": 0.1139, "step": 8600 }, { "epoch": 0.7924632606993136, "grad_norm": 0.9813287621184931, "learning_rate": 5.658740195904466e-07, "loss": 0.1224, "step": 8601 }, { "epoch": 0.7925553968765836, "grad_norm": 0.9381776235684026, "learning_rate": 5.653914340634504e-07, "loss": 0.1183, "step": 8602 }, { "epoch": 0.7926475330538536, "grad_norm": 0.9323615832783858, "learning_rate": 5.649090281650682e-07, "loss": 0.1234, "step": 8603 }, { "epoch": 0.7927396692311236, "grad_norm": 0.9313543955066301, "learning_rate": 5.644268019400903e-07, "loss": 0.1118, "step": 8604 }, { "epoch": 0.7928318054083936, "grad_norm": 0.9637787040109845, "learning_rate": 5.639447554332928e-07, "loss": 0.1296, "step": 8605 }, { "epoch": 0.7929239415856636, "grad_norm": 0.9278148056738393, "learning_rate": 5.634628886894324e-07, "loss": 0.1084, "step": 8606 }, { "epoch": 0.7930160777629336, "grad_norm": 0.9547852357619248, "learning_rate": 5.629812017532515e-07, "loss": 0.1208, "step": 8607 }, { "epoch": 0.7931082139402036, "grad_norm": 0.9578763992541558, "learning_rate": 5.62499694669475e-07, "loss": 0.1269, "step": 8608 }, { "epoch": 0.7932003501174736, "grad_norm": 0.954352297806697, "learning_rate": 5.620183674828094e-07, "loss": 0.1304, "step": 8609 }, { "epoch": 0.7932924862947436, "grad_norm": 0.9215896860684726, "learning_rate": 5.615372202379482e-07, "loss": 0.1208, "step": 8610 }, { "epoch": 0.7933846224720137, "grad_norm": 0.8858931374801061, "learning_rate": 5.610562529795635e-07, "loss": 0.1101, "step": 8611 }, { "epoch": 0.7934767586492837, "grad_norm": 0.974136599445881, "learning_rate": 5.605754657523147e-07, "loss": 0.1253, "step": 8612 }, { "epoch": 0.7935688948265537, "grad_norm": 0.9639846350892, "learning_rate": 5.600948586008432e-07, "loss": 0.1208, "step": 8613 }, { "epoch": 0.7936610310038237, "grad_norm": 0.9604929500826802, "learning_rate": 5.59614431569773e-07, "loss": 0.122, "step": 8614 }, { "epoch": 0.7937531671810937, "grad_norm": 0.9931624424857912, "learning_rate": 5.591341847037107e-07, "loss": 0.1276, "step": 8615 }, { "epoch": 0.7938453033583637, "grad_norm": 0.9549498208056124, "learning_rate": 5.586541180472485e-07, "loss": 0.1233, "step": 8616 }, { "epoch": 0.7939374395356337, "grad_norm": 0.8682677683889473, "learning_rate": 5.581742316449601e-07, "loss": 0.1024, "step": 8617 }, { "epoch": 0.7940295757129037, "grad_norm": 0.9493113700230978, "learning_rate": 5.57694525541404e-07, "loss": 0.128, "step": 8618 }, { "epoch": 0.7941217118901737, "grad_norm": 0.9031374769038998, "learning_rate": 5.572149997811205e-07, "loss": 0.1172, "step": 8619 }, { "epoch": 0.7942138480674437, "grad_norm": 0.9079305929186919, "learning_rate": 5.567356544086325e-07, "loss": 0.1236, "step": 8620 }, { "epoch": 0.7943059842447137, "grad_norm": 0.9274268890673875, "learning_rate": 5.56256489468448e-07, "loss": 0.1204, "step": 8621 }, { "epoch": 0.7943981204219837, "grad_norm": 0.9514779351411853, "learning_rate": 5.557775050050584e-07, "loss": 0.1267, "step": 8622 }, { "epoch": 0.7944902565992537, "grad_norm": 0.8546480418647092, "learning_rate": 5.552987010629363e-07, "loss": 0.1062, "step": 8623 }, { "epoch": 0.7945823927765236, "grad_norm": 0.8933129494153353, "learning_rate": 5.54820077686539e-07, "loss": 0.1191, "step": 8624 }, { "epoch": 0.7946745289537938, "grad_norm": 0.8839504359155155, "learning_rate": 5.543416349203071e-07, "loss": 0.121, "step": 8625 }, { "epoch": 0.7947666651310638, "grad_norm": 0.9199143993897311, "learning_rate": 5.538633728086643e-07, "loss": 0.1174, "step": 8626 }, { "epoch": 0.7948588013083338, "grad_norm": 0.9234319246914519, "learning_rate": 5.533852913960158e-07, "loss": 0.1123, "step": 8627 }, { "epoch": 0.7949509374856037, "grad_norm": 0.9564128250888884, "learning_rate": 5.529073907267526e-07, "loss": 0.1296, "step": 8628 }, { "epoch": 0.7950430736628737, "grad_norm": 0.9131763952358534, "learning_rate": 5.524296708452476e-07, "loss": 0.1155, "step": 8629 }, { "epoch": 0.7951352098401437, "grad_norm": 0.9744724093653233, "learning_rate": 5.519521317958581e-07, "loss": 0.1262, "step": 8630 }, { "epoch": 0.7952273460174137, "grad_norm": 0.9906485184260504, "learning_rate": 5.514747736229225e-07, "loss": 0.1203, "step": 8631 }, { "epoch": 0.7953194821946837, "grad_norm": 0.9125838552784906, "learning_rate": 5.509975963707636e-07, "loss": 0.1228, "step": 8632 }, { "epoch": 0.7954116183719537, "grad_norm": 0.9283033424610722, "learning_rate": 5.505206000836874e-07, "loss": 0.1193, "step": 8633 }, { "epoch": 0.7955037545492237, "grad_norm": 0.9203411373141805, "learning_rate": 5.500437848059842e-07, "loss": 0.122, "step": 8634 }, { "epoch": 0.7955958907264937, "grad_norm": 0.9121874766032623, "learning_rate": 5.495671505819244e-07, "loss": 0.1182, "step": 8635 }, { "epoch": 0.7956880269037637, "grad_norm": 0.9416908807262255, "learning_rate": 5.490906974557655e-07, "loss": 0.1212, "step": 8636 }, { "epoch": 0.7957801630810337, "grad_norm": 0.9425940298472445, "learning_rate": 5.486144254717446e-07, "loss": 0.1258, "step": 8637 }, { "epoch": 0.7958722992583038, "grad_norm": 0.9684145490905561, "learning_rate": 5.481383346740843e-07, "loss": 0.1163, "step": 8638 }, { "epoch": 0.7959644354355738, "grad_norm": 0.9555213332359374, "learning_rate": 5.476624251069904e-07, "loss": 0.1226, "step": 8639 }, { "epoch": 0.7960565716128438, "grad_norm": 0.9718769525251739, "learning_rate": 5.471866968146497e-07, "loss": 0.1128, "step": 8640 }, { "epoch": 0.7961487077901138, "grad_norm": 0.9887299932093059, "learning_rate": 5.46711149841235e-07, "loss": 0.1306, "step": 8641 }, { "epoch": 0.7962408439673838, "grad_norm": 0.9353887995608104, "learning_rate": 5.462357842308997e-07, "loss": 0.1194, "step": 8642 }, { "epoch": 0.7963329801446538, "grad_norm": 0.9103463409879694, "learning_rate": 5.457606000277826e-07, "loss": 0.1155, "step": 8643 }, { "epoch": 0.7964251163219238, "grad_norm": 0.8959125789177207, "learning_rate": 5.452855972760035e-07, "loss": 0.1126, "step": 8644 }, { "epoch": 0.7965172524991938, "grad_norm": 0.9415821381686513, "learning_rate": 5.448107760196672e-07, "loss": 0.1086, "step": 8645 }, { "epoch": 0.7966093886764638, "grad_norm": 0.9484822727507379, "learning_rate": 5.443361363028612e-07, "loss": 0.1196, "step": 8646 }, { "epoch": 0.7967015248537338, "grad_norm": 0.9729626443315508, "learning_rate": 5.438616781696557e-07, "loss": 0.1292, "step": 8647 }, { "epoch": 0.7967936610310038, "grad_norm": 0.9903115214641103, "learning_rate": 5.43387401664103e-07, "loss": 0.1288, "step": 8648 }, { "epoch": 0.7968857972082738, "grad_norm": 0.9035118638518556, "learning_rate": 5.429133068302405e-07, "loss": 0.1161, "step": 8649 }, { "epoch": 0.7969779333855438, "grad_norm": 0.8673452284329635, "learning_rate": 5.424393937120884e-07, "loss": 0.0994, "step": 8650 }, { "epoch": 0.7970700695628138, "grad_norm": 0.9479150604513178, "learning_rate": 5.419656623536498e-07, "loss": 0.124, "step": 8651 }, { "epoch": 0.7971622057400839, "grad_norm": 0.8588902296167942, "learning_rate": 5.414921127989104e-07, "loss": 0.1042, "step": 8652 }, { "epoch": 0.7972543419173539, "grad_norm": 0.9322158612905973, "learning_rate": 5.410187450918381e-07, "loss": 0.1294, "step": 8653 }, { "epoch": 0.7973464780946239, "grad_norm": 0.9631584611479438, "learning_rate": 5.405455592763864e-07, "loss": 0.1296, "step": 8654 }, { "epoch": 0.7974386142718939, "grad_norm": 0.919274177152333, "learning_rate": 5.400725553964908e-07, "loss": 0.1147, "step": 8655 }, { "epoch": 0.7975307504491639, "grad_norm": 0.9032358397233256, "learning_rate": 5.39599733496069e-07, "loss": 0.1142, "step": 8656 }, { "epoch": 0.7976228866264339, "grad_norm": 0.9549715100342432, "learning_rate": 5.391270936190232e-07, "loss": 0.1179, "step": 8657 }, { "epoch": 0.7977150228037039, "grad_norm": 0.8774598769374871, "learning_rate": 5.386546358092376e-07, "loss": 0.112, "step": 8658 }, { "epoch": 0.7978071589809739, "grad_norm": 0.9155784556515261, "learning_rate": 5.381823601105804e-07, "loss": 0.1172, "step": 8659 }, { "epoch": 0.7978992951582439, "grad_norm": 0.8967035583649721, "learning_rate": 5.377102665669018e-07, "loss": 0.1094, "step": 8660 }, { "epoch": 0.7979914313355139, "grad_norm": 0.9265403880030733, "learning_rate": 5.372383552220358e-07, "loss": 0.1221, "step": 8661 }, { "epoch": 0.7980835675127839, "grad_norm": 0.9252246274238533, "learning_rate": 5.36766626119801e-07, "loss": 0.1191, "step": 8662 }, { "epoch": 0.7981757036900539, "grad_norm": 0.9738521523534015, "learning_rate": 5.362950793039959e-07, "loss": 0.1222, "step": 8663 }, { "epoch": 0.7982678398673239, "grad_norm": 0.9111419202800259, "learning_rate": 5.358237148184034e-07, "loss": 0.1258, "step": 8664 }, { "epoch": 0.7983599760445939, "grad_norm": 0.8727192448465615, "learning_rate": 5.353525327067902e-07, "loss": 0.103, "step": 8665 }, { "epoch": 0.798452112221864, "grad_norm": 0.9316417754395357, "learning_rate": 5.348815330129059e-07, "loss": 0.1191, "step": 8666 }, { "epoch": 0.798544248399134, "grad_norm": 0.9191480528464787, "learning_rate": 5.344107157804834e-07, "loss": 0.1198, "step": 8667 }, { "epoch": 0.798636384576404, "grad_norm": 0.9361991190966118, "learning_rate": 5.339400810532375e-07, "loss": 0.1292, "step": 8668 }, { "epoch": 0.798728520753674, "grad_norm": 0.9428379963087531, "learning_rate": 5.334696288748661e-07, "loss": 0.1162, "step": 8669 }, { "epoch": 0.798820656930944, "grad_norm": 0.8926886143774425, "learning_rate": 5.329993592890512e-07, "loss": 0.1116, "step": 8670 }, { "epoch": 0.798912793108214, "grad_norm": 0.9405908396749681, "learning_rate": 5.325292723394573e-07, "loss": 0.1155, "step": 8671 }, { "epoch": 0.799004929285484, "grad_norm": 0.8988881879842964, "learning_rate": 5.320593680697331e-07, "loss": 0.1146, "step": 8672 }, { "epoch": 0.7990970654627539, "grad_norm": 0.9517179598930791, "learning_rate": 5.315896465235084e-07, "loss": 0.1254, "step": 8673 }, { "epoch": 0.7991892016400239, "grad_norm": 0.905955864737142, "learning_rate": 5.311201077443961e-07, "loss": 0.1063, "step": 8674 }, { "epoch": 0.7992813378172939, "grad_norm": 0.9489280018205543, "learning_rate": 5.306507517759937e-07, "loss": 0.1186, "step": 8675 }, { "epoch": 0.7993734739945639, "grad_norm": 0.9753417396799335, "learning_rate": 5.301815786618816e-07, "loss": 0.1227, "step": 8676 }, { "epoch": 0.7994656101718339, "grad_norm": 0.920701395541936, "learning_rate": 5.297125884456214e-07, "loss": 0.1161, "step": 8677 }, { "epoch": 0.7995577463491039, "grad_norm": 0.9251078334924024, "learning_rate": 5.292437811707599e-07, "loss": 0.1262, "step": 8678 }, { "epoch": 0.799649882526374, "grad_norm": 0.9327552040475929, "learning_rate": 5.287751568808247e-07, "loss": 0.1186, "step": 8679 }, { "epoch": 0.799742018703644, "grad_norm": 0.9619542917813455, "learning_rate": 5.283067156193292e-07, "loss": 0.1203, "step": 8680 }, { "epoch": 0.799834154880914, "grad_norm": 0.9289332567081827, "learning_rate": 5.278384574297665e-07, "loss": 0.1234, "step": 8681 }, { "epoch": 0.799926291058184, "grad_norm": 0.9064646891261017, "learning_rate": 5.273703823556153e-07, "loss": 0.1223, "step": 8682 }, { "epoch": 0.800018427235454, "grad_norm": 0.9316922917712442, "learning_rate": 5.269024904403372e-07, "loss": 0.1134, "step": 8683 }, { "epoch": 0.800110563412724, "grad_norm": 0.9607883705940592, "learning_rate": 5.264347817273752e-07, "loss": 0.1259, "step": 8684 }, { "epoch": 0.800202699589994, "grad_norm": 0.9179273484417532, "learning_rate": 5.259672562601553e-07, "loss": 0.1121, "step": 8685 }, { "epoch": 0.800294835767264, "grad_norm": 0.9843152416241994, "learning_rate": 5.25499914082088e-07, "loss": 0.1197, "step": 8686 }, { "epoch": 0.800386971944534, "grad_norm": 0.9651351060190287, "learning_rate": 5.250327552365664e-07, "loss": 0.1215, "step": 8687 }, { "epoch": 0.800479108121804, "grad_norm": 0.9119186870911993, "learning_rate": 5.245657797669665e-07, "loss": 0.1174, "step": 8688 }, { "epoch": 0.800571244299074, "grad_norm": 0.9238367856143697, "learning_rate": 5.24098987716647e-07, "loss": 0.1147, "step": 8689 }, { "epoch": 0.800663380476344, "grad_norm": 0.9374656049331135, "learning_rate": 5.236323791289479e-07, "loss": 0.12, "step": 8690 }, { "epoch": 0.800755516653614, "grad_norm": 0.9654942943206993, "learning_rate": 5.231659540471954e-07, "loss": 0.1316, "step": 8691 }, { "epoch": 0.800847652830884, "grad_norm": 0.9591977272622936, "learning_rate": 5.226997125146973e-07, "loss": 0.1253, "step": 8692 }, { "epoch": 0.8009397890081541, "grad_norm": 0.9494439671142985, "learning_rate": 5.222336545747434e-07, "loss": 0.1198, "step": 8693 }, { "epoch": 0.8010319251854241, "grad_norm": 0.9537456425037063, "learning_rate": 5.217677802706078e-07, "loss": 0.1193, "step": 8694 }, { "epoch": 0.8011240613626941, "grad_norm": 0.9259471935018194, "learning_rate": 5.213020896455462e-07, "loss": 0.1141, "step": 8695 }, { "epoch": 0.8012161975399641, "grad_norm": 0.9460346609610221, "learning_rate": 5.208365827427985e-07, "loss": 0.1207, "step": 8696 }, { "epoch": 0.8013083337172341, "grad_norm": 0.9684295469141999, "learning_rate": 5.203712596055876e-07, "loss": 0.127, "step": 8697 }, { "epoch": 0.8014004698945041, "grad_norm": 0.9765445354106874, "learning_rate": 5.19906120277118e-07, "loss": 0.1216, "step": 8698 }, { "epoch": 0.8014926060717741, "grad_norm": 0.8910773017327671, "learning_rate": 5.194411648005778e-07, "loss": 0.1112, "step": 8699 }, { "epoch": 0.8015847422490441, "grad_norm": 1.006495006076603, "learning_rate": 5.189763932191396e-07, "loss": 0.1264, "step": 8700 }, { "epoch": 0.8016768784263141, "grad_norm": 0.8756991823220688, "learning_rate": 5.185118055759564e-07, "loss": 0.116, "step": 8701 }, { "epoch": 0.8017690146035841, "grad_norm": 1.0276078013137269, "learning_rate": 5.180474019141646e-07, "loss": 0.1299, "step": 8702 }, { "epoch": 0.8018611507808541, "grad_norm": 1.008868504080066, "learning_rate": 5.175831822768848e-07, "loss": 0.1204, "step": 8703 }, { "epoch": 0.8019532869581241, "grad_norm": 0.9189685434248478, "learning_rate": 5.171191467072201e-07, "loss": 0.1096, "step": 8704 }, { "epoch": 0.8020454231353941, "grad_norm": 0.9502132889598285, "learning_rate": 5.166552952482565e-07, "loss": 0.1232, "step": 8705 }, { "epoch": 0.8021375593126642, "grad_norm": 0.8942341109417716, "learning_rate": 5.161916279430623e-07, "loss": 0.1178, "step": 8706 }, { "epoch": 0.8022296954899342, "grad_norm": 1.00440052137423, "learning_rate": 5.157281448346882e-07, "loss": 0.1317, "step": 8707 }, { "epoch": 0.8023218316672042, "grad_norm": 0.9128632666446306, "learning_rate": 5.152648459661694e-07, "loss": 0.1075, "step": 8708 }, { "epoch": 0.8024139678444742, "grad_norm": 0.9950914671950657, "learning_rate": 5.148017313805237e-07, "loss": 0.1369, "step": 8709 }, { "epoch": 0.8025061040217442, "grad_norm": 0.9559709503514494, "learning_rate": 5.143388011207506e-07, "loss": 0.1216, "step": 8710 }, { "epoch": 0.8025982401990142, "grad_norm": 0.9386882981548607, "learning_rate": 5.138760552298338e-07, "loss": 0.1104, "step": 8711 }, { "epoch": 0.8026903763762842, "grad_norm": 0.9275125291849861, "learning_rate": 5.134134937507387e-07, "loss": 0.1152, "step": 8712 }, { "epoch": 0.8027825125535542, "grad_norm": 0.9156843741584696, "learning_rate": 5.129511167264151e-07, "loss": 0.1127, "step": 8713 }, { "epoch": 0.8028746487308241, "grad_norm": 0.9505867161038387, "learning_rate": 5.124889241997935e-07, "loss": 0.1187, "step": 8714 }, { "epoch": 0.8029667849080941, "grad_norm": 0.99898568796701, "learning_rate": 5.120269162137889e-07, "loss": 0.1231, "step": 8715 }, { "epoch": 0.8030589210853641, "grad_norm": 0.9897462954997086, "learning_rate": 5.115650928113e-07, "loss": 0.1229, "step": 8716 }, { "epoch": 0.8031510572626341, "grad_norm": 0.945085721014716, "learning_rate": 5.111034540352064e-07, "loss": 0.1245, "step": 8717 }, { "epoch": 0.8032431934399041, "grad_norm": 0.9743912216678187, "learning_rate": 5.106419999283702e-07, "loss": 0.1279, "step": 8718 }, { "epoch": 0.8033353296171741, "grad_norm": 0.9710579013133132, "learning_rate": 5.101807305336385e-07, "loss": 0.1339, "step": 8719 }, { "epoch": 0.8034274657944442, "grad_norm": 0.9155853366901943, "learning_rate": 5.0971964589384e-07, "loss": 0.1218, "step": 8720 }, { "epoch": 0.8035196019717142, "grad_norm": 0.958206277205222, "learning_rate": 5.092587460517873e-07, "loss": 0.1239, "step": 8721 }, { "epoch": 0.8036117381489842, "grad_norm": 0.959328723295657, "learning_rate": 5.087980310502743e-07, "loss": 0.1182, "step": 8722 }, { "epoch": 0.8037038743262542, "grad_norm": 0.905234879072177, "learning_rate": 5.083375009320779e-07, "loss": 0.1129, "step": 8723 }, { "epoch": 0.8037960105035242, "grad_norm": 0.8962931301615531, "learning_rate": 5.078771557399586e-07, "loss": 0.1242, "step": 8724 }, { "epoch": 0.8038881466807942, "grad_norm": 0.9194498456557597, "learning_rate": 5.0741699551666e-07, "loss": 0.1107, "step": 8725 }, { "epoch": 0.8039802828580642, "grad_norm": 0.9100045586682919, "learning_rate": 5.069570203049085e-07, "loss": 0.1124, "step": 8726 }, { "epoch": 0.8040724190353342, "grad_norm": 0.9295523104342945, "learning_rate": 5.06497230147412e-07, "loss": 0.1179, "step": 8727 }, { "epoch": 0.8041645552126042, "grad_norm": 0.9613944140250847, "learning_rate": 5.060376250868615e-07, "loss": 0.1192, "step": 8728 }, { "epoch": 0.8042566913898742, "grad_norm": 0.9457683779137299, "learning_rate": 5.055782051659322e-07, "loss": 0.1268, "step": 8729 }, { "epoch": 0.8043488275671442, "grad_norm": 0.9653237653604977, "learning_rate": 5.051189704272819e-07, "loss": 0.1179, "step": 8730 }, { "epoch": 0.8044409637444142, "grad_norm": 0.9299770816436291, "learning_rate": 5.046599209135492e-07, "loss": 0.1243, "step": 8731 }, { "epoch": 0.8045330999216842, "grad_norm": 0.9681021548052552, "learning_rate": 5.042010566673583e-07, "loss": 0.1159, "step": 8732 }, { "epoch": 0.8046252360989542, "grad_norm": 0.8858392947078415, "learning_rate": 5.037423777313132e-07, "loss": 0.1032, "step": 8733 }, { "epoch": 0.8047173722762243, "grad_norm": 0.9198330998999283, "learning_rate": 5.032838841480042e-07, "loss": 0.105, "step": 8734 }, { "epoch": 0.8048095084534943, "grad_norm": 0.948083821447076, "learning_rate": 5.028255759600004e-07, "loss": 0.1232, "step": 8735 }, { "epoch": 0.8049016446307643, "grad_norm": 0.8914840218195179, "learning_rate": 5.023674532098571e-07, "loss": 0.1129, "step": 8736 }, { "epoch": 0.8049937808080343, "grad_norm": 0.9627700380196931, "learning_rate": 5.019095159401113e-07, "loss": 0.1217, "step": 8737 }, { "epoch": 0.8050859169853043, "grad_norm": 0.9673563842251529, "learning_rate": 5.01451764193282e-07, "loss": 0.1292, "step": 8738 }, { "epoch": 0.8051780531625743, "grad_norm": 0.9294744242883962, "learning_rate": 5.009941980118707e-07, "loss": 0.1202, "step": 8739 }, { "epoch": 0.8052701893398443, "grad_norm": 0.9123434635710035, "learning_rate": 5.005368174383634e-07, "loss": 0.1123, "step": 8740 }, { "epoch": 0.8053623255171143, "grad_norm": 0.8951060347141389, "learning_rate": 5.000796225152277e-07, "loss": 0.1152, "step": 8741 }, { "epoch": 0.8054544616943843, "grad_norm": 0.9299640822902256, "learning_rate": 4.996226132849149e-07, "loss": 0.1098, "step": 8742 }, { "epoch": 0.8055465978716543, "grad_norm": 0.9759922313677241, "learning_rate": 4.99165789789858e-07, "loss": 0.1253, "step": 8743 }, { "epoch": 0.8056387340489243, "grad_norm": 0.9719818047810473, "learning_rate": 4.987091520724721e-07, "loss": 0.1178, "step": 8744 }, { "epoch": 0.8057308702261943, "grad_norm": 0.9429694738572041, "learning_rate": 4.982527001751567e-07, "loss": 0.1197, "step": 8745 }, { "epoch": 0.8058230064034643, "grad_norm": 0.9728986864512471, "learning_rate": 4.977964341402941e-07, "loss": 0.1221, "step": 8746 }, { "epoch": 0.8059151425807344, "grad_norm": 0.8994651161363463, "learning_rate": 4.973403540102476e-07, "loss": 0.1085, "step": 8747 }, { "epoch": 0.8060072787580044, "grad_norm": 0.888762649636477, "learning_rate": 4.968844598273653e-07, "loss": 0.1055, "step": 8748 }, { "epoch": 0.8060994149352744, "grad_norm": 0.9187766828800995, "learning_rate": 4.964287516339758e-07, "loss": 0.111, "step": 8749 }, { "epoch": 0.8061915511125444, "grad_norm": 0.9374319253049951, "learning_rate": 4.959732294723932e-07, "loss": 0.1192, "step": 8750 }, { "epoch": 0.8062836872898144, "grad_norm": 0.9165076691998195, "learning_rate": 4.955178933849111e-07, "loss": 0.1178, "step": 8751 }, { "epoch": 0.8063758234670844, "grad_norm": 0.917632651699605, "learning_rate": 4.950627434138083e-07, "loss": 0.1223, "step": 8752 }, { "epoch": 0.8064679596443544, "grad_norm": 0.939684098512974, "learning_rate": 4.946077796013462e-07, "loss": 0.1138, "step": 8753 }, { "epoch": 0.8065600958216244, "grad_norm": 0.9575212037934554, "learning_rate": 4.941530019897669e-07, "loss": 0.1214, "step": 8754 }, { "epoch": 0.8066522319988944, "grad_norm": 0.9044347534310851, "learning_rate": 4.93698410621298e-07, "loss": 0.1144, "step": 8755 }, { "epoch": 0.8067443681761644, "grad_norm": 0.9558305259406638, "learning_rate": 4.932440055381471e-07, "loss": 0.1187, "step": 8756 }, { "epoch": 0.8068365043534343, "grad_norm": 0.9950767718575722, "learning_rate": 4.92789786782506e-07, "loss": 0.1384, "step": 8757 }, { "epoch": 0.8069286405307043, "grad_norm": 0.9110712406529344, "learning_rate": 4.923357543965498e-07, "loss": 0.1064, "step": 8758 }, { "epoch": 0.8070207767079743, "grad_norm": 0.9519585325823425, "learning_rate": 4.918819084224353e-07, "loss": 0.109, "step": 8759 }, { "epoch": 0.8071129128852443, "grad_norm": 0.947790510305991, "learning_rate": 4.914282489023006e-07, "loss": 0.1275, "step": 8760 }, { "epoch": 0.8072050490625144, "grad_norm": 1.0226051981887092, "learning_rate": 4.909747758782693e-07, "loss": 0.1239, "step": 8761 }, { "epoch": 0.8072971852397844, "grad_norm": 0.9217178327591092, "learning_rate": 4.905214893924462e-07, "loss": 0.1128, "step": 8762 }, { "epoch": 0.8073893214170544, "grad_norm": 0.9737955286681498, "learning_rate": 4.900683894869198e-07, "loss": 0.1307, "step": 8763 }, { "epoch": 0.8074814575943244, "grad_norm": 0.9299037243211407, "learning_rate": 4.8961547620376e-07, "loss": 0.1242, "step": 8764 }, { "epoch": 0.8075735937715944, "grad_norm": 0.9475722332886723, "learning_rate": 4.891627495850188e-07, "loss": 0.1142, "step": 8765 }, { "epoch": 0.8076657299488644, "grad_norm": 0.9662419417795653, "learning_rate": 4.887102096727326e-07, "loss": 0.1167, "step": 8766 }, { "epoch": 0.8077578661261344, "grad_norm": 0.9339979179707536, "learning_rate": 4.882578565089205e-07, "loss": 0.1185, "step": 8767 }, { "epoch": 0.8078500023034044, "grad_norm": 0.9596979033805477, "learning_rate": 4.878056901355823e-07, "loss": 0.1279, "step": 8768 }, { "epoch": 0.8079421384806744, "grad_norm": 0.9218071788920659, "learning_rate": 4.873537105947029e-07, "loss": 0.1229, "step": 8769 }, { "epoch": 0.8080342746579444, "grad_norm": 0.9480480193282101, "learning_rate": 4.869019179282478e-07, "loss": 0.1285, "step": 8770 }, { "epoch": 0.8081264108352144, "grad_norm": 0.9074065825918513, "learning_rate": 4.864503121781666e-07, "loss": 0.1232, "step": 8771 }, { "epoch": 0.8082185470124844, "grad_norm": 0.9385402031446006, "learning_rate": 4.859988933863898e-07, "loss": 0.1199, "step": 8772 }, { "epoch": 0.8083106831897544, "grad_norm": 0.9707115923915903, "learning_rate": 4.85547661594833e-07, "loss": 0.1255, "step": 8773 }, { "epoch": 0.8084028193670245, "grad_norm": 0.9897153814911818, "learning_rate": 4.850966168453922e-07, "loss": 0.1305, "step": 8774 }, { "epoch": 0.8084949555442945, "grad_norm": 0.9438068143981858, "learning_rate": 4.846457591799489e-07, "loss": 0.1166, "step": 8775 }, { "epoch": 0.8085870917215645, "grad_norm": 0.929569865631667, "learning_rate": 4.841950886403623e-07, "loss": 0.1201, "step": 8776 }, { "epoch": 0.8086792278988345, "grad_norm": 0.9544358854771392, "learning_rate": 4.837446052684788e-07, "loss": 0.1337, "step": 8777 }, { "epoch": 0.8087713640761045, "grad_norm": 0.8648015438610358, "learning_rate": 4.832943091061257e-07, "loss": 0.1097, "step": 8778 }, { "epoch": 0.8088635002533745, "grad_norm": 0.9271638424570154, "learning_rate": 4.828442001951136e-07, "loss": 0.112, "step": 8779 }, { "epoch": 0.8089556364306445, "grad_norm": 0.8825506069463267, "learning_rate": 4.82394278577234e-07, "loss": 0.1071, "step": 8780 }, { "epoch": 0.8090477726079145, "grad_norm": 0.982444580480559, "learning_rate": 4.819445442942633e-07, "loss": 0.13, "step": 8781 }, { "epoch": 0.8091399087851845, "grad_norm": 0.9338257947288606, "learning_rate": 4.814949973879582e-07, "loss": 0.1148, "step": 8782 }, { "epoch": 0.8092320449624545, "grad_norm": 0.9345277141507186, "learning_rate": 4.8104563790006e-07, "loss": 0.1189, "step": 8783 }, { "epoch": 0.8093241811397245, "grad_norm": 0.9422878793863659, "learning_rate": 4.805964658722922e-07, "loss": 0.1242, "step": 8784 }, { "epoch": 0.8094163173169945, "grad_norm": 0.9994874290723805, "learning_rate": 4.801474813463591e-07, "loss": 0.1259, "step": 8785 }, { "epoch": 0.8095084534942645, "grad_norm": 1.0136117696237055, "learning_rate": 4.796986843639506e-07, "loss": 0.1269, "step": 8786 }, { "epoch": 0.8096005896715345, "grad_norm": 0.9477824056753072, "learning_rate": 4.792500749667359e-07, "loss": 0.1225, "step": 8787 }, { "epoch": 0.8096927258488046, "grad_norm": 0.9447988320137191, "learning_rate": 4.788016531963699e-07, "loss": 0.1164, "step": 8788 }, { "epoch": 0.8097848620260746, "grad_norm": 0.9110046721363823, "learning_rate": 4.783534190944872e-07, "loss": 0.1176, "step": 8789 }, { "epoch": 0.8098769982033446, "grad_norm": 0.9650411635983291, "learning_rate": 4.779053727027072e-07, "loss": 0.118, "step": 8790 }, { "epoch": 0.8099691343806146, "grad_norm": 0.9194160816683928, "learning_rate": 4.774575140626317e-07, "loss": 0.1167, "step": 8791 }, { "epoch": 0.8100612705578846, "grad_norm": 0.9730949942743494, "learning_rate": 4.770098432158434e-07, "loss": 0.1186, "step": 8792 }, { "epoch": 0.8101534067351546, "grad_norm": 1.0026966470323755, "learning_rate": 4.765623602039085e-07, "loss": 0.1261, "step": 8793 }, { "epoch": 0.8102455429124246, "grad_norm": 0.8959270267150095, "learning_rate": 4.76115065068376e-07, "loss": 0.1143, "step": 8794 }, { "epoch": 0.8103376790896946, "grad_norm": 0.9745090604240599, "learning_rate": 4.756679578507778e-07, "loss": 0.1198, "step": 8795 }, { "epoch": 0.8104298152669646, "grad_norm": 0.9752302606638509, "learning_rate": 4.7522103859262813e-07, "loss": 0.1246, "step": 8796 }, { "epoch": 0.8105219514442346, "grad_norm": 0.9154425716815816, "learning_rate": 4.7477430733542273e-07, "loss": 0.1088, "step": 8797 }, { "epoch": 0.8106140876215046, "grad_norm": 0.9209746568913927, "learning_rate": 4.7432776412064034e-07, "loss": 0.1164, "step": 8798 }, { "epoch": 0.8107062237987746, "grad_norm": 0.8858703515048639, "learning_rate": 4.738814089897431e-07, "loss": 0.1118, "step": 8799 }, { "epoch": 0.8107983599760445, "grad_norm": 0.9071064876375858, "learning_rate": 4.734352419841756e-07, "loss": 0.1128, "step": 8800 }, { "epoch": 0.8108904961533145, "grad_norm": 0.9638341312522045, "learning_rate": 4.7298926314536364e-07, "loss": 0.1121, "step": 8801 }, { "epoch": 0.8109826323305847, "grad_norm": 0.9563234435681683, "learning_rate": 4.725434725147171e-07, "loss": 0.1154, "step": 8802 }, { "epoch": 0.8110747685078546, "grad_norm": 0.96543556775914, "learning_rate": 4.720978701336268e-07, "loss": 0.1266, "step": 8803 }, { "epoch": 0.8111669046851246, "grad_norm": 0.9933176223015399, "learning_rate": 4.716524560434679e-07, "loss": 0.1339, "step": 8804 }, { "epoch": 0.8112590408623946, "grad_norm": 0.9202425773610461, "learning_rate": 4.7120723028559633e-07, "loss": 0.1199, "step": 8805 }, { "epoch": 0.8113511770396646, "grad_norm": 0.9422875480282432, "learning_rate": 4.70762192901352e-07, "loss": 0.1225, "step": 8806 }, { "epoch": 0.8114433132169346, "grad_norm": 0.9709769506635573, "learning_rate": 4.7031734393205683e-07, "loss": 0.1239, "step": 8807 }, { "epoch": 0.8115354493942046, "grad_norm": 0.9206405274403489, "learning_rate": 4.6987268341901455e-07, "loss": 0.1179, "step": 8808 }, { "epoch": 0.8116275855714746, "grad_norm": 0.9756799458791882, "learning_rate": 4.6942821140351174e-07, "loss": 0.1174, "step": 8809 }, { "epoch": 0.8117197217487446, "grad_norm": 0.9470559153669584, "learning_rate": 4.6898392792681796e-07, "loss": 0.1187, "step": 8810 }, { "epoch": 0.8118118579260146, "grad_norm": 0.9201916935032508, "learning_rate": 4.6853983303018493e-07, "loss": 0.1206, "step": 8811 }, { "epoch": 0.8119039941032846, "grad_norm": 0.9265359599326323, "learning_rate": 4.680959267548479e-07, "loss": 0.1181, "step": 8812 }, { "epoch": 0.8119961302805546, "grad_norm": 0.9643827677740148, "learning_rate": 4.676522091420227e-07, "loss": 0.1295, "step": 8813 }, { "epoch": 0.8120882664578246, "grad_norm": 0.9221162699337527, "learning_rate": 4.672086802329079e-07, "loss": 0.1106, "step": 8814 }, { "epoch": 0.8121804026350947, "grad_norm": 0.9718760958585148, "learning_rate": 4.667653400686858e-07, "loss": 0.1226, "step": 8815 }, { "epoch": 0.8122725388123647, "grad_norm": 0.8960897169593115, "learning_rate": 4.6632218869052085e-07, "loss": 0.1046, "step": 8816 }, { "epoch": 0.8123646749896347, "grad_norm": 0.9716052904056378, "learning_rate": 4.6587922613956005e-07, "loss": 0.1274, "step": 8817 }, { "epoch": 0.8124568111669047, "grad_norm": 0.9329622015203816, "learning_rate": 4.6543645245693215e-07, "loss": 0.1234, "step": 8818 }, { "epoch": 0.8125489473441747, "grad_norm": 0.9354174342448097, "learning_rate": 4.649938676837479e-07, "loss": 0.1283, "step": 8819 }, { "epoch": 0.8126410835214447, "grad_norm": 0.9504482807872802, "learning_rate": 4.6455147186110217e-07, "loss": 0.1253, "step": 8820 }, { "epoch": 0.8127332196987147, "grad_norm": 0.8710928394218936, "learning_rate": 4.6410926503007187e-07, "loss": 0.1121, "step": 8821 }, { "epoch": 0.8128253558759847, "grad_norm": 0.9167945722916967, "learning_rate": 4.636672472317147e-07, "loss": 0.1219, "step": 8822 }, { "epoch": 0.8129174920532547, "grad_norm": 0.931250359567654, "learning_rate": 4.6322541850707336e-07, "loss": 0.1179, "step": 8823 }, { "epoch": 0.8130096282305247, "grad_norm": 0.9173628142401588, "learning_rate": 4.6278377889717064e-07, "loss": 0.1201, "step": 8824 }, { "epoch": 0.8131017644077947, "grad_norm": 0.9259153539552193, "learning_rate": 4.62342328443014e-07, "loss": 0.1073, "step": 8825 }, { "epoch": 0.8131939005850647, "grad_norm": 0.9620459880501422, "learning_rate": 4.6190106718559056e-07, "loss": 0.1228, "step": 8826 }, { "epoch": 0.8132860367623347, "grad_norm": 0.8858775284263333, "learning_rate": 4.614599951658727e-07, "loss": 0.1136, "step": 8827 }, { "epoch": 0.8133781729396047, "grad_norm": 0.8963968749457515, "learning_rate": 4.6101911242481396e-07, "loss": 0.1072, "step": 8828 }, { "epoch": 0.8134703091168748, "grad_norm": 0.9429238031032099, "learning_rate": 4.605784190033502e-07, "loss": 0.1195, "step": 8829 }, { "epoch": 0.8135624452941448, "grad_norm": 0.9081363924275805, "learning_rate": 4.6013791494239927e-07, "loss": 0.1129, "step": 8830 }, { "epoch": 0.8136545814714148, "grad_norm": 0.928463106569425, "learning_rate": 4.5969760028286236e-07, "loss": 0.1193, "step": 8831 }, { "epoch": 0.8137467176486848, "grad_norm": 0.9505794219950519, "learning_rate": 4.5925747506562287e-07, "loss": 0.1192, "step": 8832 }, { "epoch": 0.8138388538259548, "grad_norm": 0.9083831854315285, "learning_rate": 4.5881753933154695e-07, "loss": 0.1143, "step": 8833 }, { "epoch": 0.8139309900032248, "grad_norm": 1.0098806197047803, "learning_rate": 4.5837779312148225e-07, "loss": 0.1279, "step": 8834 }, { "epoch": 0.8140231261804948, "grad_norm": 0.9610060245617507, "learning_rate": 4.5793823647625856e-07, "loss": 0.129, "step": 8835 }, { "epoch": 0.8141152623577648, "grad_norm": 0.9499210345688431, "learning_rate": 4.574988694366894e-07, "loss": 0.1237, "step": 8836 }, { "epoch": 0.8142073985350348, "grad_norm": 0.916244337778968, "learning_rate": 4.570596920435708e-07, "loss": 0.1119, "step": 8837 }, { "epoch": 0.8142995347123048, "grad_norm": 0.9170844203897126, "learning_rate": 4.566207043376789e-07, "loss": 0.1114, "step": 8838 }, { "epoch": 0.8143916708895748, "grad_norm": 0.9254240160408157, "learning_rate": 4.56181906359775e-07, "loss": 0.123, "step": 8839 }, { "epoch": 0.8144838070668448, "grad_norm": 0.9655610903066298, "learning_rate": 4.557432981506005e-07, "loss": 0.1217, "step": 8840 }, { "epoch": 0.8145759432441148, "grad_norm": 0.9875316818462202, "learning_rate": 4.5530487975088076e-07, "loss": 0.1194, "step": 8841 }, { "epoch": 0.8146680794213849, "grad_norm": 0.9574896478337922, "learning_rate": 4.548666512013236e-07, "loss": 0.1219, "step": 8842 }, { "epoch": 0.8147602155986549, "grad_norm": 0.921246124798205, "learning_rate": 4.5442861254261753e-07, "loss": 0.1186, "step": 8843 }, { "epoch": 0.8148523517759249, "grad_norm": 0.9451372908762516, "learning_rate": 4.5399076381543536e-07, "loss": 0.1192, "step": 8844 }, { "epoch": 0.8149444879531949, "grad_norm": 0.9134889434581871, "learning_rate": 4.5355310506043053e-07, "loss": 0.112, "step": 8845 }, { "epoch": 0.8150366241304648, "grad_norm": 0.902262399393891, "learning_rate": 4.531156363182407e-07, "loss": 0.1112, "step": 8846 }, { "epoch": 0.8151287603077348, "grad_norm": 0.950956875411372, "learning_rate": 4.526783576294835e-07, "loss": 0.1274, "step": 8847 }, { "epoch": 0.8152208964850048, "grad_norm": 0.8980087170242911, "learning_rate": 4.5224126903476136e-07, "loss": 0.1144, "step": 8848 }, { "epoch": 0.8153130326622748, "grad_norm": 0.9102210614967016, "learning_rate": 4.518043705746578e-07, "loss": 0.121, "step": 8849 }, { "epoch": 0.8154051688395448, "grad_norm": 0.9794454379572055, "learning_rate": 4.5136766228974005e-07, "loss": 0.1259, "step": 8850 }, { "epoch": 0.8154973050168148, "grad_norm": 0.9445608794611002, "learning_rate": 4.509311442205538e-07, "loss": 0.1128, "step": 8851 }, { "epoch": 0.8155894411940848, "grad_norm": 0.9359598682098146, "learning_rate": 4.504948164076317e-07, "loss": 0.1173, "step": 8852 }, { "epoch": 0.8156815773713548, "grad_norm": 0.9334601600662357, "learning_rate": 4.5005867889148626e-07, "loss": 0.108, "step": 8853 }, { "epoch": 0.8157737135486248, "grad_norm": 0.8918500853209136, "learning_rate": 4.4962273171261393e-07, "loss": 0.1177, "step": 8854 }, { "epoch": 0.8158658497258948, "grad_norm": 0.9167522940965833, "learning_rate": 4.491869749114908e-07, "loss": 0.1016, "step": 8855 }, { "epoch": 0.8159579859031649, "grad_norm": 0.9233508244470227, "learning_rate": 4.4875140852857854e-07, "loss": 0.1213, "step": 8856 }, { "epoch": 0.8160501220804349, "grad_norm": 0.9724640957299692, "learning_rate": 4.4831603260431787e-07, "loss": 0.1307, "step": 8857 }, { "epoch": 0.8161422582577049, "grad_norm": 0.8845130255857112, "learning_rate": 4.478808471791354e-07, "loss": 0.1084, "step": 8858 }, { "epoch": 0.8162343944349749, "grad_norm": 0.9911551925112649, "learning_rate": 4.474458522934361e-07, "loss": 0.1268, "step": 8859 }, { "epoch": 0.8163265306122449, "grad_norm": 0.9575733009268137, "learning_rate": 4.470110479876105e-07, "loss": 0.1232, "step": 8860 }, { "epoch": 0.8164186667895149, "grad_norm": 0.881394940578178, "learning_rate": 4.4657643430203067e-07, "loss": 0.1086, "step": 8861 }, { "epoch": 0.8165108029667849, "grad_norm": 0.9874625512103724, "learning_rate": 4.461420112770501e-07, "loss": 0.1232, "step": 8862 }, { "epoch": 0.8166029391440549, "grad_norm": 0.9262867632138934, "learning_rate": 4.45707778953004e-07, "loss": 0.1166, "step": 8863 }, { "epoch": 0.8166950753213249, "grad_norm": 0.9675648924666996, "learning_rate": 4.452737373702115e-07, "loss": 0.1229, "step": 8864 }, { "epoch": 0.8167872114985949, "grad_norm": 0.887245591991255, "learning_rate": 4.44839886568974e-07, "loss": 0.1105, "step": 8865 }, { "epoch": 0.8168793476758649, "grad_norm": 0.9227953394306798, "learning_rate": 4.444062265895746e-07, "loss": 0.1207, "step": 8866 }, { "epoch": 0.8169714838531349, "grad_norm": 0.928324613846221, "learning_rate": 4.439727574722783e-07, "loss": 0.1159, "step": 8867 }, { "epoch": 0.8170636200304049, "grad_norm": 0.9133012167675768, "learning_rate": 4.435394792573322e-07, "loss": 0.114, "step": 8868 }, { "epoch": 0.8171557562076749, "grad_norm": 0.9615531030979356, "learning_rate": 4.431063919849668e-07, "loss": 0.1171, "step": 8869 }, { "epoch": 0.817247892384945, "grad_norm": 0.9009290620554269, "learning_rate": 4.4267349569539404e-07, "loss": 0.114, "step": 8870 }, { "epoch": 0.817340028562215, "grad_norm": 0.9012808267067186, "learning_rate": 4.422407904288095e-07, "loss": 0.1259, "step": 8871 }, { "epoch": 0.817432164739485, "grad_norm": 0.9080862780039042, "learning_rate": 4.418082762253889e-07, "loss": 0.1072, "step": 8872 }, { "epoch": 0.817524300916755, "grad_norm": 0.9247328780388934, "learning_rate": 4.4137595312529066e-07, "loss": 0.1004, "step": 8873 }, { "epoch": 0.817616437094025, "grad_norm": 0.9123210256606668, "learning_rate": 4.4094382116865704e-07, "loss": 0.1161, "step": 8874 }, { "epoch": 0.817708573271295, "grad_norm": 0.9462605452591462, "learning_rate": 4.4051188039561156e-07, "loss": 0.1294, "step": 8875 }, { "epoch": 0.817800709448565, "grad_norm": 0.9238681176353081, "learning_rate": 4.400801308462591e-07, "loss": 0.1223, "step": 8876 }, { "epoch": 0.817892845625835, "grad_norm": 0.9192630991789277, "learning_rate": 4.396485725606886e-07, "loss": 0.1133, "step": 8877 }, { "epoch": 0.817984981803105, "grad_norm": 0.9292618059590252, "learning_rate": 4.3921720557896953e-07, "loss": 0.1228, "step": 8878 }, { "epoch": 0.818077117980375, "grad_norm": 0.941305059372131, "learning_rate": 4.387860299411553e-07, "loss": 0.1203, "step": 8879 }, { "epoch": 0.818169254157645, "grad_norm": 0.9116056962903435, "learning_rate": 4.383550456872793e-07, "loss": 0.1145, "step": 8880 }, { "epoch": 0.818261390334915, "grad_norm": 0.9624185944825424, "learning_rate": 4.3792425285735935e-07, "loss": 0.1277, "step": 8881 }, { "epoch": 0.818353526512185, "grad_norm": 0.9945802215949294, "learning_rate": 4.3749365149139493e-07, "loss": 0.1229, "step": 8882 }, { "epoch": 0.8184456626894551, "grad_norm": 0.8888004929847416, "learning_rate": 4.3706324162936684e-07, "loss": 0.1073, "step": 8883 }, { "epoch": 0.8185377988667251, "grad_norm": 0.9515796576447026, "learning_rate": 4.3663302331123815e-07, "loss": 0.119, "step": 8884 }, { "epoch": 0.8186299350439951, "grad_norm": 0.9196874221149748, "learning_rate": 4.362029965769554e-07, "loss": 0.1149, "step": 8885 }, { "epoch": 0.8187220712212651, "grad_norm": 0.9462812699661997, "learning_rate": 4.3577316146644677e-07, "loss": 0.1249, "step": 8886 }, { "epoch": 0.818814207398535, "grad_norm": 0.9160239522286485, "learning_rate": 4.353435180196225e-07, "loss": 0.1158, "step": 8887 }, { "epoch": 0.818906343575805, "grad_norm": 0.9343357130198869, "learning_rate": 4.349140662763751e-07, "loss": 0.118, "step": 8888 }, { "epoch": 0.818998479753075, "grad_norm": 0.9273075309006215, "learning_rate": 4.3448480627657804e-07, "loss": 0.1262, "step": 8889 }, { "epoch": 0.819090615930345, "grad_norm": 0.9436024913907296, "learning_rate": 4.3405573806008905e-07, "loss": 0.1236, "step": 8890 }, { "epoch": 0.819182752107615, "grad_norm": 0.9298816928569952, "learning_rate": 4.336268616667477e-07, "loss": 0.1198, "step": 8891 }, { "epoch": 0.819274888284885, "grad_norm": 0.9208677732375474, "learning_rate": 4.3319817713637415e-07, "loss": 0.1159, "step": 8892 }, { "epoch": 0.819367024462155, "grad_norm": 0.9685921400717235, "learning_rate": 4.327696845087728e-07, "loss": 0.1145, "step": 8893 }, { "epoch": 0.819459160639425, "grad_norm": 0.9149193983462024, "learning_rate": 4.323413838237281e-07, "loss": 0.1148, "step": 8894 }, { "epoch": 0.819551296816695, "grad_norm": 0.8670286833052321, "learning_rate": 4.319132751210084e-07, "loss": 0.1044, "step": 8895 }, { "epoch": 0.819643432993965, "grad_norm": 0.9696750526422799, "learning_rate": 4.3148535844036444e-07, "loss": 0.1277, "step": 8896 }, { "epoch": 0.8197355691712351, "grad_norm": 0.905676179841572, "learning_rate": 4.310576338215269e-07, "loss": 0.1042, "step": 8897 }, { "epoch": 0.8198277053485051, "grad_norm": 0.9891893214431671, "learning_rate": 4.3063010130421133e-07, "loss": 0.1137, "step": 8898 }, { "epoch": 0.8199198415257751, "grad_norm": 0.9729248302550854, "learning_rate": 4.302027609281129e-07, "loss": 0.1196, "step": 8899 }, { "epoch": 0.8200119777030451, "grad_norm": 1.0186935346621, "learning_rate": 4.2977561273291166e-07, "loss": 0.1255, "step": 8900 }, { "epoch": 0.8201041138803151, "grad_norm": 0.9395849723041353, "learning_rate": 4.2934865675826666e-07, "loss": 0.1116, "step": 8901 }, { "epoch": 0.8201962500575851, "grad_norm": 0.9359575585273199, "learning_rate": 4.289218930438219e-07, "loss": 0.1194, "step": 8902 }, { "epoch": 0.8202883862348551, "grad_norm": 0.930147304393213, "learning_rate": 4.284953216292029e-07, "loss": 0.1276, "step": 8903 }, { "epoch": 0.8203805224121251, "grad_norm": 0.9492365348281026, "learning_rate": 4.280689425540163e-07, "loss": 0.1178, "step": 8904 }, { "epoch": 0.8204726585893951, "grad_norm": 0.8818809813498917, "learning_rate": 4.2764275585785054e-07, "loss": 0.1096, "step": 8905 }, { "epoch": 0.8205647947666651, "grad_norm": 0.9030730135112734, "learning_rate": 4.27216761580278e-07, "loss": 0.1183, "step": 8906 }, { "epoch": 0.8206569309439351, "grad_norm": 0.9361132602718762, "learning_rate": 4.2679095976085217e-07, "loss": 0.1202, "step": 8907 }, { "epoch": 0.8207490671212051, "grad_norm": 0.9450714386955552, "learning_rate": 4.2636535043910965e-07, "loss": 0.1181, "step": 8908 }, { "epoch": 0.8208412032984751, "grad_norm": 0.9815734649085485, "learning_rate": 4.2593993365456746e-07, "loss": 0.1126, "step": 8909 }, { "epoch": 0.8209333394757452, "grad_norm": 0.9795019317978423, "learning_rate": 4.255147094467249e-07, "loss": 0.1177, "step": 8910 }, { "epoch": 0.8210254756530152, "grad_norm": 0.9814500011535171, "learning_rate": 4.250896778550648e-07, "loss": 0.1262, "step": 8911 }, { "epoch": 0.8211176118302852, "grad_norm": 0.8741343412062761, "learning_rate": 4.246648389190522e-07, "loss": 0.1115, "step": 8912 }, { "epoch": 0.8212097480075552, "grad_norm": 1.001494769248629, "learning_rate": 4.24240192678132e-07, "loss": 0.1262, "step": 8913 }, { "epoch": 0.8213018841848252, "grad_norm": 0.9333485530166209, "learning_rate": 4.23815739171734e-07, "loss": 0.115, "step": 8914 }, { "epoch": 0.8213940203620952, "grad_norm": 0.9608338050250689, "learning_rate": 4.233914784392673e-07, "loss": 0.1298, "step": 8915 }, { "epoch": 0.8214861565393652, "grad_norm": 0.9396240080659503, "learning_rate": 4.229674105201259e-07, "loss": 0.1166, "step": 8916 }, { "epoch": 0.8215782927166352, "grad_norm": 0.9271200896495708, "learning_rate": 4.225435354536833e-07, "loss": 0.116, "step": 8917 }, { "epoch": 0.8216704288939052, "grad_norm": 0.9403145613253563, "learning_rate": 4.221198532792972e-07, "loss": 0.1141, "step": 8918 }, { "epoch": 0.8217625650711752, "grad_norm": 0.8984489683434724, "learning_rate": 4.2169636403630697e-07, "loss": 0.1184, "step": 8919 }, { "epoch": 0.8218547012484452, "grad_norm": 0.9492944717039095, "learning_rate": 4.212730677640328e-07, "loss": 0.1157, "step": 8920 }, { "epoch": 0.8219468374257152, "grad_norm": 0.9388579066857203, "learning_rate": 4.2084996450177744e-07, "loss": 0.12, "step": 8921 }, { "epoch": 0.8220389736029852, "grad_norm": 0.9925571358138545, "learning_rate": 4.20427054288827e-07, "loss": 0.1286, "step": 8922 }, { "epoch": 0.8221311097802552, "grad_norm": 0.9465299652537837, "learning_rate": 4.20004337164448e-07, "loss": 0.1252, "step": 8923 }, { "epoch": 0.8222232459575253, "grad_norm": 0.8823315904062302, "learning_rate": 4.1958181316789084e-07, "loss": 0.1116, "step": 8924 }, { "epoch": 0.8223153821347953, "grad_norm": 0.9751808798700181, "learning_rate": 4.1915948233838625e-07, "loss": 0.1131, "step": 8925 }, { "epoch": 0.8224075183120653, "grad_norm": 0.8996660151380795, "learning_rate": 4.1873734471514685e-07, "loss": 0.119, "step": 8926 }, { "epoch": 0.8224996544893353, "grad_norm": 0.9027774874768812, "learning_rate": 4.1831540033736935e-07, "loss": 0.1098, "step": 8927 }, { "epoch": 0.8225917906666053, "grad_norm": 0.9123413341572139, "learning_rate": 4.1789364924423067e-07, "loss": 0.1201, "step": 8928 }, { "epoch": 0.8226839268438753, "grad_norm": 0.9650092844823791, "learning_rate": 4.174720914748914e-07, "loss": 0.1212, "step": 8929 }, { "epoch": 0.8227760630211453, "grad_norm": 0.9097635241979137, "learning_rate": 4.1705072706849287e-07, "loss": 0.1146, "step": 8930 }, { "epoch": 0.8228681991984153, "grad_norm": 0.9642879397283781, "learning_rate": 4.166295560641576e-07, "loss": 0.1214, "step": 8931 }, { "epoch": 0.8229603353756852, "grad_norm": 0.9629087714634937, "learning_rate": 4.1620857850099227e-07, "loss": 0.1155, "step": 8932 }, { "epoch": 0.8230524715529552, "grad_norm": 0.9983273086133829, "learning_rate": 4.157877944180852e-07, "loss": 0.1293, "step": 8933 }, { "epoch": 0.8231446077302252, "grad_norm": 0.9935451639675307, "learning_rate": 4.153672038545054e-07, "loss": 0.128, "step": 8934 }, { "epoch": 0.8232367439074952, "grad_norm": 1.061281927988124, "learning_rate": 4.1494680684930485e-07, "loss": 0.1413, "step": 8935 }, { "epoch": 0.8233288800847652, "grad_norm": 0.9332443265144761, "learning_rate": 4.1452660344151826e-07, "loss": 0.1247, "step": 8936 }, { "epoch": 0.8234210162620353, "grad_norm": 0.8874742579698003, "learning_rate": 4.141065936701613e-07, "loss": 0.1045, "step": 8937 }, { "epoch": 0.8235131524393053, "grad_norm": 0.994749623574586, "learning_rate": 4.1368677757423064e-07, "loss": 0.1285, "step": 8938 }, { "epoch": 0.8236052886165753, "grad_norm": 0.954874028724842, "learning_rate": 4.1326715519270725e-07, "loss": 0.1302, "step": 8939 }, { "epoch": 0.8236974247938453, "grad_norm": 0.9643041528254239, "learning_rate": 4.1284772656455334e-07, "loss": 0.1276, "step": 8940 }, { "epoch": 0.8237895609711153, "grad_norm": 0.9820897900287507, "learning_rate": 4.12428491728713e-07, "loss": 0.1341, "step": 8941 }, { "epoch": 0.8238816971483853, "grad_norm": 0.9548532478634758, "learning_rate": 4.1200945072411207e-07, "loss": 0.121, "step": 8942 }, { "epoch": 0.8239738333256553, "grad_norm": 0.9437093357718186, "learning_rate": 4.1159060358965745e-07, "loss": 0.1326, "step": 8943 }, { "epoch": 0.8240659695029253, "grad_norm": 0.9282509901707379, "learning_rate": 4.111719503642403e-07, "loss": 0.1183, "step": 8944 }, { "epoch": 0.8241581056801953, "grad_norm": 0.971786827764824, "learning_rate": 4.1075349108673306e-07, "loss": 0.1355, "step": 8945 }, { "epoch": 0.8242502418574653, "grad_norm": 0.9875630667869919, "learning_rate": 4.1033522579598804e-07, "loss": 0.1413, "step": 8946 }, { "epoch": 0.8243423780347353, "grad_norm": 0.9528042142165237, "learning_rate": 4.0991715453084307e-07, "loss": 0.1146, "step": 8947 }, { "epoch": 0.8244345142120053, "grad_norm": 0.9010012156168963, "learning_rate": 4.0949927733011455e-07, "loss": 0.1172, "step": 8948 }, { "epoch": 0.8245266503892753, "grad_norm": 0.90929624679404, "learning_rate": 4.0908159423260374e-07, "loss": 0.1142, "step": 8949 }, { "epoch": 0.8246187865665453, "grad_norm": 0.943372601274594, "learning_rate": 4.086641052770915e-07, "loss": 0.1162, "step": 8950 }, { "epoch": 0.8247109227438154, "grad_norm": 0.9659695477866573, "learning_rate": 4.082468105023418e-07, "loss": 0.1209, "step": 8951 }, { "epoch": 0.8248030589210854, "grad_norm": 0.8942946250102998, "learning_rate": 4.078297099471018e-07, "loss": 0.1143, "step": 8952 }, { "epoch": 0.8248951950983554, "grad_norm": 0.9233344249672104, "learning_rate": 4.0741280365009765e-07, "loss": 0.1254, "step": 8953 }, { "epoch": 0.8249873312756254, "grad_norm": 0.9404317994762078, "learning_rate": 4.069960916500404e-07, "loss": 0.118, "step": 8954 }, { "epoch": 0.8250794674528954, "grad_norm": 0.9542843518189766, "learning_rate": 4.065795739856207e-07, "loss": 0.1236, "step": 8955 }, { "epoch": 0.8251716036301654, "grad_norm": 0.8768924661820494, "learning_rate": 4.0616325069551296e-07, "loss": 0.1026, "step": 8956 }, { "epoch": 0.8252637398074354, "grad_norm": 0.910496711637403, "learning_rate": 4.057471218183734e-07, "loss": 0.1185, "step": 8957 }, { "epoch": 0.8253558759847054, "grad_norm": 0.9617153732562939, "learning_rate": 4.0533118739283864e-07, "loss": 0.1287, "step": 8958 }, { "epoch": 0.8254480121619754, "grad_norm": 0.9206590523649169, "learning_rate": 4.049154474575284e-07, "loss": 0.1096, "step": 8959 }, { "epoch": 0.8255401483392454, "grad_norm": 0.9911337506691006, "learning_rate": 4.04499902051044e-07, "loss": 0.1254, "step": 8960 }, { "epoch": 0.8256322845165154, "grad_norm": 0.9393601602299094, "learning_rate": 4.0408455121196957e-07, "loss": 0.1216, "step": 8961 }, { "epoch": 0.8257244206937854, "grad_norm": 0.90987307073812, "learning_rate": 4.0366939497887033e-07, "loss": 0.1069, "step": 8962 }, { "epoch": 0.8258165568710554, "grad_norm": 0.9690470990393921, "learning_rate": 4.032544333902935e-07, "loss": 0.1267, "step": 8963 }, { "epoch": 0.8259086930483254, "grad_norm": 0.9604445922242358, "learning_rate": 4.028396664847678e-07, "loss": 0.1182, "step": 8964 }, { "epoch": 0.8260008292255955, "grad_norm": 0.9183917505789629, "learning_rate": 4.0242509430080456e-07, "loss": 0.1126, "step": 8965 }, { "epoch": 0.8260929654028655, "grad_norm": 1.0160636850060218, "learning_rate": 4.0201071687689746e-07, "loss": 0.1324, "step": 8966 }, { "epoch": 0.8261851015801355, "grad_norm": 0.9045916383232453, "learning_rate": 4.0159653425152074e-07, "loss": 0.1165, "step": 8967 }, { "epoch": 0.8262772377574055, "grad_norm": 0.9083173799399996, "learning_rate": 4.011825464631322e-07, "loss": 0.1046, "step": 8968 }, { "epoch": 0.8263693739346755, "grad_norm": 0.9147366746741465, "learning_rate": 4.0076875355016975e-07, "loss": 0.113, "step": 8969 }, { "epoch": 0.8264615101119455, "grad_norm": 0.9910503681642115, "learning_rate": 4.003551555510549e-07, "loss": 0.1175, "step": 8970 }, { "epoch": 0.8265536462892155, "grad_norm": 1.0082243762021406, "learning_rate": 3.99941752504189e-07, "loss": 0.1273, "step": 8971 }, { "epoch": 0.8266457824664855, "grad_norm": 0.9544300821805285, "learning_rate": 3.995285444479574e-07, "loss": 0.1277, "step": 8972 }, { "epoch": 0.8267379186437555, "grad_norm": 0.9873879835090933, "learning_rate": 3.9911553142072733e-07, "loss": 0.1156, "step": 8973 }, { "epoch": 0.8268300548210255, "grad_norm": 1.0143048636164989, "learning_rate": 3.987027134608462e-07, "loss": 0.1268, "step": 8974 }, { "epoch": 0.8269221909982954, "grad_norm": 0.9406111544099246, "learning_rate": 3.9829009060664363e-07, "loss": 0.1246, "step": 8975 }, { "epoch": 0.8270143271755654, "grad_norm": 0.9211034325729525, "learning_rate": 3.9787766289643233e-07, "loss": 0.1066, "step": 8976 }, { "epoch": 0.8271064633528354, "grad_norm": 0.9369902884160378, "learning_rate": 3.974654303685063e-07, "loss": 0.1135, "step": 8977 }, { "epoch": 0.8271985995301055, "grad_norm": 0.9431563970089338, "learning_rate": 3.97053393061142e-07, "loss": 0.111, "step": 8978 }, { "epoch": 0.8272907357073755, "grad_norm": 0.9293997578153168, "learning_rate": 3.966415510125965e-07, "loss": 0.1093, "step": 8979 }, { "epoch": 0.8273828718846455, "grad_norm": 0.9676726803687037, "learning_rate": 3.9622990426110867e-07, "loss": 0.1204, "step": 8980 }, { "epoch": 0.8274750080619155, "grad_norm": 0.9486005141441863, "learning_rate": 3.958184528449005e-07, "loss": 0.1204, "step": 8981 }, { "epoch": 0.8275671442391855, "grad_norm": 0.9918274525746862, "learning_rate": 3.954071968021755e-07, "loss": 0.127, "step": 8982 }, { "epoch": 0.8276592804164555, "grad_norm": 0.9000784607704694, "learning_rate": 3.9499613617111965e-07, "loss": 0.1071, "step": 8983 }, { "epoch": 0.8277514165937255, "grad_norm": 0.954022230403387, "learning_rate": 3.945852709898987e-07, "loss": 0.1233, "step": 8984 }, { "epoch": 0.8278435527709955, "grad_norm": 1.0309117530762104, "learning_rate": 3.941746012966616e-07, "loss": 0.1155, "step": 8985 }, { "epoch": 0.8279356889482655, "grad_norm": 0.9673615839155028, "learning_rate": 3.937641271295392e-07, "loss": 0.1245, "step": 8986 }, { "epoch": 0.8280278251255355, "grad_norm": 0.996234111747313, "learning_rate": 3.93353848526645e-07, "loss": 0.1329, "step": 8987 }, { "epoch": 0.8281199613028055, "grad_norm": 0.9257195359719688, "learning_rate": 3.9294376552607233e-07, "loss": 0.1149, "step": 8988 }, { "epoch": 0.8282120974800755, "grad_norm": 0.8982916316625367, "learning_rate": 3.92533878165898e-07, "loss": 0.1091, "step": 8989 }, { "epoch": 0.8283042336573455, "grad_norm": 0.9789169488412091, "learning_rate": 3.921241864841793e-07, "loss": 0.1123, "step": 8990 }, { "epoch": 0.8283963698346155, "grad_norm": 0.9511169814629293, "learning_rate": 3.917146905189576e-07, "loss": 0.1122, "step": 8991 }, { "epoch": 0.8284885060118856, "grad_norm": 0.9369304152077549, "learning_rate": 3.913053903082531e-07, "loss": 0.1155, "step": 8992 }, { "epoch": 0.8285806421891556, "grad_norm": 0.9261575552816484, "learning_rate": 3.9089628589007e-07, "loss": 0.1204, "step": 8993 }, { "epoch": 0.8286727783664256, "grad_norm": 0.9719530306024485, "learning_rate": 3.9048737730239427e-07, "loss": 0.1287, "step": 8994 }, { "epoch": 0.8287649145436956, "grad_norm": 0.9022992796256752, "learning_rate": 3.9007866458319275e-07, "loss": 0.1032, "step": 8995 }, { "epoch": 0.8288570507209656, "grad_norm": 0.9154891131247639, "learning_rate": 3.8967014777041344e-07, "loss": 0.1183, "step": 8996 }, { "epoch": 0.8289491868982356, "grad_norm": 0.9790283403390899, "learning_rate": 3.892618269019882e-07, "loss": 0.12, "step": 8997 }, { "epoch": 0.8290413230755056, "grad_norm": 0.8989171769636702, "learning_rate": 3.888537020158295e-07, "loss": 0.1162, "step": 8998 }, { "epoch": 0.8291334592527756, "grad_norm": 0.9513898800003195, "learning_rate": 3.8844577314983254e-07, "loss": 0.1206, "step": 8999 }, { "epoch": 0.8292255954300456, "grad_norm": 0.8928187592109958, "learning_rate": 3.8803804034187235e-07, "loss": 0.1147, "step": 9000 }, { "epoch": 0.8292255954300456, "eval_loss": 0.11866238713264465, "eval_runtime": 298.9938, "eval_samples_per_second": 23.469, "eval_steps_per_second": 2.937, "step": 9000 }, { "epoch": 0.8293177316073156, "grad_norm": 0.8963570408818765, "learning_rate": 3.8763050362980723e-07, "loss": 0.1019, "step": 9001 }, { "epoch": 0.8294098677845856, "grad_norm": 0.9809451351037467, "learning_rate": 3.8722316305147693e-07, "loss": 0.1274, "step": 9002 }, { "epoch": 0.8295020039618556, "grad_norm": 0.8795144198454591, "learning_rate": 3.8681601864470396e-07, "loss": 0.1017, "step": 9003 }, { "epoch": 0.8295941401391256, "grad_norm": 0.9827458108554684, "learning_rate": 3.864090704472906e-07, "loss": 0.1246, "step": 9004 }, { "epoch": 0.8296862763163957, "grad_norm": 0.9325877389996922, "learning_rate": 3.86002318497023e-07, "loss": 0.118, "step": 9005 }, { "epoch": 0.8297784124936657, "grad_norm": 0.9312298769357221, "learning_rate": 3.855957628316673e-07, "loss": 0.1339, "step": 9006 }, { "epoch": 0.8298705486709357, "grad_norm": 0.9946128464419297, "learning_rate": 3.8518940348897277e-07, "loss": 0.1323, "step": 9007 }, { "epoch": 0.8299626848482057, "grad_norm": 0.9313704014210649, "learning_rate": 3.8478324050666926e-07, "loss": 0.1181, "step": 9008 }, { "epoch": 0.8300548210254757, "grad_norm": 0.9198770011959588, "learning_rate": 3.8437727392246966e-07, "loss": 0.1142, "step": 9009 }, { "epoch": 0.8301469572027457, "grad_norm": 0.9180412820101661, "learning_rate": 3.839715037740677e-07, "loss": 0.1252, "step": 9010 }, { "epoch": 0.8302390933800157, "grad_norm": 0.9675721326632333, "learning_rate": 3.835659300991401e-07, "loss": 0.1252, "step": 9011 }, { "epoch": 0.8303312295572857, "grad_norm": 0.9748217354158463, "learning_rate": 3.8316055293534353e-07, "loss": 0.1179, "step": 9012 }, { "epoch": 0.8304233657345557, "grad_norm": 0.8849626621010777, "learning_rate": 3.82755372320317e-07, "loss": 0.1157, "step": 9013 }, { "epoch": 0.8305155019118257, "grad_norm": 0.9417758302521162, "learning_rate": 3.823503882916818e-07, "loss": 0.1145, "step": 9014 }, { "epoch": 0.8306076380890957, "grad_norm": 0.9352672162324204, "learning_rate": 3.819456008870412e-07, "loss": 0.1112, "step": 9015 }, { "epoch": 0.8306997742663657, "grad_norm": 0.9546735786454992, "learning_rate": 3.815410101439798e-07, "loss": 0.124, "step": 9016 }, { "epoch": 0.8307919104436357, "grad_norm": 0.8982406751220058, "learning_rate": 3.8113661610006375e-07, "loss": 0.108, "step": 9017 }, { "epoch": 0.8308840466209056, "grad_norm": 0.936872634455003, "learning_rate": 3.8073241879284045e-07, "loss": 0.125, "step": 9018 }, { "epoch": 0.8309761827981758, "grad_norm": 0.9029042550032307, "learning_rate": 3.803284182598399e-07, "loss": 0.111, "step": 9019 }, { "epoch": 0.8310683189754458, "grad_norm": 0.9279239531114798, "learning_rate": 3.799246145385746e-07, "loss": 0.1183, "step": 9020 }, { "epoch": 0.8311604551527157, "grad_norm": 0.9001601906556236, "learning_rate": 3.795210076665362e-07, "loss": 0.1062, "step": 9021 }, { "epoch": 0.8312525913299857, "grad_norm": 0.9096130257836612, "learning_rate": 3.791175976812014e-07, "loss": 0.1121, "step": 9022 }, { "epoch": 0.8313447275072557, "grad_norm": 0.9841772012761711, "learning_rate": 3.78714384620025e-07, "loss": 0.1291, "step": 9023 }, { "epoch": 0.8314368636845257, "grad_norm": 0.978523579768035, "learning_rate": 3.7831136852044705e-07, "loss": 0.1217, "step": 9024 }, { "epoch": 0.8315289998617957, "grad_norm": 0.9247283536985508, "learning_rate": 3.7790854941988615e-07, "loss": 0.1077, "step": 9025 }, { "epoch": 0.8316211360390657, "grad_norm": 0.9377240447676977, "learning_rate": 3.7750592735574494e-07, "loss": 0.1153, "step": 9026 }, { "epoch": 0.8317132722163357, "grad_norm": 0.9592075723655732, "learning_rate": 3.7710350236540737e-07, "loss": 0.1163, "step": 9027 }, { "epoch": 0.8318054083936057, "grad_norm": 0.9618403112391543, "learning_rate": 3.7670127448623804e-07, "loss": 0.1261, "step": 9028 }, { "epoch": 0.8318975445708757, "grad_norm": 0.9207309046891926, "learning_rate": 3.7629924375558347e-07, "loss": 0.1142, "step": 9029 }, { "epoch": 0.8319896807481457, "grad_norm": 0.9393609681547822, "learning_rate": 3.7589741021077234e-07, "loss": 0.1193, "step": 9030 }, { "epoch": 0.8320818169254157, "grad_norm": 0.9288187420162773, "learning_rate": 3.7549577388911546e-07, "loss": 0.1123, "step": 9031 }, { "epoch": 0.8321739531026857, "grad_norm": 0.9124189814585899, "learning_rate": 3.7509433482790515e-07, "loss": 0.11, "step": 9032 }, { "epoch": 0.8322660892799558, "grad_norm": 0.9163760756223935, "learning_rate": 3.7469309306441466e-07, "loss": 0.1089, "step": 9033 }, { "epoch": 0.8323582254572258, "grad_norm": 1.020152579275757, "learning_rate": 3.742920486358986e-07, "loss": 0.1325, "step": 9034 }, { "epoch": 0.8324503616344958, "grad_norm": 0.9058071226707416, "learning_rate": 3.738912015795945e-07, "loss": 0.1124, "step": 9035 }, { "epoch": 0.8325424978117658, "grad_norm": 0.9111574495433679, "learning_rate": 3.734905519327217e-07, "loss": 0.1197, "step": 9036 }, { "epoch": 0.8326346339890358, "grad_norm": 0.9817191595487917, "learning_rate": 3.7309009973247963e-07, "loss": 0.1235, "step": 9037 }, { "epoch": 0.8327267701663058, "grad_norm": 0.8820936392317466, "learning_rate": 3.72689845016051e-07, "loss": 0.1057, "step": 9038 }, { "epoch": 0.8328189063435758, "grad_norm": 0.9080842498257791, "learning_rate": 3.722897878205989e-07, "loss": 0.1238, "step": 9039 }, { "epoch": 0.8329110425208458, "grad_norm": 0.9689835563120064, "learning_rate": 3.718899281832686e-07, "loss": 0.1158, "step": 9040 }, { "epoch": 0.8330031786981158, "grad_norm": 0.9041628190128206, "learning_rate": 3.7149026614118844e-07, "loss": 0.1161, "step": 9041 }, { "epoch": 0.8330953148753858, "grad_norm": 0.8889635923127017, "learning_rate": 3.7109080173146543e-07, "loss": 0.1125, "step": 9042 }, { "epoch": 0.8331874510526558, "grad_norm": 0.9497760569923801, "learning_rate": 3.7069153499119134e-07, "loss": 0.1229, "step": 9043 }, { "epoch": 0.8332795872299258, "grad_norm": 0.9338812497801194, "learning_rate": 3.7029246595743666e-07, "loss": 0.1178, "step": 9044 }, { "epoch": 0.8333717234071958, "grad_norm": 0.8848813835864621, "learning_rate": 3.6989359466725603e-07, "loss": 0.109, "step": 9045 }, { "epoch": 0.8334638595844659, "grad_norm": 0.8906596859436642, "learning_rate": 3.6949492115768425e-07, "loss": 0.1136, "step": 9046 }, { "epoch": 0.8335559957617359, "grad_norm": 0.926402940035174, "learning_rate": 3.6909644546573806e-07, "loss": 0.1124, "step": 9047 }, { "epoch": 0.8336481319390059, "grad_norm": 0.9310849307554305, "learning_rate": 3.686981676284171e-07, "loss": 0.1116, "step": 9048 }, { "epoch": 0.8337402681162759, "grad_norm": 0.9872574472717625, "learning_rate": 3.6830008768270033e-07, "loss": 0.1274, "step": 9049 }, { "epoch": 0.8338324042935459, "grad_norm": 0.9384635786716498, "learning_rate": 3.679022056655493e-07, "loss": 0.115, "step": 9050 }, { "epoch": 0.8339245404708159, "grad_norm": 0.950804946256567, "learning_rate": 3.675045216139081e-07, "loss": 0.1113, "step": 9051 }, { "epoch": 0.8340166766480859, "grad_norm": 0.9885673160133664, "learning_rate": 3.6710703556470136e-07, "loss": 0.1242, "step": 9052 }, { "epoch": 0.8341088128253559, "grad_norm": 0.9689834654878836, "learning_rate": 3.667097475548367e-07, "loss": 0.1243, "step": 9053 }, { "epoch": 0.8342009490026259, "grad_norm": 0.9672726322538227, "learning_rate": 3.663126576212014e-07, "loss": 0.1226, "step": 9054 }, { "epoch": 0.8342930851798959, "grad_norm": 0.9226458262636499, "learning_rate": 3.659157658006651e-07, "loss": 0.1171, "step": 9055 }, { "epoch": 0.8343852213571659, "grad_norm": 0.9283165671065761, "learning_rate": 3.655190721300794e-07, "loss": 0.1175, "step": 9056 }, { "epoch": 0.8344773575344359, "grad_norm": 0.892800777812627, "learning_rate": 3.651225766462782e-07, "loss": 0.1133, "step": 9057 }, { "epoch": 0.8345694937117059, "grad_norm": 1.0480032457880637, "learning_rate": 3.647262793860751e-07, "loss": 0.1168, "step": 9058 }, { "epoch": 0.8346616298889759, "grad_norm": 0.9495060160798997, "learning_rate": 3.643301803862673e-07, "loss": 0.1251, "step": 9059 }, { "epoch": 0.834753766066246, "grad_norm": 0.9767279275355083, "learning_rate": 3.639342796836312e-07, "loss": 0.1249, "step": 9060 }, { "epoch": 0.834845902243516, "grad_norm": 0.956717822836148, "learning_rate": 3.63538577314928e-07, "loss": 0.1167, "step": 9061 }, { "epoch": 0.834938038420786, "grad_norm": 0.8805632595773724, "learning_rate": 3.6314307331689725e-07, "loss": 0.1089, "step": 9062 }, { "epoch": 0.835030174598056, "grad_norm": 0.9608933523035653, "learning_rate": 3.6274776772626197e-07, "loss": 0.1173, "step": 9063 }, { "epoch": 0.835122310775326, "grad_norm": 0.9490880216010513, "learning_rate": 3.6235266057972727e-07, "loss": 0.1236, "step": 9064 }, { "epoch": 0.835214446952596, "grad_norm": 0.9741016505813996, "learning_rate": 3.6195775191397776e-07, "loss": 0.1312, "step": 9065 }, { "epoch": 0.8353065831298659, "grad_norm": 1.061742094019043, "learning_rate": 3.615630417656807e-07, "loss": 0.1333, "step": 9066 }, { "epoch": 0.8353987193071359, "grad_norm": 0.9404116017239663, "learning_rate": 3.611685301714854e-07, "loss": 0.1159, "step": 9067 }, { "epoch": 0.8354908554844059, "grad_norm": 0.948652738305397, "learning_rate": 3.607742171680223e-07, "loss": 0.1156, "step": 9068 }, { "epoch": 0.8355829916616759, "grad_norm": 1.0125976280025968, "learning_rate": 3.6038010279190376e-07, "loss": 0.136, "step": 9069 }, { "epoch": 0.8356751278389459, "grad_norm": 0.8919477550099669, "learning_rate": 3.5998618707972303e-07, "loss": 0.1076, "step": 9070 }, { "epoch": 0.8357672640162159, "grad_norm": 0.9215454245045038, "learning_rate": 3.5959247006805446e-07, "loss": 0.1148, "step": 9071 }, { "epoch": 0.8358594001934859, "grad_norm": 0.9527340209068846, "learning_rate": 3.5919895179345546e-07, "loss": 0.1193, "step": 9072 }, { "epoch": 0.835951536370756, "grad_norm": 0.964974428331, "learning_rate": 3.5880563229246434e-07, "loss": 0.1248, "step": 9073 }, { "epoch": 0.836043672548026, "grad_norm": 0.9511935127041369, "learning_rate": 3.58412511601601e-07, "loss": 0.1213, "step": 9074 }, { "epoch": 0.836135808725296, "grad_norm": 0.9306185915223708, "learning_rate": 3.5801958975736653e-07, "loss": 0.1158, "step": 9075 }, { "epoch": 0.836227944902566, "grad_norm": 0.8972295969668357, "learning_rate": 3.576268667962432e-07, "loss": 0.1121, "step": 9076 }, { "epoch": 0.836320081079836, "grad_norm": 0.9471002321711262, "learning_rate": 3.5723434275469593e-07, "loss": 0.1185, "step": 9077 }, { "epoch": 0.836412217257106, "grad_norm": 0.9437730017790239, "learning_rate": 3.5684201766917087e-07, "loss": 0.1203, "step": 9078 }, { "epoch": 0.836504353434376, "grad_norm": 0.9323885982962973, "learning_rate": 3.564498915760947e-07, "loss": 0.1246, "step": 9079 }, { "epoch": 0.836596489611646, "grad_norm": 0.9202495340206708, "learning_rate": 3.560579645118775e-07, "loss": 0.1123, "step": 9080 }, { "epoch": 0.836688625788916, "grad_norm": 0.9643478363526602, "learning_rate": 3.556662365129082e-07, "loss": 0.1242, "step": 9081 }, { "epoch": 0.836780761966186, "grad_norm": 0.9643206560127056, "learning_rate": 3.5527470761556024e-07, "loss": 0.1313, "step": 9082 }, { "epoch": 0.836872898143456, "grad_norm": 0.9966865526476313, "learning_rate": 3.5488337785618607e-07, "loss": 0.1277, "step": 9083 }, { "epoch": 0.836965034320726, "grad_norm": 0.9510760700309292, "learning_rate": 3.5449224727112094e-07, "loss": 0.1239, "step": 9084 }, { "epoch": 0.837057170497996, "grad_norm": 0.937440293224199, "learning_rate": 3.541013158966816e-07, "loss": 0.1185, "step": 9085 }, { "epoch": 0.837149306675266, "grad_norm": 0.9609619870780663, "learning_rate": 3.5371058376916733e-07, "loss": 0.1136, "step": 9086 }, { "epoch": 0.8372414428525361, "grad_norm": 0.9648051774960312, "learning_rate": 3.5332005092485496e-07, "loss": 0.1196, "step": 9087 }, { "epoch": 0.8373335790298061, "grad_norm": 0.9183894976690667, "learning_rate": 3.529297174000071e-07, "loss": 0.1114, "step": 9088 }, { "epoch": 0.8374257152070761, "grad_norm": 0.9582901426501818, "learning_rate": 3.525395832308659e-07, "loss": 0.114, "step": 9089 }, { "epoch": 0.8375178513843461, "grad_norm": 0.9000415692013729, "learning_rate": 3.52149648453656e-07, "loss": 0.117, "step": 9090 }, { "epoch": 0.8376099875616161, "grad_norm": 0.9444437649158218, "learning_rate": 3.5175991310458207e-07, "loss": 0.1179, "step": 9091 }, { "epoch": 0.8377021237388861, "grad_norm": 0.941489120512911, "learning_rate": 3.513703772198318e-07, "loss": 0.1279, "step": 9092 }, { "epoch": 0.8377942599161561, "grad_norm": 0.9868011195136057, "learning_rate": 3.5098104083557264e-07, "loss": 0.1223, "step": 9093 }, { "epoch": 0.8378863960934261, "grad_norm": 0.8705477423458002, "learning_rate": 3.5059190398795603e-07, "loss": 0.1046, "step": 9094 }, { "epoch": 0.8379785322706961, "grad_norm": 0.9114211371590364, "learning_rate": 3.5020296671311154e-07, "loss": 0.1206, "step": 9095 }, { "epoch": 0.8380706684479661, "grad_norm": 0.8950724214900556, "learning_rate": 3.498142290471529e-07, "loss": 0.113, "step": 9096 }, { "epoch": 0.8381628046252361, "grad_norm": 0.9521022208521928, "learning_rate": 3.4942569102617534e-07, "loss": 0.1135, "step": 9097 }, { "epoch": 0.8382549408025061, "grad_norm": 0.8837117200628214, "learning_rate": 3.490373526862531e-07, "loss": 0.1017, "step": 9098 }, { "epoch": 0.8383470769797761, "grad_norm": 0.8962134046824323, "learning_rate": 3.4864921406344483e-07, "loss": 0.1135, "step": 9099 }, { "epoch": 0.8384392131570461, "grad_norm": 0.8887076436299933, "learning_rate": 3.482612751937878e-07, "loss": 0.1116, "step": 9100 }, { "epoch": 0.8385313493343162, "grad_norm": 0.9453219724054187, "learning_rate": 3.4787353611330317e-07, "loss": 0.1188, "step": 9101 }, { "epoch": 0.8386234855115862, "grad_norm": 0.9381143409167717, "learning_rate": 3.4748599685799274e-07, "loss": 0.1167, "step": 9102 }, { "epoch": 0.8387156216888562, "grad_norm": 0.9600988951205893, "learning_rate": 3.470986574638391e-07, "loss": 0.1087, "step": 9103 }, { "epoch": 0.8388077578661262, "grad_norm": 0.984011588082167, "learning_rate": 3.4671151796680654e-07, "loss": 0.1325, "step": 9104 }, { "epoch": 0.8388998940433962, "grad_norm": 0.9611615555873271, "learning_rate": 3.46324578402841e-07, "loss": 0.1305, "step": 9105 }, { "epoch": 0.8389920302206662, "grad_norm": 0.9586580145664007, "learning_rate": 3.459378388078702e-07, "loss": 0.1225, "step": 9106 }, { "epoch": 0.8390841663979361, "grad_norm": 0.9436411042590699, "learning_rate": 3.4555129921780337e-07, "loss": 0.117, "step": 9107 }, { "epoch": 0.8391763025752061, "grad_norm": 0.9925703690511307, "learning_rate": 3.4516495966853054e-07, "loss": 0.1177, "step": 9108 }, { "epoch": 0.8392684387524761, "grad_norm": 0.9548357208284326, "learning_rate": 3.447788201959223e-07, "loss": 0.1247, "step": 9109 }, { "epoch": 0.8393605749297461, "grad_norm": 0.9972576838402836, "learning_rate": 3.443928808358327e-07, "loss": 0.1367, "step": 9110 }, { "epoch": 0.8394527111070161, "grad_norm": 0.923211368304357, "learning_rate": 3.4400714162409644e-07, "loss": 0.1182, "step": 9111 }, { "epoch": 0.8395448472842861, "grad_norm": 0.9053249971071133, "learning_rate": 3.4362160259652887e-07, "loss": 0.109, "step": 9112 }, { "epoch": 0.8396369834615561, "grad_norm": 1.02878674745858, "learning_rate": 3.4323626378892775e-07, "loss": 0.1369, "step": 9113 }, { "epoch": 0.8397291196388262, "grad_norm": 0.8885051736781546, "learning_rate": 3.4285112523707143e-07, "loss": 0.1153, "step": 9114 }, { "epoch": 0.8398212558160962, "grad_norm": 0.9073801662881984, "learning_rate": 3.424661869767207e-07, "loss": 0.1089, "step": 9115 }, { "epoch": 0.8399133919933662, "grad_norm": 0.928370265115947, "learning_rate": 3.4208144904361613e-07, "loss": 0.1121, "step": 9116 }, { "epoch": 0.8400055281706362, "grad_norm": 0.9212881770336953, "learning_rate": 3.4169691147348137e-07, "loss": 0.1259, "step": 9117 }, { "epoch": 0.8400976643479062, "grad_norm": 0.9788684272042728, "learning_rate": 3.4131257430202124e-07, "loss": 0.1266, "step": 9118 }, { "epoch": 0.8401898005251762, "grad_norm": 0.9755763215915222, "learning_rate": 3.4092843756492075e-07, "loss": 0.1256, "step": 9119 }, { "epoch": 0.8402819367024462, "grad_norm": 0.9751484992123327, "learning_rate": 3.405445012978467e-07, "loss": 0.1343, "step": 9120 }, { "epoch": 0.8403740728797162, "grad_norm": 1.013270465324915, "learning_rate": 3.4016076553644806e-07, "loss": 0.1311, "step": 9121 }, { "epoch": 0.8404662090569862, "grad_norm": 0.9903544054884075, "learning_rate": 3.397772303163549e-07, "loss": 0.1291, "step": 9122 }, { "epoch": 0.8405583452342562, "grad_norm": 1.0099661007009835, "learning_rate": 3.3939389567317876e-07, "loss": 0.1196, "step": 9123 }, { "epoch": 0.8406504814115262, "grad_norm": 0.9572207364409202, "learning_rate": 3.39010761642512e-07, "loss": 0.1328, "step": 9124 }, { "epoch": 0.8407426175887962, "grad_norm": 0.9433063660527777, "learning_rate": 3.386278282599281e-07, "loss": 0.1244, "step": 9125 }, { "epoch": 0.8408347537660662, "grad_norm": 0.9561219271919276, "learning_rate": 3.3824509556098314e-07, "loss": 0.1117, "step": 9126 }, { "epoch": 0.8409268899433362, "grad_norm": 0.9526955884568686, "learning_rate": 3.3786256358121327e-07, "loss": 0.1171, "step": 9127 }, { "epoch": 0.8410190261206063, "grad_norm": 0.9421580524280039, "learning_rate": 3.37480232356138e-07, "loss": 0.1185, "step": 9128 }, { "epoch": 0.8411111622978763, "grad_norm": 0.9007740123953568, "learning_rate": 3.3709810192125576e-07, "loss": 0.1124, "step": 9129 }, { "epoch": 0.8412032984751463, "grad_norm": 0.9257483251297324, "learning_rate": 3.3671617231204683e-07, "loss": 0.1231, "step": 9130 }, { "epoch": 0.8412954346524163, "grad_norm": 0.9196705659888632, "learning_rate": 3.3633444356397425e-07, "loss": 0.1145, "step": 9131 }, { "epoch": 0.8413875708296863, "grad_norm": 0.9501560375264937, "learning_rate": 3.3595291571248216e-07, "loss": 0.1174, "step": 9132 }, { "epoch": 0.8414797070069563, "grad_norm": 0.9529617492932547, "learning_rate": 3.355715887929939e-07, "loss": 0.1235, "step": 9133 }, { "epoch": 0.8415718431842263, "grad_norm": 0.9237901196829995, "learning_rate": 3.351904628409172e-07, "loss": 0.1113, "step": 9134 }, { "epoch": 0.8416639793614963, "grad_norm": 0.9690381039536827, "learning_rate": 3.348095378916386e-07, "loss": 0.1195, "step": 9135 }, { "epoch": 0.8417561155387663, "grad_norm": 0.9498145803212865, "learning_rate": 3.34428813980528e-07, "loss": 0.1163, "step": 9136 }, { "epoch": 0.8418482517160363, "grad_norm": 0.8832640227036879, "learning_rate": 3.3404829114293437e-07, "loss": 0.1051, "step": 9137 }, { "epoch": 0.8419403878933063, "grad_norm": 0.8932009094651336, "learning_rate": 3.336679694141898e-07, "loss": 0.1023, "step": 9138 }, { "epoch": 0.8420325240705763, "grad_norm": 0.9251539366264352, "learning_rate": 3.3328784882960817e-07, "loss": 0.1073, "step": 9139 }, { "epoch": 0.8421246602478463, "grad_norm": 0.9107574392909054, "learning_rate": 3.3290792942448303e-07, "loss": 0.1217, "step": 9140 }, { "epoch": 0.8422167964251164, "grad_norm": 0.9267056836585295, "learning_rate": 3.325282112340894e-07, "loss": 0.1176, "step": 9141 }, { "epoch": 0.8423089326023864, "grad_norm": 0.9393315446042251, "learning_rate": 3.321486942936844e-07, "loss": 0.123, "step": 9142 }, { "epoch": 0.8424010687796564, "grad_norm": 0.9063891028187058, "learning_rate": 3.317693786385065e-07, "loss": 0.1099, "step": 9143 }, { "epoch": 0.8424932049569264, "grad_norm": 0.9198452193152991, "learning_rate": 3.3139026430377583e-07, "loss": 0.1144, "step": 9144 }, { "epoch": 0.8425853411341964, "grad_norm": 0.9107477136619364, "learning_rate": 3.3101135132469237e-07, "loss": 0.1102, "step": 9145 }, { "epoch": 0.8426774773114664, "grad_norm": 0.9865477556790888, "learning_rate": 3.306326397364379e-07, "loss": 0.1203, "step": 9146 }, { "epoch": 0.8427696134887364, "grad_norm": 0.9823050755373736, "learning_rate": 3.3025412957417624e-07, "loss": 0.1287, "step": 9147 }, { "epoch": 0.8428617496660064, "grad_norm": 0.9272156435057771, "learning_rate": 3.298758208730529e-07, "loss": 0.1219, "step": 9148 }, { "epoch": 0.8429538858432764, "grad_norm": 0.9622429969672128, "learning_rate": 3.294977136681923e-07, "loss": 0.1287, "step": 9149 }, { "epoch": 0.8430460220205463, "grad_norm": 1.012662531193091, "learning_rate": 3.291198079947033e-07, "loss": 0.1199, "step": 9150 }, { "epoch": 0.8431381581978163, "grad_norm": 0.903122594351644, "learning_rate": 3.2874210388767313e-07, "loss": 0.1208, "step": 9151 }, { "epoch": 0.8432302943750863, "grad_norm": 0.9666984134750198, "learning_rate": 3.2836460138217287e-07, "loss": 0.1169, "step": 9152 }, { "epoch": 0.8433224305523563, "grad_norm": 1.0222716317336589, "learning_rate": 3.279873005132525e-07, "loss": 0.1298, "step": 9153 }, { "epoch": 0.8434145667296263, "grad_norm": 0.9444079754719301, "learning_rate": 3.276102013159452e-07, "loss": 0.1129, "step": 9154 }, { "epoch": 0.8435067029068964, "grad_norm": 0.9178977470183698, "learning_rate": 3.272333038252648e-07, "loss": 0.1152, "step": 9155 }, { "epoch": 0.8435988390841664, "grad_norm": 0.990355918764317, "learning_rate": 3.2685660807620563e-07, "loss": 0.122, "step": 9156 }, { "epoch": 0.8436909752614364, "grad_norm": 0.994814328810813, "learning_rate": 3.2648011410374463e-07, "loss": 0.1273, "step": 9157 }, { "epoch": 0.8437831114387064, "grad_norm": 0.9642134855127644, "learning_rate": 3.2610382194283865e-07, "loss": 0.115, "step": 9158 }, { "epoch": 0.8438752476159764, "grad_norm": 0.998202330870969, "learning_rate": 3.257277316284266e-07, "loss": 0.1175, "step": 9159 }, { "epoch": 0.8439673837932464, "grad_norm": 0.9220879009717579, "learning_rate": 3.253518431954286e-07, "loss": 0.1144, "step": 9160 }, { "epoch": 0.8440595199705164, "grad_norm": 0.9456319757338142, "learning_rate": 3.249761566787474e-07, "loss": 0.1138, "step": 9161 }, { "epoch": 0.8441516561477864, "grad_norm": 0.9517620248803692, "learning_rate": 3.2460067211326274e-07, "loss": 0.1183, "step": 9162 }, { "epoch": 0.8442437923250564, "grad_norm": 0.9205843267337143, "learning_rate": 3.2422538953383986e-07, "loss": 0.1125, "step": 9163 }, { "epoch": 0.8443359285023264, "grad_norm": 0.9913122073528652, "learning_rate": 3.2385030897532364e-07, "loss": 0.1214, "step": 9164 }, { "epoch": 0.8444280646795964, "grad_norm": 0.9507436181178945, "learning_rate": 3.234754304725413e-07, "loss": 0.1172, "step": 9165 }, { "epoch": 0.8445202008568664, "grad_norm": 0.9003711742448463, "learning_rate": 3.2310075406029875e-07, "loss": 0.1084, "step": 9166 }, { "epoch": 0.8446123370341364, "grad_norm": 1.0251546343565063, "learning_rate": 3.22726279773386e-07, "loss": 0.1329, "step": 9167 }, { "epoch": 0.8447044732114064, "grad_norm": 0.9484851964975073, "learning_rate": 3.223520076465719e-07, "loss": 0.1116, "step": 9168 }, { "epoch": 0.8447966093886765, "grad_norm": 0.9361520273962691, "learning_rate": 3.219779377146087e-07, "loss": 0.1112, "step": 9169 }, { "epoch": 0.8448887455659465, "grad_norm": 0.9910150266298312, "learning_rate": 3.2160407001222805e-07, "loss": 0.1319, "step": 9170 }, { "epoch": 0.8449808817432165, "grad_norm": 0.9437007956534685, "learning_rate": 3.2123040457414377e-07, "loss": 0.1196, "step": 9171 }, { "epoch": 0.8450730179204865, "grad_norm": 0.9162880175898103, "learning_rate": 3.208569414350515e-07, "loss": 0.1205, "step": 9172 }, { "epoch": 0.8451651540977565, "grad_norm": 0.9328157543095837, "learning_rate": 3.204836806296269e-07, "loss": 0.1247, "step": 9173 }, { "epoch": 0.8452572902750265, "grad_norm": 1.0276232815812092, "learning_rate": 3.201106221925265e-07, "loss": 0.1333, "step": 9174 }, { "epoch": 0.8453494264522965, "grad_norm": 0.8934105754776828, "learning_rate": 3.197377661583892e-07, "loss": 0.1047, "step": 9175 }, { "epoch": 0.8454415626295665, "grad_norm": 0.8901250527712635, "learning_rate": 3.1936511256183524e-07, "loss": 0.108, "step": 9176 }, { "epoch": 0.8455336988068365, "grad_norm": 0.9456476762090105, "learning_rate": 3.1899266143746556e-07, "loss": 0.119, "step": 9177 }, { "epoch": 0.8456258349841065, "grad_norm": 0.9369759218485262, "learning_rate": 3.1862041281986224e-07, "loss": 0.1151, "step": 9178 }, { "epoch": 0.8457179711613765, "grad_norm": 0.9202364542455173, "learning_rate": 3.182483667435876e-07, "loss": 0.1141, "step": 9179 }, { "epoch": 0.8458101073386465, "grad_norm": 0.8767559307939479, "learning_rate": 3.1787652324318715e-07, "loss": 0.107, "step": 9180 }, { "epoch": 0.8459022435159165, "grad_norm": 0.9779144285133847, "learning_rate": 3.1750488235318675e-07, "loss": 0.1229, "step": 9181 }, { "epoch": 0.8459943796931866, "grad_norm": 1.0063964020443654, "learning_rate": 3.171334441080923e-07, "loss": 0.124, "step": 9182 }, { "epoch": 0.8460865158704566, "grad_norm": 0.9256576366855372, "learning_rate": 3.1676220854239326e-07, "loss": 0.1179, "step": 9183 }, { "epoch": 0.8461786520477266, "grad_norm": 0.9470305673494429, "learning_rate": 3.1639117569055744e-07, "loss": 0.1211, "step": 9184 }, { "epoch": 0.8462707882249966, "grad_norm": 0.9269075595700427, "learning_rate": 3.160203455870359e-07, "loss": 0.115, "step": 9185 }, { "epoch": 0.8463629244022666, "grad_norm": 0.9437783625013559, "learning_rate": 3.156497182662607e-07, "loss": 0.1096, "step": 9186 }, { "epoch": 0.8464550605795366, "grad_norm": 1.0404071839324958, "learning_rate": 3.1527929376264393e-07, "loss": 0.1346, "step": 9187 }, { "epoch": 0.8465471967568066, "grad_norm": 0.9384504047967224, "learning_rate": 3.149090721105805e-07, "loss": 0.1204, "step": 9188 }, { "epoch": 0.8466393329340766, "grad_norm": 0.9687143806916653, "learning_rate": 3.145390533444442e-07, "loss": 0.1205, "step": 9189 }, { "epoch": 0.8467314691113466, "grad_norm": 0.9811809575277504, "learning_rate": 3.1416923749859244e-07, "loss": 0.124, "step": 9190 }, { "epoch": 0.8468236052886166, "grad_norm": 0.9215113535637731, "learning_rate": 3.1379962460736183e-07, "loss": 0.118, "step": 9191 }, { "epoch": 0.8469157414658866, "grad_norm": 0.9453843360808005, "learning_rate": 3.1343021470507134e-07, "loss": 0.1262, "step": 9192 }, { "epoch": 0.8470078776431565, "grad_norm": 0.9679969128743854, "learning_rate": 3.1306100782602126e-07, "loss": 0.1217, "step": 9193 }, { "epoch": 0.8471000138204265, "grad_norm": 0.943336778635726, "learning_rate": 3.12692004004492e-07, "loss": 0.1292, "step": 9194 }, { "epoch": 0.8471921499976965, "grad_norm": 0.9273773600918996, "learning_rate": 3.1232320327474504e-07, "loss": 0.111, "step": 9195 }, { "epoch": 0.8472842861749666, "grad_norm": 0.9294729570727469, "learning_rate": 3.11954605671024e-07, "loss": 0.1091, "step": 9196 }, { "epoch": 0.8473764223522366, "grad_norm": 1.0014883154780123, "learning_rate": 3.1158621122755336e-07, "loss": 0.1209, "step": 9197 }, { "epoch": 0.8474685585295066, "grad_norm": 0.9293479352770679, "learning_rate": 3.112180199785389e-07, "loss": 0.1186, "step": 9198 }, { "epoch": 0.8475606947067766, "grad_norm": 0.925782398043939, "learning_rate": 3.1085003195816697e-07, "loss": 0.1168, "step": 9199 }, { "epoch": 0.8476528308840466, "grad_norm": 0.9633031131247594, "learning_rate": 3.1048224720060473e-07, "loss": 0.1167, "step": 9200 }, { "epoch": 0.8477449670613166, "grad_norm": 0.883630976694059, "learning_rate": 3.1011466574000144e-07, "loss": 0.1063, "step": 9201 }, { "epoch": 0.8478371032385866, "grad_norm": 0.8883849522188346, "learning_rate": 3.097472876104876e-07, "loss": 0.1141, "step": 9202 }, { "epoch": 0.8479292394158566, "grad_norm": 0.9053676263367607, "learning_rate": 3.093801128461735e-07, "loss": 0.1119, "step": 9203 }, { "epoch": 0.8480213755931266, "grad_norm": 0.8851968674906724, "learning_rate": 3.0901314148115203e-07, "loss": 0.1122, "step": 9204 }, { "epoch": 0.8481135117703966, "grad_norm": 0.9304807734264189, "learning_rate": 3.0864637354949545e-07, "loss": 0.1162, "step": 9205 }, { "epoch": 0.8482056479476666, "grad_norm": 0.956308995162976, "learning_rate": 3.082798090852596e-07, "loss": 0.1159, "step": 9206 }, { "epoch": 0.8482977841249366, "grad_norm": 0.938749830136273, "learning_rate": 3.079134481224788e-07, "loss": 0.1139, "step": 9207 }, { "epoch": 0.8483899203022066, "grad_norm": 0.9347902376284178, "learning_rate": 3.075472906951704e-07, "loss": 0.1213, "step": 9208 }, { "epoch": 0.8484820564794767, "grad_norm": 0.9532417511976031, "learning_rate": 3.0718133683733247e-07, "loss": 0.1308, "step": 9209 }, { "epoch": 0.8485741926567467, "grad_norm": 0.9022635398220523, "learning_rate": 3.0681558658294334e-07, "loss": 0.114, "step": 9210 }, { "epoch": 0.8486663288340167, "grad_norm": 0.953635552226034, "learning_rate": 3.0645003996596254e-07, "loss": 0.1167, "step": 9211 }, { "epoch": 0.8487584650112867, "grad_norm": 0.9560485444002419, "learning_rate": 3.0608469702033185e-07, "loss": 0.1199, "step": 9212 }, { "epoch": 0.8488506011885567, "grad_norm": 0.9130642873786164, "learning_rate": 3.057195577799729e-07, "loss": 0.1144, "step": 9213 }, { "epoch": 0.8489427373658267, "grad_norm": 0.9626403344881101, "learning_rate": 3.053546222787895e-07, "loss": 0.1234, "step": 9214 }, { "epoch": 0.8490348735430967, "grad_norm": 0.9327186071825609, "learning_rate": 3.0498989055066597e-07, "loss": 0.1122, "step": 9215 }, { "epoch": 0.8491270097203667, "grad_norm": 0.9708981271608061, "learning_rate": 3.046253626294665e-07, "loss": 0.1278, "step": 9216 }, { "epoch": 0.8492191458976367, "grad_norm": 0.8690144829639436, "learning_rate": 3.0426103854903856e-07, "loss": 0.1021, "step": 9217 }, { "epoch": 0.8493112820749067, "grad_norm": 0.928018364641159, "learning_rate": 3.0389691834320944e-07, "loss": 0.1133, "step": 9218 }, { "epoch": 0.8494034182521767, "grad_norm": 0.8965255066759291, "learning_rate": 3.0353300204578854e-07, "loss": 0.1102, "step": 9219 }, { "epoch": 0.8494955544294467, "grad_norm": 0.8911346876173641, "learning_rate": 3.031692896905644e-07, "loss": 0.1097, "step": 9220 }, { "epoch": 0.8495876906067167, "grad_norm": 0.96694668047903, "learning_rate": 3.0280578131130805e-07, "loss": 0.1198, "step": 9221 }, { "epoch": 0.8496798267839867, "grad_norm": 0.8842727538731018, "learning_rate": 3.024424769417711e-07, "loss": 0.1018, "step": 9222 }, { "epoch": 0.8497719629612568, "grad_norm": 0.9233797044528547, "learning_rate": 3.020793766156871e-07, "loss": 0.1181, "step": 9223 }, { "epoch": 0.8498640991385268, "grad_norm": 0.9490387802732133, "learning_rate": 3.017164803667691e-07, "loss": 0.1215, "step": 9224 }, { "epoch": 0.8499562353157968, "grad_norm": 1.0032873233600246, "learning_rate": 3.013537882287132e-07, "loss": 0.1153, "step": 9225 }, { "epoch": 0.8500483714930668, "grad_norm": 0.9287605713152437, "learning_rate": 3.0099130023519384e-07, "loss": 0.1153, "step": 9226 }, { "epoch": 0.8501405076703368, "grad_norm": 0.9260430559067087, "learning_rate": 3.0062901641986967e-07, "loss": 0.1125, "step": 9227 }, { "epoch": 0.8502326438476068, "grad_norm": 0.9559548455870349, "learning_rate": 3.002669368163774e-07, "loss": 0.1192, "step": 9228 }, { "epoch": 0.8503247800248768, "grad_norm": 0.9856506355542799, "learning_rate": 2.999050614583368e-07, "loss": 0.1319, "step": 9229 }, { "epoch": 0.8504169162021468, "grad_norm": 0.9733649361017951, "learning_rate": 2.995433903793485e-07, "loss": 0.1178, "step": 9230 }, { "epoch": 0.8505090523794168, "grad_norm": 0.9179807223176983, "learning_rate": 2.9918192361299335e-07, "loss": 0.1113, "step": 9231 }, { "epoch": 0.8506011885566868, "grad_norm": 0.9337430679762098, "learning_rate": 2.988206611928329e-07, "loss": 0.1157, "step": 9232 }, { "epoch": 0.8506933247339568, "grad_norm": 0.9471775562002914, "learning_rate": 2.984596031524109e-07, "loss": 0.1199, "step": 9233 }, { "epoch": 0.8507854609112268, "grad_norm": 0.8764880163175652, "learning_rate": 2.980987495252516e-07, "loss": 0.0979, "step": 9234 }, { "epoch": 0.8508775970884968, "grad_norm": 0.9735124287920944, "learning_rate": 2.9773810034486095e-07, "loss": 0.128, "step": 9235 }, { "epoch": 0.8509697332657667, "grad_norm": 0.9763026567613571, "learning_rate": 2.973776556447247e-07, "loss": 0.117, "step": 9236 }, { "epoch": 0.8510618694430369, "grad_norm": 0.9657511144374241, "learning_rate": 2.970174154583097e-07, "loss": 0.1349, "step": 9237 }, { "epoch": 0.8511540056203069, "grad_norm": 0.9732217261684756, "learning_rate": 2.9665737981906475e-07, "loss": 0.1157, "step": 9238 }, { "epoch": 0.8512461417975768, "grad_norm": 0.9557352112393618, "learning_rate": 2.962975487604197e-07, "loss": 0.1131, "step": 9239 }, { "epoch": 0.8513382779748468, "grad_norm": 0.879001540732117, "learning_rate": 2.9593792231578407e-07, "loss": 0.1091, "step": 9240 }, { "epoch": 0.8514304141521168, "grad_norm": 0.9464176135109578, "learning_rate": 2.9557850051854935e-07, "loss": 0.1133, "step": 9241 }, { "epoch": 0.8515225503293868, "grad_norm": 0.9042055512690569, "learning_rate": 2.9521928340208867e-07, "loss": 0.1073, "step": 9242 }, { "epoch": 0.8516146865066568, "grad_norm": 0.9290529203658289, "learning_rate": 2.9486027099975416e-07, "loss": 0.1111, "step": 9243 }, { "epoch": 0.8517068226839268, "grad_norm": 0.9999503573416042, "learning_rate": 2.9450146334488144e-07, "loss": 0.1238, "step": 9244 }, { "epoch": 0.8517989588611968, "grad_norm": 0.9769975220666243, "learning_rate": 2.9414286047078495e-07, "loss": 0.1185, "step": 9245 }, { "epoch": 0.8518910950384668, "grad_norm": 0.9805675881687931, "learning_rate": 2.937844624107608e-07, "loss": 0.1249, "step": 9246 }, { "epoch": 0.8519832312157368, "grad_norm": 0.9392593771275237, "learning_rate": 2.934262691980877e-07, "loss": 0.1271, "step": 9247 }, { "epoch": 0.8520753673930068, "grad_norm": 0.9420394121292485, "learning_rate": 2.930682808660226e-07, "loss": 0.1238, "step": 9248 }, { "epoch": 0.8521675035702768, "grad_norm": 0.9336936611055597, "learning_rate": 2.927104974478048e-07, "loss": 0.112, "step": 9249 }, { "epoch": 0.8522596397475469, "grad_norm": 0.9187592636253514, "learning_rate": 2.9235291897665497e-07, "loss": 0.1095, "step": 9250 }, { "epoch": 0.8523517759248169, "grad_norm": 0.9382325552043895, "learning_rate": 2.91995545485774e-07, "loss": 0.123, "step": 9251 }, { "epoch": 0.8524439121020869, "grad_norm": 0.9385540114569236, "learning_rate": 2.9163837700834473e-07, "loss": 0.1111, "step": 9252 }, { "epoch": 0.8525360482793569, "grad_norm": 1.0194309807321986, "learning_rate": 2.912814135775299e-07, "loss": 0.1201, "step": 9253 }, { "epoch": 0.8526281844566269, "grad_norm": 0.9422330543550835, "learning_rate": 2.909246552264733e-07, "loss": 0.1185, "step": 9254 }, { "epoch": 0.8527203206338969, "grad_norm": 0.9714237671246142, "learning_rate": 2.905681019882997e-07, "loss": 0.1233, "step": 9255 }, { "epoch": 0.8528124568111669, "grad_norm": 0.889642435524911, "learning_rate": 2.902117538961166e-07, "loss": 0.1089, "step": 9256 }, { "epoch": 0.8529045929884369, "grad_norm": 0.9627728725749669, "learning_rate": 2.898556109830092e-07, "loss": 0.1232, "step": 9257 }, { "epoch": 0.8529967291657069, "grad_norm": 0.924709180557171, "learning_rate": 2.894996732820468e-07, "loss": 0.1198, "step": 9258 }, { "epoch": 0.8530888653429769, "grad_norm": 0.995663879425923, "learning_rate": 2.8914394082627694e-07, "loss": 0.1308, "step": 9259 }, { "epoch": 0.8531810015202469, "grad_norm": 0.8870383336303369, "learning_rate": 2.8878841364873067e-07, "loss": 0.108, "step": 9260 }, { "epoch": 0.8532731376975169, "grad_norm": 0.939204703933531, "learning_rate": 2.8843309178241766e-07, "loss": 0.1229, "step": 9261 }, { "epoch": 0.8533652738747869, "grad_norm": 0.9353801215931173, "learning_rate": 2.880779752603302e-07, "loss": 0.117, "step": 9262 }, { "epoch": 0.8534574100520569, "grad_norm": 0.9319288195935074, "learning_rate": 2.877230641154413e-07, "loss": 0.1109, "step": 9263 }, { "epoch": 0.853549546229327, "grad_norm": 0.9812789745070393, "learning_rate": 2.873683583807038e-07, "loss": 0.1217, "step": 9264 }, { "epoch": 0.853641682406597, "grad_norm": 0.9535806751362134, "learning_rate": 2.8701385808905217e-07, "loss": 0.1217, "step": 9265 }, { "epoch": 0.853733818583867, "grad_norm": 1.0046894585013302, "learning_rate": 2.8665956327340175e-07, "loss": 0.1176, "step": 9266 }, { "epoch": 0.853825954761137, "grad_norm": 0.9303632824861731, "learning_rate": 2.8630547396664905e-07, "loss": 0.1179, "step": 9267 }, { "epoch": 0.853918090938407, "grad_norm": 0.9387604544667134, "learning_rate": 2.8595159020167186e-07, "loss": 0.1261, "step": 9268 }, { "epoch": 0.854010227115677, "grad_norm": 0.9702272703985197, "learning_rate": 2.855979120113278e-07, "loss": 0.1234, "step": 9269 }, { "epoch": 0.854102363292947, "grad_norm": 0.9525420190866508, "learning_rate": 2.8524443942845567e-07, "loss": 0.1259, "step": 9270 }, { "epoch": 0.854194499470217, "grad_norm": 0.9265347955081413, "learning_rate": 2.848911724858755e-07, "loss": 0.1159, "step": 9271 }, { "epoch": 0.854286635647487, "grad_norm": 0.8711773749514531, "learning_rate": 2.8453811121638834e-07, "loss": 0.1004, "step": 9272 }, { "epoch": 0.854378771824757, "grad_norm": 0.9477565096775316, "learning_rate": 2.841852556527763e-07, "loss": 0.1227, "step": 9273 }, { "epoch": 0.854470908002027, "grad_norm": 0.9126296349395749, "learning_rate": 2.8383260582780206e-07, "loss": 0.1102, "step": 9274 }, { "epoch": 0.854563044179297, "grad_norm": 0.9525387630469448, "learning_rate": 2.8348016177420833e-07, "loss": 0.1181, "step": 9275 }, { "epoch": 0.854655180356567, "grad_norm": 0.9081397804878282, "learning_rate": 2.8312792352472003e-07, "loss": 0.1146, "step": 9276 }, { "epoch": 0.8547473165338371, "grad_norm": 0.9292285804423576, "learning_rate": 2.8277589111204315e-07, "loss": 0.1052, "step": 9277 }, { "epoch": 0.8548394527111071, "grad_norm": 0.957473562369141, "learning_rate": 2.824240645688628e-07, "loss": 0.1172, "step": 9278 }, { "epoch": 0.8549315888883771, "grad_norm": 0.9231740975710168, "learning_rate": 2.8207244392784715e-07, "loss": 0.1088, "step": 9279 }, { "epoch": 0.8550237250656471, "grad_norm": 0.9829296508725345, "learning_rate": 2.817210292216435e-07, "loss": 0.1206, "step": 9280 }, { "epoch": 0.855115861242917, "grad_norm": 0.9620000672282338, "learning_rate": 2.813698204828816e-07, "loss": 0.1142, "step": 9281 }, { "epoch": 0.855207997420187, "grad_norm": 0.9896587979379299, "learning_rate": 2.8101881774416975e-07, "loss": 0.1293, "step": 9282 }, { "epoch": 0.855300133597457, "grad_norm": 0.9736012672651115, "learning_rate": 2.806680210380999e-07, "loss": 0.116, "step": 9283 }, { "epoch": 0.855392269774727, "grad_norm": 0.946172637330825, "learning_rate": 2.8031743039724337e-07, "loss": 0.1172, "step": 9284 }, { "epoch": 0.855484405951997, "grad_norm": 0.9513361916546126, "learning_rate": 2.7996704585415227e-07, "loss": 0.1127, "step": 9285 }, { "epoch": 0.855576542129267, "grad_norm": 0.881332691957772, "learning_rate": 2.796168674413596e-07, "loss": 0.1091, "step": 9286 }, { "epoch": 0.855668678306537, "grad_norm": 0.9543852740058231, "learning_rate": 2.7926689519137963e-07, "loss": 0.1069, "step": 9287 }, { "epoch": 0.855760814483807, "grad_norm": 1.0190560153967547, "learning_rate": 2.7891712913670765e-07, "loss": 0.127, "step": 9288 }, { "epoch": 0.855852950661077, "grad_norm": 0.9276584149377606, "learning_rate": 2.785675693098194e-07, "loss": 0.1174, "step": 9289 }, { "epoch": 0.855945086838347, "grad_norm": 0.9138106971403834, "learning_rate": 2.782182157431718e-07, "loss": 0.1122, "step": 9290 }, { "epoch": 0.8560372230156171, "grad_norm": 0.9154240943613224, "learning_rate": 2.778690684692012e-07, "loss": 0.1131, "step": 9291 }, { "epoch": 0.8561293591928871, "grad_norm": 1.0292627495010502, "learning_rate": 2.7752012752032683e-07, "loss": 0.1357, "step": 9292 }, { "epoch": 0.8562214953701571, "grad_norm": 0.8692529164590608, "learning_rate": 2.7717139292894824e-07, "loss": 0.1097, "step": 9293 }, { "epoch": 0.8563136315474271, "grad_norm": 0.9380278752902587, "learning_rate": 2.768228647274446e-07, "loss": 0.1174, "step": 9294 }, { "epoch": 0.8564057677246971, "grad_norm": 0.9121772464230438, "learning_rate": 2.7647454294817773e-07, "loss": 0.1146, "step": 9295 }, { "epoch": 0.8564979039019671, "grad_norm": 0.9604678113327662, "learning_rate": 2.7612642762348844e-07, "loss": 0.1086, "step": 9296 }, { "epoch": 0.8565900400792371, "grad_norm": 0.9726017733465043, "learning_rate": 2.757785187857001e-07, "loss": 0.1143, "step": 9297 }, { "epoch": 0.8566821762565071, "grad_norm": 0.9420082236664085, "learning_rate": 2.7543081646711487e-07, "loss": 0.1122, "step": 9298 }, { "epoch": 0.8567743124337771, "grad_norm": 0.9700969708005628, "learning_rate": 2.7508332070001807e-07, "loss": 0.1133, "step": 9299 }, { "epoch": 0.8568664486110471, "grad_norm": 0.9530651641631397, "learning_rate": 2.74736031516675e-07, "loss": 0.1231, "step": 9300 }, { "epoch": 0.8569585847883171, "grad_norm": 0.9404951910508061, "learning_rate": 2.7438894894933013e-07, "loss": 0.125, "step": 9301 }, { "epoch": 0.8570507209655871, "grad_norm": 0.9157359729471527, "learning_rate": 2.7404207303021153e-07, "loss": 0.1151, "step": 9302 }, { "epoch": 0.8571428571428571, "grad_norm": 0.9539864507426042, "learning_rate": 2.736954037915254e-07, "loss": 0.1224, "step": 9303 }, { "epoch": 0.8572349933201272, "grad_norm": 1.0345955402987674, "learning_rate": 2.733489412654608e-07, "loss": 0.1293, "step": 9304 }, { "epoch": 0.8573271294973972, "grad_norm": 0.9601556995270799, "learning_rate": 2.730026854841869e-07, "loss": 0.1279, "step": 9305 }, { "epoch": 0.8574192656746672, "grad_norm": 0.9853873195926093, "learning_rate": 2.7265663647985357e-07, "loss": 0.127, "step": 9306 }, { "epoch": 0.8575114018519372, "grad_norm": 0.9441216337976664, "learning_rate": 2.723107942845907e-07, "loss": 0.1252, "step": 9307 }, { "epoch": 0.8576035380292072, "grad_norm": 0.9547990137449248, "learning_rate": 2.7196515893051003e-07, "loss": 0.1186, "step": 9308 }, { "epoch": 0.8576956742064772, "grad_norm": 0.9812925813959527, "learning_rate": 2.7161973044970453e-07, "loss": 0.1247, "step": 9309 }, { "epoch": 0.8577878103837472, "grad_norm": 0.9238639272845356, "learning_rate": 2.712745088742472e-07, "loss": 0.1087, "step": 9310 }, { "epoch": 0.8578799465610172, "grad_norm": 0.9477555453822856, "learning_rate": 2.7092949423619145e-07, "loss": 0.1201, "step": 9311 }, { "epoch": 0.8579720827382872, "grad_norm": 0.9510317673971791, "learning_rate": 2.7058468656757183e-07, "loss": 0.1166, "step": 9312 }, { "epoch": 0.8580642189155572, "grad_norm": 0.8710369385781217, "learning_rate": 2.702400859004037e-07, "loss": 0.108, "step": 9313 }, { "epoch": 0.8581563550928272, "grad_norm": 0.9460929888604528, "learning_rate": 2.698956922666843e-07, "loss": 0.1199, "step": 9314 }, { "epoch": 0.8582484912700972, "grad_norm": 0.9849952167511389, "learning_rate": 2.695515056983894e-07, "loss": 0.116, "step": 9315 }, { "epoch": 0.8583406274473672, "grad_norm": 0.9495136032256005, "learning_rate": 2.692075262274771e-07, "loss": 0.1138, "step": 9316 }, { "epoch": 0.8584327636246372, "grad_norm": 0.970959517015624, "learning_rate": 2.6886375388588656e-07, "loss": 0.125, "step": 9317 }, { "epoch": 0.8585248998019073, "grad_norm": 0.9827648328416002, "learning_rate": 2.6852018870553664e-07, "loss": 0.1359, "step": 9318 }, { "epoch": 0.8586170359791773, "grad_norm": 0.9965665810884955, "learning_rate": 2.6817683071832687e-07, "loss": 0.1345, "step": 9319 }, { "epoch": 0.8587091721564473, "grad_norm": 0.8961725861415342, "learning_rate": 2.6783367995613846e-07, "loss": 0.1088, "step": 9320 }, { "epoch": 0.8588013083337173, "grad_norm": 0.9104434301871034, "learning_rate": 2.6749073645083333e-07, "loss": 0.1125, "step": 9321 }, { "epoch": 0.8588934445109873, "grad_norm": 0.9244596919139304, "learning_rate": 2.6714800023425385e-07, "loss": 0.1189, "step": 9322 }, { "epoch": 0.8589855806882573, "grad_norm": 0.9368277789935434, "learning_rate": 2.668054713382229e-07, "loss": 0.1195, "step": 9323 }, { "epoch": 0.8590777168655273, "grad_norm": 0.9729852872130562, "learning_rate": 2.6646314979454386e-07, "loss": 0.1161, "step": 9324 }, { "epoch": 0.8591698530427972, "grad_norm": 0.947259066999539, "learning_rate": 2.6612103563500165e-07, "loss": 0.1269, "step": 9325 }, { "epoch": 0.8592619892200672, "grad_norm": 0.9575031330131318, "learning_rate": 2.657791288913622e-07, "loss": 0.1229, "step": 9326 }, { "epoch": 0.8593541253973372, "grad_norm": 0.8933964099773779, "learning_rate": 2.6543742959537074e-07, "loss": 0.1115, "step": 9327 }, { "epoch": 0.8594462615746072, "grad_norm": 0.880212005558546, "learning_rate": 2.650959377787549e-07, "loss": 0.1084, "step": 9328 }, { "epoch": 0.8595383977518772, "grad_norm": 0.9167841432310003, "learning_rate": 2.647546534732209e-07, "loss": 0.1102, "step": 9329 }, { "epoch": 0.8596305339291472, "grad_norm": 0.9309781613665132, "learning_rate": 2.6441357671045833e-07, "loss": 0.1158, "step": 9330 }, { "epoch": 0.8597226701064172, "grad_norm": 0.923428605833851, "learning_rate": 2.640727075221361e-07, "loss": 0.1164, "step": 9331 }, { "epoch": 0.8598148062836873, "grad_norm": 0.9617325314977327, "learning_rate": 2.637320459399031e-07, "loss": 0.1201, "step": 9332 }, { "epoch": 0.8599069424609573, "grad_norm": 0.8600354204904673, "learning_rate": 2.6339159199539085e-07, "loss": 0.1084, "step": 9333 }, { "epoch": 0.8599990786382273, "grad_norm": 0.9258377390351011, "learning_rate": 2.6305134572020943e-07, "loss": 0.1143, "step": 9334 }, { "epoch": 0.8600912148154973, "grad_norm": 0.9465789010041532, "learning_rate": 2.6271130714595164e-07, "loss": 0.1144, "step": 9335 }, { "epoch": 0.8601833509927673, "grad_norm": 0.8715165159103272, "learning_rate": 2.623714763041896e-07, "loss": 0.1077, "step": 9336 }, { "epoch": 0.8602754871700373, "grad_norm": 0.9303896406648642, "learning_rate": 2.620318532264765e-07, "loss": 0.1173, "step": 9337 }, { "epoch": 0.8603676233473073, "grad_norm": 0.9786268133601739, "learning_rate": 2.6169243794434725e-07, "loss": 0.1242, "step": 9338 }, { "epoch": 0.8604597595245773, "grad_norm": 0.9589918812170614, "learning_rate": 2.613532304893163e-07, "loss": 0.1232, "step": 9339 }, { "epoch": 0.8605518957018473, "grad_norm": 0.8785947502166375, "learning_rate": 2.610142308928779e-07, "loss": 0.1135, "step": 9340 }, { "epoch": 0.8606440318791173, "grad_norm": 0.9312433513423943, "learning_rate": 2.6067543918650935e-07, "loss": 0.1242, "step": 9341 }, { "epoch": 0.8607361680563873, "grad_norm": 0.9325694372347554, "learning_rate": 2.603368554016672e-07, "loss": 0.1129, "step": 9342 }, { "epoch": 0.8608283042336573, "grad_norm": 0.9469327317443748, "learning_rate": 2.5999847956978963e-07, "loss": 0.1158, "step": 9343 }, { "epoch": 0.8609204404109273, "grad_norm": 0.90638245153095, "learning_rate": 2.5966031172229427e-07, "loss": 0.1109, "step": 9344 }, { "epoch": 0.8610125765881974, "grad_norm": 0.9413146612471566, "learning_rate": 2.593223518905796e-07, "loss": 0.1157, "step": 9345 }, { "epoch": 0.8611047127654674, "grad_norm": 0.8941304858723512, "learning_rate": 2.589846001060259e-07, "loss": 0.1084, "step": 9346 }, { "epoch": 0.8611968489427374, "grad_norm": 0.9540972957902403, "learning_rate": 2.586470563999935e-07, "loss": 0.1254, "step": 9347 }, { "epoch": 0.8612889851200074, "grad_norm": 0.9197783560409639, "learning_rate": 2.5830972080382265e-07, "loss": 0.1179, "step": 9348 }, { "epoch": 0.8613811212972774, "grad_norm": 1.0135846500017713, "learning_rate": 2.5797259334883613e-07, "loss": 0.1269, "step": 9349 }, { "epoch": 0.8614732574745474, "grad_norm": 0.9956640673249679, "learning_rate": 2.5763567406633496e-07, "loss": 0.1274, "step": 9350 }, { "epoch": 0.8615653936518174, "grad_norm": 0.9982956385527713, "learning_rate": 2.5729896298760325e-07, "loss": 0.1256, "step": 9351 }, { "epoch": 0.8616575298290874, "grad_norm": 0.9997035981643813, "learning_rate": 2.569624601439039e-07, "loss": 0.1186, "step": 9352 }, { "epoch": 0.8617496660063574, "grad_norm": 0.9473032276378903, "learning_rate": 2.566261655664812e-07, "loss": 0.113, "step": 9353 }, { "epoch": 0.8618418021836274, "grad_norm": 0.8904282708574572, "learning_rate": 2.562900792865611e-07, "loss": 0.1014, "step": 9354 }, { "epoch": 0.8619339383608974, "grad_norm": 0.9421774910383477, "learning_rate": 2.5595420133534887e-07, "loss": 0.1127, "step": 9355 }, { "epoch": 0.8620260745381674, "grad_norm": 0.9437685397025191, "learning_rate": 2.5561853174402964e-07, "loss": 0.1104, "step": 9356 }, { "epoch": 0.8621182107154374, "grad_norm": 0.9365914283115228, "learning_rate": 2.5528307054377145e-07, "loss": 0.1077, "step": 9357 }, { "epoch": 0.8622103468927074, "grad_norm": 0.9285492507636135, "learning_rate": 2.549478177657219e-07, "loss": 0.1051, "step": 9358 }, { "epoch": 0.8623024830699775, "grad_norm": 0.9765891819348158, "learning_rate": 2.546127734410095e-07, "loss": 0.1252, "step": 9359 }, { "epoch": 0.8623946192472475, "grad_norm": 0.9287397592156162, "learning_rate": 2.542779376007426e-07, "loss": 0.119, "step": 9360 }, { "epoch": 0.8624867554245175, "grad_norm": 0.9004090598160094, "learning_rate": 2.5394331027601056e-07, "loss": 0.1024, "step": 9361 }, { "epoch": 0.8625788916017875, "grad_norm": 0.9722848299604637, "learning_rate": 2.5360889149788375e-07, "loss": 0.1241, "step": 9362 }, { "epoch": 0.8626710277790575, "grad_norm": 0.906785297368422, "learning_rate": 2.532746812974132e-07, "loss": 0.114, "step": 9363 }, { "epoch": 0.8627631639563275, "grad_norm": 0.9589773361342592, "learning_rate": 2.529406797056305e-07, "loss": 0.1213, "step": 9364 }, { "epoch": 0.8628553001335975, "grad_norm": 0.9966890284084031, "learning_rate": 2.5260688675354806e-07, "loss": 0.1229, "step": 9365 }, { "epoch": 0.8629474363108675, "grad_norm": 0.9667983213030846, "learning_rate": 2.5227330247215716e-07, "loss": 0.1208, "step": 9366 }, { "epoch": 0.8630395724881375, "grad_norm": 0.9291822525151221, "learning_rate": 2.519399268924322e-07, "loss": 0.1139, "step": 9367 }, { "epoch": 0.8631317086654074, "grad_norm": 0.8994909111600728, "learning_rate": 2.516067600453273e-07, "loss": 0.1034, "step": 9368 }, { "epoch": 0.8632238448426774, "grad_norm": 1.0207921145021992, "learning_rate": 2.5127380196177634e-07, "loss": 0.1284, "step": 9369 }, { "epoch": 0.8633159810199474, "grad_norm": 0.9177180668568711, "learning_rate": 2.509410526726952e-07, "loss": 0.1095, "step": 9370 }, { "epoch": 0.8634081171972174, "grad_norm": 0.8712347431050033, "learning_rate": 2.5060851220897906e-07, "loss": 0.1001, "step": 9371 }, { "epoch": 0.8635002533744875, "grad_norm": 0.9886160970135562, "learning_rate": 2.5027618060150526e-07, "loss": 0.1185, "step": 9372 }, { "epoch": 0.8635923895517575, "grad_norm": 0.9670625304491519, "learning_rate": 2.4994405788112933e-07, "loss": 0.1218, "step": 9373 }, { "epoch": 0.8636845257290275, "grad_norm": 0.9168572699846476, "learning_rate": 2.4961214407869e-07, "loss": 0.1081, "step": 9374 }, { "epoch": 0.8637766619062975, "grad_norm": 0.9599698651657775, "learning_rate": 2.492804392250059e-07, "loss": 0.1181, "step": 9375 }, { "epoch": 0.8638687980835675, "grad_norm": 0.9619307269182482, "learning_rate": 2.489489433508752e-07, "loss": 0.1295, "step": 9376 }, { "epoch": 0.8639609342608375, "grad_norm": 0.9576970010297813, "learning_rate": 2.486176564870768e-07, "loss": 0.1197, "step": 9377 }, { "epoch": 0.8640530704381075, "grad_norm": 1.0203235684877179, "learning_rate": 2.4828657866437123e-07, "loss": 0.1371, "step": 9378 }, { "epoch": 0.8641452066153775, "grad_norm": 0.9495758122219625, "learning_rate": 2.479557099134991e-07, "loss": 0.1164, "step": 9379 }, { "epoch": 0.8642373427926475, "grad_norm": 0.9214388616761735, "learning_rate": 2.4762505026518224e-07, "loss": 0.1094, "step": 9380 }, { "epoch": 0.8643294789699175, "grad_norm": 0.9413072214892965, "learning_rate": 2.4729459975012194e-07, "loss": 0.1158, "step": 9381 }, { "epoch": 0.8644216151471875, "grad_norm": 0.9354397693146485, "learning_rate": 2.46964358399e-07, "loss": 0.1136, "step": 9382 }, { "epoch": 0.8645137513244575, "grad_norm": 0.9088302027421223, "learning_rate": 2.4663432624247975e-07, "loss": 0.1124, "step": 9383 }, { "epoch": 0.8646058875017275, "grad_norm": 0.8911370384300263, "learning_rate": 2.4630450331120547e-07, "loss": 0.1108, "step": 9384 }, { "epoch": 0.8646980236789975, "grad_norm": 0.9630587035183339, "learning_rate": 2.4597488963579995e-07, "loss": 0.1162, "step": 9385 }, { "epoch": 0.8647901598562676, "grad_norm": 0.9209292229525733, "learning_rate": 2.4564548524686925e-07, "loss": 0.1106, "step": 9386 }, { "epoch": 0.8648822960335376, "grad_norm": 0.9432471817490239, "learning_rate": 2.4531629017499724e-07, "loss": 0.1097, "step": 9387 }, { "epoch": 0.8649744322108076, "grad_norm": 0.9825332338153762, "learning_rate": 2.449873044507503e-07, "loss": 0.1241, "step": 9388 }, { "epoch": 0.8650665683880776, "grad_norm": 0.9408846109600006, "learning_rate": 2.446585281046751e-07, "loss": 0.1127, "step": 9389 }, { "epoch": 0.8651587045653476, "grad_norm": 0.9401838784688298, "learning_rate": 2.443299611672981e-07, "loss": 0.1269, "step": 9390 }, { "epoch": 0.8652508407426176, "grad_norm": 0.9111818343603258, "learning_rate": 2.44001603669127e-07, "loss": 0.119, "step": 9391 }, { "epoch": 0.8653429769198876, "grad_norm": 0.906464655426303, "learning_rate": 2.4367345564065003e-07, "loss": 0.1097, "step": 9392 }, { "epoch": 0.8654351130971576, "grad_norm": 0.875938489337497, "learning_rate": 2.433455171123356e-07, "loss": 0.1069, "step": 9393 }, { "epoch": 0.8655272492744276, "grad_norm": 0.9003402477495523, "learning_rate": 2.4301778811463255e-07, "loss": 0.1077, "step": 9394 }, { "epoch": 0.8656193854516976, "grad_norm": 0.9109081886705044, "learning_rate": 2.426902686779706e-07, "loss": 0.1186, "step": 9395 }, { "epoch": 0.8657115216289676, "grad_norm": 0.9609017270753635, "learning_rate": 2.4236295883276e-07, "loss": 0.1208, "step": 9396 }, { "epoch": 0.8658036578062376, "grad_norm": 0.898910913025197, "learning_rate": 2.420358586093921e-07, "loss": 0.1047, "step": 9397 }, { "epoch": 0.8658957939835076, "grad_norm": 0.9206843573844598, "learning_rate": 2.4170896803823785e-07, "loss": 0.1115, "step": 9398 }, { "epoch": 0.8659879301607776, "grad_norm": 0.9272830235116014, "learning_rate": 2.4138228714964853e-07, "loss": 0.1091, "step": 9399 }, { "epoch": 0.8660800663380477, "grad_norm": 0.9547090967557088, "learning_rate": 2.4105581597395705e-07, "loss": 0.1232, "step": 9400 }, { "epoch": 0.8661722025153177, "grad_norm": 0.9480953903908746, "learning_rate": 2.4072955454147643e-07, "loss": 0.1203, "step": 9401 }, { "epoch": 0.8662643386925877, "grad_norm": 0.9798031746500194, "learning_rate": 2.4040350288249944e-07, "loss": 0.1242, "step": 9402 }, { "epoch": 0.8663564748698577, "grad_norm": 0.9522213276476387, "learning_rate": 2.400776610273006e-07, "loss": 0.1155, "step": 9403 }, { "epoch": 0.8664486110471277, "grad_norm": 0.9347048484577208, "learning_rate": 2.397520290061339e-07, "loss": 0.1121, "step": 9404 }, { "epoch": 0.8665407472243977, "grad_norm": 0.9501824992765463, "learning_rate": 2.394266068492351e-07, "loss": 0.1171, "step": 9405 }, { "epoch": 0.8666328834016677, "grad_norm": 0.8502324502165575, "learning_rate": 2.391013945868187e-07, "loss": 0.0941, "step": 9406 }, { "epoch": 0.8667250195789377, "grad_norm": 0.9133438904722627, "learning_rate": 2.38776392249081e-07, "loss": 0.1126, "step": 9407 }, { "epoch": 0.8668171557562077, "grad_norm": 0.9547356127125062, "learning_rate": 2.38451599866199e-07, "loss": 0.1272, "step": 9408 }, { "epoch": 0.8669092919334777, "grad_norm": 1.0557212654833275, "learning_rate": 2.381270174683295e-07, "loss": 0.1321, "step": 9409 }, { "epoch": 0.8670014281107477, "grad_norm": 0.9335280236927755, "learning_rate": 2.3780264508560942e-07, "loss": 0.1165, "step": 9410 }, { "epoch": 0.8670935642880176, "grad_norm": 0.9290477286297832, "learning_rate": 2.3747848274815716e-07, "loss": 0.1198, "step": 9411 }, { "epoch": 0.8671857004652876, "grad_norm": 0.9789407678799273, "learning_rate": 2.3715453048607118e-07, "loss": 0.1263, "step": 9412 }, { "epoch": 0.8672778366425578, "grad_norm": 1.0063340760093704, "learning_rate": 2.368307883294313e-07, "loss": 0.1116, "step": 9413 }, { "epoch": 0.8673699728198278, "grad_norm": 0.9504096460020395, "learning_rate": 2.3650725630829598e-07, "loss": 0.1236, "step": 9414 }, { "epoch": 0.8674621089970977, "grad_norm": 0.9909104584146814, "learning_rate": 2.3618393445270504e-07, "loss": 0.1241, "step": 9415 }, { "epoch": 0.8675542451743677, "grad_norm": 0.8738058725690909, "learning_rate": 2.3586082279267952e-07, "loss": 0.1042, "step": 9416 }, { "epoch": 0.8676463813516377, "grad_norm": 0.9207349471071707, "learning_rate": 2.355379213582204e-07, "loss": 0.121, "step": 9417 }, { "epoch": 0.8677385175289077, "grad_norm": 0.9308193808957346, "learning_rate": 2.3521523017930954e-07, "loss": 0.1124, "step": 9418 }, { "epoch": 0.8678306537061777, "grad_norm": 0.9365520666702065, "learning_rate": 2.3489274928590795e-07, "loss": 0.1225, "step": 9419 }, { "epoch": 0.8679227898834477, "grad_norm": 0.9492932580673579, "learning_rate": 2.3457047870795808e-07, "loss": 0.1151, "step": 9420 }, { "epoch": 0.8680149260607177, "grad_norm": 0.9490480593481834, "learning_rate": 2.3424841847538292e-07, "loss": 0.1168, "step": 9421 }, { "epoch": 0.8681070622379877, "grad_norm": 0.9449269641389587, "learning_rate": 2.3392656861808666e-07, "loss": 0.1193, "step": 9422 }, { "epoch": 0.8681991984152577, "grad_norm": 0.9233460585881546, "learning_rate": 2.3360492916595174e-07, "loss": 0.1207, "step": 9423 }, { "epoch": 0.8682913345925277, "grad_norm": 0.9503848489495152, "learning_rate": 2.332835001488437e-07, "loss": 0.1091, "step": 9424 }, { "epoch": 0.8683834707697977, "grad_norm": 0.9610048378704811, "learning_rate": 2.3296228159660594e-07, "loss": 0.125, "step": 9425 }, { "epoch": 0.8684756069470677, "grad_norm": 0.9498156086623913, "learning_rate": 2.3264127353906485e-07, "loss": 0.1147, "step": 9426 }, { "epoch": 0.8685677431243378, "grad_norm": 0.914848634914354, "learning_rate": 2.323204760060252e-07, "loss": 0.1096, "step": 9427 }, { "epoch": 0.8686598793016078, "grad_norm": 0.8394385416081372, "learning_rate": 2.3199988902727317e-07, "loss": 0.1005, "step": 9428 }, { "epoch": 0.8687520154788778, "grad_norm": 0.9696929167719233, "learning_rate": 2.3167951263257633e-07, "loss": 0.1304, "step": 9429 }, { "epoch": 0.8688441516561478, "grad_norm": 0.9503528872420482, "learning_rate": 2.313593468516806e-07, "loss": 0.1127, "step": 9430 }, { "epoch": 0.8689362878334178, "grad_norm": 0.9408948277709294, "learning_rate": 2.3103939171431305e-07, "loss": 0.1247, "step": 9431 }, { "epoch": 0.8690284240106878, "grad_norm": 0.9920825083534364, "learning_rate": 2.307196472501824e-07, "loss": 0.1306, "step": 9432 }, { "epoch": 0.8691205601879578, "grad_norm": 0.906482830614097, "learning_rate": 2.3040011348897689e-07, "loss": 0.1105, "step": 9433 }, { "epoch": 0.8692126963652278, "grad_norm": 0.9142460311821695, "learning_rate": 2.3008079046036525e-07, "loss": 0.115, "step": 9434 }, { "epoch": 0.8693048325424978, "grad_norm": 0.9322906020030624, "learning_rate": 2.2976167819399652e-07, "loss": 0.1076, "step": 9435 }, { "epoch": 0.8693969687197678, "grad_norm": 0.8997499764924649, "learning_rate": 2.294427767195001e-07, "loss": 0.1058, "step": 9436 }, { "epoch": 0.8694891048970378, "grad_norm": 0.9470817054742046, "learning_rate": 2.291240860664859e-07, "loss": 0.1161, "step": 9437 }, { "epoch": 0.8695812410743078, "grad_norm": 0.922169310738529, "learning_rate": 2.288056062645455e-07, "loss": 0.1101, "step": 9438 }, { "epoch": 0.8696733772515778, "grad_norm": 0.8836399103375951, "learning_rate": 2.2848733734324835e-07, "loss": 0.1008, "step": 9439 }, { "epoch": 0.8697655134288479, "grad_norm": 0.9790710542189891, "learning_rate": 2.281692793321469e-07, "loss": 0.1293, "step": 9440 }, { "epoch": 0.8698576496061179, "grad_norm": 0.9605080787153812, "learning_rate": 2.2785143226077166e-07, "loss": 0.1233, "step": 9441 }, { "epoch": 0.8699497857833879, "grad_norm": 0.9728766436641451, "learning_rate": 2.2753379615863575e-07, "loss": 0.1296, "step": 9442 }, { "epoch": 0.8700419219606579, "grad_norm": 0.9012599081009991, "learning_rate": 2.2721637105523193e-07, "loss": 0.1169, "step": 9443 }, { "epoch": 0.8701340581379279, "grad_norm": 0.9449224767636791, "learning_rate": 2.268991569800319e-07, "loss": 0.1167, "step": 9444 }, { "epoch": 0.8702261943151979, "grad_norm": 0.9397622392423732, "learning_rate": 2.2658215396249046e-07, "loss": 0.1224, "step": 9445 }, { "epoch": 0.8703183304924679, "grad_norm": 1.0206942614633827, "learning_rate": 2.2626536203204014e-07, "loss": 0.1238, "step": 9446 }, { "epoch": 0.8704104666697379, "grad_norm": 0.9508061328678594, "learning_rate": 2.2594878121809633e-07, "loss": 0.1181, "step": 9447 }, { "epoch": 0.8705026028470079, "grad_norm": 0.9776800478848374, "learning_rate": 2.2563241155005216e-07, "loss": 0.1227, "step": 9448 }, { "epoch": 0.8705947390242779, "grad_norm": 0.9499755485624733, "learning_rate": 2.2531625305728362e-07, "loss": 0.1176, "step": 9449 }, { "epoch": 0.8706868752015479, "grad_norm": 0.9626981166059405, "learning_rate": 2.2500030576914606e-07, "loss": 0.1041, "step": 9450 }, { "epoch": 0.8707790113788179, "grad_norm": 0.9322085622455839, "learning_rate": 2.2468456971497493e-07, "loss": 0.1215, "step": 9451 }, { "epoch": 0.8708711475560879, "grad_norm": 1.0594714922946018, "learning_rate": 2.2436904492408596e-07, "loss": 0.1197, "step": 9452 }, { "epoch": 0.8709632837333579, "grad_norm": 0.9755136585207467, "learning_rate": 2.2405373142577597e-07, "loss": 0.1261, "step": 9453 }, { "epoch": 0.871055419910628, "grad_norm": 0.9455054395345747, "learning_rate": 2.237386292493221e-07, "loss": 0.127, "step": 9454 }, { "epoch": 0.871147556087898, "grad_norm": 0.9658714454786119, "learning_rate": 2.2342373842398208e-07, "loss": 0.122, "step": 9455 }, { "epoch": 0.871239692265168, "grad_norm": 0.9632137472514127, "learning_rate": 2.2310905897899275e-07, "loss": 0.117, "step": 9456 }, { "epoch": 0.871331828442438, "grad_norm": 0.9449426092351376, "learning_rate": 2.227945909435719e-07, "loss": 0.1128, "step": 9457 }, { "epoch": 0.871423964619708, "grad_norm": 0.9567787882080494, "learning_rate": 2.224803343469184e-07, "loss": 0.1213, "step": 9458 }, { "epoch": 0.8715161007969779, "grad_norm": 0.9823985814029087, "learning_rate": 2.2216628921821138e-07, "loss": 0.1114, "step": 9459 }, { "epoch": 0.8716082369742479, "grad_norm": 0.9140140102629726, "learning_rate": 2.2185245558660918e-07, "loss": 0.1113, "step": 9460 }, { "epoch": 0.8717003731515179, "grad_norm": 1.0126259446458135, "learning_rate": 2.215388334812521e-07, "loss": 0.1237, "step": 9461 }, { "epoch": 0.8717925093287879, "grad_norm": 0.875242477779259, "learning_rate": 2.2122542293125883e-07, "loss": 0.1051, "step": 9462 }, { "epoch": 0.8718846455060579, "grad_norm": 0.9070067464486096, "learning_rate": 2.2091222396573104e-07, "loss": 0.105, "step": 9463 }, { "epoch": 0.8719767816833279, "grad_norm": 0.8634324183365186, "learning_rate": 2.20599236613748e-07, "loss": 0.1051, "step": 9464 }, { "epoch": 0.8720689178605979, "grad_norm": 0.9097237773646777, "learning_rate": 2.2028646090437117e-07, "loss": 0.1156, "step": 9465 }, { "epoch": 0.8721610540378679, "grad_norm": 0.9388952429509481, "learning_rate": 2.199738968666418e-07, "loss": 0.1137, "step": 9466 }, { "epoch": 0.8722531902151379, "grad_norm": 0.9697976669781796, "learning_rate": 2.1966154452958216e-07, "loss": 0.1247, "step": 9467 }, { "epoch": 0.872345326392408, "grad_norm": 0.8989941935550352, "learning_rate": 2.1934940392219272e-07, "loss": 0.1066, "step": 9468 }, { "epoch": 0.872437462569678, "grad_norm": 0.94116231002316, "learning_rate": 2.190374750734567e-07, "loss": 0.1197, "step": 9469 }, { "epoch": 0.872529598746948, "grad_norm": 0.9274780247234806, "learning_rate": 2.187257580123367e-07, "loss": 0.1091, "step": 9470 }, { "epoch": 0.872621734924218, "grad_norm": 0.9605073108855209, "learning_rate": 2.1841425276777544e-07, "loss": 0.1204, "step": 9471 }, { "epoch": 0.872713871101488, "grad_norm": 0.9908749541818052, "learning_rate": 2.1810295936869675e-07, "loss": 0.1285, "step": 9472 }, { "epoch": 0.872806007278758, "grad_norm": 0.9926019750513289, "learning_rate": 2.1779187784400385e-07, "loss": 0.1288, "step": 9473 }, { "epoch": 0.872898143456028, "grad_norm": 0.9483689313827183, "learning_rate": 2.1748100822258034e-07, "loss": 0.1141, "step": 9474 }, { "epoch": 0.872990279633298, "grad_norm": 0.9137883591856961, "learning_rate": 2.171703505332909e-07, "loss": 0.108, "step": 9475 }, { "epoch": 0.873082415810568, "grad_norm": 0.9370216914924847, "learning_rate": 2.1685990480498048e-07, "loss": 0.126, "step": 9476 }, { "epoch": 0.873174551987838, "grad_norm": 0.9391599852602918, "learning_rate": 2.1654967106647328e-07, "loss": 0.1134, "step": 9477 }, { "epoch": 0.873266688165108, "grad_norm": 0.9333893143260931, "learning_rate": 2.1623964934657516e-07, "loss": 0.1207, "step": 9478 }, { "epoch": 0.873358824342378, "grad_norm": 0.921403332616667, "learning_rate": 2.159298396740711e-07, "loss": 0.1184, "step": 9479 }, { "epoch": 0.873450960519648, "grad_norm": 0.989075346325913, "learning_rate": 2.1562024207772758e-07, "loss": 0.1221, "step": 9480 }, { "epoch": 0.8735430966969181, "grad_norm": 0.8899903434445905, "learning_rate": 2.1531085658628992e-07, "loss": 0.1073, "step": 9481 }, { "epoch": 0.8736352328741881, "grad_norm": 0.9766428519858893, "learning_rate": 2.1500168322848515e-07, "loss": 0.1219, "step": 9482 }, { "epoch": 0.8737273690514581, "grad_norm": 0.9650695272721265, "learning_rate": 2.1469272203302055e-07, "loss": 0.1207, "step": 9483 }, { "epoch": 0.8738195052287281, "grad_norm": 0.9154717048048135, "learning_rate": 2.143839730285824e-07, "loss": 0.1136, "step": 9484 }, { "epoch": 0.8739116414059981, "grad_norm": 0.9660021884154858, "learning_rate": 2.1407543624383798e-07, "loss": 0.1162, "step": 9485 }, { "epoch": 0.8740037775832681, "grad_norm": 0.9456680462862941, "learning_rate": 2.1376711170743553e-07, "loss": 0.1213, "step": 9486 }, { "epoch": 0.8740959137605381, "grad_norm": 0.9118077582257975, "learning_rate": 2.134589994480027e-07, "loss": 0.1139, "step": 9487 }, { "epoch": 0.8741880499378081, "grad_norm": 0.9085493885735575, "learning_rate": 2.1315109949414824e-07, "loss": 0.1191, "step": 9488 }, { "epoch": 0.8742801861150781, "grad_norm": 0.9387869250564302, "learning_rate": 2.1284341187446046e-07, "loss": 0.1219, "step": 9489 }, { "epoch": 0.8743723222923481, "grad_norm": 0.9152316051258936, "learning_rate": 2.1253593661750727e-07, "loss": 0.1052, "step": 9490 }, { "epoch": 0.8744644584696181, "grad_norm": 0.979761804929908, "learning_rate": 2.1222867375183893e-07, "loss": 0.1236, "step": 9491 }, { "epoch": 0.8745565946468881, "grad_norm": 0.931801417869082, "learning_rate": 2.1192162330598453e-07, "loss": 0.1145, "step": 9492 }, { "epoch": 0.8746487308241581, "grad_norm": 0.8688388184702385, "learning_rate": 2.1161478530845353e-07, "loss": 0.1095, "step": 9493 }, { "epoch": 0.8747408670014281, "grad_norm": 0.9228613560998604, "learning_rate": 2.1130815978773616e-07, "loss": 0.1261, "step": 9494 }, { "epoch": 0.8748330031786982, "grad_norm": 0.9595529954449742, "learning_rate": 2.1100174677230217e-07, "loss": 0.1235, "step": 9495 }, { "epoch": 0.8749251393559682, "grad_norm": 0.8906914567887639, "learning_rate": 2.1069554629060297e-07, "loss": 0.1102, "step": 9496 }, { "epoch": 0.8750172755332382, "grad_norm": 0.9461347234134713, "learning_rate": 2.10389558371068e-07, "loss": 0.1151, "step": 9497 }, { "epoch": 0.8751094117105082, "grad_norm": 0.9429358001436208, "learning_rate": 2.1008378304210876e-07, "loss": 0.1251, "step": 9498 }, { "epoch": 0.8752015478877782, "grad_norm": 0.9420068293333895, "learning_rate": 2.0977822033211748e-07, "loss": 0.1164, "step": 9499 }, { "epoch": 0.8752936840650481, "grad_norm": 1.0217294748310057, "learning_rate": 2.0947287026946428e-07, "loss": 0.1175, "step": 9500 }, { "epoch": 0.8752936840650481, "eval_loss": 0.1174582913517952, "eval_runtime": 299.0187, "eval_samples_per_second": 23.467, "eval_steps_per_second": 2.936, "step": 9500 }, { "epoch": 0.8753858202423181, "grad_norm": 0.8911898224450792, "learning_rate": 2.091677328825023e-07, "loss": 0.1057, "step": 9501 }, { "epoch": 0.8754779564195881, "grad_norm": 0.9062661157706274, "learning_rate": 2.0886280819956223e-07, "loss": 0.1122, "step": 9502 }, { "epoch": 0.8755700925968581, "grad_norm": 0.9721174742130575, "learning_rate": 2.0855809624895694e-07, "loss": 0.1268, "step": 9503 }, { "epoch": 0.8756622287741281, "grad_norm": 0.9541176980173214, "learning_rate": 2.082535970589794e-07, "loss": 0.1225, "step": 9504 }, { "epoch": 0.8757543649513981, "grad_norm": 0.9238750790195943, "learning_rate": 2.0794931065790226e-07, "loss": 0.1134, "step": 9505 }, { "epoch": 0.8758465011286681, "grad_norm": 0.9486934776566898, "learning_rate": 2.076452370739776e-07, "loss": 0.1185, "step": 9506 }, { "epoch": 0.8759386373059381, "grad_norm": 1.0458887278730715, "learning_rate": 2.0734137633543954e-07, "loss": 0.1309, "step": 9507 }, { "epoch": 0.8760307734832082, "grad_norm": 0.944357664608892, "learning_rate": 2.0703772847050136e-07, "loss": 0.1102, "step": 9508 }, { "epoch": 0.8761229096604782, "grad_norm": 0.9642165230298722, "learning_rate": 2.0673429350735742e-07, "loss": 0.1214, "step": 9509 }, { "epoch": 0.8762150458377482, "grad_norm": 0.8546678704665883, "learning_rate": 2.06431071474181e-07, "loss": 0.1051, "step": 9510 }, { "epoch": 0.8763071820150182, "grad_norm": 0.9556155038340977, "learning_rate": 2.0612806239912602e-07, "loss": 0.1133, "step": 9511 }, { "epoch": 0.8763993181922882, "grad_norm": 0.940205240213181, "learning_rate": 2.0582526631032745e-07, "loss": 0.1122, "step": 9512 }, { "epoch": 0.8764914543695582, "grad_norm": 0.9471384282662257, "learning_rate": 2.0552268323590002e-07, "loss": 0.1183, "step": 9513 }, { "epoch": 0.8765835905468282, "grad_norm": 0.9996225062721664, "learning_rate": 2.052203132039382e-07, "loss": 0.1233, "step": 9514 }, { "epoch": 0.8766757267240982, "grad_norm": 0.9260611802417887, "learning_rate": 2.0491815624251733e-07, "loss": 0.1184, "step": 9515 }, { "epoch": 0.8767678629013682, "grad_norm": 0.9071613612784006, "learning_rate": 2.046162123796927e-07, "loss": 0.1145, "step": 9516 }, { "epoch": 0.8768599990786382, "grad_norm": 0.9863259108007564, "learning_rate": 2.043144816434997e-07, "loss": 0.1326, "step": 9517 }, { "epoch": 0.8769521352559082, "grad_norm": 0.8971641251120323, "learning_rate": 2.0401296406195426e-07, "loss": 0.1136, "step": 9518 }, { "epoch": 0.8770442714331782, "grad_norm": 0.897002354512559, "learning_rate": 2.0371165966305173e-07, "loss": 0.1127, "step": 9519 }, { "epoch": 0.8771364076104482, "grad_norm": 0.9075910207315132, "learning_rate": 2.0341056847476947e-07, "loss": 0.1084, "step": 9520 }, { "epoch": 0.8772285437877182, "grad_norm": 0.9592976228033577, "learning_rate": 2.031096905250629e-07, "loss": 0.1112, "step": 9521 }, { "epoch": 0.8773206799649883, "grad_norm": 0.9076761866207034, "learning_rate": 2.0280902584186828e-07, "loss": 0.1172, "step": 9522 }, { "epoch": 0.8774128161422583, "grad_norm": 1.0029980746167146, "learning_rate": 2.02508574453103e-07, "loss": 0.1247, "step": 9523 }, { "epoch": 0.8775049523195283, "grad_norm": 0.9757391160570924, "learning_rate": 2.0220833638666393e-07, "loss": 0.1203, "step": 9524 }, { "epoch": 0.8775970884967983, "grad_norm": 0.943872802307776, "learning_rate": 2.0190831167042846e-07, "loss": 0.1152, "step": 9525 }, { "epoch": 0.8776892246740683, "grad_norm": 0.9287559296126232, "learning_rate": 2.016085003322535e-07, "loss": 0.1214, "step": 9526 }, { "epoch": 0.8777813608513383, "grad_norm": 0.9162111580851442, "learning_rate": 2.013089023999762e-07, "loss": 0.1146, "step": 9527 }, { "epoch": 0.8778734970286083, "grad_norm": 0.9129520056654986, "learning_rate": 2.010095179014146e-07, "loss": 0.1116, "step": 9528 }, { "epoch": 0.8779656332058783, "grad_norm": 0.9523827132430306, "learning_rate": 2.00710346864367e-07, "loss": 0.1279, "step": 9529 }, { "epoch": 0.8780577693831483, "grad_norm": 0.9521087669536956, "learning_rate": 2.0041138931661124e-07, "loss": 0.1249, "step": 9530 }, { "epoch": 0.8781499055604183, "grad_norm": 0.9189194425795829, "learning_rate": 2.0011264528590562e-07, "loss": 0.1194, "step": 9531 }, { "epoch": 0.8782420417376883, "grad_norm": 0.9487815314113962, "learning_rate": 1.9981411479998798e-07, "loss": 0.1085, "step": 9532 }, { "epoch": 0.8783341779149583, "grad_norm": 0.963493565405593, "learning_rate": 1.9951579788657748e-07, "loss": 0.1102, "step": 9533 }, { "epoch": 0.8784263140922283, "grad_norm": 0.9495227621628857, "learning_rate": 1.9921769457337286e-07, "loss": 0.1242, "step": 9534 }, { "epoch": 0.8785184502694983, "grad_norm": 0.9305475533838578, "learning_rate": 1.9891980488805278e-07, "loss": 0.114, "step": 9535 }, { "epoch": 0.8786105864467684, "grad_norm": 0.9693885781366122, "learning_rate": 1.986221288582768e-07, "loss": 0.1306, "step": 9536 }, { "epoch": 0.8787027226240384, "grad_norm": 0.9847652103702126, "learning_rate": 1.9832466651168337e-07, "loss": 0.1214, "step": 9537 }, { "epoch": 0.8787948588013084, "grad_norm": 0.8562607646907413, "learning_rate": 1.9802741787589258e-07, "loss": 0.1056, "step": 9538 }, { "epoch": 0.8788869949785784, "grad_norm": 0.9232649256601291, "learning_rate": 1.977303829785035e-07, "loss": 0.116, "step": 9539 }, { "epoch": 0.8789791311558484, "grad_norm": 0.9829657229119272, "learning_rate": 1.9743356184709628e-07, "loss": 0.1243, "step": 9540 }, { "epoch": 0.8790712673331184, "grad_norm": 0.9827295767927928, "learning_rate": 1.9713695450923054e-07, "loss": 0.129, "step": 9541 }, { "epoch": 0.8791634035103884, "grad_norm": 0.94977481989151, "learning_rate": 1.968405609924473e-07, "loss": 0.1198, "step": 9542 }, { "epoch": 0.8792555396876583, "grad_norm": 0.9891164225609158, "learning_rate": 1.9654438132426485e-07, "loss": 0.1231, "step": 9543 }, { "epoch": 0.8793476758649283, "grad_norm": 0.9193503040315959, "learning_rate": 1.9624841553218476e-07, "loss": 0.1083, "step": 9544 }, { "epoch": 0.8794398120421983, "grad_norm": 1.0548903153761093, "learning_rate": 1.9595266364368705e-07, "loss": 0.1317, "step": 9545 }, { "epoch": 0.8795319482194683, "grad_norm": 0.8884916257850111, "learning_rate": 1.9565712568623274e-07, "loss": 0.1113, "step": 9546 }, { "epoch": 0.8796240843967383, "grad_norm": 0.9715085221952553, "learning_rate": 1.9536180168726214e-07, "loss": 0.1127, "step": 9547 }, { "epoch": 0.8797162205740083, "grad_norm": 0.9821115353932246, "learning_rate": 1.9506669167419667e-07, "loss": 0.119, "step": 9548 }, { "epoch": 0.8798083567512784, "grad_norm": 0.9843575732122866, "learning_rate": 1.9477179567443632e-07, "loss": 0.1274, "step": 9549 }, { "epoch": 0.8799004929285484, "grad_norm": 0.9642181310696486, "learning_rate": 1.9447711371536365e-07, "loss": 0.1202, "step": 9550 }, { "epoch": 0.8799926291058184, "grad_norm": 1.0037179892947732, "learning_rate": 1.9418264582433844e-07, "loss": 0.1171, "step": 9551 }, { "epoch": 0.8800847652830884, "grad_norm": 0.9337763265065536, "learning_rate": 1.9388839202870268e-07, "loss": 0.1167, "step": 9552 }, { "epoch": 0.8801769014603584, "grad_norm": 0.8987444148210977, "learning_rate": 1.9359435235577818e-07, "loss": 0.1042, "step": 9553 }, { "epoch": 0.8802690376376284, "grad_norm": 0.9502015836724473, "learning_rate": 1.9330052683286666e-07, "loss": 0.1253, "step": 9554 }, { "epoch": 0.8803611738148984, "grad_norm": 0.93018808175646, "learning_rate": 1.930069154872488e-07, "loss": 0.1134, "step": 9555 }, { "epoch": 0.8804533099921684, "grad_norm": 0.9617089178446352, "learning_rate": 1.92713518346187e-07, "loss": 0.1213, "step": 9556 }, { "epoch": 0.8805454461694384, "grad_norm": 0.9348159689266482, "learning_rate": 1.9242033543692362e-07, "loss": 0.1185, "step": 9557 }, { "epoch": 0.8806375823467084, "grad_norm": 0.9509850882088483, "learning_rate": 1.9212736678668075e-07, "loss": 0.1214, "step": 9558 }, { "epoch": 0.8807297185239784, "grad_norm": 0.9288825792030714, "learning_rate": 1.9183461242266027e-07, "loss": 0.1142, "step": 9559 }, { "epoch": 0.8808218547012484, "grad_norm": 0.9294423662297463, "learning_rate": 1.9154207237204403e-07, "loss": 0.1147, "step": 9560 }, { "epoch": 0.8809139908785184, "grad_norm": 1.0171452491428885, "learning_rate": 1.9124974666199476e-07, "loss": 0.1299, "step": 9561 }, { "epoch": 0.8810061270557884, "grad_norm": 0.9263947519637243, "learning_rate": 1.909576353196549e-07, "loss": 0.1131, "step": 9562 }, { "epoch": 0.8810982632330585, "grad_norm": 0.8906518666115324, "learning_rate": 1.9066573837214773e-07, "loss": 0.113, "step": 9563 }, { "epoch": 0.8811903994103285, "grad_norm": 0.9373704014564624, "learning_rate": 1.90374055846575e-07, "loss": 0.117, "step": 9564 }, { "epoch": 0.8812825355875985, "grad_norm": 0.9669182368686189, "learning_rate": 1.9008258777001963e-07, "loss": 0.1208, "step": 9565 }, { "epoch": 0.8813746717648685, "grad_norm": 0.9085498077985098, "learning_rate": 1.8979133416954453e-07, "loss": 0.1116, "step": 9566 }, { "epoch": 0.8814668079421385, "grad_norm": 0.9457559872850566, "learning_rate": 1.8950029507219302e-07, "loss": 0.1252, "step": 9567 }, { "epoch": 0.8815589441194085, "grad_norm": 0.9720738745786429, "learning_rate": 1.8920947050498711e-07, "loss": 0.1207, "step": 9568 }, { "epoch": 0.8816510802966785, "grad_norm": 0.9886987730813052, "learning_rate": 1.889188604949313e-07, "loss": 0.1293, "step": 9569 }, { "epoch": 0.8817432164739485, "grad_norm": 0.8695655667801883, "learning_rate": 1.8862846506900762e-07, "loss": 0.1005, "step": 9570 }, { "epoch": 0.8818353526512185, "grad_norm": 0.9139453399544554, "learning_rate": 1.8833828425418006e-07, "loss": 0.1113, "step": 9571 }, { "epoch": 0.8819274888284885, "grad_norm": 0.9206137569411968, "learning_rate": 1.8804831807739094e-07, "loss": 0.1122, "step": 9572 }, { "epoch": 0.8820196250057585, "grad_norm": 0.9661739250181918, "learning_rate": 1.8775856656556458e-07, "loss": 0.118, "step": 9573 }, { "epoch": 0.8821117611830285, "grad_norm": 0.9716508124012724, "learning_rate": 1.8746902974560443e-07, "loss": 0.1242, "step": 9574 }, { "epoch": 0.8822038973602985, "grad_norm": 0.9133529948046311, "learning_rate": 1.8717970764439374e-07, "loss": 0.123, "step": 9575 }, { "epoch": 0.8822960335375686, "grad_norm": 0.9832013176200599, "learning_rate": 1.8689060028879602e-07, "loss": 0.1278, "step": 9576 }, { "epoch": 0.8823881697148386, "grad_norm": 0.8860667781542512, "learning_rate": 1.866017077056545e-07, "loss": 0.1193, "step": 9577 }, { "epoch": 0.8824803058921086, "grad_norm": 0.9641731545183843, "learning_rate": 1.8631302992179383e-07, "loss": 0.1184, "step": 9578 }, { "epoch": 0.8825724420693786, "grad_norm": 0.8761458096650978, "learning_rate": 1.860245669640176e-07, "loss": 0.1092, "step": 9579 }, { "epoch": 0.8826645782466486, "grad_norm": 0.9386085977402518, "learning_rate": 1.857363188591091e-07, "loss": 0.1111, "step": 9580 }, { "epoch": 0.8827567144239186, "grad_norm": 0.9293784733624652, "learning_rate": 1.8544828563383243e-07, "loss": 0.1079, "step": 9581 }, { "epoch": 0.8828488506011886, "grad_norm": 0.9749859909166957, "learning_rate": 1.8516046731493127e-07, "loss": 0.1275, "step": 9582 }, { "epoch": 0.8829409867784586, "grad_norm": 0.9022892040966464, "learning_rate": 1.848728639291303e-07, "loss": 0.1096, "step": 9583 }, { "epoch": 0.8830331229557286, "grad_norm": 0.9433013663273301, "learning_rate": 1.8458547550313287e-07, "loss": 0.1191, "step": 9584 }, { "epoch": 0.8831252591329986, "grad_norm": 0.8964672985576979, "learning_rate": 1.8429830206362325e-07, "loss": 0.1137, "step": 9585 }, { "epoch": 0.8832173953102685, "grad_norm": 0.9174450175751114, "learning_rate": 1.8401134363726536e-07, "loss": 0.1138, "step": 9586 }, { "epoch": 0.8833095314875385, "grad_norm": 0.8803209048806083, "learning_rate": 1.8372460025070343e-07, "loss": 0.1152, "step": 9587 }, { "epoch": 0.8834016676648085, "grad_norm": 0.9100598997602551, "learning_rate": 1.8343807193056201e-07, "loss": 0.1113, "step": 9588 }, { "epoch": 0.8834938038420785, "grad_norm": 0.8753907155320467, "learning_rate": 1.8315175870344455e-07, "loss": 0.1149, "step": 9589 }, { "epoch": 0.8835859400193486, "grad_norm": 0.9462524201593473, "learning_rate": 1.8286566059593615e-07, "loss": 0.1154, "step": 9590 }, { "epoch": 0.8836780761966186, "grad_norm": 0.9212877418915894, "learning_rate": 1.825797776346e-07, "loss": 0.1185, "step": 9591 }, { "epoch": 0.8837702123738886, "grad_norm": 0.946329462649158, "learning_rate": 1.8229410984598128e-07, "loss": 0.1243, "step": 9592 }, { "epoch": 0.8838623485511586, "grad_norm": 0.9357474153015102, "learning_rate": 1.820086572566035e-07, "loss": 0.1225, "step": 9593 }, { "epoch": 0.8839544847284286, "grad_norm": 0.9432417654817284, "learning_rate": 1.8172341989297154e-07, "loss": 0.1082, "step": 9594 }, { "epoch": 0.8840466209056986, "grad_norm": 0.9728284426637823, "learning_rate": 1.814383977815698e-07, "loss": 0.1082, "step": 9595 }, { "epoch": 0.8841387570829686, "grad_norm": 0.9521973139587481, "learning_rate": 1.8115359094886238e-07, "loss": 0.1237, "step": 9596 }, { "epoch": 0.8842308932602386, "grad_norm": 0.9412655233696202, "learning_rate": 1.808689994212931e-07, "loss": 0.1181, "step": 9597 }, { "epoch": 0.8843230294375086, "grad_norm": 0.9095779965593743, "learning_rate": 1.8058462322528698e-07, "loss": 0.1093, "step": 9598 }, { "epoch": 0.8844151656147786, "grad_norm": 0.9580227956852666, "learning_rate": 1.8030046238724814e-07, "loss": 0.1191, "step": 9599 }, { "epoch": 0.8845073017920486, "grad_norm": 0.9635109661247991, "learning_rate": 1.8001651693356131e-07, "loss": 0.1228, "step": 9600 }, { "epoch": 0.8845994379693186, "grad_norm": 0.8568137536842804, "learning_rate": 1.797327868905907e-07, "loss": 0.0991, "step": 9601 }, { "epoch": 0.8846915741465886, "grad_norm": 0.9547474068403831, "learning_rate": 1.7944927228467995e-07, "loss": 0.1168, "step": 9602 }, { "epoch": 0.8847837103238586, "grad_norm": 0.9584155615411788, "learning_rate": 1.791659731421541e-07, "loss": 0.1077, "step": 9603 }, { "epoch": 0.8848758465011287, "grad_norm": 0.928296303372466, "learning_rate": 1.7888288948931799e-07, "loss": 0.1165, "step": 9604 }, { "epoch": 0.8849679826783987, "grad_norm": 0.9114506107172179, "learning_rate": 1.786000213524547e-07, "loss": 0.1154, "step": 9605 }, { "epoch": 0.8850601188556687, "grad_norm": 0.9032049431949399, "learning_rate": 1.783173687578299e-07, "loss": 0.1077, "step": 9606 }, { "epoch": 0.8851522550329387, "grad_norm": 1.031588892107339, "learning_rate": 1.7803493173168679e-07, "loss": 0.1249, "step": 9607 }, { "epoch": 0.8852443912102087, "grad_norm": 0.9265771709711623, "learning_rate": 1.777527103002505e-07, "loss": 0.1197, "step": 9608 }, { "epoch": 0.8853365273874787, "grad_norm": 0.9544289104713, "learning_rate": 1.7747070448972475e-07, "loss": 0.1178, "step": 9609 }, { "epoch": 0.8854286635647487, "grad_norm": 0.9971281602223147, "learning_rate": 1.7718891432629392e-07, "loss": 0.1295, "step": 9610 }, { "epoch": 0.8855207997420187, "grad_norm": 0.9433321387081206, "learning_rate": 1.769073398361229e-07, "loss": 0.1169, "step": 9611 }, { "epoch": 0.8856129359192887, "grad_norm": 0.9677629402688539, "learning_rate": 1.7662598104535522e-07, "loss": 0.1211, "step": 9612 }, { "epoch": 0.8857050720965587, "grad_norm": 0.9972427061874779, "learning_rate": 1.7634483798011498e-07, "loss": 0.1217, "step": 9613 }, { "epoch": 0.8857972082738287, "grad_norm": 0.8859263153374594, "learning_rate": 1.760639106665063e-07, "loss": 0.1138, "step": 9614 }, { "epoch": 0.8858893444510987, "grad_norm": 0.9202629453001602, "learning_rate": 1.7578319913061387e-07, "loss": 0.1135, "step": 9615 }, { "epoch": 0.8859814806283687, "grad_norm": 0.9096230922774695, "learning_rate": 1.7550270339850212e-07, "loss": 0.108, "step": 9616 }, { "epoch": 0.8860736168056388, "grad_norm": 0.9214369389617435, "learning_rate": 1.7522242349621438e-07, "loss": 0.1213, "step": 9617 }, { "epoch": 0.8861657529829088, "grad_norm": 0.9458955777475399, "learning_rate": 1.7494235944977427e-07, "loss": 0.1252, "step": 9618 }, { "epoch": 0.8862578891601788, "grad_norm": 0.9374083870077097, "learning_rate": 1.7466251128518629e-07, "loss": 0.1088, "step": 9619 }, { "epoch": 0.8863500253374488, "grad_norm": 0.9111588907855831, "learning_rate": 1.7438287902843465e-07, "loss": 0.1051, "step": 9620 }, { "epoch": 0.8864421615147188, "grad_norm": 0.9315197242699069, "learning_rate": 1.7410346270548328e-07, "loss": 0.1175, "step": 9621 }, { "epoch": 0.8865342976919888, "grad_norm": 1.0174523449968809, "learning_rate": 1.7382426234227562e-07, "loss": 0.1309, "step": 9622 }, { "epoch": 0.8866264338692588, "grad_norm": 0.9663714692842666, "learning_rate": 1.735452779647351e-07, "loss": 0.1191, "step": 9623 }, { "epoch": 0.8867185700465288, "grad_norm": 0.9881143534895991, "learning_rate": 1.7326650959876595e-07, "loss": 0.1308, "step": 9624 }, { "epoch": 0.8868107062237988, "grad_norm": 0.9652210535066099, "learning_rate": 1.7298795727025226e-07, "loss": 0.1118, "step": 9625 }, { "epoch": 0.8869028424010688, "grad_norm": 0.9190988726721089, "learning_rate": 1.7270962100505688e-07, "loss": 0.1202, "step": 9626 }, { "epoch": 0.8869949785783388, "grad_norm": 0.9356727701411983, "learning_rate": 1.724315008290234e-07, "loss": 0.1208, "step": 9627 }, { "epoch": 0.8870871147556088, "grad_norm": 0.9449533031642502, "learning_rate": 1.7215359676797604e-07, "loss": 0.1204, "step": 9628 }, { "epoch": 0.8871792509328787, "grad_norm": 0.9224941964563907, "learning_rate": 1.7187590884771789e-07, "loss": 0.1137, "step": 9629 }, { "epoch": 0.8872713871101487, "grad_norm": 0.9184212287329622, "learning_rate": 1.7159843709403156e-07, "loss": 0.1184, "step": 9630 }, { "epoch": 0.8873635232874189, "grad_norm": 1.033680961033416, "learning_rate": 1.7132118153268097e-07, "loss": 0.1182, "step": 9631 }, { "epoch": 0.8874556594646889, "grad_norm": 0.925393325283817, "learning_rate": 1.7104414218940934e-07, "loss": 0.1161, "step": 9632 }, { "epoch": 0.8875477956419588, "grad_norm": 0.9012734623981898, "learning_rate": 1.7076731908994032e-07, "loss": 0.1063, "step": 9633 }, { "epoch": 0.8876399318192288, "grad_norm": 0.9821681211632581, "learning_rate": 1.704907122599761e-07, "loss": 0.1297, "step": 9634 }, { "epoch": 0.8877320679964988, "grad_norm": 0.937963156497377, "learning_rate": 1.7021432172519974e-07, "loss": 0.1123, "step": 9635 }, { "epoch": 0.8878242041737688, "grad_norm": 0.8937454742138694, "learning_rate": 1.6993814751127435e-07, "loss": 0.115, "step": 9636 }, { "epoch": 0.8879163403510388, "grad_norm": 0.8840091538217429, "learning_rate": 1.69662189643843e-07, "loss": 0.103, "step": 9637 }, { "epoch": 0.8880084765283088, "grad_norm": 0.9022781888770508, "learning_rate": 1.69386448148528e-07, "loss": 0.1093, "step": 9638 }, { "epoch": 0.8881006127055788, "grad_norm": 0.948615735169581, "learning_rate": 1.691109230509322e-07, "loss": 0.1155, "step": 9639 }, { "epoch": 0.8881927488828488, "grad_norm": 0.9448712983100227, "learning_rate": 1.6883561437663788e-07, "loss": 0.1164, "step": 9640 }, { "epoch": 0.8882848850601188, "grad_norm": 0.8863119815353013, "learning_rate": 1.6856052215120794e-07, "loss": 0.1115, "step": 9641 }, { "epoch": 0.8883770212373888, "grad_norm": 0.9719080592411924, "learning_rate": 1.682856464001839e-07, "loss": 0.1166, "step": 9642 }, { "epoch": 0.8884691574146588, "grad_norm": 0.8809419811084558, "learning_rate": 1.680109871490887e-07, "loss": 0.1115, "step": 9643 }, { "epoch": 0.8885612935919289, "grad_norm": 0.9303992924384262, "learning_rate": 1.6773654442342468e-07, "loss": 0.1105, "step": 9644 }, { "epoch": 0.8886534297691989, "grad_norm": 1.0080826201157422, "learning_rate": 1.6746231824867316e-07, "loss": 0.1284, "step": 9645 }, { "epoch": 0.8887455659464689, "grad_norm": 0.9524532559452373, "learning_rate": 1.671883086502968e-07, "loss": 0.1139, "step": 9646 }, { "epoch": 0.8888377021237389, "grad_norm": 0.9658574906611174, "learning_rate": 1.669145156537366e-07, "loss": 0.1339, "step": 9647 }, { "epoch": 0.8889298383010089, "grad_norm": 0.9751522991921692, "learning_rate": 1.6664093928441456e-07, "loss": 0.1203, "step": 9648 }, { "epoch": 0.8890219744782789, "grad_norm": 0.951320518261064, "learning_rate": 1.6636757956773302e-07, "loss": 0.12, "step": 9649 }, { "epoch": 0.8891141106555489, "grad_norm": 0.9416676468741589, "learning_rate": 1.6609443652907287e-07, "loss": 0.1227, "step": 9650 }, { "epoch": 0.8892062468328189, "grad_norm": 1.0030344928799633, "learning_rate": 1.6582151019379517e-07, "loss": 0.1299, "step": 9651 }, { "epoch": 0.8892983830100889, "grad_norm": 0.9258602981126589, "learning_rate": 1.655488005872413e-07, "loss": 0.1126, "step": 9652 }, { "epoch": 0.8893905191873589, "grad_norm": 0.9424174799317092, "learning_rate": 1.6527630773473248e-07, "loss": 0.1178, "step": 9653 }, { "epoch": 0.8894826553646289, "grad_norm": 0.9481218238285717, "learning_rate": 1.650040316615703e-07, "loss": 0.1246, "step": 9654 }, { "epoch": 0.8895747915418989, "grad_norm": 0.9605790380659346, "learning_rate": 1.647319723930349e-07, "loss": 0.124, "step": 9655 }, { "epoch": 0.8896669277191689, "grad_norm": 0.9472833340946128, "learning_rate": 1.6446012995438688e-07, "loss": 0.1252, "step": 9656 }, { "epoch": 0.8897590638964389, "grad_norm": 0.9887156441835274, "learning_rate": 1.6418850437086715e-07, "loss": 0.1311, "step": 9657 }, { "epoch": 0.889851200073709, "grad_norm": 0.9639073024387345, "learning_rate": 1.6391709566769664e-07, "loss": 0.1232, "step": 9658 }, { "epoch": 0.889943336250979, "grad_norm": 0.8845699968934604, "learning_rate": 1.6364590387007468e-07, "loss": 0.1053, "step": 9659 }, { "epoch": 0.890035472428249, "grad_norm": 0.9467171501941805, "learning_rate": 1.6337492900318246e-07, "loss": 0.1118, "step": 9660 }, { "epoch": 0.890127608605519, "grad_norm": 0.8784549110877983, "learning_rate": 1.6310417109217906e-07, "loss": 0.11, "step": 9661 }, { "epoch": 0.890219744782789, "grad_norm": 0.9112414536641252, "learning_rate": 1.6283363016220548e-07, "loss": 0.1077, "step": 9662 }, { "epoch": 0.890311880960059, "grad_norm": 0.9555138237898598, "learning_rate": 1.6256330623838024e-07, "loss": 0.126, "step": 9663 }, { "epoch": 0.890404017137329, "grad_norm": 0.9146987594311619, "learning_rate": 1.6229319934580378e-07, "loss": 0.108, "step": 9664 }, { "epoch": 0.890496153314599, "grad_norm": 0.9870921357946368, "learning_rate": 1.6202330950955552e-07, "loss": 0.1216, "step": 9665 }, { "epoch": 0.890588289491869, "grad_norm": 0.9640885538855887, "learning_rate": 1.6175363675469485e-07, "loss": 0.1239, "step": 9666 }, { "epoch": 0.890680425669139, "grad_norm": 0.9317904692179457, "learning_rate": 1.6148418110626008e-07, "loss": 0.1171, "step": 9667 }, { "epoch": 0.890772561846409, "grad_norm": 0.9345846725693181, "learning_rate": 1.612149425892709e-07, "loss": 0.1137, "step": 9668 }, { "epoch": 0.890864698023679, "grad_norm": 0.9183218187820292, "learning_rate": 1.6094592122872594e-07, "loss": 0.1142, "step": 9669 }, { "epoch": 0.890956834200949, "grad_norm": 0.9700195116573087, "learning_rate": 1.6067711704960408e-07, "loss": 0.1146, "step": 9670 }, { "epoch": 0.8910489703782191, "grad_norm": 0.9380884900362914, "learning_rate": 1.60408530076864e-07, "loss": 0.1258, "step": 9671 }, { "epoch": 0.8911411065554891, "grad_norm": 0.9505680280600464, "learning_rate": 1.6014016033544329e-07, "loss": 0.1125, "step": 9672 }, { "epoch": 0.8912332427327591, "grad_norm": 0.9153781576364745, "learning_rate": 1.5987200785026024e-07, "loss": 0.1134, "step": 9673 }, { "epoch": 0.891325378910029, "grad_norm": 0.9722948079778472, "learning_rate": 1.5960407264621335e-07, "loss": 0.1234, "step": 9674 }, { "epoch": 0.891417515087299, "grad_norm": 0.9108011291797209, "learning_rate": 1.5933635474818048e-07, "loss": 0.1207, "step": 9675 }, { "epoch": 0.891509651264569, "grad_norm": 0.9460370912653332, "learning_rate": 1.5906885418101897e-07, "loss": 0.1169, "step": 9676 }, { "epoch": 0.891601787441839, "grad_norm": 0.9385372386749276, "learning_rate": 1.588015709695659e-07, "loss": 0.1155, "step": 9677 }, { "epoch": 0.891693923619109, "grad_norm": 0.9241971228937185, "learning_rate": 1.5853450513863887e-07, "loss": 0.112, "step": 9678 }, { "epoch": 0.891786059796379, "grad_norm": 0.8890910453397396, "learning_rate": 1.582676567130356e-07, "loss": 0.1149, "step": 9679 }, { "epoch": 0.891878195973649, "grad_norm": 0.9725011168763831, "learning_rate": 1.5800102571753185e-07, "loss": 0.1197, "step": 9680 }, { "epoch": 0.891970332150919, "grad_norm": 0.9501294692501856, "learning_rate": 1.5773461217688552e-07, "loss": 0.118, "step": 9681 }, { "epoch": 0.892062468328189, "grad_norm": 0.9353526816347267, "learning_rate": 1.5746841611583185e-07, "loss": 0.1155, "step": 9682 }, { "epoch": 0.892154604505459, "grad_norm": 0.8727846936420787, "learning_rate": 1.572024375590883e-07, "loss": 0.1078, "step": 9683 }, { "epoch": 0.892246740682729, "grad_norm": 0.9458301911588138, "learning_rate": 1.5693667653135043e-07, "loss": 0.1154, "step": 9684 }, { "epoch": 0.8923388768599991, "grad_norm": 0.9325030933389573, "learning_rate": 1.56671133057294e-07, "loss": 0.1133, "step": 9685 }, { "epoch": 0.8924310130372691, "grad_norm": 0.9620657518647695, "learning_rate": 1.5640580716157566e-07, "loss": 0.1101, "step": 9686 }, { "epoch": 0.8925231492145391, "grad_norm": 0.886768718834908, "learning_rate": 1.5614069886883021e-07, "loss": 0.1093, "step": 9687 }, { "epoch": 0.8926152853918091, "grad_norm": 1.0028608248597497, "learning_rate": 1.5587580820367294e-07, "loss": 0.1294, "step": 9688 }, { "epoch": 0.8927074215690791, "grad_norm": 0.9258946821905417, "learning_rate": 1.5561113519069887e-07, "loss": 0.1232, "step": 9689 }, { "epoch": 0.8927995577463491, "grad_norm": 0.8868177500137526, "learning_rate": 1.5534667985448336e-07, "loss": 0.1154, "step": 9690 }, { "epoch": 0.8928916939236191, "grad_norm": 0.9208808651503403, "learning_rate": 1.5508244221958125e-07, "loss": 0.1114, "step": 9691 }, { "epoch": 0.8929838301008891, "grad_norm": 0.9465660393051565, "learning_rate": 1.5481842231052702e-07, "loss": 0.1236, "step": 9692 }, { "epoch": 0.8930759662781591, "grad_norm": 0.8850488686636715, "learning_rate": 1.5455462015183388e-07, "loss": 0.0956, "step": 9693 }, { "epoch": 0.8931681024554291, "grad_norm": 0.9198730188731642, "learning_rate": 1.5429103576799692e-07, "loss": 0.1096, "step": 9694 }, { "epoch": 0.8932602386326991, "grad_norm": 0.9843260253433014, "learning_rate": 1.540276691834902e-07, "loss": 0.1274, "step": 9695 }, { "epoch": 0.8933523748099691, "grad_norm": 1.0059626536122228, "learning_rate": 1.537645204227664e-07, "loss": 0.1311, "step": 9696 }, { "epoch": 0.8934445109872391, "grad_norm": 0.980951404503035, "learning_rate": 1.5350158951025957e-07, "loss": 0.1183, "step": 9697 }, { "epoch": 0.8935366471645091, "grad_norm": 0.9806529946895199, "learning_rate": 1.5323887647038266e-07, "loss": 0.1249, "step": 9698 }, { "epoch": 0.8936287833417792, "grad_norm": 0.9610970927025647, "learning_rate": 1.5297638132752867e-07, "loss": 0.1138, "step": 9699 }, { "epoch": 0.8937209195190492, "grad_norm": 0.9565492528451189, "learning_rate": 1.5271410410607008e-07, "loss": 0.1103, "step": 9700 }, { "epoch": 0.8938130556963192, "grad_norm": 0.9128992345808833, "learning_rate": 1.5245204483035958e-07, "loss": 0.1194, "step": 9701 }, { "epoch": 0.8939051918735892, "grad_norm": 0.9813926968534401, "learning_rate": 1.5219020352472914e-07, "loss": 0.1208, "step": 9702 }, { "epoch": 0.8939973280508592, "grad_norm": 0.947509072446224, "learning_rate": 1.519285802134915e-07, "loss": 0.113, "step": 9703 }, { "epoch": 0.8940894642281292, "grad_norm": 0.9932594914438546, "learning_rate": 1.5166717492093808e-07, "loss": 0.1217, "step": 9704 }, { "epoch": 0.8941816004053992, "grad_norm": 0.892243585536645, "learning_rate": 1.5140598767133947e-07, "loss": 0.1104, "step": 9705 }, { "epoch": 0.8942737365826692, "grad_norm": 0.9090227640422955, "learning_rate": 1.5114501848894792e-07, "loss": 0.1129, "step": 9706 }, { "epoch": 0.8943658727599392, "grad_norm": 0.9667171942105707, "learning_rate": 1.5088426739799405e-07, "loss": 0.1195, "step": 9707 }, { "epoch": 0.8944580089372092, "grad_norm": 0.9372614582454015, "learning_rate": 1.5062373442268908e-07, "loss": 0.1222, "step": 9708 }, { "epoch": 0.8945501451144792, "grad_norm": 0.933453218296901, "learning_rate": 1.5036341958722334e-07, "loss": 0.1151, "step": 9709 }, { "epoch": 0.8946422812917492, "grad_norm": 0.9340782377512039, "learning_rate": 1.501033229157667e-07, "loss": 0.1205, "step": 9710 }, { "epoch": 0.8947344174690192, "grad_norm": 0.9239264734069119, "learning_rate": 1.4984344443246924e-07, "loss": 0.1126, "step": 9711 }, { "epoch": 0.8948265536462893, "grad_norm": 0.9349919081011316, "learning_rate": 1.495837841614614e-07, "loss": 0.0968, "step": 9712 }, { "epoch": 0.8949186898235593, "grad_norm": 0.9218295205011691, "learning_rate": 1.493243421268517e-07, "loss": 0.1116, "step": 9713 }, { "epoch": 0.8950108260008293, "grad_norm": 0.9371288620444782, "learning_rate": 1.4906511835273003e-07, "loss": 0.1048, "step": 9714 }, { "epoch": 0.8951029621780993, "grad_norm": 0.9444301518399142, "learning_rate": 1.4880611286316487e-07, "loss": 0.122, "step": 9715 }, { "epoch": 0.8951950983553693, "grad_norm": 0.9990493560331191, "learning_rate": 1.4854732568220566e-07, "loss": 0.1316, "step": 9716 }, { "epoch": 0.8952872345326393, "grad_norm": 0.9358320327837656, "learning_rate": 1.4828875683387977e-07, "loss": 0.1169, "step": 9717 }, { "epoch": 0.8953793707099093, "grad_norm": 0.9190529344035475, "learning_rate": 1.4803040634219612e-07, "loss": 0.1094, "step": 9718 }, { "epoch": 0.8954715068871792, "grad_norm": 0.9687233614559736, "learning_rate": 1.4777227423114271e-07, "loss": 0.12, "step": 9719 }, { "epoch": 0.8955636430644492, "grad_norm": 1.0284364178633627, "learning_rate": 1.4751436052468677e-07, "loss": 0.1258, "step": 9720 }, { "epoch": 0.8956557792417192, "grad_norm": 0.87744147112822, "learning_rate": 1.4725666524677496e-07, "loss": 0.1086, "step": 9721 }, { "epoch": 0.8957479154189892, "grad_norm": 0.9439290320132014, "learning_rate": 1.4699918842133536e-07, "loss": 0.1211, "step": 9722 }, { "epoch": 0.8958400515962592, "grad_norm": 0.8633906867334843, "learning_rate": 1.4674193007227416e-07, "loss": 0.1035, "step": 9723 }, { "epoch": 0.8959321877735292, "grad_norm": 0.9399171676121051, "learning_rate": 1.464848902234786e-07, "loss": 0.1188, "step": 9724 }, { "epoch": 0.8960243239507992, "grad_norm": 1.200021321856677, "learning_rate": 1.4622806889881407e-07, "loss": 0.1209, "step": 9725 }, { "epoch": 0.8961164601280693, "grad_norm": 0.9660581201978683, "learning_rate": 1.4597146612212622e-07, "loss": 0.1165, "step": 9726 }, { "epoch": 0.8962085963053393, "grad_norm": 0.9637032682306422, "learning_rate": 1.457150819172412e-07, "loss": 0.123, "step": 9727 }, { "epoch": 0.8963007324826093, "grad_norm": 0.9216934942341704, "learning_rate": 1.454589163079645e-07, "loss": 0.1088, "step": 9728 }, { "epoch": 0.8963928686598793, "grad_norm": 0.8845350904114487, "learning_rate": 1.4520296931808064e-07, "loss": 0.112, "step": 9729 }, { "epoch": 0.8964850048371493, "grad_norm": 0.9438486506585017, "learning_rate": 1.449472409713548e-07, "loss": 0.1236, "step": 9730 }, { "epoch": 0.8965771410144193, "grad_norm": 0.957677136788932, "learning_rate": 1.4469173129153052e-07, "loss": 0.1181, "step": 9731 }, { "epoch": 0.8966692771916893, "grad_norm": 0.9933864337772672, "learning_rate": 1.4443644030233268e-07, "loss": 0.1305, "step": 9732 }, { "epoch": 0.8967614133689593, "grad_norm": 0.8999166286380204, "learning_rate": 1.4418136802746507e-07, "loss": 0.1068, "step": 9733 }, { "epoch": 0.8968535495462293, "grad_norm": 0.9631857910045679, "learning_rate": 1.4392651449061075e-07, "loss": 0.1221, "step": 9734 }, { "epoch": 0.8969456857234993, "grad_norm": 0.8684313853698533, "learning_rate": 1.4367187971543352e-07, "loss": 0.1085, "step": 9735 }, { "epoch": 0.8970378219007693, "grad_norm": 0.8674388014206332, "learning_rate": 1.434174637255753e-07, "loss": 0.0992, "step": 9736 }, { "epoch": 0.8971299580780393, "grad_norm": 0.9874178781613823, "learning_rate": 1.4316326654465972e-07, "loss": 0.128, "step": 9737 }, { "epoch": 0.8972220942553093, "grad_norm": 0.9477060570555671, "learning_rate": 1.429092881962882e-07, "loss": 0.1228, "step": 9738 }, { "epoch": 0.8973142304325794, "grad_norm": 0.9149830735214507, "learning_rate": 1.4265552870404265e-07, "loss": 0.1189, "step": 9739 }, { "epoch": 0.8974063666098494, "grad_norm": 0.9544319608703057, "learning_rate": 1.4240198809148537e-07, "loss": 0.1261, "step": 9740 }, { "epoch": 0.8974985027871194, "grad_norm": 0.9299058821822609, "learning_rate": 1.421486663821575e-07, "loss": 0.1085, "step": 9741 }, { "epoch": 0.8975906389643894, "grad_norm": 0.9000451973033925, "learning_rate": 1.4189556359957917e-07, "loss": 0.1145, "step": 9742 }, { "epoch": 0.8976827751416594, "grad_norm": 0.9255410508309111, "learning_rate": 1.4164267976725154e-07, "loss": 0.1196, "step": 9743 }, { "epoch": 0.8977749113189294, "grad_norm": 0.9849259768868959, "learning_rate": 1.41390014908655e-07, "loss": 0.1232, "step": 9744 }, { "epoch": 0.8978670474961994, "grad_norm": 0.9322779341663346, "learning_rate": 1.4113756904724967e-07, "loss": 0.1113, "step": 9745 }, { "epoch": 0.8979591836734694, "grad_norm": 0.9525263463078988, "learning_rate": 1.4088534220647487e-07, "loss": 0.124, "step": 9746 }, { "epoch": 0.8980513198507394, "grad_norm": 0.921740201644219, "learning_rate": 1.4063333440974963e-07, "loss": 0.1108, "step": 9747 }, { "epoch": 0.8981434560280094, "grad_norm": 0.9209820144117232, "learning_rate": 1.403815456804733e-07, "loss": 0.1071, "step": 9748 }, { "epoch": 0.8982355922052794, "grad_norm": 0.9262180070109551, "learning_rate": 1.4012997604202466e-07, "loss": 0.1143, "step": 9749 }, { "epoch": 0.8983277283825494, "grad_norm": 0.8761195305273904, "learning_rate": 1.3987862551776143e-07, "loss": 0.1079, "step": 9750 }, { "epoch": 0.8984198645598194, "grad_norm": 0.9368357522890151, "learning_rate": 1.3962749413102216e-07, "loss": 0.1136, "step": 9751 }, { "epoch": 0.8985120007370894, "grad_norm": 0.9377461590430607, "learning_rate": 1.3937658190512377e-07, "loss": 0.1209, "step": 9752 }, { "epoch": 0.8986041369143595, "grad_norm": 0.8844793469481224, "learning_rate": 1.3912588886336397e-07, "loss": 0.114, "step": 9753 }, { "epoch": 0.8986962730916295, "grad_norm": 0.9335683035286864, "learning_rate": 1.388754150290192e-07, "loss": 0.1234, "step": 9754 }, { "epoch": 0.8987884092688995, "grad_norm": 0.9053742346487745, "learning_rate": 1.3862516042534634e-07, "loss": 0.1042, "step": 9755 }, { "epoch": 0.8988805454461695, "grad_norm": 0.9444620424685304, "learning_rate": 1.3837512507558188e-07, "loss": 0.1192, "step": 9756 }, { "epoch": 0.8989726816234395, "grad_norm": 0.8816034579977505, "learning_rate": 1.3812530900294107e-07, "loss": 0.1131, "step": 9757 }, { "epoch": 0.8990648178007095, "grad_norm": 0.8964634166927596, "learning_rate": 1.37875712230619e-07, "loss": 0.1141, "step": 9758 }, { "epoch": 0.8991569539779795, "grad_norm": 1.020273171079443, "learning_rate": 1.376263347817916e-07, "loss": 0.1255, "step": 9759 }, { "epoch": 0.8992490901552495, "grad_norm": 0.9267078640525189, "learning_rate": 1.3737717667961308e-07, "loss": 0.1206, "step": 9760 }, { "epoch": 0.8993412263325195, "grad_norm": 0.930151281773034, "learning_rate": 1.371282379472183e-07, "loss": 0.1116, "step": 9761 }, { "epoch": 0.8994333625097894, "grad_norm": 0.88549447679142, "learning_rate": 1.3687951860772098e-07, "loss": 0.1131, "step": 9762 }, { "epoch": 0.8995254986870594, "grad_norm": 0.913221976034471, "learning_rate": 1.366310186842143e-07, "loss": 0.1157, "step": 9763 }, { "epoch": 0.8996176348643294, "grad_norm": 0.9949130994048048, "learning_rate": 1.3638273819977205e-07, "loss": 0.1205, "step": 9764 }, { "epoch": 0.8997097710415994, "grad_norm": 0.9465781966051717, "learning_rate": 1.3613467717744661e-07, "loss": 0.1234, "step": 9765 }, { "epoch": 0.8998019072188694, "grad_norm": 0.9411758903196948, "learning_rate": 1.358868356402715e-07, "loss": 0.122, "step": 9766 }, { "epoch": 0.8998940433961395, "grad_norm": 1.0203783648280509, "learning_rate": 1.3563921361125804e-07, "loss": 0.1273, "step": 9767 }, { "epoch": 0.8999861795734095, "grad_norm": 0.9816594213978099, "learning_rate": 1.3539181111339754e-07, "loss": 0.1182, "step": 9768 }, { "epoch": 0.9000783157506795, "grad_norm": 0.889791816825393, "learning_rate": 1.3514462816966195e-07, "loss": 0.106, "step": 9769 }, { "epoch": 0.9001704519279495, "grad_norm": 0.9633466334197318, "learning_rate": 1.3489766480300232e-07, "loss": 0.1252, "step": 9770 }, { "epoch": 0.9002625881052195, "grad_norm": 0.9379316337177459, "learning_rate": 1.3465092103634892e-07, "loss": 0.1209, "step": 9771 }, { "epoch": 0.9003547242824895, "grad_norm": 0.9954431482111418, "learning_rate": 1.3440439689261232e-07, "loss": 0.1249, "step": 9772 }, { "epoch": 0.9004468604597595, "grad_norm": 0.9238096644006695, "learning_rate": 1.3415809239468198e-07, "loss": 0.1133, "step": 9773 }, { "epoch": 0.9005389966370295, "grad_norm": 0.9604790895841134, "learning_rate": 1.3391200756542738e-07, "loss": 0.1198, "step": 9774 }, { "epoch": 0.9006311328142995, "grad_norm": 0.974038640437757, "learning_rate": 1.336661424276972e-07, "loss": 0.1186, "step": 9775 }, { "epoch": 0.9007232689915695, "grad_norm": 0.9207157778378379, "learning_rate": 1.334204970043204e-07, "loss": 0.112, "step": 9776 }, { "epoch": 0.9008154051688395, "grad_norm": 0.9607697109332836, "learning_rate": 1.331750713181054e-07, "loss": 0.1186, "step": 9777 }, { "epoch": 0.9009075413461095, "grad_norm": 0.9612023119466364, "learning_rate": 1.3292986539184011e-07, "loss": 0.1217, "step": 9778 }, { "epoch": 0.9009996775233795, "grad_norm": 0.9352391442078847, "learning_rate": 1.32684879248291e-07, "loss": 0.1181, "step": 9779 }, { "epoch": 0.9010918137006496, "grad_norm": 0.938840287739617, "learning_rate": 1.324401129102057e-07, "loss": 0.113, "step": 9780 }, { "epoch": 0.9011839498779196, "grad_norm": 0.9599975054539138, "learning_rate": 1.321955664003105e-07, "loss": 0.1229, "step": 9781 }, { "epoch": 0.9012760860551896, "grad_norm": 0.9002389769621546, "learning_rate": 1.3195123974131252e-07, "loss": 0.109, "step": 9782 }, { "epoch": 0.9013682222324596, "grad_norm": 0.9532838208954857, "learning_rate": 1.317071329558961e-07, "loss": 0.1244, "step": 9783 }, { "epoch": 0.9014603584097296, "grad_norm": 0.9008130939211582, "learning_rate": 1.3146324606672754e-07, "loss": 0.1171, "step": 9784 }, { "epoch": 0.9015524945869996, "grad_norm": 0.9205065612854544, "learning_rate": 1.3121957909645155e-07, "loss": 0.1112, "step": 9785 }, { "epoch": 0.9016446307642696, "grad_norm": 0.963261859456219, "learning_rate": 1.309761320676925e-07, "loss": 0.1296, "step": 9786 }, { "epoch": 0.9017367669415396, "grad_norm": 0.9543745475845056, "learning_rate": 1.3073290500305452e-07, "loss": 0.1211, "step": 9787 }, { "epoch": 0.9018289031188096, "grad_norm": 0.939430723758642, "learning_rate": 1.3048989792512096e-07, "loss": 0.1165, "step": 9788 }, { "epoch": 0.9019210392960796, "grad_norm": 0.9899501282112528, "learning_rate": 1.3024711085645597e-07, "loss": 0.1174, "step": 9789 }, { "epoch": 0.9020131754733496, "grad_norm": 0.956721372717787, "learning_rate": 1.3000454381960127e-07, "loss": 0.1151, "step": 9790 }, { "epoch": 0.9021053116506196, "grad_norm": 0.9518141144978524, "learning_rate": 1.297621968370802e-07, "loss": 0.113, "step": 9791 }, { "epoch": 0.9021974478278896, "grad_norm": 0.998188656043015, "learning_rate": 1.2952006993139393e-07, "loss": 0.13, "step": 9792 }, { "epoch": 0.9022895840051596, "grad_norm": 0.9881266943062582, "learning_rate": 1.2927816312502422e-07, "loss": 0.1248, "step": 9793 }, { "epoch": 0.9023817201824297, "grad_norm": 0.9594850371411043, "learning_rate": 1.2903647644043254e-07, "loss": 0.121, "step": 9794 }, { "epoch": 0.9024738563596997, "grad_norm": 0.9412583884780079, "learning_rate": 1.2879500990005926e-07, "loss": 0.1118, "step": 9795 }, { "epoch": 0.9025659925369697, "grad_norm": 0.9863617128648952, "learning_rate": 1.2855376352632427e-07, "loss": 0.1199, "step": 9796 }, { "epoch": 0.9026581287142397, "grad_norm": 0.9075230786695033, "learning_rate": 1.2831273734162736e-07, "loss": 0.1116, "step": 9797 }, { "epoch": 0.9027502648915097, "grad_norm": 0.970810405883738, "learning_rate": 1.280719313683479e-07, "loss": 0.127, "step": 9798 }, { "epoch": 0.9028424010687797, "grad_norm": 0.969479796883492, "learning_rate": 1.2783134562884547e-07, "loss": 0.1309, "step": 9799 }, { "epoch": 0.9029345372460497, "grad_norm": 0.9723007625084326, "learning_rate": 1.275909801454578e-07, "loss": 0.1242, "step": 9800 }, { "epoch": 0.9030266734233197, "grad_norm": 0.9245601520664418, "learning_rate": 1.2735083494050255e-07, "loss": 0.1172, "step": 9801 }, { "epoch": 0.9031188096005897, "grad_norm": 0.939522410574133, "learning_rate": 1.2711091003627773e-07, "loss": 0.1197, "step": 9802 }, { "epoch": 0.9032109457778597, "grad_norm": 0.9359774641408325, "learning_rate": 1.2687120545506054e-07, "loss": 0.1187, "step": 9803 }, { "epoch": 0.9033030819551296, "grad_norm": 0.9609895198256817, "learning_rate": 1.2663172121910705e-07, "loss": 0.1189, "step": 9804 }, { "epoch": 0.9033952181323996, "grad_norm": 0.9098035827692788, "learning_rate": 1.263924573506542e-07, "loss": 0.1092, "step": 9805 }, { "epoch": 0.9034873543096696, "grad_norm": 0.9864415404989244, "learning_rate": 1.2615341387191644e-07, "loss": 0.1253, "step": 9806 }, { "epoch": 0.9035794904869398, "grad_norm": 0.9141531124611704, "learning_rate": 1.2591459080509017e-07, "loss": 0.1108, "step": 9807 }, { "epoch": 0.9036716266642097, "grad_norm": 0.938159819537608, "learning_rate": 1.2567598817234932e-07, "loss": 0.1146, "step": 9808 }, { "epoch": 0.9037637628414797, "grad_norm": 0.9704965870545296, "learning_rate": 1.2543760599584842e-07, "loss": 0.1208, "step": 9809 }, { "epoch": 0.9038558990187497, "grad_norm": 0.933503554525939, "learning_rate": 1.2519944429772168e-07, "loss": 0.1109, "step": 9810 }, { "epoch": 0.9039480351960197, "grad_norm": 1.0386870504682086, "learning_rate": 1.2496150310008226e-07, "loss": 0.1333, "step": 9811 }, { "epoch": 0.9040401713732897, "grad_norm": 0.9608349972329964, "learning_rate": 1.2472378242502247e-07, "loss": 0.1166, "step": 9812 }, { "epoch": 0.9041323075505597, "grad_norm": 0.9204922383585812, "learning_rate": 1.2448628229461522e-07, "loss": 0.1123, "step": 9813 }, { "epoch": 0.9042244437278297, "grad_norm": 0.8858240303231056, "learning_rate": 1.2424900273091206e-07, "loss": 0.1059, "step": 9814 }, { "epoch": 0.9043165799050997, "grad_norm": 0.93623941741783, "learning_rate": 1.2401194375594532e-07, "loss": 0.1171, "step": 9815 }, { "epoch": 0.9044087160823697, "grad_norm": 0.9047481046424645, "learning_rate": 1.237751053917252e-07, "loss": 0.1137, "step": 9816 }, { "epoch": 0.9045008522596397, "grad_norm": 0.9396357652997752, "learning_rate": 1.235384876602419e-07, "loss": 0.1166, "step": 9817 }, { "epoch": 0.9045929884369097, "grad_norm": 0.942299322546276, "learning_rate": 1.233020905834656e-07, "loss": 0.1109, "step": 9818 }, { "epoch": 0.9046851246141797, "grad_norm": 0.9482094011622528, "learning_rate": 1.2306591418334624e-07, "loss": 0.1236, "step": 9819 }, { "epoch": 0.9047772607914497, "grad_norm": 0.973923981370888, "learning_rate": 1.2282995848181267e-07, "loss": 0.1238, "step": 9820 }, { "epoch": 0.9048693969687198, "grad_norm": 0.851301869657395, "learning_rate": 1.2259422350077348e-07, "loss": 0.1026, "step": 9821 }, { "epoch": 0.9049615331459898, "grad_norm": 0.924457941406162, "learning_rate": 1.223587092621162e-07, "loss": 0.1147, "step": 9822 }, { "epoch": 0.9050536693232598, "grad_norm": 0.9255580493486694, "learning_rate": 1.2212341578770854e-07, "loss": 0.1168, "step": 9823 }, { "epoch": 0.9051458055005298, "grad_norm": 0.9023318089918937, "learning_rate": 1.2188834309939806e-07, "loss": 0.1158, "step": 9824 }, { "epoch": 0.9052379416777998, "grad_norm": 0.8721923584983146, "learning_rate": 1.2165349121901037e-07, "loss": 0.1126, "step": 9825 }, { "epoch": 0.9053300778550698, "grad_norm": 0.9105543472402409, "learning_rate": 1.2141886016835246e-07, "loss": 0.1137, "step": 9826 }, { "epoch": 0.9054222140323398, "grad_norm": 0.9505582223687297, "learning_rate": 1.2118444996920887e-07, "loss": 0.1172, "step": 9827 }, { "epoch": 0.9055143502096098, "grad_norm": 0.9050373360289897, "learning_rate": 1.2095026064334548e-07, "loss": 0.1186, "step": 9828 }, { "epoch": 0.9056064863868798, "grad_norm": 0.9383008904163106, "learning_rate": 1.207162922125063e-07, "loss": 0.1197, "step": 9829 }, { "epoch": 0.9056986225641498, "grad_norm": 0.913038696222506, "learning_rate": 1.2048254469841508e-07, "loss": 0.1071, "step": 9830 }, { "epoch": 0.9057907587414198, "grad_norm": 0.9626035864124302, "learning_rate": 1.2024901812277639e-07, "loss": 0.1223, "step": 9831 }, { "epoch": 0.9058828949186898, "grad_norm": 0.9518301451963114, "learning_rate": 1.2001571250727233e-07, "loss": 0.1132, "step": 9832 }, { "epoch": 0.9059750310959598, "grad_norm": 0.8904070068361779, "learning_rate": 1.1978262787356504e-07, "loss": 0.1131, "step": 9833 }, { "epoch": 0.9060671672732298, "grad_norm": 0.8945005093728289, "learning_rate": 1.1954976424329716e-07, "loss": 0.114, "step": 9834 }, { "epoch": 0.9061593034504999, "grad_norm": 0.9772335300116917, "learning_rate": 1.193171216380895e-07, "loss": 0.1168, "step": 9835 }, { "epoch": 0.9062514396277699, "grad_norm": 0.9413307860703609, "learning_rate": 1.1908470007954392e-07, "loss": 0.1175, "step": 9836 }, { "epoch": 0.9063435758050399, "grad_norm": 0.8985101580638164, "learning_rate": 1.188524995892401e-07, "loss": 0.1165, "step": 9837 }, { "epoch": 0.9064357119823099, "grad_norm": 0.9558883496465006, "learning_rate": 1.1862052018873777e-07, "loss": 0.1181, "step": 9838 }, { "epoch": 0.9065278481595799, "grad_norm": 0.9821413035235732, "learning_rate": 1.1838876189957632e-07, "loss": 0.1342, "step": 9839 }, { "epoch": 0.9066199843368499, "grad_norm": 0.9294065453209202, "learning_rate": 1.1815722474327495e-07, "loss": 0.1159, "step": 9840 }, { "epoch": 0.9067121205141199, "grad_norm": 0.941478958853304, "learning_rate": 1.1792590874133119e-07, "loss": 0.115, "step": 9841 }, { "epoch": 0.9068042566913899, "grad_norm": 0.9511182921769091, "learning_rate": 1.176948139152237e-07, "loss": 0.1225, "step": 9842 }, { "epoch": 0.9068963928686599, "grad_norm": 0.9305028355356414, "learning_rate": 1.1746394028640862e-07, "loss": 0.1236, "step": 9843 }, { "epoch": 0.9069885290459299, "grad_norm": 0.909927237435103, "learning_rate": 1.1723328787632354e-07, "loss": 0.119, "step": 9844 }, { "epoch": 0.9070806652231999, "grad_norm": 0.9264740890403086, "learning_rate": 1.1700285670638356e-07, "loss": 0.1105, "step": 9845 }, { "epoch": 0.9071728014004699, "grad_norm": 0.9332255111550347, "learning_rate": 1.1677264679798489e-07, "loss": 0.1135, "step": 9846 }, { "epoch": 0.9072649375777398, "grad_norm": 0.8932121363052186, "learning_rate": 1.1654265817250294e-07, "loss": 0.0993, "step": 9847 }, { "epoch": 0.90735707375501, "grad_norm": 0.984756358362903, "learning_rate": 1.1631289085129143e-07, "loss": 0.1209, "step": 9848 }, { "epoch": 0.90744920993228, "grad_norm": 0.8797646764277562, "learning_rate": 1.1608334485568446e-07, "loss": 0.1099, "step": 9849 }, { "epoch": 0.90754134610955, "grad_norm": 0.9266451524625748, "learning_rate": 1.1585402020699548e-07, "loss": 0.1128, "step": 9850 }, { "epoch": 0.90763348228682, "grad_norm": 0.9936081829017386, "learning_rate": 1.1562491692651723e-07, "loss": 0.1223, "step": 9851 }, { "epoch": 0.90772561846409, "grad_norm": 1.0064731245012568, "learning_rate": 1.153960350355221e-07, "loss": 0.124, "step": 9852 }, { "epoch": 0.9078177546413599, "grad_norm": 0.9249707712056661, "learning_rate": 1.1516737455526228e-07, "loss": 0.1159, "step": 9853 }, { "epoch": 0.9079098908186299, "grad_norm": 0.9030367953806261, "learning_rate": 1.14938935506968e-07, "loss": 0.1184, "step": 9854 }, { "epoch": 0.9080020269958999, "grad_norm": 1.007252102658938, "learning_rate": 1.1471071791185007e-07, "loss": 0.1362, "step": 9855 }, { "epoch": 0.9080941631731699, "grad_norm": 0.9551043810463008, "learning_rate": 1.1448272179109848e-07, "loss": 0.1233, "step": 9856 }, { "epoch": 0.9081862993504399, "grad_norm": 1.0072214892101452, "learning_rate": 1.1425494716588353e-07, "loss": 0.1238, "step": 9857 }, { "epoch": 0.9082784355277099, "grad_norm": 0.9042976003595324, "learning_rate": 1.1402739405735303e-07, "loss": 0.1067, "step": 9858 }, { "epoch": 0.9083705717049799, "grad_norm": 0.9074508823982074, "learning_rate": 1.1380006248663616e-07, "loss": 0.1212, "step": 9859 }, { "epoch": 0.9084627078822499, "grad_norm": 0.9749475166102289, "learning_rate": 1.1357295247483997e-07, "loss": 0.1184, "step": 9860 }, { "epoch": 0.9085548440595199, "grad_norm": 0.9722337825584535, "learning_rate": 1.1334606404305226e-07, "loss": 0.1253, "step": 9861 }, { "epoch": 0.90864698023679, "grad_norm": 0.9000281767600011, "learning_rate": 1.13119397212339e-07, "loss": 0.1063, "step": 9862 }, { "epoch": 0.90873911641406, "grad_norm": 0.8865674739066136, "learning_rate": 1.1289295200374667e-07, "loss": 0.1021, "step": 9863 }, { "epoch": 0.90883125259133, "grad_norm": 0.9338951859368928, "learning_rate": 1.1266672843830095e-07, "loss": 0.1124, "step": 9864 }, { "epoch": 0.9089233887686, "grad_norm": 0.9355079432027297, "learning_rate": 1.1244072653700644e-07, "loss": 0.1071, "step": 9865 }, { "epoch": 0.90901552494587, "grad_norm": 0.9657351672980083, "learning_rate": 1.122149463208469e-07, "loss": 0.1143, "step": 9866 }, { "epoch": 0.90910766112314, "grad_norm": 0.9550170888128628, "learning_rate": 1.1198938781078694e-07, "loss": 0.1183, "step": 9867 }, { "epoch": 0.90919979730041, "grad_norm": 0.9679298136534235, "learning_rate": 1.1176405102776899e-07, "loss": 0.1078, "step": 9868 }, { "epoch": 0.90929193347768, "grad_norm": 0.9730850211805437, "learning_rate": 1.1153893599271631e-07, "loss": 0.1149, "step": 9869 }, { "epoch": 0.90938406965495, "grad_norm": 0.9591042974754497, "learning_rate": 1.1131404272653051e-07, "loss": 0.1122, "step": 9870 }, { "epoch": 0.90947620583222, "grad_norm": 0.9421743521813682, "learning_rate": 1.1108937125009266e-07, "loss": 0.1212, "step": 9871 }, { "epoch": 0.90956834200949, "grad_norm": 0.9474137614317413, "learning_rate": 1.1086492158426387e-07, "loss": 0.1276, "step": 9872 }, { "epoch": 0.90966047818676, "grad_norm": 0.9496210187132605, "learning_rate": 1.106406937498844e-07, "loss": 0.117, "step": 9873 }, { "epoch": 0.90975261436403, "grad_norm": 0.9623210428089947, "learning_rate": 1.1041668776777342e-07, "loss": 0.1188, "step": 9874 }, { "epoch": 0.9098447505413001, "grad_norm": 0.9688398891785434, "learning_rate": 1.1019290365873042e-07, "loss": 0.1167, "step": 9875 }, { "epoch": 0.9099368867185701, "grad_norm": 0.9438973611520155, "learning_rate": 1.0996934144353322e-07, "loss": 0.1238, "step": 9876 }, { "epoch": 0.9100290228958401, "grad_norm": 1.0123754346766263, "learning_rate": 1.0974600114293993e-07, "loss": 0.131, "step": 9877 }, { "epoch": 0.9101211590731101, "grad_norm": 0.9369173621826444, "learning_rate": 1.0952288277768786e-07, "loss": 0.1243, "step": 9878 }, { "epoch": 0.9102132952503801, "grad_norm": 0.9104725907747405, "learning_rate": 1.0929998636849321e-07, "loss": 0.1114, "step": 9879 }, { "epoch": 0.9103054314276501, "grad_norm": 0.8911323682845301, "learning_rate": 1.090773119360522e-07, "loss": 0.1047, "step": 9880 }, { "epoch": 0.9103975676049201, "grad_norm": 0.9328489809284518, "learning_rate": 1.0885485950103997e-07, "loss": 0.115, "step": 9881 }, { "epoch": 0.9104897037821901, "grad_norm": 0.9185301495705797, "learning_rate": 1.0863262908411165e-07, "loss": 0.1102, "step": 9882 }, { "epoch": 0.9105818399594601, "grad_norm": 0.9338449898577366, "learning_rate": 1.0841062070590074e-07, "loss": 0.1195, "step": 9883 }, { "epoch": 0.9106739761367301, "grad_norm": 0.9028633254451033, "learning_rate": 1.0818883438702105e-07, "loss": 0.107, "step": 9884 }, { "epoch": 0.9107661123140001, "grad_norm": 0.9441449194556039, "learning_rate": 1.0796727014806607e-07, "loss": 0.1209, "step": 9885 }, { "epoch": 0.9108582484912701, "grad_norm": 0.943313665217802, "learning_rate": 1.0774592800960715e-07, "loss": 0.119, "step": 9886 }, { "epoch": 0.9109503846685401, "grad_norm": 0.9296020252630918, "learning_rate": 1.0752480799219616e-07, "loss": 0.117, "step": 9887 }, { "epoch": 0.91104252084581, "grad_norm": 0.9667448070909315, "learning_rate": 1.073039101163642e-07, "loss": 0.1242, "step": 9888 }, { "epoch": 0.9111346570230802, "grad_norm": 0.909844133598178, "learning_rate": 1.0708323440262153e-07, "loss": 0.103, "step": 9889 }, { "epoch": 0.9112267932003502, "grad_norm": 0.9273168759001507, "learning_rate": 1.0686278087145868e-07, "loss": 0.1183, "step": 9890 }, { "epoch": 0.9113189293776202, "grad_norm": 0.9491675032145784, "learning_rate": 1.0664254954334402e-07, "loss": 0.112, "step": 9891 }, { "epoch": 0.9114110655548902, "grad_norm": 0.9436010409635454, "learning_rate": 1.064225404387259e-07, "loss": 0.1163, "step": 9892 }, { "epoch": 0.9115032017321602, "grad_norm": 0.9369245507410711, "learning_rate": 1.0620275357803244e-07, "loss": 0.1219, "step": 9893 }, { "epoch": 0.9115953379094301, "grad_norm": 0.934345303014593, "learning_rate": 1.059831889816712e-07, "loss": 0.1149, "step": 9894 }, { "epoch": 0.9116874740867001, "grad_norm": 0.9164697181421179, "learning_rate": 1.0576384667002837e-07, "loss": 0.1091, "step": 9895 }, { "epoch": 0.9117796102639701, "grad_norm": 0.9185437968697807, "learning_rate": 1.0554472666347043e-07, "loss": 0.1245, "step": 9896 }, { "epoch": 0.9118717464412401, "grad_norm": 0.9367584691113693, "learning_rate": 1.0532582898234167e-07, "loss": 0.1224, "step": 9897 }, { "epoch": 0.9119638826185101, "grad_norm": 0.9166216071894971, "learning_rate": 1.0510715364696806e-07, "loss": 0.1278, "step": 9898 }, { "epoch": 0.9120560187957801, "grad_norm": 0.8699664872405362, "learning_rate": 1.048887006776525e-07, "loss": 0.1071, "step": 9899 }, { "epoch": 0.9121481549730501, "grad_norm": 0.9623063255443589, "learning_rate": 1.0467047009467878e-07, "loss": 0.1179, "step": 9900 }, { "epoch": 0.9122402911503201, "grad_norm": 0.9751253664006017, "learning_rate": 1.0445246191831015e-07, "loss": 0.1222, "step": 9901 }, { "epoch": 0.9123324273275901, "grad_norm": 0.9550208126269293, "learning_rate": 1.0423467616878819e-07, "loss": 0.1223, "step": 9902 }, { "epoch": 0.9124245635048602, "grad_norm": 0.9520464240161787, "learning_rate": 1.040171128663342e-07, "loss": 0.1113, "step": 9903 }, { "epoch": 0.9125166996821302, "grad_norm": 0.9800160176815804, "learning_rate": 1.03799772031149e-07, "loss": 0.1258, "step": 9904 }, { "epoch": 0.9126088358594002, "grad_norm": 0.9131937286932651, "learning_rate": 1.035826536834128e-07, "loss": 0.1107, "step": 9905 }, { "epoch": 0.9127009720366702, "grad_norm": 0.9198729568337226, "learning_rate": 1.0336575784328534e-07, "loss": 0.1129, "step": 9906 }, { "epoch": 0.9127931082139402, "grad_norm": 0.9354820808259293, "learning_rate": 1.031490845309055e-07, "loss": 0.1155, "step": 9907 }, { "epoch": 0.9128852443912102, "grad_norm": 0.9341493220138887, "learning_rate": 1.029326337663905e-07, "loss": 0.1136, "step": 9908 }, { "epoch": 0.9129773805684802, "grad_norm": 0.9341967204483506, "learning_rate": 1.0271640556983875e-07, "loss": 0.1117, "step": 9909 }, { "epoch": 0.9130695167457502, "grad_norm": 0.9531420223699355, "learning_rate": 1.0250039996132637e-07, "loss": 0.1191, "step": 9910 }, { "epoch": 0.9131616529230202, "grad_norm": 0.9487379938366196, "learning_rate": 1.0228461696091041e-07, "loss": 0.116, "step": 9911 }, { "epoch": 0.9132537891002902, "grad_norm": 0.9383824540449928, "learning_rate": 1.0206905658862592e-07, "loss": 0.1203, "step": 9912 }, { "epoch": 0.9133459252775602, "grad_norm": 0.8714240991607098, "learning_rate": 1.0185371886448719e-07, "loss": 0.1039, "step": 9913 }, { "epoch": 0.9134380614548302, "grad_norm": 0.9609563547823312, "learning_rate": 1.016386038084885e-07, "loss": 0.1293, "step": 9914 }, { "epoch": 0.9135301976321002, "grad_norm": 0.9029910797521731, "learning_rate": 1.0142371144060414e-07, "loss": 0.1026, "step": 9915 }, { "epoch": 0.9136223338093703, "grad_norm": 0.9201532523605698, "learning_rate": 1.0120904178078594e-07, "loss": 0.1125, "step": 9916 }, { "epoch": 0.9137144699866403, "grad_norm": 0.935929708191619, "learning_rate": 1.0099459484896684e-07, "loss": 0.1161, "step": 9917 }, { "epoch": 0.9138066061639103, "grad_norm": 0.9579121910083173, "learning_rate": 1.007803706650573e-07, "loss": 0.1201, "step": 9918 }, { "epoch": 0.9138987423411803, "grad_norm": 0.8990511111015285, "learning_rate": 1.0056636924894864e-07, "loss": 0.1082, "step": 9919 }, { "epoch": 0.9139908785184503, "grad_norm": 0.9419869327103809, "learning_rate": 1.0035259062051079e-07, "loss": 0.111, "step": 9920 }, { "epoch": 0.9140830146957203, "grad_norm": 0.9481056579845162, "learning_rate": 1.0013903479959313e-07, "loss": 0.1107, "step": 9921 }, { "epoch": 0.9141751508729903, "grad_norm": 0.9419350381453994, "learning_rate": 9.992570180602484e-08, "loss": 0.1183, "step": 9922 }, { "epoch": 0.9142672870502603, "grad_norm": 0.9768882094713995, "learning_rate": 9.971259165961312e-08, "loss": 0.1186, "step": 9923 }, { "epoch": 0.9143594232275303, "grad_norm": 0.9286412912317882, "learning_rate": 9.949970438014544e-08, "loss": 0.1103, "step": 9924 }, { "epoch": 0.9144515594048003, "grad_norm": 0.8956628077663775, "learning_rate": 9.928703998738853e-08, "loss": 0.1068, "step": 9925 }, { "epoch": 0.9145436955820703, "grad_norm": 0.9399039480242753, "learning_rate": 9.907459850108824e-08, "loss": 0.1131, "step": 9926 }, { "epoch": 0.9146358317593403, "grad_norm": 0.9283382943599822, "learning_rate": 9.886237994097048e-08, "loss": 0.1147, "step": 9927 }, { "epoch": 0.9147279679366103, "grad_norm": 0.9424001613736077, "learning_rate": 9.86503843267389e-08, "loss": 0.115, "step": 9928 }, { "epoch": 0.9148201041138803, "grad_norm": 0.9528230598198278, "learning_rate": 9.843861167807722e-08, "loss": 0.1271, "step": 9929 }, { "epoch": 0.9149122402911504, "grad_norm": 0.8861050600901383, "learning_rate": 9.822706201464915e-08, "loss": 0.1077, "step": 9930 }, { "epoch": 0.9150043764684204, "grad_norm": 0.9660146570263729, "learning_rate": 9.801573535609677e-08, "loss": 0.1187, "step": 9931 }, { "epoch": 0.9150965126456904, "grad_norm": 0.9516861737652685, "learning_rate": 9.780463172204186e-08, "loss": 0.122, "step": 9932 }, { "epoch": 0.9151886488229604, "grad_norm": 0.955635835206404, "learning_rate": 9.759375113208541e-08, "loss": 0.1159, "step": 9933 }, { "epoch": 0.9152807850002304, "grad_norm": 0.9384288101603031, "learning_rate": 9.738309360580789e-08, "loss": 0.1297, "step": 9934 }, { "epoch": 0.9153729211775004, "grad_norm": 0.9568435927717273, "learning_rate": 9.717265916276863e-08, "loss": 0.1173, "step": 9935 }, { "epoch": 0.9154650573547704, "grad_norm": 0.8958470833309566, "learning_rate": 9.696244782250675e-08, "loss": 0.1032, "step": 9936 }, { "epoch": 0.9155571935320403, "grad_norm": 0.9548627720230944, "learning_rate": 9.675245960453966e-08, "loss": 0.1208, "step": 9937 }, { "epoch": 0.9156493297093103, "grad_norm": 0.9615401456254469, "learning_rate": 9.654269452836567e-08, "loss": 0.1273, "step": 9938 }, { "epoch": 0.9157414658865803, "grad_norm": 0.9336984788547501, "learning_rate": 9.633315261346115e-08, "loss": 0.1111, "step": 9939 }, { "epoch": 0.9158336020638503, "grad_norm": 0.9676566615609804, "learning_rate": 9.612383387928248e-08, "loss": 0.1238, "step": 9940 }, { "epoch": 0.9159257382411203, "grad_norm": 0.9365548618439336, "learning_rate": 9.59147383452641e-08, "loss": 0.1171, "step": 9941 }, { "epoch": 0.9160178744183903, "grad_norm": 0.9679108953343, "learning_rate": 9.570586603082078e-08, "loss": 0.1187, "step": 9942 }, { "epoch": 0.9161100105956604, "grad_norm": 0.9693508193733982, "learning_rate": 9.549721695534669e-08, "loss": 0.1153, "step": 9943 }, { "epoch": 0.9162021467729304, "grad_norm": 0.8984541268521159, "learning_rate": 9.528879113821526e-08, "loss": 0.1151, "step": 9944 }, { "epoch": 0.9162942829502004, "grad_norm": 0.9104078134300633, "learning_rate": 9.508058859877794e-08, "loss": 0.1149, "step": 9945 }, { "epoch": 0.9163864191274704, "grad_norm": 0.9581148806404735, "learning_rate": 9.487260935636678e-08, "loss": 0.1153, "step": 9946 }, { "epoch": 0.9164785553047404, "grad_norm": 0.8834590853387024, "learning_rate": 9.466485343029269e-08, "loss": 0.109, "step": 9947 }, { "epoch": 0.9165706914820104, "grad_norm": 0.9112418757641385, "learning_rate": 9.44573208398461e-08, "loss": 0.1125, "step": 9948 }, { "epoch": 0.9166628276592804, "grad_norm": 0.9450438994219061, "learning_rate": 9.425001160429603e-08, "loss": 0.1159, "step": 9949 }, { "epoch": 0.9167549638365504, "grad_norm": 0.9472996467579019, "learning_rate": 9.404292574289126e-08, "loss": 0.1183, "step": 9950 }, { "epoch": 0.9168471000138204, "grad_norm": 0.8966195092536017, "learning_rate": 9.383606327485973e-08, "loss": 0.1044, "step": 9951 }, { "epoch": 0.9169392361910904, "grad_norm": 0.9532262581930256, "learning_rate": 9.362942421940885e-08, "loss": 0.1214, "step": 9952 }, { "epoch": 0.9170313723683604, "grad_norm": 0.9579253822700269, "learning_rate": 9.342300859572467e-08, "loss": 0.1217, "step": 9953 }, { "epoch": 0.9171235085456304, "grad_norm": 0.9434676831721229, "learning_rate": 9.321681642297298e-08, "loss": 0.1228, "step": 9954 }, { "epoch": 0.9172156447229004, "grad_norm": 0.9044628242856921, "learning_rate": 9.301084772029928e-08, "loss": 0.1061, "step": 9955 }, { "epoch": 0.9173077809001704, "grad_norm": 0.9238703964017801, "learning_rate": 9.280510250682745e-08, "loss": 0.123, "step": 9956 }, { "epoch": 0.9173999170774405, "grad_norm": 0.8527902116443628, "learning_rate": 9.259958080166081e-08, "loss": 0.1017, "step": 9957 }, { "epoch": 0.9174920532547105, "grad_norm": 0.9095947345394509, "learning_rate": 9.23942826238819e-08, "loss": 0.1098, "step": 9958 }, { "epoch": 0.9175841894319805, "grad_norm": 0.8956255963339151, "learning_rate": 9.218920799255293e-08, "loss": 0.1111, "step": 9959 }, { "epoch": 0.9176763256092505, "grad_norm": 0.971998459665577, "learning_rate": 9.198435692671565e-08, "loss": 0.1157, "step": 9960 }, { "epoch": 0.9177684617865205, "grad_norm": 0.9697718892880428, "learning_rate": 9.177972944538982e-08, "loss": 0.1131, "step": 9961 }, { "epoch": 0.9178605979637905, "grad_norm": 0.9544535119686984, "learning_rate": 9.157532556757526e-08, "loss": 0.1171, "step": 9962 }, { "epoch": 0.9179527341410605, "grad_norm": 0.9497954111135992, "learning_rate": 9.137114531225066e-08, "loss": 0.115, "step": 9963 }, { "epoch": 0.9180448703183305, "grad_norm": 0.9652083061124271, "learning_rate": 9.116718869837449e-08, "loss": 0.1291, "step": 9964 }, { "epoch": 0.9181370064956005, "grad_norm": 0.9278351761508028, "learning_rate": 9.096345574488435e-08, "loss": 0.1164, "step": 9965 }, { "epoch": 0.9182291426728705, "grad_norm": 0.9466540329969422, "learning_rate": 9.075994647069653e-08, "loss": 0.1134, "step": 9966 }, { "epoch": 0.9183212788501405, "grad_norm": 0.9546339745574965, "learning_rate": 9.0556660894707e-08, "loss": 0.1246, "step": 9967 }, { "epoch": 0.9184134150274105, "grad_norm": 0.9071641213415044, "learning_rate": 9.035359903579039e-08, "loss": 0.1085, "step": 9968 }, { "epoch": 0.9185055512046805, "grad_norm": 0.9984994115231136, "learning_rate": 9.015076091280189e-08, "loss": 0.116, "step": 9969 }, { "epoch": 0.9185976873819506, "grad_norm": 0.8703859546656373, "learning_rate": 8.994814654457451e-08, "loss": 0.1085, "step": 9970 }, { "epoch": 0.9186898235592206, "grad_norm": 1.0156798267992415, "learning_rate": 8.974575594992124e-08, "loss": 0.1122, "step": 9971 }, { "epoch": 0.9187819597364906, "grad_norm": 0.921122595652039, "learning_rate": 8.954358914763373e-08, "loss": 0.11, "step": 9972 }, { "epoch": 0.9188740959137606, "grad_norm": 0.8869429691351998, "learning_rate": 8.934164615648333e-08, "loss": 0.1037, "step": 9973 }, { "epoch": 0.9189662320910306, "grad_norm": 0.9087355779950654, "learning_rate": 8.913992699522062e-08, "loss": 0.1063, "step": 9974 }, { "epoch": 0.9190583682683006, "grad_norm": 0.9171403022519661, "learning_rate": 8.893843168257504e-08, "loss": 0.115, "step": 9975 }, { "epoch": 0.9191505044455706, "grad_norm": 0.9090886668961073, "learning_rate": 8.873716023725581e-08, "loss": 0.105, "step": 9976 }, { "epoch": 0.9192426406228406, "grad_norm": 0.9517240376938657, "learning_rate": 8.853611267795076e-08, "loss": 0.1249, "step": 9977 }, { "epoch": 0.9193347768001106, "grad_norm": 0.8765303454447209, "learning_rate": 8.833528902332688e-08, "loss": 0.0996, "step": 9978 }, { "epoch": 0.9194269129773806, "grad_norm": 0.9423252168440611, "learning_rate": 8.813468929203095e-08, "loss": 0.1141, "step": 9979 }, { "epoch": 0.9195190491546505, "grad_norm": 0.9496341928199417, "learning_rate": 8.793431350268861e-08, "loss": 0.1184, "step": 9980 }, { "epoch": 0.9196111853319205, "grad_norm": 0.9484875310380811, "learning_rate": 8.773416167390525e-08, "loss": 0.1151, "step": 9981 }, { "epoch": 0.9197033215091905, "grad_norm": 0.9375366241158559, "learning_rate": 8.753423382426463e-08, "loss": 0.1048, "step": 9982 }, { "epoch": 0.9197954576864605, "grad_norm": 0.9891837534169375, "learning_rate": 8.733452997232967e-08, "loss": 0.1165, "step": 9983 }, { "epoch": 0.9198875938637306, "grad_norm": 0.9944041684722955, "learning_rate": 8.713505013664303e-08, "loss": 0.1319, "step": 9984 }, { "epoch": 0.9199797300410006, "grad_norm": 0.9809021101391047, "learning_rate": 8.693579433572741e-08, "loss": 0.1245, "step": 9985 }, { "epoch": 0.9200718662182706, "grad_norm": 1.0109863882514742, "learning_rate": 8.673676258808244e-08, "loss": 0.1272, "step": 9986 }, { "epoch": 0.9201640023955406, "grad_norm": 0.9204852372456744, "learning_rate": 8.653795491218891e-08, "loss": 0.105, "step": 9987 }, { "epoch": 0.9202561385728106, "grad_norm": 0.9258040595618826, "learning_rate": 8.633937132650593e-08, "loss": 0.1134, "step": 9988 }, { "epoch": 0.9203482747500806, "grad_norm": 0.9293784596137714, "learning_rate": 8.614101184947238e-08, "loss": 0.1193, "step": 9989 }, { "epoch": 0.9204404109273506, "grad_norm": 0.9030214124625668, "learning_rate": 8.594287649950544e-08, "loss": 0.1097, "step": 9990 }, { "epoch": 0.9205325471046206, "grad_norm": 0.9118329350693379, "learning_rate": 8.574496529500209e-08, "loss": 0.1103, "step": 9991 }, { "epoch": 0.9206246832818906, "grad_norm": 0.9446461098329167, "learning_rate": 8.554727825433872e-08, "loss": 0.1192, "step": 9992 }, { "epoch": 0.9207168194591606, "grad_norm": 0.9733014763065767, "learning_rate": 8.53498153958704e-08, "loss": 0.1158, "step": 9993 }, { "epoch": 0.9208089556364306, "grad_norm": 0.9264514392864119, "learning_rate": 8.515257673793159e-08, "loss": 0.1144, "step": 9994 }, { "epoch": 0.9209010918137006, "grad_norm": 0.9796407782816811, "learning_rate": 8.4955562298836e-08, "loss": 0.1202, "step": 9995 }, { "epoch": 0.9209932279909706, "grad_norm": 0.9384931539095959, "learning_rate": 8.475877209687594e-08, "loss": 0.1071, "step": 9996 }, { "epoch": 0.9210853641682406, "grad_norm": 0.9675554946533331, "learning_rate": 8.456220615032429e-08, "loss": 0.1209, "step": 9997 }, { "epoch": 0.9211775003455107, "grad_norm": 0.9773182683269641, "learning_rate": 8.436586447743172e-08, "loss": 0.1226, "step": 9998 }, { "epoch": 0.9212696365227807, "grad_norm": 0.9262159911644006, "learning_rate": 8.416974709642839e-08, "loss": 0.1157, "step": 9999 }, { "epoch": 0.9213617727000507, "grad_norm": 0.9153048649416058, "learning_rate": 8.397385402552415e-08, "loss": 0.1117, "step": 10000 }, { "epoch": 0.9213617727000507, "eval_loss": 0.11681114137172699, "eval_runtime": 300.109, "eval_samples_per_second": 23.382, "eval_steps_per_second": 2.926, "step": 10000 }, { "epoch": 0.9214539088773207, "grad_norm": 0.8953472769400774, "learning_rate": 8.377818528290754e-08, "loss": 0.1116, "step": 10001 }, { "epoch": 0.9215460450545907, "grad_norm": 0.9605648976460276, "learning_rate": 8.358274088674651e-08, "loss": 0.1208, "step": 10002 }, { "epoch": 0.9216381812318607, "grad_norm": 0.9322621523925322, "learning_rate": 8.338752085518819e-08, "loss": 0.1114, "step": 10003 }, { "epoch": 0.9217303174091307, "grad_norm": 0.9272123065068724, "learning_rate": 8.31925252063584e-08, "loss": 0.1073, "step": 10004 }, { "epoch": 0.9218224535864007, "grad_norm": 0.9332881339966433, "learning_rate": 8.299775395836262e-08, "loss": 0.1133, "step": 10005 }, { "epoch": 0.9219145897636707, "grad_norm": 0.9102158918404231, "learning_rate": 8.280320712928585e-08, "loss": 0.1163, "step": 10006 }, { "epoch": 0.9220067259409407, "grad_norm": 0.9326940952236754, "learning_rate": 8.260888473719114e-08, "loss": 0.1158, "step": 10007 }, { "epoch": 0.9220988621182107, "grad_norm": 0.9461833110524152, "learning_rate": 8.241478680012183e-08, "loss": 0.116, "step": 10008 }, { "epoch": 0.9221909982954807, "grad_norm": 0.9551382205517904, "learning_rate": 8.222091333609989e-08, "loss": 0.1193, "step": 10009 }, { "epoch": 0.9222831344727507, "grad_norm": 0.9370064660179406, "learning_rate": 8.202726436312619e-08, "loss": 0.1092, "step": 10010 }, { "epoch": 0.9223752706500208, "grad_norm": 0.9791261039611153, "learning_rate": 8.183383989918109e-08, "loss": 0.1194, "step": 10011 }, { "epoch": 0.9224674068272908, "grad_norm": 0.9504216348772377, "learning_rate": 8.164063996222438e-08, "loss": 0.12, "step": 10012 }, { "epoch": 0.9225595430045608, "grad_norm": 0.936257019374371, "learning_rate": 8.14476645701942e-08, "loss": 0.1137, "step": 10013 }, { "epoch": 0.9226516791818308, "grad_norm": 0.9381555121278079, "learning_rate": 8.125491374100902e-08, "loss": 0.1155, "step": 10014 }, { "epoch": 0.9227438153591008, "grad_norm": 1.0079460709363284, "learning_rate": 8.106238749256562e-08, "loss": 0.1185, "step": 10015 }, { "epoch": 0.9228359515363708, "grad_norm": 0.958334489638207, "learning_rate": 8.087008584273942e-08, "loss": 0.1193, "step": 10016 }, { "epoch": 0.9229280877136408, "grad_norm": 0.9068490658601089, "learning_rate": 8.067800880938615e-08, "loss": 0.1079, "step": 10017 }, { "epoch": 0.9230202238909108, "grad_norm": 0.9435404073158213, "learning_rate": 8.048615641034013e-08, "loss": 0.1118, "step": 10018 }, { "epoch": 0.9231123600681808, "grad_norm": 0.9200330865058696, "learning_rate": 8.029452866341492e-08, "loss": 0.1182, "step": 10019 }, { "epoch": 0.9232044962454508, "grad_norm": 0.914248690361516, "learning_rate": 8.010312558640348e-08, "loss": 0.1062, "step": 10020 }, { "epoch": 0.9232966324227208, "grad_norm": 0.937085460107984, "learning_rate": 7.991194719707663e-08, "loss": 0.1207, "step": 10021 }, { "epoch": 0.9233887685999908, "grad_norm": 0.9829486329659389, "learning_rate": 7.972099351318624e-08, "loss": 0.1263, "step": 10022 }, { "epoch": 0.9234809047772607, "grad_norm": 0.9274488053766404, "learning_rate": 7.953026455246233e-08, "loss": 0.1171, "step": 10023 }, { "epoch": 0.9235730409545307, "grad_norm": 0.9512581459490765, "learning_rate": 7.933976033261348e-08, "loss": 0.1118, "step": 10024 }, { "epoch": 0.9236651771318009, "grad_norm": 0.9332557412920207, "learning_rate": 7.914948087132862e-08, "loss": 0.1221, "step": 10025 }, { "epoch": 0.9237573133090708, "grad_norm": 0.9534856497766078, "learning_rate": 7.895942618627472e-08, "loss": 0.1164, "step": 10026 }, { "epoch": 0.9238494494863408, "grad_norm": 0.9511982683101914, "learning_rate": 7.876959629509907e-08, "loss": 0.1168, "step": 10027 }, { "epoch": 0.9239415856636108, "grad_norm": 0.9863978122827085, "learning_rate": 7.85799912154267e-08, "loss": 0.1153, "step": 10028 }, { "epoch": 0.9240337218408808, "grad_norm": 0.856384935668297, "learning_rate": 7.839061096486273e-08, "loss": 0.1043, "step": 10029 }, { "epoch": 0.9241258580181508, "grad_norm": 0.9659271632688743, "learning_rate": 7.82014555609914e-08, "loss": 0.1252, "step": 10030 }, { "epoch": 0.9242179941954208, "grad_norm": 0.9124472370190727, "learning_rate": 7.801252502137535e-08, "loss": 0.1076, "step": 10031 }, { "epoch": 0.9243101303726908, "grad_norm": 0.943476757994991, "learning_rate": 7.782381936355693e-08, "loss": 0.1244, "step": 10032 }, { "epoch": 0.9244022665499608, "grad_norm": 0.9477335815226061, "learning_rate": 7.763533860505767e-08, "loss": 0.1219, "step": 10033 }, { "epoch": 0.9244944027272308, "grad_norm": 0.9474630709253589, "learning_rate": 7.744708276337776e-08, "loss": 0.1115, "step": 10034 }, { "epoch": 0.9245865389045008, "grad_norm": 0.9124267452326419, "learning_rate": 7.725905185599735e-08, "loss": 0.111, "step": 10035 }, { "epoch": 0.9246786750817708, "grad_norm": 0.9073193546423393, "learning_rate": 7.707124590037445e-08, "loss": 0.1079, "step": 10036 }, { "epoch": 0.9247708112590408, "grad_norm": 0.8735953297989576, "learning_rate": 7.688366491394706e-08, "loss": 0.1057, "step": 10037 }, { "epoch": 0.9248629474363109, "grad_norm": 0.8926319327455886, "learning_rate": 7.669630891413204e-08, "loss": 0.1076, "step": 10038 }, { "epoch": 0.9249550836135809, "grad_norm": 0.9033458944044545, "learning_rate": 7.650917791832608e-08, "loss": 0.108, "step": 10039 }, { "epoch": 0.9250472197908509, "grad_norm": 0.9280311985098185, "learning_rate": 7.632227194390301e-08, "loss": 0.1187, "step": 10040 }, { "epoch": 0.9251393559681209, "grad_norm": 0.8983958238137177, "learning_rate": 7.613559100821843e-08, "loss": 0.1087, "step": 10041 }, { "epoch": 0.9252314921453909, "grad_norm": 0.9239527817591513, "learning_rate": 7.594913512860485e-08, "loss": 0.1131, "step": 10042 }, { "epoch": 0.9253236283226609, "grad_norm": 0.920847354160995, "learning_rate": 7.57629043223751e-08, "loss": 0.1129, "step": 10043 }, { "epoch": 0.9254157644999309, "grad_norm": 0.9208440112474245, "learning_rate": 7.557689860682032e-08, "loss": 0.1123, "step": 10044 }, { "epoch": 0.9255079006772009, "grad_norm": 0.9693290819377761, "learning_rate": 7.539111799921145e-08, "loss": 0.1235, "step": 10045 }, { "epoch": 0.9256000368544709, "grad_norm": 0.9707370388186811, "learning_rate": 7.520556251679856e-08, "loss": 0.1157, "step": 10046 }, { "epoch": 0.9256921730317409, "grad_norm": 0.9373089571232343, "learning_rate": 7.502023217680982e-08, "loss": 0.114, "step": 10047 }, { "epoch": 0.9257843092090109, "grad_norm": 0.9666360388533218, "learning_rate": 7.483512699645368e-08, "loss": 0.1203, "step": 10048 }, { "epoch": 0.9258764453862809, "grad_norm": 0.9974723494362501, "learning_rate": 7.465024699291696e-08, "loss": 0.1253, "step": 10049 }, { "epoch": 0.9259685815635509, "grad_norm": 0.9684830257669516, "learning_rate": 7.446559218336563e-08, "loss": 0.1269, "step": 10050 }, { "epoch": 0.9260607177408209, "grad_norm": 0.941063326323892, "learning_rate": 7.428116258494545e-08, "loss": 0.1184, "step": 10051 }, { "epoch": 0.926152853918091, "grad_norm": 0.959037420198221, "learning_rate": 7.409695821478046e-08, "loss": 0.117, "step": 10052 }, { "epoch": 0.926244990095361, "grad_norm": 0.9716484105543598, "learning_rate": 7.391297908997341e-08, "loss": 0.117, "step": 10053 }, { "epoch": 0.926337126272631, "grad_norm": 0.9243109744426414, "learning_rate": 7.372922522760755e-08, "loss": 0.1085, "step": 10054 }, { "epoch": 0.926429262449901, "grad_norm": 0.9668232342765302, "learning_rate": 7.354569664474426e-08, "loss": 0.1202, "step": 10055 }, { "epoch": 0.926521398627171, "grad_norm": 0.9634841438555413, "learning_rate": 7.33623933584246e-08, "loss": 0.1233, "step": 10056 }, { "epoch": 0.926613534804441, "grad_norm": 0.9764871788307892, "learning_rate": 7.317931538566747e-08, "loss": 0.1249, "step": 10057 }, { "epoch": 0.926705670981711, "grad_norm": 0.9915884927220054, "learning_rate": 7.299646274347205e-08, "loss": 0.1226, "step": 10058 }, { "epoch": 0.926797807158981, "grad_norm": 0.9689646354780623, "learning_rate": 7.281383544881642e-08, "loss": 0.1245, "step": 10059 }, { "epoch": 0.926889943336251, "grad_norm": 1.0001034469069645, "learning_rate": 7.263143351865759e-08, "loss": 0.1312, "step": 10060 }, { "epoch": 0.926982079513521, "grad_norm": 0.975500103508216, "learning_rate": 7.244925696993088e-08, "loss": 0.1208, "step": 10061 }, { "epoch": 0.927074215690791, "grad_norm": 0.9932454627539781, "learning_rate": 7.226730581955249e-08, "loss": 0.1289, "step": 10062 }, { "epoch": 0.927166351868061, "grad_norm": 0.9673568247424288, "learning_rate": 7.208558008441557e-08, "loss": 0.1232, "step": 10063 }, { "epoch": 0.927258488045331, "grad_norm": 0.8870914398841915, "learning_rate": 7.190407978139413e-08, "loss": 0.1098, "step": 10064 }, { "epoch": 0.927350624222601, "grad_norm": 0.9218735423503247, "learning_rate": 7.172280492733996e-08, "loss": 0.1188, "step": 10065 }, { "epoch": 0.9274427603998711, "grad_norm": 0.9383407246695775, "learning_rate": 7.15417555390846e-08, "loss": 0.1243, "step": 10066 }, { "epoch": 0.927534896577141, "grad_norm": 0.9179094929795911, "learning_rate": 7.136093163343877e-08, "loss": 0.1197, "step": 10067 }, { "epoch": 0.927627032754411, "grad_norm": 0.9438528092701421, "learning_rate": 7.118033322719209e-08, "loss": 0.1086, "step": 10068 }, { "epoch": 0.927719168931681, "grad_norm": 0.9768979953603585, "learning_rate": 7.099996033711254e-08, "loss": 0.1215, "step": 10069 }, { "epoch": 0.927811305108951, "grad_norm": 0.92007035394007, "learning_rate": 7.081981297994784e-08, "loss": 0.1098, "step": 10070 }, { "epoch": 0.927903441286221, "grad_norm": 0.9547575066326877, "learning_rate": 7.063989117242514e-08, "loss": 0.1197, "step": 10071 }, { "epoch": 0.927995577463491, "grad_norm": 0.9142725216376145, "learning_rate": 7.046019493125028e-08, "loss": 0.1078, "step": 10072 }, { "epoch": 0.928087713640761, "grad_norm": 0.9579050206705731, "learning_rate": 7.028072427310767e-08, "loss": 0.1216, "step": 10073 }, { "epoch": 0.928179849818031, "grad_norm": 0.9297320319381187, "learning_rate": 7.010147921466121e-08, "loss": 0.1143, "step": 10074 }, { "epoch": 0.928271985995301, "grad_norm": 0.9497750407350231, "learning_rate": 6.992245977255369e-08, "loss": 0.1201, "step": 10075 }, { "epoch": 0.928364122172571, "grad_norm": 0.9907453496156512, "learning_rate": 6.974366596340765e-08, "loss": 0.1263, "step": 10076 }, { "epoch": 0.928456258349841, "grad_norm": 0.9239784611446478, "learning_rate": 6.95650978038237e-08, "loss": 0.1206, "step": 10077 }, { "epoch": 0.928548394527111, "grad_norm": 0.9308862971712853, "learning_rate": 6.93867553103822e-08, "loss": 0.1152, "step": 10078 }, { "epoch": 0.9286405307043811, "grad_norm": 0.9778397868120814, "learning_rate": 6.920863849964154e-08, "loss": 0.1242, "step": 10079 }, { "epoch": 0.9287326668816511, "grad_norm": 0.9989156628975393, "learning_rate": 6.903074738814047e-08, "loss": 0.1256, "step": 10080 }, { "epoch": 0.9288248030589211, "grad_norm": 0.9604361889555622, "learning_rate": 6.88530819923966e-08, "loss": 0.1183, "step": 10081 }, { "epoch": 0.9289169392361911, "grad_norm": 0.9902749552785988, "learning_rate": 6.867564232890534e-08, "loss": 0.1297, "step": 10082 }, { "epoch": 0.9290090754134611, "grad_norm": 0.9193712003271258, "learning_rate": 6.849842841414239e-08, "loss": 0.1128, "step": 10083 }, { "epoch": 0.9291012115907311, "grad_norm": 0.927277125839814, "learning_rate": 6.832144026456211e-08, "loss": 0.1116, "step": 10084 }, { "epoch": 0.9291933477680011, "grad_norm": 0.9509926978450719, "learning_rate": 6.8144677896598e-08, "loss": 0.118, "step": 10085 }, { "epoch": 0.9292854839452711, "grad_norm": 0.89136791200629, "learning_rate": 6.796814132666196e-08, "loss": 0.1133, "step": 10086 }, { "epoch": 0.9293776201225411, "grad_norm": 0.9316915181788125, "learning_rate": 6.779183057114585e-08, "loss": 0.1161, "step": 10087 }, { "epoch": 0.9294697562998111, "grad_norm": 0.955332942284261, "learning_rate": 6.761574564641993e-08, "loss": 0.1207, "step": 10088 }, { "epoch": 0.9295618924770811, "grad_norm": 0.9516950712819171, "learning_rate": 6.743988656883388e-08, "loss": 0.1209, "step": 10089 }, { "epoch": 0.9296540286543511, "grad_norm": 0.955229675549783, "learning_rate": 6.726425335471632e-08, "loss": 0.1173, "step": 10090 }, { "epoch": 0.9297461648316211, "grad_norm": 0.8921231535918545, "learning_rate": 6.708884602037446e-08, "loss": 0.1112, "step": 10091 }, { "epoch": 0.9298383010088911, "grad_norm": 0.9489220528435535, "learning_rate": 6.691366458209503e-08, "loss": 0.1244, "step": 10092 }, { "epoch": 0.9299304371861612, "grad_norm": 0.9299314366674031, "learning_rate": 6.673870905614387e-08, "loss": 0.1152, "step": 10093 }, { "epoch": 0.9300225733634312, "grad_norm": 0.9705811815401614, "learning_rate": 6.656397945876525e-08, "loss": 0.1192, "step": 10094 }, { "epoch": 0.9301147095407012, "grad_norm": 0.9309252837569718, "learning_rate": 6.638947580618338e-08, "loss": 0.1152, "step": 10095 }, { "epoch": 0.9302068457179712, "grad_norm": 0.9649772172337729, "learning_rate": 6.621519811460003e-08, "loss": 0.1184, "step": 10096 }, { "epoch": 0.9302989818952412, "grad_norm": 0.9312697012951024, "learning_rate": 6.60411464001981e-08, "loss": 0.1102, "step": 10097 }, { "epoch": 0.9303911180725112, "grad_norm": 0.9764272745006919, "learning_rate": 6.586732067913715e-08, "loss": 0.1119, "step": 10098 }, { "epoch": 0.9304832542497812, "grad_norm": 0.9436377619779318, "learning_rate": 6.56937209675576e-08, "loss": 0.1234, "step": 10099 }, { "epoch": 0.9305753904270512, "grad_norm": 0.9793883894185555, "learning_rate": 6.552034728157824e-08, "loss": 0.1198, "step": 10100 }, { "epoch": 0.9306675266043212, "grad_norm": 0.9409414214358416, "learning_rate": 6.534719963729646e-08, "loss": 0.1124, "step": 10101 }, { "epoch": 0.9307596627815912, "grad_norm": 0.9221372089696581, "learning_rate": 6.517427805078913e-08, "loss": 0.1198, "step": 10102 }, { "epoch": 0.9308517989588612, "grad_norm": 0.875186624270954, "learning_rate": 6.500158253811228e-08, "loss": 0.0994, "step": 10103 }, { "epoch": 0.9309439351361312, "grad_norm": 0.9267212533368236, "learning_rate": 6.482911311530033e-08, "loss": 0.1136, "step": 10104 }, { "epoch": 0.9310360713134012, "grad_norm": 0.8883942370936049, "learning_rate": 6.465686979836766e-08, "loss": 0.1036, "step": 10105 }, { "epoch": 0.9311282074906713, "grad_norm": 0.9235460645528614, "learning_rate": 6.44848526033065e-08, "loss": 0.1159, "step": 10106 }, { "epoch": 0.9312203436679413, "grad_norm": 0.9071083021798902, "learning_rate": 6.43130615460888e-08, "loss": 0.1105, "step": 10107 }, { "epoch": 0.9313124798452113, "grad_norm": 0.9968005269119061, "learning_rate": 6.41414966426654e-08, "loss": 0.1284, "step": 10108 }, { "epoch": 0.9314046160224813, "grad_norm": 0.9761930692958624, "learning_rate": 6.397015790896633e-08, "loss": 0.1205, "step": 10109 }, { "epoch": 0.9314967521997513, "grad_norm": 0.9365364534222561, "learning_rate": 6.379904536090053e-08, "loss": 0.1076, "step": 10110 }, { "epoch": 0.9315888883770213, "grad_norm": 0.8918112749411182, "learning_rate": 6.362815901435532e-08, "loss": 0.1098, "step": 10111 }, { "epoch": 0.9316810245542912, "grad_norm": 0.9663526734184886, "learning_rate": 6.34574988851977e-08, "loss": 0.1224, "step": 10112 }, { "epoch": 0.9317731607315612, "grad_norm": 0.927169379210311, "learning_rate": 6.328706498927361e-08, "loss": 0.117, "step": 10113 }, { "epoch": 0.9318652969088312, "grad_norm": 0.8784108002332701, "learning_rate": 6.311685734240791e-08, "loss": 0.1074, "step": 10114 }, { "epoch": 0.9319574330861012, "grad_norm": 0.9172721768927773, "learning_rate": 6.294687596040406e-08, "loss": 0.119, "step": 10115 }, { "epoch": 0.9320495692633712, "grad_norm": 0.903584060250854, "learning_rate": 6.277712085904524e-08, "loss": 0.1014, "step": 10116 }, { "epoch": 0.9321417054406412, "grad_norm": 0.9725426775760924, "learning_rate": 6.260759205409278e-08, "loss": 0.134, "step": 10117 }, { "epoch": 0.9322338416179112, "grad_norm": 0.8691523075785011, "learning_rate": 6.243828956128794e-08, "loss": 0.1008, "step": 10118 }, { "epoch": 0.9323259777951812, "grad_norm": 0.9136529794135902, "learning_rate": 6.226921339635012e-08, "loss": 0.1025, "step": 10119 }, { "epoch": 0.9324181139724513, "grad_norm": 1.0116626279614331, "learning_rate": 6.210036357497811e-08, "loss": 0.1259, "step": 10120 }, { "epoch": 0.9325102501497213, "grad_norm": 0.93766496587313, "learning_rate": 6.193174011284997e-08, "loss": 0.1199, "step": 10121 }, { "epoch": 0.9326023863269913, "grad_norm": 0.889965497266823, "learning_rate": 6.176334302562204e-08, "loss": 0.1029, "step": 10122 }, { "epoch": 0.9326945225042613, "grad_norm": 0.9855453499663499, "learning_rate": 6.159517232893014e-08, "loss": 0.121, "step": 10123 }, { "epoch": 0.9327866586815313, "grad_norm": 0.9743008981087332, "learning_rate": 6.142722803838874e-08, "loss": 0.124, "step": 10124 }, { "epoch": 0.9328787948588013, "grad_norm": 0.958869092253525, "learning_rate": 6.125951016959175e-08, "loss": 0.1247, "step": 10125 }, { "epoch": 0.9329709310360713, "grad_norm": 0.9640490274238452, "learning_rate": 6.109201873811171e-08, "loss": 0.1254, "step": 10126 }, { "epoch": 0.9330630672133413, "grad_norm": 0.8968523241668316, "learning_rate": 6.092475375950035e-08, "loss": 0.1058, "step": 10127 }, { "epoch": 0.9331552033906113, "grad_norm": 0.8935034810307682, "learning_rate": 6.075771524928804e-08, "loss": 0.1118, "step": 10128 }, { "epoch": 0.9332473395678813, "grad_norm": 0.9125924372741135, "learning_rate": 6.0590903222984e-08, "loss": 0.1099, "step": 10129 }, { "epoch": 0.9333394757451513, "grad_norm": 0.8980119979407162, "learning_rate": 6.042431769607782e-08, "loss": 0.0987, "step": 10130 }, { "epoch": 0.9334316119224213, "grad_norm": 0.9152540334694288, "learning_rate": 6.025795868403573e-08, "loss": 0.1221, "step": 10131 }, { "epoch": 0.9335237480996913, "grad_norm": 0.9265595110349005, "learning_rate": 6.009182620230508e-08, "loss": 0.1153, "step": 10132 }, { "epoch": 0.9336158842769613, "grad_norm": 0.9307369152940632, "learning_rate": 5.992592026631078e-08, "loss": 0.1064, "step": 10133 }, { "epoch": 0.9337080204542314, "grad_norm": 0.9215489600399154, "learning_rate": 5.976024089145715e-08, "loss": 0.1137, "step": 10134 }, { "epoch": 0.9338001566315014, "grad_norm": 0.9070052737981305, "learning_rate": 5.95947880931283e-08, "loss": 0.118, "step": 10135 }, { "epoch": 0.9338922928087714, "grad_norm": 0.9934855918706669, "learning_rate": 5.942956188668553e-08, "loss": 0.1296, "step": 10136 }, { "epoch": 0.9339844289860414, "grad_norm": 0.9732008800545134, "learning_rate": 5.926456228747102e-08, "loss": 0.1276, "step": 10137 }, { "epoch": 0.9340765651633114, "grad_norm": 0.9041660583571358, "learning_rate": 5.909978931080418e-08, "loss": 0.1115, "step": 10138 }, { "epoch": 0.9341687013405814, "grad_norm": 0.9207987007708457, "learning_rate": 5.8935242971984993e-08, "loss": 0.1173, "step": 10139 }, { "epoch": 0.9342608375178514, "grad_norm": 0.9634402298027609, "learning_rate": 5.877092328629097e-08, "loss": 0.1201, "step": 10140 }, { "epoch": 0.9343529736951214, "grad_norm": 0.9358564013054519, "learning_rate": 5.8606830268979344e-08, "loss": 0.1115, "step": 10141 }, { "epoch": 0.9344451098723914, "grad_norm": 0.9296780517476542, "learning_rate": 5.8442963935286535e-08, "loss": 0.1194, "step": 10142 }, { "epoch": 0.9345372460496614, "grad_norm": 0.8830379602856283, "learning_rate": 5.827932430042732e-08, "loss": 0.1087, "step": 10143 }, { "epoch": 0.9346293822269314, "grad_norm": 0.9437410925435715, "learning_rate": 5.811591137959538e-08, "loss": 0.1187, "step": 10144 }, { "epoch": 0.9347215184042014, "grad_norm": 0.9544053264277833, "learning_rate": 5.7952725187963855e-08, "loss": 0.1108, "step": 10145 }, { "epoch": 0.9348136545814714, "grad_norm": 0.9956439833964636, "learning_rate": 5.778976574068451e-08, "loss": 0.1212, "step": 10146 }, { "epoch": 0.9349057907587415, "grad_norm": 0.8916894305196384, "learning_rate": 5.762703305288858e-08, "loss": 0.1154, "step": 10147 }, { "epoch": 0.9349979269360115, "grad_norm": 0.9453557024716712, "learning_rate": 5.746452713968564e-08, "loss": 0.1232, "step": 10148 }, { "epoch": 0.9350900631132815, "grad_norm": 0.9616711906703019, "learning_rate": 5.730224801616391e-08, "loss": 0.1175, "step": 10149 }, { "epoch": 0.9351821992905515, "grad_norm": 0.9178001385739297, "learning_rate": 5.714019569739132e-08, "loss": 0.1115, "step": 10150 }, { "epoch": 0.9352743354678215, "grad_norm": 0.9475727631500085, "learning_rate": 5.697837019841446e-08, "loss": 0.125, "step": 10151 }, { "epoch": 0.9353664716450915, "grad_norm": 0.9648214850088555, "learning_rate": 5.6816771534258794e-08, "loss": 0.1196, "step": 10152 }, { "epoch": 0.9354586078223615, "grad_norm": 0.9432824464052948, "learning_rate": 5.6655399719929286e-08, "loss": 0.1149, "step": 10153 }, { "epoch": 0.9355507439996315, "grad_norm": 0.8934593126027819, "learning_rate": 5.649425477040837e-08, "loss": 0.1107, "step": 10154 }, { "epoch": 0.9356428801769014, "grad_norm": 0.9901492980135598, "learning_rate": 5.63333367006591e-08, "loss": 0.1259, "step": 10155 }, { "epoch": 0.9357350163541714, "grad_norm": 0.9389511635432886, "learning_rate": 5.617264552562229e-08, "loss": 0.1262, "step": 10156 }, { "epoch": 0.9358271525314414, "grad_norm": 0.9962631342478383, "learning_rate": 5.6012181260218514e-08, "loss": 0.1219, "step": 10157 }, { "epoch": 0.9359192887087114, "grad_norm": 0.9589826323723284, "learning_rate": 5.5851943919346394e-08, "loss": 0.1155, "step": 10158 }, { "epoch": 0.9360114248859814, "grad_norm": 1.0008099775740829, "learning_rate": 5.569193351788516e-08, "loss": 0.1258, "step": 10159 }, { "epoch": 0.9361035610632514, "grad_norm": 0.9701895891527986, "learning_rate": 5.5532150070690404e-08, "loss": 0.1144, "step": 10160 }, { "epoch": 0.9361956972405215, "grad_norm": 0.9887847060003988, "learning_rate": 5.5372593592598333e-08, "loss": 0.1239, "step": 10161 }, { "epoch": 0.9362878334177915, "grad_norm": 0.917151189793383, "learning_rate": 5.521326409842431e-08, "loss": 0.1072, "step": 10162 }, { "epoch": 0.9363799695950615, "grad_norm": 0.9511802364714468, "learning_rate": 5.5054161602961786e-08, "loss": 0.1128, "step": 10163 }, { "epoch": 0.9364721057723315, "grad_norm": 0.9196778717957829, "learning_rate": 5.489528612098366e-08, "loss": 0.1104, "step": 10164 }, { "epoch": 0.9365642419496015, "grad_norm": 0.9109397752453979, "learning_rate": 5.4736637667241465e-08, "loss": 0.1188, "step": 10165 }, { "epoch": 0.9366563781268715, "grad_norm": 0.9572866214282587, "learning_rate": 5.457821625646537e-08, "loss": 0.121, "step": 10166 }, { "epoch": 0.9367485143041415, "grad_norm": 0.9675168468217017, "learning_rate": 5.442002190336498e-08, "loss": 0.1255, "step": 10167 }, { "epoch": 0.9368406504814115, "grad_norm": 0.9939171972835509, "learning_rate": 5.426205462262884e-08, "loss": 0.1363, "step": 10168 }, { "epoch": 0.9369327866586815, "grad_norm": 1.00824738921418, "learning_rate": 5.410431442892411e-08, "loss": 0.1318, "step": 10169 }, { "epoch": 0.9370249228359515, "grad_norm": 0.9286004221876722, "learning_rate": 5.3946801336897395e-08, "loss": 0.1152, "step": 10170 }, { "epoch": 0.9371170590132215, "grad_norm": 0.9394035973214175, "learning_rate": 5.37895153611731e-08, "loss": 0.1161, "step": 10171 }, { "epoch": 0.9372091951904915, "grad_norm": 0.9356338375134339, "learning_rate": 5.363245651635568e-08, "loss": 0.1163, "step": 10172 }, { "epoch": 0.9373013313677615, "grad_norm": 0.9705226684470347, "learning_rate": 5.3475624817027614e-08, "loss": 0.1221, "step": 10173 }, { "epoch": 0.9373934675450316, "grad_norm": 0.9778732464822383, "learning_rate": 5.331902027775143e-08, "loss": 0.1226, "step": 10174 }, { "epoch": 0.9374856037223016, "grad_norm": 0.9344922249700046, "learning_rate": 5.316264291306744e-08, "loss": 0.1171, "step": 10175 }, { "epoch": 0.9375777398995716, "grad_norm": 0.930084190711884, "learning_rate": 5.300649273749542e-08, "loss": 0.1196, "step": 10176 }, { "epoch": 0.9376698760768416, "grad_norm": 0.9179768576305335, "learning_rate": 5.2850569765533766e-08, "loss": 0.1175, "step": 10177 }, { "epoch": 0.9377620122541116, "grad_norm": 0.92930569119096, "learning_rate": 5.2694874011660066e-08, "loss": 0.1195, "step": 10178 }, { "epoch": 0.9378541484313816, "grad_norm": 0.9196928713129051, "learning_rate": 5.253940549033082e-08, "loss": 0.1064, "step": 10179 }, { "epoch": 0.9379462846086516, "grad_norm": 0.9418730370988134, "learning_rate": 5.238416421598142e-08, "loss": 0.1188, "step": 10180 }, { "epoch": 0.9380384207859216, "grad_norm": 0.9425062455243531, "learning_rate": 5.2229150203025604e-08, "loss": 0.117, "step": 10181 }, { "epoch": 0.9381305569631916, "grad_norm": 0.9634946865174938, "learning_rate": 5.2074363465856316e-08, "loss": 0.1138, "step": 10182 }, { "epoch": 0.9382226931404616, "grad_norm": 0.9414468299208367, "learning_rate": 5.191980401884594e-08, "loss": 0.1181, "step": 10183 }, { "epoch": 0.9383148293177316, "grad_norm": 0.9099367437520887, "learning_rate": 5.176547187634551e-08, "loss": 0.1173, "step": 10184 }, { "epoch": 0.9384069654950016, "grad_norm": 0.940932781792052, "learning_rate": 5.161136705268438e-08, "loss": 0.1197, "step": 10185 }, { "epoch": 0.9384991016722716, "grad_norm": 0.9422087246290408, "learning_rate": 5.145748956217139e-08, "loss": 0.1186, "step": 10186 }, { "epoch": 0.9385912378495416, "grad_norm": 0.9339898499942781, "learning_rate": 5.130383941909372e-08, "loss": 0.1077, "step": 10187 }, { "epoch": 0.9386833740268117, "grad_norm": 0.9095970890077272, "learning_rate": 5.1150416637718306e-08, "loss": 0.1071, "step": 10188 }, { "epoch": 0.9387755102040817, "grad_norm": 0.9610721856654976, "learning_rate": 5.0997221232290115e-08, "loss": 0.1182, "step": 10189 }, { "epoch": 0.9388676463813517, "grad_norm": 0.9442119623237161, "learning_rate": 5.0844253217033624e-08, "loss": 0.1253, "step": 10190 }, { "epoch": 0.9389597825586217, "grad_norm": 0.9104684661627135, "learning_rate": 5.06915126061519e-08, "loss": 0.1107, "step": 10191 }, { "epoch": 0.9390519187358917, "grad_norm": 0.9213554783526815, "learning_rate": 5.0538999413826393e-08, "loss": 0.1178, "step": 10192 }, { "epoch": 0.9391440549131617, "grad_norm": 0.9993405206730799, "learning_rate": 5.0386713654218825e-08, "loss": 0.1308, "step": 10193 }, { "epoch": 0.9392361910904317, "grad_norm": 0.9736502010870575, "learning_rate": 5.023465534146843e-08, "loss": 0.1214, "step": 10194 }, { "epoch": 0.9393283272677017, "grad_norm": 0.8838783987621547, "learning_rate": 5.008282448969393e-08, "loss": 0.1084, "step": 10195 }, { "epoch": 0.9394204634449717, "grad_norm": 0.948623222140314, "learning_rate": 4.9931221112992924e-08, "loss": 0.1198, "step": 10196 }, { "epoch": 0.9395125996222417, "grad_norm": 0.8956293742678936, "learning_rate": 4.977984522544166e-08, "loss": 0.1091, "step": 10197 }, { "epoch": 0.9396047357995116, "grad_norm": 0.9255954230214183, "learning_rate": 4.962869684109528e-08, "loss": 0.1097, "step": 10198 }, { "epoch": 0.9396968719767816, "grad_norm": 0.9149389171061979, "learning_rate": 4.947777597398812e-08, "loss": 0.1145, "step": 10199 }, { "epoch": 0.9397890081540516, "grad_norm": 0.9200927674585153, "learning_rate": 4.932708263813341e-08, "loss": 0.1114, "step": 10200 }, { "epoch": 0.9398811443313216, "grad_norm": 0.9676136590853243, "learning_rate": 4.917661684752273e-08, "loss": 0.1214, "step": 10201 }, { "epoch": 0.9399732805085917, "grad_norm": 0.9156287716615673, "learning_rate": 4.9026378616127133e-08, "loss": 0.118, "step": 10202 }, { "epoch": 0.9400654166858617, "grad_norm": 0.889927739280015, "learning_rate": 4.8876367957895744e-08, "loss": 0.1084, "step": 10203 }, { "epoch": 0.9401575528631317, "grad_norm": 0.9299760065843369, "learning_rate": 4.872658488675741e-08, "loss": 0.1127, "step": 10204 }, { "epoch": 0.9402496890404017, "grad_norm": 0.9132737348421209, "learning_rate": 4.8577029416619625e-08, "loss": 0.1131, "step": 10205 }, { "epoch": 0.9403418252176717, "grad_norm": 0.88489448512981, "learning_rate": 4.84277015613685e-08, "loss": 0.1083, "step": 10206 }, { "epoch": 0.9404339613949417, "grad_norm": 1.0037419903582985, "learning_rate": 4.8278601334869056e-08, "loss": 0.1192, "step": 10207 }, { "epoch": 0.9405260975722117, "grad_norm": 0.9268411474644621, "learning_rate": 4.8129728750965224e-08, "loss": 0.1128, "step": 10208 }, { "epoch": 0.9406182337494817, "grad_norm": 0.9709277587211269, "learning_rate": 4.79810838234801e-08, "loss": 0.1303, "step": 10209 }, { "epoch": 0.9407103699267517, "grad_norm": 0.8769457665825694, "learning_rate": 4.7832666566215156e-08, "loss": 0.0994, "step": 10210 }, { "epoch": 0.9408025061040217, "grad_norm": 0.9298202881373417, "learning_rate": 4.7684476992951033e-08, "loss": 0.1122, "step": 10211 }, { "epoch": 0.9408946422812917, "grad_norm": 0.8818956452577879, "learning_rate": 4.753651511744728e-08, "loss": 0.1072, "step": 10212 }, { "epoch": 0.9409867784585617, "grad_norm": 0.9343822631531623, "learning_rate": 4.738878095344207e-08, "loss": 0.1167, "step": 10213 }, { "epoch": 0.9410789146358317, "grad_norm": 0.9605236137441521, "learning_rate": 4.7241274514652217e-08, "loss": 0.114, "step": 10214 }, { "epoch": 0.9411710508131018, "grad_norm": 0.9555618347520516, "learning_rate": 4.7093995814773975e-08, "loss": 0.1234, "step": 10215 }, { "epoch": 0.9412631869903718, "grad_norm": 0.9108221732663505, "learning_rate": 4.694694486748225e-08, "loss": 0.1163, "step": 10216 }, { "epoch": 0.9413553231676418, "grad_norm": 0.9386691959416757, "learning_rate": 4.680012168643111e-08, "loss": 0.1156, "step": 10217 }, { "epoch": 0.9414474593449118, "grad_norm": 0.9589183202714626, "learning_rate": 4.6653526285252437e-08, "loss": 0.1186, "step": 10218 }, { "epoch": 0.9415395955221818, "grad_norm": 0.9383960225445924, "learning_rate": 4.650715867755784e-08, "loss": 0.1124, "step": 10219 }, { "epoch": 0.9416317316994518, "grad_norm": 0.9599936149241113, "learning_rate": 4.636101887693756e-08, "loss": 0.1156, "step": 10220 }, { "epoch": 0.9417238678767218, "grad_norm": 0.9530960877295532, "learning_rate": 4.621510689696046e-08, "loss": 0.1234, "step": 10221 }, { "epoch": 0.9418160040539918, "grad_norm": 0.9144532827198258, "learning_rate": 4.606942275117543e-08, "loss": 0.1082, "step": 10222 }, { "epoch": 0.9419081402312618, "grad_norm": 0.9337094101499579, "learning_rate": 4.5923966453108315e-08, "loss": 0.1154, "step": 10223 }, { "epoch": 0.9420002764085318, "grad_norm": 0.9107710053439749, "learning_rate": 4.57787380162647e-08, "loss": 0.1105, "step": 10224 }, { "epoch": 0.9420924125858018, "grad_norm": 0.9887729337567444, "learning_rate": 4.5633737454129636e-08, "loss": 0.1222, "step": 10225 }, { "epoch": 0.9421845487630718, "grad_norm": 1.0127249337390898, "learning_rate": 4.548896478016651e-08, "loss": 0.1213, "step": 10226 }, { "epoch": 0.9422766849403418, "grad_norm": 0.8962299567936826, "learning_rate": 4.5344420007816526e-08, "loss": 0.1125, "step": 10227 }, { "epoch": 0.9423688211176118, "grad_norm": 0.8985914309284595, "learning_rate": 4.5200103150501996e-08, "loss": 0.1132, "step": 10228 }, { "epoch": 0.9424609572948819, "grad_norm": 0.8947342003365959, "learning_rate": 4.5056014221621645e-08, "loss": 0.1106, "step": 10229 }, { "epoch": 0.9425530934721519, "grad_norm": 0.9554543314067567, "learning_rate": 4.4912153234554777e-08, "loss": 0.1163, "step": 10230 }, { "epoch": 0.9426452296494219, "grad_norm": 0.8884133929587698, "learning_rate": 4.4768520202658484e-08, "loss": 0.1036, "step": 10231 }, { "epoch": 0.9427373658266919, "grad_norm": 0.9632133595908018, "learning_rate": 4.4625115139269314e-08, "loss": 0.1237, "step": 10232 }, { "epoch": 0.9428295020039619, "grad_norm": 0.9369237909664713, "learning_rate": 4.448193805770273e-08, "loss": 0.1118, "step": 10233 }, { "epoch": 0.9429216381812319, "grad_norm": 0.9721798178694152, "learning_rate": 4.4338988971252275e-08, "loss": 0.1255, "step": 10234 }, { "epoch": 0.9430137743585019, "grad_norm": 0.9855443317803402, "learning_rate": 4.4196267893190926e-08, "loss": 0.1181, "step": 10235 }, { "epoch": 0.9431059105357719, "grad_norm": 0.9723576412165987, "learning_rate": 4.4053774836770315e-08, "loss": 0.1235, "step": 10236 }, { "epoch": 0.9431980467130419, "grad_norm": 0.9171237878625916, "learning_rate": 4.3911509815221244e-08, "loss": 0.1065, "step": 10237 }, { "epoch": 0.9432901828903119, "grad_norm": 0.9440385935903521, "learning_rate": 4.3769472841752866e-08, "loss": 0.1147, "step": 10238 }, { "epoch": 0.9433823190675819, "grad_norm": 0.9451368053622421, "learning_rate": 4.362766392955325e-08, "loss": 0.1158, "step": 10239 }, { "epoch": 0.9434744552448519, "grad_norm": 0.9943988884305303, "learning_rate": 4.348608309178909e-08, "loss": 0.1229, "step": 10240 }, { "epoch": 0.9435665914221218, "grad_norm": 0.9477728019676438, "learning_rate": 4.33447303416068e-08, "loss": 0.1156, "step": 10241 }, { "epoch": 0.943658727599392, "grad_norm": 0.9560883582930023, "learning_rate": 4.320360569213061e-08, "loss": 0.1123, "step": 10242 }, { "epoch": 0.943750863776662, "grad_norm": 0.9330272969248564, "learning_rate": 4.3062709156463936e-08, "loss": 0.1157, "step": 10243 }, { "epoch": 0.943842999953932, "grad_norm": 0.9343240680576235, "learning_rate": 4.292204074768908e-08, "loss": 0.1169, "step": 10244 }, { "epoch": 0.943935136131202, "grad_norm": 0.9823045448492629, "learning_rate": 4.278160047886753e-08, "loss": 0.1266, "step": 10245 }, { "epoch": 0.9440272723084719, "grad_norm": 0.9301202110619059, "learning_rate": 4.264138836303861e-08, "loss": 0.1203, "step": 10246 }, { "epoch": 0.9441194084857419, "grad_norm": 0.9450681121818032, "learning_rate": 4.250140441322131e-08, "loss": 0.1272, "step": 10247 }, { "epoch": 0.9442115446630119, "grad_norm": 0.9737075656993364, "learning_rate": 4.236164864241277e-08, "loss": 0.1184, "step": 10248 }, { "epoch": 0.9443036808402819, "grad_norm": 0.9491703019406365, "learning_rate": 4.22221210635898e-08, "loss": 0.1153, "step": 10249 }, { "epoch": 0.9443958170175519, "grad_norm": 0.9452455622285862, "learning_rate": 4.208282168970762e-08, "loss": 0.1173, "step": 10250 }, { "epoch": 0.9444879531948219, "grad_norm": 1.0134904272797585, "learning_rate": 4.1943750533700036e-08, "loss": 0.1264, "step": 10251 }, { "epoch": 0.9445800893720919, "grad_norm": 0.9769733494426245, "learning_rate": 4.1804907608479494e-08, "loss": 0.1312, "step": 10252 }, { "epoch": 0.9446722255493619, "grad_norm": 0.9095232445090923, "learning_rate": 4.166629292693791e-08, "loss": 0.1095, "step": 10253 }, { "epoch": 0.9447643617266319, "grad_norm": 0.9253806676477634, "learning_rate": 4.1527906501945547e-08, "loss": 0.1136, "step": 10254 }, { "epoch": 0.9448564979039019, "grad_norm": 0.9756487468351606, "learning_rate": 4.138974834635157e-08, "loss": 0.123, "step": 10255 }, { "epoch": 0.944948634081172, "grad_norm": 0.9438368581540895, "learning_rate": 4.1251818472984315e-08, "loss": 0.1184, "step": 10256 }, { "epoch": 0.945040770258442, "grad_norm": 0.9009313023621, "learning_rate": 4.1114116894650225e-08, "loss": 0.1161, "step": 10257 }, { "epoch": 0.945132906435712, "grad_norm": 0.9339442841007484, "learning_rate": 4.0976643624134896e-08, "loss": 0.114, "step": 10258 }, { "epoch": 0.945225042612982, "grad_norm": 0.9994792676185544, "learning_rate": 4.0839398674203114e-08, "loss": 0.1172, "step": 10259 }, { "epoch": 0.945317178790252, "grad_norm": 0.8813178121583533, "learning_rate": 4.0702382057597465e-08, "loss": 0.1085, "step": 10260 }, { "epoch": 0.945409314967522, "grad_norm": 0.9806576557306624, "learning_rate": 4.0565593787040555e-08, "loss": 0.1229, "step": 10261 }, { "epoch": 0.945501451144792, "grad_norm": 0.9284548478345062, "learning_rate": 4.042903387523278e-08, "loss": 0.1133, "step": 10262 }, { "epoch": 0.945593587322062, "grad_norm": 0.961086375262101, "learning_rate": 4.029270233485427e-08, "loss": 0.1178, "step": 10263 }, { "epoch": 0.945685723499332, "grad_norm": 0.924194713183989, "learning_rate": 4.0156599178562686e-08, "loss": 0.1157, "step": 10264 }, { "epoch": 0.945777859676602, "grad_norm": 0.9922060533952876, "learning_rate": 4.002072441899568e-08, "loss": 0.1299, "step": 10265 }, { "epoch": 0.945869995853872, "grad_norm": 0.8994135466256734, "learning_rate": 3.988507806876929e-08, "loss": 0.1099, "step": 10266 }, { "epoch": 0.945962132031142, "grad_norm": 0.8825297317998159, "learning_rate": 3.974966014047815e-08, "loss": 0.1083, "step": 10267 }, { "epoch": 0.946054268208412, "grad_norm": 0.8886493644730223, "learning_rate": 3.961447064669582e-08, "loss": 0.1077, "step": 10268 }, { "epoch": 0.946146404385682, "grad_norm": 0.9517858277726193, "learning_rate": 3.9479509599974486e-08, "loss": 0.1205, "step": 10269 }, { "epoch": 0.9462385405629521, "grad_norm": 0.9357653566018014, "learning_rate": 3.9344777012845504e-08, "loss": 0.1142, "step": 10270 }, { "epoch": 0.9463306767402221, "grad_norm": 0.9994080080071049, "learning_rate": 3.921027289781915e-08, "loss": 0.1286, "step": 10271 }, { "epoch": 0.9464228129174921, "grad_norm": 0.9701132832326953, "learning_rate": 3.907599726738348e-08, "loss": 0.1214, "step": 10272 }, { "epoch": 0.9465149490947621, "grad_norm": 0.9475842718266658, "learning_rate": 3.894195013400631e-08, "loss": 0.1087, "step": 10273 }, { "epoch": 0.9466070852720321, "grad_norm": 0.9428359279407029, "learning_rate": 3.8808131510134074e-08, "loss": 0.1263, "step": 10274 }, { "epoch": 0.9466992214493021, "grad_norm": 0.97188586109351, "learning_rate": 3.8674541408191824e-08, "loss": 0.1213, "step": 10275 }, { "epoch": 0.9467913576265721, "grad_norm": 0.97344652490752, "learning_rate": 3.854117984058298e-08, "loss": 0.1212, "step": 10276 }, { "epoch": 0.9468834938038421, "grad_norm": 0.9359938550829311, "learning_rate": 3.840804681969068e-08, "loss": 0.1067, "step": 10277 }, { "epoch": 0.9469756299811121, "grad_norm": 0.9344899765877749, "learning_rate": 3.827514235787616e-08, "loss": 0.1217, "step": 10278 }, { "epoch": 0.9470677661583821, "grad_norm": 0.9439926374477899, "learning_rate": 3.8142466467479265e-08, "loss": 0.1105, "step": 10279 }, { "epoch": 0.9471599023356521, "grad_norm": 0.9322879507849088, "learning_rate": 3.801001916081987e-08, "loss": 0.1158, "step": 10280 }, { "epoch": 0.9472520385129221, "grad_norm": 0.935923550064033, "learning_rate": 3.787780045019479e-08, "loss": 0.119, "step": 10281 }, { "epoch": 0.947344174690192, "grad_norm": 0.9640710670440555, "learning_rate": 3.774581034788116e-08, "loss": 0.1135, "step": 10282 }, { "epoch": 0.9474363108674622, "grad_norm": 0.9311183807439256, "learning_rate": 3.7614048866133624e-08, "loss": 0.1173, "step": 10283 }, { "epoch": 0.9475284470447322, "grad_norm": 0.9739842153491166, "learning_rate": 3.748251601718711e-08, "loss": 0.1223, "step": 10284 }, { "epoch": 0.9476205832220022, "grad_norm": 0.9989638639290763, "learning_rate": 3.7351211813253795e-08, "loss": 0.1196, "step": 10285 }, { "epoch": 0.9477127193992722, "grad_norm": 0.9387016890086035, "learning_rate": 3.722013626652532e-08, "loss": 0.1178, "step": 10286 }, { "epoch": 0.9478048555765421, "grad_norm": 0.9004645700352083, "learning_rate": 3.70892893891725e-08, "loss": 0.1016, "step": 10287 }, { "epoch": 0.9478969917538121, "grad_norm": 0.9072326721273851, "learning_rate": 3.695867119334423e-08, "loss": 0.1173, "step": 10288 }, { "epoch": 0.9479891279310821, "grad_norm": 0.9778845392909062, "learning_rate": 3.682828169116831e-08, "loss": 0.1272, "step": 10289 }, { "epoch": 0.9480812641083521, "grad_norm": 0.909610617103646, "learning_rate": 3.669812089475144e-08, "loss": 0.1161, "step": 10290 }, { "epoch": 0.9481734002856221, "grad_norm": 0.9546052020757654, "learning_rate": 3.656818881617924e-08, "loss": 0.1204, "step": 10291 }, { "epoch": 0.9482655364628921, "grad_norm": 0.9205463321641423, "learning_rate": 3.6438485467515935e-08, "loss": 0.1015, "step": 10292 }, { "epoch": 0.9483576726401621, "grad_norm": 0.9848823587850997, "learning_rate": 3.630901086080441e-08, "loss": 0.1283, "step": 10293 }, { "epoch": 0.9484498088174321, "grad_norm": 0.9526839289073572, "learning_rate": 3.6179765008066134e-08, "loss": 0.117, "step": 10294 }, { "epoch": 0.9485419449947021, "grad_norm": 0.9203969240464469, "learning_rate": 3.605074792130181e-08, "loss": 0.1089, "step": 10295 }, { "epoch": 0.9486340811719721, "grad_norm": 0.9418423365790961, "learning_rate": 3.5921959612491006e-08, "loss": 0.1118, "step": 10296 }, { "epoch": 0.9487262173492422, "grad_norm": 0.9103613162527556, "learning_rate": 3.5793400093591394e-08, "loss": 0.1092, "step": 10297 }, { "epoch": 0.9488183535265122, "grad_norm": 0.9557914846405104, "learning_rate": 3.5665069376539796e-08, "loss": 0.1276, "step": 10298 }, { "epoch": 0.9489104897037822, "grad_norm": 0.9745637820171693, "learning_rate": 3.553696747325142e-08, "loss": 0.1125, "step": 10299 }, { "epoch": 0.9490026258810522, "grad_norm": 0.9457597041155954, "learning_rate": 3.540909439562118e-08, "loss": 0.1186, "step": 10300 }, { "epoch": 0.9490947620583222, "grad_norm": 1.0035411325101824, "learning_rate": 3.528145015552154e-08, "loss": 0.1309, "step": 10301 }, { "epoch": 0.9491868982355922, "grad_norm": 0.9519212745079059, "learning_rate": 3.515403476480439e-08, "loss": 0.1118, "step": 10302 }, { "epoch": 0.9492790344128622, "grad_norm": 0.9403154344488309, "learning_rate": 3.5026848235300834e-08, "loss": 0.11, "step": 10303 }, { "epoch": 0.9493711705901322, "grad_norm": 0.9136726499930174, "learning_rate": 3.489989057881948e-08, "loss": 0.1133, "step": 10304 }, { "epoch": 0.9494633067674022, "grad_norm": 0.955839488777064, "learning_rate": 3.47731618071484e-08, "loss": 0.1243, "step": 10305 }, { "epoch": 0.9495554429446722, "grad_norm": 0.964765603667002, "learning_rate": 3.4646661932054846e-08, "loss": 0.1175, "step": 10306 }, { "epoch": 0.9496475791219422, "grad_norm": 0.9055605967583641, "learning_rate": 3.452039096528359e-08, "loss": 0.1169, "step": 10307 }, { "epoch": 0.9497397152992122, "grad_norm": 0.9543169846360642, "learning_rate": 3.439434891855997e-08, "loss": 0.1117, "step": 10308 }, { "epoch": 0.9498318514764822, "grad_norm": 0.90718852701149, "learning_rate": 3.42685358035863e-08, "loss": 0.117, "step": 10309 }, { "epoch": 0.9499239876537523, "grad_norm": 0.9405814649991735, "learning_rate": 3.4142951632044065e-08, "loss": 0.1161, "step": 10310 }, { "epoch": 0.9500161238310223, "grad_norm": 1.0220816495719443, "learning_rate": 3.401759641559449e-08, "loss": 0.1268, "step": 10311 }, { "epoch": 0.9501082600082923, "grad_norm": 0.9680480102811909, "learning_rate": 3.3892470165876045e-08, "loss": 0.1143, "step": 10312 }, { "epoch": 0.9502003961855623, "grad_norm": 0.9410029223109165, "learning_rate": 3.376757289450777e-08, "loss": 0.1156, "step": 10313 }, { "epoch": 0.9502925323628323, "grad_norm": 0.8905182542520986, "learning_rate": 3.3642904613085393e-08, "loss": 0.1117, "step": 10314 }, { "epoch": 0.9503846685401023, "grad_norm": 0.9202657757695479, "learning_rate": 3.3518465333184925e-08, "loss": 0.1192, "step": 10315 }, { "epoch": 0.9504768047173723, "grad_norm": 0.914152702938313, "learning_rate": 3.339425506636018e-08, "loss": 0.1054, "step": 10316 }, { "epoch": 0.9505689408946423, "grad_norm": 0.9279714745854521, "learning_rate": 3.327027382414444e-08, "loss": 0.1131, "step": 10317 }, { "epoch": 0.9506610770719123, "grad_norm": 0.9479869182555316, "learning_rate": 3.314652161804932e-08, "loss": 0.1161, "step": 10318 }, { "epoch": 0.9507532132491823, "grad_norm": 0.954111959930808, "learning_rate": 3.30229984595648e-08, "loss": 0.1201, "step": 10319 }, { "epoch": 0.9508453494264523, "grad_norm": 0.9734374242952998, "learning_rate": 3.289970436016088e-08, "loss": 0.1221, "step": 10320 }, { "epoch": 0.9509374856037223, "grad_norm": 0.9428411208222813, "learning_rate": 3.2776639331284774e-08, "loss": 0.1144, "step": 10321 }, { "epoch": 0.9510296217809923, "grad_norm": 0.9317668049150903, "learning_rate": 3.2653803384362914e-08, "loss": 0.1181, "step": 10322 }, { "epoch": 0.9511217579582623, "grad_norm": 0.9298411302304473, "learning_rate": 3.253119653080117e-08, "loss": 0.1223, "step": 10323 }, { "epoch": 0.9512138941355324, "grad_norm": 0.9151941756140258, "learning_rate": 3.240881878198349e-08, "loss": 0.1185, "step": 10324 }, { "epoch": 0.9513060303128024, "grad_norm": 0.8593540933721838, "learning_rate": 3.228667014927245e-08, "loss": 0.1042, "step": 10325 }, { "epoch": 0.9513981664900724, "grad_norm": 0.9377788085836343, "learning_rate": 3.2164750644009814e-08, "loss": 0.1217, "step": 10326 }, { "epoch": 0.9514903026673424, "grad_norm": 0.9133361500053995, "learning_rate": 3.204306027751541e-08, "loss": 0.1116, "step": 10327 }, { "epoch": 0.9515824388446124, "grad_norm": 0.9487359846675105, "learning_rate": 3.1921599061088546e-08, "loss": 0.1249, "step": 10328 }, { "epoch": 0.9516745750218824, "grad_norm": 0.9343318497067161, "learning_rate": 3.180036700600686e-08, "loss": 0.1114, "step": 10329 }, { "epoch": 0.9517667111991523, "grad_norm": 0.9296746242117467, "learning_rate": 3.1679364123526625e-08, "loss": 0.1187, "step": 10330 }, { "epoch": 0.9518588473764223, "grad_norm": 0.9647571322981348, "learning_rate": 3.1558590424883294e-08, "loss": 0.1276, "step": 10331 }, { "epoch": 0.9519509835536923, "grad_norm": 0.9685146771017278, "learning_rate": 3.1438045921290404e-08, "loss": 0.124, "step": 10332 }, { "epoch": 0.9520431197309623, "grad_norm": 0.8476579863265571, "learning_rate": 3.1317730623940665e-08, "loss": 0.1033, "step": 10333 }, { "epoch": 0.9521352559082323, "grad_norm": 0.9532082684247339, "learning_rate": 3.119764454400515e-08, "loss": 0.1218, "step": 10334 }, { "epoch": 0.9522273920855023, "grad_norm": 0.9453757489867697, "learning_rate": 3.1077787692634085e-08, "loss": 0.121, "step": 10335 }, { "epoch": 0.9523195282627723, "grad_norm": 0.9229009382067201, "learning_rate": 3.095816008095637e-08, "loss": 0.1149, "step": 10336 }, { "epoch": 0.9524116644400424, "grad_norm": 1.0206879719212576, "learning_rate": 3.083876172007894e-08, "loss": 0.122, "step": 10337 }, { "epoch": 0.9525038006173124, "grad_norm": 0.8983665500372922, "learning_rate": 3.071959262108848e-08, "loss": 0.1063, "step": 10338 }, { "epoch": 0.9525959367945824, "grad_norm": 0.9479813529207537, "learning_rate": 3.0600652795049204e-08, "loss": 0.112, "step": 10339 }, { "epoch": 0.9526880729718524, "grad_norm": 0.9177433261172819, "learning_rate": 3.048194225300532e-08, "loss": 0.112, "step": 10340 }, { "epoch": 0.9527802091491224, "grad_norm": 0.9370376744228802, "learning_rate": 3.0363461005978865e-08, "loss": 0.1103, "step": 10341 }, { "epoch": 0.9528723453263924, "grad_norm": 0.9060338446923794, "learning_rate": 3.024520906497103e-08, "loss": 0.1088, "step": 10342 }, { "epoch": 0.9529644815036624, "grad_norm": 0.9151253682669213, "learning_rate": 3.012718644096107e-08, "loss": 0.116, "step": 10343 }, { "epoch": 0.9530566176809324, "grad_norm": 0.9662487060386523, "learning_rate": 3.0009393144907475e-08, "loss": 0.1025, "step": 10344 }, { "epoch": 0.9531487538582024, "grad_norm": 0.9507650484823077, "learning_rate": 2.989182918774786e-08, "loss": 0.1208, "step": 10345 }, { "epoch": 0.9532408900354724, "grad_norm": 1.045514593246912, "learning_rate": 2.977449458039766e-08, "loss": 0.1339, "step": 10346 }, { "epoch": 0.9533330262127424, "grad_norm": 0.9427860617410642, "learning_rate": 2.9657389333751784e-08, "loss": 0.1154, "step": 10347 }, { "epoch": 0.9534251623900124, "grad_norm": 0.9225913930801135, "learning_rate": 2.954051345868264e-08, "loss": 0.1031, "step": 10348 }, { "epoch": 0.9535172985672824, "grad_norm": 0.9038936333486661, "learning_rate": 2.9423866966042935e-08, "loss": 0.1116, "step": 10349 }, { "epoch": 0.9536094347445524, "grad_norm": 0.9543339656289974, "learning_rate": 2.9307449866663174e-08, "loss": 0.1174, "step": 10350 }, { "epoch": 0.9537015709218225, "grad_norm": 0.9056991309017217, "learning_rate": 2.9191262171352486e-08, "loss": 0.1111, "step": 10351 }, { "epoch": 0.9537937070990925, "grad_norm": 0.9861185880449297, "learning_rate": 2.9075303890899187e-08, "loss": 0.1282, "step": 10352 }, { "epoch": 0.9538858432763625, "grad_norm": 0.9621592809326136, "learning_rate": 2.895957503606939e-08, "loss": 0.1274, "step": 10353 }, { "epoch": 0.9539779794536325, "grad_norm": 0.8922039949226299, "learning_rate": 2.8844075617609492e-08, "loss": 0.1105, "step": 10354 }, { "epoch": 0.9540701156309025, "grad_norm": 0.9228402469930214, "learning_rate": 2.8728805646242863e-08, "loss": 0.1128, "step": 10355 }, { "epoch": 0.9541622518081725, "grad_norm": 0.9617168790205287, "learning_rate": 2.8613765132672612e-08, "loss": 0.1128, "step": 10356 }, { "epoch": 0.9542543879854425, "grad_norm": 0.8580565446966045, "learning_rate": 2.8498954087580187e-08, "loss": 0.0943, "step": 10357 }, { "epoch": 0.9543465241627125, "grad_norm": 0.9520942535427239, "learning_rate": 2.8384372521626236e-08, "loss": 0.1172, "step": 10358 }, { "epoch": 0.9544386603399825, "grad_norm": 0.9365343890221104, "learning_rate": 2.827002044544891e-08, "loss": 0.1148, "step": 10359 }, { "epoch": 0.9545307965172525, "grad_norm": 0.933691258317069, "learning_rate": 2.8155897869666105e-08, "loss": 0.1206, "step": 10360 }, { "epoch": 0.9546229326945225, "grad_norm": 0.9104238953866819, "learning_rate": 2.8042004804874346e-08, "loss": 0.1116, "step": 10361 }, { "epoch": 0.9547150688717925, "grad_norm": 0.8681396545723196, "learning_rate": 2.7928341261648507e-08, "loss": 0.1018, "step": 10362 }, { "epoch": 0.9548072050490625, "grad_norm": 0.9477791985994918, "learning_rate": 2.7814907250542368e-08, "loss": 0.1133, "step": 10363 }, { "epoch": 0.9548993412263325, "grad_norm": 0.9830223390489866, "learning_rate": 2.770170278208806e-08, "loss": 0.1211, "step": 10364 }, { "epoch": 0.9549914774036026, "grad_norm": 0.9651177678762349, "learning_rate": 2.7588727866796617e-08, "loss": 0.1193, "step": 10365 }, { "epoch": 0.9550836135808726, "grad_norm": 0.9231632131169901, "learning_rate": 2.7475982515157986e-08, "loss": 0.108, "step": 10366 }, { "epoch": 0.9551757497581426, "grad_norm": 0.9546876190711204, "learning_rate": 2.7363466737640453e-08, "loss": 0.1126, "step": 10367 }, { "epoch": 0.9552678859354126, "grad_norm": 0.9264536078414163, "learning_rate": 2.7251180544691225e-08, "loss": 0.1166, "step": 10368 }, { "epoch": 0.9553600221126826, "grad_norm": 0.948299270414955, "learning_rate": 2.7139123946735847e-08, "loss": 0.1229, "step": 10369 }, { "epoch": 0.9554521582899526, "grad_norm": 0.9773410646908133, "learning_rate": 2.7027296954178773e-08, "loss": 0.1169, "step": 10370 }, { "epoch": 0.9555442944672226, "grad_norm": 0.9411232679657067, "learning_rate": 2.6915699577403644e-08, "loss": 0.1119, "step": 10371 }, { "epoch": 0.9556364306444926, "grad_norm": 0.9173845585128807, "learning_rate": 2.680433182677189e-08, "loss": 0.1151, "step": 10372 }, { "epoch": 0.9557285668217625, "grad_norm": 0.9916694029075123, "learning_rate": 2.6693193712624133e-08, "loss": 0.1183, "step": 10373 }, { "epoch": 0.9558207029990325, "grad_norm": 0.8983875748435834, "learning_rate": 2.6582285245279338e-08, "loss": 0.1121, "step": 10374 }, { "epoch": 0.9559128391763025, "grad_norm": 0.916895182755261, "learning_rate": 2.6471606435035934e-08, "loss": 0.1075, "step": 10375 }, { "epoch": 0.9560049753535725, "grad_norm": 0.9347044640622088, "learning_rate": 2.6361157292169593e-08, "loss": 0.1195, "step": 10376 }, { "epoch": 0.9560971115308425, "grad_norm": 0.9614171099198148, "learning_rate": 2.6250937826936274e-08, "loss": 0.12, "step": 10377 }, { "epoch": 0.9561892477081126, "grad_norm": 0.891382660362729, "learning_rate": 2.6140948049569737e-08, "loss": 0.111, "step": 10378 }, { "epoch": 0.9562813838853826, "grad_norm": 0.9757278767219288, "learning_rate": 2.603118797028209e-08, "loss": 0.1256, "step": 10379 }, { "epoch": 0.9563735200626526, "grad_norm": 0.9466608595122338, "learning_rate": 2.592165759926518e-08, "loss": 0.1157, "step": 10380 }, { "epoch": 0.9564656562399226, "grad_norm": 0.9383107114478125, "learning_rate": 2.5812356946688376e-08, "loss": 0.1102, "step": 10381 }, { "epoch": 0.9565577924171926, "grad_norm": 0.9982385805871572, "learning_rate": 2.5703286022700503e-08, "loss": 0.1209, "step": 10382 }, { "epoch": 0.9566499285944626, "grad_norm": 0.9727363491362462, "learning_rate": 2.559444483742901e-08, "loss": 0.122, "step": 10383 }, { "epoch": 0.9567420647717326, "grad_norm": 0.9155845511468633, "learning_rate": 2.548583340097971e-08, "loss": 0.1099, "step": 10384 }, { "epoch": 0.9568342009490026, "grad_norm": 0.9366488234735828, "learning_rate": 2.5377451723436753e-08, "loss": 0.1122, "step": 10385 }, { "epoch": 0.9569263371262726, "grad_norm": 0.9856402469594859, "learning_rate": 2.5269299814863756e-08, "loss": 0.125, "step": 10386 }, { "epoch": 0.9570184733035426, "grad_norm": 0.9558864294357874, "learning_rate": 2.5161377685302968e-08, "loss": 0.1283, "step": 10387 }, { "epoch": 0.9571106094808126, "grad_norm": 0.9764377825032131, "learning_rate": 2.505368534477415e-08, "loss": 0.1199, "step": 10388 }, { "epoch": 0.9572027456580826, "grad_norm": 0.8978698039692437, "learning_rate": 2.4946222803277354e-08, "loss": 0.0986, "step": 10389 }, { "epoch": 0.9572948818353526, "grad_norm": 0.9801110295191998, "learning_rate": 2.483899007078988e-08, "loss": 0.1306, "step": 10390 }, { "epoch": 0.9573870180126226, "grad_norm": 0.9985048936130674, "learning_rate": 2.4731987157268768e-08, "loss": 0.1247, "step": 10391 }, { "epoch": 0.9574791541898927, "grad_norm": 0.9170543020639506, "learning_rate": 2.462521407264912e-08, "loss": 0.1122, "step": 10392 }, { "epoch": 0.9575712903671627, "grad_norm": 0.9342437829032304, "learning_rate": 2.4518670826844393e-08, "loss": 0.1129, "step": 10393 }, { "epoch": 0.9576634265444327, "grad_norm": 0.9286150111365903, "learning_rate": 2.4412357429747514e-08, "loss": 0.1156, "step": 10394 }, { "epoch": 0.9577555627217027, "grad_norm": 0.9298276602062225, "learning_rate": 2.4306273891230025e-08, "loss": 0.1104, "step": 10395 }, { "epoch": 0.9578476988989727, "grad_norm": 1.0298146827458847, "learning_rate": 2.4200420221141274e-08, "loss": 0.1333, "step": 10396 }, { "epoch": 0.9579398350762427, "grad_norm": 1.0308681272587117, "learning_rate": 2.4094796429310063e-08, "loss": 0.1398, "step": 10397 }, { "epoch": 0.9580319712535127, "grad_norm": 0.9327133822677386, "learning_rate": 2.398940252554327e-08, "loss": 0.1067, "step": 10398 }, { "epoch": 0.9581241074307827, "grad_norm": 0.9462850181389516, "learning_rate": 2.3884238519626957e-08, "loss": 0.1124, "step": 10399 }, { "epoch": 0.9582162436080527, "grad_norm": 0.9239641699815608, "learning_rate": 2.3779304421325532e-08, "loss": 0.1116, "step": 10400 }, { "epoch": 0.9583083797853227, "grad_norm": 0.9651337343147548, "learning_rate": 2.3674600240382594e-08, "loss": 0.1218, "step": 10401 }, { "epoch": 0.9584005159625927, "grad_norm": 0.9622370171729949, "learning_rate": 2.3570125986518977e-08, "loss": 0.1234, "step": 10402 }, { "epoch": 0.9584926521398627, "grad_norm": 0.9408894748477682, "learning_rate": 2.346588166943581e-08, "loss": 0.115, "step": 10403 }, { "epoch": 0.9585847883171327, "grad_norm": 0.9783764871100286, "learning_rate": 2.336186729881229e-08, "loss": 0.1194, "step": 10404 }, { "epoch": 0.9586769244944028, "grad_norm": 0.9756132049009393, "learning_rate": 2.32580828843057e-08, "loss": 0.1186, "step": 10405 }, { "epoch": 0.9587690606716728, "grad_norm": 0.9510515294492892, "learning_rate": 2.3154528435553046e-08, "loss": 0.1092, "step": 10406 }, { "epoch": 0.9588611968489428, "grad_norm": 0.8845538346516818, "learning_rate": 2.3051203962168588e-08, "loss": 0.0984, "step": 10407 }, { "epoch": 0.9589533330262128, "grad_norm": 0.9793169380312722, "learning_rate": 2.2948109473746593e-08, "loss": 0.1201, "step": 10408 }, { "epoch": 0.9590454692034828, "grad_norm": 0.9673626523788653, "learning_rate": 2.2845244979859127e-08, "loss": 0.1207, "step": 10409 }, { "epoch": 0.9591376053807528, "grad_norm": 0.9894712915426592, "learning_rate": 2.274261049005716e-08, "loss": 0.1243, "step": 10410 }, { "epoch": 0.9592297415580228, "grad_norm": 0.9445587376170052, "learning_rate": 2.264020601387057e-08, "loss": 0.1186, "step": 10411 }, { "epoch": 0.9593218777352928, "grad_norm": 0.9923210445189716, "learning_rate": 2.2538031560807584e-08, "loss": 0.1362, "step": 10412 }, { "epoch": 0.9594140139125628, "grad_norm": 0.9947331667454813, "learning_rate": 2.243608714035478e-08, "loss": 0.1276, "step": 10413 }, { "epoch": 0.9595061500898328, "grad_norm": 0.9192672464588488, "learning_rate": 2.2334372761977918e-08, "loss": 0.1123, "step": 10414 }, { "epoch": 0.9595982862671028, "grad_norm": 0.9176602052717612, "learning_rate": 2.2232888435121115e-08, "loss": 0.1082, "step": 10415 }, { "epoch": 0.9596904224443727, "grad_norm": 0.8490744571811496, "learning_rate": 2.213163416920766e-08, "loss": 0.1001, "step": 10416 }, { "epoch": 0.9597825586216427, "grad_norm": 1.0246092310280255, "learning_rate": 2.203060997363837e-08, "loss": 0.1357, "step": 10417 }, { "epoch": 0.9598746947989127, "grad_norm": 0.9870790553638401, "learning_rate": 2.1929815857793802e-08, "loss": 0.1232, "step": 10418 }, { "epoch": 0.9599668309761828, "grad_norm": 0.8683466030495433, "learning_rate": 2.1829251831032293e-08, "loss": 0.1048, "step": 10419 }, { "epoch": 0.9600589671534528, "grad_norm": 0.9473957011467433, "learning_rate": 2.172891790269166e-08, "loss": 0.1191, "step": 10420 }, { "epoch": 0.9601511033307228, "grad_norm": 0.9543960166674035, "learning_rate": 2.1628814082087503e-08, "loss": 0.1059, "step": 10421 }, { "epoch": 0.9602432395079928, "grad_norm": 0.9508773504440561, "learning_rate": 2.1528940378514885e-08, "loss": 0.1162, "step": 10422 }, { "epoch": 0.9603353756852628, "grad_norm": 0.9490154248407026, "learning_rate": 2.142929680124667e-08, "loss": 0.1193, "step": 10423 }, { "epoch": 0.9604275118625328, "grad_norm": 0.9539270143381033, "learning_rate": 2.1329883359535174e-08, "loss": 0.1224, "step": 10424 }, { "epoch": 0.9605196480398028, "grad_norm": 0.9445515396370072, "learning_rate": 2.12307000626108e-08, "loss": 0.1116, "step": 10425 }, { "epoch": 0.9606117842170728, "grad_norm": 0.9336698772896715, "learning_rate": 2.113174691968256e-08, "loss": 0.1222, "step": 10426 }, { "epoch": 0.9607039203943428, "grad_norm": 0.9736147739548708, "learning_rate": 2.103302393993867e-08, "loss": 0.1215, "step": 10427 }, { "epoch": 0.9607960565716128, "grad_norm": 0.9610154886959913, "learning_rate": 2.0934531132544845e-08, "loss": 0.1119, "step": 10428 }, { "epoch": 0.9608881927488828, "grad_norm": 0.9233255796310752, "learning_rate": 2.0836268506647108e-08, "loss": 0.1159, "step": 10429 }, { "epoch": 0.9609803289261528, "grad_norm": 1.0019170763885927, "learning_rate": 2.0738236071368157e-08, "loss": 0.1267, "step": 10430 }, { "epoch": 0.9610724651034228, "grad_norm": 0.9475199883723734, "learning_rate": 2.0640433835810992e-08, "loss": 0.124, "step": 10431 }, { "epoch": 0.9611646012806928, "grad_norm": 0.9505982590713935, "learning_rate": 2.0542861809056403e-08, "loss": 0.1195, "step": 10432 }, { "epoch": 0.9612567374579629, "grad_norm": 0.9229846484513212, "learning_rate": 2.044552000016409e-08, "loss": 0.1221, "step": 10433 }, { "epoch": 0.9613488736352329, "grad_norm": 1.0038863547016854, "learning_rate": 2.0348408418172095e-08, "loss": 0.1264, "step": 10434 }, { "epoch": 0.9614410098125029, "grad_norm": 0.9560645257488093, "learning_rate": 2.025152707209682e-08, "loss": 0.1254, "step": 10435 }, { "epoch": 0.9615331459897729, "grad_norm": 0.9731366034584815, "learning_rate": 2.0154875970934406e-08, "loss": 0.1203, "step": 10436 }, { "epoch": 0.9616252821670429, "grad_norm": 0.87193348832153, "learning_rate": 2.0058455123658783e-08, "loss": 0.1034, "step": 10437 }, { "epoch": 0.9617174183443129, "grad_norm": 0.9036349774099822, "learning_rate": 1.996226453922251e-08, "loss": 0.1133, "step": 10438 }, { "epoch": 0.9618095545215829, "grad_norm": 0.9620815398911813, "learning_rate": 1.98663042265565e-08, "loss": 0.122, "step": 10439 }, { "epoch": 0.9619016906988529, "grad_norm": 0.9346474041117219, "learning_rate": 1.97705741945714e-08, "loss": 0.1164, "step": 10440 }, { "epoch": 0.9619938268761229, "grad_norm": 0.9400603989396563, "learning_rate": 1.9675074452155385e-08, "loss": 0.122, "step": 10441 }, { "epoch": 0.9620859630533929, "grad_norm": 0.9234460209107835, "learning_rate": 1.9579805008175524e-08, "loss": 0.1125, "step": 10442 }, { "epoch": 0.9621780992306629, "grad_norm": 0.9414505506791186, "learning_rate": 1.9484765871477795e-08, "loss": 0.124, "step": 10443 }, { "epoch": 0.9622702354079329, "grad_norm": 1.0014286121077527, "learning_rate": 1.9389957050886255e-08, "loss": 0.1299, "step": 10444 }, { "epoch": 0.9623623715852029, "grad_norm": 0.9142956574674312, "learning_rate": 1.9295378555204692e-08, "loss": 0.1108, "step": 10445 }, { "epoch": 0.962454507762473, "grad_norm": 0.8926286890511982, "learning_rate": 1.920103039321386e-08, "loss": 0.1038, "step": 10446 }, { "epoch": 0.962546643939743, "grad_norm": 0.9334442791983129, "learning_rate": 1.910691257367425e-08, "loss": 0.1185, "step": 10447 }, { "epoch": 0.962638780117013, "grad_norm": 0.9226218209835843, "learning_rate": 1.9013025105324988e-08, "loss": 0.1156, "step": 10448 }, { "epoch": 0.962730916294283, "grad_norm": 1.0008713216068006, "learning_rate": 1.8919367996883263e-08, "loss": 0.1358, "step": 10449 }, { "epoch": 0.962823052471553, "grad_norm": 0.9420362549155533, "learning_rate": 1.8825941257045178e-08, "loss": 0.1215, "step": 10450 }, { "epoch": 0.962915188648823, "grad_norm": 0.9479544981846614, "learning_rate": 1.8732744894485732e-08, "loss": 0.1147, "step": 10451 }, { "epoch": 0.963007324826093, "grad_norm": 0.911141437496614, "learning_rate": 1.8639778917857732e-08, "loss": 0.1154, "step": 10452 }, { "epoch": 0.963099461003363, "grad_norm": 0.9618422043315887, "learning_rate": 1.8547043335793435e-08, "loss": 0.1207, "step": 10453 }, { "epoch": 0.963191597180633, "grad_norm": 0.9380728000761778, "learning_rate": 1.845453815690318e-08, "loss": 0.1165, "step": 10454 }, { "epoch": 0.963283733357903, "grad_norm": 0.9173483292287571, "learning_rate": 1.8362263389775926e-08, "loss": 0.1158, "step": 10455 }, { "epoch": 0.963375869535173, "grad_norm": 0.9578019512184618, "learning_rate": 1.827021904297982e-08, "loss": 0.1068, "step": 10456 }, { "epoch": 0.963468005712443, "grad_norm": 0.9760195511875756, "learning_rate": 1.8178405125060804e-08, "loss": 0.1254, "step": 10457 }, { "epoch": 0.963560141889713, "grad_norm": 0.9428947080982596, "learning_rate": 1.8086821644544283e-08, "loss": 0.1093, "step": 10458 }, { "epoch": 0.963652278066983, "grad_norm": 0.952402275184984, "learning_rate": 1.7995468609933176e-08, "loss": 0.1197, "step": 10459 }, { "epoch": 0.9637444142442531, "grad_norm": 0.9459210733717505, "learning_rate": 1.790434602971014e-08, "loss": 0.1174, "step": 10460 }, { "epoch": 0.963836550421523, "grad_norm": 0.9453907795866404, "learning_rate": 1.7813453912335354e-08, "loss": 0.1207, "step": 10461 }, { "epoch": 0.963928686598793, "grad_norm": 0.9770369213173757, "learning_rate": 1.772279226624901e-08, "loss": 0.1189, "step": 10462 }, { "epoch": 0.964020822776063, "grad_norm": 0.9195107982697946, "learning_rate": 1.7632361099867988e-08, "loss": 0.1148, "step": 10463 }, { "epoch": 0.964112958953333, "grad_norm": 0.9331915458684126, "learning_rate": 1.7542160421590017e-08, "loss": 0.1204, "step": 10464 }, { "epoch": 0.964205095130603, "grad_norm": 0.8800809838568525, "learning_rate": 1.7452190239789225e-08, "loss": 0.1125, "step": 10465 }, { "epoch": 0.964297231307873, "grad_norm": 0.932302653775477, "learning_rate": 1.7362450562819765e-08, "loss": 0.1112, "step": 10466 }, { "epoch": 0.964389367485143, "grad_norm": 0.9339782759306302, "learning_rate": 1.7272941399013865e-08, "loss": 0.1203, "step": 10467 }, { "epoch": 0.964481503662413, "grad_norm": 0.9185719997260997, "learning_rate": 1.718366275668265e-08, "loss": 0.1135, "step": 10468 }, { "epoch": 0.964573639839683, "grad_norm": 0.947190121034125, "learning_rate": 1.7094614644115605e-08, "loss": 0.1202, "step": 10469 }, { "epoch": 0.964665776016953, "grad_norm": 0.9272255443211805, "learning_rate": 1.700579706958083e-08, "loss": 0.1116, "step": 10470 }, { "epoch": 0.964757912194223, "grad_norm": 0.9462412750065758, "learning_rate": 1.6917210041325073e-08, "loss": 0.1239, "step": 10471 }, { "epoch": 0.964850048371493, "grad_norm": 0.8873810871132549, "learning_rate": 1.6828853567573413e-08, "loss": 0.1103, "step": 10472 }, { "epoch": 0.9649421845487631, "grad_norm": 0.9566149931244136, "learning_rate": 1.6740727656529844e-08, "loss": 0.1229, "step": 10473 }, { "epoch": 0.9650343207260331, "grad_norm": 0.8996264087378854, "learning_rate": 1.6652832316377264e-08, "loss": 0.1136, "step": 10474 }, { "epoch": 0.9651264569033031, "grad_norm": 0.9032876556833013, "learning_rate": 1.6565167555276373e-08, "loss": 0.108, "step": 10475 }, { "epoch": 0.9652185930805731, "grad_norm": 0.9747145064783178, "learning_rate": 1.6477733381367043e-08, "loss": 0.1172, "step": 10476 }, { "epoch": 0.9653107292578431, "grad_norm": 0.955471389790214, "learning_rate": 1.639052980276723e-08, "loss": 0.1171, "step": 10477 }, { "epoch": 0.9654028654351131, "grad_norm": 0.9422341858068135, "learning_rate": 1.6303556827574062e-08, "loss": 0.1103, "step": 10478 }, { "epoch": 0.9654950016123831, "grad_norm": 0.9833514913519336, "learning_rate": 1.6216814463863028e-08, "loss": 0.1112, "step": 10479 }, { "epoch": 0.9655871377896531, "grad_norm": 0.9459532688328239, "learning_rate": 1.6130302719687962e-08, "loss": 0.1148, "step": 10480 }, { "epoch": 0.9656792739669231, "grad_norm": 0.9261751377453326, "learning_rate": 1.6044021603081607e-08, "loss": 0.1188, "step": 10481 }, { "epoch": 0.9657714101441931, "grad_norm": 0.9328018965578682, "learning_rate": 1.5957971122055327e-08, "loss": 0.1224, "step": 10482 }, { "epoch": 0.9658635463214631, "grad_norm": 0.9127921231957702, "learning_rate": 1.5872151284598848e-08, "loss": 0.1106, "step": 10483 }, { "epoch": 0.9659556824987331, "grad_norm": 0.9700118414319213, "learning_rate": 1.5786562098680235e-08, "loss": 0.1217, "step": 10484 }, { "epoch": 0.9660478186760031, "grad_norm": 0.9464480098243716, "learning_rate": 1.570120357224647e-08, "loss": 0.1133, "step": 10485 }, { "epoch": 0.9661399548532731, "grad_norm": 0.9112094496345272, "learning_rate": 1.561607571322371e-08, "loss": 0.1093, "step": 10486 }, { "epoch": 0.9662320910305432, "grad_norm": 0.9105593590010037, "learning_rate": 1.5531178529515635e-08, "loss": 0.1097, "step": 10487 }, { "epoch": 0.9663242272078132, "grad_norm": 0.9055822997245453, "learning_rate": 1.54465120290051e-08, "loss": 0.1054, "step": 10488 }, { "epoch": 0.9664163633850832, "grad_norm": 0.9365050243180014, "learning_rate": 1.5362076219553048e-08, "loss": 0.1167, "step": 10489 }, { "epoch": 0.9665084995623532, "grad_norm": 0.9324783994074396, "learning_rate": 1.5277871108999586e-08, "loss": 0.1196, "step": 10490 }, { "epoch": 0.9666006357396232, "grad_norm": 0.9687391645872897, "learning_rate": 1.519389670516347e-08, "loss": 0.1264, "step": 10491 }, { "epoch": 0.9666927719168932, "grad_norm": 0.9326297055675994, "learning_rate": 1.511015301584151e-08, "loss": 0.1155, "step": 10492 }, { "epoch": 0.9667849080941632, "grad_norm": 0.9116280800410203, "learning_rate": 1.502664004880888e-08, "loss": 0.1104, "step": 10493 }, { "epoch": 0.9668770442714332, "grad_norm": 0.9768260070146615, "learning_rate": 1.4943357811820492e-08, "loss": 0.1179, "step": 10494 }, { "epoch": 0.9669691804487032, "grad_norm": 1.0156559451484888, "learning_rate": 1.4860306312608762e-08, "loss": 0.1246, "step": 10495 }, { "epoch": 0.9670613166259732, "grad_norm": 0.9438961994256015, "learning_rate": 1.4777485558884753e-08, "loss": 0.1193, "step": 10496 }, { "epoch": 0.9671534528032432, "grad_norm": 0.9865514247200415, "learning_rate": 1.4694895558338972e-08, "loss": 0.1222, "step": 10497 }, { "epoch": 0.9672455889805132, "grad_norm": 0.9609002911659604, "learning_rate": 1.4612536318639459e-08, "loss": 0.1188, "step": 10498 }, { "epoch": 0.9673377251577832, "grad_norm": 0.9682466775746156, "learning_rate": 1.4530407847433702e-08, "loss": 0.1288, "step": 10499 }, { "epoch": 0.9674298613350532, "grad_norm": 0.9339595766099066, "learning_rate": 1.4448510152346717e-08, "loss": 0.1133, "step": 10500 }, { "epoch": 0.9674298613350532, "eval_loss": 0.11658257246017456, "eval_runtime": 300.5591, "eval_samples_per_second": 23.346, "eval_steps_per_second": 2.921, "step": 10500 }, { "epoch": 0.9675219975123233, "grad_norm": 0.9331219890938893, "learning_rate": 1.4366843240982975e-08, "loss": 0.1149, "step": 10501 }, { "epoch": 0.9676141336895933, "grad_norm": 0.9234308420470323, "learning_rate": 1.4285407120925854e-08, "loss": 0.1203, "step": 10502 }, { "epoch": 0.9677062698668633, "grad_norm": 0.9100047418427226, "learning_rate": 1.4204201799735973e-08, "loss": 0.1048, "step": 10503 }, { "epoch": 0.9677984060441333, "grad_norm": 0.9445857702495147, "learning_rate": 1.412322728495341e-08, "loss": 0.1076, "step": 10504 }, { "epoch": 0.9678905422214032, "grad_norm": 0.9000245874971252, "learning_rate": 1.40424835840966e-08, "loss": 0.1125, "step": 10505 }, { "epoch": 0.9679826783986732, "grad_norm": 0.9193210702366165, "learning_rate": 1.3961970704662875e-08, "loss": 0.1061, "step": 10506 }, { "epoch": 0.9680748145759432, "grad_norm": 0.876365817619803, "learning_rate": 1.3881688654127645e-08, "loss": 0.1051, "step": 10507 }, { "epoch": 0.9681669507532132, "grad_norm": 0.9577008365137175, "learning_rate": 1.3801637439945225e-08, "loss": 0.1203, "step": 10508 }, { "epoch": 0.9682590869304832, "grad_norm": 0.9348907292875536, "learning_rate": 1.3721817069548282e-08, "loss": 0.1178, "step": 10509 }, { "epoch": 0.9683512231077532, "grad_norm": 0.9456956045310755, "learning_rate": 1.3642227550348387e-08, "loss": 0.1199, "step": 10510 }, { "epoch": 0.9684433592850232, "grad_norm": 0.9169617118871369, "learning_rate": 1.3562868889735182e-08, "loss": 0.104, "step": 10511 }, { "epoch": 0.9685354954622932, "grad_norm": 0.9437229497003773, "learning_rate": 1.348374109507694e-08, "loss": 0.1223, "step": 10512 }, { "epoch": 0.9686276316395632, "grad_norm": 0.980640074035574, "learning_rate": 1.3404844173721398e-08, "loss": 0.1183, "step": 10513 }, { "epoch": 0.9687197678168333, "grad_norm": 0.9759068217403593, "learning_rate": 1.332617813299325e-08, "loss": 0.1234, "step": 10514 }, { "epoch": 0.9688119039941033, "grad_norm": 0.9122055207582814, "learning_rate": 1.324774298019721e-08, "loss": 0.1115, "step": 10515 }, { "epoch": 0.9689040401713733, "grad_norm": 0.9770911885826764, "learning_rate": 1.316953872261606e-08, "loss": 0.1205, "step": 10516 }, { "epoch": 0.9689961763486433, "grad_norm": 0.9367724440014724, "learning_rate": 1.3091565367510661e-08, "loss": 0.1111, "step": 10517 }, { "epoch": 0.9690883125259133, "grad_norm": 0.9332000236767728, "learning_rate": 1.3013822922121332e-08, "loss": 0.1164, "step": 10518 }, { "epoch": 0.9691804487031833, "grad_norm": 0.9715687328064153, "learning_rate": 1.2936311393665912e-08, "loss": 0.1154, "step": 10519 }, { "epoch": 0.9692725848804533, "grad_norm": 0.9494480811734403, "learning_rate": 1.2859030789341698e-08, "loss": 0.1109, "step": 10520 }, { "epoch": 0.9693647210577233, "grad_norm": 0.8713494760993586, "learning_rate": 1.278198111632406e-08, "loss": 0.1073, "step": 10521 }, { "epoch": 0.9694568572349933, "grad_norm": 0.9593584205782064, "learning_rate": 1.2705162381767277e-08, "loss": 0.1166, "step": 10522 }, { "epoch": 0.9695489934122633, "grad_norm": 0.9071851172527483, "learning_rate": 1.2628574592803977e-08, "loss": 0.1131, "step": 10523 }, { "epoch": 0.9696411295895333, "grad_norm": 0.9053877155338074, "learning_rate": 1.2552217756545137e-08, "loss": 0.1115, "step": 10524 }, { "epoch": 0.9697332657668033, "grad_norm": 0.9666711819309619, "learning_rate": 1.2476091880080366e-08, "loss": 0.1189, "step": 10525 }, { "epoch": 0.9698254019440733, "grad_norm": 0.9263691427304236, "learning_rate": 1.240019697047845e-08, "loss": 0.1234, "step": 10526 }, { "epoch": 0.9699175381213433, "grad_norm": 0.9379347777164991, "learning_rate": 1.2324533034785702e-08, "loss": 0.1086, "step": 10527 }, { "epoch": 0.9700096742986134, "grad_norm": 0.9637020462719162, "learning_rate": 1.2249100080028164e-08, "loss": 0.1089, "step": 10528 }, { "epoch": 0.9701018104758834, "grad_norm": 0.9419892430527868, "learning_rate": 1.2173898113209126e-08, "loss": 0.1191, "step": 10529 }, { "epoch": 0.9701939466531534, "grad_norm": 0.9343313110426041, "learning_rate": 1.2098927141311333e-08, "loss": 0.1188, "step": 10530 }, { "epoch": 0.9702860828304234, "grad_norm": 0.9821001712352756, "learning_rate": 1.2024187171296165e-08, "loss": 0.1207, "step": 10531 }, { "epoch": 0.9703782190076934, "grad_norm": 0.936560662898743, "learning_rate": 1.1949678210102788e-08, "loss": 0.1141, "step": 10532 }, { "epoch": 0.9704703551849634, "grad_norm": 0.9816223567816494, "learning_rate": 1.1875400264649562e-08, "loss": 0.1137, "step": 10533 }, { "epoch": 0.9705624913622334, "grad_norm": 0.9258232675199926, "learning_rate": 1.1801353341833466e-08, "loss": 0.1106, "step": 10534 }, { "epoch": 0.9706546275395034, "grad_norm": 0.9150708660768186, "learning_rate": 1.1727537448529003e-08, "loss": 0.1079, "step": 10535 }, { "epoch": 0.9707467637167734, "grad_norm": 0.9204217315060662, "learning_rate": 1.1653952591590967e-08, "loss": 0.1169, "step": 10536 }, { "epoch": 0.9708388998940434, "grad_norm": 0.9549901951555877, "learning_rate": 1.1580598777850837e-08, "loss": 0.1115, "step": 10537 }, { "epoch": 0.9709310360713134, "grad_norm": 0.9021095506714563, "learning_rate": 1.1507476014120112e-08, "loss": 0.1081, "step": 10538 }, { "epoch": 0.9710231722485834, "grad_norm": 0.9292179876258926, "learning_rate": 1.143458430718808e-08, "loss": 0.12, "step": 10539 }, { "epoch": 0.9711153084258534, "grad_norm": 0.9377578388308517, "learning_rate": 1.136192366382266e-08, "loss": 0.1229, "step": 10540 }, { "epoch": 0.9712074446031235, "grad_norm": 0.9572734492491797, "learning_rate": 1.128949409077068e-08, "loss": 0.1144, "step": 10541 }, { "epoch": 0.9712995807803935, "grad_norm": 0.9870783763304519, "learning_rate": 1.121729559475676e-08, "loss": 0.118, "step": 10542 }, { "epoch": 0.9713917169576635, "grad_norm": 0.9116902070429109, "learning_rate": 1.1145328182484706e-08, "loss": 0.1097, "step": 10543 }, { "epoch": 0.9714838531349335, "grad_norm": 0.9063023701626302, "learning_rate": 1.1073591860636946e-08, "loss": 0.1156, "step": 10544 }, { "epoch": 0.9715759893122035, "grad_norm": 0.9457383706995881, "learning_rate": 1.1002086635873987e-08, "loss": 0.1241, "step": 10545 }, { "epoch": 0.9716681254894735, "grad_norm": 0.9838563803147564, "learning_rate": 1.0930812514835243e-08, "loss": 0.1213, "step": 10546 }, { "epoch": 0.9717602616667435, "grad_norm": 0.8800524682172473, "learning_rate": 1.0859769504138196e-08, "loss": 0.1083, "step": 10547 }, { "epoch": 0.9718523978440134, "grad_norm": 0.910856796387508, "learning_rate": 1.0788957610379791e-08, "loss": 0.1113, "step": 10548 }, { "epoch": 0.9719445340212834, "grad_norm": 0.8808796503629651, "learning_rate": 1.0718376840134214e-08, "loss": 0.1117, "step": 10549 }, { "epoch": 0.9720366701985534, "grad_norm": 0.9406849715983612, "learning_rate": 1.0648027199955391e-08, "loss": 0.1214, "step": 10550 }, { "epoch": 0.9721288063758234, "grad_norm": 0.9069532479196183, "learning_rate": 1.0577908696375316e-08, "loss": 0.1165, "step": 10551 }, { "epoch": 0.9722209425530934, "grad_norm": 0.9548171410344726, "learning_rate": 1.0508021335904061e-08, "loss": 0.1156, "step": 10552 }, { "epoch": 0.9723130787303634, "grad_norm": 0.9573038324311234, "learning_rate": 1.0438365125031158e-08, "loss": 0.1192, "step": 10553 }, { "epoch": 0.9724052149076334, "grad_norm": 0.9533383138256408, "learning_rate": 1.0368940070223932e-08, "loss": 0.1135, "step": 10554 }, { "epoch": 0.9724973510849035, "grad_norm": 0.9615848577133012, "learning_rate": 1.0299746177928338e-08, "loss": 0.1158, "step": 10555 }, { "epoch": 0.9725894872621735, "grad_norm": 0.9373204121607308, "learning_rate": 1.0230783454569515e-08, "loss": 0.1143, "step": 10556 }, { "epoch": 0.9726816234394435, "grad_norm": 0.9449065421907685, "learning_rate": 1.0162051906550397e-08, "loss": 0.1136, "step": 10557 }, { "epoch": 0.9727737596167135, "grad_norm": 0.88676617682467, "learning_rate": 1.0093551540252822e-08, "loss": 0.1071, "step": 10558 }, { "epoch": 0.9728658957939835, "grad_norm": 0.905225458618946, "learning_rate": 1.0025282362036704e-08, "loss": 0.1206, "step": 10559 }, { "epoch": 0.9729580319712535, "grad_norm": 0.9183727079728304, "learning_rate": 9.957244378241138e-09, "loss": 0.1101, "step": 10560 }, { "epoch": 0.9730501681485235, "grad_norm": 0.9845449747492825, "learning_rate": 9.889437595183293e-09, "loss": 0.1244, "step": 10561 }, { "epoch": 0.9731423043257935, "grad_norm": 0.9502006703842801, "learning_rate": 9.821862019159522e-09, "loss": 0.1231, "step": 10562 }, { "epoch": 0.9732344405030635, "grad_norm": 0.9617262844228264, "learning_rate": 9.754517656443697e-09, "loss": 0.1221, "step": 10563 }, { "epoch": 0.9733265766803335, "grad_norm": 0.9519604835311853, "learning_rate": 9.68740451328859e-09, "loss": 0.1171, "step": 10564 }, { "epoch": 0.9734187128576035, "grad_norm": 0.9637928773849125, "learning_rate": 9.62052259592644e-09, "loss": 0.1243, "step": 10565 }, { "epoch": 0.9735108490348735, "grad_norm": 0.9288945824254763, "learning_rate": 9.553871910566448e-09, "loss": 0.1239, "step": 10566 }, { "epoch": 0.9736029852121435, "grad_norm": 0.9472650891783666, "learning_rate": 9.487452463397828e-09, "loss": 0.1033, "step": 10567 }, { "epoch": 0.9736951213894135, "grad_norm": 0.9534769666335019, "learning_rate": 9.421264260587038e-09, "loss": 0.1191, "step": 10568 }, { "epoch": 0.9737872575666836, "grad_norm": 0.9897087742581929, "learning_rate": 9.355307308279992e-09, "loss": 0.1246, "step": 10569 }, { "epoch": 0.9738793937439536, "grad_norm": 0.9127514393900824, "learning_rate": 9.289581612600684e-09, "loss": 0.1194, "step": 10570 }, { "epoch": 0.9739715299212236, "grad_norm": 0.9052157433771952, "learning_rate": 9.224087179651731e-09, "loss": 0.1099, "step": 10571 }, { "epoch": 0.9740636660984936, "grad_norm": 0.9536514537796765, "learning_rate": 9.158824015514378e-09, "loss": 0.1174, "step": 10572 }, { "epoch": 0.9741558022757636, "grad_norm": 1.0070035166714224, "learning_rate": 9.093792126248224e-09, "loss": 0.1177, "step": 10573 }, { "epoch": 0.9742479384530336, "grad_norm": 0.9381398739474147, "learning_rate": 9.028991517891495e-09, "loss": 0.119, "step": 10574 }, { "epoch": 0.9743400746303036, "grad_norm": 0.9668291092324927, "learning_rate": 8.964422196461042e-09, "loss": 0.1296, "step": 10575 }, { "epoch": 0.9744322108075736, "grad_norm": 0.8874480746623137, "learning_rate": 8.900084167952072e-09, "loss": 0.1066, "step": 10576 }, { "epoch": 0.9745243469848436, "grad_norm": 0.93955289545439, "learning_rate": 8.835977438338417e-09, "loss": 0.114, "step": 10577 }, { "epoch": 0.9746164831621136, "grad_norm": 0.9040397264793893, "learning_rate": 8.772102013572537e-09, "loss": 0.1093, "step": 10578 }, { "epoch": 0.9747086193393836, "grad_norm": 0.9045961887807713, "learning_rate": 8.708457899584965e-09, "loss": 0.1128, "step": 10579 }, { "epoch": 0.9748007555166536, "grad_norm": 0.9615250420317627, "learning_rate": 8.645045102285143e-09, "loss": 0.1171, "step": 10580 }, { "epoch": 0.9748928916939236, "grad_norm": 0.9054598326002232, "learning_rate": 8.58186362756086e-09, "loss": 0.1186, "step": 10581 }, { "epoch": 0.9749850278711937, "grad_norm": 0.9889995978585244, "learning_rate": 8.518913481278812e-09, "loss": 0.1214, "step": 10582 }, { "epoch": 0.9750771640484637, "grad_norm": 0.9742523995692353, "learning_rate": 8.456194669284046e-09, "loss": 0.1277, "step": 10583 }, { "epoch": 0.9751693002257337, "grad_norm": 0.9658412586374955, "learning_rate": 8.393707197399404e-09, "loss": 0.1194, "step": 10584 }, { "epoch": 0.9752614364030037, "grad_norm": 0.8987042660004325, "learning_rate": 8.331451071427188e-09, "loss": 0.1154, "step": 10585 }, { "epoch": 0.9753535725802737, "grad_norm": 0.9238970510872365, "learning_rate": 8.269426297148053e-09, "loss": 0.1093, "step": 10586 }, { "epoch": 0.9754457087575437, "grad_norm": 0.9426614072312355, "learning_rate": 8.207632880320727e-09, "loss": 0.1089, "step": 10587 }, { "epoch": 0.9755378449348137, "grad_norm": 1.0276725978874846, "learning_rate": 8.146070826683116e-09, "loss": 0.1195, "step": 10588 }, { "epoch": 0.9756299811120837, "grad_norm": 0.9563523741611569, "learning_rate": 8.084740141950653e-09, "loss": 0.1236, "step": 10589 }, { "epoch": 0.9757221172893537, "grad_norm": 0.9156272141595668, "learning_rate": 8.023640831818502e-09, "loss": 0.1095, "step": 10590 }, { "epoch": 0.9758142534666236, "grad_norm": 0.9179595642185693, "learning_rate": 7.962772901959348e-09, "loss": 0.112, "step": 10591 }, { "epoch": 0.9759063896438936, "grad_norm": 0.9278463762785488, "learning_rate": 7.902136358025058e-09, "loss": 0.1138, "step": 10592 }, { "epoch": 0.9759985258211636, "grad_norm": 0.95021990936773, "learning_rate": 7.841731205645576e-09, "loss": 0.124, "step": 10593 }, { "epoch": 0.9760906619984336, "grad_norm": 0.8940755815437218, "learning_rate": 7.781557450429467e-09, "loss": 0.1009, "step": 10594 }, { "epoch": 0.9761827981757036, "grad_norm": 0.9717988157956657, "learning_rate": 7.72161509796393e-09, "loss": 0.1119, "step": 10595 }, { "epoch": 0.9762749343529737, "grad_norm": 0.9008600970075309, "learning_rate": 7.661904153814793e-09, "loss": 0.1133, "step": 10596 }, { "epoch": 0.9763670705302437, "grad_norm": 0.9419592224579174, "learning_rate": 7.60242462352595e-09, "loss": 0.1228, "step": 10597 }, { "epoch": 0.9764592067075137, "grad_norm": 0.9587973988727028, "learning_rate": 7.543176512620487e-09, "loss": 0.1168, "step": 10598 }, { "epoch": 0.9765513428847837, "grad_norm": 0.9321975028306119, "learning_rate": 7.484159826599002e-09, "loss": 0.1176, "step": 10599 }, { "epoch": 0.9766434790620537, "grad_norm": 0.922618089187133, "learning_rate": 7.425374570941557e-09, "loss": 0.1141, "step": 10600 }, { "epoch": 0.9767356152393237, "grad_norm": 0.8982462835378838, "learning_rate": 7.366820751106562e-09, "loss": 0.1142, "step": 10601 }, { "epoch": 0.9768277514165937, "grad_norm": 0.9843917017683375, "learning_rate": 7.308498372530226e-09, "loss": 0.1259, "step": 10602 }, { "epoch": 0.9769198875938637, "grad_norm": 0.9473204680331596, "learning_rate": 7.250407440628493e-09, "loss": 0.1234, "step": 10603 }, { "epoch": 0.9770120237711337, "grad_norm": 1.0040227923056881, "learning_rate": 7.192547960794549e-09, "loss": 0.1217, "step": 10604 }, { "epoch": 0.9771041599484037, "grad_norm": 0.947542454325738, "learning_rate": 7.134919938400486e-09, "loss": 0.1178, "step": 10605 }, { "epoch": 0.9771962961256737, "grad_norm": 0.9500522546507386, "learning_rate": 7.077523378797579e-09, "loss": 0.1136, "step": 10606 }, { "epoch": 0.9772884323029437, "grad_norm": 0.8961019047765427, "learning_rate": 7.0203582873151764e-09, "loss": 0.106, "step": 10607 }, { "epoch": 0.9773805684802137, "grad_norm": 0.9211980530137002, "learning_rate": 6.963424669260421e-09, "loss": 0.1176, "step": 10608 }, { "epoch": 0.9774727046574838, "grad_norm": 0.929269443134366, "learning_rate": 6.906722529920196e-09, "loss": 0.1233, "step": 10609 }, { "epoch": 0.9775648408347538, "grad_norm": 0.9473648799487335, "learning_rate": 6.850251874559177e-09, "loss": 0.1128, "step": 10610 }, { "epoch": 0.9776569770120238, "grad_norm": 0.9169410453945872, "learning_rate": 6.7940127084203945e-09, "loss": 0.1125, "step": 10611 }, { "epoch": 0.9777491131892938, "grad_norm": 0.9092218025061077, "learning_rate": 6.738005036726059e-09, "loss": 0.1082, "step": 10612 }, { "epoch": 0.9778412493665638, "grad_norm": 0.9487463863743852, "learning_rate": 6.682228864675899e-09, "loss": 0.117, "step": 10613 }, { "epoch": 0.9779333855438338, "grad_norm": 0.9286186855517777, "learning_rate": 6.626684197449384e-09, "loss": 0.1073, "step": 10614 }, { "epoch": 0.9780255217211038, "grad_norm": 0.9033427687803351, "learning_rate": 6.5713710402037775e-09, "loss": 0.111, "step": 10615 }, { "epoch": 0.9781176578983738, "grad_norm": 0.9705632853143739, "learning_rate": 6.516289398074416e-09, "loss": 0.1272, "step": 10616 }, { "epoch": 0.9782097940756438, "grad_norm": 0.9143294975731162, "learning_rate": 6.461439276176096e-09, "loss": 0.1156, "step": 10617 }, { "epoch": 0.9783019302529138, "grad_norm": 0.9656105337016601, "learning_rate": 6.406820679601411e-09, "loss": 0.1257, "step": 10618 }, { "epoch": 0.9783940664301838, "grad_norm": 1.006782451352813, "learning_rate": 6.35243361342186e-09, "loss": 0.1294, "step": 10619 }, { "epoch": 0.9784862026074538, "grad_norm": 0.930881618709819, "learning_rate": 6.298278082687015e-09, "loss": 0.1229, "step": 10620 }, { "epoch": 0.9785783387847238, "grad_norm": 0.9250924437201751, "learning_rate": 6.244354092425631e-09, "loss": 0.1092, "step": 10621 }, { "epoch": 0.9786704749619938, "grad_norm": 0.9004668653298935, "learning_rate": 6.190661647644259e-09, "loss": 0.0974, "step": 10622 }, { "epoch": 0.9787626111392639, "grad_norm": 0.9078763930835535, "learning_rate": 6.137200753328354e-09, "loss": 0.1078, "step": 10623 }, { "epoch": 0.9788547473165339, "grad_norm": 0.9246549887726953, "learning_rate": 6.083971414442003e-09, "loss": 0.1059, "step": 10624 }, { "epoch": 0.9789468834938039, "grad_norm": 0.9760946282787811, "learning_rate": 6.030973635926807e-09, "loss": 0.1196, "step": 10625 }, { "epoch": 0.9790390196710739, "grad_norm": 0.9148879730601133, "learning_rate": 5.9782074227046625e-09, "loss": 0.1106, "step": 10626 }, { "epoch": 0.9791311558483439, "grad_norm": 0.9830983954452395, "learning_rate": 5.925672779673875e-09, "loss": 0.13, "step": 10627 }, { "epoch": 0.9792232920256139, "grad_norm": 0.9168069775130212, "learning_rate": 5.87336971171304e-09, "loss": 0.1117, "step": 10628 }, { "epoch": 0.9793154282028839, "grad_norm": 0.927218481484028, "learning_rate": 5.821298223678274e-09, "loss": 0.1152, "step": 10629 }, { "epoch": 0.9794075643801539, "grad_norm": 0.9453897188162311, "learning_rate": 5.76945832040432e-09, "loss": 0.1227, "step": 10630 }, { "epoch": 0.9794997005574239, "grad_norm": 0.9079700350552379, "learning_rate": 5.717850006704551e-09, "loss": 0.1071, "step": 10631 }, { "epoch": 0.9795918367346939, "grad_norm": 0.9516553063862506, "learning_rate": 5.666473287370966e-09, "loss": 0.1245, "step": 10632 }, { "epoch": 0.9796839729119639, "grad_norm": 0.9634462965013579, "learning_rate": 5.615328167173639e-09, "loss": 0.1178, "step": 10633 }, { "epoch": 0.9797761090892338, "grad_norm": 0.9206987163610182, "learning_rate": 5.564414650861549e-09, "loss": 0.1154, "step": 10634 }, { "epoch": 0.9798682452665038, "grad_norm": 0.9014004204638184, "learning_rate": 5.513732743162303e-09, "loss": 0.1063, "step": 10635 }, { "epoch": 0.9799603814437738, "grad_norm": 0.8760217490984519, "learning_rate": 5.463282448781027e-09, "loss": 0.1048, "step": 10636 }, { "epoch": 0.980052517621044, "grad_norm": 0.9331572014494885, "learning_rate": 5.41306377240286e-09, "loss": 0.1111, "step": 10637 }, { "epoch": 0.980144653798314, "grad_norm": 0.9934794177717047, "learning_rate": 5.363076718689908e-09, "loss": 0.1257, "step": 10638 }, { "epoch": 0.9802367899755839, "grad_norm": 0.9255709555345145, "learning_rate": 5.313321292283735e-09, "loss": 0.1118, "step": 10639 }, { "epoch": 0.9803289261528539, "grad_norm": 0.9304134226487328, "learning_rate": 5.263797497804257e-09, "loss": 0.1202, "step": 10640 }, { "epoch": 0.9804210623301239, "grad_norm": 0.9036732654637581, "learning_rate": 5.2145053398494626e-09, "loss": 0.113, "step": 10641 }, { "epoch": 0.9805131985073939, "grad_norm": 0.884189794772743, "learning_rate": 5.165444822996801e-09, "loss": 0.1096, "step": 10642 }, { "epoch": 0.9806053346846639, "grad_norm": 0.9227208369894664, "learning_rate": 5.116615951800685e-09, "loss": 0.1138, "step": 10643 }, { "epoch": 0.9806974708619339, "grad_norm": 0.9727271841430377, "learning_rate": 5.068018730795543e-09, "loss": 0.1278, "step": 10644 }, { "epoch": 0.9807896070392039, "grad_norm": 0.8889123529269067, "learning_rate": 5.019653164493044e-09, "loss": 0.1126, "step": 10645 }, { "epoch": 0.9808817432164739, "grad_norm": 0.9502701859156512, "learning_rate": 4.971519257384316e-09, "loss": 0.1164, "step": 10646 }, { "epoch": 0.9809738793937439, "grad_norm": 0.9612182962807315, "learning_rate": 4.9236170139388415e-09, "loss": 0.1181, "step": 10647 }, { "epoch": 0.9810660155710139, "grad_norm": 0.9677109113081508, "learning_rate": 4.875946438603896e-09, "loss": 0.1192, "step": 10648 }, { "epoch": 0.9811581517482839, "grad_norm": 0.934500406425529, "learning_rate": 4.828507535805937e-09, "loss": 0.1155, "step": 10649 }, { "epoch": 0.981250287925554, "grad_norm": 0.9816515440764665, "learning_rate": 4.781300309949221e-09, "loss": 0.122, "step": 10650 }, { "epoch": 0.981342424102824, "grad_norm": 0.8982107358428756, "learning_rate": 4.734324765417741e-09, "loss": 0.1081, "step": 10651 }, { "epoch": 0.981434560280094, "grad_norm": 0.9325950183542184, "learning_rate": 4.687580906572453e-09, "loss": 0.1197, "step": 10652 }, { "epoch": 0.981526696457364, "grad_norm": 0.9549246082334452, "learning_rate": 4.6410687377540505e-09, "loss": 0.1154, "step": 10653 }, { "epoch": 0.981618832634634, "grad_norm": 0.9353084160232171, "learning_rate": 4.5947882632810244e-09, "loss": 0.1083, "step": 10654 }, { "epoch": 0.981710968811904, "grad_norm": 0.8924218274524065, "learning_rate": 4.5487394874502155e-09, "loss": 0.105, "step": 10655 }, { "epoch": 0.981803104989174, "grad_norm": 0.9371527891685825, "learning_rate": 4.502922414537647e-09, "loss": 0.1162, "step": 10656 }, { "epoch": 0.981895241166444, "grad_norm": 0.9043040642121698, "learning_rate": 4.457337048797139e-09, "loss": 0.112, "step": 10657 }, { "epoch": 0.981987377343714, "grad_norm": 0.9966084418021494, "learning_rate": 4.411983394461694e-09, "loss": 0.1213, "step": 10658 }, { "epoch": 0.982079513520984, "grad_norm": 0.9518988653583343, "learning_rate": 4.366861455742111e-09, "loss": 0.1136, "step": 10659 }, { "epoch": 0.982171649698254, "grad_norm": 0.9182319295200211, "learning_rate": 4.321971236827815e-09, "loss": 0.1124, "step": 10660 }, { "epoch": 0.982263785875524, "grad_norm": 0.9072685324613959, "learning_rate": 4.277312741887418e-09, "loss": 0.1162, "step": 10661 }, { "epoch": 0.982355922052794, "grad_norm": 0.9127397436133858, "learning_rate": 4.232885975066769e-09, "loss": 0.1073, "step": 10662 }, { "epoch": 0.982448058230064, "grad_norm": 0.9097974449201501, "learning_rate": 4.188690940491457e-09, "loss": 0.1221, "step": 10663 }, { "epoch": 0.9825401944073341, "grad_norm": 0.921889778134402, "learning_rate": 4.144727642264867e-09, "loss": 0.1033, "step": 10664 }, { "epoch": 0.9826323305846041, "grad_norm": 0.9406033364188552, "learning_rate": 4.100996084468734e-09, "loss": 0.1169, "step": 10665 }, { "epoch": 0.9827244667618741, "grad_norm": 0.948017766212838, "learning_rate": 4.057496271163974e-09, "loss": 0.1186, "step": 10666 }, { "epoch": 0.9828166029391441, "grad_norm": 0.9274946903488767, "learning_rate": 4.014228206389026e-09, "loss": 0.1189, "step": 10667 }, { "epoch": 0.9829087391164141, "grad_norm": 0.9316329448709069, "learning_rate": 3.971191894161785e-09, "loss": 0.1147, "step": 10668 }, { "epoch": 0.9830008752936841, "grad_norm": 0.9329760448425564, "learning_rate": 3.9283873384779455e-09, "loss": 0.1111, "step": 10669 }, { "epoch": 0.9830930114709541, "grad_norm": 0.9303551616716375, "learning_rate": 3.8858145433118275e-09, "loss": 0.1149, "step": 10670 }, { "epoch": 0.9831851476482241, "grad_norm": 1.0299478339943349, "learning_rate": 3.843473512616658e-09, "loss": 0.1233, "step": 10671 }, { "epoch": 0.9832772838254941, "grad_norm": 0.9347356160626646, "learning_rate": 3.801364250323458e-09, "loss": 0.1204, "step": 10672 }, { "epoch": 0.9833694200027641, "grad_norm": 0.9154680976488679, "learning_rate": 3.759486760342435e-09, "loss": 0.1131, "step": 10673 }, { "epoch": 0.9834615561800341, "grad_norm": 0.9648845681864151, "learning_rate": 3.7178410465615876e-09, "loss": 0.1074, "step": 10674 }, { "epoch": 0.983553692357304, "grad_norm": 0.9607039283534857, "learning_rate": 3.676427112848102e-09, "loss": 0.108, "step": 10675 }, { "epoch": 0.983645828534574, "grad_norm": 0.9682615771187273, "learning_rate": 3.63524496304668e-09, "loss": 0.1303, "step": 10676 }, { "epoch": 0.9837379647118442, "grad_norm": 1.004190066912663, "learning_rate": 3.5942946009814848e-09, "loss": 0.1222, "step": 10677 }, { "epoch": 0.9838301008891142, "grad_norm": 0.9169312481844852, "learning_rate": 3.553576030454753e-09, "loss": 0.1105, "step": 10678 }, { "epoch": 0.9839222370663842, "grad_norm": 0.9427475770972022, "learning_rate": 3.5130892552473485e-09, "loss": 0.122, "step": 10679 }, { "epoch": 0.9840143732436542, "grad_norm": 0.909718197191416, "learning_rate": 3.4728342791179313e-09, "loss": 0.1105, "step": 10680 }, { "epoch": 0.9841065094209241, "grad_norm": 0.958982757205956, "learning_rate": 3.432811105804623e-09, "loss": 0.118, "step": 10681 }, { "epoch": 0.9841986455981941, "grad_norm": 0.9241596455799383, "learning_rate": 3.3930197390236175e-09, "loss": 0.114, "step": 10682 }, { "epoch": 0.9842907817754641, "grad_norm": 0.9580131120057819, "learning_rate": 3.353460182469459e-09, "loss": 0.1243, "step": 10683 }, { "epoch": 0.9843829179527341, "grad_norm": 0.9537908329817629, "learning_rate": 3.3141324398150434e-09, "loss": 0.1231, "step": 10684 }, { "epoch": 0.9844750541300041, "grad_norm": 0.9846295496352669, "learning_rate": 3.275036514712171e-09, "loss": 0.1181, "step": 10685 }, { "epoch": 0.9845671903072741, "grad_norm": 1.0269472534084152, "learning_rate": 3.236172410790994e-09, "loss": 0.1243, "step": 10686 }, { "epoch": 0.9846593264845441, "grad_norm": 0.9138841791343236, "learning_rate": 3.1975401316597376e-09, "loss": 0.1114, "step": 10687 }, { "epoch": 0.9847514626618141, "grad_norm": 0.9102065333454794, "learning_rate": 3.1591396809055317e-09, "loss": 0.1126, "step": 10688 }, { "epoch": 0.9848435988390841, "grad_norm": 0.9490694422798538, "learning_rate": 3.120971062094136e-09, "loss": 0.1243, "step": 10689 }, { "epoch": 0.9849357350163541, "grad_norm": 0.9356220763269121, "learning_rate": 3.0830342787693814e-09, "loss": 0.116, "step": 10690 }, { "epoch": 0.9850278711936242, "grad_norm": 0.9780782171880228, "learning_rate": 3.0453293344534507e-09, "loss": 0.1134, "step": 10691 }, { "epoch": 0.9851200073708942, "grad_norm": 0.927888813498766, "learning_rate": 3.007856232647155e-09, "loss": 0.1128, "step": 10692 }, { "epoch": 0.9852121435481642, "grad_norm": 0.987536393409098, "learning_rate": 2.970614976830488e-09, "loss": 0.1182, "step": 10693 }, { "epoch": 0.9853042797254342, "grad_norm": 0.9263137120241127, "learning_rate": 2.933605570460962e-09, "loss": 0.1059, "step": 10694 }, { "epoch": 0.9853964159027042, "grad_norm": 0.9625610479510435, "learning_rate": 2.8968280169747177e-09, "loss": 0.1197, "step": 10695 }, { "epoch": 0.9854885520799742, "grad_norm": 0.9172728982589524, "learning_rate": 2.8602823197868e-09, "loss": 0.1093, "step": 10696 }, { "epoch": 0.9855806882572442, "grad_norm": 0.9632436012736146, "learning_rate": 2.823968482290329e-09, "loss": 0.1253, "step": 10697 }, { "epoch": 0.9856728244345142, "grad_norm": 0.9223452021204606, "learning_rate": 2.787886507857329e-09, "loss": 0.1129, "step": 10698 }, { "epoch": 0.9857649606117842, "grad_norm": 0.8890863207340997, "learning_rate": 2.7520363998376208e-09, "loss": 0.1153, "step": 10699 }, { "epoch": 0.9858570967890542, "grad_norm": 0.9555634706681613, "learning_rate": 2.716418161560208e-09, "loss": 0.1153, "step": 10700 }, { "epoch": 0.9859492329663242, "grad_norm": 0.9023523390251761, "learning_rate": 2.6810317963321674e-09, "loss": 0.116, "step": 10701 }, { "epoch": 0.9860413691435942, "grad_norm": 0.9676451487324907, "learning_rate": 2.6458773074389266e-09, "loss": 0.1128, "step": 10702 }, { "epoch": 0.9861335053208642, "grad_norm": 0.9623707144940772, "learning_rate": 2.610954698145096e-09, "loss": 0.1231, "step": 10703 }, { "epoch": 0.9862256414981343, "grad_norm": 0.9138591551329498, "learning_rate": 2.5762639716925274e-09, "loss": 0.1164, "step": 10704 }, { "epoch": 0.9863177776754043, "grad_norm": 0.9348545180273251, "learning_rate": 2.5418051313028102e-09, "loss": 0.116, "step": 10705 }, { "epoch": 0.9864099138526743, "grad_norm": 0.9045750930758, "learning_rate": 2.507578180175052e-09, "loss": 0.1059, "step": 10706 }, { "epoch": 0.9865020500299443, "grad_norm": 0.9558289080953325, "learning_rate": 2.473583121487544e-09, "loss": 0.1191, "step": 10707 }, { "epoch": 0.9865941862072143, "grad_norm": 0.9052879057162557, "learning_rate": 2.43981995839665e-09, "loss": 0.1145, "step": 10708 }, { "epoch": 0.9866863223844843, "grad_norm": 0.951275220374051, "learning_rate": 2.406288694037362e-09, "loss": 0.1206, "step": 10709 }, { "epoch": 0.9867784585617543, "grad_norm": 0.9285907818070983, "learning_rate": 2.3729893315230234e-09, "loss": 0.1151, "step": 10710 }, { "epoch": 0.9868705947390243, "grad_norm": 0.9395711360817434, "learning_rate": 2.339921873945328e-09, "loss": 0.1192, "step": 10711 }, { "epoch": 0.9869627309162943, "grad_norm": 0.90872727900148, "learning_rate": 2.3070863243745967e-09, "loss": 0.1197, "step": 10712 }, { "epoch": 0.9870548670935643, "grad_norm": 0.9485592509220756, "learning_rate": 2.2744826858597803e-09, "loss": 0.1159, "step": 10713 }, { "epoch": 0.9871470032708343, "grad_norm": 0.9328209116258498, "learning_rate": 2.2421109614279015e-09, "loss": 0.1123, "step": 10714 }, { "epoch": 0.9872391394481043, "grad_norm": 1.0114257066594805, "learning_rate": 2.209971154084889e-09, "loss": 0.129, "step": 10715 }, { "epoch": 0.9873312756253743, "grad_norm": 0.9576620679676022, "learning_rate": 2.1780632668150226e-09, "loss": 0.1192, "step": 10716 }, { "epoch": 0.9874234118026443, "grad_norm": 0.9937479557344089, "learning_rate": 2.1463873025806547e-09, "loss": 0.13, "step": 10717 }, { "epoch": 0.9875155479799144, "grad_norm": 0.8957813580668273, "learning_rate": 2.1149432643233213e-09, "loss": 0.1015, "step": 10718 }, { "epoch": 0.9876076841571844, "grad_norm": 0.9371753197462088, "learning_rate": 2.0837311549620763e-09, "loss": 0.1105, "step": 10719 }, { "epoch": 0.9876998203344544, "grad_norm": 0.9100179659422312, "learning_rate": 2.052750977395157e-09, "loss": 0.1109, "step": 10720 }, { "epoch": 0.9877919565117244, "grad_norm": 0.9000073802913596, "learning_rate": 2.0220027344994285e-09, "loss": 0.1063, "step": 10721 }, { "epoch": 0.9878840926889944, "grad_norm": 0.913903695944919, "learning_rate": 1.9914864291292747e-09, "loss": 0.1135, "step": 10722 }, { "epoch": 0.9879762288662643, "grad_norm": 0.9678585764440448, "learning_rate": 1.961202064118539e-09, "loss": 0.1222, "step": 10723 }, { "epoch": 0.9880683650435343, "grad_norm": 0.9568563012010259, "learning_rate": 1.9311496422791398e-09, "loss": 0.1183, "step": 10724 }, { "epoch": 0.9881605012208043, "grad_norm": 0.9891580511080038, "learning_rate": 1.9013291664013445e-09, "loss": 0.1247, "step": 10725 }, { "epoch": 0.9882526373980743, "grad_norm": 0.8893119723719847, "learning_rate": 1.8717406392537718e-09, "loss": 0.1032, "step": 10726 }, { "epoch": 0.9883447735753443, "grad_norm": 0.9638566658251766, "learning_rate": 1.8423840635842237e-09, "loss": 0.1255, "step": 10727 }, { "epoch": 0.9884369097526143, "grad_norm": 0.9313632687076696, "learning_rate": 1.8132594421180206e-09, "loss": 0.1166, "step": 10728 }, { "epoch": 0.9885290459298843, "grad_norm": 0.9253116813594806, "learning_rate": 1.7843667775593875e-09, "loss": 0.111, "step": 10729 }, { "epoch": 0.9886211821071543, "grad_norm": 0.8668298633727463, "learning_rate": 1.7557060725914566e-09, "loss": 0.0993, "step": 10730 }, { "epoch": 0.9887133182844243, "grad_norm": 0.9730611123571105, "learning_rate": 1.7272773298748769e-09, "loss": 0.1258, "step": 10731 }, { "epoch": 0.9888054544616944, "grad_norm": 0.9001091638779297, "learning_rate": 1.6990805520494813e-09, "loss": 0.1135, "step": 10732 }, { "epoch": 0.9888975906389644, "grad_norm": 0.9557725581836793, "learning_rate": 1.6711157417334533e-09, "loss": 0.1169, "step": 10733 }, { "epoch": 0.9889897268162344, "grad_norm": 0.9184313551073652, "learning_rate": 1.6433829015230497e-09, "loss": 0.1135, "step": 10734 }, { "epoch": 0.9890818629935044, "grad_norm": 0.8740577853942184, "learning_rate": 1.6158820339937098e-09, "loss": 0.1076, "step": 10735 }, { "epoch": 0.9891739991707744, "grad_norm": 0.9511768299527451, "learning_rate": 1.5886131416981144e-09, "loss": 0.1036, "step": 10736 }, { "epoch": 0.9892661353480444, "grad_norm": 0.9315575336147978, "learning_rate": 1.5615762271689593e-09, "loss": 0.1257, "step": 10737 }, { "epoch": 0.9893582715253144, "grad_norm": 0.9735995642076678, "learning_rate": 1.5347712929164594e-09, "loss": 0.1243, "step": 10738 }, { "epoch": 0.9894504077025844, "grad_norm": 0.9609590717734398, "learning_rate": 1.508198341429179e-09, "loss": 0.1245, "step": 10739 }, { "epoch": 0.9895425438798544, "grad_norm": 0.9803389685165724, "learning_rate": 1.481857375174589e-09, "loss": 0.1245, "step": 10740 }, { "epoch": 0.9896346800571244, "grad_norm": 0.9447187065505281, "learning_rate": 1.4557483965985109e-09, "loss": 0.1217, "step": 10741 }, { "epoch": 0.9897268162343944, "grad_norm": 0.9222073699752521, "learning_rate": 1.4298714081248389e-09, "loss": 0.1154, "step": 10742 }, { "epoch": 0.9898189524116644, "grad_norm": 0.9511417934769287, "learning_rate": 1.4042264121566507e-09, "loss": 0.1159, "step": 10743 }, { "epoch": 0.9899110885889344, "grad_norm": 0.9365644687505328, "learning_rate": 1.3788134110750972e-09, "loss": 0.1162, "step": 10744 }, { "epoch": 0.9900032247662045, "grad_norm": 0.9530663162797376, "learning_rate": 1.3536324072394026e-09, "loss": 0.1217, "step": 10745 }, { "epoch": 0.9900953609434745, "grad_norm": 0.9721468538247888, "learning_rate": 1.3286834029879735e-09, "loss": 0.1193, "step": 10746 }, { "epoch": 0.9901874971207445, "grad_norm": 0.917636144996345, "learning_rate": 1.303966400637291e-09, "loss": 0.1153, "step": 10747 }, { "epoch": 0.9902796332980145, "grad_norm": 0.8968146551784478, "learning_rate": 1.279481402481908e-09, "loss": 0.1095, "step": 10748 }, { "epoch": 0.9903717694752845, "grad_norm": 0.9204522570969634, "learning_rate": 1.255228410795839e-09, "loss": 0.1132, "step": 10749 }, { "epoch": 0.9904639056525545, "grad_norm": 0.909280443704693, "learning_rate": 1.2312074278308939e-09, "loss": 0.1053, "step": 10750 }, { "epoch": 0.9905560418298245, "grad_norm": 0.999015146759792, "learning_rate": 1.2074184558169554e-09, "loss": 0.1238, "step": 10751 }, { "epoch": 0.9906481780070945, "grad_norm": 1.0173745580641365, "learning_rate": 1.1838614969633678e-09, "loss": 0.134, "step": 10752 }, { "epoch": 0.9907403141843645, "grad_norm": 0.9423136441090145, "learning_rate": 1.1605365534569922e-09, "loss": 0.1165, "step": 10753 }, { "epoch": 0.9908324503616345, "grad_norm": 0.9484240848690269, "learning_rate": 1.1374436274635968e-09, "loss": 0.1207, "step": 10754 }, { "epoch": 0.9909245865389045, "grad_norm": 0.9701778726691909, "learning_rate": 1.1145827211278548e-09, "loss": 0.1191, "step": 10755 }, { "epoch": 0.9910167227161745, "grad_norm": 0.918169085936637, "learning_rate": 1.0919538365716797e-09, "loss": 0.1095, "step": 10756 }, { "epoch": 0.9911088588934445, "grad_norm": 0.9436039067062373, "learning_rate": 1.069556975896724e-09, "loss": 0.1164, "step": 10757 }, { "epoch": 0.9912009950707145, "grad_norm": 0.961966720663876, "learning_rate": 1.047392141182435e-09, "loss": 0.1157, "step": 10758 }, { "epoch": 0.9912931312479846, "grad_norm": 0.9031848710547364, "learning_rate": 1.0254593344866115e-09, "loss": 0.1156, "step": 10759 }, { "epoch": 0.9913852674252546, "grad_norm": 0.9289358118171895, "learning_rate": 1.00375855784568e-09, "loss": 0.1161, "step": 10760 }, { "epoch": 0.9914774036025246, "grad_norm": 0.9280764395282413, "learning_rate": 9.822898132749726e-10, "loss": 0.1127, "step": 10761 }, { "epoch": 0.9915695397797946, "grad_norm": 0.9245548787551704, "learning_rate": 9.610531027673398e-10, "loss": 0.1099, "step": 10762 }, { "epoch": 0.9916616759570646, "grad_norm": 0.9526967095042527, "learning_rate": 9.400484282950928e-10, "loss": 0.1116, "step": 10763 }, { "epoch": 0.9917538121343346, "grad_norm": 0.9584267650838287, "learning_rate": 9.192757918083383e-10, "loss": 0.1247, "step": 10764 }, { "epoch": 0.9918459483116046, "grad_norm": 0.9599737471955169, "learning_rate": 8.987351952355338e-10, "loss": 0.1188, "step": 10765 }, { "epoch": 0.9919380844888745, "grad_norm": 0.9471789520600152, "learning_rate": 8.7842664048432e-10, "loss": 0.1232, "step": 10766 }, { "epoch": 0.9920302206661445, "grad_norm": 0.9560335153255229, "learning_rate": 8.58350129440133e-10, "loss": 0.123, "step": 10767 }, { "epoch": 0.9921223568434145, "grad_norm": 0.9297203583707537, "learning_rate": 8.385056639670375e-10, "loss": 0.1104, "step": 10768 }, { "epoch": 0.9922144930206845, "grad_norm": 0.9557436945001951, "learning_rate": 8.188932459077259e-10, "loss": 0.1148, "step": 10769 }, { "epoch": 0.9923066291979545, "grad_norm": 0.9382086601075487, "learning_rate": 7.995128770829641e-10, "loss": 0.1229, "step": 10770 }, { "epoch": 0.9923987653752245, "grad_norm": 0.9776951290640383, "learning_rate": 7.803645592927012e-10, "loss": 0.1261, "step": 10771 }, { "epoch": 0.9924909015524946, "grad_norm": 0.9234222588432033, "learning_rate": 7.614482943144041e-10, "loss": 0.1078, "step": 10772 }, { "epoch": 0.9925830377297646, "grad_norm": 0.9425926563945666, "learning_rate": 7.427640839044458e-10, "loss": 0.1165, "step": 10773 }, { "epoch": 0.9926751739070346, "grad_norm": 0.9997978213398736, "learning_rate": 7.243119297981049e-10, "loss": 0.1301, "step": 10774 }, { "epoch": 0.9927673100843046, "grad_norm": 0.9622010406970339, "learning_rate": 7.060918337081779e-10, "loss": 0.1298, "step": 10775 }, { "epoch": 0.9928594462615746, "grad_norm": 0.9629025338726946, "learning_rate": 6.881037973266447e-10, "loss": 0.1085, "step": 10776 }, { "epoch": 0.9929515824388446, "grad_norm": 0.9442954815335793, "learning_rate": 6.703478223235582e-10, "loss": 0.1183, "step": 10777 }, { "epoch": 0.9930437186161146, "grad_norm": 0.9778711461998094, "learning_rate": 6.528239103478773e-10, "loss": 0.1207, "step": 10778 }, { "epoch": 0.9931358547933846, "grad_norm": 0.8971965968414244, "learning_rate": 6.355320630263561e-10, "loss": 0.108, "step": 10779 }, { "epoch": 0.9932279909706546, "grad_norm": 0.944982803475713, "learning_rate": 6.184722819646549e-10, "loss": 0.1075, "step": 10780 }, { "epoch": 0.9933201271479246, "grad_norm": 0.9722778893436133, "learning_rate": 6.016445687467842e-10, "loss": 0.1195, "step": 10781 }, { "epoch": 0.9934122633251946, "grad_norm": 0.9633531416564072, "learning_rate": 5.850489249351054e-10, "loss": 0.1316, "step": 10782 }, { "epoch": 0.9935043995024646, "grad_norm": 0.8947891332160514, "learning_rate": 5.686853520708857e-10, "loss": 0.1036, "step": 10783 }, { "epoch": 0.9935965356797346, "grad_norm": 0.8921400585380197, "learning_rate": 5.525538516729101e-10, "loss": 0.1088, "step": 10784 }, { "epoch": 0.9936886718570046, "grad_norm": 0.9192509868974265, "learning_rate": 5.366544252397021e-10, "loss": 0.1181, "step": 10785 }, { "epoch": 0.9937808080342747, "grad_norm": 0.9294284724982631, "learning_rate": 5.209870742467482e-10, "loss": 0.1192, "step": 10786 }, { "epoch": 0.9938729442115447, "grad_norm": 0.9204015586179748, "learning_rate": 5.055518001492731e-10, "loss": 0.1158, "step": 10787 }, { "epoch": 0.9939650803888147, "grad_norm": 0.9626572036280917, "learning_rate": 4.903486043802974e-10, "loss": 0.1205, "step": 10788 }, { "epoch": 0.9940572165660847, "grad_norm": 0.9249171298942248, "learning_rate": 4.75377488351747e-10, "loss": 0.1172, "step": 10789 }, { "epoch": 0.9941493527433547, "grad_norm": 0.916272268645282, "learning_rate": 4.6063845345306613e-10, "loss": 0.1096, "step": 10790 }, { "epoch": 0.9942414889206247, "grad_norm": 0.9606174055026327, "learning_rate": 4.4613150105315974e-10, "loss": 0.1159, "step": 10791 }, { "epoch": 0.9943336250978947, "grad_norm": 0.9366043316921812, "learning_rate": 4.3185663249900587e-10, "loss": 0.1151, "step": 10792 }, { "epoch": 0.9944257612751647, "grad_norm": 0.9470015355101646, "learning_rate": 4.1781384911593336e-10, "loss": 0.1106, "step": 10793 }, { "epoch": 0.9945178974524347, "grad_norm": 0.9553448889331428, "learning_rate": 4.040031522078991e-10, "loss": 0.1155, "step": 10794 }, { "epoch": 0.9946100336297047, "grad_norm": 0.943307344715156, "learning_rate": 3.904245430569331e-10, "loss": 0.1182, "step": 10795 }, { "epoch": 0.9947021698069747, "grad_norm": 0.941980364543447, "learning_rate": 3.7707802292424877e-10, "loss": 0.1068, "step": 10796 }, { "epoch": 0.9947943059842447, "grad_norm": 0.9633082238006477, "learning_rate": 3.639635930491325e-10, "loss": 0.1169, "step": 10797 }, { "epoch": 0.9948864421615147, "grad_norm": 0.9486204676908485, "learning_rate": 3.5108125464866636e-10, "loss": 0.1149, "step": 10798 }, { "epoch": 0.9949785783387847, "grad_norm": 0.9663686096850286, "learning_rate": 3.3843100891939316e-10, "loss": 0.118, "step": 10799 }, { "epoch": 0.9950707145160548, "grad_norm": 0.922095330879719, "learning_rate": 3.260128570359289e-10, "loss": 0.1164, "step": 10800 }, { "epoch": 0.9951628506933248, "grad_norm": 1.000098514142364, "learning_rate": 3.138268001509626e-10, "loss": 0.1222, "step": 10801 }, { "epoch": 0.9952549868705948, "grad_norm": 0.9719465806233039, "learning_rate": 3.018728393963666e-10, "loss": 0.1253, "step": 10802 }, { "epoch": 0.9953471230478648, "grad_norm": 0.9543309585482759, "learning_rate": 2.901509758820864e-10, "loss": 0.1206, "step": 10803 }, { "epoch": 0.9954392592251348, "grad_norm": 0.9013190921660513, "learning_rate": 2.786612106961406e-10, "loss": 0.11, "step": 10804 }, { "epoch": 0.9955313954024048, "grad_norm": 0.9654267379006883, "learning_rate": 2.674035449054535e-10, "loss": 0.114, "step": 10805 }, { "epoch": 0.9956235315796748, "grad_norm": 0.995907441347612, "learning_rate": 2.563779795553001e-10, "loss": 0.1223, "step": 10806 }, { "epoch": 0.9957156677569448, "grad_norm": 0.9689107866786532, "learning_rate": 2.455845156695835e-10, "loss": 0.1231, "step": 10807 }, { "epoch": 0.9958078039342148, "grad_norm": 0.8690683165999983, "learning_rate": 2.350231542502801e-10, "loss": 0.0981, "step": 10808 }, { "epoch": 0.9958999401114847, "grad_norm": 0.9517071622004358, "learning_rate": 2.2469389627827188e-10, "loss": 0.1242, "step": 10809 }, { "epoch": 0.9959920762887547, "grad_norm": 0.9644950560808359, "learning_rate": 2.1459674271251397e-10, "loss": 0.1179, "step": 10810 }, { "epoch": 0.9960842124660247, "grad_norm": 0.9585515695749373, "learning_rate": 2.0473169449031217e-10, "loss": 0.1181, "step": 10811 }, { "epoch": 0.9961763486432947, "grad_norm": 0.8761769302597302, "learning_rate": 1.9509875252787803e-10, "loss": 0.1071, "step": 10812 }, { "epoch": 0.9962684848205648, "grad_norm": 0.9240358594187232, "learning_rate": 1.856979177194962e-10, "loss": 0.1127, "step": 10813 }, { "epoch": 0.9963606209978348, "grad_norm": 0.90524334167604, "learning_rate": 1.7652919093807952e-10, "loss": 0.1084, "step": 10814 }, { "epoch": 0.9964527571751048, "grad_norm": 0.9512308901033537, "learning_rate": 1.675925730348915e-10, "loss": 0.1181, "step": 10815 }, { "epoch": 0.9965448933523748, "grad_norm": 0.8919879468606943, "learning_rate": 1.5888806484010143e-10, "loss": 0.1092, "step": 10816 }, { "epoch": 0.9966370295296448, "grad_norm": 0.928927024723345, "learning_rate": 1.5041566716139656e-10, "loss": 0.1152, "step": 10817 }, { "epoch": 0.9967291657069148, "grad_norm": 0.9682198116348629, "learning_rate": 1.4217538078536985e-10, "loss": 0.121, "step": 10818 }, { "epoch": 0.9968213018841848, "grad_norm": 0.9812596295403685, "learning_rate": 1.3416720647779768e-10, "loss": 0.1254, "step": 10819 }, { "epoch": 0.9969134380614548, "grad_norm": 0.9476046140101004, "learning_rate": 1.263911449816968e-10, "loss": 0.1228, "step": 10820 }, { "epoch": 0.9970055742387248, "grad_norm": 0.8940822513705196, "learning_rate": 1.1884719701926727e-10, "loss": 0.1175, "step": 10821 }, { "epoch": 0.9970977104159948, "grad_norm": 0.9495262015744932, "learning_rate": 1.1153536329078229e-10, "loss": 0.1171, "step": 10822 }, { "epoch": 0.9971898465932648, "grad_norm": 0.9466157440151927, "learning_rate": 1.0445564447542078e-10, "loss": 0.1192, "step": 10823 }, { "epoch": 0.9972819827705348, "grad_norm": 0.9413453778321897, "learning_rate": 9.760804123015721e-11, "loss": 0.1141, "step": 10824 }, { "epoch": 0.9973741189478048, "grad_norm": 0.9226586113738102, "learning_rate": 9.099255419114938e-11, "loss": 0.1177, "step": 10825 }, { "epoch": 0.9974662551250748, "grad_norm": 0.9383011446764592, "learning_rate": 8.460918397262818e-11, "loss": 0.1128, "step": 10826 }, { "epoch": 0.9975583913023449, "grad_norm": 0.9252669138882176, "learning_rate": 7.845793116717515e-11, "loss": 0.1134, "step": 10827 }, { "epoch": 0.9976505274796149, "grad_norm": 0.9388645197912326, "learning_rate": 7.253879634600003e-11, "loss": 0.1238, "step": 10828 }, { "epoch": 0.9977426636568849, "grad_norm": 0.9377529336829813, "learning_rate": 6.685178005838567e-11, "loss": 0.1114, "step": 10829 }, { "epoch": 0.9978347998341549, "grad_norm": 1.0046882927242726, "learning_rate": 6.139688283279821e-11, "loss": 0.1272, "step": 10830 }, { "epoch": 0.9979269360114249, "grad_norm": 0.9156009519149251, "learning_rate": 5.617410517549937e-11, "loss": 0.1112, "step": 10831 }, { "epoch": 0.9980190721886949, "grad_norm": 0.9902312030633188, "learning_rate": 5.118344757165661e-11, "loss": 0.1272, "step": 10832 }, { "epoch": 0.9981112083659649, "grad_norm": 0.9937864237747287, "learning_rate": 4.6424910484232924e-11, "loss": 0.1351, "step": 10833 }, { "epoch": 0.9982033445432349, "grad_norm": 0.9211831585262729, "learning_rate": 4.189849435565219e-11, "loss": 0.1033, "step": 10834 }, { "epoch": 0.9982954807205049, "grad_norm": 0.9613565174736062, "learning_rate": 3.7604199605578705e-11, "loss": 0.1195, "step": 10835 }, { "epoch": 0.9983876168977749, "grad_norm": 0.9636026205660925, "learning_rate": 3.35420266328601e-11, "loss": 0.1267, "step": 10836 }, { "epoch": 0.9984797530750449, "grad_norm": 0.9489533548789341, "learning_rate": 2.9711975814972205e-11, "loss": 0.1205, "step": 10837 }, { "epoch": 0.9985718892523149, "grad_norm": 0.9312987097224975, "learning_rate": 2.6114047507463936e-11, "loss": 0.1177, "step": 10838 }, { "epoch": 0.9986640254295849, "grad_norm": 0.9687758766239608, "learning_rate": 2.2748242044234868e-11, "loss": 0.1155, "step": 10839 }, { "epoch": 0.998756161606855, "grad_norm": 0.9124547506561508, "learning_rate": 1.9614559738090345e-11, "loss": 0.1206, "step": 10840 }, { "epoch": 0.998848297784125, "grad_norm": 0.9012464083753811, "learning_rate": 1.671300087935368e-11, "loss": 0.1137, "step": 10841 }, { "epoch": 0.998940433961395, "grad_norm": 0.9556438025043256, "learning_rate": 1.4043565738364184e-11, "loss": 0.112, "step": 10842 }, { "epoch": 0.999032570138665, "grad_norm": 0.9389183452029211, "learning_rate": 1.1606254562146479e-11, "loss": 0.1132, "step": 10843 }, { "epoch": 0.999124706315935, "grad_norm": 0.906558779678973, "learning_rate": 9.401067577463618e-12, "loss": 0.1083, "step": 10844 }, { "epoch": 0.999216842493205, "grad_norm": 0.9626475810080135, "learning_rate": 7.428004988874194e-12, "loss": 0.1201, "step": 10845 }, { "epoch": 0.999308978670475, "grad_norm": 0.9183400382526548, "learning_rate": 5.687066979565003e-12, "loss": 0.1133, "step": 10846 }, { "epoch": 0.999401114847745, "grad_norm": 0.9157534567793171, "learning_rate": 4.178253711351054e-12, "loss": 0.1123, "step": 10847 }, { "epoch": 0.999493251025015, "grad_norm": 0.9607427044331877, "learning_rate": 2.9015653243980034e-12, "loss": 0.1194, "step": 10848 }, { "epoch": 0.999585387202285, "grad_norm": 0.9133340683724209, "learning_rate": 1.8570019369446025e-12, "loss": 0.1209, "step": 10849 }, { "epoch": 0.999677523379555, "grad_norm": 0.9797017728895512, "learning_rate": 1.044563646135366e-12, "loss": 0.1213, "step": 10850 }, { "epoch": 0.999769659556825, "grad_norm": 0.950547168845273, "learning_rate": 4.642505274654596e-13, "loss": 0.1109, "step": 10851 }, { "epoch": 0.999861795734095, "grad_norm": 0.9546759905442439, "learning_rate": 1.1606263450314458e-13, "loss": 0.1163, "step": 10852 }, { "epoch": 0.999953931911365, "grad_norm": 0.99401938130207, "learning_rate": 0.0, "loss": 0.1287, "step": 10853 }, { "epoch": 0.999953931911365, "step": 10853, "total_flos": 1908258935930880.0, "train_loss": 0.14796658507510943, "train_runtime": 123515.0295, "train_samples_per_second": 5.624, "train_steps_per_second": 0.088 } ], "logging_steps": 1, "max_steps": 10853, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1908258935930880.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }